AI Vector Search #1 - 기본 기능
AI Vector Search #2 - HNSW 벡터 인덱스
AI Vector Search #3 - IVF 벡터 인덱스
IVF(Inverted File Flat) 벡터 인덱스는 Neighbor Partition 벡터 인덱스의 한 형태로 Neighbor 파티션 또는 클러스터를 사용해 검색 영역을 좁혀 조회 효율을 높이는 파티션 기반 인덱스입니다. 이 글에서는 IVF 벡터 인덱스의 구조와 동작에 대해 간단히 살펴보겠습니다.
테스트 버전은 아래와 같습니다.
-- 1
SELECT version_full FROM product_component_version;
VERSION_FULL
------------
23.4.0.24.05
1 row selected.
테스트를 위해 아래와 같이 테이블을 생성하겠습니다.
-- 2
DROP TABLE t1 PURGE;
CREATE TABLE t1 (c1, c2, c3, CONSTRAINT t1_pk PRIMARY KEY (c1, c2)) AS
SELECT a.c1, b.c1, TO_VECTOR (JSON_ARRAY (a.c1, b.c1))
FROM (SELECT ROWNUM AS c1 FROM XMLTABLE ('1 to 3')) a
, (SELECT ROWNUM AS c1 FROM XMLTABLE ('1 to 3')) b;
아래와 같이 IVF 벡터 인덱스를 생성하겠습니다.
-- 3-1: Inverted File Flat (IVF)
CREATE VECTOR INDEX t1_x1 ON t1 (c3) ORGANIZATION NEIGHBOR PARTITIONS DISTANCE EUCLIDEAN WITH TARGET ACCURACY 95;
-- 3-2
SELECT idx_owner#, idx_name, idx_base_table_objn, idx_base_table_owner#
FROM vecsys.vector$index;
IDX_OWNER# IDX_NAME IDX_BASE_TABLE_OBJN IDX_BASE_TABLE_OWNER#
---------- -------- ------------------- ---------------------
133 T1_X1 90026 133
1 row selected.
IVF 벡터 인덱스의 index_type 값은 VECTOR, index_subtype 값은 NEIGHBOR_PARTITIONS_IVF입니다. IVF 벡터 인덱스는 인덱스 세그먼트가 생성되지 않으며, 암시적으로 추가 오브젝트를 생성합니다. *_INDEXES 뷰를 조회하면 2개의 B-tree 인덱스와 2개의 테이블이 생성된 것을 확인할 수 있습니다.
-- 4-1
SELECT index_name, index_type, index_subtype
FROM user_indexes
WHERE index_name LIKE '%T1_X1%';
INDEX_NAME INDEX_TYPE INDEX_SUBTYPE
---------- ---------- -----------------------
T1_X1 VECTOR NEIGHBOR_PARTITIONS_IVF
1 row selected.
-- 4-2
SELECT segment_name, segment_type
FROM user_segments
WHERE segment_name = 'T1_X1';
no rows selected
-- 4-3
SELECT index_name, index_type, table_name, uniqueness, partitioned
FROM user_indexes
WHERE table_name LIKE '%T1_X1%';
INDEX_NAME INDEX_TYPE TABLE_NAME UNIQUENESS PARTITIONED
------------------------ ---------- ------------------------------------------------------- ---------- -----------
SYS_IL0000089895C00002$$ LOB VECTOR$T1_X1$89891_89894_0$IVF_FLAT_CENTROIDS UNIQUE NO
SYS_C008623 NORMAL VECTOR$T1_X1$89891_89894_0$IVF_FLAT_CENTROIDS UNIQUE NO
SYS_IL0000089899C00003$$ LOB VECTOR$T1_X1$89891_89894_0$IVF_FLAT_CENTROID_PARTITIONS UNIQUE YES
SYS_C008625 NORMAL VECTOR$T1_X1$89891_89894_0$IVF_FLAT_CENTROID_PARTITIONS UNIQUE NO
4 rows selected.
테이블의 접두어는 VECTOR${index_name}${sequence}$IVF_FLAT_이며, 접미어가 CENTROIDS인 비파티션 테이블과 접미어가 CENTROID_PARTITIONS인 파티션 테이블이 생성됩니다.
-- 5-1
SELECT table_name, partitioned
FROM user_tables
WHERE table_name LIKE '%T1_X1%';
TABLE_NAME PARTITIONED
------------------------------------------------------- -----------
VECTOR$T1_X1$89891_89894_0$IVF_FLAT_CENTROIDS NO
VECTOR$T1_X1$89891_89894_0$IVF_FLAT_CENTROID_PARTITIONS YES
2 rows selected.
-- 5-2
SELECT table_name, partitioning_type, subpartitioning_type, partition_count
FROM user_part_tables
WHERE table_name LIKE '%T1_X1%';
TABLE_NAME PARTITIONING_TYPE SUBPARTITIONING_TYPE PARTITION_COUNT
------------------------------------------------------- ----------------- -------------------- ---------------
VECTOR$T1_X1$89891_89894_0$IVF_FLAT_CENTROID_PARTITIONS LIST NONE 10
1 row selected.
-- 5-3
SELECT name, column_name, column_position
FROM user_part_key_columns
WHERE name LIKE '%T1_X1%';
NAME COLUMN_NAME COLUMN_POSITION
------------------------------------------------------- ----------- ---------------
VECTOR$T1_X1$89891_89894_0$IVF_FLAT_CENTROID_PARTITIONS CENTROID_ID 1
1 row selected.
-- 5-4
SELECT *
FROM vector$t1_x1$89891_89894_0$ivf_flat_centroids;
CENTROID_ID CENTROID_VECTOR
----------- -------------------
1 [1.0E+000,2.0E+000]
2 [1.0E+000,1.0E+000]
3 [2.0E+000,3.0E+000]
4 [2.0E+000,2.0E+000]
5 [3.0E+000,1.0E+000]
6 [3.0E+000,2.0E+000]
7 [1.0E+000,3.0E+000]
8 [2.0E+000,1.0E+000]
9 [3.0E+000,3.0E+000]
9 rows selected.
-- 5-5
SELECT *
FROM vector$t1_x1$89891_89894_0$ivf_flat_centroid_partitions;
BASE_TABLE_ROWID CENTROID_ID DATA_VECTOR
------------------ ----------- -------------------
AAAV8jAAAAAACIjAAB 1 [1.0E+000,2.0E+000]
AAAV8jAAAAAACIjAAA 2 [1.0E+000,1.0E+000]
AAAV8jAAAAAACIjAAF 3 [2.0E+000,3.0E+000]
AAAV8jAAAAAACIjAAE 4 [2.0E+000,2.0E+000]
AAAV8jAAAAAACIjAAG 5 [3.0E+000,1.0E+000]
AAAV8jAAAAAACIjAAH 6 [3.0E+000,2.0E+000]
AAAV8jAAAAAACIjAAC 7 [1.0E+000,3.0E+000]
AAAV8jAAAAAACIjAAD 8 [2.0E+000,1.0E+000]
AAAV8jAAAAAACIjAAI 9 [3.0E+000,3.0E+000]
9 rows selected.
아래는 VECTOR_INDEX_SCAN 힌트를 사용한 실행 계획입니다. Hint Report 항목에서 VECTOR_INDEX_SCAN 힌트는 HNSW 인덱스에만 동작한다는 것을 알 수 있습니다. (no HNSW vector index matching VECTOR_DISTANCE in ORDER BY)
-- 6
SELECT /*+ VECTOR_INDEX_SCAN(T1) */
c1, c2, c3
, VECTOR_DISTANCE (c3, '[1,1]', EUCLIDEAN) AS euclidean
FROM t1
ORDER BY VECTOR_DISTANCE (c3, '[1,1]', EUCLIDEAN)
FETCH APPROX FIRST 5 ROWS ONLY;
--------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | Buffers | OMem | 1Mem | Used-Mem |
--------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 3 | | | |
|* 1 | COUNT STOPKEY | | 1 | 3 | | | |
| 2 | VIEW | | 1 | 3 | | | |
|* 3 | SORT ORDER BY STOPKEY| | 1 | 3 | 2048 | 2048 | 2048 (0)|
| 4 | TABLE ACCESS FULL | T1 | 1 | 3 | | | |
--------------------------------------------------------------------------------------
Hint Report (identified by operation id / Query Block Name / Object Alias):
Total hints for statement: 1 (U - Unused (1))
---------------------------------------------------------------------------
4 - SEL$1 / "T1"@"SEL$1"
U - VECTOR_INDEX_SCAN(T1) / no HNSW vector index matching VECTOR_DISTANCE in ORDER BY
아래는 VECTOR_INDEX_SCAN 힌트를 사용한 실행 계획입니다. 문서와 동일한 실행 계획을 생성하기 위해 SWAP_JOIN_INPUTS 힌트를 추가로 사용했습니다. IVF 인덱스 기능은 전통적인 인덱스 동작이 아닌 추가로 생성한 오브젝트를 활용한 쿼리 변환 방식으로 구현된 것으로 보입니다.
-- 7
SELECT /*+ VECTOR_INDEX_TRANSFORM(T1) SWAP_JOIN_INPUTS(VW_IVCR_2D77159E) */
c1, c2, c3, VECTOR_DISTANCE (c3, '[1,1]', EUCLIDEAN) AS euclidean
FROM t1
WHERE (c1, c2) != (1, 1)
ORDER BY VECTOR_DISTANCE (c3, '[1,1]', EUCLIDEAN)
FETCH APPROX FIRST 5 ROWS ONLY;
---------------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | A-Rows | Buffers |
---------------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 5 | 22 |
|* 1 | COUNT STOPKEY | | 1 | 5 | 22 |
| 2 | VIEW | | 1 | 5 | 22 |
|* 3 | SORT ORDER BY STOPKEY | | 1 | 5 | 22 |
|* 4 | HASH JOIN | | 1 | 5 | 22 |
| 5 | VIEW | VW_IVCR_2D77159E | 1 | 6 | 7 |
|* 6 | COUNT STOPKEY | | 1 | 6 | 7 |
| 7 | VIEW | VW_IVCN_9A1D2119 | 1 | 6 | 7 |
|* 8 | SORT ORDER BY STOPKEY | | 1 | 6 | 7 |
| 9 | TABLE ACCESS FULL | VECTOR$T1_X1$89891_89894_0$IVF_FLAT_CENTROIDS | 1 | 9 | 7 |
| 10 | NESTED LOOPS | | 1 | 8 | 15 |
|* 11 | TABLE ACCESS FULL | T1 | 1 | 8 | 3 |
| 12 | TABLE ACCESS BY GLOBAL INDEX ROWID| VECTOR$T1_X1$89891_89894_0$IVF_FLAT_CENTROID_PARTITIONS | 8 | 8 | 12 |
|* 13 | INDEX UNIQUE SCAN | SYS_C008699 | 8 | 8 | 4 |
---------------------------------------------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter(ROWNUM<=5)
3 - filter(ROWNUM<=5)
4 - access("VW_IVCR_2D77159E"."CENTROID_ID"="VTIX_CNPART"."CENTROID_ID")
6 - filter(ROWNUM<=6)
8 - filter(ROWNUM<=6)
11 - filter(("T1"."C2"<>1 OR "T1"."C1"<>1))
13 - access("T1".ROWID="VTIX_CNPART"."BASE_TABLE_ROWID")
아래는 10053 트레이스의 Final query입니다. 변환된 쿼리의 성능에 대해서는 향후 추가적인 검증이 필요할 것으로 판단됩니다.
-- 8
SELECT "from$_subquery$_002"."C1" "C1"
, "from$_subquery$_002"."C2" "C2"
, "from$_subquery$_002"."C3" "C3"
, "from$_subquery$_002"."EUCLIDEAN" "EUCLIDEAN"
FROM (SELECT "T1"."C1" "C1"
, "T1"."C2" "C2"
, "T1"."C3" /*+ LOB_BY_VALUE */ "C3"
, VECTOR_DISTANCE ("T1"."C3" /*+ LOB_BY_VALUE */, VECTOR ('[1,1]', *, * /*+ USEBLOBPCW_QVCGMD */), EUCLIDEAN) "EUCLIDEAN"
FROM (SELECT "VW_IVCN_9A1D2119"."CENTROID_ID" "CENTROID_ID"
FROM (SELECT "VTIX_CENTRD"."CENTROID_ID" "CENTROID_ID"
FROM "TUNA"."VECTOR$T1_X1$89891_89894_0$IVF_FLAT_CENTROIDS" "VTIX_CENTRD"
ORDER BY VECTOR_DISTANCE (
"VTIX_CENTRD"."CENTROID_VECTOR"
, VECTOR ('[1,1]', *, * /*+ USEBLOBPCW_QVCGMD */)
, EUCLIDEAN)) "VW_IVCN_9A1D2119"
WHERE ROWNUM <= 6) "VW_IVCR_2D77159E"
, "TUNA"."VECTOR$T1_X1$89891_89894_0$IVF_FLAT_CENTROID_PARTITIONS" "VTIX_CNPART"
, "TUNA"."T1" "T1"
WHERE ( "T1"."C2" <> 1
OR "T1"."C1" <> 1)
AND "T1".ROWID = "VTIX_CNPART"."BASE_TABLE_ROWID"
AND "VW_IVCR_2D77159E"."CENTROID_ID" = "VTIX_CNPART"."CENTROID_ID"
ORDER BY VECTOR_DISTANCE ("T1"."C3" /*+ LOB_BY_VALUE */, VECTOR ('[1,1]', *, * /*+ USEBLOBPCW_QVCGMD */), EUCLIDEAN)
) "from$_subquery$_002"
WHERE ROWNUM <= 5
HNSW 벡터 인덱스와 달리 IVF 인덱스를 생성한 테이블은 DML을 지원합니다.
-- 9
INSERT INTO t1 SELECT * FROM t1;
9 rows created.