FLOAT32 Vector Generator

2025. 4. 10.·Oracle/Vector

Oracle AI Vector Search User's Guide는 SQL Quick Start Using a FLOAT32 Vector Generator 절에서 FLOAT32 벡터를 생성할 수 있는 패키지를 제공합니다. 이 패키지는 차원 값이 커질수록 CLOB 타입의 벡터에서 좌표 값을 가져오는 get_coordinate 함수에 의해 성능이 급격히 저하됩니다.

아래 예제는 500 차원의 벡터를 100 로우 생성한 결과를 보여줍니다.

-- 1
BEGIN
    vector_gen_pkg.generate_vectors (
        num_vectors     => 100 -- Number of vectors to generate. Must be 1 or above
      , dimensions      => 500 -- Number of dimensions of each vector. Must be above 1 but less than 500
      , num_clusters    => 6   -- Number of clusters to create. Must be 1 or above
      , cluster_spread  => 1   -- Relative closeness of each vector in each cluster (using standard deviation). Must be grather than 0
      , min_value       => 0   -- Minimum value for a vector coordinate
      , max_value       => 100 -- Maximum value for a vector coordinate. Min value must be smaller than max value
    );
END;
/

Elapsed: 00:01:20.00


아래는 Varray를 사용하여 성능을 개선한 FLOAT32 Vector Generator 패키지입니다.

-- 2-1
CREATE OR REPLACE PACKAGE vector_gen_pkg
AS
    PROCEDURE generate_vectors (
        num_vectors    IN PLS_INTEGER -- Number of vectors to generate
      , dimensions     IN PLS_INTEGER -- Number of dimensions of each vector
      , num_clusters   IN PLS_INTEGER -- Number of clusters to create
      , cluster_spread IN NUMBER      -- Relative closeness of each vector in each cluster (using standard deviation)
      , min_value      IN NUMBER      -- Minimum value for a vector coordinate
      , max_value      IN NUMBER      -- Maximum value for a vector coordinate
    );
END vector_gen_pkg;
/

SHOW ERROR

-- 2-2
CREATE OR REPLACE PACKAGE BODY vector_gen_pkg
AS
    TYPE t_vector IS VARRAY(500) OF NUMBER;

    PROCEDURE generate_random_vector (
        dimensions IN  PLS_INTEGER
      , min_value  IN  NUMBER
      , max_value  IN  NUMBER
      , vec        OUT t_vector
    )
    IS
        e t_vector := t_vector ();
    BEGIN
        FOR i IN 1 .. dimensions LOOP
            e.EXTEND;
            e(e.LAST) := DBMS_RANDOM.VALUE (min_value, max_value);
        END LOOP;

        vec := e;
    END generate_random_vector;

    PROCEDURE generate_clustered_vector (
        centroid       IN  t_vector
      , cluster_spread IN  NUMBER
      , vec            OUT t_vector
    )
    IS
        e t_vector := t_vector ();
    BEGIN
        FOR i IN 1 .. centroid.COUNT LOOP
            e.EXTEND;
            e(e.LAST) := centroid(i) + (DBMS_RANDOM.NORMAL * cluster_spread);
        END LOOP;

        vec := e;
    END generate_clustered_vector;

    FUNCTION normalize_vector (
        vec IN t_vector
    )
        RETURN t_vector
    IS
        e t_vector := t_vector ();
        n NUMBER;
    BEGIN
        n := VECTOR_NORM (VECTOR (JSON_SERIALIZE (JSON (vec))));

        FOR i IN 1 .. vec.COUNT LOOP
            e.EXTEND;
            e(i) := vec(i) / n;
        END LOOP;

        RETURN e;
    END normalize_vector;

    PROCEDURE generate_vectors (
        num_vectors    IN PLS_INTEGER -- Must be 1 or above
      , dimensions     IN PLS_INTEGER -- Must be above 1 but less than 500
      , num_clusters   IN PLS_INTEGER -- Must be 1 or above
      , cluster_spread IN NUMBER      -- Must be grather than 0
      , min_value      IN NUMBER
      , max_value      IN NUMBER
    )
    IS
        TYPE t_vectors IS TABLE OF t_vector INDEX BY PLS_INTEGER;

        centroids           t_vectors;
        vectors_per_cluster PLS_INTEGER;
        remaining_vectors   PLS_INTEGER;
        vec                 t_vector;
        idx                 PLS_INTEGER := 1;
        max_id              NUMBER;
        working_vector      t_vector;
        v                   VECTOR;
        nv                  VECTOR;
    BEGIN
        IF    (num_vectors) <= 0
           OR (num_clusters < 1)
           OR (num_vectors < num_clusters)
           OR (dimensions <= 0)
           OR (dimensions > 500)
           OR (cluster_spread <= 0)
           OR (min_value >= max_value)
        THEN
            RETURN;
        END IF;

        SELECT MAX (ID) INTO max_id FROM genvec;

        IF max_id IS NULL THEN
            max_id := 0;
        END IF;

        -- Generate cluster centroids
        FOR i IN 1 .. num_clusters LOOP
            generate_random_vector (dimensions, min_value, max_value, centroids(i));
            working_vector := normalize_vector (centroids(i));
            v := VECTOR (JSON_SERIALIZE (JSON (centroids(i))));
            nv := VECTOR (JSON_SERIALIZE (JSON (working_vector)));

            INSERT
              INTO genvec
            VALUES (max_id + idx, v, 'C' || i, nv, DBMS_RANDOM.VALUE (3, 600000000));

            idx := idx + 1;
        END LOOP;

        -- Calculate vectors per cluster
        vectors_per_cluster := TRUNC (num_vectors / num_clusters);
        remaining_vectors := num_vectors MOD num_clusters;

        -- Generate vectors for each cluster
        IF vectors_per_cluster > 1 THEN
            FOR i IN 1 .. num_clusters LOOP
                FOR j IN 1 .. (vectors_per_cluster - 1) LOOP
                    generate_clustered_vector (centroids(i), cluster_spread, vec);
                    working_vector := normalize_vector (vec);
                    v := VECTOR (JSON_SERIALIZE (JSON (centroids(i))));
                    nv := VECTOR (JSON_SERIALIZE (JSON (working_vector)));

                    INSERT
                      INTO genvec
                    VALUES (max_id + idx, v, 'C' || i || '-' || j, nv, DBMS_RANDOM.VALUE (3, 600000000));

                    idx := idx + 1;
                END LOOP;
            END LOOP;
        END IF;

        -- Handle remaining vectors: all associated with cluster 1
        IF remaining_vectors > 0 THEN
            FOR j IN 1 .. remaining_vectors LOOP
                generate_clustered_vector (centroids (1), cluster_spread, vec);
                working_vector := normalize_vector (vec);
                    v := VECTOR (JSON_SERIALIZE (JSON (centroids(1))));
                    nv := VECTOR (JSON_SERIALIZE (JSON (working_vector)));

                INSERT
                  INTO genvec
                VALUES (max_id + idx, v, 'C1-' || idx, nv, DBMS_RANDOM.VALUE (3, 600000000));

                idx := idx + 1;
            END LOOP;
        END IF;

        COMMIT;
    END generate_vectors;
END vector_gen_pkg;
/

SHOW ERROR


아래 예제는 500 차원의 벡터를 100 로우 생성한 결과를 보여줍니다. 수행 시간이 80초에서 0.66초로 감소한 것을 볼 수 있습니다.

-- 3
BEGIN
    vector_gen_pkg.generate_vectors (
        num_vectors     => 100 -- Number of vectors to generate. Must be 1 or above
      , dimensions      => 500 -- Number of dimensions of each vector. Must be above 1 but less than 500
      , num_clusters    => 6   -- Number of clusters to create. Must be 1 or above
      , cluster_spread  => 1   -- Relative closeness of each vector in each cluster (using standard deviation). Must be grather than 0
      , min_value       => 0   -- Minimum value for a vector coordinate
      , max_value       => 100 -- Maximum value for a vector coordinate. Min value must be smaller than max value
    );
END;
/

Elapsed: 00:00:00.66
저작자표시 비영리 변경금지 (새창열림)
'Oracle/Vector' 카테고리의 다른 글
  • Hybrid Vector Index
  • Relational Data Vectorization
  • ONNX Model
  • AI Vector Search #3 - IVF 벡터 인덱스
정희락
정희락
2007년부터 Oracle Database 성능 최적화에 관심을 가져왔습니다. 현재 한국오라클 Engineered Systems Solution Engineering 팀에서 Solution Engineer로 근무하고 있습니다. 이 블로그는 개인적인 연구 목적으로 운영되며 Oracle 사의 공식적인 입장을 대변하지 않습니다.
  • 정희락
    TunA
    정희락
  • 전체
    오늘
    어제
    • 분류 전체보기 (206)
      • Oracle (177)
        • SQL (36)
        • PLSQL (10)
        • Performance (75)
        • Administration (37)
        • Installation (3)
        • Utilities (1)
        • JSON (8)
        • Vector (7)
      • Exadata (16)
      • SQL*Plus (2)
      • Linux (5)
      • Resources (6)
  • 블로그 메뉴

    • 홈
    • 태그
    • 방명록
  • 도서

    • 불친절한 SQL 프로그래밍
    • 불친절한 PL/SQL 프로그래밍
  • 링크

    • Connor McDonald
    • Frits Hoogland
    • Jonathan Lewis
    • Julian Dontcheff
    • Julian Dyke
    • Kun Sun
    • Maria Colgan
    • Martin Bach
    • Mike Dietrich
    • Tanel Poder
  • 공지사항

  • 인기 글

  • 태그

    12c
    19c
    21c
    23ai
    case study
  • 최근 댓글

  • 최근 글

  • hELLO· Designed By정상우.v4.10.3
정희락
FLOAT32 Vector Generator
상단으로

티스토리툴바