diff --git a/README.md b/README.md index ad23f6d..51ad0b3 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ with the power of remote vector databases, by introducing a new remote vector in ```sql CREATE TABLE products (name text, embedding vector(1536), price float); -CREATE INDEX my_remote_index ON products USING pinecone (embedding, price) with (host = 'my-pinecone-index.pinecone.io'); +CREATE INDEX my_remote_index ON products USING remote (embedding, price) with (host = 'my-pinecone-index.pinecone.io'); -- [insert, update, and delete billions of records in products] SELECT * FROM products WHERE price < 40.0 ORDER BY embedding <-> '[...]' LIMIT 10; -- pinecone performs this query, including the price predicate @@ -86,11 +86,11 @@ ALTER DATABASE mydb SET pinecone.api_key = 'xxxxxxxx-xxxx-xxxx-xxxx–xxxxxxxxxx There are two ways to specify the pinecone index: - By providing the host of an existing pinecone index. For example, ```sql -CREATE INDEX my_remote_index ON products USING pinecone (embedding) with (host = 'example-23kshha.svc.us-east-1-aws.pinecone.io'); +CREATE INDEX my_remote_index ON products USING remote (embedding) with (host = 'example-23kshha.svc.us-east-1-aws.pinecone.io'); ``` - By specifying the `spec` of the pinecone index. 
For example, ```sql -CREATE INDEX my_remote_index ON products USING pinecone (embedding) with (spec = '"spec": { +CREATE INDEX my_remote_index ON products USING remote (embedding) with (spec = '{ "serverless": { "region": "us-west-2", "cloud": "aws"
diff --git a/src/remote/clients/pinecone/pinecone.c b/src/remote/clients/pinecone/pinecone.c
index fd3f9bc..f08bb31 100644
--- a/src/remote/clients/pinecone/pinecone.c
+++ b/src/remote/clients/pinecone/pinecone.c
@@ -36,6 +36,63 @@ const char* vector_metric_to_pinecone_metric[VECTOR_METRIC_COUNT] = {
     "dotproduct"
 };
 
+/*
+ * Return the integer "dimension" field of a parsed index description.
+ * Returns -1 (after logging to stderr) when the field is missing or is not
+ * a number.
+ */
+int extract_dimension(const cJSON *root) {
+    const cJSON *dimension = cJSON_GetObjectItemCaseSensitive(root, "dimension");
+    if (dimension == NULL || !cJSON_IsNumber(dimension)) {
+        fprintf(stderr, "Error: 'dimension' value not found or not a number.\n");
+        return -1;
+    }
+    return dimension->valueint;
+}
+
+/*
+ * Extract the index name ("pgvr-" followed by decimal digits) from a host
+ * string.
+ *
+ * Returns a malloc'd, NUL-terminated copy that the caller must free(), or
+ * NULL (after logging to stderr) when host is NULL, the prefix is absent,
+ * or the allocation fails.
+ */
+char* get_index_name(char* host) {
+    const char* prefix = "pgvr-";
+    const char* start;
+    const char* end;
+    size_t length;
+    char* name;
+
+    // Find the prefix in the URL; strstr() must never be handed a NULL host
+    start = (host != NULL) ? strstr(host, prefix) : NULL;
+    if (start == NULL) {
+        fprintf(stderr, "Prefix not found in URL.\n");
+        return NULL;
+    }
+
+    // Find the end of the name; isdigit() needs a value representable as unsigned char
+    end = start + strlen(prefix);
+    while (isdigit((unsigned char)*end)) {
+        end++;
+    }
+    length = (size_t)(end - start);
+
+    // Allocate memory for the name
+    name = malloc(length + 1);
+    if (name == NULL) {
+        fprintf(stderr, "Memory allocation failed.\n");
+        return NULL;
+    }
+
+    // The length is known exactly, so copy the bytes and terminate explicitly
+    memcpy(name, start, length);
+    name[length] = '\0';
+
+    return name;
+}
+
 void pinecone_spec_validator(const char* spec) {
     if (spec == NULL || cJSON_Parse(spec) == NULL) {
         ereport(ERROR,
@@ -105,6 +162,26 @@ char* pinecone_create_host_from_spec(int dimensions, VectorMetric metric, char*
 // CREATE AND MISC
 void pinecone_validate_host_schema(char* host, int dimensions, VectorMetric metric, Relation index) {
     // TODO: check that the host's schema matches the table
+    char* index_name = get_index_name(host);
+    cJSON* index_details;
+    int pinecone_dimensions;
+
+    if (index_name == NULL) {
+        elog(ERROR, "Could not extract the remote index name from the host");
+    }
+    index_details = describe_index(pinecone_api_key, index_name);
+    // get_index_name() malloc()s its result and elog(ERROR) never returns,
+    // so release the name before any error can be raised below
+    free(index_name);
+    // NOTE(review): index_details is never released; confirm whether
+    // describe_index() hands ownership to the caller (then cJSON_Delete it)
+    pinecone_dimensions = extract_dimension(index_details);
+    if (pinecone_dimensions < 0) {
+        elog(ERROR, "Could not read the vector dimension of the remote index");
+    }
+    if (pinecone_dimensions != dimensions) {
+        elog(ERROR, "Vector Dimension of the local table is %d but the dimension specified in the remote index is %d", dimensions, pinecone_dimensions);
+    }
     return;
 }
 
@@ -289,6 +366,7 @@ bool pinecone_bulk_upsert(char* host, PreparedBulkInsert prepared_vectors, int
 }
 
 
+
 RemoteIndexInterface pinecone_remote_index_interface = {
     // create index
     .create_host_from_spec = pinecone_create_host_from_spec,