From 29227c03bb330e2c0d1353cc08210aeeadcb7d62 Mon Sep 17 00:00:00 2001 From: lohith Date: Wed, 8 May 2024 07:20:26 +0000 Subject: [PATCH 1/4] Update readme to replace pinecone with remote --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ad23f6d..215fc23 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ with the power of remote vector databases, by introducing a new remote vector in ```sql CREATE TABLE products (name text, embedding vector(1536), price float); -CREATE INDEX my_remote_index ON products USING pinecone (embedding, price) with (host = 'my-pinecone-index.pinecone.io'); +CREATE INDEX my_remote_index ON products USING remote (embedding, price) with (host = 'my-pinecone-index.pinecone.io'); -- [insert, update, and delete billions of records in products] SELECT * FROM products WHERE price < 40.0 ORDER BY embedding <-> '[...]' LIMIT 10; -- pinecone performs this query, including the price predicate @@ -86,11 +86,11 @@ ALTER DATABASE mydb SET pinecone.api_key = 'xxxxxxxx-xxxx-xxxx-xxxx–xxxxxxxxxx There are two ways to specify the pinecone index: - By providing the host of an existing pinecone index. For example, ```sql -CREATE INDEX my_remote_index ON products USING pinecone (embedding) with (host = 'example-23kshha.svc.us-east-1-aws.pinecone.io'); +CREATE INDEX my_remote_index ON products USING remote (embedding) with (host = 'example-23kshha.svc.us-east-1-aws.pinecone.io'); ``` - By specifying the `spec` of the pinecone index. For example, ```sql -CREATE INDEX my_remote_index ON products USING pinecone (embedding) with (spec = '"spec": { +CREATE INDEX my_remote_index ON products USING remote (embedding) with (spec = '"spec": { "serverless": { "region": "us-west-2", "cloud": "aws" From a46d8730fae97d601ddc3e99e9b035dd015f9087 Mon Sep 17 00:00:00 2001 From: lohith Date: Wed, 8 May 2024 08:34:14 +0000 Subject: [PATCH 2/4] Readme Changes to spec json for creating index --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 215fc23..51ad0b3 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ CREATE INDEX my_remote_index ON products USING remote (embedding) with (host = ' ``` - By specifying the `spec` of the pinecone index. For example, ```sql -CREATE INDEX my_remote_index ON products USING remote (embedding) with (spec = '"spec": { +CREATE INDEX my_remote_index ON products USING remote (embedding) with (spec = '{ "serverless": { "region": "us-west-2", "cloud": "aws" From 9a59dfc8846067af720de675aef32ef0c6c44e3c Mon Sep 17 00:00:00 2001 From: lohith Date: Sat, 18 May 2024 13:15:50 +0000 Subject: [PATCH 3/4] Added Implementation for pinecone_validate_host_schema --- src/remote/clients/pinecone/pinecone.c | 50 ++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/remote/clients/pinecone/pinecone.c b/src/remote/clients/pinecone/pinecone.c index fd3f9bc..f3e4ff7 100644 --- a/src/remote/clients/pinecone/pinecone.c +++ b/src/remote/clients/pinecone/pinecone.c @@ -36,6 +36,48 @@ const char* vector_metric_to_pinecone_metric[VECTOR_METRIC_COUNT] = { "dotproduct" }; +int extract_dimension(const cJSON *root) { + const cJSON *dimension = cJSON_GetObjectItemCaseSensitive(root, "dimension"); + if (dimension == NULL || !cJSON_IsNumber(dimension)) { + fprintf(stderr, "Error: 'dimension' value not found or not a number.\n"); + return -1; + } + return dimension->valueint; +} + +char* get_index_name(char* host) { + const char* prefix = "pgvr-"; + const char* start; + const char* end; + char* name = NULL; + + // Find the prefix in the URL + start = strstr(host, prefix); + if (start == NULL) { + fprintf(stderr, "Prefix not found in URL.\n"); + return NULL; + } + + // Find the end of the name + end = start + strlen(prefix); + while (isdigit(*end)) { + end++; + } + + // Allocate memory for the name + name = (char*)malloc(end - start + 1); + if (name == NULL) { + fprintf(stderr, "Memory allocation failed.\n"); + return NULL; + } + + // Copy the name into the allocated memory + strncpy(name, start, end - start); + name[end - start] = '\0'; // Null-terminate the string + + return name; +} + void pinecone_spec_validator(const char* spec) { if (spec == NULL || cJSON_Parse(spec) == NULL) { ereport(ERROR, @@ -85,6 +127,7 @@ char* pinecone_create_host_from_spec(int dimensions, VectorMetric metric, char* // pgvr- char* pinecone_index_name = palloc(20); sprintf(pinecone_index_name, "pgvr-%u", index->rd_id); + elog(WARNING,"Pinecone api key passed is %s index name is %s", pinecone_api_key, pinecone_index_name); // TODO: remote index name create_response = remote_create_index(pinecone_api_key, pinecone_index_name, dimensions, remote_metric_name, spec_json); host = cJSON_GetStringValue(cJSON_GetObjectItemCaseSensitive(create_response, "host")); @@ -105,6 +148,12 @@ char* pinecone_create_host_from_spec(int dimensions, VectorMetric metric, char* // CREATE AND MISC void pinecone_validate_host_schema(char* host, int dimensions, VectorMetric metric, Relation index) { // TODO: check that the host's schema matches the table + char* index_name = get_index_name(host); + cJSON* index_details = describe_index( pinecone_api_key, index_name); + int pinecone_dimensions = extract_dimension(index_details); + if(pinecone_dimensions != dimensions){ + elog(ERROR, "Vector Dimension of the local table is %d but the dimension specified in the remote index is %d", dimensions, pinecone_dimensions); + } return; } @@ -289,6 +338,7 @@ bool pinecone_bulk_upsert(char* host, PreparedBulkInsert prepared_vectors, int } + RemoteIndexInterface pinecone_remote_index_interface = { // create index .create_host_from_spec = pinecone_create_host_from_spec, From 202c8cc0ac084afe968a472f8d99519b7a86e03f Mon Sep 17 00:00:00 2001 From: lohith Date: Sat, 18 May 2024 13:20:27 +0000 Subject: [PATCH 4/4] Removed Debug Statements --- src/remote/clients/pinecone/pinecone.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/remote/clients/pinecone/pinecone.c b/src/remote/clients/pinecone/pinecone.c index f3e4ff7..f08bb31 100644 --- a/src/remote/clients/pinecone/pinecone.c +++ b/src/remote/clients/pinecone/pinecone.c @@ -127,7 +127,6 @@ char* pinecone_create_host_from_spec(int dimensions, VectorMetric metric, char* // pgvr- char* pinecone_index_name = palloc(20); sprintf(pinecone_index_name, "pgvr-%u", index->rd_id); - elog(WARNING,"Pinecone api key passed is %s index name is %s", pinecone_api_key, pinecone_index_name); // TODO: remote index name create_response = remote_create_index(pinecone_api_key, pinecone_index_name, dimensions, remote_metric_name, spec_json); host = cJSON_GetStringValue(cJSON_GetObjectItemCaseSensitive(create_response, "host"));