Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ with the power of remote vector databases, by introducing a new remote vector in

```sql
CREATE TABLE products (name text, embedding vector(1536), price float);
CREATE INDEX my_remote_index ON products USING pinecone (embedding, price) with (host = 'my-pinecone-index.pinecone.io');
CREATE INDEX my_remote_index ON products USING remote (embedding, price) with (host = 'my-pinecone-index.pinecone.io');
-- [insert, update, and delete billions of records in products]
SELECT * FROM products WHERE price < 40.0 ORDER BY embedding <-> '[...]' LIMIT 10; -- pinecone performs this query, including the price predicate

Expand Down Expand Up @@ -86,11 +86,11 @@ ALTER DATABASE mydb SET pinecone.api_key = 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxx
There are two ways to specify the pinecone index:
- By providing the host of an existing pinecone index. For example,
```sql
CREATE INDEX my_remote_index ON products USING pinecone (embedding) with (host = 'example-23kshha.svc.us-east-1-aws.pinecone.io');
CREATE INDEX my_remote_index ON products USING remote (embedding) with (host = 'example-23kshha.svc.us-east-1-aws.pinecone.io');
```
- By specifying the `spec` of the pinecone index. For example,
```sql
CREATE INDEX my_remote_index ON products USING pinecone (embedding) with (spec = '"spec": {
CREATE INDEX my_remote_index ON products USING remote (embedding) with (spec = '{
"serverless": {
"region": "us-west-2",
"cloud": "aws"
Expand Down
49 changes: 49 additions & 0 deletions src/remote/clients/pinecone/pinecone.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,48 @@ const char* vector_metric_to_pinecone_metric[VECTOR_METRIC_COUNT] = {
"dotproduct"
};

/*
 * Read the integer "dimension" member of a parsed JSON object.
 *
 * Returns the member's valueint when it exists and is a JSON number.
 * Otherwise writes a diagnostic to stderr and returns -1.
 */
int extract_dimension(const cJSON *root) {
    const cJSON *dim_item = cJSON_GetObjectItemCaseSensitive(root, "dimension");

    if (dim_item != NULL && cJSON_IsNumber(dim_item)) {
        return dim_item->valueint;
    }

    fprintf(stderr, "Error: 'dimension' value not found or not a number.\n");
    return -1;
}

/*
 * Extract the index name embedded in a host string.
 *
 * The name is the first occurrence of the literal prefix "pgvr-" in `host`
 * together with the run of decimal digits that immediately follows it
 * (possibly empty, in which case the result is just "pgvr-").
 *
 * Returns a newly malloc'ed, NUL-terminated copy of the name; the caller
 * owns it and must release it with free(). Returns NULL (after writing a
 * diagnostic to stderr) when `host` is NULL, when the prefix is absent, or
 * when the allocation fails.
 */
char* get_index_name(char* host) {
    const char* prefix = "pgvr-";
    const char* start;
    const char* end;
    size_t name_len;
    char* name = NULL;

    // strstr() on a NULL pointer is undefined behaviour; treat it as "no prefix"
    if (host == NULL) {
        fprintf(stderr, "Prefix not found in URL.\n");
        return NULL;
    }

    // Find the prefix in the URL
    start = strstr(host, prefix);
    if (start == NULL) {
        fprintf(stderr, "Prefix not found in URL.\n");
        return NULL;
    }

    // Find the end of the name: skip the prefix, then any digits after it.
    // The cast is required because passing a negative char to isdigit() is
    // undefined behaviour on platforms where plain char is signed.
    end = start + strlen(prefix);
    while (isdigit((unsigned char)*end)) {
        end++;
    }
    name_len = (size_t)(end - start);

    // Allocate memory for the name plus the terminator
    name = malloc(name_len + 1);
    if (name == NULL) {
        fprintf(stderr, "Memory allocation failed.\n");
        return NULL;
    }

    // The length is known exactly, so a plain memcpy plus explicit terminator suffices
    memcpy(name, start, name_len);
    name[name_len] = '\0';

    return name;
}

void pinecone_spec_validator(const char* spec) {
if (spec == NULL || cJSON_Parse(spec) == NULL) {
ereport(ERROR,
Expand Down Expand Up @@ -105,6 +147,12 @@ char* pinecone_create_host_from_spec(int dimensions, VectorMetric metric, char*
// CREATE AND MISC
/*
 * Check that the remote index behind `host` has the same vector dimension
 * as the local table.
 *
 * The index name is derived from `host` via get_index_name(), the index is
 * described through describe_index(), and its "dimension" field is compared
 * with `dimensions`. Any failure is raised with elog(ERROR).
 *
 * `metric` and `index` are currently unused.
 * TODO: also check that the remote metric matches `metric`.
 */
void pinecone_validate_host_schema(char* host, int dimensions, VectorMetric metric, Relation index) {
    char* index_name = get_index_name(host);
    cJSON* index_details;
    int pinecone_dimensions;

    // get_index_name() returns NULL when the host carries no "pgvr-" name;
    // do not hand a NULL name to describe_index().
    if (index_name == NULL) {
        elog(ERROR, "Could not determine the remote index name from host \"%s\"", host ? host : "(null)");
    }

    index_details = describe_index( pinecone_api_key, index_name);

    // index_name comes from malloc(), so it is not reclaimed by a memory
    // context; release it before any elog(ERROR) below can jump out of here.
    // NOTE(review): assumes describe_index() does not keep the pointer - confirm.
    free(index_name);
    index_name = NULL;

    // NOTE(review): index_details is not released here because ownership of
    // the value returned by describe_index() is not visible from this function.
    pinecone_dimensions = extract_dimension(index_details);

    // extract_dimension() signals "missing or not a number" with -1; report
    // that explicitly instead of as a dimension mismatch.
    if (pinecone_dimensions < 0) {
        elog(ERROR, "Could not read the vector dimension of the remote index");
    }
    if(pinecone_dimensions != dimensions){
        elog(ERROR, "Vector Dimension of the local table is %d but the dimension specified in the remote index is %d", dimensions, pinecone_dimensions);
    }
    return;
}

Expand Down Expand Up @@ -289,6 +337,7 @@ bool pinecone_bulk_upsert(char* host, PreparedBulkInsert prepared_vectors, int
}



RemoteIndexInterface pinecone_remote_index_interface = {
// create index
.create_host_from_spec = pinecone_create_host_from_spec,
Expand Down