Merged

18 commits
8106af3  Bumps backend Python image version to 3.12 (falquaddoomi, Nov 26, 2025)
c8be231  Adds a few more data-loading, API backend deps (falquaddoomi, Nov 26, 2025)
426712e  Adds memcached service, changes backend data dir mountpoint (falquaddoomi, Nov 26, 2025)
d10fa5a  Maps db-exports into db container, adds dump-db.sh utility script (falquaddoomi, Nov 26, 2025)
906019f  Ignores dumpfiles in /db-exports by default (falquaddoomi, Dec 3, 2025)
6df9106  Implements the backend in one giant commit, sorry about that... (falquaddoomi, Dec 12, 2025)
fab05d4  DB now loads latest dump from ./db-exports, uses custom post-init loa… (falquaddoomi, Dec 12, 2025)
e88def8  run_stack.sh acquires database dump from bucket if local copy is old … (falquaddoomi, Dec 12, 2025)
9d699ef  Updates frontend api endpoints, types to match backend (WIP) (falquaddoomi, Dec 12, 2025)
c202430  Adds databases observed in data, but not in the mockup (falquaddoomi, Dec 12, 2025)
270230e  Debounces autocomplete backend query (falquaddoomi, Dec 12, 2025)
2b41db3  Updates Search field name refs to match what the backend returns (falquaddoomi, Dec 12, 2025)
62b47f5  Updates Cart field refs to match the backend (falquaddoomi, Dec 12, 2025)
001c23b  Displays the onto ID in the autocomplete results, searches by term na… (falquaddoomi, Dec 12, 2025)
10b3808  Disables the mocked responses(?) (falquaddoomi, Dec 12, 2025)
4cff9e1  frontend fixes (vincerubinetti, Dec 14, 2025)
1728add  fix autocomplete debounce (vincerubinetti, Dec 14, 2025)
4cd1d72  fix debounce, fix samples table, remove type remapping (vincerubinetti, Jan 5, 2026)
4 changes: 4 additions & 0 deletions .gitignore
@@ -217,3 +217,7 @@ __marimo__/
 
 # MacOS stuff
 .DS_Store
+
+# project-specific ignores
+# ignore dumpfiles that aren't explicitly checked in
+/db-exports/*.dump
3 changes: 1 addition & 2 deletions backend/Dockerfile
@@ -1,5 +1,4 @@
-# Use Python 3.10 slim image as base
-FROM python:3.10-slim
+FROM python:3.12-slim
 
 # Set working directory
 WORKDIR /app
6 changes: 6 additions & 0 deletions backend/pyproject.toml
@@ -10,8 +10,14 @@ dependencies = [
"django-rest-framework>=0.1.0",
"drf-nested-routers>=0.95.0",
"gunicorn>=23.0.0",
"pandas>=2.3.3",
"tqdm>=4.67.1",
"pgpq>=0.9.0",
"psycopg[binary]>=3.2.10",
"django-filter>=25.2",
"python-memcached>=1.62",
"pymemcache>=4.0.0",
"duckdb>=1.4.2",
]

# [build-system]
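The new python-memcached / pymemcache dependencies pair with the memcached service added in commit 426712e. As a rough sketch only (the actual settings module and service name are not part of this diff), pointing Django's cache at that service would look something like:

# settings.py sketch: assumes a compose service named "memcached" on the default port 11211;
# both the service name and this settings layout are assumptions, not shown in this PR.
CACHES = {
    "default": {
        "BACKEND": "django.core.cache.backends.memcached.PyMemcacheCache",
        "LOCATION": "memcached:11211",
    }
}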
Empty file added backend/src/api/__init__.py
Empty file.
9 changes: 9 additions & 0 deletions backend/src/api/admin.py
@@ -0,0 +1,9 @@
from django.contrib import admin

from api.models import Organism, Platform, Sample, Series, SeriesRelations

admin.site.register(Organism)
admin.site.register(Platform)
admin.site.register(Sample)
admin.site.register(Series)
admin.site.register(SeriesRelations)
6 changes: 6 additions & 0 deletions backend/src/api/apps.py
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class ApiConfig(AppConfig):
    default_auto_field = "django.db.models.BigAutoField"
    name = "api"
Empty file.
Empty file.
80 changes: 80 additions & 0 deletions backend/src/api/management/commands/bulk_import_search_parquet.py
@@ -0,0 +1,80 @@
import io
from pathlib import Path

from django.core.management.base import BaseCommand
from django.db import connection, transaction

import pyarrow.parquet as pq
import pyarrow.csv as pacsv
import pyarrow.compute as pc

from api.models import SearchTerm

class Command(BaseCommand):
    help = "Import SearchTerm rows from a Parquet file using PostgreSQL COPY"

    def add_arguments(self, parser):
        parser.add_argument("parquet_path", help="Path to meta2onto_example_predictions.parquet")
        parser.add_argument(
            "--table",
            default=SearchTerm._meta.db_table,
            help="Target DB table name (default: SearchTerm._meta.db_table)",
        )

    def handle(self, *args, **options):
        parquet_path = Path(options["parquet_path"])
        table_name = options["table"]

        # first, truncate SearchTerm
        self.stdout.write(f"Truncating table {table_name} ...")
        with connection.cursor() as cursor:
            cursor.execute(f"TRUNCATE TABLE {table_name} RESTART IDENTITY CASCADE;")
        self.stdout.write(self.style.SUCCESS(f"Table {table_name} truncated."))

        # 1) Read Parquet
        self.stdout.write(f"Reading Parquet file: {parquet_path}")
        table = pq.read_table(parquet_path)

        # 2) Select and rename columns to match DB schema
        #    Parquet: term, ID, prob, log2(prob/prior), related_words
        #    DB:      term, sample_id, prob, log2_prob_prior, related_words
        table = table.select(["term", "ID", "prob", "log2(prob/prior)", "related_words"])
        table = table.rename_columns(
            ["term", "sample_id", "prob", "log2_prob_prior", "related_words"]
        )

        # # If ID might be missing / null, ensure it's numeric or null
        # # (Arrow usually infers this correctly; adjust if needed)
        # if table["sample_id"].type not in (pc.field("dummy", pc.int64()).type,):
        #     # Try to cast to int64; errors='ignore' will produce nulls for bad values
        #     table = table.set_column(
        #         table.schema.get_field_index("sample_id"),
        #         "sample_id",
        #         pc.cast(table["sample_id"], pc.int64()),
        #     )

        # 3) Write to an in-memory CSV with header
        self.stdout.write("Converting Arrow table to CSV in memory...")
        buf = io.BytesIO()
        pacsv.write_csv(table, buf)
        buf.seek(0)

        # 4) COPY into PostgreSQL using psycopg3's copy()
        self.stdout.write(f"Copying into table {table_name} ...")
        cols = ["term", "sample_id", "prob", "log2_prob_prior", "related_words"]
        copy_sql = f"""
            COPY {table_name} ({", ".join(cols)})
            FROM STDIN WITH (FORMAT csv, HEADER true)
        """

        # buf is bytes; psycopg3 Copy.write() accepts bytes for text/binary COPY
        with transaction.atomic():
            with connection.cursor() as cursor:
                with cursor.copy(copy_sql) as copy:  # <-- psycopg3 API
                    while True:
                        chunk = buf.read(1024 * 1024)  # 1 MB chunks
                        if not chunk:
                            break
                        copy.write(chunk)

        self.stdout.write(self.style.SUCCESS("Import completed successfully."))