Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
5d8102f
updating values files for new format and new graphs
EvanDietzMorris Feb 6, 2026
91d3c62
adding graph metadata and schema files to charts
EvanDietzMorris Feb 6, 2026
920ccfe
adding ohd values
EvanDietzMorris Feb 6, 2026
28fe7a3
updating bl version
EvanDietzMorris Feb 6, 2026
4a8190c
updating robokopkg resources closer to standalone instance
EvanDietzMorris Feb 6, 2026
5a826a1
adding cebs
EvanDietzMorris Feb 6, 2026
c45fa15
adding new files to cebs
EvanDietzMorris Feb 6, 2026
5886044
fixing robokopkg memory allocation
EvanDietzMorris Feb 6, 2026
98c1456
increasing storage size for ubergraph
EvanDietzMorris Feb 6, 2026
6543d3d
increasing storage size for ubergraph (again)
EvanDietzMorris Feb 6, 2026
b69b55e
new reactome graph
EvanDietzMorris Feb 8, 2026
74ed192
graceful fallback for missing metadata files
EvanDietzMorris Feb 8, 2026
035f4bc
updated memgraph deployment to ingest 3 cypher files
hyi Feb 10, 2026
59778a0
refactored memgraph data ingestion script to work through some issues
hyi Feb 11, 2026
f84190d
updates trying to get memgraph data ingestion working
hyi Feb 13, 2026
fb78331
memgraph edge loading performance is now acceptable for large graphs
hyi Feb 16, 2026
d49f4bd
added graph query timeout for memgraph for benchmarking
hyi Feb 16, 2026
7da3cb8
fixed a few bugs with memgraph data ingestion script
hyi Feb 17, 2026
e4bb139
updated values for benchmarking neo4j vs memgraph
hyi Feb 17, 2026
073c3ff
set logging level to error for memgraph in serving mode
hyi Feb 17, 2026
af9694d
disabling default snapshotting for memgraph to save disk space
hyi Feb 17, 2026
2e5cefd
disable snapshotting for memgraph in serving mode in another condition
hyi Feb 17, 2026
1dc34a7
disabling snapshotting has to go with disabling WAL
hyi Feb 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions helm/plater/non-itrb-values/cebs/values-data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
app:
neo4j:
dataUrl: https://stars.renci.org/var/plater/bl-4.2.6/CEBS_Automat/6a77f858e1b9852c/graph_6a77f858e1b9852c.db.dump
memgraph:
dataUrl: ""
graphMetadataUrl: ""
schemaUrl: ""
metadataUrl: https://stars.renci.org/var/plater/bl-4.2.6/CEBS_Automat/6a77f858e1b9852c/CEBS_Automat.meta.json
metaKGUrl: https://stars.renci.org/var/plater/bl-4.2.6/CEBS_Automat/6a77f858e1b9852c/meta_knowledge_graph.json
testingDataUrl: https://stars.renci.org/var/plater/bl-4.2.6/CEBS_Automat/6a77f858e1b9852c/testing_data.json
Binary file not shown.
10 changes: 8 additions & 2 deletions helm/plater/non-itrb-values/clinical-trials-kp/values-data.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
dataUrl: https://stars.renci.org/var/plater/bl-4.2.1/CTKP_Automat/3a11bc62b676c6aa/neo4j.dump
app:
neo4j:
dataUrl: https://stars.renci.org/var/plater/bl-4.2.1/CTKP_Automat/3a11bc62b676c6aa/neo4j.dump
memgraph:
dataUrl: ""
graphMetadataUrl: ""
schemaUrl: ""
metadataUrl: https://stars.renci.org/var/plater/bl-4.2.1/CTKP_Automat/3a11bc62b676c6aa/CTKP_Automat.meta.json
metaKGUrl: https://stars.renci.org/var/plater/bl-4.2.1/CTKP_Automat/3a11bc62b676c6aa/meta_knowledge_graph.json
testingDataUrl: https://stars.renci.org/var/plater/bl-4.2.1/CTKP_Automat/3a11bc62b676c6aa/testing_data.json
testingDataUrl: https://stars.renci.org/var/plater/bl-4.2.1/CTKP_Automat/3a11bc62b676c6aa/testing_data.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
dataUrl: https://stars.renci.org/var/plater/bl-4.2.1/EHR_Clinical_Connections_Automat/613242929fd651b8/neo4j.dump
app:
neo4j:
dataUrl: https://stars.renci.org/var/plater/bl-4.2.1/EHR_Clinical_Connections_Automat/613242929fd651b8/neo4j.dump
memgraph:
dataUrl: ""
graphMetadataUrl: ""
schemaUrl: ""
metadataUrl: https://stars.renci.org/var/plater/bl-4.2.1/EHR_Clinical_Connections_Automat/613242929fd651b8/EHR_Clinical_Connections_Automat.meta.json
metaKGUrl: https://stars.renci.org/var/plater/bl-4.2.1/EHR_Clinical_Connections_Automat/613242929fd651b8/meta_knowledge_graph.json
testingDataUrl: https://stars.renci.org/var/plater/bl-4.2.1/EHR_Clinical_Connections_Automat/613242929fd651b8/testing_data.json
testingDataUrl: https://stars.renci.org/var/plater/bl-4.2.1/EHR_Clinical_Connections_Automat/613242929fd651b8/testing_data.json
10 changes: 8 additions & 2 deletions helm/plater/non-itrb-values/ehr-may-treat-kp/values-data.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
dataUrl: https://stars.renci.org/var/plater/bl-4.2.1/MayTreatKP_Automat/e8ecb00fe2898f82/graph_e8ecb00fe2898f82.db.dump
app:
neo4j:
dataUrl: https://stars.renci.org/var/plater/bl-4.2.1/MayTreatKP_Automat/e8ecb00fe2898f82/graph_e8ecb00fe2898f82.db.dump
memgraph:
dataUrl: ""
graphMetadataUrl: ""
schemaUrl: ""
metadataUrl: https://stars.renci.org/var/plater/bl-4.2.1/MayTreatKP_Automat/e8ecb00fe2898f82/MayTreatKP_Automat.meta.json
metaKGUrl: https://stars.renci.org/var/plater/bl-4.2.1/MayTreatKP_Automat/e8ecb00fe2898f82/meta_knowledge_graph.json
testingDataUrl: https://stars.renci.org/var/plater/bl-4.2.1/MayTreatKP_Automat/e8ecb00fe2898f82/testing_data.json
testingDataUrl: https://stars.renci.org/var/plater/bl-4.2.1/MayTreatKP_Automat/e8ecb00fe2898f82/testing_data.json
8 changes: 7 additions & 1 deletion helm/plater/non-itrb-values/molepro/values-data.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
dataUrl: https://stars.renci.org/var/plater/bl-4.2.1/MolePro_Automat/c8f64284fbeb81a9/graph_c8f64284fbeb81a9.db.dump
app:
neo4j:
dataUrl: https://stars.renci.org/var/plater/bl-4.2.1/MolePro_Automat/c8f64284fbeb81a9/graph_c8f64284fbeb81a9.db.dump
memgraph:
dataUrl: ""
graphMetadataUrl: ""
schemaUrl: ""
metadataUrl: https://stars.renci.org/var/plater/bl-4.2.1/MolePro_Automat/c8f64284fbeb81a9/MolePro_Automat.meta.json
metaKGUrl: https://stars.renci.org/var/plater/bl-4.2.1/MolePro_Automat/c8f64284fbeb81a9/meta_knowledge_graph.json
testingDataUrl: https://stars.renci.org/var/plater/bl-4.2.1/MolePro_Automat/c8f64284fbeb81a9/testing_data.json
14 changes: 10 additions & 4 deletions helm/plater/non-itrb-values/textminingkp/values-data.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
dataUrl: https://stars.renci.org/var/plater/bl-4.2.1/TMKP_Automat/2ae17047aa1b5f3c/graph_2ae17047aa1b5f3c.db.dump
metadataUrl: https://stars.renci.org/var/plater/bl-4.2.1/TMKP_Automat/2ae17047aa1b5f3c/TMKP_Automat.meta.json
metaKGUrl: https://stars.renci.org/var/plater/bl-4.2.1/TMKP_Automat/2ae17047aa1b5f3c/meta_knowledge_graph.json
testingDataUrl: https://stars.renci.org/var/plater/bl-4.2.1/TMKP_Automat/2ae17047aa1b5f3c/testing_data.json
app:
neo4j:
dataUrl: https://stars.renci.org/var/plater/graphs/TMKP_Automat/c6e319504bba0301/neo4j.dump
memgraph:
dataUrl: https://stars.renci.org/var/plater/graphs/TMKP_Automat/c6e319504bba0301/memgraph.cypher
graphMetadataUrl: https://stars.renci.org/var/plater/graphs/TMKP_Automat/c6e319504bba0301/graph-metadata.json
schemaUrl: https://stars.renci.org/var/plater/graphs/TMKP_Automat/c6e319504bba0301/schema.json
metadataUrl: https://stars.renci.org/var/plater/graphs/TMKP_Automat/c6e319504bba0301/TMKP_Automat.meta.json
metaKGUrl: https://stars.renci.org/var/plater/graphs/TMKP_Automat/c6e319504bba0301/meta_knowledge_graph.json
testingDataUrl: https://stars.renci.org/var/plater/graphs/TMKP_Automat/c6e319504bba0301/testing_data.json
Binary file modified helm/plater/non-itrb-values/textminingkp/values-populated.yaml
Binary file not shown.
10 changes: 8 additions & 2 deletions helm/plater/non-itrb-values/yobokop/values-data.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
dataUrl: https://stars.renci.org/var/plater/bl-4.2.0/YobokopKG/40614106c036f010/graph_40614106c036f010.db.dump
app:
neo4j:
dataUrl: https://stars.renci.org/var/plater/bl-4.2.0/YobokopKG/40614106c036f010/graph_40614106c036f010.db.dump
memgraph:
dataUrl: ""
graphMetadataUrl: ""
schemaUrl: ""
metadataUrl: https://stars.renci.org/var/plater/bl-4.2.0/YobokopKG/40614106c036f010/YobokopKG.meta.json
metaKGUrl: https://stars.renci.org/var/plater/bl-4.2.0/YobokopKG/40614106c036f010/meta_knowledge_graph.json
testingDataUrl: https://stars.renci.org/var/plater/bl-4.2.0/YobokopKG/40614106c036f010/testing_data.json
testingDataUrl: https://stars.renci.org/var/plater/bl-4.2.0/YobokopKG/40614106c036f010/testing_data.json
206 changes: 192 additions & 14 deletions helm/plater/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,33 +20,204 @@ data:
wget -O /data/neo4j.dump ${NEO4J_DATA_URL}
touch /data/done

run_memgraph.sh: |-
download_data_memgraph.sh: |-
#!/bin/sh
set -e
DATA_DIR="/data/memgraph"

# Download the Memgraph ingestion files once. /data/done marks a completed
# download, so a later run skips straight to the else branch.
if [ ! -f /data/done ]; then
  echo "Downloading Memgraph ingestion files..."
  # --fail turns an HTTP error into a non-zero exit status instead of saving
  # the server's error page as if it were data.
  curl -fL -o /data/memgraph_nodes.cypher "${MEMGRAPH_DATA_URL}_nodes.cypher"
  curl -fL -o /data/memgraph_indexes.cypher "${MEMGRAPH_DATA_URL}_indexes.cypher"

  curl -fL -o /data/edges_manifest.txt "${MEMGRAPH_DATA_URL}_edges_manifest.txt"
  echo "Downloading all edge files in parallel..."
  pids=""
  # "|| [ -n ... ]" keeps a final manifest line that has no trailing newline.
  while IFS= read -r filename || [ -n "$filename" ]; do
    # Ignore blank lines in the manifest.
    [ -n "$filename" ] || continue
    curl -fL -o "/data/${filename}" "${MEMGRAPH_DATA_URL}_${filename}" &
    pids="$pids $!"
  done < /data/edges_manifest.txt

  # A bare "wait" discards the exit status of the background jobs, so wait on
  # each PID and remember whether any download failed.
  failed=0
  for pid in $pids; do
    wait "$pid" || failed=1
  done
  if [ "$failed" -ne 0 ]; then
    echo "One or more edge file downloads failed" >&2
    exit 1
  fi

  # Only mark the download complete once every file arrived.
  touch /data/done
else
  echo "Files have been previously downloaded already"
fi

run_memgraph.sh: |-
#!/bin/bash
set -euo pipefail

DATA_DIR="/data/memgraph"
BATCH_SIZE=${BATCH_SIZE}
QUERY_TIMEOUT=${GRAPH_QUERY_TIMEOUT}

echo "Starting Memgraph..."
echo "Cleaning stream metadata directory..."
rm -rf $DATA_DIR/streams/*

# If the snapshots directory exists and is non-empty, skip ingestion and serve
# from the existing snapshot.
if [ -d "$DATA_DIR/snapshots" ] && [ "$(ls -A "$DATA_DIR/snapshots" 2>/dev/null)" ]; then
  echo "Existing snapshot detected. Starting Memgraph in serving mode..."
  # exec replaces this shell, so nothing after this block runs on this path.
  # The flags match the serving-mode launch at the end of this script,
  # including --log-level=ERROR, so both serving paths log at the same level.
  exec /usr/lib/memgraph/memgraph \
    --bolt-port=7687 \
    --bolt-address=0.0.0.0 \
    --storage-mode=IN_MEMORY_TRANSACTIONAL \
    --storage-wal-enabled=false \
    --storage-snapshot-interval-sec=0 \
    --log-level=ERROR \
    --query-execution-timeout-sec=$QUERY_TIMEOUT \
    --data-directory="$DATA_DIR"
fi

echo "Starting Memgraph in BULK INGEST mode..."

/usr/lib/memgraph/memgraph \
--bolt-port=7687 \
--bolt-address=0.0.0.0 \
--log-level=INFO \
--also-log-to-stderr \
&
--storage-mode=IN_MEMORY_ANALYTICAL \
--storage-snapshot-interval-sec=0 \
--query-execution-timeout-sec=0 \
--storage-wal-enabled=false \
--log-level=ERROR \
--memory-limit=0 \
--data-directory="$DATA_DIR" &

MEMGRAPH_PID=$!

echo "Waiting for Memgraph to accept connections...${MEMGRAPH_PID}"
echo "Waiting for Memgraph to accept connections..."
until echo "RETURN 1;" | /usr/bin/mgconsole --host localhost --port 7687 >/dev/null 2>&1; do
sleep 1
sleep 2
done

# Load all nodes before any edges: the edge loader later in this script
# attaches relationships by matching existing nodes on their id property.
echo "Loading Memgraph nodes..."
/usr/bin/mgconsole --host localhost --port 7687 < /data/memgraph_nodes.cypher

echo "Waiting for storage to settle before creating indexes..."
sleep 5
echo "All nodes are loaded. Creating indexes (with retry)..."
# Re-run the index file until mgconsole exits successfully. There is no retry
# limit, so a persistently failing index file keeps this loop running.
until /usr/bin/mgconsole --host localhost --port 7687 < /data/memgraph_indexes.cypher; do
echo "Index creation failed, retrying in 5s..."
sleep 5
done

if [ -f /data/memgraph.cypher ]; then
echo "Loading Memgraph dump..."
/usr/bin/mgconsole --host localhost --port 7687 < /data/memgraph.cypher
echo "Memgraph dump loaded"
# Load one edge file into the running Memgraph instance via LOAD CSV.
#
# Arguments:
#   $1 - edge file name, resolved relative to /data
#
# The relationship type is derived from the file name: the .csv suffix and the
# edges_ prefix are stripped, then the first underscore becomes a colon.
#
# Result is reported through marker files rather than the return status:
#   /tmp/edge_<base>.done    - mgconsole exited successfully
#   /tmp/edge_<base>.failed  - mgconsole (or the pipeline) failed
load_edge_file() {
  local filename base marker relpart reltype
  filename="$1"
  base=$(basename "$filename" .csv)
  marker="/tmp/edge_${base}"
  relpart=${base#edges_}
  # Only the first underscore is replaced; later ones stay in the type name.
  reltype=${relpart/_/:}
  echo "Starting $filename as relationship type $reltype"

  # The three %s placeholders are, in order: file name, relationship type,
  # and rows per transaction. printf also expands \t and \u001F in the format.
  if printf '
    LOAD CSV FROM "/data/%s"
    WITH HEADER DELIMITER "\t" AS row
    CALL {
    WITH row
    MATCH (a:`biolink:NamedThing` {id: row.subject}),
    (b:`biolink:NamedThing` {id: row.object})
    CREATE (a)-[r:`%s`]->(b)
    SET r += row
    SET r.category = split(row.category, "\u001F")
    FOREACH (_ IN CASE WHEN row.publications <> "" THEN [1] ELSE [] END |
    SET r.publications = split(row.publications, "\u001F")
    )
    FOREACH (_ IN CASE WHEN row.has_evidence <> "" THEN [1] ELSE [] END |
    SET r.has_evidence = split(row.has_evidence, "\u001F")
    )
    FOREACH (_ IN CASE WHEN row.anatomical_context_qualifier <> "" THEN [1] ELSE [] END |
    SET r.anatomical_context_qualifier = split(row.anatomical_context_qualifier, "\u001F")
    )
    REMOVE r.subject, r.object, r.predicate
    } IN TRANSACTIONS OF %s ROWS;
    ' "$filename" "$reltype" "$BATCH_SIZE" \
    | /usr/bin/mgconsole --host localhost --port 7687
  then
    echo "Finished ${filename}"
    touch "${marker}.done"
  else
    # Report only the edge-load failure; the earlier "No memgraph.cypher found"
    # message referred to a different load path and was misleading here.
    echo "FAILED $filename"
    touch "${marker}.failed"
  fi
}

echo "Loading edges in parallel..."

# Clear markers from any earlier attempt so they are not counted below.
rm -f /tmp/edge_*.done /tmp/edge_*.failed

# Start one background loader per manifest entry and count how many were
# actually started. Counting launches (instead of newline characters with
# wc -l) keeps the expected total in step with the loop when the manifest has
# blank lines or a final line without a trailing newline.
expected=0
while IFS= read -r filename || [ -n "$filename" ]; do
  [ -n "$filename" ] || continue
  load_edge_file "$filename" &
  expected=$((expected + 1))
done < /data/edges_manifest.txt

echo "Waiting for $expected edge files to complete..."

# Each loader drops a .done or .failed marker in /tmp; poll until every
# started loader has reported one way or the other.
while true; do
  done_count=$(find /tmp -maxdepth 1 -name 'edge_*.done' | wc -l)
  fail_count=$(find /tmp -maxdepth 1 -name 'edge_*.failed' | wc -l)
  total=$((done_count + fail_count))

  echo "Completed: $done_count, Failed: $fail_count, Total: $total / $expected"

  if [ "$total" -eq "$expected" ]; then
    break
  fi

  sleep 5
done

if [ "$fail_count" -ne 0 ]; then
  echo "One or more edge loads failed. Aborting."
  kill -TERM "$MEMGRAPH_PID"
  # wait reports the terminated process's status; ignore it so that set -e
  # does not pre-empt the explicit exit code below.
  wait "$MEMGRAPH_PID" || true
  exit 1
fi

wait ${MEMGRAPH_PID}
echo "All edge loads completed successfully."

echo "Waiting for Memgraph to finish ingestion transactions..."

while true; do
  # Run the query on its own so a mgconsole failure still aborts the script.
  tx_output=$(echo "SHOW TRANSACTIONS;" | \
    /usr/bin/mgconsole --host localhost --port 7687)

  # grep -c exits 1 when it counts zero lines; under "set -euo pipefail" that
  # would terminate the script, so the status is ignored and only the printed
  # count is used.
  # NOTE(review): this counts output lines containing the literal text
  # "transaction_id". Confirm that this tracks the number of active
  # transactions and does not just match the result header.
  active=$(printf '%s\n' "$tx_output" | grep -c "transaction_id" || true)

  if [ "$active" -le 1 ]; then
    break
  fi

  echo "Still $active active transactions..."
  sleep 5
done

echo "Checking for active transactions..."
echo "SHOW TRANSACTIONS;" | /usr/bin/mgconsole --host localhost --port 7687

echo "Checking storage info before snapshot..."
echo "SHOW STORAGE INFO;" | /usr/bin/mgconsole --host localhost --port 7687
echo "Creating persistent snapshot (this may take a while for large graph)..."

# Capture the status with "||" so a failing pipeline does not trip
# "set -e"/"pipefail" before the failure handling below can run. Reading $?
# on a separate line would never be reached after a failure.
SNAP_STATUS=0
echo "CREATE SNAPSHOT;" | /usr/bin/mgconsole --host localhost --port 7687 || SNAP_STATUS=$?

echo "Snapshot exit code: $SNAP_STATUS"

if [ "$SNAP_STATUS" -ne 0 ]; then
  echo "Snapshot failed!"
  kill -TERM "$MEMGRAPH_PID"
  # Ignore the terminated process's status so the explicit exit code is used.
  wait "$MEMGRAPH_PID" || true
  exit 1
fi

echo "Switching to serving mode..."

# Stop the ingest-mode instance before starting the serving-mode one on the
# same port and data directory.
kill -TERM "$MEMGRAPH_PID"
# wait returns the stopped process's exit status. Under "set -e" a non-zero
# status would abort here and the serving instance would never start, so it is
# logged instead. The snapshot has already been created at this point.
wait "$MEMGRAPH_PID" || echo "Ingest-mode Memgraph exited with status $?"

# exec replaces this shell with the serving-mode Memgraph process.
exec /usr/lib/memgraph/memgraph \
  --bolt-port=7687 \
  --bolt-address=0.0.0.0 \
  --storage-mode=IN_MEMORY_TRANSACTIONAL \
  --storage-wal-enabled=false \
  --storage-snapshot-interval-sec=0 \
  --log-level=ERROR \
  --query-execution-timeout-sec=$QUERY_TIMEOUT \
  --data-directory="$DATA_DIR"

health_check.sh: |-
#!/usr/bin/env bash
Expand All @@ -66,6 +237,11 @@ data:
RC=0

function download() {
if [ -z "$2" ]; then
echo "SKIPPING $1 (no URL provided), writing empty JSON file"
echo '{}' > "$LOCAL_DESTINATION/$1"
return 0
fi
CMD="wget -O $LOCAL_DESTINATION/$1 $2"
echo "RUNNING: $CMD"
$CMD
Expand All @@ -76,7 +252,9 @@ data:

download metadata.json ${PLATER_METADATA_URL} && \
download meta_knowledge_graph.json ${PLATER_METAKG_URL} && \
download sri_testing_data.json ${PLATER_TEST_DATA_URL}
download sri_testing_data.json ${PLATER_TEST_DATA_URL} && \
download graph_metadata.json ${PLATER_GRAPH_METADATA_URL} && \
download schema.json ${PLATER_SCHEMA_URL}
RC=$?
if [[ $RC == 0 ]]; then
break;
Expand Down
2 changes: 2 additions & 0 deletions helm/plater/templates/env-config-map.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ data:
PLATER_TITLE: {{ .Release.Name }}
PLATER_VERSION: {{ .Values.image.plater.tag }}
PLATER_METADATA_URL: "{{ .Values.metadataUrl }}"
PLATER_GRAPH_METADATA_URL: "{{ .Values.graphMetadataUrl }}"
PLATER_SCHEMA_URL: "{{ .Values.schemaUrl }}"
PLATER_METAKG_URL: "{{ .Values.metaKGUrl }}"
PLATER_TEST_DATA_URL: "{{ .Values.testingDataUrl }}"
PROVENANCE_TAG: "{{ .Values.datasetDesc.provenanceTag }}"
Expand Down
Loading