bigict · chungongyu · Sep 11, 2025 · Sep 10, 2025
diff --git a/data/tcr_pmhc_db/test.idx b/data/tcr_pmhc_db/test.idx
@@ -0,0 +1,103 @@
+1bd2_P
+1fo0_P
+1g6r_P
+1kj2_P
+1lp9_P
+1mwa_P
+1nam_P
+1qse_P
+1rc3_P
+2bnq_P
+2bnr_P
+2ckb_P
+2e7l_P
+2esv_P
+2f53_P
+2f54_P
+2gj6_P
+2nx5_P
+2oi9_P
+2ol3_P
+2p5e_P
+2p5w_P
+2pye_P
+2vlk_P
+2vlr_P
+2ypl_P
+3dxa_P
+3e3q_P
+3ffc_P
+3gsn_P
+3h9s_P
+3hg1_P
+3kpr_P
+3mv7_P
+3mv8_P
+3mv9_P
+3o4l_P
+3pqy_P
+3qdg_P
+3qdj_P
+3qdm_P
+3qeq_P
+3qfj_P
+3tfk_P
+3tjh_P
+3uts_P
+3utt_P
+3vxm_P
+3vxr_P
+3vxs_P
+3w0w_P
+4eup_P
+4g8g_P
+4jfd_P
+4jfe_P
+4jff_P
+4l3e_P
+4mji_P
+4mnq_P
+4mvb_P
+4mxq_P
+4prp_P
+4qok_P
+4qrp_P
+5brz_P
+5bs0_P
+5c07_P
+5c08_P
+5c09_P
+5c0a_P
+5c0b_P
+5c0c_P
+5d2n_P
+5e9d_P
+5eu6_P
+5euo_P
+5hhm_P
+5hho_P
+5hyj_P
+5ivx_P
+5jhd_P
+5m00_P
+5men_P
+5nme_P
+5til_P
+5wlg_P
+5xov_P
+5yxn_P
+6TRo_P
+6amu_P
+6bj3_P
+6dkp_P
+6eqb_P
+6g9q_P
+6l9l_P
+6mtm_P
+6q3s_P
+6rp9_P
+6tmo_P
+6vma_P
+7jwj_P
+7n1f_P
+7rm4_P
diff --git a/predict.sh b/predict.sh
@@ -40,30 +40,47 @@ if [ $# -eq 0 ]; then
   help 1
 fi
 
+############################
+echo "initialize db"
+############################
+db_dir="${CWD}/data/tcr_pmhc_db"
+# Build each per-chain FASTA db once; the guard must test the same path that is written below.
+for c in "M" "P" "A" "B"; do
+  if [ ! -e "${db_dir}_${c}.fa" ]; then
+    find "${db_dir}/fasta" -name "*_${c}.fasta" -exec awk '$0!=""{print $0}' {} + > "${db_dir}_${c}.fa"
+  fi
+done
+
+
 csv_file=$*
 
-# convert csv to fasta files
+############################
+echo "convert csv to fasta files"
+############################
 python ${CWD}/main.py csv_to_fasta \
     --target_uri "${output_dir}${output_params}" \
     --pid_prefix tcr_pmhc_test_ \
     --default_y=1.0 \
     --verbose \
     ${csv_file}
 
-# make chain.idx
+############################
+echo "make chain.idx"
+############################
 cat ${output_dir}/mapping.idx_all | \
   cut -f2 | \
   awk -F _ '{printf("%s",$1);for (i=2;i<NF;++i) printf("_%s", $i); printf(" %s\n", $NF);}' | \
   sort -T . | \
   awk -f ${CWD}/scripts/collapse.awk  > ${output_dir}/chain.idx_all
 
-# filter out ones that has only one chain
-#   1. load dict a (in test dataset) from attr.idx_all
-#   2. filter out those that:
-#      i.  has no peptide
-#      ii. only have peptide & MHC and in dict a
-#      iii.has only one chain
-#
+############################
+echo "filter out ones that has only one chain"
+echo "  1. load dict a (in test dataset) from attr.idx_all"
+echo "  2. filter out those that:"
+echo "     i.  has no peptide"
+echo "     ii. only have peptide & MHC and in dict a"
+echo "     iii.has only one chain"
+############################
 cat ${output_dir}/chain.idx_all | \
   awk -v attr_idx=${output_dir}/attr.idx_all 'BEGIN{
       while(getline<attr_idx) {
@@ -84,7 +101,9 @@ cat ${output_dir}/chain.idx_all | \
         print $0;
     }' > ${output_dir}/chain.idx_all_blacklist
 
-# make attr.idx for test fold_i
+############################
+echo "make attr.idx"
+############################
 cat ${output_dir}/attr.idx_all | \
   awk -v blacklist=${output_dir}/chain.idx_all_blacklist 'BEGIN{
       a["xxxxxxxx"] = 1;
@@ -102,7 +121,9 @@ python ${CWD}/main.py attr_update_weight_and_task \
     --weight 1.0 \
     data/tcr_pmhc_db/attr.idx  >> ${output_dir}/attr.idx
 
-# build the dataset (test data included) mapping.idx and chain.idx
+############################
+echo "build the dataset: mapping.idx and chain.idx"
+############################
 cat ${CWD}/data/tcr_pmhc_db/mapping.idx ${output_dir}/mapping.idx_all > ${output_dir}/mapping.idx
 cat ${output_dir}/mapping.idx | \
   cut -f2 | \
@@ -111,7 +132,9 @@ cat ${output_dir}/mapping.idx | \
   awk -f ${CWD}/scripts/collapse.awk  > ${output_dir}/chain.idx
 
 
-# build fasta for each chain
+############################
+echo "build fasta for each chain"
+############################
 for c in "A" "B" "P" "M"; do
   python ${CWD}/main.py fasta_extract \
       --target_uri ${output_dir} \
@@ -123,14 +146,18 @@ for c in "A" "B" "P" "M"; do
   fi
 done
 
-# align A B M with jackhmmer
+############################
+echo "align chains A, B and M with jackhmmer"
+############################
 for c in "A" "B" "M"; do
   find ${CWD}/data/tcr_pmhc_db/fasta -name "*_${c}.fasta" > ${output_dir}/tcr_pmhc_db_${c}
   cat ${output_dir}/tcr_pmhc_db_${c} | ${CWD}/bin/mapred -m "uniref90_db=${output_dir}/tcr_pmhc_${c}.fa mgnify_db=${CWD}/data/tcr_pmhc_db_${c}.fa sh ${CWD}/scripts/run_jackhmmer.sh -o ${output_dir}/a3m" -c 10
   cat ${output_dir}/tcr_pmhc_db_${c} | ${CWD}/bin/mapred -m "PIPELINE_UNIREF_MAX_HITS=1000000 PIPELINE_MGNIFY_MAX_HITS=1000000 PIPELINE_DEDUPLICATE=0 sh ${CWD}/scripts/run_pipeline.sh -o ${output_dir}/a3m" -c 10
 done
 
-# align P with equal length
+############################
+echo "align chain P with equal length"
+############################
 python ${CWD}/main.py peptide_align \
   --output_dir ${output_dir}/a3m \
   --target_db ${output_dir}/tcr_pmhc_P.fa \
@@ -144,19 +171,23 @@ for c in "P"; do
 done
 
 # filter a3m with threshold=t
+############################
+echo "filter a3m (MHC): align_ratio>=${mhc_align_ratio_threshold}"
+############################
 if [ -d ${output_dir}/var ]; then
   rm -rf ${output_dir}/var
 fi
 
-echo "filter a3m (MHC): align_ratio>=${mhc_align_ratio_threshold}"
 cp -r ${output_dir}/a3m ${output_dir}/var
 python ${CWD}/main.py a3m_filter \
     --output_dir ${output_dir}/var \
     --aligned_ratio_threshold ${mhc_align_ratio_threshold} \
     --trim_gap \
     ${CWD}/data/tcr_pmhc_db/fasta/*_M.fasta
 
+############################
 echo "predict ${csv_file}"
+############################
 python main.py predict \
     ${model_args} \
     --output_dir ${output_dir}/pred \

diff --git a/profold2 b/profold2
-Original file line number
+Diff line change
@@ -0,0 +1,103 @@
+bd2_P
+fo0_P
+g6r_P
+kj2_P
+lp9_P
+mwa_P
+nam_P
+qse_P
+rc3_P
+bnq_P
+bnr_P
+ckb_P
+e7l_P
+esv_P
+f53_P
+f54_P
+gj6_P
+nx5_P
+oi9_P
+ol3_P
+p5e_P
+p5w_P
+pye_P
+vlk_P
+vlr_P
+ypl_P
+dxa_P
+e3q_P
+ffc_P
+gsn_P
+h9s_P
+hg1_P
+kpr_P
+mv7_P
+mv8_P
+mv9_P
+o4l_P
+pqy_P
+qdg_P
+qdj_P
+qdm_P
+qeq_P
+qfj_P
+tfk_P
+tjh_P
+uts_P
+utt_P
+vxm_P
+vxr_P
+vxs_P
+w0w_P
+eup_P
+g8g_P
+jfd_P
+jfe_P
+jff_P
+l3e_P
+mji_P
+mnq_P
+mvb_P
+mxq_P
+prp_P
+qok_P
+qrp_P
+brz_P
+bs0_P
+c07_P
+c08_P
+c09_P
+c0a_P
+c0b_P
+c0c_P
+d2n_P
+e9d_P
+eu6_P
+euo_P
+hhm_P
+hho_P
+hyj_P
+ivx_P
+jhd_P
+m00_P
+men_P
+nme_P
+til_P
+wlg_P
+xov_P
+yxn_P
+TRo_P
+amu_P
+bj3_P
+dkp_P
+eqb_P
+g9q_P
+l9l_P
+mtm_P
+q3s_P
+rp9_P
+tmo_P
+vma_P
+jwj_P
+n1f_P
+rm4_P
+0 −1		.dockerignore
+38 −0		.github/ISSUE_TEMPLATE/bug_report.md
+20 −0		.github/ISSUE_TEMPLATE/feature_request.md
+18 −0		.github/workflows/docker-image.yml
+11 −11		README.md
+0 −355		data_maker.py
+11 −22		docker/Dockerfile
+0 −42		docker/openmm.patch
+2 −0		docker/requirements.txt
+69 −51		docker/run_docker.py
+19 −8		examples/evaluate.job
+20 −8		examples/predict.job
+20 −8		examples/train.job
+32 −4		install_env.sh
+2 −66		main.py
+7 −13		profold2/command/evaluator.py
+79 −0		profold2/command/main.py
+3 −2		profold2/command/predictor.py
+0 −0		profold2/command/relaxer.py
+1 −18		profold2/command/trainer.py
+45 −19		profold2/command/worker.py
+6 −2		profold2/common/protein.py
+805 −285		profold2/common/residue_constants.py
+337 −113		profold2/data/dataset.py
+2 −2		profold2/data/mmcif_parsing.py
+7 −7		profold2/data/pipeline.py
+1 −1		profold2/data/tools/hhblits.py
+1 −1		profold2/data/tools/hhsearch.py
+1 −1		profold2/data/tools/hmmbuild.py
+1 −1		profold2/data/tools/hmmsearch.py
+1 −1		profold2/data/tools/jackhmmer.py
+1 −1		profold2/data/tools/kalign.py
+64 −10		profold2/data/utils.py
+121 −81		profold2/model/alphafold2.py
+173 −71		profold2/model/commons.py
+194 −103		profold2/model/evoformer.py
+56 −170		profold2/model/features.py
+30 −29		profold2/model/folding.py
+165 −41		profold2/model/functional.py
+187 −116		profold2/model/head.py
+6 −14		profold2/model/kernel/builder.py
+17 −6		profold2/relax/amber_minimize.py
+49 −53		profold2/tools/energy.py
+23 −719		profold2/utils.py
+7 −5		requirements.txt
+0 −7		serving.properties
+0 −179		serving_client.py
+0 −102		serving_handler.py
+0 −45		serving_model.py
+0 −29		serving_workflow.py
+0 −18		serving_workflow.yaml
+15 −2		setup.py
+0 −148		tests/test_utils.py
+0 −196		tools/ab_add_plddt.py
+83 −0		tools/bprna_eval.py
+2 −2		tools/contact_plot.py
+144 −14		tools/dataset.py
+140 −48		tools/dataset_from_pdb.py
+0 −108		tools/domain_split.py
+357 −0		tools/fr3d_interaction_to_basepair.py
+144 −0		tools/model_ops.py
+0 −556		tools/msa_select.py
+22 −20		tools/pdb_convert.py
+271 −0		tools/pdb_extract_comp_id.py
+94 −80		web/app.py
+31 −18		web/db.py
+9 −5		web/form.py
+88 −56		web/task.py
+12 −13		web/utils.py