diff --git a/data/tcr_pmhc_db/test.idx b/data/tcr_pmhc_db/test.idx
new file mode 100644
index 0000000..7b9a348
--- /dev/null
+++ b/data/tcr_pmhc_db/test.idx
@@ -0,0 +1,103 @@
+1bd2_P
+1fo0_P
+1g6r_P
+1kj2_P
+1lp9_P
+1mwa_P
+1nam_P
+1qse_P
+1rc3_P
+2bnq_P
+2bnr_P
+2ckb_P
+2e7l_P
+2esv_P
+2f53_P
+2f54_P
+2gj6_P
+2nx5_P
+2oi9_P
+2ol3_P
+2p5e_P
+2p5w_P
+2pye_P
+2vlk_P
+2vlr_P
+2ypl_P
+3dxa_P
+3e3q_P
+3ffc_P
+3gsn_P
+3h9s_P
+3hg1_P
+3kpr_P
+3mv7_P
+3mv8_P
+3mv9_P
+3o4l_P
+3pqy_P
+3qdg_P
+3qdj_P
+3qdm_P
+3qeq_P
+3qfj_P
+3tfk_P
+3tjh_P
+3uts_P
+3utt_P
+3vxm_P
+3vxr_P
+3vxs_P
+3w0w_P
+4eup_P
+4g8g_P
+4jfd_P
+4jfe_P
+4jff_P
+4l3e_P
+4mji_P
+4mnq_P
+4mvb_P
+4mxq_P
+4prp_P
+4qok_P
+4qrp_P
+5brz_P
+5bs0_P
+5c07_P
+5c08_P
+5c09_P
+5c0a_P
+5c0b_P
+5c0c_P
+5d2n_P
+5e9d_P
+5eu6_P
+5euo_P
+5hhm_P
+5hho_P
+5hyj_P
+5ivx_P
+5jhd_P
+5m00_P
+5men_P
+5nme_P
+5til_P
+5wlg_P
+5xov_P
+5yxn_P
+6TRo_P
+6amu_P
+6bj3_P
+6dkp_P
+6eqb_P
+6g9q_P
+6l9l_P
+6mtm_P
+6q3s_P
+6rp9_P
+6tmo_P
+6vma_P
+7jwj_P
+7n1f_P
+7rm4_P
diff --git a/predict.sh b/predict.sh
index eff6da9..34465b7 100755
--- a/predict.sh
+++ b/predict.sh
@@ -40,9 +40,23 @@ if [ $# -eq 0 ]; then
   help 1
 fi
 
+############################
+echo "initialize db"
+############################
+db_dir=${CWD}/data/tcr_pmhc_db
+
+for c in "M" "P" "A" "B"; do
+  if [ ! -e ${data_dir}_${c}.fa ]; then
+    find ${db_dir}/fasta -name "*_${c}.fasta" -exec awk '$0!=""{print $0}' {} \; > ${db_dir}_${c}.fa;
+  fi
+done
+
+
 csv_file=$*
 
-# convert csv to fasta files
+############################
+echo "convert csv to fasta files"
+############################
 python ${CWD}/main.py csv_to_fasta \
     --target_uri "${output_dir}${output_params}" \
     --pid_prefix tcr_pmhc_test_ \
@@ -50,20 +64,23 @@ python ${CWD}/main.py csv_to_fasta \
     --verbose \
     ${csv_file}
 
-# make chain.idx
+############################
+echo "make chain.idx"
+############################
 cat ${output_dir}/mapping.idx_all | \
   cut -f2 | \
   awk -F _ '{printf("%s",$1);for (i=2;i<NF;++i) printf("_%s", $i); printf(" %s\n", $NF);}' | \
   sort -T . | \
   awk -f ${CWD}/scripts/collapse.awk  > ${output_dir}/chain.idx_all
 
-# filter out ones that has only one chain
-#   1. load dict a (in test dataset) from attr.idx_all
-#   2. filter out those that:
-#      i.  has no peptide
-#      ii. only have peptide & MHC and in dict a
-#      iii.has only one chain
-#
+############################
+echo "filter out ones that has only one chain"
+echo "  1. load dict a (in test dataset) from attr.idx_all"
+echo "  2. filter out those that:"
+echo "     i.  has no peptide"
+echo "     ii. only have peptide & MHC and in dict a"
+echo "     iii.has only one chain"
+############################
 cat ${output_dir}/chain.idx_all | \
   awk -v attr_idx=${output_dir}/attr.idx_all 'BEGIN{
       while(getline<attr_idx) {
@@ -84,7 +101,9 @@ cat ${output_dir}/chain.idx_all | \
         print $0;
     }' > ${output_dir}/chain.idx_all_blacklist
 
-# make attr.idx for test fold_i
+############################
+echo "make attr.idx"
+############################
 cat ${output_dir}/attr.idx_all | \
   awk -v blacklist=${output_dir}/chain.idx_all_blacklist 'BEGIN{
       a["xxxxxxxx"] = 1;
@@ -102,7 +121,9 @@ python ${CWD}/main.py attr_update_weight_and_task \
     --weight 1.0 \
     data/tcr_pmhc_db/attr.idx  >> ${output_dir}/attr.idx
 
-# build the dataset (test data included) mapping.idx and chain.idx
+############################
+echo "build the dataset: mapping.idx and chain.idx"
+############################
 cat ${CWD}/data/tcr_pmhc_db/mapping.idx ${output_dir}/mapping.idx_all > ${output_dir}/mapping.idx
 cat ${output_dir}/mapping.idx | \
   cut -f2 | \
@@ -111,7 +132,9 @@ cat ${output_dir}/mapping.idx | \
   awk -f ${CWD}/scripts/collapse.awk  > ${output_dir}/chain.idx
 
 
-# build fasta for each chain
+############################
+echo "build fasta for each chain"
+############################
 for c in "A" "B" "P" "M"; do
   python ${CWD}/main.py fasta_extract \
       --target_uri ${output_dir} \
@@ -123,14 +146,18 @@ for c in "A" "B" "P" "M"; do
   fi
 done
 
-# align A B M with jackhmmer
+############################
+echo "align chains A, B and M with jackhmmer"
+############################
 for c in "A" "B" "M"; do
   find ${CWD}/data/tcr_pmhc_db/fasta -name "*_${c}.fasta" > ${output_dir}/tcr_pmhc_db_${c}
   cat ${output_dir}/tcr_pmhc_db_${c} | ${CWD}/bin/mapred -m "uniref90_db=${output_dir}/tcr_pmhc_${c}.fa mgnify_db=${CWD}/data/tcr_pmhc_db_${c}.fa sh ${CWD}/scripts/run_jackhmmer.sh -o ${output_dir}/a3m" -c 10
   cat ${output_dir}/tcr_pmhc_db_${c} | ${CWD}/bin/mapred -m "PIPELINE_UNIREF_MAX_HITS=1000000 PIPELINE_MGNIFY_MAX_HITS=1000000 PIPELINE_DEDUPLICATE=0 sh ${CWD}/scripts/run_pipeline.sh -o ${output_dir}/a3m" -c 10
 done
 
-# align P with equal length
+############################
+echo "align chain P with equal length"
+############################
 python ${CWD}/main.py peptide_align \
   --output_dir ${output_dir}/a3m \
   --target_db ${output_dir}/tcr_pmhc_P.fa \
@@ -144,11 +171,13 @@ for c in "P"; do
 done
 
 # filter a3m with threshold=t
+############################
+echo "filter a3m (MHC): align_ratio>=${mhc_align_ratio_threshold}"
+############################
 if [ -d ${output_dir}/var ]; then
   rm -rf ${output_dir}/var
 fi
 
-echo "filter a3m (MHC): align_ratio>=${mhc_align_ratio_threshold}"
 cp -r ${output_dir}/a3m ${output_dir}/var
 python ${CWD}/main.py a3m_filter \
     --output_dir ${output_dir}/var \
@@ -156,7 +185,9 @@ python ${CWD}/main.py a3m_filter \
     --trim_gap \
     ${CWD}/data/tcr_pmhc_db/fasta/*_M.fasta
 
+############################
 echo "predict ${csv_file}"
+############################
 python main.py predict \
     ${model_args} \
     --output_dir ${output_dir}/pred \
diff --git a/profold2 b/profold2
index d41d31e..8d908aa 160000
--- a/profold2
+++ b/profold2
@@ -1 +1 @@
-Subproject commit d41d31e173a547630669e34bcd4cf95c6bfca3db
+Subproject commit 8d908aa477f0ed8cbbfa46f36b25321b8a483382