diff --git a/draw.py b/draw.py
index 025c577..964fd9d 100644
--- a/draw.py
+++ b/draw.py
@@ -26,7 +26,7 @@ def plot_data_list(wrong_files, wrong_data, figure_dir):
         except Exception:
             print("failed to create folder to store figures")
             return
-    for i in xrange(len(wrong_files)):
+    for i in range(len(wrong_files)):
         filename = wrong_files[i]
         f = os.path.join(figure_dir, filename.strip('/').replace("/", "-") + ".png")
         plot_data(wrong_data[i], f, filename[filename.rfind('/')+1:])
@@ -37,14 +37,14 @@ def plot_data(data, filename, title):
     base_num = len(ALL_BASES)
-    cycles = len(data)/base_num
+    cycles = len(data)//base_num  # floor division: range() below needs an int under Python 3
     percents = {}
-    for b in xrange(base_num):
-        percents[ALL_BASES[b]]=[ 0.0 for c in xrange(cycles)]
+    for b in range(base_num):
+        percents[ALL_BASES[b]]=[ 0.0 for c in range(cycles)]
 
-    for c in xrange(cycles):
+    for c in range(cycles):
         total = 0
-        for b in xrange(base_num):
+        for b in range(base_num):
             total += data[c * base_num + b]
-        for b in xrange(base_num):
+        for b in range(base_num):
             percents[ALL_BASES[b]][c] = float(data[c * base_num + b]) / float(total)
 
     x = range(1, cycles+1)
@@ -80,7 +80,7 @@ def plot_benchmark(scores_arr, algorithms_arr, filename):
     plt.ylim(0.97, 1.001)
     plt.ylabel('Score', size=16, color='#333333')
     plt.xlabel('Validation pass (sorted by score)', size=16, color='#333333')
-    for i in xrange(len(scores_arr)):
+    for i in range(len(scores_arr)):
         plt.plot(x, scores_arr[i], color = colors[i%5], label=algorithms_arr[i], alpha=0.5, linewidth=2, linestyle = linestyles[i%3])
     plt.legend(loc='lower left')
     plt.savefig(filename)
diff --git a/predict.py b/predict.py
index 1744714..4d252b6 100755
--- a/predict.py
+++ b/predict.py
@@ -60,15 +60,14 @@ def load_model(options):
         print("Error: the model file not found: " + options.model_file)
         sys.exit(1)
     f = open(filename, "rb")
-    model = pickle.load(f)
+    if sys.version_info.major > 2:
+        model = pickle.load(f, encoding='latin1')  # decode pickles written by Python 2
+    else:
+        model = pickle.load(f)
     f.close()
     return model
 
 def main():
-    if sys.version_info.major >2:
-        print('python3 is not supported yet, please use python2')
-        sys.exit(1)
-
     (options, args) = parseCommand()
 
     data, samples = preprocess(options)
@@ -76,10 +75,14 @@
 
     model = load_model(options)
     labels = model.predict(data)
-
-    for i in xrange(len(samples)):
-        if options.quite == False or (labels[i] == 0 and "cfdna" in samples[i].lower()) or (labels[i] == 1 and "cfdna" not in samples[i].lower()):
-            print(get_type_name(labels[i]) + ": " + samples[i])
+    if sys.version_info.major > 2:
+        for i in range(len(samples)):
+            if options.quite == False or (labels[i] == 0 and "cfdna" in samples[i].lower()) or (labels[i] == 1 and "cfdna" not in samples[i].lower()):
+                print(get_type_name(labels[i]) + ": " + samples[i])
+    else:
+        for i in xrange(len(samples)):
+            if options.quite == False or (labels[i] == 0 and "cfdna" in samples[i].lower()) or (labels[i] == 1 and "cfdna" not in samples[i].lower()):
+                print(get_type_name(labels[i]) + ": " + samples[i])
 
     plot_data_list(samples, data, "predict_fig")
 
diff --git a/train.py b/train.py
index eb29f0c..382274d 100755
--- a/train.py
+++ b/train.py
@@ -135,15 +135,26 @@ def bootstrap_split(data, label, samples):
     validation_set["data"] = []
     validation_set["label"] = []
     validation_set["samples"] = []
-    for i in xrange(total_num):
-        if i in training_ids:
-            training_set["data"].append(data[i])
-            training_set["label"].append(label[i])
-            training_set["samples"].append(samples[i])
-        else:
-            validation_set["data"].append(data[i])
-            validation_set["label"].append(label[i])
validation_set["samples"].append(samples[i]) + if sys.version_info.major >2: + for i in range(total_num): + if i in training_ids: + training_set["data"].append(data[i]) + training_set["label"].append(label[i]) + training_set["samples"].append(samples[i]) + else: + validation_set["data"].append(data[i]) + validation_set["label"].append(label[i]) + validation_set["samples"].append(samples[i]) + else: + for i in xrange(total_num): + if i in training_ids: + training_set["data"].append(data[i]) + training_set["label"].append(label[i]) + training_set["samples"].append(samples[i]) + else: + validation_set["data"].append(data[i]) + validation_set["label"].append(label[i]) + validation_set["samples"].append(samples[i]) return training_set, validation_set @@ -153,7 +164,7 @@ def train(model, data, label, samples, options, benchmark = False): scores = [] wrong_files = [] wrong_data = [] - for i in xrange(options.passes): + for i in range(options.passes): print(str(i+1) + " / " + str(options.passes)); training_set, validation_set = bootstrap_split(data, label, samples) model.fit(np.array(training_set["data"]), np.array(training_set["label"])) @@ -166,7 +177,7 @@ def train(model, data, label, samples, options, benchmark = False): # get wrongly predicted elements arr = np.array(validation_set["data"]) - for v in xrange(len(validation_set["data"])): + for v in range(len(validation_set["data"])): result = model.predict(arr[v:v+1]) if result[0] != validation_set["label"][v]: #print("Truth: " + str(validation_set["label"][v]) + ", predicted: " + str(result[0]) + ": " + validation_set["samples"][v]) @@ -238,10 +249,10 @@ def main(): GaussianNB(), svm.SVC(kernel='rbf')] scores_arr = [] - for m in xrange(len(models)): + for m in range(len(models)): print("\nbenchmark with: " + names[m]) scores_arr.append(train(models[m], data, label, samples, options, True)) - for m in xrange(len(models)): + for m in range(len(models)): print("\nbenchmark mean score with: " + names[m] + " mean " + str(np.mean(scores_arr[m])) + ", std " + str(np.std(scores_arr[m]))) print("\nploting benchmark result...") plot_benchmark(scores_arr, names, "benchmark.png")