NEI-LSR · hfeibes · Dec 11, 2025 · Dec 11, 2025
diff --git a/analysis/bin_compute_confidence.py b/analysis/bin_compute_confidence.py
@@ -13,85 +13,172 @@
 import numpy as np
 import os
 from datetime import datetime
+import matplotlib.pyplot as plt
 
-# Choose subject
-subject = 'w' # one of 'w', 'je', 'jo'
+# make sure text is saved in svgs as text, not path
+plt.rcParams['svg.fonttype'] = 'none'
+plt.rcParams['font.family'] = 'sans-serif'
+plt.rcParams['font.sans-serif'] = ['Helvetica']
+plt.rcParams['font.serif'] = ['Times']
+plt.rcParams['lines.linewidth'] = .75 * .4
 
-# Choose task
-tasks = ['Probe_4AFC', 'Train_2AFC_idtrials', 'Train_4AFC', 'Train_2AFC'] # options
-task = tasks[1] # which task to bin data for
 
+# Choose task
+probe_task = False
+nafc = [4]
 # Set directories
 data_dir = 'data/learning_data'
 out_dir = 'results/learning'
-# Load Data 
-subject_data_path = os.path.join(data_dir, subject + '_' + task + '.csv')
-subject_data = pd.read_csv(subject_data_path)
-
-# Get year of each trial for plotting later 
-try:
-    subject_data['year'] = [datetime.fromtimestamp(x/1000).strftime("%Y") for x in subject_data['timestamp']]
-except:
-    subject_data['year'] = '2016' # timestamp not currently in Train 2AFC id trial csvs, but all trials were in 2016
 
-# Split into color and shape trials on the basis of the choice
-# E.g, Probe_4AFC choose_shape means cued color, chose shape, but for Train_4AFC it means cued colored shape, chose shape
-choose_shape_trials = subject_data[subject_data['is_choice_color']==0].reset_index(drop=True) 
-choose_color_trials = subject_data[subject_data['is_choice_color']==1].reset_index(drop=True) 
-
-# Bin trials 
-# Define how many bins to use
-if task == 'Train_2AFC_idtrials':
-    n_in_bin = 50 # smaller than probe because many fewer trials 
-elif task == 'Train_2AFC':
-    n_in_bin = 75 # smaller than probe because many fewer trials 
+if probe_task:
+    bin_size = [1000]
+    task_names = ['_Probe_4AFC']
+    plot_size = (3.5, 2)
+    name = 'probe'
 else:
-    n_in_bin = 500
+    if len(nafc)==2:
+        bin_size = [200,1000]
+        task_names = ['_Train_2AFC', '_Train_4AFC']
+        plot_size = (2.5,2) #(3.5, 2)
+        name = 'train'
+    elif len(nafc) == 1 and nafc[0] == 4:
+        bin_size = [1000]
+        task_names = ['_Train_4AFC']
+        plot_size = (2.4,2)#(2.7 , 2)
+        name = 'train4afc'
+    else:
+        bin_size = [200]
+        task_names = ['_Train_2AFC']
+        plot_size = (.7,2)#(.8, 2)
+        name = 'train2afc'
+
+x = []
+y = []
+y_ci = []
+year_ticks = []
+for subject in ['w', 'je']:
+    subj_x = []
+    subj_y = []
+    subj_y_ci = []
+    subj_year_ticks = []
+    subj_years = []
+    for j, t in enumerate(task_names):
+        n_in_bin = bin_size[j]
+        subject_data = pd.read_csv(os.path.join(data_dir, subject + t + '.csv'))
+        # Get year of each trial for plotting later 
+
+
+        # Bin data and get nested list containing the outcome values (0 or 1) of all trials in that bin
+        binned_trials = [subject_data['chose_correct'][i:i+n_in_bin] for i in range(0, len(subject_data), n_in_bin)]
+        ###
+        # count the number of unique sessions in each bin
+        sessions_ints = [int(x) for x in subject_data['days_from_20160101']]
+        sessions_binned = [sessions_ints[i:i+n_in_bin] for i in range(0, len(subject_data), n_in_bin)]
+        id_bin = []
+        bin_how_many = []
+        for bb, b in enumerate(sessions_binned):
+            which_sessions = np.unique(b)
+            bin_how_many.append(which_sessions.shape[0])
+            id_bin.append(bb)
+
+        to_bev = pd.DataFrame({'bin num':id_bin, 'num sessions':bin_how_many})
+       # to_bev.to_csv('/home/ssbeast/Projects/HFTemp/ColorShapeContingency1/analysis/'+subject+'_sessions_bins.csv', index=False)
+        ###
+        # For each bin, bootstrap the accuracy 1000 times
+        n_boots = 1000
+        n_bins = len(binned_trials)
+        shape_color = np.zeros((3, n_bins)) # <metric, bin number>
+
+        for l, b in enumerate(binned_trials):
+            boot_accs = []
+            for i in range(n_boots):
+                sample = np.random.choice(b, size=n_in_bin) # array of 0s and 1s, resample to bin size
+                boot_accs.append(sample.mean()) # calculate accuracy for that sample of trials
+            boot_mean_acc = np.array(boot_accs).mean() # accuracy for bin l
+            boot_lcb = np.quantile(boot_accs, q=.025) # lower confidence bound of accuracy for bin l
+            boot_ucb = np.quantile(boot_accs, q=.975) # upper confidence bound of accuracy for bin l
+            shape_color[0,l] = boot_mean_acc
+            shape_color[1,l] = boot_lcb
+            shape_color[2,l] = boot_ucb
+
+         # To preserve trial number as x axis, get trial number each bin would be centered on
+        x_vals = len(subject_data)
+        use_x = list(range(int(n_in_bin/2),x_vals+int(n_in_bin/2), n_in_bin))
+        use_x = use_x[:n_bins]
+
+        try:
+            subject_data['year'] = [datetime.fromtimestamp(x/1000).strftime("%Y") for x in subject_data['timestamp']]
+        except:
+            subject_data['year'] = '2016' # timestamp not currently in Train 2AFC id trial csvs, but all trials were in 2016
+        # For each bin, approximate which year most trials in that bin were completed in, for plotting later
+
+        if t == '_Train_2AFC':
+            year_bins, include_years = [0], ['2016']
+        else:
+            bin_year = [subject_data['year'][i:i+n_in_bin] for i in range(0, len(subject_data), n_in_bin)] 
+            year_labels = [x.mode()[0] for x in bin_year] 
+            if len(year_labels) > n_bins: # if more years than bins, drop last, years are aligned to the first bin
+                year_labels = year_labels[:n_bins] 
+            # Only want to plot a year mark at the start of each year
+            year_changes = [i for i in range(len(year_labels)) if year_labels[i] != year_labels[i-1]] # which bins are year transitions
+            include_years = [year_labels[x] for x in year_changes] # keep those years
+            year_bins = [use_x[i] for i in year_changes] # get corresponding trial number (bin) values
+        if j == 1:
+            use_x = [x + subj_x[0][-1] for x in use_x]
+            year_bins = [x + subj_x[0][-1] for x in year_bins]
+            if include_years[0] == '2016':
+                include_years = include_years[1:]
+                year_bins = year_bins[1:]
+        subj_x.append(use_x)
+        subj_y.append(shape_color[0])
+        subj_y_ci.append(np.array([shape_color[1],shape_color[2]]).T)
+        subj_year_ticks.extend(year_bins)
+        subj_years.extend(include_years)
+    x.append(subj_x)
+    y.append(subj_y)
+    y_ci.append(subj_y_ci)
+    year_ticks.append([subj_year_ticks, subj_years])
+
+bin_data = [x,y]
+ci_data = [x, y_ci]
 
-# Bin data and get nested list containing the outcome values (0 or 1) of all trials in that bin
-binned_choose_shape = [choose_shape_trials['chose_correct'][i:i+n_in_bin] for i in range(0, len(choose_shape_trials), n_in_bin)]
-binned_choose_color = [choose_color_trials['chose_correct'][i:i+n_in_bin] for i in range(0, len(choose_color_trials), n_in_bin)]
+colors = ["black", "tab:gray"]#["#D95319", "tab:gray"]
+open_c = False
 
-# For each bin, approximate which year most trials in that bin were completed in, for plotting later
-bin_year = [choose_shape_trials['year'][i:i+n_in_bin] for i in range(0, len(choose_shape_trials), n_in_bin)] 
-bin_year_mode = [x.mode()[0] for x in bin_year] 
+fig, axs = plt.subplots(figsize = plot_size)
+for i in range(len(bin_data[0])):  # one pass per subject: bin_data[0] is x, which holds one entry per subject (len(bin_data) is just the [x, y] pair)
+    for j in range(len(bin_data[0][i])):  # one pass per task loaded for this subject
+        if open_c:
+            axs.scatter(bin_data[0][i][j], bin_data[1][i][j], facecolor='none', edgecolor=colors[i], s=6, linewidth=.2, rasterized=False)
+        else:
+            axs.scatter(bin_data[0][i][j], bin_data[1][i][j], facecolor=colors[i], edgecolor=colors[i], s=6, linewidth=.5, rasterized=False)
+        if ci_data is not None:
+            axs.fill_between(ci_data[0][i][j], np.array(ci_data[1][i][j]).T[0], np.array(ci_data[1][i][j]).T[1], alpha=.2, color=colors[i], rasterized=False) # rasterize CIs else get svg rendering issues
+        yr_ax = axs.secondary_xaxis(location=0)
+        yr_ax.set_xticks(year_ticks[i][0],year_ticks[i][1],fontsize=7, color=colors[i], rotation=90)
+if np.max(bin_data[0][0][-1]) > 20000:
+    xtick = list(range(0, int(np.max(bin_data[0][0][-1])), 20000))
+elif np.max(bin_data[0][0][-1]) < 15000:
+    xtick = [0, 10000]
+else:
+    xtick = list(range(0, int(np.max(bin_data[0][0][-1])), 1000))
+axs.margins(.05)
 
-# Deal with last bins - may have few trials and one trial type may have one more bin than another
-n_bins = np.min([len(binned_choose_shape), len(binned_choose_color)]) # min n bins shared by both trial types
-if binned_choose_shape[n_bins-1].shape[0] < 10 or binned_choose_color[n_bins-1].shape[0] < 10:
-    n_bins = n_bins-1 # if either final bin has very few trials, don't include in plot
+if len(nafc)==2:
+    stop2 = np.max([np.max(bin_data[0][0][0]), np.max(bin_data[0][1][0])])
+    start4 = np.min([np.min(bin_data[0][0][1]), np.min(bin_data[0][1][1])])
+    stop4 = np.max([np.max(bin_data[0][0][1]), np.max(bin_data[0][1][1])])
 
-# For each bin, bootstrap the accuracy 1000 times
-n_boots = 1000
-shape_color = np.zeros((2, 3, n_bins)) # <trial type, metric, bin number>
-# For each trial type
-for t, trial_type in enumerate([binned_choose_shape, binned_choose_color]):
-    trial_type_binned = trial_type[:n_bins] 
-    # For each bin
-    for l, b in enumerate(trial_type_binned):
-        boot_accs = []
-        for i in range(n_boots):
-            sample = np.random.choice(b, size=n_in_bin) # array of 0s and 1s, resample to bin size
-            boot_accs.append(sample.mean()) # calculate accuracy for that sample of trials
-        boot_mean_acc = np.array(boot_accs).mean() # accuracy at trial l
-        boot_lcb = np.quantile(boot_accs, q=.025) # lower confidence bound of acccuracy at trial l
-        boot_ucb = np.quantile(boot_accs, q=.975) # upper confidence bound of accuracy at trial l
-        shape_color[t,0,l] = boot_mean_acc
-        shape_color[t,1,l] = boot_lcb
-        shape_color[t,2,l] = boot_ucb
+axs.tick_params(axis="both", length=2., pad=1)
+axs.tick_params(axis='x', pad=6)
 
-# To preserve trial number as x axis, get trial number each bin would be centered on
-x_vals = np.min([choose_shape_trials.shape[0],choose_color_trials.shape[0]])
-use_x = list(range(int(n_in_bin/2),x_vals+int(n_in_bin/2), n_in_bin))
-use_x = use_x[:n_bins]
+ytick = [.25,.5,1.]
+axs.set_yticks(ytick, labels=[str(yt) for yt in ytick], fontsize=7)
+axs.set_xticks(xtick)
+axs.set_xticklabels([int(xt/10000) for xt in xtick],fontsize=7)
 
-# Save out all accuracies, confidence intervals, x ticks, and years
-shape_accs = shape_color[0, 0, :]
-color_accs = shape_color[1, 0, :]
-shape_ci = shape_color[0, 1:,:].T
-color_ci = shape_color[1, 1:,:].T
-array_out_name = subject + '_'+task+'_learning_curve_data_'+str(n_boots)+'_all_binned.npz'
-out_path = os.path.join(out_dir, array_out_name)
-np.savez(out_path, color_x=use_x,color_accs=color_accs,
-         shape_x=use_x,shape_accs=shape_accs,color_i=use_x,
-         color_ci=color_ci,shape_i=use_x,shape_ci=shape_ci, bin_year = bin_year_mode)
+fig.tight_layout()
+#fig.savefig(os.path.join(out_dir, name + '_learning.svg'))
+plt.show()
+plt.close()
+
diff --git a/analysis/color_biased_regions.py b/analysis/color_biased_regions.py
@@ -10,12 +10,12 @@
 from bin import passive_task_functions as pf
 
 # Choose subject
-subject = 'jeeves' # one of 'wooster', 'jeeves'
-content_root = 'data' # where are the data stored
+subject = 'wooster' # one of 'wooster', 'jeeves'
+content_root = 'data' # where the data are stored (change this)
 subj_root = os.path.join(content_root, 'subjects', subject) # where is that subject's data
 
 # Set out directory
-outdir = 'results/passive' 
+outdir = 'results/passive'  # change this
 
 # LOAD DATA KEYS pointing to nifti beta weight images for each condition on each run
 # for passive task 1 and for eccentricity 
@@ -103,7 +103,14 @@
 scp_betas = pf.load_betas(scp_beta_coeffs_key, 
                                  conditions_to_quant=['uncolored_shape', 'achromatic_shape', 'constant'], 
                                  content_root=content_root, subj_root=subj_root)
-
+scp_arrays = {}
+for i in range(len(scp_betas)):
+    namekey = scp_betas['run'][i] + '_' + scp_betas['condition'][i]
+    a=scp_betas['betas'][i]
+    scp_arrays[namekey] = a
+scpout = os.path.join('/mnt/isilon/PROJECTS/ColorShapeContingency1/data_files/univariate_data/save_out_betas', subject+'_scp_betas.npz')
+np.savez(scpout, **scp_arrays)
+
 # Path to subject's masked funcitonal target; used for getting brain mask
 ft_path = os.path.join(subj_root, 'mri', 'functional_target.nii.gz') 
 
@@ -143,4 +150,4 @@
 
 # Save file out
 color_minus_noncolor_out = os.path.join(outdir, subject+'_color_assoc_bias_colorbiased_minus_noncolorbiased.csv')
-color_minus_noncolor.to_csv(color_minus_noncolor_out, index=False)
+#color_minus_noncolor.to_csv(color_minus_noncolor_out, index=False)
diff --git a/analysis/compute_selectivity.py b/analysis/compute_selectivity.py
@@ -12,9 +12,9 @@
 import pandas as pd
 import nibabel as nib
 from bin import passive_task_functions as pf
-
+    
 # Choose subject
-subject = 'wooster' # one of 'wooster', 'jeeves'
+subject = 'jeeves' # one of 'wooster', 'jeeves'
 content_root = 'data' # where are the data stored
 subj_root = os.path.join(content_root, 'subjects', subject) # where is that subject's data
 
@@ -36,8 +36,6 @@
 ecc_mod_dir = os.path.join(subj_root, 'analysis', 'ecc')
 ecc_beta_coeffs_key = pd.read_csv(os.path.join(subj_root, 'analysis', 'eccentricity_mapper_beta_coeffs_key.csv'))
 
-
-
 # GENERATE ROI DEFINITIONS
 # Load subject's atlas parcels
 atlas_path = os.path.join(subj_root, 'rois', 'major_divisions', 'final_atlas.nii.gz')
@@ -122,6 +120,7 @@
 congruency_betas = pf.load_betas(congruency_beta_coeffs_key, 
                                  conditions_to_quant=['incongruent', 'congruent'], 
                                  content_root=content_root, subj_root=subj_root)
+
 incongruency_selectivity = pf.selectivity(congruency_betas,'incongruent', 'congruent', subdiv_rois_congruency)
 incongruency_selectivity = pd.DataFrame(incongruency_selectivity, columns = ['run', 'roi', 'effect'])
 incongruency_selectivity['comparison'] = 'incongruent_vs_congruent'