Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
223 changes: 155 additions & 68 deletions analysis/bin_compute_confidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,85 +13,172 @@
import numpy as np
import os
from datetime import datetime
import matplotlib.pyplot as plt

# Choose subject
subject = 'w' # one of 'w', 'je', 'jo'
# make sure text is saved in svgs as text, not path
plt.rcParams['svg.fonttype'] = 'none'
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = ['Helvetica']
plt.rcParams['font.serif'] = ['Times']
plt.rcParams['lines.linewidth'] = .75 * .4

# Choose task
tasks = ['Probe_4AFC', 'Train_2AFC_idtrials', 'Train_4AFC', 'Train_2AFC'] # options
task = tasks[1] # which task to bin data for

# Choose task
probe_task = False
nafc = [4]
# Set directories
data_dir = 'data/learning_data'
out_dir = 'results/learning'
# Load Data
subject_data_path = os.path.join(data_dir, subject + '_' + task + '.csv')
subject_data = pd.read_csv(subject_data_path)

# Get year of each trial for plotting later
try:
subject_data['year'] = [datetime.fromtimestamp(x/1000).strftime("%Y") for x in subject_data['timestamp']]
except:
subject_data['year'] = '2016' # timestamp not currently in Train 2AFC id trial csvs, but all trials were in 2016

# Split into color and shape trials on the basis of the choice
# E.g., for Probe_4AFC, choose_shape means cued color, chose shape; for Train_4AFC it means cued colored shape, chose shape
choose_shape_trials = subject_data[subject_data['is_choice_color']==0].reset_index(drop=True)
choose_color_trials = subject_data[subject_data['is_choice_color']==1].reset_index(drop=True)

# Bin trials
# Define how many bins to use
if task == 'Train_2AFC_idtrials':
n_in_bin = 50 # smaller than probe because many fewer trials
elif task == 'Train_2AFC':
n_in_bin = 75 # smaller than probe because many fewer trials
if probe_task:
bin_size = [1000]
task_names = ['_Probe_4AFC']
plot_size = (3.5, 2)
name = 'probe'
else:
n_in_bin = 500
if len(nafc)==2:
bin_size = [200,1000]
task_names = ['_Train_2AFC', '_Train_4AFC']
plot_size = (2.5,2) #(3.5, 2)
name = 'train'
elif len(nafc) == 1 and nafc[0] == 4:
bin_size = [1000]
task_names = ['_Train_4AFC']
plot_size = (2.4,2)#(2.7 , 2)
name = 'train4afc'
else:
bin_size = [200]
task_names = ['_Train_2AFC']
plot_size = (.7,2)#(.8, 2)
name = 'train2afc'

x = []
y = []
y_ci = []
year_ticks = []
for subject in ['w', 'je']:
subj_x = []
subj_y = []
subj_y_ci = []
subj_year_ticks = []
subj_years = []
for j, t in enumerate(task_names):
n_in_bin = bin_size[j]
subject_data = pd.read_csv(os.path.join(data_dir, subject + t + '.csv'))
# Get year of each trial for plotting later


# Bin data and get nested list containing the outcome values (0 or 1) of all trials in that bin
binned_trials = [subject_data['chose_correct'][i:i+n_in_bin] for i in range(0, len(subject_data), n_in_bin)]
###
# counting sessions in bins
sessions_ints = [int(x) for x in subject_data['days_from_20160101']]
sessions_binned = [sessions_ints[i:i+n_in_bin] for i in range(0, len(subject_data), n_in_bin)]
id_bin = []
bin_how_many = []
for bb, b in enumerate(sessions_binned):
which_sessions = np.unique(b)
bin_how_many.append(which_sessions.shape[0])
id_bin.append(bb)

to_bev = pd.DataFrame({'bin num':id_bin, 'num sessions':bin_how_many})
# to_bev.to_csv('/home/ssbeast/Projects/HFTemp/ColorShapeContingency1/analysis/'+subject+'_sessions_bins.csv', index=False)
###
# For each bin, bootstrap the accuracy 1000 times
n_boots = 1000
n_bins = len(binned_trials)
shape_color = np.zeros((3, n_bins)) # <metric, bin number>

for l, b in enumerate(binned_trials):
boot_accs = []
for i in range(n_boots):
sample = np.random.choice(b, size=n_in_bin) # array of 0s and 1s, resample to bin size
boot_accs.append(sample.mean()) # calculate accuracy for that sample of trials
boot_mean_acc = np.array(boot_accs).mean() # accuracy at trial l
boot_lcb = np.quantile(boot_accs, q=.025) # lower confidence bound of acccuracy at trial l
boot_ucb = np.quantile(boot_accs, q=.975) # upper confidence bound of accuracy at trial l
shape_color[0,l] = boot_mean_acc
shape_color[1,l] = boot_lcb
shape_color[2,l] = boot_ucb

# To preserve trial number as x axis, get trial number each bin would be centered on
x_vals = len(subject_data)
use_x = list(range(int(n_in_bin/2),x_vals+int(n_in_bin/2), n_in_bin))
use_x = use_x[:n_bins]

try:
subject_data['year'] = [datetime.fromtimestamp(x/1000).strftime("%Y") for x in subject_data['timestamp']]
except:
subject_data['year'] = '2016' # timestamp not currently in Train 2AFC id trial csvs, but all trials were in 2016
# For each bin, approximate which year most trials in that bin were completed in, for plotting later

if t == '_Train_2AFC':
year_bins, include_years = [0], ['2016']
else:
bin_year = [subject_data['year'][i:i+n_in_bin] for i in range(0, len(subject_data), n_in_bin)]
year_labels = [x.mode()[0] for x in bin_year]
if len(year_labels) > n_bins: # if more years than bins, drop last, years are aligned to the first bin
year_labels = year_labels[:n_bins]
# Only want to plot a year mark at the start of each year
year_changes = [i for i in range(len(year_labels)) if year_labels[i] != year_labels[i-1]] # which bins are year transitions
include_years = [year_labels[x] for x in year_changes] # keep those years
year_bins = [use_x[i] for i in year_changes] # get corresponding trial number (bin) values
if j == 1:
use_x = [x + subj_x[0][-1] for x in use_x]
year_bins = [x + subj_x[0][-1] for x in year_bins]
if include_years[0] == '2016':
include_years = include_years[1:]
year_bins = year_bins[1:]
subj_x.append(use_x)
subj_y.append(shape_color[0])
subj_y_ci.append(np.array([shape_color[1],shape_color[2]]).T)
subj_year_ticks.extend(year_bins)
subj_years.extend(include_years)
x.append(subj_x)
y.append(subj_y)
y_ci.append(subj_y_ci)
year_ticks.append([subj_year_ticks, subj_years])

bin_data = [x,y]
ci_data = [x, y_ci]

# Bin data and get nested list containing the outcome values (0 or 1) of all trials in that bin
binned_choose_shape = [choose_shape_trials['chose_correct'][i:i+n_in_bin] for i in range(0, len(choose_shape_trials), n_in_bin)]
binned_choose_color = [choose_color_trials['chose_correct'][i:i+n_in_bin] for i in range(0, len(choose_color_trials), n_in_bin)]
colors = ["black", "tab:gray"]#["#D95319", "tab:gray"]
open_c = False

# For each bin, approximate which year most trials in that bin were completed in, for plotting later
bin_year = [choose_shape_trials['year'][i:i+n_in_bin] for i in range(0, len(choose_shape_trials), n_in_bin)]
bin_year_mode = [x.mode()[0] for x in bin_year]
fig, axs = plt.subplots(figsize = plot_size)
for i in range(len(bin_data)):
for j in range(len(bin_data[0][i])):
if open_c:
axs.scatter(bin_data[0][i][j], bin_data[1][i][j], facecolor='none', edgecolor=colors[i], s=6, linewidth=.2, rasterized=False)
else:
axs.scatter(bin_data[0][i][j], bin_data[1][i][j], facecolor=colors[i], edgecolor=colors[i], s=6, linewidth=.5, rasterized=False)
if ci_data is not None:
axs.fill_between(ci_data[0][i][j], np.array(ci_data[1][i][j]).T[0], np.array(ci_data[1][i][j]).T[1], alpha=.2, color=colors[i], rasterized=False) # rasterize CIs else get svg rendering issues
yr_ax = axs.secondary_xaxis(location=0)
yr_ax.set_xticks(year_ticks[i][0],year_ticks[i][1],fontsize=7, color=colors[i], rotation=90)
if np.max(bin_data[0][0][-1]) > 20000:
xtick = list(range(0, int(np.max(bin_data[0][0][-1])), 20000))
elif np.max(bin_data[0][0][-1]) < 15000:
xtick = [0, 10000]
else:
xtick = list(range(0, int(np.max(bin_data[0][0][-1])), 1000))
axs.margins(.05)

# Deal with last bins - may have few trials and one trial type may have one more bin than another
n_bins = np.min([len(binned_choose_shape), len(binned_choose_color)]) # min n bins shared by both trial types
if binned_choose_shape[n_bins-1].shape[0] < 10 or binned_choose_color[n_bins-1].shape[0] < 10:
n_bins = n_bins-1 # if either final bin has very few trials, don't include in plot
if len(nafc)==2:
stop2 = np.max([np.max(bin_data[0][0][0]), np.max(bin_data[0][1][0])])
start4 = np.min([np.min(bin_data[0][0][1]), np.min(bin_data[0][1][1])])
stop4 = np.max([np.max(bin_data[0][0][1]), np.max(bin_data[0][1][1])])

# For each bin, bootstrap the accuracy 1000 times
n_boots = 1000
shape_color = np.zeros((2, 3, n_bins)) # <trial type, metric, bin number>
# For each trial type
for t, trial_type in enumerate([binned_choose_shape, binned_choose_color]):
trial_type_binned = trial_type[:n_bins]
# For each bin
for l, b in enumerate(trial_type_binned):
boot_accs = []
for i in range(n_boots):
sample = np.random.choice(b, size=n_in_bin) # array of 0s and 1s, resample to bin size
boot_accs.append(sample.mean()) # calculate accuracy for that sample of trials
boot_mean_acc = np.array(boot_accs).mean() # accuracy at trial l
boot_lcb = np.quantile(boot_accs, q=.025) # lower confidence bound of acccuracy at trial l
boot_ucb = np.quantile(boot_accs, q=.975) # upper confidence bound of accuracy at trial l
shape_color[t,0,l] = boot_mean_acc
shape_color[t,1,l] = boot_lcb
shape_color[t,2,l] = boot_ucb
axs.tick_params(axis="both", length=2., pad=1)
axs.tick_params(axis='x', pad=6)

# To preserve trial number as x axis, get trial number each bin would be centered on
x_vals = np.min([choose_shape_trials.shape[0],choose_color_trials.shape[0]])
use_x = list(range(int(n_in_bin/2),x_vals+int(n_in_bin/2), n_in_bin))
use_x = use_x[:n_bins]
ytick = [.25,.5,1.]
axs.set_yticks(ytick, labels=[str(yt) for yt in ytick], fontsize=7)
axs.set_xticks(xtick)
axs.set_xticklabels([int(xt/10000) for xt in xtick],fontsize=7)

# Save out all accuracies, confidence intervals, x ticks, and years
shape_accs = shape_color[0, 0, :]
color_accs = shape_color[1, 0, :]
shape_ci = shape_color[0, 1:,:].T
color_ci = shape_color[1, 1:,:].T
array_out_name = subject + '_'+task+'_learning_curve_data_'+str(n_boots)+'_all_binned.npz'
out_path = os.path.join(out_dir, array_out_name)
np.savez(out_path, color_x=use_x,color_accs=color_accs,
shape_x=use_x,shape_accs=shape_accs,color_i=use_x,
color_ci=color_ci,shape_i=use_x,shape_ci=shape_ci, bin_year = bin_year_mode)
fig.tight_layout()
#fig.savefig(os.path.join(out_dir, name + '_learning.svg'))
plt.show()
plt.close()

17 changes: 12 additions & 5 deletions analysis/color_biased_regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
from bin import passive_task_functions as pf

# Choose subject
subject = 'jeeves' # one of 'wooster', 'jeeves'
content_root = 'data' # where are the data stored
subject = 'wooster' # one of 'wooster', 'jeeves'
content_root = 'data' # where are the data stored change this
subj_root = os.path.join(content_root, 'subjects', subject) # where is that subject's data

# Set out directory
outdir = 'results/passive'
outdir = 'results/passive' change this

# LOAD DATA KEYS pointing to nifti beta weight images for each condition on each run
# for passive task 1 and for eccentricity
Expand Down Expand Up @@ -103,7 +103,14 @@
scp_betas = pf.load_betas(scp_beta_coeffs_key,
conditions_to_quant=['uncolored_shape', 'achromatic_shape', 'constant'],
content_root=content_root, subj_root=subj_root)

scp_arrays = {}
for i in range(len(scp_betas)):
namekey = scp_betas['run'][i] + '_' + scp_betas['condition'][i]
a=scp_betas['betas'][i]
scp_arrays[namekey] = a
scpout = os.path.join('/mnt/isilon/PROJECTS/ColorShapeContingency1/data_files/univariate_data/save_out_betas', subject+'_scp_betas.npz')
np.savez(scpout, **scp_arrays)

# Path to subject's masked functional target; used for getting brain mask
ft_path = os.path.join(subj_root, 'mri', 'functional_target.nii.gz')

Expand Down Expand Up @@ -143,4 +150,4 @@

# Save file out
color_minus_noncolor_out = os.path.join(outdir, subject+'_color_assoc_bias_colorbiased_minus_noncolorbiased.csv')
color_minus_noncolor.to_csv(color_minus_noncolor_out, index=False)
#color_minus_noncolor.to_csv(color_minus_noncolor_out, index=False)
7 changes: 3 additions & 4 deletions analysis/compute_selectivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
import pandas as pd
import nibabel as nib
from bin import passive_task_functions as pf

# Choose subject
subject = 'wooster' # one of 'wooster', 'jeeves'
subject = 'jeeves' # one of 'wooster', 'jeeves'
content_root = 'data' # where are the data stored
subj_root = os.path.join(content_root, 'subjects', subject) # where is that subject's data

Expand All @@ -36,8 +36,6 @@
ecc_mod_dir = os.path.join(subj_root, 'analysis', 'ecc')
ecc_beta_coeffs_key = pd.read_csv(os.path.join(subj_root, 'analysis', 'eccentricity_mapper_beta_coeffs_key.csv'))



# GENERATE ROI DEFINITIONS
# Load subject's atlas parcels
atlas_path = os.path.join(subj_root, 'rois', 'major_divisions', 'final_atlas.nii.gz')
Expand Down Expand Up @@ -122,6 +120,7 @@
congruency_betas = pf.load_betas(congruency_beta_coeffs_key,
conditions_to_quant=['incongruent', 'congruent'],
content_root=content_root, subj_root=subj_root)

incongruency_selectivity = pf.selectivity(congruency_betas,'incongruent', 'congruent', subdiv_rois_congruency)
incongruency_selectivity = pd.DataFrame(incongruency_selectivity, columns = ['run', 'roi', 'effect'])
incongruency_selectivity['comparison'] = 'incongruent_vs_congruent'
Expand Down
Loading