25 changes: 25 additions & 0 deletions .github/workflows/codespell.yml
@@ -0,0 +1,25 @@
# Codespell configuration is within pyproject.toml
---
name: Codespell

on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

permissions:
  contents: read

jobs:
  codespell:
    name: Check for spelling errors
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Annotate locations with typos
        uses: codespell-project/codespell-problem-matcher@v1
      - name: Codespell
        uses: codespell-project/actions-codespell@v2
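The workflow above only runs the checker; as its first comment notes, the codespell options themselves live in pyproject.toml. A minimal sketch of such a section, using standard codespell keys with illustrative values (not this repository's actual configuration):

```toml
[tool.codespell]
# Globs to exclude from checking (illustrative values).
skip = '*.csv,*.ipynb,./.git'
# Identifiers codespell would otherwise "correct" -- for example a variable
# named grat, which it reads as a misspelling of "great" (see the grating
# stimulus diffs below). Hypothetical list.
ignore-words-list = 'grat'
count = true
quiet-level = 3
```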
2 changes: 1 addition & 1 deletion brainscore_vision/benchmarks/ferguson2024/benchmark.py
@@ -123,7 +123,7 @@ def get_integral_data(assembly: BehavioralAssembly, experiment: str, precompute_
:param assembly: the human behavioral data to look at
:param experiment: str, the prefix of the experiment subtype, ex: "tilted_line" or "lle"
:param precompute_boostrap: True if using precomputed integral errors, else manually compute (Slow!)
-:return: tuple of calculated human integral and its boostrapped (precomputed) error
+:return: tuple of calculated human integral and its bootstrapped (precomputed) error
"""
lapse_rate = LAPSE_RATES[experiment]
blue_data = generate_summary_df(assembly, lapse_rate, "first")
4 changes: 2 additions & 2 deletions brainscore_vision/benchmarks/ferguson2024/helpers/helpers.py
@@ -130,7 +130,7 @@ def calculate_accuracy(df: BehavioralAssembly, lapse_rate: float) -> float:
"""
- Calculates a per-subject lapse rate-corrected accuracy for an assembly.
- Subject accuracy is averaged over all images with a certain distractor size and repetition coords (i.e. these
-coords are mixed togather and the accuracy is calculated over this merged assembly).
+coords are mixed together and the accuracy is calculated over this merged assembly).

:param df: DataFrame Object that contains experimental data
:param lapse_rate: a precomputed float defined above that represents avg. subject lapse rate in experiment
@@ -214,7 +214,7 @@ def boostrap_integral(df_blue: DataFrame, df_orange: DataFrame, num_loops: int =

:param df_blue: DataFrame, the first (blue) block of data (target on a field of distractors)
:param df_orange: DataFrame, the second (orange) block of data (distractor on a field of targets)
-:param num_loops: int, number of times the boostrap will run (and thus take the average)
+:param num_loops: int, number of times the bootstrap will run (and thus take the average)
:return: Dict of values {bootstrapped_integral, bootstrapped_integral_error)
"""
num_subjects = len(set(df_blue["participant_id"]))
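The docstring above describes a subject-level bootstrap of the integral error. A minimal sketch of that procedure, assuming hypothetical participant_id and accuracy columns and a trivial stand-in for the real integral computation (not the repository's implementation):

```python
import numpy as np
import pandas as pd

def bootstrap_integral_sketch(df_blue: pd.DataFrame, df_orange: pd.DataFrame,
                              num_loops: int = 500, seed: int = 0) -> dict:
    """Resample subjects with replacement, recompute the statistic each loop,
    and report the mean and spread over loops."""
    rng = np.random.default_rng(seed)
    subjects = sorted(set(df_blue["participant_id"]))
    integrals = []
    for _ in range(num_loops):
        sample = rng.choice(subjects, size=len(subjects), replace=True)
        blue = pd.concat([df_blue[df_blue.participant_id == s] for s in sample])
        orange = pd.concat([df_orange[df_orange.participant_id == s] for s in sample])
        # stand-in for the real integral: difference of mean accuracies
        integrals.append(blue["accuracy"].mean() - orange["accuracy"].mean())
    return {"bootstrapped_integral": float(np.mean(integrals)),
            "bootstrapped_integral_error": float(np.std(integrals))}
```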
2 changes: 1 addition & 1 deletion brainscore_vision/benchmarks/hebart2023/benchmark.py
@@ -62,7 +62,7 @@ def __call__(self, candidate: BrainModel):

# Score the model
# We chose not to compute error estimates but you could compute them
-# by spliting the data into five folds and computing the standard deviation.
+# by splitting the data into five folds and computing the standard deviation.
correct_choices = choices.values == self._assembly.coords["image_3"].values # third image is always correct
raw_score = np.sum(correct_choices) / len(choices['presentation'])
score = (raw_score - 1 / 3) / (self.ceiling - 1 / 3)
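The comment above gives the error-estimate recipe in words, and the next line applies the chance correction score = (raw_score - 1/3) / (ceiling - 1/3). A hedged sketch of both, with a round-robin split standing in for a real five-fold assignment:

```python
import numpy as np

def chance_corrected_score(correct: np.ndarray, ceiling: float,
                           chance: float = 1 / 3, n_folds: int = 5):
    """Normalize accuracy between chance and the ceiling, and estimate an
    error bar from fold-wise scores (sketch, not the benchmark's code)."""
    raw_score = correct.mean()
    score = (raw_score - chance) / (ceiling - chance)
    folds = [correct[i::n_folds] for i in range(n_folds)]  # round-robin folds
    fold_scores = [(f.mean() - chance) / (ceiling - chance) for f in folds]
    return score, float(np.std(fold_scores))

correct = np.array([1, 1, 0, 1, 0, 1, 1, 0, 1, 1], dtype=float)
print(chance_corrected_score(correct, ceiling=0.9))
```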
2 changes: 1 addition & 1 deletion brainscore_vision/benchmarks/scialom2024/test.py
@@ -99,7 +99,7 @@ def test_dataset_ceiling(self, dataset, expected_ceiling):
('segments-59', approx(0.12072, abs=0.001)),
('segments-77', approx(0.12996, abs=0.001)),
('segments-100', approx(0.11540, abs=0.001)), # all of the above are AccuracyDistance
-('phosphenes-all', approx(0.18057, abs=0.01)), # alls are ErrorConsistency
+('phosphenes-all', approx(0.18057, abs=0.01)), # all are ErrorConsistency
('segments-all', approx(0.15181, abs=0.01)),
])
def test_model_8_degrees(self, dataset, expected_raw_score):
@@ -27,7 +27,7 @@
% 'array': The contents, type and size of the input [Data] are
% considered for the creation of the hash. Nested CELLs
% and STRUCT arrays are parsed recursively. Empty arrays of
-% different type reply different hashs.
+% different type reply different hashes.
% 'file': [Data] is treated as file name and the hash is calculated
% for the files contents.
% 'bin': [Data] is a numerical, LOGICAL or CHAR array. Only the
@@ -110,7 +110,7 @@
% Jan Achterhold (author 267816) suggested to consider Java objects.
% 016: 01-Feb-2015 20:53, Java heap space exhausted for large files.
% Now files are process in chunks to save memory.
-% 017: 15-Feb-2015 19:40, Collsions: Same hash for different data.
+% 017: 15-Feb-2015 19:40, Collisions: Same hash for different data.
% Examples: zeros(1,1) and zeros(1,1,0)
% complex(0) and zeros(1,1,0,0)
% Now the number of dimensions is included, to avoid this.
@@ -132,7 +132,7 @@

% OPEN BUGS:
% Nath wrote:
-% function handle refering to struct containing the function will create
+% function handle referring to struct containing the function will create
% infinite loop. Is there any workaround ?
% Example:
% d= dynamicprops();
@@ -400,7 +400,7 @@
DataBin = uint8(DataObj);

% Matt Raum had this excellent idea - unfortunately this function is
-% undocumented and might not be supported in te future:
+% undocumented and might not be supported in the future:
% DataBin = getByteStreamFromArray(DataObj);

catch % Or perhaps this is better:
@@ -142,7 +142,7 @@
# define MWSIZE_MAX MAX_int32_T
#endif

-// Directive for endianess:
+// Directive for endianness:
#if !defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
# define _LITTLE_ENDIAN
#endif
@@ -645,7 +645,7 @@ void ArrayCore(MD5_CTX *context, const mxArray *V)
// Core function to process structs: ===========================================
void StructCore(MD5_CTX *context, const mxArray *V, mwSize nElem)
{
-// Sort field names alphabetically to avoid effects of teh order of fields.
+// Sort field names alphabetically to avoid effects of the order of fields.
const char *FieldName;
int nField, iField, FieldIndex;
mwSize iElem;
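The change log and the C hunk above record two design decisions: the digest must include the number of dimensions (so zeros(1,1) and zeros(1,1,0) no longer collide), and struct field names are hashed in sorted order so field order cannot change the result. A small Python sketch of both ideas, illustrative only rather than a port of DataHash:

```python
import hashlib
import numpy as np

def data_hash(value) -> str:
    """Hash nested dicts/lists/arrays; fold dtype and full shape into the
    digest, and visit dict keys in sorted order."""
    md5 = hashlib.md5()

    def update(v):
        if isinstance(v, dict):
            md5.update(b"struct")
            for key in sorted(v):  # field order must not matter
                md5.update(str(key).encode())
                update(v[key])
        elif isinstance(v, (list, tuple)):
            md5.update(b"cell")
            for item in v:
                update(item)
        else:
            arr = np.asarray(v)
            # dtype + shape disambiguates zeros((1, 1)) from zeros((1, 1, 0))
            md5.update(str(arr.dtype).encode())
            md5.update(str(arr.shape).encode())
            md5.update(arr.tobytes())

    update(value)
    return md5.hexdigest()

assert data_hash(np.zeros((1, 1))) != data_hash(np.zeros((1, 1, 0)))
assert data_hash({"a": 1, "b": 2}) == data_hash({"b": 2, "a": 1})
```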
@@ -9,7 +9,7 @@
% V: Array of any type, which is not handled in the C-Mex.
% OUTPUT:
% S: Array or struct containing elementary types only.
-% The implementation migth be changed by the user!
+% The implementation might be changed by the user!
% Default:
% - Sparse arrays: Struct containing the indices and values.
% - Function handle: The reply of FUNCTIONS and the size and date of the
@@ -76,7 +76,7 @@
S = uint8(V);

% Matt Raum had this excellent idea - unfortunately this function is
-% undocumented and might not be supported in te future:
+% undocumented and might not be supported in the future:
% S = getByteStreamFromArray(DataObj);

catch ME % Or perhaps this is better:
@@ -193,7 +193,7 @@
Flags = cat(2, Flags, {'-compatibleArrayDims'});
end

-% Define endianess directive:
+% Define endianness directive:
if isLittleEndian
Flags = cat(2, Flags, {'-D_LITTLE_ENDIAN'});
else % Does Matlab run on a big endian machine currently?!
@@ -270,7 +270,7 @@
% Run the unit-test: -----------------------------------------------------------
if ~isempty(UnitTestFcn) && compiled
fprintf('\n\n== Post processing:\n');
-[dum, UnitTestName] = fileparts(UnitTestFcn); %#ok<ASGLU> % Remove extension
+[dummy, UnitTestName] = fileparts(UnitTestFcn); %#ok<ASGLU> % Remove extension
if ~isempty(which(UnitTestName))
fprintf(' Call: %s\n\n', UnitTestName);
feval(UnitTestName);
@@ -42,7 +42,7 @@
% * GLOB returns only directory names when a trailing file
% separator is specified.
% * On Windows GLOB is not case sensitive, but it returns
-% matching names exactely in the case as they are defined on
+% matching names exactly in the case as they are defined on
% the filesystem. Case of host and sharename of a UNC path and
% case of drive letters will be returned as specified in
% FILESPEC.
@@ -319,7 +319,7 @@
% ------------------------------------------------------------------------
function L = ls_regexp(regexp_fhandle, path, varargin)
% List files that match PATH/r1/r2/r3/... where PATH is a string without
-% any wildcards and r1..rn are regular expresions that contain the parts of
+% any wildcards and r1..rn are regular expressions that contain the parts of
% a filespec between the file separators.
% L is a cell array with matching file or directory names.
% REGEXP_FHANDLE contain a file handle to REGEXP or REGEXPI depending
@@ -427,11 +427,11 @@

% return matching names
if ~isempty(varargin{end})
-% determing matching names ignoring trailing '/'
+% determine matching names ignoring trailing '/'
L_no_trailing_fsep = regexprep(L, '/$', '');
I = regexp_fhandle(L_no_trailing_fsep, ['^' expression '$']);
else
-% determing matching names including trailing '/'
+% determine matching names including trailing '/'
I = regexp_fhandle(L, ['^' expression '$']);
end
I = cellfun('isempty', I);
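The two branches above differ only in whether a trailing file separator is stripped before the regular-expression match; that is how GLOB returns only directory names when the filespec ends in a separator. A toy Python sketch of the same logic (the function and its inputs are hypothetical):

```python
import re

def match_names(names, expression, trailing_sep_required):
    """Match names against ^expression$, either ignoring or requiring a
    trailing '/' (mirrors the two MATLAB branches above)."""
    pattern = re.compile('^' + expression + '$')
    if not trailing_sep_required:
        return [n for n in names if pattern.match(n.rstrip('/'))]
    return [n for n in names if pattern.match(n)]

print(match_names(['src/', 'setup.py'], r'src', trailing_sep_required=False))  # ['src/']
print(match_names(['src/', 'setup.py'], r'src/', trailing_sep_required=True))  # ['src/']
```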
@@ -68,7 +68,7 @@ def create_assembly_and_upload(name: str, experiment: str, upload_to_s3=True) ->
:param name: the name of the experiment, usually Ferguson2024
:param experiment: the dataset, i.e. color
:param upload_to_s3: True if you want to upload this to BrainIO on S3
-:return: the assmebly
+:return: the assembly
"""
all_subjects = pd.read_csv(f'csvs/{experiment}_sanity_processed.csv')

@@ -15,7 +15,7 @@
- 16 image categories
- for the this benchmark (high-pass) subjects saw the EXACT image indicated with the variable/column name
image_lookup_id, and not a variation of it (no distortions, editing, etc). Condition is unclear based on
-documentation fromm source repo, needs follow up.
+documentation from source repo, needs follow up.
'''

# initial csv to dataframe processing:
@@ -15,7 +15,7 @@
- 16 image categories
- for the this benchmark (low-pass) subjects saw the EXACT image indicated with the variable/column name
image_lookup_id, and not a variation of it (no distortions, editing, etc). Condition is unclear based on
-documentation fromm source repo, needs follow up.
+documentation from source repo, needs follow up.
'''

# initial csv to dataframe processing:
@@ -15,7 +15,7 @@
- 16 image categories
- for the this benchmark (phase-scrambling) subjects saw the EXACT image indicated with the variable/column name
image_lookup_id, and not a variation of it (no distortions, editing, etc). Condition is again unclear based on
-documentation fromm source repo, needs follow up.
+documentation from source repo, needs follow up.
'''

# initial csv to dataframe processing:
@@ -95,7 +95,7 @@
" assembly = assembly.where(assembly.object_style != 'skeleton', drop=True)\n",
" assembly = assembly.where(assembly.object_style != 'nan', drop=True)\n",
"\n",
" ## this is temporary because i havent pushed the new version of the assembly online:\n",
" ## this is temporary because i haven't pushed the new version of the assembly online:\n",
" csv_path = './merged_assembly/merged_stimulus_set.csv'\n",
" dir_path = './images'\n",
" stimulus_set = brainio.stimuli.StimulusSet.from_files(csv_path, dir_path)\n",
@@ -173,7 +173,7 @@
}
],
"source": [
"# now check if whithin oleo assembly images have always the same number of repetitions\n",
"# now check if within oleo assembly images have always the same number of repetitions\n",
"from collections import Counter\n",
"\n",
"repetition_values = assembly_oleo.repetition.values # Replace with your actual array\n",
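The notebook cell above builds a Counter to verify that every image in the oleo assembly has the same number of repetitions. A minimal sketch of that check, with hypothetical presentation-level image ids:

```python
from collections import Counter

image_ids = ["im0", "im1", "im0", "im1", "im2", "im2"]  # one entry per presentation
repetitions = Counter(image_ids)
print(repetitions)                          # Counter({'im0': 2, 'im1': 2, 'im2': 2})
print(len(set(repetitions.values())) == 1)  # True: every image repeated equally often
```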
@@ -146,7 +146,7 @@
import pdb; pdb.set_trace()


-# upload assmebly to S3
+# upload assembly to S3
package_data_assembly('brainio_brainscore', merged_assembly, assembly_identifier=merged_assembly.name,
stimulus_set_identifier=stimuli.name,assembly_class_name="NeuronRecordingAssembly",
bucket_name="brainio-brainscore")
@@ -222,14 +222,14 @@ def create_background_ids(hvm_data, rest_data, non_silhouette_data):
def find_matching_background(oods_category, hvm_category):
'''
hvm and hvm-like images share the same background. To identify similar backgrounds images are compared pixel-wise to each over.
-Images that share the most overlapp are then labeled with the same background id as the respective hvm-image.
+Images that share the most overlap are then labeled with the same background id as the respective hvm-image.

Arguments:
oods_category (NeuronRecordingAssembly): all images from one single hvm-like domain without background id,
hvm_category (NeuronRecordingAssembly): hvm images with background id

Returns:
-background_ids: list of matching background ids for the single hvm-like domian
+background_ids: list of matching background ids for the single hvm-like domain
'''
background_ids = []
# Find the respective background id from hvm images for each OOD image
@@ -262,7 +262,7 @@ def load_silhouette_data(data):
Separating domain-transfer data into hvm, hvm-like (silhouette) and rest (non-silhouette) data. This separation is needed to give each hvm-like
image the same background number as its respective hvm version (images are sharing the same background).

-Arguements:
+Arguments:
data: full data that is going to be split into hvm, hvm-like (silhouette) and rest (non-silhouette) data

Returns:
@@ -409,7 +409,7 @@ def reduce_data_num_images(data_complete, number_images):

def get_final_traning_data(complete_training_data, num_images_training, num_neurons):
'''
-Draws final traning images and neurons for one split.
+Draws final training images and neurons for one split.

Arguments:
complete_training_data (dict with NeuronRecordingAssembly): keys: domain names, values: complete training data pool for one split,
@@ -503,7 +503,7 @@ def add_accuracies_to_split_df(final_test_data_dictionary, decoder, split_datafr
num_neurons: number of training neurons,
num_training_images: number of training images

-Retruns:
+Returns:
split_dataframe (dict): keys: domain names, values: dataframe with columns: #Neurons, #Images training, Accuracy test data
'''
# Get and store the test accuracy for each crossdomain
@@ -568,7 +568,7 @@ def get_classifier_score_2AFC(classifier, data):
#################################################
#################################################
#################################################
-# Brain model speficic functions
+# Brain model specific functions
#################################################


@@ -678,7 +678,7 @@ def save_split_dataframes(split_crossdomain_dataframes, crossdomain_dataframes,

def save_split_averaged_dataframes(crossdomain_dataframes, neurons_array, images_array, brain_model_name):
'''
-Saves dataframe with perfromance averaged over multiple splits for each domain.
+Saves dataframe with performance averaged over multiple splits for each domain.

Arguments:
crossdomain_dataframes (dict): keys: domain name, values: dataframes with performance for each #Neurons x #Images combination stored over multiple splits
@@ -205,11 +205,11 @@ def gen_grating_stim(degrees, size_px, stim_name, grat_params, save_dir):
for i in np.arange(nStim):
stim_id = np.uint64(grat_params[i, 0] * 10e9 + grat_params[i, 1] * 10e7 + grat_params[i, 3] * 10e5 +
grat_params[i, 4] * 10e3 + grat_params[i, 5] * 10e1 + grat_params[i, 6])
-grat = Grating(width=width, pos=[grat_params[i, 0], grat_params[i, 1]], contrast=grat_params[i, 2],
+great = Grating(width=width, pos=[grat_params[i, 0], grat_params[i, 1]], contrast=grat_params[i, 2],
rad=grat_params[i, 3], sf=grat_params[i, 4], orientation=grat_params[i, 5],
phase=grat_params[i, 6], stim_id= stim_id, format_id='{0:012d}', save_dir=save_dir,
size_px=[size_px, size_px], type_name=stim_name)
-image_names[i] = (grat.save_stimulus())
+image_names[i] = (great.save_stimulus())
image_local_file_path[i] = save_dir + os.sep + image_names[i]
all_y[i] = grat_params[i, 0]
all_x[i] = grat_params[i, 1]
@@ -264,15 +264,15 @@ def gen_grating_stim_old(degrees, size_px, stim_name, grat_contrast, grat_pos, g
for s in np.arange(len(grat_sf)):
for o in np.arange(len(grat_orientation)):
for p in np.arange(len(grat_phase)):
-grat = Grating(width=width, pos=[grat_pos[y], grat_pos[x]],
+great = Grating(width=width, pos=[grat_pos[y], grat_pos[x]],
contrast=grat_contrast[c], rad=grat_rad[r],
sf=grat_sf[s], orientation=grat_orientation[o],
phase=grat_phase[p],
stim_id=np.uint64(
y * 10e9 + x * 10e7 + r * 10e5 + s * 10e3 + o * 10e1 + p),
format_id='{0:012d}', save_dir=save_dir, size_px=[size_px, size_px],
type_name=stim_name)
-image_names[i] = (grat.save_stimulus())
+image_names[i] = (great.save_stimulus())
image_local_file_path[i] = save_dir + os.sep + image_names[i]
all_y[i] = grat_pos[y]
all_x[i] = grat_pos[x]
2 changes: 1 addition & 1 deletion brainscore_vision/data/zhang2018/__init__.py
@@ -5,7 +5,7 @@

BIBTEX = """@article{zhang2018finding,
title={Finding any Waldo with zero-shot invariant and efficient visual search},
-author={Zhang, Mengmi and Feng, Jiashi and Ma, Keng Teck and Lim, Joo Hwee and Zhao, Qi and Kreiman, Gabriel},
+author={Zhang, Mengmi and Feng, Jiashi and Ma, Keng Teck and Lim, You Hwee and Zhao, Qi and Kreiman, Gabriel},
journal={Nature communications},
volume={9},
number={1},
2 changes: 1 addition & 1 deletion brainscore_vision/metrics/accuracy_distance/metric.py
@@ -49,7 +49,7 @@ def __call__(self, source: BehavioralAssembly, target:
indexers = {v: cond[i] for i, v in enumerate(variables)}
subject_cond_assembly = subject_assembly.sel(**indexers)
source_cond_assembly = source.sel(**indexers)
-# to accomodate unbalanced designs, skip combinations of
+# to accommodate unbalanced designs, skip combinations of
# variables that don't exist in both assemblies
if len(subject_cond_assembly) and len(source_cond_assembly):
cond_scores.append(self.compare_single_subject(
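The guard above exists because in an unbalanced design some combinations of condition variables occur in only one of the two assemblies, so scoring proceeds over the intersection. A toy illustration with plain dictionaries standing in for the assemblies:

```python
from itertools import product

source = {("small", "left"): 0.9, ("small", "right"): 0.8}
target = {("small", "left"): 0.7, ("large", "left"): 0.6}

scores = []
for cond in product(["small", "large"], ["left", "right"]):
    # skip combinations of variables that don't exist in both assemblies
    if cond in source and cond in target:
        scores.append(abs(source[cond] - target[cond]))
print(sum(scores) / len(scores))  # 0.2 -- only ('small', 'left') contributes
```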
2 changes: 1 addition & 1 deletion brainscore_vision/metrics/cka/metric.py
@@ -64,7 +64,7 @@ def centering(K):

return np.dot(np.dot(H, K), H)
# HKH are the same with KH, KH is the first centering, H(KH) do the second time,
-# results are the sme with one time centering
+# results are the same with one time centering
# return np.dot(H, K) # KH


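The comment above claims that centering once (KH) and double centering (HKH) give the same result. That holds under the trace used in the HSIC numerator because the centering matrix H is symmetric and idempotent (HH = H) and the trace is cyclic; a quick numeric check (sketch):

```python
import numpy as np

n = 5
rng = np.random.default_rng(0)
X, Y = rng.normal(size=(n, 3)), rng.normal(size=(n, 3))
K, L = X @ X.T, Y @ Y.T
H = np.eye(n) - np.ones((n, n)) / n  # centering matrix

double = np.trace((H @ K @ H) @ (H @ L @ H))  # textbook double centering
single = np.trace((K @ H) @ (L @ H))          # one-time centering
print(np.allclose(double, single))            # True
```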
@@ -63,7 +63,7 @@ def get_activations(self, inputs : List[Any], layer_names : List[str]) -> typing

def get_layer(self, layer_name : str):
# the layer_name is a string that represents the module hierarchy up to the target layer,
-# seperated by ".", e.g., "module1.submodule2.relu".
+# separated by ".", e.g., "module1.submodule2.relu".
if layer_name == 'logits':
return self._output_layer()
module = self._model
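The hunk above resolves a '.'-separated layer name by walking the module hierarchy. A self-contained sketch of the usual getattr walk (hypothetical helper, PyTorch assumed; integer path parts index into containers such as nn.Sequential):

```python
import torch.nn as nn

def get_layer(model: nn.Module, layer_name: str) -> nn.Module:
    """Follow 'module1.submodule2.relu'-style paths down the hierarchy."""
    module = model
    for part in layer_name.split('.'):
        module = module[int(part)] if part.isdigit() else getattr(module, part)
    return module

model = nn.Sequential(nn.Linear(4, 4), nn.ReLU())
print(get_layer(model, '1'))  # ReLU()
```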