63 changes: 40 additions & 23 deletions brainscore_language/benchmarks/futrell2018/benchmark.py
@@ -25,27 +25,30 @@ class Futrell2018Pearsonr(BenchmarkBase):
     """
 
     def __init__(self):
-        self.data = load_dataset('Futrell2018')
-        self.metric = load_metric('pearsonr')
-        ceiler = SplitHalvesConsistency(num_splits=10, split_coordinate='subject_id', consistency_metric=self.metric)
+        self.data = load_dataset("Futrell2018")
+        self.metric = load_metric("pearsonr")
+        ceiler = SplitHalvesConsistency(
+            num_splits=10, split_coordinate="subject_id", consistency_metric=self.metric
+        )
         ceiling = ceiler(self.data)
         super(Futrell2018Pearsonr, self).__init__(
-            identifier='Futrell2018-pearsonr',
-            version=1,
-            parent='behavior',
+            identifier="Futrell2018-pearsonr",
             ceiling=ceiling,
-            bibtex=self.data.bibtex)
+            version=1,
+            parent="behavior",
+            bibtex=self.data.bibtex,
+        )
 
     def __call__(self, candidate: ArtificialSubject) -> Score:
         # run experiment
         candidate.start_behavioral_task(ArtificialSubject.Task.reading_times)
-        stimuli = self.data['word'].values
-        predictions = candidate.digest_text(stimuli)['behavior']
-        attach_presentation_meta(predictions, self.data['presentation'])
+        stimuli = self.data["word"].values
+        predictions = candidate.digest_text(stimuli)["behavior"]
+        attach_presentation_meta(predictions, self.data["presentation"])
         # exclude first words
-        predictions = predictions[predictions['word_within_sentence_id'] != 1]
-        targets = self.data[self.data['word_within_sentence_id'] != 1]
-        targets = targets.mean('subject')  # compare to "average human"
+        predictions = predictions[predictions["word_within_sentence_id"] != 1]
+        targets = self.data[self.data["word_within_sentence_id"] != 1]
+        targets = targets.mean("subject")  # compare to "average human"
         # score
         raw_score = self.metric(predictions, targets)
         score = ceiling_normalize(raw_score, self.ceiling)
@@ -56,7 +59,9 @@ class SplitHalvesConsistency:
     # following
     # https://github.com/brain-score/brain-score/blob/c51b8aa2c94212a9ac56c06c556afad0bb0a3521/brainscore/metrics/ceiling.py#L25-L96
 
-    def __init__(self, num_splits: int, split_coordinate: str, consistency_metric: Metric):
+    def __init__(
+        self, num_splits: int, split_coordinate: str, consistency_metric: Metric
+    ):
         """
         :param num_splits: how many times to create two halves
         :param split_coordinate: over which coordinate to split the assembly into halves
@@ -73,18 +78,30 @@ def __call__(self, assembly: DataAssembly) -> Score:
         consistencies, uncorrected_consistencies = [], []
         splits = range(self.num_splits)
         for _ in splits:
-            half1_values = random_state.choice(split_values, size=len(split_values) // 2, replace=False)
-            half2_values = set(split_values) - set(half1_values)  # this only works because of `replace=False` above
-            half1 = assembly[{split_dim: [value in half1_values for value in split_values]}].mean(split_dim)
-            half2 = assembly[{split_dim: [value in half2_values for value in split_values]}].mean(split_dim)
+            half1_values = random_state.choice(
+                split_values, size=len(split_values) // 2, replace=False
+            )
+            half2_values = set(split_values) - set(
+                half1_values
+            )  # this only works because of `replace=False` above
+            half1 = assembly[
+                {split_dim: [value in half1_values for value in split_values]}
+            ].mean(split_dim)
+            half2 = assembly[
+                {split_dim: [value in half2_values for value in split_values]}
+            ].mean(split_dim)
             consistency = self.consistency_metric(half1, half2)
             uncorrected_consistencies.append(consistency)
             # Spearman-Brown correction for sub-sampling
             corrected_consistency = 2 * consistency / (1 + (2 - 1) * consistency)
             consistencies.append(corrected_consistency)
-        consistencies = Score(consistencies, coords={'split': splits}, dims=['split'])
-        uncorrected_consistencies = Score(uncorrected_consistencies, coords={'split': splits}, dims=['split'])
-        average_consistency = consistencies.median('split')
-        average_consistency.attrs['raw'] = consistencies
-        average_consistency.attrs['uncorrected_consistencies'] = uncorrected_consistencies
+        consistencies = Score(consistencies, coords={"split": splits}, dims=["split"])
+        uncorrected_consistencies = Score(
+            uncorrected_consistencies, coords={"split": splits}, dims=["split"]
+        )
+        average_consistency = consistencies.median("split")
+        average_consistency.attrs["raw"] = consistencies
+        average_consistency.attrs[
+            "uncorrected_consistencies"
+        ] = uncorrected_consistencies
         return average_consistency
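
Note (not part of the diff): in the ceiling hunk above, each split-half consistency is computed on only half of the subjects, so it is corrected upward with the Spearman-Brown prophecy formula, 2r / (1 + r), before the median over splits is taken. A minimal standalone sketch of that correction, for illustration only:

def spearman_brown_correct(r: float, n: float = 2) -> float:
    """Predicted reliability when the measurement is lengthened by a factor of n."""
    return n * r / (1 + (n - 1) * r)

# e.g. a split-half consistency of 0.6 implies an estimated full-set ceiling of 0.75
assert abs(spearman_brown_correct(0.6) - 0.75) < 1e-9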
123 changes: 80 additions & 43 deletions brainscore_language/benchmarks/pereira2018/benchmark.py
@@ -25,33 +25,39 @@
 
 
 def Pereira2018_243sentences():
-    return _Pereira2018ExperimentLinear(experiment='243sentences', ceiling_s3_kwargs=dict(
-        version_id='CHl_9aFHIWVnPW_njePfy28yzggKuUPw',
-        sha1='5e23de899883828f9c886aec304bc5aa0f58f66c',
-        raw_kwargs=dict(
-            version_id='uZye03ENmn.vKB5mARUGhcIY_DjShtPD',
-            sha1='525a6ac8c14ad826c63fdd71faeefb8ba542d5ac',
-            raw_kwargs=dict(
-                version_id='XVTo58Po5YrNjTuDIWrmfHI0nbN2MVZa',
-                sha1='34ba453dc7e8a19aed18cc9bca160e97b4a80be5'
-            )
-        )
-    ))
+    return _Pereira2018ExperimentLinear(
+        experiment="243sentences",
+        ceiling_s3_kwargs=dict(
+            version_id="CHl_9aFHIWVnPW_njePfy28yzggKuUPw",
+            sha1="5e23de899883828f9c886aec304bc5aa0f58f66c",
+            raw_kwargs=dict(
+                version_id="uZye03ENmn.vKB5mARUGhcIY_DjShtPD",
+                sha1="525a6ac8c14ad826c63fdd71faeefb8ba542d5ac",
+                raw_kwargs=dict(
+                    version_id="XVTo58Po5YrNjTuDIWrmfHI0nbN2MVZa",
+                    sha1="34ba453dc7e8a19aed18cc9bca160e97b4a80be5",
+                ),
+            ),
+        ),
+    )
 
 
 def Pereira2018_384sentences():
-    return _Pereira2018ExperimentLinear(experiment='384sentences', ceiling_s3_kwargs=dict(
-        version_id='sjlnXr5wXUoGv6exoWu06C4kYI0KpZLk',
-        sha1='fc895adc52fd79cea3040961d65d8f736a9d3e29',
-        raw_kwargs=dict(
-            version_id='Hi74r9UKfpK0h0Bjf5DL.JgflGoaknrA',
-            sha1='ce2044a7713426870a44131a99bfc63d8843dae0',
-            raw_kwargs=dict(
-                version_id='m4dq_ouKWZkYtdyNPMSP0p6rqb7wcYpi',
-                sha1='fe9fb24b34fd5602e18e34006ac5ccc7d4c825b8'
-            )
-        )
-    ))
+    return _Pereira2018ExperimentLinear(
+        experiment="384sentences",
+        ceiling_s3_kwargs=dict(
+            version_id="sjlnXr5wXUoGv6exoWu06C4kYI0KpZLk",
+            sha1="fc895adc52fd79cea3040961d65d8f736a9d3e29",
+            raw_kwargs=dict(
+                version_id="Hi74r9UKfpK0h0Bjf5DL.JgflGoaknrA",
+                sha1="ce2044a7713426870a44131a99bfc63d8843dae0",
+                raw_kwargs=dict(
+                    version_id="m4dq_ouKWZkYtdyNPMSP0p6rqb7wcYpi",
+                    sha1="fe9fb24b34fd5602e18e34006ac5ccc7d4c825b8",
+                ),
+            ),
+        ),
+    )
 
 
 class _Pereira2018ExperimentLinear(BenchmarkBase):
@@ -73,43 +79,74 @@ class _Pereira2018ExperimentLinear(BenchmarkBase):
 
     def __init__(self, experiment: str, ceiling_s3_kwargs: dict):
         self.data = self._load_data(experiment)
-        self.metric = load_metric('linear_pearsonr')
-        identifier = f'Pereira2018.{experiment}-linear'
+        self.metric = load_metric("linear_pearsonr")
+        identifier = f"Pereira2018.{experiment}-linear"
         ceiling = self._load_ceiling(identifier=identifier, **ceiling_s3_kwargs)
         super(_Pereira2018ExperimentLinear, self).__init__(
             identifier=identifier,
-            version=1,
-            parent='Pereira2018-linear',
             ceiling=ceiling,
-            bibtex=BIBTEX)
+            version=1,
+            parent="Pereira2018-linear",
+            bibtex=BIBTEX,
+        )
 
     def _load_data(self, experiment: str) -> NeuroidAssembly:
-        data = load_dataset('Pereira2018.language')
+        data = load_dataset("Pereira2018.language")
         data = data.sel(experiment=experiment)  # filter experiment
-        data = data.dropna('neuroid')  # not all subjects have done both experiments, drop those that haven't
-        data.attrs['identifier'] = f"{data.identifier}.{experiment}"
+        data = data.dropna(
+            "neuroid"
+        )  # not all subjects have done both experiments, drop those that haven't
+        data.attrs["identifier"] = f"{data.identifier}.{experiment}"
         return data
 
-    def _load_ceiling(self, identifier: str, version_id: str, sha1: str, assembly_prefix="ceiling_", raw_kwargs=None):
-        ceiling = load_from_s3(identifier, cls=Score, assembly_prefix=assembly_prefix, version_id=version_id, sha1=sha1)
+    def _load_ceiling(
+        self,
+        identifier: str,
+        version_id: str,
+        sha1: str,
+        assembly_prefix="ceiling_",
+        raw_kwargs=None,
+    ):
+        ceiling = load_from_s3(
+            identifier,
+            cls=Score,
+            assembly_prefix=assembly_prefix,
+            version_id=version_id,
+            sha1=sha1,
+        )
         if raw_kwargs:  # recursively load raw attributes
-            raw = self._load_ceiling(identifier=identifier, assembly_prefix=assembly_prefix + "raw_", **raw_kwargs)
-            ceiling.attrs['raw'] = raw
+            raw = self._load_ceiling(
+                identifier=identifier,
+                assembly_prefix=assembly_prefix + "raw_",
+                **raw_kwargs,
+            )
+            ceiling.attrs["raw"] = raw
         return ceiling
 
     def __call__(self, candidate: ArtificialSubject) -> Score:
-        candidate.start_neural_recording(recording_target=ArtificialSubject.RecordingTarget.language_system,
-                                         recording_type=ArtificialSubject.RecordingType.fMRI)
-        stimuli = self.data['stimulus']
-        passages = self.data['passage_label'].values
+        candidate.start_neural_recording(
+            recording_target=ArtificialSubject.RecordingTarget.language_system,
+            recording_type=ArtificialSubject.RecordingType.fMRI,
+        )
+        stimuli = self.data["stimulus"]
+        passages = self.data["passage_label"].values
         predictions = []
-        for passage in sorted(set(passages)):  # go over individual passages, sorting to keep consistency across runs
-            passage_indexer = [stimulus_passage == passage for stimulus_passage in passages]
+        for passage in sorted(
+            set(passages)
+        ):  # go over individual passages, sorting to keep consistency across runs
+            passage_indexer = [
+                stimulus_passage == passage for stimulus_passage in passages
+            ]
             passage_stimuli = stimuli[passage_indexer]
-            passage_predictions = candidate.digest_text(passage_stimuli.values)['neural']
-            passage_predictions['stimulus_id'] = 'presentation', passage_stimuli['stimulus_id'].values
+            passage_predictions = candidate.digest_text(passage_stimuli.values)[
+                "neural"
+            ]
+            passage_predictions["stimulus_id"] = (
+                "presentation",
+                passage_stimuli["stimulus_id"].values,
+            )
             predictions.append(passage_predictions)
-        predictions = xr.concat(predictions, dim='presentation')
+        predictions = xr.concat(predictions, dim="presentation")
         raw_score = self.metric(predictions, self.data)
         score = ceiling_normalize(raw_score, self.ceiling)
         return score
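
Note (not part of the diff): the nested raw_kwargs above cause _load_ceiling to fetch the ceiling three times with growing assembly prefixes ("ceiling_", "ceiling_raw_", "ceiling_raw_raw_"), attaching each deeper result to its parent under attrs["raw"]. A toy sketch of that recursion with plain dicts and dummy sha1 values, nothing here touches S3:

def load_ceiling_mock(assembly_prefix="ceiling_", raw_kwargs=None, **s3_kwargs):
    ceiling = {"prefix": assembly_prefix, **s3_kwargs}  # stand-in for the Score fetched from S3
    if raw_kwargs:  # recursively attach raw attributes, mirroring _load_ceiling
        ceiling["raw"] = load_ceiling_mock(assembly_prefix + "raw_", **raw_kwargs)
    return ceiling

nested = load_ceiling_mock(sha1="aaa", raw_kwargs=dict(sha1="bbb", raw_kwargs=dict(sha1="ccc")))
# nested["prefix"] == "ceiling_", nested["raw"]["prefix"] == "ceiling_raw_",
# nested["raw"]["raw"]["prefix"] == "ceiling_raw_raw_"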
5 changes: 5 additions & 0 deletions brainscore_language/benchmarks/pereira2018_v2022/__init__.py
@@ -0,0 +1,5 @@
+from .benchmark import Pereira2018_243sentences, Pereira2018_384sentences
+from brainscore_language import benchmark_registry
+
+benchmark_registry["Pereira2018_v2022.243sentences-linreg_pearsonr"] = Pereira2018_243sentences
+benchmark_registry["Pereira2018_v2022.384sentences-linreg_pearsonr"] = Pereira2018_384sentences