From 6cfba6f8ecdc515ce1674ab4bfb3f3670970d472 Mon Sep 17 00:00:00 2001 From: lodevt Date: Wed, 29 Jan 2025 17:01:36 +0100 Subject: [PATCH 1/5] implement basic cases --- kloppy/domain/models/event.py | 104 ++++++++++++++++++++++++++++++++++ kloppy/tests/test_event.py | 41 +++++++++++++- 2 files changed, 144 insertions(+), 1 deletion(-) diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index 6e60fb83a..befcd89ef 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -1119,6 +1119,110 @@ def _update_formations_and_positions(self): else: event.team.formations.set(event.time, event.formation_type) + def insert( + self, + event: Event, + position: Optional[int] = None, + before_event_id: Optional[str] = None, + after_event_id: Optional[str] = None, + timestamp: Optional[timedelta] = None, + constraints: Optional[Callable[[Event, "EventDataset"], bool]] = None, + ): + """ + Inserts an event into the dataset at the appropriate position. + + Parameters: + ---------- + event : Event + The event to be inserted into the dataset. + + position : Optional[int], default=None + The exact index where the event should be inserted. If provided, + overrides all other positioning parameters. + + before_event_id : Optional[str], default=None + The ID of the event before which the new event should be inserted. + Ignored if `position` is provided. + + after_event_id : Optional[str], default=None + The ID of the event after which the new event should be inserted. + Ignored if `position` or `before_event_id` is provided. + + timestamp : Optional[datetime], default=None + The timestamp of the event, used to determine its position based + on chronological order if no other positional parameters are specified. + + constraints : Optional[Callable[[Event, EventDataset], bool]], default=None + A custom function that takes the event and dataset as arguments and + evaluates whether the event satisfies specific conditions to determine + its position. Useful for more complex insertion logic, such as inserting + into a valid contextual window (e.g., dead ball states or possession sequences). + + Returns: + ------- + void + The method modifies the dataset in place. + + Raises: + ------ + ValueError + If the insertion position cannot be determined or is invalid. + + Notes: + ------ + - If multiple parameters are provided to specify the position, the precedence is: + 1. `position` + 2. `before_event_id` + 3. `after_event_id` + 4. `timestamp` + - If none of the above parameters are specified, the method raises a `ValueError`. + """ + # Determine the insert position based on precedence + if position is not None: + # If position is provided, use it directly + insert_position = position + elif before_event_id is not None: + # Find the event with the matching `before_event_id` and insert before it + insert_position = next( + ( + i + for i, e in enumerate(self.events) + if e.event_id == before_event_id + ), + None, + ) + if insert_position is None: + raise ValueError(f"No event found with ID {before_event_id}.") + elif after_event_id is not None: + # Find the event with the matching `after_event_id` and insert after it + insert_position = next( + ( + i + 1 + for i, e in enumerate(self.events) + if e.event_id == after_event_id + ), + None, + ) + if insert_position is None: + raise ValueError(f"No event found with ID {after_event_id}.") + elif timestamp is not None: + # If no position or event IDs are specified, insert based on timestamp + insert_position = next( + ( + i + for i, e in enumerate(self.events) + if e.timestamp > timestamp + ), + len(self.events), + ) + else: + # If no valid positioning logic is provided, raise an error + raise ValueError( + "Unable to determine insertion position for the event." + ) + + self.events.insert(insert_position, event) + @property def events(self): return self.records diff --git a/kloppy/tests/test_event.py b/kloppy/tests/test_event.py index c89c214bf..be8d9c4e9 100644 --- a/kloppy/tests/test_event.py +++ b/kloppy/tests/test_event.py @@ -1,7 +1,9 @@ +from datetime import timedelta + import pytest from kloppy import statsbomb -from kloppy.domain import EventDataset +from kloppy.domain import EventDataset, Event, EventFactory, CarryResult class TestEvent: @@ -87,3 +89,40 @@ def test_find_all(self, dataset: EventDataset): assert goals[0].next("shot.goal") == goals[1] assert goals[0].next("shot.goal") == goals[2].prev("shot.goal") assert goals[2].next("shot.goal") is None + + def test_insert(self, dataset: EventDataset): + new_event = EventFactory().build_carry( + qualifiers=None, + timestamp=timedelta(seconds=700), + end_timestamp=timedelta(seconds=701), + result=CarryResult.COMPLETE, + period=dataset.metadata.periods[0], + ball_owning_team=dataset.metadata.teams[0], + ball_state="alive", + event_id="test-insert-1234", + team=dataset.metadata.teams[0], + player=dataset.metadata.teams[0].players[0], + coordinates=(0.2, 0.3), + end_coordinates=(0.22, 0.33), + raw_event=None, + ) + + # insert by position + dataset.insert(new_event, position=3) + assert dataset.events[3].event_id == "test-insert-1234" + del dataset.events[3] # Remove by index to restore the dataset + + # insert by before_event_id + dataset.insert(new_event, before_event_id=dataset.events[100].event_id) + assert dataset.events[100].event_id == "test-insert-1234" + del dataset.events[100] # Remove by index to restore the dataset + + # insert by after_event_id + dataset.insert(new_event, after_event_id=dataset.events[305].event_id) + assert dataset.events[306].event_id == "test-insert-1234" + del dataset.events[306] # Remove by index to restore the dataset + + # insert by timestamp + dataset.insert(new_event, timestamp=new_event.timestamp) + assert dataset.events[609].event_id == "test-insert-1234" + del dataset.events[609] # Remove by index to restore the dataset From 853bf2883b966314b1aa3160509723db38fa7c48 Mon Sep 17 00:00:00 2001 From: lodevt Date: Wed, 29 Jan 2025 17:31:14 +0100 Subject: [PATCH 2/5] initial effort at inserting based on a scoring function --- kloppy/domain/models/event.py | 120 ++++++++++++++++++++-------------- kloppy/tests/test_event.py | 23 ++++++- 2 files changed, 93 insertions(+), 50 deletions(-) diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index befcd89ef..f916c39a8 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -1126,56 +1126,65 @@ def insert( before_event_id: Optional[str] = None, after_event_id: Optional[str] = None, timestamp: Optional[timedelta] = None, - constraints: Optional[Callable[[Event, "EventDataset"], bool]] = None, + scoring_function: Optional[ + Callable[[Event, "EventDataset"], float] + ] = None, ): """ - Inserts an event into the dataset at the appropriate position. - - Parameters: - ---------- - event : Event - The event to be inserted into the dataset. - - position : Optional[int], default=None - The exact index where the event should be inserted. If provided, - overrides all other positioning parameters. - - before_event_id : Optional[str], default=None - The ID of the event before which the new event should be inserted. - Ignored if `position` is provided. - - after_event_id : Optional[str], default=None - The ID of the event after which the new event should be inserted. - Ignored if `position` or `before_event_id` is provided. - - timestamp : Optional[datetime], default=None - The timestamp of the event, used to determine its position based - on chronological order if no other positional parameters are specified. - - constraints : Optional[Callable[[Event, EventDataset], bool]], default=None - A custom function that takes the event and dataset as arguments and - evaluates whether the event satisfies specific conditions to determine - its position. Useful for more complex insertion logic, such as inserting - into a valid contextual window (e.g., dead ball states or possession sequences). - - Returns: - ------- - void - The method modifies the dataset in place. - - Raises: - ------ - ValueError - If the insertion position cannot be determined or is invalid. - - Notes: - ------ - - If multiple parameters are provided to specify the position, the precedence is: - 1. `position` - 2. `before_event_id` - 3. `after_event_id` - 4. `timestamp` - - If none of the above parameters are specified, the method raises a `ValueError`. + Inserts an event into the dataset at the appropriate position. + + Parameters: + ---------- + event : Event + The event to be inserted into the dataset. + + position : Optional[int], default=None + The exact index where the event should be inserted. If provided, + overrides all other positioning parameters. + + before_event_id : Optional[str], default=None + The ID of the event before which the new event should be inserted. + Ignored if `position` is provided. + + after_event_id : Optional[str], default=None + The ID of the event after which the new event should be inserted. + Ignored if `position` or `before_event_id` is provided. + + timestamp : Optional[datetime], default=None + The timestamp of the event, used to determine its position based + on chronological order if no other positional parameters are specified. + + scoring_function : Optional[Callable[[Event, EventDataset, int], int]], default=None + A custom function that takes the event, dataset, and candidate position as arguments + and returns a score indicating how suitable the position is for insertion. + Higher scores indicate better placement. + + This is useful for determining the optimal insertion position when an exact + index is not provided. For example, events can be inserted into a valid + contextual window based on conditions such as dead ball states, possession sequences, + or temporal proximity to similar events. + + If no valid position is found (i.e., all scores are zero), the + insertion will fail with a ValueError. + + Returns: + ------- + void + The method modifies the dataset in place. + + Raises: + ------ + ValueError + If the insertion position cannot be determined or is invalid. + + Notes: + ------ + - If multiple parameters are provided to specify the position, the precedence is: + 1. `position` + 2. `before_event_id` + 3. `after_event_id` + 4. `timestamp` + - If none of the above parameters are specified, the method raises a `ValueError`. """ # Determine the insert position based on precedence if position is not None: @@ -1215,8 +1224,21 @@ def insert( ), len(self.events), ) + elif scoring_function is not None: + # Evaluate all possible positions using the constraint function + scores = [ + (i, scoring_function(event, self)) + for i, event in enumerate(self.events) + ] + # Select the best position with the highest score + insert_position, best_score = max( + scores, key=lambda x: x[1], default=(None, -1) + ) + if best_score == 0: + raise ValueError( + "No valid insertion position found based on the provided scoring function." + ) else: - # If no valid positioning logic is provided, raise an error raise ValueError( "Unable to determine insertion position for the event." ) diff --git a/kloppy/tests/test_event.py b/kloppy/tests/test_event.py index be8d9c4e9..2b772de8d 100644 --- a/kloppy/tests/test_event.py +++ b/kloppy/tests/test_event.py @@ -3,7 +3,13 @@ import pytest from kloppy import statsbomb -from kloppy.domain import EventDataset, Event, EventFactory, CarryResult +from kloppy.domain import ( + EventDataset, + Event, + EventFactory, + CarryResult, + BallState, +) class TestEvent: @@ -126,3 +132,18 @@ def test_insert(self, dataset: EventDataset): dataset.insert(new_event, timestamp=new_event.timestamp) assert dataset.events[609].event_id == "test-insert-1234" del dataset.events[609] # Remove by index to restore the dataset + + # insert using scoring function + def scoring_function(event: Event, dataset: EventDataset): + if event.ball_owning_team != dataset.metadata.teams[0]: + return 0 + if event.period != new_event.period: + return 0 + return 1 / abs( + event.timestamp.total_seconds() + - new_event.timestamp.total_seconds() + ) + + dataset.insert(new_event, scoring_function=scoring_function) + assert dataset.events[607].event_id == "test-insert-1234" + del dataset.events[607] # Remove by index to restore the dataset From 0731ad43778e3d77ee795d2bc4d6a1808db866ce Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Sat, 1 Feb 2025 14:32:57 +0100 Subject: [PATCH 3/5] docs: google docstring; fix: update refs --- kloppy/domain/models/event.py | 171 +++++++++++++++++----------------- kloppy/tests/test_event.py | 38 +++++++- 2 files changed, 118 insertions(+), 91 deletions(-) diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index f916c39a8..be4d3390c 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -3,36 +3,35 @@ from datetime import timedelta from enum import Enum from typing import ( + TYPE_CHECKING, + Any, + Callable, Dict, List, + Optional, Type, Union, - Any, - Callable, - Optional, - TYPE_CHECKING, ) from kloppy.domain.models.common import ( - DatasetType, AttackingDirection, + DatasetType, OrientationError, PositionType, ) from kloppy.utils import ( + DeprecatedEnumValue, camelcase_to_snakecase, - removes_suffix, - docstring_inherit_attributes, deprecated, - DeprecatedEnumValue, + docstring_inherit_attributes, + removes_suffix, ) +from ...exceptions import InvalidFilterError, KloppyError, OrphanedRecordError from .common import DataRecord, Dataset, Player, Team from .formation import FormationType from .pitch import Point -from ...exceptions import OrphanedRecordError, InvalidFilterError, KloppyError - if TYPE_CHECKING: from .tracking import Frame @@ -1130,105 +1129,84 @@ def insert( Callable[[Event, "EventDataset"], float] ] = None, ): + """Inserts an event into the dataset at the appropriate position. + + Args: + event (Event): The event to be inserted into the dataset. + position (Optional[int]): The exact index where the event should be inserted. + If provided, overrides all other positioning parameters. Defaults to None. + before_event_id (Optional[str]): The ID of the event before which the new event + should be inserted. Ignored if `position` is provided. Defaults to None. + after_event_id (Optional[str]): The ID of the event after which the new event + should be inserted. Ignored if `position` or `before_event_id` is provided. + Defaults to None. + timestamp (Optional[timedelta]): The timestamp of the event, used to determine + its position based on chronological order if no other positional parameters + are specified. Defaults to None. + scoring_function (Optional[Callable[[Event, EventDataset], float]]): A custom + function that takes the event and dataset as arguments and returns a score + indicating how suitable the position is for insertion. Higher scores indicate + better placement. If no valid position is found (i.e., all scores are zero), + the insertion will fail with a ValueError. Defaults to None. + + Raises: + ValueError: If the insertion position cannot be determined or is invalid. + + Notes: + - If multiple parameters are provided to specify the position, the precedence is: + 1. `position` + 2. `before_event_id` + 3. `after_event_id` + 4. `timestamp` + 5. `scoring_function` + - If none of the above parameters are specified, the method raises a `ValueError`. """ - Inserts an event into the dataset at the appropriate position. - - Parameters: - ---------- - event : Event - The event to be inserted into the dataset. - - position : Optional[int], default=None - The exact index where the event should be inserted. If provided, - overrides all other positioning parameters. - - before_event_id : Optional[str], default=None - The ID of the event before which the new event should be inserted. - Ignored if `position` is provided. - - after_event_id : Optional[str], default=None - The ID of the event after which the new event should be inserted. - Ignored if `position` or `before_event_id` is provided. - - timestamp : Optional[datetime], default=None - The timestamp of the event, used to determine its position based - on chronological order if no other positional parameters are specified. - - scoring_function : Optional[Callable[[Event, EventDataset, int], int]], default=None - A custom function that takes the event, dataset, and candidate position as arguments - and returns a score indicating how suitable the position is for insertion. - Higher scores indicate better placement. - - This is useful for determining the optimal insertion position when an exact - index is not provided. For example, events can be inserted into a valid - contextual window based on conditions such as dead ball states, possession sequences, - or temporal proximity to similar events. - - If no valid position is found (i.e., all scores are zero), the - insertion will fail with a ValueError. - - Returns: - ------- - void - The method modifies the dataset in place. - - Raises: - ------ - ValueError - If the insertion position cannot be determined or is invalid. - - Notes: - ------ - - If multiple parameters are provided to specify the position, the precedence is: - 1. `position` - 2. `before_event_id` - 3. `after_event_id` - 4. `timestamp` - - If none of the above parameters are specified, the method raises a `ValueError`. - """ - # Determine the insert position based on precedence if position is not None: # If position is provided, use it directly insert_position = position + elif before_event_id is not None: # Find the event with the matching `before_event_id` and insert before it - insert_position = next( - ( - i - for i, e in enumerate(self.events) - if e.event_id == before_event_id - ), - None, - ) - if insert_position is None: + try: + insert_position = next( + ( + i + for i, e in enumerate(self.records) + if e.event_id == before_event_id + ), + ) + except StopIteration: raise ValueError(f"No event found with ID {before_event_id}.") + elif after_event_id is not None: # Find the event with the matching `after_event_id` and insert after it - insert_position = next( - ( - i + 1 - for i, e in enumerate(self.events) - if e.event_id == after_event_id - ), - None, - ) - if insert_position is None: + try: + insert_position = next( + ( + i + 1 + for i, e in enumerate(self.records) + if e.event_id == after_event_id + ), + ) + except StopIteration: raise ValueError(f"No event found with ID {after_event_id}.") + elif timestamp is not None: # If no position or event IDs are specified, insert based on timestamp insert_position = next( ( i - for i, e in enumerate(self.events) + for i, e in enumerate(self.records) if e.timestamp > timestamp ), - len(self.events), + len(self.records), ) + elif scoring_function is not None: # Evaluate all possible positions using the constraint function scores = [ (i, scoring_function(event, self)) - for i, event in enumerate(self.events) + for i, event in enumerate(self.records) ] # Select the best position with the highest score insert_position, best_score = max( @@ -1238,12 +1216,29 @@ def insert( raise ValueError( "No valid insertion position found based on the provided scoring function." ) + else: raise ValueError( "Unable to determine insertion position for the event." ) - self.events.insert(insert_position, event) + assert insert_position is not None + + # Insert the event at the determined position + self.records.insert(insert_position, event) + + # Update the event's references + self.records[insert_position].dataset = self + for i in range( + max(0, insert_position - 1), + min(insert_position + 2, len(self.records)), + ): + self.records[i].prev_record = ( + self.records[i - 1] if i > 0 else None + ) + self.records[i].next_record = ( + self.records[i + 1] if i + 1 < len(self.records) else None + ) @property def events(self): diff --git a/kloppy/tests/test_event.py b/kloppy/tests/test_event.py index 2b772de8d..a607ce5cd 100644 --- a/kloppy/tests/test_event.py +++ b/kloppy/tests/test_event.py @@ -4,11 +4,11 @@ from kloppy import statsbomb from kloppy.domain import ( - EventDataset, + BallState, + CarryResult, Event, + EventDataset, EventFactory, - CarryResult, - BallState, ) @@ -147,3 +147,35 @@ def scoring_function(event: Event, dataset: EventDataset): dataset.insert(new_event, scoring_function=scoring_function) assert dataset.events[607].event_id == "test-insert-1234" del dataset.events[607] # Remove by index to restore the dataset + + # update references + dataset.insert(new_event, position=1) + assert dataset.events[0].next_record.event_id == "test-insert-1234" + assert ( + dataset.events[1].prev_record.event_id + == dataset.events[0].event_id + ) + assert dataset.events[1].event_id == "test-insert-1234" + assert ( + dataset.events[1].next_record.event_id + == dataset.events[2].event_id + ) + assert dataset.events[2].prev_record.event_id == "test-insert-1234" + + dataset.insert(new_event, position=0) + assert dataset.events[0].prev_record is None + assert dataset.events[0].event_id == "test-insert-1234" + assert ( + dataset.events[0].next_record.event_id + == dataset.events[1].event_id + ) + assert dataset.events[1].prev_record.event_id == "test-insert-1234" + + dataset.insert(new_event, position=len(dataset)) + assert dataset.events[-2].next_record.event_id == "test-insert-1234" + assert ( + dataset.events[-1].prev_record.event_id + == dataset.events[-2].event_id + ) + assert dataset.events[-1].event_id == "test-insert-1234" + assert dataset.events[-1].next_record is None From 0a960e3f564ac0083bcd0d331866c92eade5487c Mon Sep 17 00:00:00 2001 From: lodevt Date: Tue, 4 Feb 2025 14:55:10 +0100 Subject: [PATCH 4/5] docstring: more explanation --- kloppy/domain/models/event.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index be4d3390c..a7ee89db2 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -1146,7 +1146,8 @@ def insert( scoring_function (Optional[Callable[[Event, EventDataset], float]]): A custom function that takes the event and dataset as arguments and returns a score indicating how suitable the position is for insertion. Higher scores indicate - better placement. If no valid position is found (i.e., all scores are zero), + better placement. The new event will be inserted before the event that gives + the maximum score. If no valid position is found (i.e., all scores are zero), the insertion will fail with a ValueError. Defaults to None. Raises: From b9cea5253c90b0d437b9d818f9b6382220ea1055 Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Sat, 1 Mar 2025 12:20:53 +0100 Subject: [PATCH 5/5] add option to insert before or after max score --- kloppy/domain/models/event.py | 15 +++++++++++---- kloppy/tests/test_event.py | 33 +++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index a7ee89db2..c3c352441 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -1149,6 +1149,12 @@ def insert( better placement. The new event will be inserted before the event that gives the maximum score. If no valid position is found (i.e., all scores are zero), the insertion will fail with a ValueError. Defaults to None. + scoring_function (Optional[Callable[[Event, EventDataset], float]]): A custom + function that takes the event and dataset as arguments and returns a score + indicating how suitable the position is for insertion. Negative scores mean + insertion should happen **before** the highest-scoring event, while positive + scores mean insertion should happen **after** the highest-scoring event. + If all scores are zero, the insertion will fail with a ValueError. Raises: ValueError: If the insertion position cannot be determined or is invalid. @@ -1210,21 +1216,22 @@ def insert( for i, event in enumerate(self.records) ] # Select the best position with the highest score - insert_position, best_score = max( - scores, key=lambda x: x[1], default=(None, -1) + best_index, best_score = max( + scores, key=lambda x: abs(x[1]), default=(0, -1) ) if best_score == 0: raise ValueError( "No valid insertion position found based on the provided scoring function." ) + # Insert after if score is positive, before if score is negative + insert_position = best_index + 1 if best_score > 0 else best_index + else: raise ValueError( "Unable to determine insertion position for the event." ) - assert insert_position is not None - # Insert the event at the determined position self.records.insert(insert_position, event) diff --git a/kloppy/tests/test_event.py b/kloppy/tests/test_event.py index a607ce5cd..758e563fe 100644 --- a/kloppy/tests/test_event.py +++ b/kloppy/tests/test_event.py @@ -134,7 +134,7 @@ def test_insert(self, dataset: EventDataset): del dataset.events[609] # Remove by index to restore the dataset # insert using scoring function - def scoring_function(event: Event, dataset: EventDataset): + def insert_after_scoring_function(event: Event, dataset: EventDataset): if event.ball_owning_team != dataset.metadata.teams[0]: return 0 if event.period != new_event.period: @@ -144,10 +144,39 @@ def scoring_function(event: Event, dataset: EventDataset): - new_event.timestamp.total_seconds() ) - dataset.insert(new_event, scoring_function=scoring_function) + dataset.insert( + new_event, scoring_function=insert_after_scoring_function + ) + assert dataset.events[608].event_id == "test-insert-1234" + del dataset.events[608] # Remove by index to restore the dataset + + # insert using scoring function + def insert_before_scoring_function( + event: Event, dataset: EventDataset + ): + if event.ball_owning_team != dataset.metadata.teams[0]: + return 0 + if event.period != new_event.period: + return 0 + return -1 / abs( + event.timestamp.total_seconds() + - new_event.timestamp.total_seconds() + ) + + dataset.insert( + new_event, scoring_function=insert_before_scoring_function + ) assert dataset.events[607].event_id == "test-insert-1234" del dataset.events[607] # Remove by index to restore the dataset + def no_match_scoring_function(event: Event, dataset: EventDataset): + return 0 + + with pytest.raises(ValueError): + dataset.insert( + new_event, scoring_function=no_match_scoring_function + ) + # update references dataset.insert(new_event, position=1) assert dataset.events[0].next_record.event_id == "test-insert-1234"