From feb82f0da6c1daadd06e2cefa3ba113a2f486ad3 Mon Sep 17 00:00:00 2001
From: Carter Green <carter@databento.com>
Date: Thu, 11 Dec 2025 11:30:51 -0600
Subject: [PATCH 1/3] DOC: Fix code of conduct email

---
 CODE_OF_CONDUCT.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index 8ddae1db..483134e4 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -60,7 +60,7 @@ representative at an online or offline event.
 
 Instances of abusive, harassing, or otherwise unacceptable behavior may be
 reported to the community leaders responsible for enforcement at
-info@nautechsystems.io.
+support@databento.com.
 All complaints will be reviewed and investigated promptly and fairly.
 
 All community leaders are obligated to respect the privacy and security of the

From 191f5dc20c34bae73d35a59125d6d2f8d7a4f231 Mon Sep 17 00:00:00 2001
From: Nick Macholl <nick@databento.com>
Date: Tue, 9 Dec 2025 15:35:28 -0800
Subject: [PATCH 2/3] OPT: Vectorize symbol mapping

---
 CHANGELOG.md                   |   5 +
 databento/common/dbnstore.py   |  21 +--
 databento/common/symbology.py  | 234 ++++++++++++++++++++++++++-------
 pyproject.toml                 |   1 -
 tests/test_common_symbology.py | 150 ++++++++++++++-------
 5 files changed, 304 insertions(+), 107 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 27416e74..8c0ee46d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changelog
 
+## 0.69.0
+
+#### Enhancements
+- Improved the performance of `DBNStore.to_df()` symbol mapping
+
 ## 0.68.0 - 2025-12-09
 
 This release adds support for Python 3.14.
diff --git a/databento/common/dbnstore.py b/databento/common/dbnstore.py
index 722f2aa7..dc1a09f9 100644
--- a/databento/common/dbnstore.py
+++ b/databento/common/dbnstore.py
@@ -1287,7 +1287,7 @@ def _transcode(
     ) -> None:
         if map_symbols:
             self._instrument_map.insert_metadata(self.metadata)
-            symbol_map = self._instrument_map._data
+            symbol_map = self._instrument_map.build_symbol_map()
         else:
             symbol_map = None
 
@@ -1299,7 +1299,7 @@ def _transcode(
             pretty_ts=pretty_ts,
             has_metadata=True,
             map_symbols=map_symbols,
-            symbol_interval_map=symbol_map,  # type: ignore [arg-type]
+            symbol_interval_map=symbol_map,
             schema=schema,
         )
 
@@ -1508,19 +1508,12 @@ def _format_hidden_fields(self, df: pd.DataFrame) -> None:
     def _format_map_symbols(self, df: pd.DataFrame) -> None:
         # the first ordered field will be ts_recv or ts_event when appropriate
         ts_name = self._struct_type._ordered_fields[0]
+        dates = df[ts_name] if self._pretty_ts else pd.to_datetime(df[ts_name], utc=True).dt.date
 
-        if df.empty:
-            df["symbol"] = []
-        else:
-            df["symbol"] = df.apply(
-                lambda r: self._instrument_map.resolve(
-                    r["instrument_id"],
-                    (
-                        r[ts_name] if self._pretty_ts else pd.to_datetime(r[ts_name], utc=True)
-                    ).date(),
-                ),
-                axis=1,
-            )
+        df["symbol"] = self._instrument_map.resolve_many(
+            df["instrument_id"].to_numpy(),
+            np.asarray(dates, dtype="datetime64[D]"),
+        )
 
     def _format_timezone(self, df: pd.DataFrame) -> None:
         for field in self._struct_type._timestamp_fields:
diff --git a/databento/common/symbology.py b/databento/common/symbology.py
index 731ead9b..4b357fa4 100644
--- a/databento/common/symbology.py
+++ b/databento/common/symbology.py
@@ -1,11 +1,9 @@
 from __future__ import annotations
 
-import bisect
 import csv
 import datetime as dt
 import functools
 import json
-from collections import defaultdict
 from collections.abc import Mapping
 from io import TextIOWrapper
 from os import PathLike
@@ -15,12 +13,14 @@
 from typing import NamedTuple
 from typing import TextIO
 
+import numpy as np
 import pandas as pd
 from databento_dbn import UNDEF_TIMESTAMP
 from databento_dbn import Metadata
 from databento_dbn import SType
 from databento_dbn import SymbolMappingMsg
 from databento_dbn import SymbolMappingMsgV1
+from numpy.typing import NDArray
 
 from databento.common.parsing import datetime_to_unix_nanoseconds
 from databento.common.validation import validate_path
@@ -187,20 +187,24 @@ class InstrumentMap:
     )
 
     def __init__(self) -> None:
-        self._data: dict[int, list[MappingInterval]] = defaultdict(list)
+        self._instrument_ids: NDArray[np.uint64]
+        self._starts: NDArray[np.datetime64[dt.date]]
+        self._ends: NDArray[np.datetime64[dt.date]]
+        self._symbols: NDArray[np.datetime64[dt.date]]
+
+        self.clear()  # initialize
 
     def clear(self) -> None:
         """
         Clear all mapping data.
         """
-        self._data.clear()
+        self._instrument_ids = np.empty(shape=[0], dtype=np.uint64)
+        self._starts = np.empty(shape=[0], dtype="datetime64[D]")
+        self._ends = np.empty(shape=[0], dtype="datetime64[D]")
+        self._symbols = np.empty(shape=[0], dtype="object")
 
     @functools.lru_cache
-    def resolve(
-        self,
-        instrument_id: int,
-        date: dt.date,
-    ) -> str | None:
+    def resolve(self, instrument_id: int, date: dt.date) -> str | None:
         """
         Resolve an instrument ID on a particular date to the mapped symbol, or
         `None` if there is not mapping on that date.
@@ -220,13 +224,106 @@ def resolve(
             If the InstrumentMap is empty.
             If the InstrumentMap does not contain a mapping for the `instrument_id`.
 
+        See Also
+        --------
+        InstrumentMap.resolve_many()
+
         """
-        mappings = self._data[int(instrument_id)]
-        for entry in mappings:
-            if entry.start_date <= date < entry.end_date:
-                return entry.symbol
+        if self._instrument_ids.size == 0:
+            return None
+
+        np_date = np.datetime64(date)
+
+        key_ids = self._instrument_ids
+        key_starts = self._starts
+
+        left = np.searchsorted(key_ids, instrument_id, side="left")
+        right = np.searchsorted(key_ids, instrument_id, side="right")
+
+        if left == right:
+            return None
+
+        pos = np.searchsorted(key_starts[left:right], np_date, side="right") - 1
+
+        if pos < 0:
+            return None
+
+        pos += left
+
+        if np_date < self._ends[pos]:
+            return self._symbols[pos]
+
         return None
 
+    def resolve_many(
+        self,
+        instrument_ids: NDArray[np.uint64],
+        dates: NDArray[np.datetime64[dt.date]],
+    ) -> NDArray[np.object_]:
+        """
+        Resolve several instrument ID and date pairs to their mapped symbols.
+        This method is optimal for vectorizing the symbology resolution
+        operation.
+
+        Parameters
+        ----------
+        instrument_ids : NDArray[np.uint64]
+            The collection of instrument IDs.
+        dates : NDArray[np.datetime64[dt.date]]
+            The collection of dates for each instrument ID.
+
+        Returns
+        -------
+        NDArray[np.object_]
+            The collection of resolved symbols.
+
+        See Also
+        --------
+        InstrumentMap.resolve()
+
+        """
+        if instrument_ids.shape != dates.shape:
+            raise ValueError("instrument_ids and dates must have the same shape")
+
+        result = np.full(instrument_ids.shape, None, dtype=object)
+        if instrument_ids.size == 0 or self._instrument_ids.size == 0:
+            return result
+
+        # Get unique instrument, date combinations and just resolve those
+        query_array = np.stack([instrument_ids, dates.view("uint64")], axis=1)
+        unique_queries, inverse = np.unique(query_array, axis=0, return_inverse=True)
+        unique_inst = unique_queries[:, 0]
+        unique_dates = unique_queries[:, 1].view("datetime64[D]")
+
+        resolved_unique = np.full(unique_inst.shape, None, dtype=object)
+        for inst_id in np.unique(unique_inst):
+            # In this loop we resolve all dates for each instrument
+            mask = unique_inst == inst_id
+            idx = np.nonzero(mask)[0]
+
+            # Slice the mapping arrays
+            left = np.searchsorted(self._instrument_ids, inst_id, side="left")
+            right = np.searchsorted(self._instrument_ids, inst_id, side="right")
+
+            if left == right:
+                continue  # no intervals for this instrument
+
+            starts = self._starts[left:right]
+            ends = self._ends[left:right]
+            symbols = self._symbols[left:right]
+
+            # Get all the dates to resolve
+            dates_to_check = unique_dates[idx]
+
+            resolved = np.searchsorted(starts, dates_to_check, side="right") - 1
+            valid = (resolved >= 0) & (dates_to_check < ends[resolved])
+            resolved_unique[idx[valid]] = symbols[resolved[valid]]
+
+        # Map the resolved symbols back to the result
+        result[:] = resolved_unique[inverse]
+
+        return result
+
     def insert_metadata(self, metadata: Metadata) -> None:
         """
         Insert mappings from DBN Metadata.
@@ -246,6 +343,8 @@ def insert_metadata(self, metadata: Metadata) -> None:
             # Nothing to do
             return
 
+        instrument_ids: list[int] = []
+        intervals: list[MappingInterval] = []
         for symbol_in, entries in metadata.mappings.items():
             for entry in entries:
                 if not entry["symbol"]:
@@ -266,8 +365,8 @@ def insert_metadata(self, metadata: Metadata) -> None:
                     stype_out=metadata.stype_out,
                 )
 
-                self._insert_interval(
-                    instrument_id,
+                instrument_ids.append(instrument_id)
+                intervals.append(
                     MappingInterval(
                         start_date=start_date,
                         end_date=end_date,
@@ -275,6 +374,8 @@ def insert_metadata(self, metadata: Metadata) -> None:
                     ),
                 )
 
+        self._insert_intervals(instrument_ids=instrument_ids, intervals=intervals)
+
     def insert_symbol_mapping_msg(
         self,
         msg: SymbolMappingMsg | SymbolMappingMsgV1,
@@ -310,13 +411,15 @@ def insert_symbol_mapping_msg(
         else:
             symbol = msg.stype_out_symbol
 
-        self._insert_interval(
-            msg.instrument_id,
-            MappingInterval(
-                start_date=start_ts.date(),
-                end_date=end_ts.date(),
-                symbol=symbol,
-            ),
+        self._insert_intervals(
+            instrument_ids=[msg.instrument_id],
+            intervals=[
+                MappingInterval(
+                    start_date=start_ts.date(),
+                    end_date=end_ts.date(),
+                    symbol=symbol,
+                ),
+            ],
         )
 
     def insert_json(
@@ -360,6 +463,8 @@ def insert_json(
         stype_in = SType(mapping["stype_in"])
         stype_out = SType(mapping["stype_out"])
 
+        instrument_ids: list[int] = []
+        intervals: list[MappingInterval] = []
         for symbol_in, entries in mapping["result"].items():
             for entry in entries:
                 if not all(k in entry for k in self.SYMBOLOGY_RESULT_KEYS):
@@ -385,14 +490,10 @@ def insert_json(
                     stype_out=stype_out,
                 )
 
-                self._insert_interval(
-                    instrument_id,
-                    MappingInterval(
-                        start_date=start_date,
-                        end_date=end_date,
-                        symbol=symbol,
-                    ),
-                )
+                instrument_ids.append(instrument_id)
+                intervals.append(MappingInterval(start_date, end_date, symbol))
+
+        self._insert_intervals(instrument_ids=instrument_ids, intervals=intervals)
 
     def map_symbols_csv(
         self,
@@ -467,7 +568,7 @@ def map_symbols_csv(
                     if instrument_id is None:
                         row["symbol"] = ""
                     else:
-                        row["symbol"] = self.resolve(instrument_id, date)
+                        row["symbol"] = self.resolve(int(instrument_id), date)
 
                     writer.writerow(row)
 
@@ -531,7 +632,7 @@ def map_symbols_json(
                     ts = datetime_to_unix_nanoseconds(ts_field)
 
                     date = pd.Timestamp(ts, unit="ns").date()
-                    record["symbol"] = self.resolve(instrument_id, date)
+                    record["symbol"] = self.resolve(int(instrument_id), date)
 
                     json.dump(
                         record,
@@ -542,24 +643,69 @@ def map_symbols_json(
 
         return out_file_valid
 
-    def _insert_interval(self, instrument_id: int, interval: MappingInterval) -> None:
+    def build_symbol_map(self) -> dict[int, list[tuple[dt.date, dt.date, str]]]:
         """
-        Insert a SymbolInterval into the map.
+        Build a simple symbol map. This is to interface with methods of
+        databento-dbn.
 
-        This ensures elements are inserted in order and prevents
-        duplicate entries.
+        Returns
+        -------
+        dict[int, list[tuple[dt.date, dt.date, str]]]
 
         """
-        mappings = self._data[instrument_id]
-        insert_position = bisect.bisect_left(
-            self._data[instrument_id],
-            interval,
-        )
+        symbol_map: dict[int, list[tuple[dt.date, dt.date, str]]] = {}
+
+        if self._instrument_ids.size == 0:
+            return symbol_map
+
+        unique_ids, start_indices = np.unique(self._instrument_ids, return_index=True)
+        end_indices = np.append(start_indices[1:], self._instrument_ids.size)
+
+        for inst_id, start, end in zip(unique_ids, start_indices, end_indices):
+            starts = self._starts[start:end]
+            ends = self._ends[start:end]
+            symbols = self._symbols[start:end]
 
-        if insert_position < len(mappings) and mappings[insert_position] == interval:
-            return  # this mapping is already present
+            symbol_map[inst_id] = [
+                (_to_date(s), _to_date(e), str(sym)) for s, e, sym in zip(starts, ends, symbols)
+            ]
 
-        mappings.insert(insert_position, interval)
+        return symbol_map
+
+    def _insert_intervals(
+        self,
+        instrument_ids: list[int],
+        intervals: list[MappingInterval],
+    ) -> None:
+        new_ids = np.array(instrument_ids, dtype=np.uint64)
+        new_starts = np.array([i.start_date for i in intervals], dtype="datetime64[D]")
+        new_ends = np.array([i.end_date for i in intervals], dtype="datetime64[D]")
+        new_symbols = np.array([i.symbol for i in intervals], dtype=object)
+
+        n = new_ids.size
+        if not (n == new_starts.size == new_ends.size == new_symbols.size):
+            raise ValueError("All input arrays must have the same length")
+
+        if n == 0:
+            return  # nothing to insert
+
+        self._instrument_ids = np.concatenate([self._instrument_ids, new_ids])
+        self._starts = np.concatenate([self._starts, new_starts])
+        self._ends = np.concatenate([self._ends, new_ends])
+        self._symbols = np.concatenate([self._symbols, new_symbols])
+
+        order = np.lexsort((self._starts, self._instrument_ids))
+        self._instrument_ids = self._instrument_ids[order]
+        self._starts = self._starts[order]
+        self._ends = self._ends[order]
+        self._symbols = self._symbols[order]
+
+
+def _to_date(value: np.datetime64[dt.date]) -> dt.date:
+    py_dt = value.astype("O")  # yields datetime.datetime or datetime.date
+    if isinstance(py_dt, dt.datetime):
+        return py_dt.date()
+    return py_dt
 
 
 def _resolve_mapping_tuple(
diff --git a/pyproject.toml b/pyproject.toml
index 160b313a..2245710d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,7 +74,6 @@ no_strict_optional = true
 warn_no_return = true
 warn_unused_configs = true
 warn_unused_ignores = true
-plugins = ["numpy.typing.mypy_plugin"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
diff --git a/tests/test_common_symbology.py b/tests/test_common_symbology.py
index 49467c54..a14c1743 100644
--- a/tests/test_common_symbology.py
+++ b/tests/test_common_symbology.py
@@ -8,6 +8,7 @@
 from typing import NamedTuple
 
 import databento_dbn
+import numpy as np
 import pandas as pd
 import pytest
 from databento_dbn import UNDEF_TIMESTAMP
@@ -57,7 +58,7 @@ def fixture_instrument_map() -> InstrumentMap:
 def fixture_start_date() -> pd.Timestamp:
     """
     Fixture for a start date. This is one day behind the date provided by the
-    `start_date` fixture.
+    `end_date` fixture.
 
     Returns
     -------
@@ -199,7 +200,10 @@ def test_instrument_map(
     Test the creation of an InstrumentMap.
     """
     # Arrange, Act, Assert
-    assert instrument_map._data == {}
+    assert len(instrument_map._starts) == 0
+    assert len(instrument_map._ends) == 0
+    assert len(instrument_map._instrument_ids) == 0
+    assert len(instrument_map._symbols) == 0
 
 
 @pytest.mark.parametrize(
@@ -323,7 +327,10 @@ def test_instrument_map_insert_metadata_empty_mappings(
     instrument_map.insert_metadata(metadata)
 
     # Assert
-    assert instrument_map._data == {}
+    assert len(instrument_map._starts) == 0
+    assert len(instrument_map._ends) == 0
+    assert len(instrument_map._instrument_ids) == 0
+    assert len(instrument_map._symbols) == 0
 
 
 @pytest.mark.parametrize(
@@ -570,7 +577,10 @@ def test_instrument_map_insert_symbology_response_empty_mapping(
     instrument_map.insert_json(sym_resp)
 
     # Assert
-    assert instrument_map._data == {}
+    assert len(instrument_map._starts) == 0
+    assert len(instrument_map._ends) == 0
+    assert len(instrument_map._instrument_ids) == 0
+    assert len(instrument_map._symbols) == 0
 
 
 @pytest.mark.parametrize(
@@ -709,7 +719,10 @@ def test_instrument_map_insert_json_str_empty_mapping(
     instrument_map.insert_json(json.dumps(sym_resp))
 
     # Assert
-    assert instrument_map._data == {}
+    assert len(instrument_map._starts) == 0
+    assert len(instrument_map._ends) == 0
+    assert len(instrument_map._instrument_ids) == 0
+    assert len(instrument_map._symbols) == 0
 
 
 @pytest.mark.parametrize(
@@ -793,13 +806,16 @@ def test_instrument_map_resolve_with_date(
     symbol = "test_1"
     instrument_id = 1234
 
-    instrument_map._data[instrument_id] = [
-        MappingInterval(
-            start_date=start_date.date(),
-            end_date=end_date.date(),
-            symbol=symbol,
-        ),
-    ]
+    instrument_map._insert_intervals(
+        [instrument_id],
+        [
+            MappingInterval(
+                start_date=start_date.date(),
+                end_date=end_date.date(),
+                symbol=symbol,
+            ),
+        ],
+    )
 
     # Assert
     assert (
@@ -813,42 +829,6 @@ def test_instrument_map_resolve_with_date(
     assert instrument_map.resolve(instrument_id, end_date.date()) is None
 
 
-def test_instrument_map_ignore_duplicate(
-    instrument_map: InstrumentMap,
-    start_date: pd.Timestamp,
-    end_date: pd.Timestamp,
-) -> None:
-    """
-    Test that a duplicate entry is not inserted into an InstrumentMap.
-    """
-    # Arrange, Act
-    symbol = "test_1"
-    instrument_id = 1234
-
-    instrument_map._data[instrument_id] = [
-        MappingInterval(
-            start_date=start_date.date(),
-            end_date=end_date.date(),
-            symbol=symbol,
-        ),
-    ]
-
-    # Act, Assert
-    assert len(instrument_map._data[instrument_id]) == 1
-
-    msg = create_symbol_mapping_message(
-        instrument_id=instrument_id,
-        stype_in_symbol=symbol,
-        stype_out_symbol=instrument_id,
-        start_ts=start_date,
-        end_ts=end_date,
-    )
-
-    instrument_map.insert_symbol_mapping_msg(msg)
-
-    assert len(instrument_map._data[instrument_id]) == 1
-
-
 @pytest.mark.parametrize(
     "dataset",
     [
@@ -1000,3 +980,77 @@ def test_insert_symbology_json_mismatched_stypes(
     # Assert
     assert store.to_df().iloc[0]["symbol"] == "NVDA"
     assert store.to_df().iloc[0]["instrument_id"] == 6155
+
+
+def test_instrument_map_resolve() -> None:
+    """
+    Test a synthetic symbology of symbols, resolving an instrument ID across
+    many dates where the symbol it points to rotates every day.
+    """
+    # Arrange
+    instrument_map = InstrumentMap()
+    start_date = pd.Timestamp("2020-01-01")
+    end_date = pd.Timestamp("2021-01-01")
+
+    instrument_ids = np.arange(100).astype("uint64")
+    symbols = instrument_ids.astype(str)
+    dates = pd.date_range(start_date, end_date, freq="D", inclusive="left").to_numpy(
+        "datetime64[D]",
+    )
+
+    # Act
+    for offset, date in enumerate(dates):
+        instrument_map._insert_intervals(
+            np.roll(instrument_ids, offset).tolist(),
+            [
+                MappingInterval(
+                    start_date=date,
+                    end_date=date + pd.Timedelta(days=1),
+                    symbol=symbol,
+                )
+                for symbol in symbols
+            ],
+        )
+
+    # Resolve instrument ID 1 on every date
+    resolve_many_result = instrument_map.resolve_many(
+        np.ones(dtype="uint64", shape=dates.shape),
+        dates,
+    )
+
+    # Assert
+    resolve_results = []
+    for date in dates:
+        resolve_results.append(instrument_map.resolve(1, date))
+
+    assert resolve_results == resolve_many_result.tolist()
+
+
+@pytest.mark.parametrize(
+    "dataset",
+    Dataset,
+)
+def test_instrument_map_resolve_definition(
+    test_data_path: Callable[[Dataset, Schema], pathlib.Path],
+    dataset: Dataset,
+) -> None:
+    """
+    Test that symbology resolved with `InstrumentMap.resolve()` and
+    `InstrumentMap.resolve_many()` agree and are both correct using stub
+    Definition data.
+    """
+    # Arrange
+    store = DBNStore.from_file(test_data_path(dataset, Schema.DEFINITION))
+    df = store.to_df(map_symbols=False)
+
+    # Act
+    instrument_ids = df["instrument_id"]
+    dates = df.index.date
+    expected = df["raw_symbol"]
+    resolve_many_result = store._instrument_map.resolve_many(instrument_ids, dates)
+
+    # Assert
+    assert (resolve_many_result == expected).all
+    for i, (instrument_id, date) in enumerate(zip(instrument_ids, dates)):
+        resolve_result = store._instrument_map.resolve(instrument_id, date)
+        assert resolve_many_result[i] == resolve_result

From 71aaec214a503e284999e55af97c3e14b043373d Mon Sep 17 00:00:00 2001
From: Nick Macholl <nick@databento.com>
Date: Tue, 16 Dec 2025 14:27:10 -0800
Subject: [PATCH 3/3] VER: Release 0.68.1

---
 CHANGELOG.md         | 2 +-
 databento/version.py | 2 +-
 pyproject.toml       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8c0ee46d..fde69e4e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 # Changelog
 
-## 0.69.0
+## 0.68.1 - 2025-12-16
 
 #### Enhancements
 - Improved the performance of `DBNStore.to_df()` symbol mapping
diff --git a/databento/version.py b/databento/version.py
index 251b74b8..993101b4 100644
--- a/databento/version.py
+++ b/databento/version.py
@@ -1 +1 @@
-__version__ = "0.68.0"
+__version__ = "0.68.1"
diff --git a/pyproject.toml b/pyproject.toml
index 2245710d..e06ebe16 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "databento"
-version = "0.68.0"
+version = "0.68.1"
 description = "Official Python client library for Databento"
 readme = "README.md"
 requires-python = ">=3.10"