From 3148339ee7866597fa30a8747f579262bdf4089a Mon Sep 17 00:00:00 2001
From: ready-research <72916209+ready-research@users.noreply.github.com>
Date: Wed, 27 Aug 2025 19:10:25 +0530
Subject: [PATCH 1/3] Update settings.py

---
 modelscan/settings.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/modelscan/settings.py b/modelscan/settings.py
index 56b3a796..5e1568a7 100644
--- a/modelscan/settings.py
+++ b/modelscan/settings.py
@@ -130,6 +130,38 @@ class SupportedModelFormats:
             "pdb": "*",
             "shutil": "*",
             "asyncio": "*",
+            "commands": "*",  # Python 2 precursor to subprocess
+            "functools": ["partial"],
+            "numpy.testing._private.utils": "*",
+            "ssl": "*",  # DNS exfiltration via ssl.get_server_certificate()
+            "ensurepip": ["_run_pip"],
+            "idlelib.autocomplete": ["AutoComplete.get_entity", "AutoComplete.fetch_completions"],
+            "idlelib.calltip": ["Calltip.fetch_tip", "get_entity"],
+            "idlelib.debugobj": ["ObjectTreeItem.SetText"],
+            "idlelib.pyshell": ["ModifiedInterpreter.runcode", "ModifiedInterpreter.runcommand"],
+            "idlelib.run": ["Executive.runcode"],
+            "lib2to3.pgen2.grammar": ["Grammar.loads"],
+            "lib2to3.pgen2.pgen": ["ParserGenerator.make_label"],
+            "code": ["InteractiveInterpreter.runcode"],
+            "cProfile": ["runctx", "run"],
+            "doctest": ["debug_script"],
+            "profile": ["Profile.run", "Profile.runctx"],
+            "pydoc": ["pipepager"],
+            "timeit": "*",
+            "trace": ["Trace.run", "Trace.runctx"],
+            "venv": "*",
+            "pip": "*",
+            # PyTorch-related risky globals
+            "torch._dynamo.guards": ["GuardBuilder.get"],
+            "torch._inductor.codecache": "compile_file",
+            "torch.fx.experimental.symbolic_shapes": ["ShapeEnv.evaluate_guards_expression"],
+            "torch.jit.unsupported_tensor_ops": ["execWrapper"],
+            "torch.serialization": "load",
+            "torch.utils._config_module": ["ConfigModule.load_config"],
+            "torch.utils.bottleneck.__main__": ["run_cprofile", "run_autograd_prof"],
+            "torch.utils.collect_env": ["run"],
+            "torch.utils.data.datapipes.utils.decoder": ["basichandlers"],
+            "asyncio.unix_events": ["_UnixSubprocessTransport._start"],
         },
         "HIGH": {
             "webbrowser": "*",  # Includes webbrowser.open()

From 07c223511e9164470d8a042668bf0a58c0b96a24 Mon Sep 17 00:00:00 2001
From: ready-research <72916209+ready-research@users.noreply.github.com>
Date: Wed, 27 Aug 2025 19:11:21 +0530
Subject: [PATCH 2/3] Update modelscan.py

---
 modelscan/modelscan.py | 37 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/modelscan/modelscan.py b/modelscan/modelscan.py
index 4442f5eb..1c900bac 100644
--- a/modelscan/modelscan.py
+++ b/modelscan/modelscan.py
@@ -98,18 +98,35 @@ def _iterate_models(self, model_path: Path) -> Generator[Model, None, None]:
                         with zipfile.ZipFile(model.get_stream(), "r") as zip:
                             file_names = zip.namelist()
                             for file_name in file_names:
-                                with zip.open(file_name, "r") as file_io:
-                                    file_name = f"{model.get_source()}:{file_name}"
-                                    if _is_zipfile(file_name, data=file_io):
-                                        self._errors.append(
-                                            NestedZipError(
-                                                "ModelScan does not support nested zip files.",
-                                                Path(file_name),
+                                try:
+                                    with zip.open(file_name, "r") as file_io:
+                                        file_name = f"{model.get_source()}:{file_name}"
+                                        if _is_zipfile(file_name, data=file_io):
+                                            self._errors.append(
+                                                NestedZipError(
+                                                    "ModelScan does not support nested zip files.",
+                                                    Path(file_name),
+                                                )
                                             )
+                                            continue
+
+                                        yield Model(file_name, file_io)
+                                except (KeyError, RuntimeError, zipfile.BadZipFile) as e:
+                                    logger.debug(
+                                        "Skipping file %s in zip %s due to error",
+                                        file_name,
+                                        str(model.get_source()),
+                                        exc_info=True,
+                                    )
+                                    self._skipped.append(
+                                        ModelScanSkipped(
+                                            "ModelScan",
+                                            SkipCategories.BAD_ZIP,
+                                            f"Skipping file in zip due to error: {e}",
+                                            f"{model.get_source()}:{file_name}",
                                         )
-                                        continue
-
-                                    yield Model(file_name, file_io)
+                                    )
+                                    continue
                     except (zipfile.BadZipFile, RuntimeError) as e:
                         logger.debug(
                             "Skipping zip file %s, due to error",

From 09f922378d2fb20cf57232595834c8be6a8b54fe Mon Sep 17 00:00:00 2001
From: ready-research <72916209+ready-research@users.noreply.github.com>
Date: Wed, 27 Aug 2025 19:12:30 +0530
Subject: [PATCH 3/3] Update picklescanner.py

---
 modelscan/tools/picklescanner.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/modelscan/tools/picklescanner.py b/modelscan/tools/picklescanner.py
index 44c4e2a0..a9893467 100644
--- a/modelscan/tools/picklescanner.py
+++ b/modelscan/tools/picklescanner.py
@@ -55,17 +55,15 @@ def _list_globals(
     memo: Dict[Union[int, str], str] = {}
     # Scan the data for pickle buffers, stopping when parsing fails or stops making progress
     last_byte = b"dummy"
+    parsing_pkl_error: Optional[str] = None
     while last_byte != b"":
         # List opcodes
+        ops: List[Tuple[Any, Any, Union[int, None]]] = []
         try:
-            ops: List[Tuple[Any, Any, Union[int, None]]] = list(
-                pickletools.genops(data)
-            )
+            for op in pickletools.genops(data):
+                ops.append(op)
         except Exception as e:
-            # Given we can have multiple pickles in a file, we may have already successfully extracted globals from a valid pickle.
-            # Thus return the already found globals in the error & let the caller decide what to do.
-            globals_opt = globals if len(globals) > 0 else None
-            raise GenOpsError(str(e), globals_opt)
+            parsing_pkl_error = str(e)
 
         last_byte = data.read(1)
         data.seek(-1, 1)
@@ -84,7 +82,7 @@
             globals.add(tuple(op_value.split(" ", 1)))
         elif op_name == "STACK_GLOBAL":
             values: List[str] = []
-            for offset in range(1, n):
+            for offset in range(1, n + 1):
                 if ops[n - offset][0].name in [
                     "MEMOIZE",
                     "PUT",
@@ -99,6 +97,9 @@
                     "UNICODE",
                     "BINUNICODE",
                     "BINUNICODE8",
+                    "STRING",
+                    "BINSTRING",
+                    "SHORT_BINSTRING",
                 ]:
                     logger.debug(
                         "Presence of non-string opcode, categorizing as an unknown dangerous import"
@@ -116,6 +117,11 @@
         if not multiple_pickles:
             break
 
+    if parsing_pkl_error is not None:
+        # Return the already found globals in the error & let the caller decide what to do.
+        globals_opt = globals if len(globals) > 0 else None
+        raise GenOpsError(parsing_pkl_error, globals_opt)
+
     return globals
 
 
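A minimal, stdlib-only sketch of the opcode pattern the picklescanner changes above target (illustrative only: webbrowser.open merely stands in for any flagged global, and the backwards walk is a simplification of modelscan's STACK_GLOBAL handling, not its actual code). Protocol 4 pickles a callable by reference as two string opcodes followed by STACK_GLOBAL; a hand-crafted pickle can use the older STRING/BINSTRING/SHORT_BINSTRING opcodes instead, which is what the names added in PATCH 3/3 cover.

import pickle
import pickletools
import webbrowser

# String-pushing opcodes, including the protocol 0/1 names added in PATCH 3/3.
STRING_OPS = {
    "SHORT_BINUNICODE", "UNICODE", "BINUNICODE", "BINUNICODE8",
    "STRING", "BINSTRING", "SHORT_BINSTRING",
}

# Pickling a function by reference stores (module, name) as two string opcodes
# followed by STACK_GLOBAL under protocol 4.
payload = pickle.dumps(webbrowser.open, protocol=4)
pickletools.dis(payload)  # SHORT_BINUNICODE 'webbrowser', SHORT_BINUNICODE 'open', STACK_GLOBAL, ...

# Simplified reconstruction: for each STACK_GLOBAL, take the two most recent
# string operands as the (module, name) pair a scanner would compare against
# its unsafe_globals settings.
ops = list(pickletools.genops(payload))
for n, (op, _value, _pos) in enumerate(ops):
    if op.name == "STACK_GLOBAL":
        strings = [value for opcode, value, _ in ops[:n] if opcode.name in STRING_OPS]
        print(tuple(strings[-2:]))  # ('webbrowser', 'open')

The real scanner additionally skips MEMOIZE/PUT opcodes while walking back from STACK_GLOBAL and records a non-string operand as an unknown dangerous import; the sketch omits both for brevity.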