From 3148339ee7866597fa30a8747f579262bdf4089a Mon Sep 17 00:00:00 2001
From: ready-research <72916209+ready-research@users.noreply.github.com>
Date: Wed, 27 Aug 2025 19:10:25 +0530
Subject: [PATCH 1/3] Update settings.py

---
 modelscan/settings.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/modelscan/settings.py b/modelscan/settings.py
index 56b3a796..5e1568a7 100644
--- a/modelscan/settings.py
+++ b/modelscan/settings.py
@@ -130,6 +130,38 @@ class SupportedModelFormats:
             "pdb": "*",
             "shutil": "*",
             "asyncio": "*",
+            "commands": "*",  # Python 2 precursor to subprocess
+            "functools": ["partial"],
+            "numpy.testing._private.utils": "*",
+            "ssl": "*",  # DNS exfiltration via ssl.get_server_certificate()
+            "ensurepip": ["_run_pip"],
+            "idlelib.autocomplete": ["AutoComplete.get_entity", "AutoComplete.fetch_completions"],
+            "idlelib.calltip": ["Calltip.fetch_tip", "get_entity"],
+            "idlelib.debugobj": ["ObjectTreeItem.SetText"],
+            "idlelib.pyshell": ["ModifiedInterpreter.runcode", "ModifiedInterpreter.runcommand"],
+            "idlelib.run": ["Executive.runcode"],
+            "lib2to3.pgen2.grammar": ["Grammar.loads"],
+            "lib2to3.pgen2.pgen": ["ParserGenerator.make_label"],
+            "code": ["InteractiveInterpreter.runcode"],
+            "cProfile": ["runctx", "run"],
+            "doctest": ["debug_script"],
+            "profile": ["Profile.run", "Profile.runctx"],
+            "pydoc": ["pipepager"],
+            "timeit": "*",
+            "trace": ["Trace.run", "Trace.runctx"],
+            "venv": "*",
+            "pip": "*",
+            # PyTorch-related risky globals
+            "torch._dynamo.guards": ["GuardBuilder.get"],
+            "torch._inductor.codecache": "compile_file",
+            "torch.fx.experimental.symbolic_shapes": ["ShapeEnv.evaluate_guards_expression"],
+            "torch.jit.unsupported_tensor_ops": ["execWrapper"],
+            "torch.serialization": "load",
+            "torch.utils._config_module": ["ConfigModule.load_config"],
+            "torch.utils.bottleneck.__main__": ["run_cprofile", "run_autograd_prof"],
+            "torch.utils.collect_env": ["run"],
+            "torch.utils.data.datapipes.utils.decoder": ["basichandlers"],
+            "asyncio.unix_events": ["_UnixSubprocessTransport._start"],
         },
         "HIGH": {
             "webbrowser": "*",  # Includes webbrowser.open()

From 07c223511e9164470d8a042668bf0a58c0b96a24 Mon Sep 17 00:00:00 2001
From: ready-research <72916209+ready-research@users.noreply.github.com>
Date: Wed, 27 Aug 2025 19:11:21 +0530
Subject: [PATCH 2/3] Update modelscan.py

---
 modelscan/modelscan.py | 37 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/modelscan/modelscan.py b/modelscan/modelscan.py
index 4442f5eb..1c900bac 100644
--- a/modelscan/modelscan.py
+++ b/modelscan/modelscan.py
@@ -98,18 +98,35 @@ def _iterate_models(self, model_path: Path) -> Generator[Model, None, None]:
                         with zipfile.ZipFile(model.get_stream(), "r") as zip:
                             file_names = zip.namelist()
                             for file_name in file_names:
-                                with zip.open(file_name, "r") as file_io:
-                                    file_name = f"{model.get_source()}:{file_name}"
-                                    if _is_zipfile(file_name, data=file_io):
-                                        self._errors.append(
-                                            NestedZipError(
-                                                "ModelScan does not support nested zip files.",
-                                                Path(file_name),
+                                try:
+                                    with zip.open(file_name, "r") as file_io:
+                                        file_name = f"{model.get_source()}:{file_name}"
+                                        if _is_zipfile(file_name, data=file_io):
+                                            self._errors.append(
+                                                NestedZipError(
+                                                    "ModelScan does not support nested zip files.",
+                                                    Path(file_name),
+                                                )
                                             )
+                                            continue
+
+                                        yield Model(file_name, file_io)
+                                except (KeyError, RuntimeError, zipfile.BadZipFile) as e:
+                                    logger.debug(
+                                        "Skipping file %s in zip %s due to error",
+                                        file_name,
+                                        str(model.get_source()),
+                                        exc_info=True,
+                                    )
+                                    self._skipped.append(
+                                        ModelScanSkipped(
+                                            "ModelScan",
+                                            SkipCategories.BAD_ZIP,
+                                            f"Skipping file in zip due to error: {e}",
+                                            f"{model.get_source()}:{file_name}",
                                         )
-                                        continue
-
-                                    yield Model(file_name, file_io)
+                                    )
+                                    continue
                     except (zipfile.BadZipFile, RuntimeError) as e:
                         logger.debug(
                             "Skipping zip file %s, due to error",

From 09f922378d2fb20cf57232595834c8be6a8b54fe Mon Sep 17 00:00:00 2001
From: ready-research <72916209+ready-research@users.noreply.github.com>
Date: Wed, 27 Aug 2025 19:12:30 +0530
Subject: [PATCH 3/3] Update picklescanner.py

---
 modelscan/tools/picklescanner.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/modelscan/tools/picklescanner.py b/modelscan/tools/picklescanner.py
index 44c4e2a0..a9893467 100644
--- a/modelscan/tools/picklescanner.py
+++ b/modelscan/tools/picklescanner.py
@@ -55,17 +55,15 @@ def _list_globals(
     memo: Dict[Union[int, str], str] = {}
     # Scan the data for pickle buffers, stopping when parsing fails or stops making progress
     last_byte = b"dummy"
+    parsing_pkl_error: Optional[str] = None
     while last_byte != b"":
         # List opcodes
+        ops: List[Tuple[Any, Any, Union[int, None]]] = []
         try:
-            ops: List[Tuple[Any, Any, Union[int, None]]] = list(
-                pickletools.genops(data)
-            )
+            for op in pickletools.genops(data):
+                ops.append(op)
         except Exception as e:
-            # Given we can have multiple pickles in a file, we may have already successfully extracted globals from a valid pickle.
-            # Thus return the already found globals in the error & let the caller decide what to do.
-            globals_opt = globals if len(globals) > 0 else None
-            raise GenOpsError(str(e), globals_opt)
+            parsing_pkl_error = str(e)
 
         last_byte = data.read(1)
         data.seek(-1, 1)
@@ -84,7 +82,7 @@
             globals.add(tuple(op_value.split(" ", 1)))
         elif op_name == "STACK_GLOBAL":
             values: List[str] = []
-            for offset in range(1, n):
+            for offset in range(1, n + 1):
                 if ops[n - offset][0].name in [
                     "MEMOIZE",
                     "PUT",
@@ -99,6 +97,9 @@
                     "UNICODE",
                     "BINUNICODE",
                     "BINUNICODE8",
+                    "STRING",
+                    "BINSTRING",
+                    "SHORT_BINSTRING",
                 ]:
                     logger.debug(
                         "Presence of non-string opcode, categorizing as an unknown dangerous import"
@@ -116,6 +117,11 @@
         if not multiple_pickles:
             break
 
+    if parsing_pkl_error is not None:
+        # Return the already found globals in the error & let the caller decide what to do.
+        globals_opt = globals if len(globals) > 0 else None
+        raise GenOpsError(parsing_pkl_error, globals_opt)
+
     return globals
 
 
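A minimal, stdlib-only sketch of the opcode pattern the picklescanner changes above target (illustrative only: webbrowser.open merely stands in for any flagged global, and the backwards walk is a simplification of modelscan's STACK_GLOBAL handling, not its actual code). Protocol 4 pickles a callable by reference as two string opcodes followed by STACK_GLOBAL; a hand-crafted pickle can use the older STRING/BINSTRING/SHORT_BINSTRING opcodes instead, which is what the names added in PATCH 3/3 cover.

import pickle
import pickletools
import webbrowser

# String-pushing opcodes, including the protocol 0/1 names added in PATCH 3/3.
STRING_OPS = {
    "SHORT_BINUNICODE", "UNICODE", "BINUNICODE", "BINUNICODE8",
    "STRING", "BINSTRING", "SHORT_BINSTRING",
}

# Pickling a function by reference stores (module, name) as two string opcodes
# followed by STACK_GLOBAL under protocol 4.
payload = pickle.dumps(webbrowser.open, protocol=4)
pickletools.dis(payload)  # SHORT_BINUNICODE 'webbrowser', SHORT_BINUNICODE 'open', STACK_GLOBAL, ...

# Simplified reconstruction: for each STACK_GLOBAL, take the two most recent
# string operands as the (module, name) pair a scanner would compare against
# its unsafe_globals settings.
ops = list(pickletools.genops(payload))
for n, (op, _value, _pos) in enumerate(ops):
    if op.name == "STACK_GLOBAL":
        strings = [value for opcode, value, _ in ops[:n] if opcode.name in STRING_OPS]
        print(tuple(strings[-2:]))  # ('webbrowser', 'open')

The real scanner additionally skips MEMOIZE/PUT opcodes while walking back from STACK_GLOBAL and records a non-string operand as an unknown dangerous import; the sketch omits both for brevity.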