From 668e69a2f065d462d922593ae9f86574a59f37f5 Mon Sep 17 00:00:00 2001
From: Azam Din
Date: Sat, 5 Nov 2022 02:03:45 +0000
Subject: [PATCH 1/2] Changed mlflow logger to upload model_last artifact too

---
 spacy_loggers/mlflow.py | 55 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 4 deletions(-)

diff --git a/spacy_loggers/mlflow.py b/spacy_loggers/mlflow.py
index 24f943d..0dda547 100644
--- a/spacy_loggers/mlflow.py
+++ b/spacy_loggers/mlflow.py
@@ -9,8 +9,20 @@
 from spacy import Language
 from spacy import load
 from spacy.training.loggers import console_logger
+from pathlib import Path
+from loguru import logger
+# logger.remove()
+# logger.add('file.log', filter=__name__, level='DEBUG')


+class ModelDir:
+    def __init__(self) -> None:
+        self.path = None
+
+    def update(self, path: str) -> None:
+        self.path = path
+
+@logger.catch
 # entry point: spacy.MLflowLogger.v1
 def mlflow_logger_v1(
     run_id: Optional[str] = None,
@@ -19,6 +31,7 @@ def mlflow_logger_v1(
     nested: bool = False,
     tags: Optional[Dict[str, Any]] = None,
     remove_config_values: List[str] = [],
+    log_latest_dir: bool = True,
 ):
     try:
         import mlflow
@@ -33,7 +46,7 @@
         )

     console = console_logger(progress_bar=False)
-
+    
     def setup_logger(
         nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
     ) -> Tuple[Callable[[Dict[str, Any]], None], Callable[[], None]]:
@@ -58,6 +71,26 @@ def setup_logger(
             mlflow.log_params({k.replace("@", ""): v for k, v in batch})

         console_log_step, console_finalize = console(nlp, stdout, stderr)
+
+        if log_latest_dir:
+            latest_model = ModelDir()
+
+        def log_model(path, name):
+            logger.debug(f'logging model: {path}')
+            mlflow.log_artifacts(
+                path,
+                name
+            )
+            logger.debug('model artifact uploaded')
+            # Can't use below as the mlflow.spacy.log_model method seems to abort abruptly
+            # Only seems to work if it is called from within log_step but we prob don't want
+            # To force an upload every step. I don't understand this behaviour
+            # Safer to just use log_artifacts to upload the whole folder
+            # mlflow.spacy.log_model(
+            #     load(path),
+            #     'model_last_spacy'
+            # )
+            # logger.debug(f'model spacy model logged')

         def log_step(info: Optional[Dict[str, Any]]):
             console_log_step(info)
@@ -66,23 +99,37 @@ def log_step(info: Optional[Dict[str, Any]]):
                 other_scores = info["other_scores"]
                 losses = info["losses"]
                 output_path = info.get("output_path", None)
+                if log_latest_dir:
+                    latest_model.update(output_path)
+
                 if score is not None:
-                    mlflow.log_metric("score", score)
+                    mlflow.log_metric("score", score, info["step"])
                 if losses:
-                    mlflow.log_metrics({f"loss_{k}": v for k, v in losses.items()})
+                    mlflow.log_metrics({f"loss_{k}": v for k, v in losses.items()}, info["step"])
                 if isinstance(other_scores, dict):
                     mlflow.log_metrics(
                         {
                             k: v
                             for k, v in util.dict_to_dot(other_scores).items()
                             if isinstance(v, float) or isinstance(v, int)
-                        }
+                        },
+                        info["step"]
                     )
                 if output_path and score == max(info["checkpoints"])[0]:
                     nlp = load(output_path)
                     mlflow.spacy.log_model(nlp, "best")
+                    logger.debug(f'Uploading "model_best" artifact from {output_path}')
+                    log_model(output_path, 'model_best')
+

         def finalize() -> None:
+
+            if log_latest_dir:
+                logger.debug(f'Uploading "model_last" artifact from {latest_model.path}')
+                log_model(latest_model.path, 'model_last')
+                logger.debug('Uploaded model_last')
+
+            print('End run')
             console_finalize()
             mlflow.end_run()


From 03e103ac3ba0010f4fdc7f0e53c0677d8c9405d5 Mon Sep 17 00:00:00 2001
From: Azam Din
Date: Sat, 5 Nov 2022 02:11:25 +0000
Subject: [PATCH 2/2] Stripped out unnecessary logging statements

---
 spacy_loggers/mlflow.py | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/spacy_loggers/mlflow.py b/spacy_loggers/mlflow.py
index 0dda547..ef95383 100644
--- a/spacy_loggers/mlflow.py
+++ b/spacy_loggers/mlflow.py
@@ -9,10 +9,6 @@
 from spacy import Language
 from spacy import load
 from spacy.training.loggers import console_logger
-from pathlib import Path
-from loguru import logger
-# logger.remove()
-# logger.add('file.log', filter=__name__, level='DEBUG')


 class ModelDir:
@@ -22,7 +18,6 @@ def __init__(self) -> None:
     def update(self, path: str) -> None:
         self.path = path

-@logger.catch
 # entry point: spacy.MLflowLogger.v1
 def mlflow_logger_v1(
     run_id: Optional[str] = None,
@@ -76,21 +71,10 @@ def setup_logger(
             latest_model = ModelDir()

         def log_model(path, name):
-            logger.debug(f'logging model: {path}')
             mlflow.log_artifacts(
                 path,
                 name
             )
-            logger.debug('model artifact uploaded')
-            # Can't use below as the mlflow.spacy.log_model method seems to abort abruptly
-            # Only seems to work if it is called from within log_step but we prob don't want
-            # To force an upload every step. I don't understand this behaviour
-            # Safer to just use log_artifacts to upload the whole folder
-            # mlflow.spacy.log_model(
-            #     load(path),
-            #     'model_last_spacy'
-            # )
-            # logger.debug(f'model spacy model logged')

         def log_step(info: Optional[Dict[str, Any]]):
             console_log_step(info)
@@ -118,16 +102,13 @@ def log_step(info: Optional[Dict[str, Any]]):
                 if output_path and score == max(info["checkpoints"])[0]:
                     nlp = load(output_path)
                     mlflow.spacy.log_model(nlp, "best")
-                    logger.debug(f'Uploading "model_best" artifact from {output_path}')
                     log_model(output_path, 'model_best')


         def finalize() -> None:

             if log_latest_dir:
-                logger.debug(f'Uploading "model_last" artifact from {latest_model.path}')
                 log_model(latest_model.path, 'model_last')
-                logger.debug('Uploaded model_last')

             print('End run')
             console_finalize()
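
Note (sketch appended for review, not part of the patch series): with the new log_latest_dir option (default True), finalize() now uploads the directory written by the last training step as a "model_last" artifact via mlflow.log_artifacts, alongside the existing "best" model and the new "model_best" directory upload. The comment removed in PATCH 2 records why raw artifacts are used: calling mlflow.spacy.log_model from finalize() appeared to abort abruptly, so uploading the pipeline directory was judged safer. The snippet below is a minimal, self-contained illustration of that upload mechanism only; the run name and paths are hypothetical and are not taken from the patches.

    # upload_sketch.py: illustrates the directory upload behind "model_last".
    # The run name and paths here are made up for the example.
    import mlflow
    import spacy

    nlp = spacy.blank("en")              # stand-in for a trained pipeline
    nlp.to_disk("training/model-last")   # spaCy serializes the pipeline as a directory

    with mlflow.start_run(run_name="demo"):
        # log_artifacts uploads the whole directory under the given artifact path,
        # which is the call the patched logger makes from finalize()
        mlflow.log_artifacts("training/model-last", artifact_path="model_last")

With log_latest_dir enabled, the finished MLflow run therefore carries both the best-scoring checkpoint ("best"/"model_best") and the directory produced by the final training step ("model_last") in its artifact listing.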