diff --git a/CHANGELOG.md b/CHANGELOG.md index f8694b6..077b9cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and the project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0. ## [Unreleased] +### Changed +- Upgrade Azure ML SDK from V1 to V2 before [V1 deprecation](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-migrate-from-v1) ([#73](https://github.com/microsoft/molecule-generation/pull/73)) + ## [0.4.1] - 2024-01-03 ### Changed diff --git a/environment-py37.yml b/environment-py37.yml index 00161b4..4aefb9a 100644 --- a/environment-py37.yml +++ b/environment-py37.yml @@ -8,4 +8,6 @@ dependencies: - rdkit==2020.09.1.0 - tensorflow==2.1.0 - pip: - - numpy==1.19.2 + - numpy==1.19.2 + - mlflow==1.30.1 + - azureml-mlflow==1.55.0 diff --git a/environment-py38.yml b/environment-py38.yml index 4ffa715..dd6eb8e 100644 --- a/environment-py38.yml +++ b/environment-py38.yml @@ -9,3 +9,5 @@ dependencies: - tensorflow==2.6.2 - pip: - numpy==1.22.4 + - mlflow==2.17.2 + - azureml-mlflow==1.60.0 diff --git a/environment-py39.yml b/environment-py39.yml index 4a9979d..1370097 100644 --- a/environment-py39.yml +++ b/environment-py39.yml @@ -9,3 +9,5 @@ dependencies: - tensorflow==2.9.1 - pip: - numpy==1.24.3 + - mlflow==3.1.4 + - azureml-mlflow==1.60.0 diff --git a/environment.yml b/environment.yml index 6dd760e..73474aa 100644 --- a/environment.yml +++ b/environment.yml @@ -9,3 +9,5 @@ dependencies: - tensorflow<2.10 - pip: - numpy + - mlflow + - azureml-mlflow diff --git a/molecule_generation/cli/train.py b/molecule_generation/cli/train.py index 794fbcc..8f432c4 100644 --- a/molecule_generation/cli/train.py +++ b/molecule_generation/cli/train.py @@ -7,6 +7,7 @@ import time from typing import Dict, Any, Callable, Tuple, Union +import mlflow import numpy as np import tensorflow as tf import tf2_gnn.cli_utils as cli @@ -163,13 +164,6 @@ def log(msg) -> None: log(f"Dataset parameters: 
{json.dumps(training_utils.unwrap_tf_tracked_data(dataset._params))}") log(f"Model parameters: {json.dumps(training_utils.unwrap_tf_tracked_data(model._params))}") - if args.azureml_logging: - from azureml.core.run import Run - - aml_run = Run.get_context() - else: - aml_run = None - # Set up tensorboard logging. if args.tensorboard or args.profile: writer = tf.summary.create_file_writer(os.path.join(args.save_dir, "tensorboard")) @@ -185,7 +179,7 @@ def log(msg) -> None: patience=args.patience, save_dir=args.save_dir, quiet=args.quiet, - aml_run=aml_run, + should_log_aml_run=args.azureml_logging, # whether metrics should be logged to the AzureML run via MLflow profile=args.profile, ) @@ -208,12 +202,12 @@ def log(msg) -> None: try: with dataset.get_context_managed_tf_dataset(training_utils.DataFold.TEST) as test_data: _, _, test_results = model.run_on_data_iterator( - iter(test_data.tf_dataset), training=False, quiet=args.quiet, aml_run=aml_run + iter(test_data.tf_dataset), training=False, quiet=args.quiet, should_log_aml_run=args.azureml_logging ) test_metric, test_metric_string = model.compute_epoch_metrics(test_results) log(test_metric_string) - if aml_run is not None: - aml_run.log("task_test_metric", float(test_metric)) + if args.azureml_logging: + mlflow.log_metric("task_test_metric", float(test_metric)) finally: dataset._params["trace_element_keep_prob"] = orig_keep_prob dataset._params["trace_element_non_carbon_keep_prob"] = orig_non_carbon_keep_prob @@ -230,7 +224,7 @@ def train( patience: int, save_dir: str, quiet: bool = False, - aml_run=None, + should_log_aml_run=None, profile: bool = False, ): save_file = os.path.join(save_dir, f"{run_id}_best.pkl") @@ -254,7 +248,7 @@ def train( training=False, quiet=quiet, max_num_steps=num_valid_steps, - aml_run=aml_run, + should_log_aml_run=should_log_aml_run, ) best_valid_metric, best_val_str = model.compute_epoch_metrics(initial_valid_results) log_fun(f"Initial valid metric: {best_val_str}.") @@ -276,7 +270,7 
@@ def train( training=True, quiet=quiet, max_num_steps=num_train_steps_between_valid, - aml_run=aml_run, + should_log_aml_run=should_log_aml_run, ) if profile and epoch == 2: @@ -294,7 +288,7 @@ def train( training=False, quiet=quiet, max_num_steps=num_valid_steps, - aml_run=aml_run, + should_log_aml_run=should_log_aml_run, ) tf.summary.scalar("valid_loss", data=valid_loss, step=epoch) @@ -303,11 +297,11 @@ def train( f" Valid: {valid_loss:.4f} loss | {valid_metric_string} | {valid_speed:.2f} graphs/s", ) - if aml_run is not None: - aml_run.log("task_train_metric", float(train_metric)) - aml_run.log("train_speed", float(train_speed)) - aml_run.log("task_valid_metric", float(valid_metric)) - aml_run.log("valid_speed", float(valid_speed)) + if should_log_aml_run: + mlflow.log_metric("task_train_metric", float(train_metric)) + mlflow.log_metric("train_speed", float(train_speed)) + mlflow.log_metric("task_valid_metric", float(valid_metric)) + mlflow.log_metric("valid_speed", float(valid_speed)) # Save if good enough. 
if valid_metric < best_valid_metric: diff --git a/molecule_generation/models/cgvae.py b/molecule_generation/models/cgvae.py index e75b251..089c341 100644 --- a/molecule_generation/models/cgvae.py +++ b/molecule_generation/models/cgvae.py @@ -629,12 +629,12 @@ def run_on_data_iterator( quiet: bool = False, training: bool = True, max_num_steps: Optional[int] = None, # Run until dataset ends if None - aml_run: Optional = None, + should_log_aml_run: Optional[bool] = None, ) -> Tuple[float, float, List[Any]]: with EpochMetricsLogger( window_size=self._logged_loss_smoothing_window_size, quiet=quiet, - aml_run=aml_run, + should_log_aml_run=should_log_aml_run, training=training, ) as metrics_logger: for step, (batch_features, batch_labels) in enumerate(data_iterator): diff --git a/molecule_generation/models/moler_base_model.py b/molecule_generation/models/moler_base_model.py index 6f13954..a17b291 100644 --- a/molecule_generation/models/moler_base_model.py +++ b/molecule_generation/models/moler_base_model.py @@ -229,12 +229,12 @@ def run_on_data_iterator( quiet: bool = False, training: bool = True, max_num_steps: Optional[int] = None, # Run until dataset ends if None - aml_run: Optional[Any] = None, + should_log_aml_run: Optional[bool] = None, ) -> Tuple[float, float, List[Any]]: with EpochMetricsLogger( window_size=self._logged_loss_smoothing_window_size, quiet=quiet, - aml_run=aml_run, + should_log_aml_run=should_log_aml_run, training=training, ) as metrics_logger: for step, (batch_features, batch_labels) in enumerate(data_iterator): diff --git a/molecule_generation/utils/epoch_metrics_logger.py b/molecule_generation/utils/epoch_metrics_logger.py index 054ece4..a971ff5 100644 --- a/molecule_generation/utils/epoch_metrics_logger.py +++ b/molecule_generation/utils/epoch_metrics_logger.py @@ -2,6 +2,7 @@ from collections import defaultdict, deque import time +import mlflow import numpy as np @@ -10,11 +11,11 @@ class EpochMetricsLogger: """Logs metrics 
for an epoch of training""" def __init__( - self, *, window_size: int = 100, quiet: bool, aml_run: Optional, training: bool + self, *, window_size: int = 100, quiet: bool, should_log_aml_run: Optional[bool], training: bool ) -> None: self._window_size = window_size self._quiet = quiet - self._aml_run = aml_run + self._should_log_aml_run = should_log_aml_run self._training = training # Initialise everything in case you don't want to use this as a contextmanager @@ -53,9 +54,9 @@ def log_step_metrics(self, task_metrics, batch_features): ) if self._step >= self._window_size and self._step % self._window_size == 0: self._moving_average_metrics = self._get_moving_average_metrics() - if self._aml_run is not None: + if self._should_log_aml_run: for k, v in self._moving_average_metrics.items(): - self._aml_run.log("smoothed_" + k, float(v)) + mlflow.log_metric("smoothed_" + k, float(v)) # Tensorboard logging: batch_graph_average_loss = task_metrics["loss"] / float( diff --git a/molecule_generation/utils/property_models.py b/molecule_generation/utils/property_models.py index 92747d3..561fc92 100644 --- a/molecule_generation/utils/property_models.py +++ b/molecule_generation/utils/property_models.py @@ -3,6 +3,7 @@ from abc import abstractmethod from typing import Any, Dict, List, Tuple, Callable, Optional +import mlflow import numpy as np import tensorflow as tf import sklearn.metrics as metrics @@ -172,7 +173,7 @@ def print_evaluation_report( @staticmethod def log_evaluation_report( - prop_name: str, predictions, labels, aml_run=None, log_fun: Callable[[str], None] = print + prop_name: str, predictions, labels, should_log_aml_run=None, log_fun: Callable[[str], None] = print ) -> None: mae = metrics.mean_absolute_error(y_true=labels, y_pred=predictions) mse = metrics.mean_squared_error(y_true=labels, y_pred=predictions) @@ -187,15 +188,12 @@ def log_evaluation_report( log_fun(f" Explained Variance: {expl_var:.3f}") log_fun(f" R2 Score: {r2_score:.3f}") - if 
aml_run: - aml_run.log_row( - f"{prop_name}_test_metrics", - mean_abs_err=float(mae), - mse=float(mse), - max_err=float(max_err), - explained_variance=float(expl_var), - r2_score=float(r2_score), - ) + if should_log_aml_run: + mlflow.log_metric(f"{prop_name}_mean_abs_err", float(mae)) + mlflow.log_metric(f"{prop_name}_mse", float(mse)) + mlflow.log_metric(f"{prop_name}_max_err", float(max_err)) + mlflow.log_metric(f"{prop_name}_explained_variance", float(expl_var)) + mlflow.log_metric(f"{prop_name}_r2_score", float(r2_score)) class MLPBinaryClassifierLayer(MLPRegressionLayer): @@ -255,7 +253,7 @@ def print_evaluation_report( @staticmethod def log_evaluation_report( - prop_name: str, predictions, labels, aml_run=None, log_fun: Callable[[str], None] = print + prop_name: str, predictions, labels, should_log_aml_run=None, log_fun: Callable[[str], None] = print ) -> None: rounded_predictions = np.round(predictions) acc = metrics.accuracy_score(y_true=labels, y_pred=rounded_predictions) @@ -273,13 +271,10 @@ def log_evaluation_report( log_fun(f" F1 Score: {f1_score:.4f}") log_fun(f" ROC AUC: {roc_auc:.4f}") - if aml_run: - aml_run.log_row( - f"{prop_name}_test_metrics", - accuracy=float(acc), - balanced_accuracy=float(balanced_acc), - precision=float(precision), - recall=float(recall), - fl_score=float(f1_score), - roc_auc_score=float(roc_auc), - ) + if should_log_aml_run: + mlflow.log_metric(f"{prop_name}_accuracy", float(acc)) + mlflow.log_metric(f"{prop_name}_balanced_accuracy", float(balanced_acc)) + mlflow.log_metric(f"{prop_name}_precision", float(precision)) + mlflow.log_metric(f"{prop_name}_recall", float(recall)) + mlflow.log_metric(f"{prop_name}_f1_score", float(f1_score)) + mlflow.log_metric(f"{prop_name}_roc_auc_score", float(roc_auc))