Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Add mlflow experiment tracker #450

Merged
merged 55 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
d0567f4
Add write ml-flow placeholder class
debrevitatevitae Jun 7, 2024
9aa979d
some prototype
dominikandreasseitz Jun 11, 2024
e43bd92
mlconfig stuff
dominikandreasseitz Jun 11, 2024
8d84ecd
correct tracking
dominikandreasseitz Jun 13, 2024
3765702
working test
dominikandreasseitz Jun 13, 2024
6552b98
add mark
dominikandreasseitz Jun 13, 2024
ebda5ba
Working version of mlflow logging + demonstration
dominikandreasseitz Jun 13, 2024
5198b49
Ignore mlflow runs
debrevitatevitae Jun 19, 2024
e5caf08
Add hyperparameter logging with mlflow
debrevitatevitae Jun 19, 2024
d12bc24
Add figures logging with callback
debrevitatevitae Jun 20, 2024
88195ab
Add details to MLFlowConfig class
smitchaudhary Jul 4, 2024
72f1098
Sync tracking stuff in train_no_grad
debrevitatevitae Jul 9, 2024
936021e
Fix current ml tracking tests
debrevitatevitae Jul 10, 2024
8d630cc
Add hparams filtering for TB
debrevitatevitae Jul 10, 2024
eaab95a
Test all current on mlflow too
debrevitatevitae Jul 10, 2024
d9d8b48
Correct experiment creation/setup
debrevitatevitae Jul 10, 2024
0211ef8
Remove os.environ (vars do not persist bc of imorts)
debrevitatevitae Jul 10, 2024
d126505
Ignore artifacts
debrevitatevitae Jul 11, 2024
039ac8f
Move MLFlowConfig to TrainConfig
debrevitatevitae Jul 12, 2024
1e560a0
De-dataclass MLConfig
debrevitatevitae Jul 12, 2024
0ba4c2c
Lowercase variables in MLFlowConfig
debrevitatevitae Jul 12, 2024
d412965
Adjust tests and introduce test_logging
debrevitatevitae Jul 12, 2024
714939a
Add mlflow final model logging
debrevitatevitae Jul 15, 2024
64fd801
Add warning for tb model logger
debrevitatevitae Jul 15, 2024
99f42be
Add plot_tensorboard
debrevitatevitae Jul 15, 2024
8bbb293
Correct log_model default and test_hp_log_mlflow
debrevitatevitae Jul 15, 2024
4dcfcda
Assign uuid to default mlflow run id
debrevitatevitae Jul 15, 2024
cabf464
Test model logging with tb/mlflow
debrevitatevitae Jul 15, 2024
99e49ee
Refactor input data checking in log_model_mlflow
debrevitatevitae Jul 15, 2024
4a12155
Refactor mlflow tests for clarity
debrevitatevitae Jul 16, 2024
8691327
Refactor tests and test adjoint model log
debrevitatevitae Jul 16, 2024
33f857c
Add plotting test and refactor typing
debrevitatevitae Jul 16, 2024
13a5ea1
Test to see if img files are produced
debrevitatevitae Jul 16, 2024
3d26ab7
Remove user/pwd from MLConfig
debrevitatevitae Jul 16, 2024
21245b6
Add mlflow tutorial in docs
debrevitatevitae Jul 17, 2024
3d05557
Merge branch 'main' into gt/ml-flow
debrevitatevitae Jul 17, 2024
8e7fbae
Fix mlflow dependency in pyproject
debrevitatevitae Jul 17, 2024
5b7ddf0
Add author
debrevitatevitae Jul 17, 2024
89ea8a1
Fix tests
debrevitatevitae Jul 17, 2024
a0ace68
Bump version
debrevitatevitae Jul 17, 2024
a13cbd4
Fix docs
debrevitatevitae Jul 17, 2024
f95bf19
Fix TrainConfig docstring
debrevitatevitae Jul 17, 2024
14373c7
Correct typing in TrainConfig
debrevitatevitae Jul 17, 2024
b018e0e
Correct typo in docs/tutorials/qml/ml_tools.md
debrevitatevitae Jul 17, 2024
243366c
Clarify mlflow docs
debrevitatevitae Jul 17, 2024
d2a917b
Lexical improvement in ml_tools.md
debrevitatevitae Jul 17, 2024
fee4626
Fix typing of InputData type
debrevitatevitae Jul 17, 2024
aa98dfc
Fix typing in log_model_mlflow
debrevitatevitae Jul 17, 2024
7df3c35
Fix typing with tracker mappings
debrevitatevitae Jul 17, 2024
fdf05f1
Merge remote-tracking branch 'refs/remotes/origin/gt/ml-flow' into gt…
debrevitatevitae Jul 17, 2024
249dccb
Minor reformatting
debrevitatevitae Jul 17, 2024
eb9a98c
Turn args of trackers into variadic
debrevitatevitae Jul 17, 2024
a0706bb
Merge branch 'main' into gt/ml-flow
smitchaudhary Jul 19, 2024
aa9d9a2
remove stray comments
smitchaudhary Jul 19, 2024
e91dfd7
Merge branch 'main' into gt/ml-flow
smitchaudhary Jul 19, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,6 @@ events.out.tfevents.*
*.dvi

*.gv

# mlflow
mlruns/
57 changes: 57 additions & 0 deletions docs/tutorials/qml/ml_tools.md
Original file line number Diff line number Diff line change
Expand Up @@ -284,3 +284,60 @@ def train(

return model, optimizer
```

## MLTools with mlflow

The ML tools module now supports experiment tracking with MLflow, in addition to tensorboard.


```python
from __future__ import annotations

import os
from itertools import count
import torch
from torch.utils.data import DataLoader

from qadence.ml_tools import (
TrainConfig,
train_with_grad,

)
from qadence.ml_tools.data import to_dataloader
from qadence.ml_tools.utils import rand_featureparameters
from qadence.models import QNN, QuantumModel
from qadence.types import ExperimentTrackingTool
from qadence import QuantumCircuit, hea, Z

os.environ['MLFLOW_TRACKING_URI'] = 'sqlite:///mlflow.db'
os.environ['MLFLOW_EXPERIMENT'] = 'mlflow_demonstration'
os.environ['MLFLOW_RUN_NAME'] = 'test_0'

# in case you want to track remotely
# os.environ['MLFLOW_TRACKING_USERNAME'] =
# os.environ['MLFLOW_TRACKING_PASSWORD'] =
def dataloader(batch_size: int = 25) -> DataLoader:
    """Build an infinite dataloader over (x, cos(x)) pairs on [0, 1]."""
    features = torch.linspace(0, 1, batch_size).reshape(-1, 1)
    targets = torch.cos(features)
    return to_dataloader(features, targets, batch_size=batch_size, infinite=True)
data = dataloader()
model = QNN(QuantumCircuit(2, hea(2,1)), observable=Z(0))
cnt = count()
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
inputs = rand_featureparameters(model, 1)

def loss_fn(model: QuantumModel, data: torch.Tensor) -> tuple[torch.Tensor, dict]:
    """MSE loss of the model expectation against a random target.

    Advances the global iteration counter `cnt`; returns (loss, extra-metrics).
    """
    next(cnt)  # track how many times the loss has been evaluated
    prediction = model.expectation(inputs)
    return criterion(prediction, torch.rand(1)), {}

config = TrainConfig(
folder='mlflow_demonstration', max_iter=10, checkpoint_every=1, write_every=1, tracking_tool=ExperimentTrackingTool.MLFLOW
)
train_with_grad(model, data, optimizer, config, loss_fn=loss_fn)

os.system('mlflow ui --port 5000')
os.system('mlflow ui --backend-store-uri sqlite:///mlflow.db')
```
93 changes: 93 additions & 0 deletions docs/tutorials/qml/mlflow_demonstration.py
debrevitatevitae marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from __future__ import annotations

import os
import random
from itertools import count

import numpy as np
import torch
from matplotlib import pyplot as plt
from matplotlib.figure import Figure
from torch.utils.data import DataLoader

from qadence import QuantumCircuit, Z, hea
from qadence.constructors import feature_map, hamiltonian_factory
from qadence.ml_tools import TrainConfig, train_with_grad
from qadence.ml_tools.data import to_dataloader
from qadence.ml_tools.utils import rand_featureparameters
from qadence.models import QNN, QuantumModel
from qadence.types import ExperimentTrackingTool

os.environ["MLFLOW_TRACKING_URI"] = "sqlite:///mlflow.db"
os.environ["MLFLOW_EXPERIMENT"] = "mlflow_demonstration"
os.environ["MLFLOW_RUN_NAME"] = "test_0"

hyperparams = {
"seed": 42,
"batch_size": 10,
"n_qubits": 2,
"ansatz_depth": 1,
"observable": Z,
}

np.random.seed(hyperparams["seed"])
torch.manual_seed(hyperparams["seed"])
random.seed(hyperparams["seed"])


# in case you want to track remotely
# os.environ['MLFLOW_TRACKING_USERNAME'] =
# os.environ['MLFLOW_TRACKING_PASSWORD'] =
def dataloader(batch_size: int = 25) -> DataLoader:
    """Return an infinite dataloader of (x, cos(x)) samples on [0, 1]."""
    xs = torch.linspace(0, 1, batch_size).reshape(-1, 1)
    ys = torch.cos(xs)
    return to_dataloader(xs, ys, batch_size=batch_size, infinite=True)


obs = hamiltonian_factory(register=hyperparams["n_qubits"], detuning=hyperparams["observable"])

data = dataloader(hyperparams["batch_size"])
fm = feature_map(hyperparams["n_qubits"], param="x")
model = QNN(
QuantumCircuit(
hyperparams["n_qubits"], fm, hea(hyperparams["n_qubits"], hyperparams["ansatz_depth"])
),
observable=obs,
inputs=["x"],
)
cnt = count()
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
inputs = rand_featureparameters(model, 1)


def loss_fn(model: QuantumModel, data: torch.Tensor) -> tuple[torch.Tensor, dict]:
    """MSE loss between the model expectation and a random target.

    Bumps the module-level counter `cnt`; the second return value carries
    no extra metrics.
    """
    next(cnt)  # one tick per loss evaluation
    expectation = model.expectation(inputs)
    return criterion(expectation, torch.rand(1)), {}


def plot_fn(model: QuantumModel, iteration: int) -> tuple[str, Figure]:
    """Plot the model prediction over [0, 1]; returns (artifact name, figure)."""
    artifact_name = f"ufa_prediction_epoch_{iteration}.png"
    grid = torch.linspace(0, 1, 100).reshape(-1, 1)
    prediction = model.expectation(grid)
    figure, axes = plt.subplots()
    axes.plot(grid.detach().numpy(), prediction.detach().numpy())
    return artifact_name, figure


config = TrainConfig(
folder="mlflow_demonstration",
max_iter=10,
checkpoint_every=1,
plot_every=2,
write_every=1,
tracking_tool=ExperimentTrackingTool.MLFLOW,
hyperparams=hyperparams,
plotting_functions=(plot_fn,),
)
train_with_grad(model, data, optimizer, config, loss_fn=loss_fn)

os.system("mlflow ui --port 5000")
os.system("mlflow ui --backend-store-uri sqlite:///mlflow.db")
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ horqrux = [
protocols = ["qadence-protocols"]
libs = ["qadence-libs"]
dlprof = ["nvidia-pyindex", "nvidia-dlprof[pytorch]"]
mlflow = ["mlflow"]
all = [
"pulser-core==0.18.0",
"pulser-simulation==0.18.0",
Expand Down
54 changes: 54 additions & 0 deletions qadence/ml_tools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,17 @@
import datetime
import os
from dataclasses import dataclass
from logging import getLogger
from pathlib import Path
from typing import Callable, Optional
from uuid import uuid4

from matplotlib.figure import Figure
from torch.nn import Module

from qadence.types import ExperimentTrackingTool

logger = getLogger(__name__)


@dataclass
Expand All @@ -29,6 +38,11 @@ class TrainConfig:
"""Write tensorboard logs."""
checkpoint_every: int = 5000
"""Write model/optimizer checkpoint."""
plot_every: Optional[int] = None
smitchaudhary marked this conversation as resolved.
Show resolved Hide resolved
"""Write figures.

NOTE: currently only works with mlflow.
"""
folder: Optional[Path] = None
smitchaudhary marked this conversation as resolved.
Show resolved Hide resolved
"""Checkpoint/tensorboard logs folder."""
create_subfolder_per_run: bool = False
Expand All @@ -46,6 +60,14 @@ class TrainConfig:
"""The batch_size to use when passing a list/tuple of torch.Tensors."""
verbose: bool = True
"""Whether or not to print out metrics values during training."""
tracking_tool: ExperimentTrackingTool = ExperimentTrackingTool.TENSORBOARD
"""The tracking tool of choice."""
hyperparams: Optional[dict] = None
smitchaudhary marked this conversation as resolved.
Show resolved Hide resolved
"""Hyperparameters to track."""
plotting_functions: Optional[tuple[Callable[[Module, int], tuple[str, Figure]]]] = None
smitchaudhary marked this conversation as resolved.
Show resolved Hide resolved
"""Functions for in-train plotting."""

# mlflow_callbacks: list[Callable] = [write_mlflow_figure(), write_x()]

def __post_init__(self) -> None:
if self.folder:
Expand All @@ -60,3 +82,35 @@ def __post_init__(self) -> None:
self.trainstop_criterion = lambda x: x <= self.max_iter
if self.validation_criterion is None:
self.validation_criterion = lambda x: False
if self.plot_every and self.tracking_tool != ExperimentTrackingTool.MLFLOW:
raise NotImplementedError("In-training plots are only available with mlflow tracking.")
if self.plot_every and self.plotting_functions is None:
smitchaudhary marked this conversation as resolved.
Show resolved Hide resolved
logger.warning("Plots tracking is required, but no plotting functions are provided.")


@dataclass
class MLFlowConfig:
    """Configuration for mlflow tracking, read from environment variables.

    Set the relevant variables before running, e.g. for a remote server:

        export MLFLOW_TRACKING_URI=tracking_uri
        export MLFLOW_TRACKING_USERNAME=username
        export MLFLOW_TRACKING_PASSWORD=password
    """

    # Tracking server URI; an empty string makes mlflow use its local default.
    MLFLOW_TRACKING_URI: str = os.getenv("MLFLOW_TRACKING_URI", "")
    # Credentials for remote tracking; mlflow itself reads them from the env,
    # they are kept here only to detect/report a remote setup.
    MLFLOW_TRACKING_USERNAME: str = os.getenv("MLFLOW_TRACKING_USERNAME", "")
    MLFLOW_TRACKING_PASSWORD: str = os.getenv("MLFLOW_TRACKING_PASSWORD", "")
    # Experiment name; a fresh UUID avoids collisions when none is provided.
    EXPERIMENT: str = os.getenv("MLFLOW_EXPERIMENT", str(uuid4()))
    RUN_NAME: str = os.getenv("MLFLOW_RUN_NAME", "test_0")

    def __post_init__(self) -> None:
        """Point mlflow at the tracking URI, select the experiment and start a run."""
        import mlflow  # local import: mlflow is an optional dependency

        if self.MLFLOW_TRACKING_USERNAME != "":
            logger.info(
                f"Initialized mlflow remote logging for user {self.MLFLOW_TRACKING_USERNAME}."
            )
        mlflow.set_tracking_uri(self.MLFLOW_TRACKING_URI)
        mlflow.set_experiment(self.EXPERIMENT)
        mlflow.start_run(run_name=self.RUN_NAME, nested=False)
68 changes: 67 additions & 1 deletion qadence/ml_tools/printing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
from __future__ import annotations

from typing import Any, Callable

from matplotlib.figure import Figure
from torch.nn import Module
from torch.utils.tensorboard import SummaryWriter

from qadence.types import ExperimentTrackingTool


def print_metrics(loss: float | None, metrics: dict, iteration: int) -> None:
msg = " ".join(
Expand All @@ -19,5 +25,65 @@ def write_tensorboard(
writer.add_scalar(key, arg, iteration)


def log_hyperparams(writer: SummaryWriter, hyperparams: dict, metrics: dict) -> None:
def log_hyperparams_tensorboard(writer: SummaryWriter, hyperparams: dict, metrics: dict) -> None:
    """Log hyperparameters together with their associated metrics to tensorboard."""
    writer.add_hparams(hyperparams, metrics)


def plot_tensorboard(
    writer: SummaryWriter, iteration: int, plotting_functions: tuple[Callable]
) -> None:
    """Placeholder for in-training figure logging; only mlflow supports plots so far."""
    raise NotImplementedError("Plot logging with tensorboard is not implemented")


def write_mlflow(writer: Any, loss: float | None, metrics: dict, iteration: int) -> None:
    """Log the loss and further metrics to mlflow at the given step.

    Args:
        writer: an mlflow handle exposing ``log_metrics``.
        loss: scalar loss value; skipped when ``None`` (the previous code
            would raise ``TypeError`` on ``float(None)``).
        metrics: extra scalar metrics, logged individually.
        iteration: training step the values belong to.
    """
    if loss is not None:  # the signature admits None; mlflow cannot log it
        writer.log_metrics({"loss": float(loss)}, step=iteration)
    writer.log_metrics(metrics, step=iteration)  # logs the single metrics


def log_hyperparams_mlflow(writer: Any, hyperparams: dict, metrics: dict) -> None:
    """Log hyperparameters to mlflow.

    ``metrics`` is unused here; it is kept so the signature matches the
    tensorboard counterpart used through the same dispatch mapping.
    """
    writer.log_params(hyperparams)  # type: ignore


def plot_mlflow(
    writer: Any,
    model: Module,
    iteration: int,
    plotting_functions: tuple[Callable[[Module, int], tuple[str, Figure]]],
) -> None:
    """Render each plotting function and log the resulting figure to mlflow.

    Args:
        writer: an mlflow handle exposing ``log_figure`` (the previous
            ``SummaryWriter`` annotation was wrong — tensorboard writers
            have no ``log_figure``).
        model: model handed to each plotting function.
        iteration: training step passed to each plotting function.
        plotting_functions: callables mapping (model, iteration) to an
            (artifact name, figure) pair.
    """
    for pf in plotting_functions:
        descr, fig = pf(model, iteration)
        writer.log_figure(fig, descr)


# Dispatch tables keyed by tracking tool: the generic entry points below
# (write_tracker / log_tracker / plot_tracker) resolve the tool-specific
# implementation through these mappings.

# Per-step metric writing.
TRACKER_MAPPING = {
    ExperimentTrackingTool.TENSORBOARD: write_tensorboard,
    ExperimentTrackingTool.MLFLOW: write_mlflow,
}

# One-off hyperparameter logging.
LOGGER_MAPPING = {
    ExperimentTrackingTool.TENSORBOARD: log_hyperparams_tensorboard,
    ExperimentTrackingTool.MLFLOW: log_hyperparams_mlflow,
}

# In-training figure logging (tensorboard entry raises NotImplementedError).
PLOTTER_MAPPING = {
    ExperimentTrackingTool.TENSORBOARD: plot_tensorboard,
    ExperimentTrackingTool.MLFLOW: plot_mlflow,
}


def write_tracker(
    args: Any, tracking_tool: ExperimentTrackingTool = ExperimentTrackingTool.TENSORBOARD
) -> None:
    """Write per-step metrics through the writer of the configured tracking tool."""
    writer_fn = TRACKER_MAPPING[tracking_tool]
    return writer_fn(*args)


def log_tracker(
    args: Any, tracking_tool: ExperimentTrackingTool = ExperimentTrackingTool.TENSORBOARD
) -> None:
    """Log hyperparameters through the logger of the configured tracking tool."""
    logger_fn = LOGGER_MAPPING[tracking_tool]
    return logger_fn(*args)


def plot_tracker(
    args: Any, tracking_tool: ExperimentTrackingTool = ExperimentTrackingTool.TENSORBOARD
) -> None:
    """Log figures through the plotter of the configured tracking tool."""
    plotter_fn = PLOTTER_MAPPING[tracking_tool]
    return plotter_fn(*args)  # type: ignore
Loading