Skip to content

Commit

Permalink
Feature notebook artifacts (#1807)
Browse files Browse the repository at this point in the history
  • Loading branch information
vanpelt authored Feb 26, 2021
1 parent 0bc6864 commit 1f53a33
Show file tree
Hide file tree
Showing 27 changed files with 713 additions and 98 deletions.
73 changes: 59 additions & 14 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import psutil
import atexit
import wandb
import shutil
from wandb.util import mkdir_exists_ok
from six.moves import urllib

Expand Down Expand Up @@ -61,6 +62,7 @@

def test_cleanup(*args, **kwargs):
global server
print("Shutting down mock server")
server.terminate()
print("Open files during tests: ")
proc = psutil.Process()
Expand Down Expand Up @@ -198,7 +200,7 @@ def dummy_api_key():


@pytest.fixture
def test_settings(test_dir, mocker):
def test_settings(test_dir, mocker, live_mock_server):
""" Settings object for tests"""
# TODO: likely not the right thing to do, we shouldn't be setting this
wandb._IS_INTERNAL_PROCESS = False
Expand All @@ -207,10 +209,9 @@ def test_settings(test_dir, mocker):
wandb_dir = os.path.join(os.getcwd(), "wandb")
mkdir_exists_ok(wandb_dir)
# root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
# TODO: consider making a debuggable directory that stays around...
settings = wandb.Settings(
_start_time=time.time(),
base_url="http://localhost",
base_url=live_mock_server.base_url,
root_dir=os.getcwd(),
save_code=True,
project="test",
Expand Down Expand Up @@ -313,38 +314,82 @@ def live_mock_server(request):


@pytest.fixture
def notebook(live_mock_server):
def notebook(live_mock_server, test_dir):
"""This launches a live server, configures a notebook to use it, and enables
devs to execute arbitrary cells. See tests/test_notebooks.py
TODO: we should launch a single server on boot and namespace requests by host"""
"""

@contextmanager
def notebook_loader(nb_path, kernel_name="wandb_python", **kwargs):
def notebook_loader(nb_path, kernel_name="wandb_python", save_code=True, **kwargs):
with open(utils.notebook_path("setup.ipynb")) as f:
setupnb = nbformat.read(f, as_version=4)
setupcell = setupnb["cells"][0]
# Ensure the notebook talks to our mock server
new_source = setupcell["source"].replace(
"__WANDB_BASE_URL__", live_mock_server.base_url
"__WANDB_BASE_URL__", live_mock_server.base_url,
)
if save_code:
new_source = new_source.replace("__WANDB_NOTEBOOK_NAME__", nb_path)
else:
new_source = new_source.replace("__WANDB_NOTEBOOK_NAME__", "")
setupcell["source"] = new_source

with open(utils.notebook_path(nb_path)) as f:
nb_path = utils.notebook_path(nb_path)
shutil.copy(nb_path, os.path.join(os.getcwd(), os.path.basename(nb_path)))
with open(nb_path) as f:
nb = nbformat.read(f, as_version=4)
nb["cells"].insert(0, setupcell)

client = utils.WandbNotebookClient(nb)
with client.setup_kernel(**kwargs):
# Run setup commands for mocks
client.execute_cell(0, store_history=False)
yield client
try:
client = utils.WandbNotebookClient(nb, kernel_name=kernel_name)
with client.setup_kernel(**kwargs):
# Run setup commands for mocks
client.execute_cells(-1, store_history=False)
yield client
finally:
with open(os.path.join(os.getcwd(), "notebook.log"), "w") as f:
f.write(client.all_output_text())
wandb.termlog("Find debug logs at: %s" % os.getcwd())
wandb.termlog(client.all_output_text())

notebook_loader.base_url = live_mock_server.base_url

return notebook_loader


@pytest.fixture
def mocked_module(monkeypatch):
    """Return a factory that mocks modules loaded via wandb.util.get_module.

    Calling the factory with a module name patches ``wandb.util.get_module``
    through ``monkeypatch`` and returns the ``MagicMock`` that will be handed
    out for that name. Lookups for any other name are delegated to whatever
    ``get_module`` was installed when the factory was called, so the factory
    can be used for several modules within one test.
    """

    def mock_get_module(module):
        # Capture the currently installed function so nested mocks chain.
        orig_get_module = wandb.util.get_module
        module_mock = MagicMock()

        def get_module(mod, *args, **kwargs):
            # Accept and forward any extra arguments so callers that pass more
            # than the module name keep working while the patch is active.
            if mod == module:
                return module_mock
            return orig_get_module(mod, *args, **kwargs)

        monkeypatch.setattr(wandb.util, "get_module", get_module)
        return module_mock

    return mock_get_module


@pytest.fixture
def mocked_ipython(monkeypatch):
    """Yield a MagicMock that ``wandb.jupyter.get_ipython`` returns during a test.

    ``_get_python_type`` in the wandb settings module is patched to report
    "jupyter", and ``wandb.jupyter.get_ipython`` is swapped by hand for the
    duration of the test, then put back after the yield.
    """

    def _jupyter_python_type():
        return "jupyter"

    monkeypatch.setattr(
        wandb.wandb_sdk.wandb_settings, "_get_python_type", _jupyter_python_type
    )
    fake_shell = MagicMock()

    def _fake_get_ipython():
        return fake_shell

    # TODO: this is really unfortunate, for reasons not clear to me, monkeypatch doesn't work
    saved_get_ipython = wandb.jupyter.get_ipython
    wandb.jupyter.get_ipython = _fake_get_ipython
    yield fake_shell
    wandb.jupyter.get_ipython = saved_get_ipython


def default_wandb_args():
"""This allows us to parameterize the wandb_init_run fixture
The most general arg is "env", you can call:
Expand Down
57 changes: 57 additions & 0 deletions tests/notebooks/code_saving.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": 3
},
"orig_nbformat": 2
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import wandb"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"wandb.init(project=\"code_save\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"Running some code\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"wandb.finish()"
]
}
]
}
4 changes: 2 additions & 2 deletions tests/notebooks/setup.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
"source": [
"\"\"\"This sets up our testing environment. Currently only the first cell of this notebook is run\"\"\"\n",
"import os\n",
"\n",
"os.environ[\"WANDB_API_KEY\"] = \"1824812581259009ca9981580f8f8a9012409eee\"\n",
"os.environ[\"WANDB_BASE_URL\"] = \"__WANDB_BASE_URL__\""
"os.environ[\"WANDB_BASE_URL\"] = \"__WANDB_BASE_URL__\"\n",
"os.environ[\"WANDB_NOTEBOOK_NAME\"] = \"__WANDB_NOTEBOOK_NAME__\""
]
}
],
Expand Down
1 change: 1 addition & 0 deletions tests/test_library_public.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ def test_library_root():
"upsert_artifact",
"finish_artifact",
"use_artifact",
"log_code",
"alert",
"define_metric",
# "summary", # really this should be here
Expand Down
91 changes: 85 additions & 6 deletions tests/test_notebooks.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import sys
import os
import platform
import pytest

import json
import sys
import wandb

pytestmark = pytest.mark.skipif(
sys.version_info < (3, 5) or platform.system() == "Windows",
Expand All @@ -11,16 +13,93 @@

def test_one_cell(notebook):
with notebook("one_cell.ipynb") as nb:
nb.execute_cell(cell_index=1)
output = nb.cell_output(1)
nb.execute_all()
output = nb.cell_output(0)
print(output)
assert "lovely-dawn-32" in output[-1]["data"]["text/html"]
# assert "Failed to query for notebook name" not in text


def test_magic(notebook):
with notebook("magic.ipynb") as nb:
nb.execute_cell(cell_index=[1, 2])
output = nb.cell_output(2)
nb.execute_all()
output = nb.cell_output(1)
print(output)
assert notebook.base_url in output[0]["data"]["text/html"]


def test_code_saving(notebook, live_mock_server):
    """Run the code_saving notebook three ways and check the outcome of each.

    First with the notebook name configured (artifacts are recorded by the
    mock server), then with ``save_code=False`` (a detection failure message
    is expected), and finally with the notebook file removed from the working
    directory (a warning about WANDB_NOTEBOOK_NAME is expected).
    """
    notebook_file = "code_saving.ipynb"

    # TODO: this is awfully slow, we should likely run these in parallel
    with notebook(notebook_file) as nb:
        nb.execute_all()
        artifacts = live_mock_server.get_ctx()["artifacts"]
        first_collection = list(artifacts)[0]
        # We run 3 cells after calling wandb.init
        assert len(artifacts[first_collection]) == 3

    with notebook(notebook_file, save_code=False) as nb:
        nb.execute_all()
        assert "Failed to detect the name of this notebook" in nb.all_output_text()

    # Let's make sure we warn the user if they lie to us.
    with notebook(notebook_file) as nb:
        os.remove(notebook_file)
        nb.execute_all()
        assert "WANDB_NOTEBOOK_NAME should be a path" in nb.all_output_text()


def test_notebook_not_exists(mocked_ipython, live_mock_server, capsys, test_settings):
    """Check that wandb.init warns when WANDB_NOTEBOOK_NAME names a missing file.

    The environment variable is restored in a ``finally`` block so that a
    failing ``wandb.init`` or assertion cannot leak it into later tests, and
    so that a value set before the test is not lost.
    """
    previous = os.environ.get("WANDB_NOTEBOOK_NAME")
    os.environ["WANDB_NOTEBOOK_NAME"] = "fake.ipynb"
    try:
        wandb.init(settings=test_settings)
        _, err = capsys.readouterr()
        assert "WANDB_NOTEBOOK_NAME should be a path" in err
    finally:
        if previous is None:
            os.environ.pop("WANDB_NOTEBOOK_NAME", None)
        else:
            os.environ["WANDB_NOTEBOOK_NAME"] = previous


def test_notebook_metadata_jupyter(mocker, mocked_module, live_mock_server):
    """Check notebook_metadata with ipykernel and jupyter_server.serverapp mocked.

    The mocked server list contains one entry whose URL is the live mock
    server and whose notebook directory is "/test".
    """
    connect = mocker.patch("ipykernel.connect")
    connect.get_connection_file.return_value = "kernel-12345.json"

    jupyter_serverapp = mocked_module("jupyter_server.serverapp")
    running_servers = [{"url": live_mock_server.base_url, "notebook_dir": "/test"}]
    jupyter_serverapp.list_running_servers.return_value = running_servers

    expected = {"path": "test.ipynb", "root": "/test", "name": "test.ipynb"}
    assert wandb.jupyter.notebook_metadata(False) == expected


def test_notebook_metadata_no_servers(mocker, mocked_module):
    """Check notebook_metadata returns {} when the mocked server list is empty."""
    connect = mocker.patch("ipykernel.connect")
    connect.get_connection_file.return_value = "kernel-12345.json"

    jupyter_serverapp = mocked_module("jupyter_server.serverapp")
    jupyter_serverapp.list_running_servers.return_value = []

    assert wandb.jupyter.notebook_metadata(False) == {}


def test_notebook_metadata_colab(mocked_module):
    """Check notebook_metadata against a mocked google.colab blocking request."""
    colab_response = {"ipynb": {"metadata": {"colab": {"name": "colab.ipynb"}}}}
    google_colab = mocked_module("google.colab")
    google_colab._message.blocking_request.return_value = colab_response

    expected = {
        "root": "/content",
        "path": "colab.ipynb",
        "name": "colab.ipynb",
    }
    assert wandb.jupyter.notebook_metadata(False) == expected


def test_notebook_metadata_kaggle(mocker, mocked_module):
    """Check notebook_metadata against a mocked kaggle_session client.

    KAGGLE_KERNEL_RUN_TYPE is set for the duration of this test only.
    """
    # patch.dict is undone when the mocker fixture is torn down, so the
    # variable no longer leaks into tests that run afterwards.
    mocker.patch.dict(os.environ, {"KAGGLE_KERNEL_RUN_TYPE": "test"})
    kaggle = mocked_module("kaggle_session")
    kaggle_client = mocker.MagicMock()
    kaggle_client.get_exportable_ipynb.return_value = {
        "source": json.dumps({"metadata": {}, "cells": []})
    }
    kaggle.UserSessionClient.return_value = kaggle_client
    meta = wandb.jupyter.notebook_metadata(False)
    assert meta == {
        "root": "/kaggle/working",
        "path": "kaggle.ipynb",
        "name": "kaggle.ipynb",
    }
26 changes: 23 additions & 3 deletions tests/utils/mock_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,12 @@ def run(ctx):
}


def artifact(ctx, collection_name="mnist"):
def artifact(ctx, collection_name="mnist", state="COMMITTED"):
return {
"id": ctx["page_count"],
"digest": "abc123",
"description": "",
"state": "COMMITTED",
"state": state,
"size": 10000,
"createdAt": datetime.now().isoformat(),
"updatedAt": datetime.now().isoformat(),
Expand Down Expand Up @@ -525,6 +525,11 @@ def graphql():
return json.dumps({"data": {"prepareFiles": {"files": {"edges": nodes}}}})
if "mutation CreateArtifact(" in body["query"]:
collection_name = body["variables"]["artifactCollectionNames"][0]
ctx["artifacts"] = ctx.get("artifacts", {})
ctx["artifacts"][collection_name] = ctx["artifacts"].get(
collection_name, []
)
ctx["artifacts"][collection_name].append(body["variables"])
return {
"data": {"createArtifact": {"artifact": artifact(ctx, collection_name)}}
}
Expand Down Expand Up @@ -603,7 +608,11 @@ def graphql():
}
if "query Artifact(" in body["query"]:
art = artifact(ctx)
art["artifactType"] = {"id": 1, "name": "dataset"}
# code artifacts use source-RUNID names, we return the code type
if "source" in body["variables"]["name"]:
art["artifactType"] = {"id": 2, "name": "code"}
else:
art["artifactType"] = {"id": 1, "name": "dataset"}
return {"data": {"project": {"artifact": art}}}
if "query ArtifactManifest(" in body["query"]:
art = artifact(ctx)
Expand Down Expand Up @@ -778,6 +787,17 @@ def k8s_pod():
else:
return b"", 500

@app.route("/api/sessions")
def jupyter_sessions():
    """Serve a fixed, single-entry session list as a JSON string."""
    session = {
        "kernel": {"id": "12345"},
        "notebook": {"path": "test.ipynb", "name": "test.ipynb"},
    }
    return json.dumps([session])

@app.route("/pypi/<library>/json")
def pypi(library):
version = getattr(wandb, "__hack_pypi_latest_version__", wandb.__version__)
Expand Down
Loading

0 comments on commit 1f53a33

Please sign in to comment.