Feature notebook artifacts (#1807)

wandb · Feb 26, 2021 · 1f53a33 · 1f53a33
1 parent 0bc6864
commit 1f53a33
Show file tree

Hide file tree

Showing 27 changed files with 713 additions and 98 deletions.
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -23,6 +23,7 @@
 import psutil
 import atexit
 import wandb
+import shutil
 from wandb.util import mkdir_exists_ok
 from six.moves import urllib
 
@@ -61,6 +62,7 @@
 
 def test_cleanup(*args, **kwargs):
     global server
+    print("Shutting down mock server")
     server.terminate()
     print("Open files during tests: ")
     proc = psutil.Process()
@@ -198,7 +200,7 @@ def dummy_api_key():
 
 
 @pytest.fixture
-def test_settings(test_dir, mocker):
+def test_settings(test_dir, mocker, live_mock_server):
     """ Settings object for tests"""
     #  TODO: likely not the right thing to do, we shouldn't be setting this
     wandb._IS_INTERNAL_PROCESS = False
@@ -207,10 +209,9 @@ def test_settings(test_dir, mocker):
     wandb_dir = os.path.join(os.getcwd(), "wandb")
     mkdir_exists_ok(wandb_dir)
     # root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-    # TODO: consider making a debugable directory that stays around...
     settings = wandb.Settings(
         _start_time=time.time(),
-        base_url="http://localhost",
+        base_url=live_mock_server.base_url,
         root_dir=os.getcwd(),
         save_code=True,
         project="test",
@@ -313,38 +314,82 @@ def live_mock_server(request):
 
 
 @pytest.fixture
-def notebook(live_mock_server):
+def notebook(live_mock_server, test_dir):
     """This launches a live server, configures a notebook to use it, and enables
     devs to execute arbitrary cells.  See tests/test_notebooks.py
-
-    TODO: we should launch a single server on boot and namespace requests by host"""
+
+    Returns a context-manager factory ``notebook_loader(nb_path, kernel_name,
+    save_code, **kwargs)``.  The loader reads the first cell of setup.ipynb,
+    substitutes the mock server URL for ``__WANDB_BASE_URL__`` and either
+    ``nb_path`` (save_code=True) or an empty string (save_code=False) for
+    ``__WANDB_NOTEBOOK_NAME__``, copies the requested notebook into the current
+    working directory, prepends the setup cell and yields a notebook client.
+    Extra ``kwargs`` are forwarded to ``client.setup_kernel``.  On exit the
+    collected output is written to ``notebook.log`` in the working directory
+    and echoed through ``wandb.termlog``.
+
+    ``test_dir`` is not used in the body; presumably it is requested so the
+    working directory is a per-test directory -- confirm against that fixture.
+    """
 
     @contextmanager
-    def notebook_loader(nb_path, kernel_name="wandb_python", **kwargs):
+    def notebook_loader(nb_path, kernel_name="wandb_python", save_code=True, **kwargs):
         with open(utils.notebook_path("setup.ipynb")) as f:
             setupnb = nbformat.read(f, as_version=4)
             setupcell = setupnb["cells"][0]
             # Ensure the notebooks talks to our mock server
             new_source = setupcell["source"].replace(
-                "__WANDB_BASE_URL__", live_mock_server.base_url
+                "__WANDB_BASE_URL__", live_mock_server.base_url,
             )
+            if save_code:
+                new_source = new_source.replace("__WANDB_NOTEBOOK_NAME__", nb_path)
+            else:
+                new_source = new_source.replace("__WANDB_NOTEBOOK_NAME__", "")
             setupcell["source"] = new_source
 
-        with open(utils.notebook_path(nb_path)) as f:
+        nb_path = utils.notebook_path(nb_path)
+        shutil.copy(nb_path, os.path.join(os.getcwd(), os.path.basename(nb_path)))
+        with open(nb_path) as f:
             nb = nbformat.read(f, as_version=4)
         nb["cells"].insert(0, setupcell)
 
-        client = utils.WandbNotebookClient(nb)
-        with client.setup_kernel(**kwargs):
-            # Run setup commands for mocks
-            client.execute_cell(0, store_history=False)
-            yield client
+        # Build the client before entering the try block: if construction fails
+        # there is no output to log, and the finally clause must never see an
+        # unbound `client` (that would mask the real error with UnboundLocalError).
+        client = utils.WandbNotebookClient(nb, kernel_name=kernel_name)
+        try:
+            with client.setup_kernel(**kwargs):
+                # Run setup commands for mocks
+                client.execute_cells(-1, store_history=False)
+                yield client
+        finally:
+            with open(os.path.join(os.getcwd(), "notebook.log"), "w") as f:
+                f.write(client.all_output_text())
+            wandb.termlog("Find debug logs at: %s" % os.getcwd())
+            wandb.termlog(client.all_output_text())
 
     notebook_loader.base_url = live_mock_server.base_url
 
     return notebook_loader
 
 
+@pytest.fixture
+def mocked_module(monkeypatch):
+    """This allows us to mock modules loaded via wandb.util.get_module
+
+    Returns a function ``mock_get_module(module)``.  Calling it replaces
+    ``wandb.util.get_module`` (through monkeypatch) with a wrapper that hands
+    back a fresh MagicMock when asked for ``module`` and defers to the
+    previously installed ``get_module`` for every other name.  The MagicMock
+    is returned so the test can configure it.
+    """
+
+    def mock_get_module(module):
+        # Capture whatever get_module is installed right now so other lookups still work
+        orig_get_module = wandb.util.get_module
+        mocked_module = MagicMock()
+
+        def get_module(mod):
+            if mod == module:
+                return mocked_module
+            else:
+                return orig_get_module(mod)
+
+        monkeypatch.setattr(wandb.util, "get_module", get_module)
+        return mocked_module
+
+    return mock_get_module
+
+
+@pytest.fixture
+def mocked_ipython(monkeypatch):
+    """Make wandb behave as if it were running inside Jupyter.
+
+    Patches ``wandb_settings._get_python_type`` to return "jupyter" and swaps
+    ``wandb.jupyter.get_ipython`` for a lambda returning a MagicMock, which is
+    yielded to the test.  The original ``get_ipython`` is put back after the
+    yield.
+    """
+    monkeypatch.setattr(
+        wandb.wandb_sdk.wandb_settings, "_get_python_type", lambda: "jupyter"
+    )
+    ipython = MagicMock()
+    # TODO: this is really unfortunate, for reasons not clear to me, monkeypatch doesn't work
+    orig_get_ipython = wandb.jupyter.get_ipython
+    wandb.jupyter.get_ipython = lambda: ipython
+    yield ipython
+    # Manual restore, needed because get_ipython was replaced without monkeypatch
+    wandb.jupyter.get_ipython = orig_get_ipython
+
+
 def default_wandb_args():
     """This allows us to parameterize the wandb_init_run fixture
     The most general arg is "env", you can call:

diff --git a/tests/notebooks/code_saving.ipynb b/tests/notebooks/code_saving.ipynb
@@ -0,0 +1,57 @@
+{
+ "metadata": {
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": 3
+  },
+  "orig_nbformat": 2
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import wandb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wandb.init(project=\"code_save\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"Running some code\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wandb.finish()"
+   ]
+  }
+ ]
+}
diff --git a/tests/notebooks/setup.ipynb b/tests/notebooks/setup.ipynb
@@ -8,9 +8,9 @@
    "source": [
     "\"\"\"This sets up our testing environment.  Currently only the first cell of this notebook is run\"\"\"\n",
     "import os\n",
-    "\n",
     "os.environ[\"WANDB_API_KEY\"] = \"1824812581259009ca9981580f8f8a9012409eee\"\n",
-    "os.environ[\"WANDB_BASE_URL\"] = \"__WANDB_BASE_URL__\""
+    "os.environ[\"WANDB_BASE_URL\"] = \"__WANDB_BASE_URL__\"\n",
+    "os.environ[\"WANDB_NOTEBOOK_NAME\"] = \"__WANDB_NOTEBOOK_NAME__\""
    ]
   }
  ],

diff --git a/tests/test_library_public.py b/tests/test_library_public.py
@@ -158,6 +158,7 @@ def test_library_root():
     "upsert_artifact",
     "finish_artifact",
     "use_artifact",
+    "log_code",
     "alert",
     "define_metric",
     # "summary",   # really this should be here

diff --git a/tests/test_notebooks.py b/tests/test_notebooks.py
@@ -1,7 +1,9 @@
-import sys
+import os
 import platform
 import pytest
-
+import json
+import sys
+import wandb
 
 pytestmark = pytest.mark.skipif(
     sys.version_info < (3, 5) or platform.system() == "Windows",
@@ -11,16 +13,93 @@
 
 def test_one_cell(notebook):
+    """Run every cell of one_cell.ipynb and expect "lovely-dawn-32" in the HTML
+    of the last output returned by ``cell_output(0)``."""
     with notebook("one_cell.ipynb") as nb:
-        nb.execute_cell(cell_index=1)
-        output = nb.cell_output(1)
+        nb.execute_all()
+        output = nb.cell_output(0)
         print(output)
         assert "lovely-dawn-32" in output[-1]["data"]["text/html"]
         # assert "Failed to query for notebook name" not in text
 
 
 def test_magic(notebook):
+    """Run every cell of magic.ipynb and expect the mock server URL in the HTML
+    of the first output returned by ``cell_output(1)``."""
     with notebook("magic.ipynb") as nb:
-        nb.execute_cell(cell_index=[1, 2])
-        output = nb.cell_output(2)
+        nb.execute_all()
+        output = nb.cell_output(1)
         print(output)
         assert notebook.base_url in output[0]["data"]["text/html"]
+
+
+def test_code_saving(notebook, live_mock_server):
+    """Run code_saving.ipynb three times and check what gets recorded or warned.
+
+    1. Default loader: the mock server context should hold 3 entries under the
+       first artifact name it recorded.
+    2. ``save_code=False``: the notebook output should contain
+       "Failed to detect the name of this notebook".
+    3. Notebook file deleted from the working directory before execution: the
+       output should contain "WANDB_NOTEBOOK_NAME should be a path".
+    """
+    # TODO: this is awfully slow, we should likely run these in parallel
+    with notebook("code_saving.ipynb") as nb:
+        nb.execute_all()
+        server_ctx = live_mock_server.get_ctx()
+        artifact_name = list(server_ctx["artifacts"].keys())[0]
+        # We run 3 cells after calling wandb.init
+        assert len(server_ctx["artifacts"][artifact_name]) == 3
+
+    with notebook("code_saving.ipynb", save_code=False) as nb:
+        nb.execute_all()
+        assert "Failed to detect the name of this notebook" in nb.all_output_text()
+
+    # Let's make sure we warn the user if they lie to us.
+    with notebook("code_saving.ipynb") as nb:
+        # The relative path targets the current working directory
+        os.remove("code_saving.ipynb")
+        nb.execute_all()
+        assert "WANDB_NOTEBOOK_NAME should be a path" in nb.all_output_text()
+
+
+def test_notebook_not_exists(mocked_ipython, live_mock_server, capsys, test_settings):
+    """With WANDB_NOTEBOOK_NAME set to "fake.ipynb", wandb.init should write
+    the "WANDB_NOTEBOOK_NAME should be a path" message to stderr."""
+    os.environ["WANDB_NOTEBOOK_NAME"] = "fake.ipynb"
+    try:
+        wandb.init(settings=test_settings)
+        _, err = capsys.readouterr()
+        assert "WANDB_NOTEBOOK_NAME should be a path" in err
+    finally:
+        # Always unset the variable: a failing init or assertion must not leak
+        # the fake notebook name into tests that run afterwards.
+        os.environ.pop("WANDB_NOTEBOOK_NAME", None)
+
+
+def test_notebook_metadata_jupyter(mocker, mocked_module, live_mock_server):
+    """notebook_metadata(False) should report test.ipynb under root "/test"
+    when one running Jupyter server (pointing at the mock server) is listed."""
+    ipyconnect = mocker.patch("ipykernel.connect")
+    # presumably the "12345" here is matched against the kernel id served by the
+    # mock server's /api/sessions route -- confirm in wandb.jupyter
+    ipyconnect.get_connection_file.return_value = "kernel-12345.json"
+    serverapp = mocked_module("jupyter_server.serverapp")
+    serverapp.list_running_servers.return_value = [
+        {"url": live_mock_server.base_url, "notebook_dir": "/test"}
+    ]
+    meta = wandb.jupyter.notebook_metadata(False)
+    assert meta == {"path": "test.ipynb", "root": "/test", "name": "test.ipynb"}
+
+
+def test_notebook_metadata_no_servers(mocker, mocked_module):
+    """notebook_metadata(False) should return an empty dict when the mocked
+    jupyter_server.serverapp lists no running servers."""
+    ipyconnect = mocker.patch("ipykernel.connect")
+    ipyconnect.get_connection_file.return_value = "kernel-12345.json"
+    serverapp = mocked_module("jupyter_server.serverapp")
+    serverapp.list_running_servers.return_value = []
+    meta = wandb.jupyter.notebook_metadata(False)
+    assert meta == {}
+
+
+def test_notebook_metadata_colab(mocked_module):
+    """notebook_metadata(False) should report colab.ipynb under root "/content"
+    when the mocked google.colab returns a notebook with that name."""
+    colab = mocked_module("google.colab")
+    colab._message.blocking_request.return_value = {
+        "ipynb": {"metadata": {"colab": {"name": "colab.ipynb"}}}
+    }
+    meta = wandb.jupyter.notebook_metadata(False)
+    assert meta == {
+        "root": "/content",
+        "path": "colab.ipynb",
+        "name": "colab.ipynb",
+    }
+
+
+def test_notebook_metadata_kaggle(mocker, mocked_module):
+    """notebook_metadata(False) should report kaggle.ipynb under root
+    "/kaggle/working" when KAGGLE_KERNEL_RUN_TYPE is set and kaggle_session
+    is mocked to export an empty notebook."""
+    # patch.dict is undone by the mocker fixture at teardown, so the Kaggle
+    # marker no longer leaks into every test that runs after this one.
+    mocker.patch.dict(os.environ, {"KAGGLE_KERNEL_RUN_TYPE": "test"})
+    kaggle = mocked_module("kaggle_session")
+    kaggle_client = mocker.MagicMock()
+    kaggle_client.get_exportable_ipynb.return_value = {
+        "source": json.dumps({"metadata": {}, "cells": []})
+    }
+    kaggle.UserSessionClient.return_value = kaggle_client
+    meta = wandb.jupyter.notebook_metadata(False)
+    assert meta == {
+        "root": "/kaggle/working",
+        "path": "kaggle.ipynb",
+        "name": "kaggle.ipynb",
+    }
diff --git a/tests/utils/mock_server.py b/tests/utils/mock_server.py
@@ -121,12 +121,12 @@ def run(ctx):
     }
 
 
-def artifact(ctx, collection_name="mnist"):
+def artifact(ctx, collection_name="mnist", state="COMMITTED"):
     return {
         "id": ctx["page_count"],
         "digest": "abc123",
         "description": "",
-        "state": "COMMITTED",
+        "state": state,
         "size": 10000,
         "createdAt": datetime.now().isoformat(),
         "updatedAt": datetime.now().isoformat(),
@@ -525,6 +525,11 @@ def graphql():
             return json.dumps({"data": {"prepareFiles": {"files": {"edges": nodes}}}})
         if "mutation CreateArtifact(" in body["query"]:
             collection_name = body["variables"]["artifactCollectionNames"][0]
+            ctx["artifacts"] = ctx.get("artifacts", {})
+            ctx["artifacts"][collection_name] = ctx["artifacts"].get(
+                collection_name, []
+            )
+            ctx["artifacts"][collection_name].append(body["variables"])
             return {
                 "data": {"createArtifact": {"artifact": artifact(ctx, collection_name)}}
             }
@@ -603,7 +608,11 @@ def graphql():
             }
         if "query Artifact(" in body["query"]:
             art = artifact(ctx)
-            art["artifactType"] = {"id": 1, "name": "dataset"}
+            # code artifacts use source-RUNID names, we return the code type
+            if "source" in body["variables"]["name"]:
+                art["artifactType"] = {"id": 2, "name": "code"}
+            else:
+                art["artifactType"] = {"id": 1, "name": "dataset"}
             return {"data": {"project": {"artifact": art}}}
         if "query ArtifactManifest(" in body["query"]:
             art = artifact(ctx)
@@ -778,6 +787,17 @@ def k8s_pod():
         else:
             return b"", 500
 
+    @app.route("/api/sessions")
+    def jupyter_sessions():
+        """Serve a canned JSON list holding a single session: kernel id "12345"
+        and a notebook whose path and name are both test.ipynb."""
+        return json.dumps(
+            [
+                {
+                    "kernel": {"id": "12345"},
+                    "notebook": {"path": "test.ipynb", "name": "test.ipynb"},
+                }
+            ]
+        )
+
     @app.route("/pypi/<library>/json")
     def pypi(library):
         version = getattr(wandb, "__hack_pypi_latest_version__", wandb.__version__)