Skip to content

Commit

Permalink
Cloudpickle register by value (#6466)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ian Rose authored Jun 28, 2022
1 parent a8eb3b2 commit 40c9420
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 6 deletions.
15 changes: 10 additions & 5 deletions distributed/protocol/pickle.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from __future__ import annotations

import inspect
import logging
import pickle

import cloudpickle
from packaging.version import parse as parse_version

# True when the installed cloudpickle is version 2.0.0 or newer; used to gate
# code paths that rely on cloudpickle's pickle-by-value module registry.
CLOUDPICKLE_GTE_20 = parse_version(cloudpickle.__version__) >= parse_version("2.0.0")

# Highest pickle protocol supported by the running interpreter; used as the
# default ``protocol`` argument of ``dumps``.
HIGHEST_PROTOCOL = pickle.HIGHEST_PROTOCOL

Expand Down Expand Up @@ -40,13 +44,14 @@ def dumps(x, *, buffer_callback=None, protocol=HIGHEST_PROTOCOL):
try:
buffers.clear()
result = pickle.dumps(x, **dump_kwargs)
if len(result) < 1000:
if b"__main__" in result:
if b"__main__" in result or (
CLOUDPICKLE_GTE_20
and getattr(inspect.getmodule(x), "__name__", None)
in cloudpickle.list_registry_pickle_by_value()
):
if len(result) < 1000 or not _always_use_pickle_for(x):
buffers.clear()
result = cloudpickle.dumps(x, **dump_kwargs)
elif not _always_use_pickle_for(x) and b"__main__" in result:
buffers.clear()
result = cloudpickle.dumps(x, **dump_kwargs)
except Exception:
try:
buffers.clear()
Expand Down
35 changes: 34 additions & 1 deletion distributed/protocol/tests/test_pickle.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,25 @@
from __future__ import annotations

import pickle
import sys
import weakref
from functools import partial
from operator import add

import cloudpickle
import pytest

from dask.utils import tmpdir

from distributed import profile
from distributed.protocol import deserialize, serialize
from distributed.protocol.pickle import HIGHEST_PROTOCOL, dumps, loads
from distributed.protocol.pickle import (
CLOUDPICKLE_GTE_20,
HIGHEST_PROTOCOL,
dumps,
loads,
)
from distributed.utils_test import save_sys_modules


class MemoryviewHolder:
Expand Down Expand Up @@ -187,3 +197,26 @@ def funcs():
assert wr() is None
assert wr2() is None
assert wr3() is None


@pytest.mark.skipif(
    not CLOUDPICKLE_GTE_20, reason="Pickle by value registration not supported"
)
def test_pickle_by_value_when_registered():
    """Check that ``dumps`` honours cloudpickle's pickle-by-value registry.

    A throwaway module is written to a temporary directory and imported.
    Before registration, ``dumps`` must produce exactly the bytes plain
    ``pickle.dumps`` produces. After
    ``cloudpickle.register_pickle_by_value`` is called on the module, the
    payload from ``dumps`` must be larger than the plain pickle payload.
    """
    with save_sys_modules():
        with tmpdir() as d:
            # Mutate sys.path *before* entering the try block so the cleanup
            # below only ever undoes a change that actually happened.
            sys.path.insert(0, d)
            registered = False
            try:
                module = f"{d}/mymodule.py"
                with open(module, "w") as f:
                    f.write("def myfunc(x):\n return x + 1")
                import mymodule  # noqa

                # Not registered yet: output must match plain pickle.
                assert dumps(
                    mymodule.myfunc, protocol=HIGHEST_PROTOCOL
                ) == pickle.dumps(mymodule.myfunc, protocol=HIGHEST_PROTOCOL)

                cloudpickle.register_pickle_by_value(mymodule)
                registered = True
                assert len(dumps(mymodule.myfunc)) > len(pickle.dumps(mymodule.myfunc))
            finally:
                # Remove our own entry by value rather than by position, in
                # case something else was prepended to sys.path meanwhile.
                sys.path.remove(d)
                # The by-value registry is process-global; undo the
                # registration so it cannot leak into other tests.
                if registered:
                    cloudpickle.unregister_pickle_by_value(mymodule)
9 changes: 9 additions & 0 deletions docs/source/protocol.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,15 @@ bytes rather than obscure Python functions.
*Note: we actually call some combination of pickle and cloudpickle, depending
on the situation. This is for performance reasons.*

CloudPickle can serialize objects either by reference (referring to them by
their module and name) or by value (serializing the actual code for the object).
By default, it serializes by reference if it can, but starting with CloudPickle 2.0
you can register a module to be serialized by value. This can be useful if you
want to send an object defined in a module that doesn't exist on the receiving end::

    import cloudpickle
    import mymodule

    cloudpickle.register_pickle_by_value(mymodule)

Cross Language Specialization
-----------------------------

Expand Down

0 comments on commit 40c9420

Please sign in to comment.