Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: add display settings #1672

Merged
merged 2 commits into from
Nov 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/vaex-core/vaex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,7 @@ def read_csv(filepath_or_buffer, **kwargs):
'''Alias to from_csv.'''
return from_csv(filepath_or_buffer, **kwargs)

aliases = vaex.settings.main.auto_store_dict("aliases")
aliases = vaex.settings.aliases

# py2/p3 compatibility
try:
Expand Down
32 changes: 23 additions & 9 deletions packages/vaex-core/vaex/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3790,12 +3790,13 @@ def tail(self, n=10):
# self.cat(i1=max(0, N-n), i2=min(len(self), N))
return self[max(0, N - n):min(len(self), N)]

def _head_and_tail_table(self, n=None, format='html'):
    """Render the DataFrame as a table, eliding the middle rows when it is long.

    When the DataFrame has at most `n` rows, all rows are rendered.
    Otherwise the first ``ceil(n / 2)`` and the last ``floor(n / 2)`` rows
    are rendered, so `n` is the total number of data rows shown.

    :param int n: maximum number of rows to show; a falsy value (``None`` or 0)
        selects ``vaex.settings.display.max_rows``
    :param str format: passed through to ``_as_table``
    :return: the result of ``_as_table``
    """
    n = n or vaex.settings.display.max_rows
    N = _len(self)
    if N <= n:
        return self._as_table(0, N, format=format)
    # Split the row budget between head and tail; the head gets the extra
    # row when n is odd.
    head_rows = math.ceil(n / 2)
    tail_rows = math.floor(n / 2)
    return self._as_table(0, head_rows, N - tail_rows, N, format=format)

def head_and_tail_print(self, n=5):
"""Display the first and last n elements of a DataFrame."""
Expand Down Expand Up @@ -3891,21 +3892,29 @@ def cat(self, i1, i2, format='html'):
output = self._as_table(i1, i2, format=format)
print(output)

def _as_table(self, i1, i2, j1=None, j2=None, format='html'):
def _as_table(self, i1, i2, j1=None, j2=None, format='html', ellipsis="..."):
from .formatting import _format_value
parts = [] # """<div>%s (length=%d)</div>""" % (self.name, len(self))]
parts += ["<table class='table-striped'>"]

# we need to get the underlying names since we use df.evaluate
column_names = self.get_column_names()
max_columns = vaex.settings.display.max_columns
if (max_columns is not None) and (max_columns > 0):
if max_columns < len(column_names):
columns_sliced = math.ceil(max_columns/2)
column_names = column_names[:columns_sliced] + column_names[-math.floor(max_columns/2):]
else:
columns_sliced = None
values_list = []
values_list.append(['#', []])
# parts += ["<thead><tr>"]
for name in column_names:
for i, name in enumerate(column_names):
if columns_sliced == i:
values_list.append([ellipsis, []])
values_list.append([name, []])
# parts += ["<th>%s</th>" % name]
# parts += ["</tr></thead>"]

def table_part(k1, k2, parts):
N = k2 - k1
# slicing will invoke .extract which will make the evaluation
Expand All @@ -3930,18 +3939,23 @@ def table_part(k1, k2, parts):
value = "{:,}".format(i + k1)
values_list[0][1].append(value)
for j, name in enumerate(column_names):
column_index = j
if columns_sliced == j:
values_list[column_index+1][1].append(ellipsis)
if columns_sliced is not None and j >= columns_sliced:
column_index += 1 # skip over the slice/ellipsis
value = values[name][i]
value = _format_value(value)
values_list[j+1][1].append(value)
values_list[column_index+1][1].append(value)
# parts += ["</tr>"]
# return values_list
if i2 - i1 > 0:
parts = table_part(i1, i2, parts)
if j1 is not None and j2 is not None:
values_list[0][1].append('...')
values_list[0][1].append(ellipsis)
for i in range(len(column_names)):
# parts += ["<td>...</td>"]
values_list[i+1][1].append('...')
values_list[i+1][1].append(ellipsis)

# parts = table_part(j1, j2, parts)
table_part(j1, j2, parts)
Expand Down
74 changes: 60 additions & 14 deletions packages/vaex-core/vaex/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import vaex.utils
import collections
from dataclasses import dataclass

try:
collections_abc = collections.abc
Expand Down Expand Up @@ -29,11 +30,11 @@ def __init__(self, filename):
self.settings = {}
# logger.debug("settings: %r", self.settings)

def auto_store_dict(self, key, autostore=False):
    """Return an ``AutoStoreDict`` wrapping the settings section stored under `key`.

    An empty section is created in ``self.settings`` when `key` is not
    present yet.

    :param str key: top-level key in ``self.settings``
    :param bool autostore: forwarded to ``AutoStoreDict``
    :return: ``AutoStoreDict`` backed by ``self.settings[key]``
    """
    # TODO: no nested keys supported yet
    if key not in self.settings:
        self.settings[key] = {}
    return AutoStoreDict(self, self.settings[key], autostore)

def store(self, key, value):
parts = key.split(".")
Expand Down Expand Up @@ -69,37 +70,82 @@ def get(self, key, default=None):


class AutoStoreDict(collections_abc.MutableMapping):
    """Dict-like view on one settings section that also supports attribute access.

    Items can be read and written as ``d[key]`` or as ``d.key``. Attribute
    names starting with an underscore are kept on the instance itself and
    never reach the underlying store.

    :param settings: owning object; must provide a ``dump()`` method
    :param store: mutable mapping that holds the actual values
    :param bool autostore: when true, ``settings.dump()`` is called after
        every item assignment or deletion
    """

    def __init__(self, settings, store, autostore):
        self._store = store
        self._settings = settings
        self._autostore = autostore

    def save(self):
        """Dump the owning settings object."""
        self._settings.dump()

    def __getitem__(self, key):
        return self._store[self.__keytransform__(key)]

    def __setitem__(self, key, value):
        self._store[self.__keytransform__(key)] = value
        if self._autostore:
            self._settings.dump()

    def __delitem__(self, key):
        del self._store[self.__keytransform__(key)]
        if self._autostore:
            self._settings.dump()

    def __iter__(self):
        return iter(self._store)

    def __len__(self):
        return len(self._store)

    def __keytransform__(self, key):
        return key

    def __dir__(self):
        # Only the stored keys are advertised (e.g. for tab completion).
        return list(self._store.keys())

    def __setattr__(self, name, value):
        if name.startswith("_"):
            # Private names are instance state, not settings.
            self.__dict__[name] = value
        else:
            self[name] = value

    def __getattr__(self, name):
        # __getattr__ only runs after normal lookup has failed, so a private
        # name that gets here does not exist on the instance.
        if name.startswith("_"):
            raise AttributeError(name)
        try:
            return self[name]
        except KeyError:
            # The attribute protocol requires AttributeError; a KeyError
            # would make hasattr() and getattr(obj, name, default) raise.
            raise AttributeError(name) from None

    def __repr__(self) -> str:
        s = repr(self._store)
        return f'auto_store_dict({s})'


# One Settings object per .yml file; each path is built inside the directory
# returned by vaex.utils.get_private_dir().
main = Settings(os.path.join(vaex.utils.get_private_dir(), "main.yml"))
webclient = Settings(os.path.join(vaex.utils.get_private_dir(), "webclient.yml"))
webserver = Settings(os.path.join(vaex.utils.get_private_dir(), "webserver.yml"))
cluster = Settings(os.path.join(vaex.utils.get_private_dir(), "cluster.yml"))
# Sections of the main settings, obtained via auto_store_dict under the keys
# "display" and "aliases".
display = main.auto_store_dict("display")
aliases = main.auto_store_dict("aliases")


def save():
    """Dump every module-level settings object: main, webclient, webserver, cluster."""
    for settings in (main, webclient, webserver, cluster):
        settings.dump()

# Fill in the display defaults without overriding values that are already set.
_display_default = {
    "max_columns": 200,
    "max_rows": 10,
}
for name, value in _display_default.items():
    display.setdefault(name, value)

# yaml.load()

# Script entry point: only runs when this module is executed directly.
if __name__ == "__main__":
# NOTE(review): in this diff view the next line may be the pre-change body
# that the lines after it replace — confirm against the merged file.
webclient.store("bla.la.l", 1)
import sys
# Report the path of the main settings file, then dump its settings to stdout.
print(f"main.yml is at {main.filename}")
vaex.utils.yaml_dump(sys.stdout, main.settings)
19 changes: 1 addition & 18 deletions packages/vaex-core/vaex/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ def confirm_on_console(topic, msg):


def yaml_dump(f, data):
    """Serialize `data` as YAML to the stream `f` using ``yaml.safe_dump``.

    ``default_flow_style=False`` selects block style, and ``sort_keys=False``
    keeps mapping keys in their own iteration order instead of sorting them.

    :param f: stream passed to ``yaml.safe_dump``
    :param data: object to serialize
    """
    yaml.safe_dump(data, f, default_flow_style=False, encoding='utf-8', allow_unicode=True, sort_keys=False)


def yaml_load(f):
Expand Down Expand Up @@ -452,23 +452,6 @@ def read_json_or_yaml(file, fs_options={}, fs=None, old_style=True):
file.close()


# from http://stackoverflow.com/questions/5121931/in-python-how-can-you-load-yaml-mappings-as-ordereddicts

_mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG


def dict_representer(dumper, data):
return dumper.represent_dict(data.iteritems() if hasattr(data, "iteritems") else data.items())


def dict_constructor(loader, node):
return collections.OrderedDict(loader.construct_pairs(node))


yaml.add_representer(collections.OrderedDict, dict_representer, yaml.SafeDumper)
yaml.add_constructor(_mapping_tag, dict_constructor, yaml.SafeLoader)


def check_memory_usage(bytes_needed, confirm):
psutil = optional_import('psutil')
if bytes_needed > psutil.virtual_memory().available:
Expand Down
27 changes: 26 additions & 1 deletion tests/repr_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,29 @@ def test_display_large_int(df_factory):
large_int = 2**50-1
df = df_factory(x=[123, large_int])
text = repr(df)
assert str(large_int) in text
assert str(large_int) in text


def test_max_columns():
    """The ellipsis shows up only when the column count exceeds ``display.max_columns``."""
    limit = vaex.settings.display.max_columns
    x = np.arange(10)
    df_at_limit = vaex.from_dict({f'col_{i}': x for i in range(limit)})
    df_over_limit = vaex.from_dict({f'col_{i}': x for i in range(limit + 1)})
    # Exactly at the limit: nothing is elided in any representation.
    for value in df_at_limit._repr_mimebundle_().values():
        assert "..." not in value
    # One column too many: every representation contains the ellipsis.
    for value in df_over_limit._repr_mimebundle_().values():
        assert "..." in value


def test_max_row():
    """The ellipsis shows up only when the row count exceeds ``display.max_rows``."""
    row_limit = vaex.settings.display.max_rows
    column_limit = vaex.settings.display.max_columns
    x = np.arange(row_limit)
    x2 = np.arange(row_limit + 1)
    df_at_limit = vaex.from_dict({f'col_{i}': x for i in range(column_limit)})
    df_over_limit = vaex.from_dict({f'col_{i}': x2 for i in range(column_limit)})
    # Exactly at the limit: nothing is elided in any representation.
    for value in df_at_limit._repr_mimebundle_().values():
        assert "..." not in value
    # One row too many: every representation contains the ellipsis.
    for value in df_over_limit._repr_mimebundle_().values():
        assert "..." in value