Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

es_type_overrides should error when receiving non-existing entries #302

Merged
merged 5 commits into from
Oct 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ jobs:
with:
python-version: 3.8
- name: Install dependencies
run: python3.8 -m pip install nox
run: |
sudo apt-get install --yes pandoc
python3.8 -m pip install nox
- name: Build documentation
run: nox -s docs
21 changes: 15 additions & 6 deletions eland/field_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@
if TYPE_CHECKING:
from elasticsearch import Elasticsearch

from eland import DataFrame


ES_FLOAT_TYPES: Set[str] = {"double", "float", "half_float", "scaled_float"}
ES_INTEGER_TYPES: Set[str] = {"long", "integer", "short", "byte"}
Expand Down Expand Up @@ -463,7 +461,7 @@ def _es_dtype_to_pd_dtype(cls, es_dtype):
return cls.ES_DTYPE_TO_PD_DTYPE.get(es_dtype, "object")

@staticmethod
def _pd_dtype_to_es_dtype(pd_dtype):
def _pd_dtype_to_es_dtype(pd_dtype) -> Optional[str]:
"""
Mapping pandas dtypes to Elasticsearch dtype
--------------------------------------------
Expand All @@ -479,7 +477,7 @@ def _pd_dtype_to_es_dtype(pd_dtype):
category NA NA Finite list of text values
```
"""
es_dtype = None
es_dtype: Optional[str] = None

# Map all to 64-bit - TODO map to specifics: int32 -> int etc.
if is_float_dtype(pd_dtype):
Expand All @@ -501,7 +499,7 @@ def _pd_dtype_to_es_dtype(pd_dtype):

@staticmethod
def _generate_es_mappings(
dataframe: "DataFrame", es_type_overrides: Optional[Mapping[str, str]] = None
dataframe: "pd.DataFrame", es_type_overrides: Optional[Mapping[str, str]] = None
) -> Dict[str, Dict[str, Dict[str, Any]]]:
"""Given a pandas dataframe, generate the associated Elasticsearch mapping

Expand Down Expand Up @@ -536,8 +534,19 @@ def _generate_es_mappings(
}
}
"""
es_dtype: str

mapping_props: Dict[str, Any] = {}

if es_type_overrides is not None:
non_existing_columns: List[str] = [
key for key in es_type_overrides.keys() if key not in dataframe.columns
]
if non_existing_columns:
raise KeyError(
f"{repr(non_existing_columns)[1:-1]} column(s) not in given dataframe"
)

mapping_props = {}
for column, dtype in dataframe.dtypes.iteritems():
if es_type_overrides is not None and column in es_type_overrides:
es_dtype = es_type_overrides[column]
Expand Down
26 changes: 26 additions & 0 deletions eland/tests/dataframe/test_utils_pytest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import numpy as np
import pandas as pd
import pytest

import eland as ed
from eland.field_mappings import FieldMappings
Expand Down Expand Up @@ -139,3 +140,28 @@ def test_eland_to_pandas_performance(self):

# This test calls the same method so is redundant
# assert_pandas_eland_frame_equal(pd_df, self.ed_flights())

def test_es_type_override_error(self):

df = self.pd_flights().filter(
["AvgTicketPrice", "Cancelled", "dayOfWeek", "timestamp", "DestCountry"]
)

index_name = "test_es_type_override"

match = "'DistanceKilometers', 'DistanceMiles' column(s) not in given dataframe"
with pytest.raises(KeyError, match=match):
ed.pandas_to_eland(
df,
ES_TEST_CLIENT,
index_name,
es_if_exists="replace",
es_refresh=True,
use_pandas_index_for_es_ids=False,
es_type_overrides={
"AvgTicketPrice": "long",
"DistanceKilometers": "text",
"DistanceMiles": "text",
},
)
ES_TEST_CLIENT.indices.delete(index=index_name)
3 changes: 2 additions & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import subprocess
from pathlib import Path

import elasticsearch
import nox

BASE_DIR = Path(__file__).parent
Expand Down Expand Up @@ -126,6 +125,8 @@ def docs(session):
# See if we have an Elasticsearch cluster active
# to rebuild the Jupyter notebooks with.
try:
import elasticsearch

es = elasticsearch.Elasticsearch("localhost:9200")
es.info()
if not es.indices.exists("flights"):
Expand Down