Skip to content

Commit

Permalink
Bug/is scripted error (#149)
Browse files Browse the repository at this point in the history
* Updating test matrix for 7.6 + removing oss for now.

* Resolving 7.6.0 docs issues

* Updating ML docs

* Minor mod to support 6.x style indices.

Currently, there is no specific test for this as
it requires a 6.x cluster. 6.x is not officially
supported by 7.x clients, but this is a friendly
option for users.

* Adding unittest for FieldMappings._extract_fields_from_mapping

* Changing to f-string formatting and adding exception test

* Reverting to OrderedDict

Will change after #150 is merged.
  • Loading branch information
stevedodson committed Mar 26, 2020
1 parent 2e74a56 commit 9e2997c
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 12 deletions.
49 changes: 37 additions & 12 deletions eland/field_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,32 @@ def _extract_fields_from_mapping(mappings, source_only=False, date_format=None):
}
}
}
or (6.x)
{
"my_index": {
"mappings": {
"doc": {
"properties": {
"city": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
}
}
}
```
if source_only == False:
return {'city': 'text', 'city.keyword': 'keyword'}
return {'city': ('text', None), 'city.keyword': ('keyword', None)}
else:
return {'city': 'text'}
return {'city': ('text', None)}
Note: first field name type wins. E.g.
Expand All @@ -128,11 +149,7 @@ def _extract_fields_from_mapping(mappings, source_only=False, date_format=None):
Returns
-------
fields, dates_format: tuple(OrderedDict, dict)
where:
fields: OrderedDict of field names and types
dates_format: Dict of date field names and format
fields: dict of field name: (type, date_format)
"""
fields = OrderedDict()

Expand All @@ -149,9 +166,9 @@ def flatten(x, name=''):
date_format = x["format"]
# If there is a conflicting type, warn - first values added wins
if field_name in fields and fields[field_name] != field_type:
warnings.warn("Field {} has conflicting types {} != {}".
format(field_name, fields[field_name], field_type),
UserWarning)
warnings.warn(
f"Field {field_name} has conflicting types {fields[field_name]} != {field_type}",
UserWarning)
else:
fields[field_name] = (field_type, date_format)
elif a == 'properties' or (not source_only and a == 'fields'):
Expand All @@ -162,8 +179,16 @@ def flatten(x, name=''):
for index in mappings:
if 'properties' in mappings[index]['mappings']:
properties = mappings[index]['mappings']['properties']

flatten(properties)
else:
# Mappings in Elasticsearch versions before 7.0 had types. Support these
# in case eland is connected to a 6.x index - this is not
# officially supported, but it does help usability.
es_types = list(mappings[index]['mappings'].keys())
if len(es_types) != 1:
raise NotImplementedError(f"eland only supports 0 or 1 Elasticsearch types. es_types={es_types}")
properties = mappings[index]['mappings'][es_types[0]]['properties']

flatten(properties)

return fields

Expand Down
109 changes: 109 additions & 0 deletions eland/tests/field_mappings/test_mappings_with_type_pytest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Copyright 2020 Elasticsearch BV
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import OrderedDict

# File called _pytest for PyCharm compatibility
import pytest

from eland import FieldMappings
from eland.tests.common import TestData


class TestMappingsWithType(TestData):
    """Tests for ``FieldMappings._extract_fields_from_mapping`` with and without mapping types."""

    def test_mappings_with_type(self):
        """Feed hand-built 7.x, 6.x and 5.x style mappings to the extractor.

        A live 6.x index is not available to the test suite, so only the
        extraction function is exercised here, using literal mapping
        dictionaries:

        * 7.x style - ``properties`` sits directly under ``mappings``.
        * 6.x style - ``properties`` sits under a single type (``doc``).
        * 5.x style - two types (``user`` and ``tweet``) under ``mappings``;
          this is expected to raise ``NotImplementedError``.
        """
        extract = FieldMappings._extract_fields_from_mapping

        def city_properties():
            # Build a fresh dict on every call so the 7.x and 6.x mappings
            # never share nested objects.
            return {
                "city": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword"
                        }
                    }
                }
            }

        # Typeless (7.x) layout.
        typeless_mapping = OrderedDict({
            "my_index": {
                "mappings": {
                    "properties": city_properties()
                }
            }
        })

        # Single-type (6.x) layout.
        single_type_mapping = OrderedDict({
            "my_index": {
                "mappings": {
                    "doc": {
                        "properties": city_properties()
                    }
                }
            }
        })

        # Multi-type (5.x) layout, present to cover the error path.
        multi_type_mapping = OrderedDict({
            "my_index": {
                "mappings": {
                    "user": {
                        "properties": {
                            "name": {"type": "text"},
                            "user_name": {"type": "keyword"},
                            "email": {"type": "keyword"}
                        }
                    },
                    "tweet": {
                        "properties": {
                            "content": {"type": "text"},
                            "user_name": {"type": "keyword"},
                            "tweeted_at": {"type": "date"}
                        }
                    }
                }
            }
        })

        # The 7.x and 6.x layouts describe the same field, so both must
        # produce the same results.
        with_sub_fields = {'city': ('text', None), 'city.keyword': ('keyword', None)}
        source_fields_only = {'city': ('text', None)}

        for mapping in (typeless_mapping, single_type_mapping):
            assert with_sub_fields == extract(mapping)
            assert source_fields_only == extract(mapping, source_only=True)

        with pytest.raises(NotImplementedError):
            extract(multi_type_mapping)

0 comments on commit 9e2997c

Please sign in to comment.