Skip to content

Commit

Permalink
Add data_source field to Metadata to differentiate between in-pixel and crawled metadata. (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
dan-blanchard authored Mar 7, 2017
1 parent a305fba commit 5e75c6d
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 5 deletions.
11 changes: 8 additions & 3 deletions parsely_raw_data/event.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,15 @@ class Metadata(SlotsMixin):
__slots__ = ('authors', 'canonical_url', 'urls', 'page_type', 'post_id',
'pub_date_tmsp', 'custom_metadata', 'section', 'tags',
'save_date_tmsp', 'thumb_url', 'title', 'image_url',
'full_content_word_count', 'share_urls', 'duration')
'full_content_word_count', 'share_urls', 'duration',
'data_source')
__version__ = 1

def __init__(self, authors, canonical_url, urls, page_type, post_id,
pub_date_tmsp, custom_metadata, section, tags,
save_date_tmsp, thumb_url, title, image_url,
full_content_word_count, share_urls, duration):
full_content_word_count, share_urls, duration,
data_source):
self.authors = authors
self.canonical_url = canonical_url
self.urls = urls
Expand All @@ -105,6 +107,7 @@ def __init__(self, authors, canonical_url, urls, page_type, post_id,
self.full_content_word_count = full_content_word_count
self.share_urls = share_urls
self.duration = duration
self.data_source = data_source


class SlotInfo(SlotsMixin):
Expand Down Expand Up @@ -285,6 +288,7 @@ def to_dict(self):
event_dict['metadata.full_content_word_count'] = self.metadata.full_content_word_count
event_dict['metadata.share_urls'] = self.metadata.share_urls
event_dict['metadata.duration'] = self.metadata.duration
event_dict['metadata.data_source'] = self.metadata.data_source
event_dict['metadata.__version__'] = self.metadata.__version__
else:
event_dict['metadata'] = False
Expand Down Expand Up @@ -363,7 +367,8 @@ def from_dict(cls, data):
data.get('metadata.image_url'),
data.get('metadata.full_content_word_count'),
data.get('metadata.share_urls'),
data.get('metadata.duration'))
data.get('metadata.duration'),
data.get('metadata.data_source'))
else:
metadata = None
if data.get('campaign'):
Expand Down
5 changes: 3 additions & 2 deletions tests/test_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
"http://parsely.com/imgurl",
420,
["http://twitter.com/nothing"],
69
69,
'crawl'
),
CampaignInfo('spring_sale', 'email', 'newsletter', 'logolink', 'foo'),
EventFlags(False),
Expand All @@ -62,7 +63,7 @@ def test_to_dict_checker():
assert len(SlotInfo.__slots__) == 4, msg
assert len(TimestampInfo.__slots__) == 3, msg
assert len(VisitorInfo.__slots__) == 3, msg
assert len(Metadata.__slots__) == 16, msg
assert len(Metadata.__slots__) == 17, msg
assert len(CampaignInfo.__slots__) == 5, msg
assert len(EventFlags.__slots__) == 1, msg

Expand Down

0 comments on commit 5e75c6d

Please sign in to comment.