Use an existing OrderedSet library instead of a custom implementation

mlazowik · May 2, 2020 · 3949477 · 3949477
1 parent db116cc
commit 3949477
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 17 deletions.
diff --git a/setup.py b/setup.py
@@ -18,6 +18,7 @@
         'path.py<12',  # Pinned for Python 2 compatibility
         'pyquery',
         'pyxform',
+        'orderedset',
         'statistics',
         'XlsxWriter',
         'backports.csv',  # Remove after dropping Python 2 support (and rewrite `imports`)
@@ -41,6 +42,7 @@
         'path.py',
         'pyquery',
         'pyxform',
+        'orderedset',
         'statistics',
         'XlsxWriter',
         'backports.csv',  # Remove after dropping Python 2 support (and rewrite `imports`)

diff --git a/src/formpack/utils/expand_content.py b/src/formpack/utils/expand_content.py
@@ -6,6 +6,7 @@
 from __future__ import (unicode_literals, print_function,
                         absolute_import, division)
 from copy import deepcopy
+from orderedset import OrderedSet
 import re
 
 from .array_to_xpath import EXPANDABLE_FIELD_TYPES
@@ -158,17 +159,7 @@ def _get_special_survey_cols(content):
         'hint::English',
     For more examples, see tests.
     """
-    uniq_cols_set = set()
-    uniq_cols = []
-    """
-    The reason for two separate data structures is performance. The goal is to have a unique
-    set that preserves insertion order.
-
-    We implement that by using set() for uniqueness and list() for order.
-
-    Python has OrderedDict that provides that functionality, but the performance is slightly
-    worse compared to this solution.
-    """
+    uniq_cols = OrderedSet()
 
     special = OrderedDict()
 
@@ -180,11 +171,7 @@ def _pluck_uniq_cols(sheet_name):
             # to be parsed and translated in a previous iteration
             _cols = [r for r in row.keys() if r not in known_translated_cols]
 
-            for _col in _cols:
-                if _col in uniq_cols_set:
-                    continue
-                uniq_cols_set.add(_col)
-                uniq_cols.append(_col)
+            uniq_cols.update(_cols)
 
     def _mark_special(**kwargs):
         column_name = kwargs.pop('column_name')
@@ -233,7 +220,7 @@ def _mark_special(**kwargs):
                           translation=matched[1])
 
             # also add the empty column if it exists
-            if column_shortname in uniq_cols_set:
+            if column_shortname in uniq_cols:
                 _mark_special(column_name=column_shortname,
                               column=column_shortname,
                               translation=UNTRANSLATED)