From 3800323c3668cafdb5fc611c8284c8a8d7709751 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Sat, 15 Jun 2024 11:08:06 -0400 Subject: [PATCH 1/4] Add failing test for #322 `Mismatched labels and translations` error when attempting to load a schema that contains multiple languages as well as a `select_one_from_file` or `select_multiple_from_file` that has been expanded to have a `file` column as performed by #314 --- tests/test_exports.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/test_exports.py b/tests/test_exports.py index 70f299a..a1dea9d 100644 --- a/tests/test_exports.py +++ b/tests/test_exports.py @@ -376,6 +376,34 @@ def test_translations_labels_mismatch(self): with self.assertRaises(TranslationError): FormPack(schemas, title) + def test_translations_with_select_multiple_from_file(self): + title, schemas, submissions = restaurant_profile + + # Add a `select_multiple_from_file` question to existing `rpV4` schema + assert schemas[-1]['version'] == 'rpV4' + schemas[-1]['content']['survey'].append( + { + 'type': 'select_multiple_from_file suppliers.csv', + 'name': 'suppliers', + 'label::english': 'suppliers', + 'label::french': 'fournisseurs', + } + ) + fp = FormPack(schemas[-1:], title) + + # Expect that a `file` column has been added; see + # `test_expand_content.test_expand_select_x_from_file()` + assert ( + fp.versions['rpV4'].schema['content']['survey'][-1]['file'] + == 'suppliers.csv' + ) + + # Feed this schema with `file` back into formpack again + fp = FormPack([fp.versions['rpV4'].schema], title) + + # Fails here with + # `formpack.errors.TranslationError: Mismatched labels and translations: [restaurant name, nom du restaurant] [english, french, None] 2!=3` + def test_simple_nested_grouped_repeatable(self): title, schemas, submissions = build_fixture( 'simple_nested_grouped_repeatable' From 3e9a06f51da9acb5d017c07ef56dcc761b24df24 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Sat, 15 Jun 2024 11:23:50 -0400 Subject: [PATCH 2/4] =?UTF-8?q?Do=20not=20confuse=20media=20column=20names?= =?UTF-8?q?=20with=20question=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit types! This fixes #322 for new schemas, but doesn't yet handle existing schemas that have a superfluous `None` translation --- src/formpack/constants.py | 8 ++++++++ src/formpack/utils/expand_content.py | 15 +++++++++------ tests/test_exports.py | 15 +++++++++++++-- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/src/formpack/constants.py b/src/formpack/constants.py index bdd8f30..1069d7a 100644 --- a/src/formpack/constants.py +++ b/src/formpack/constants.py @@ -51,6 +51,14 @@ 'hxl': '', } +# Which columns can contain media filenames? Do not confuse this with question +# types that can collect media! From https://xlsform.org/en/#media: +# You can include questions in your form that display images or that play +# video or audio files +# … +# Media is translatable in the same way as labels and hints… +MEDIA_COLUMN_NAMES = ('audio', 'image', 'video') + # Export Settings EXPORT_SETTING_FIELDS = 'fields' EXPORT_SETTING_FIELDS_FROM_ALL_VERSIONS = 'fields_from_all_versions' diff --git a/src/formpack/utils/expand_content.py b/src/formpack/utils/expand_content.py index 1e0cbd9..852e50b 100644 --- a/src/formpack/utils/expand_content.py +++ b/src/formpack/utils/expand_content.py @@ -18,8 +18,9 @@ from .array_to_xpath import EXPANDABLE_FIELD_TYPES from .iterator import get_first_occurrence -from .replace_aliases import MEDIA_TYPES, META_TYPES, selects +from .replace_aliases import META_TYPES, selects from ..constants import ( + MEDIA_COLUMN_NAMES, OR_OTHER_COLUMN, TAG_COLUMNS_AND_SEPARATORS, UNTRANSLATED, @@ -186,7 +187,7 @@ def _get_known_translated_cols(translated_cols: List[str]) -> List[str]: _translated_cols = [] for col in translated_cols: - if col in MEDIA_TYPES: + if col in MEDIA_COLUMN_NAMES: col = f'media::{col}' _translated_cols.append(col) @@ -207,7 +208,7 @@ def _get_special_survey_cols( 'hint::English', For more examples, see tests. """ - RE_MEDIA_TYPES = '|'.join(MEDIA_TYPES) + RE_MEDIA_COLUMN_NAMES = '|'.join(MEDIA_COLUMN_NAMES) uniq_cols = OrderedDict() special = OrderedDict() @@ -238,14 +239,14 @@ def _mark_special(**kwargs: str) -> None: column=column_name, translation=UNTRANSLATED, ) - if ':' not in column_name and column_name not in MEDIA_TYPES: + if ':' not in column_name and column_name not in MEDIA_COLUMN_NAMES: continue if column_name.startswith('bind:'): continue if column_name.startswith('body:'): continue mtch = re.match( - rf'^(media\s*::?\s*)?({RE_MEDIA_TYPES})\s*::?\s*([^:]+)$', + rf'^(media\s*::?\s*)?({RE_MEDIA_COLUMN_NAMES})\s*::?\s*([^:]+)$', column_name, ) if mtch: @@ -260,7 +261,9 @@ def _mark_special(**kwargs: str) -> None: translation=translation, ) continue - mtch = re.match(rf'^(media\s*::?\s*)?({RE_MEDIA_TYPES})$', column_name) + mtch = re.match( + rf'^(media\s*::?\s*)?({RE_MEDIA_COLUMN_NAMES})$', column_name + ) if mtch: matched = mtch.groups() media_type = matched[1] diff --git a/tests/test_exports.py b/tests/test_exports.py index a1dea9d..1846810 100644 --- a/tests/test_exports.py +++ b/tests/test_exports.py @@ -377,6 +377,11 @@ def test_translations_labels_mismatch(self): FormPack(schemas, title) def test_translations_with_select_multiple_from_file(self): + """ + Make sure that exports do not crash if `select_multiple_from_file` is + used in a multi-language form. See #322 + """ + title, schemas, submissions = restaurant_profile # Add a `select_multiple_from_file` question to existing `rpV4` schema @@ -401,8 +406,14 @@ def test_translations_with_select_multiple_from_file(self): # Feed this schema with `file` back into formpack again fp = FormPack([fp.versions['rpV4'].schema], title) - # Fails here with - # `formpack.errors.TranslationError: Mismatched labels and translations: [restaurant name, nom du restaurant] [english, french, None] 2!=3` + # At this point, issue #322 would have already caused + # `TranslationError: Mismatched labels and translations`, but perform a + # simple export anyway, adding a response for `suppliers` + submissions[-1]['suppliers'] = 'SFG Ocsys' + export = fp.export(versions=fp.versions) + actual_dict = export.to_dict(submissions) + assert actual_dict['Restaurant profile']['fields'][-1] == 'suppliers' + assert actual_dict['Restaurant profile']['data'][-1][-1] == 'SFG Ocsys' def test_simple_nested_grouped_repeatable(self): title, schemas, submissions = build_fixture( From 19aa059456fe74b6a8a863df79351af912d1436a Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Sun, 16 Jun 2024 13:58:24 -0400 Subject: [PATCH 3/4] Add method to repair `media::file` problems (#322) This needs to be applied to existing content --- src/formpack/utils/bugfix.py | 78 ++++++++++++++++++++++++++++++++++++ tests/test_bugfix.py | 38 ++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 src/formpack/utils/bugfix.py create mode 100644 tests/test_bugfix.py diff --git a/src/formpack/utils/bugfix.py b/src/formpack/utils/bugfix.py new file mode 100644 index 0000000..face387 --- /dev/null +++ b/src/formpack/utils/bugfix.py @@ -0,0 +1,78 @@ +# coding: utf-8 + +from copy import deepcopy + +from ..constants import UNTRANSLATED + + +def repair_file_column_content_in_place(content) -> bool: + """ + #321 introduced a bug where the `file` column gets renamed to `media::file` + and treated as a translatable column (see #322). This function repairs that + damage. + + This function is intended to be run by KPI, which should it to correct + `Asset` and `AssetVersion` content. + + Returns `True` if any change was made + """ + + # Store updates and apply them to `content` only of all conditions are met + updates = {} + + try: + for key in 'translated', 'translations', 'survey': + updates[key] = deepcopy(content[key]) + except KeyError: + # Do not proceed if content is incomplete + return False + + try: + updates['translated'].remove('media::file') + except ValueError: + # The invalid column `media::file` inside `translated` is a hallmark of + # the problem this method is intended to fix. Do not proceed if it was + # not found + return False + + max_label_list_length = 0 + any_row_fixed = False + for row in updates['survey']: + max_label_list_length = max( + max_label_list_length, len(row.get('label', [])) + ) + bad_file_col = row.get('media::file') + if not bad_file_col: + continue + if not isinstance(bad_file_col, list): + # All problems of our own making (#322) will result in + # `media::file` being a list (or array when JSON) + continue + for val in bad_file_col: + if val is not None: + row['file'] = val + del row['media::file'] + any_row_fixed = True + break + if not any_row_fixed: + return False + + # Multi-language forms need an additional fix to remove a superfluous null + # translation added by the bogus `media::file` column + if len(updates['translations']) > max_label_list_length: + labels_translations_mismatch = True + if len(updates['translations']) == max_label_list_length + 1: + try: + updates['translations'].remove(UNTRANSLATED) + except ValueError: + pass + else: + labels_translations_mismatch = False # Fixed it! + if labels_translations_mismatch: + # This form has uncorrected problems. Bail out instead of modifying + # it at all + return False + + # Success! Apply updates to the original content + content.update(updates) + return True diff --git a/tests/test_bugfix.py b/tests/test_bugfix.py new file mode 100644 index 0000000..740d1f3 --- /dev/null +++ b/tests/test_bugfix.py @@ -0,0 +1,38 @@ +# coding: utf-8 + +from formpack.utils.bugfix import repair_file_column_content_in_place + + +def test_repair_file_column(): + content = { + 'schema': '1', + 'settings': {}, + 'survey': [ + { + '$kuid': 'qt1C9CFUj', + '$qpath': 'interviewer_name_text', + '$xpath': 'interviewer_name_text', + 'label': [ + "Введіть ім'я співробітника:", + "Enter interviewer's name", + ], + 'name': 'interviewer_name_text', + 'type': 'text', + }, + { + '$kuid': 'bHGP1YkRq', + '$qpath': 'oblast', + '$xpath': 'oblast', + 'label': ['Область', 'Oblast'], + 'media::file': [None, None, 'oblast.csv'], + 'name': 'oblast', + 'type': 'select_one_from_file', + }, + ], + 'translated': ['label', 'media::file'], + 'translations': ['Ukrainian', 'English', None], + } + assert repair_file_column_content_in_place(content) + assert content['survey'][1]['file'] == 'oblast.csv' + assert content['translated'] == ['label'] + assert content['translations'] == ['Ukrainian', 'English'] From 9a38ac91ab4d8292e8375461611c0605d28196f2 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Mon, 17 Jun 2024 00:52:32 -0400 Subject: [PATCH 4/4] Remove unnecessary columns from unit test --- tests/test_bugfix.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/test_bugfix.py b/tests/test_bugfix.py index 740d1f3..8b98e6c 100644 --- a/tests/test_bugfix.py +++ b/tests/test_bugfix.py @@ -9,9 +9,6 @@ def test_repair_file_column(): 'settings': {}, 'survey': [ { - '$kuid': 'qt1C9CFUj', - '$qpath': 'interviewer_name_text', - '$xpath': 'interviewer_name_text', 'label': [ "Введіть ім'я співробітника:", "Enter interviewer's name", @@ -20,9 +17,6 @@ def test_repair_file_column(): 'type': 'text', }, { - '$kuid': 'bHGP1YkRq', - '$qpath': 'oblast', - '$xpath': 'oblast', 'label': ['Область', 'Oblast'], 'media::file': [None, None, 'oblast.csv'], 'name': 'oblast',