Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Mismatched labels and translations error for forms using multiple languages and select_one_from_file or select_multiple_from_file #323

Merged
merged 4 commits into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/formpack/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@
'hxl': '',
}

# Which columns can contain media filenames? Do not confuse this with question
# types that can collect media! From https://xlsform.org/en/#media:
# You can include questions in your form that display images or that play
# video or audio files
# …
# Media is translatable in the same way as labels and hints…
MEDIA_COLUMN_NAMES = ('audio', 'image', 'video')

# Export Settings
EXPORT_SETTING_FIELDS = 'fields'
EXPORT_SETTING_FIELDS_FROM_ALL_VERSIONS = 'fields_from_all_versions'
Expand Down
78 changes: 78 additions & 0 deletions src/formpack/utils/bugfix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# coding: utf-8

from copy import deepcopy

from ..constants import UNTRANSLATED


def repair_file_column_content_in_place(content) -> bool:
"""
#321 introduced a bug where the `file` column gets renamed to `media::file`
and treated as a translatable column (see #322). This function repairs that
damage.

This function is intended to be run by KPI, which should it to correct
`Asset` and `AssetVersion` content.

Returns `True` if any change was made
"""

# Store updates and apply them to `content` only of all conditions are met
updates = {}

try:
for key in 'translated', 'translations', 'survey':
updates[key] = deepcopy(content[key])
except KeyError:
# Do not proceed if content is incomplete
return False

try:
updates['translated'].remove('media::file')
except ValueError:
# The invalid column `media::file` inside `translated` is a hallmark of
# the problem this method is intended to fix. Do not proceed if it was
# not found
return False

max_label_list_length = 0
any_row_fixed = False
for row in updates['survey']:
max_label_list_length = max(
max_label_list_length, len(row.get('label', []))
)
bad_file_col = row.get('media::file')
if not bad_file_col:
continue
if not isinstance(bad_file_col, list):
# All problems of our own making (#322) will result in
# `media::file` being a list (or array when JSON)
continue
for val in bad_file_col:
if val is not None:
row['file'] = val
del row['media::file']
any_row_fixed = True
break
if not any_row_fixed:
return False

# Multi-language forms need an additional fix to remove a superfluous null
# translation added by the bogus `media::file` column
if len(updates['translations']) > max_label_list_length:
labels_translations_mismatch = True
if len(updates['translations']) == max_label_list_length + 1:
try:
updates['translations'].remove(UNTRANSLATED)
except ValueError:
pass
else:
labels_translations_mismatch = False # Fixed it!
if labels_translations_mismatch:
# This form has uncorrected problems. Bail out instead of modifying
# it at all
return False

# Success! Apply updates to the original content
content.update(updates)
return True
15 changes: 9 additions & 6 deletions src/formpack/utils/expand_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@

from .array_to_xpath import EXPANDABLE_FIELD_TYPES
from .iterator import get_first_occurrence
from .replace_aliases import MEDIA_TYPES, META_TYPES, selects
from .replace_aliases import META_TYPES, selects
from ..constants import (
MEDIA_COLUMN_NAMES,
OR_OTHER_COLUMN,
TAG_COLUMNS_AND_SEPARATORS,
UNTRANSLATED,
Expand Down Expand Up @@ -186,7 +187,7 @@ def _get_known_translated_cols(translated_cols: List[str]) -> List[str]:

_translated_cols = []
for col in translated_cols:
if col in MEDIA_TYPES:
if col in MEDIA_COLUMN_NAMES:
col = f'media::{col}'
_translated_cols.append(col)

Expand All @@ -207,7 +208,7 @@ def _get_special_survey_cols(
'hint::English',
For more examples, see tests.
"""
RE_MEDIA_TYPES = '|'.join(MEDIA_TYPES)
RE_MEDIA_COLUMN_NAMES = '|'.join(MEDIA_COLUMN_NAMES)

uniq_cols = OrderedDict()
special = OrderedDict()
Expand Down Expand Up @@ -238,14 +239,14 @@ def _mark_special(**kwargs: str) -> None:
column=column_name,
translation=UNTRANSLATED,
)
if ':' not in column_name and column_name not in MEDIA_TYPES:
if ':' not in column_name and column_name not in MEDIA_COLUMN_NAMES:
continue
if column_name.startswith('bind:'):
continue
if column_name.startswith('body:'):
continue
mtch = re.match(
rf'^(media\s*::?\s*)?({RE_MEDIA_TYPES})\s*::?\s*([^:]+)$',
rf'^(media\s*::?\s*)?({RE_MEDIA_COLUMN_NAMES})\s*::?\s*([^:]+)$',
column_name,
)
if mtch:
Expand All @@ -260,7 +261,9 @@ def _mark_special(**kwargs: str) -> None:
translation=translation,
)
continue
mtch = re.match(rf'^(media\s*::?\s*)?({RE_MEDIA_TYPES})$', column_name)
mtch = re.match(
rf'^(media\s*::?\s*)?({RE_MEDIA_COLUMN_NAMES})$', column_name
)
if mtch:
matched = mtch.groups()
media_type = matched[1]
Expand Down
32 changes: 32 additions & 0 deletions tests/test_bugfix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# coding: utf-8

from formpack.utils.bugfix import repair_file_column_content_in_place


def test_repair_file_column():
content = {
'schema': '1',
'settings': {},
'survey': [
{
'label': [
"Введіть ім'я співробітника:",
"Enter interviewer's name",
],
'name': 'interviewer_name_text',
'type': 'text',
},
{
'label': ['Область', 'Oblast'],
'media::file': [None, None, 'oblast.csv'],
'name': 'oblast',
'type': 'select_one_from_file',
},
],
'translated': ['label', 'media::file'],
'translations': ['Ukrainian', 'English', None],
}
assert repair_file_column_content_in_place(content)
assert content['survey'][1]['file'] == 'oblast.csv'
assert content['translated'] == ['label']
assert content['translations'] == ['Ukrainian', 'English']
39 changes: 39 additions & 0 deletions tests/test_exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,45 @@ def test_translations_labels_mismatch(self):
with self.assertRaises(TranslationError):
FormPack(schemas, title)

def test_translations_with_select_multiple_from_file(self):
"""
Make sure that exports do not crash if `select_multiple_from_file` is
used in a multi-language form. See #322
"""

title, schemas, submissions = restaurant_profile

# Add a `select_multiple_from_file` question to existing `rpV4` schema
assert schemas[-1]['version'] == 'rpV4'
schemas[-1]['content']['survey'].append(
{
'type': 'select_multiple_from_file suppliers.csv',
'name': 'suppliers',
'label::english': 'suppliers',
'label::french': 'fournisseurs',
}
)
fp = FormPack(schemas[-1:], title)

# Expect that a `file` column has been added; see
# `test_expand_content.test_expand_select_x_from_file()`
assert (
fp.versions['rpV4'].schema['content']['survey'][-1]['file']
== 'suppliers.csv'
)

# Feed this schema with `file` back into formpack again
fp = FormPack([fp.versions['rpV4'].schema], title)

# At this point, issue #322 would have already caused
# `TranslationError: Mismatched labels and translations`, but perform a
# simple export anyway, adding a response for `suppliers`
submissions[-1]['suppliers'] = 'SFG Ocsys'
export = fp.export(versions=fp.versions)
actual_dict = export.to_dict(submissions)
assert actual_dict['Restaurant profile']['fields'][-1] == 'suppliers'
assert actual_dict['Restaurant profile']['data'][-1][-1] == 'SFG Ocsys'

def test_simple_nested_grouped_repeatable(self):
title, schemas, submissions = build_fixture(
'simple_nested_grouped_repeatable'
Expand Down
Loading