Skip to content

Commit

Permalink
Merge pull request #323 from kobotoolbox/322-export-failure
Browse files Browse the repository at this point in the history
Fix `Mismatched labels and translations` error for forms using multiple languages and `select_one_from_file` or `select_multiple_from_file`
  • Loading branch information
jnm authored Jun 17, 2024
2 parents 95e2743 + 9a38ac9 commit 451df4c
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 6 deletions.
8 changes: 8 additions & 0 deletions src/formpack/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@
'hxl': '',
}

# Which columns can contain media filenames? Do not confuse this with question
# types that can collect media! From https://xlsform.org/en/#media:
# You can include questions in your form that display images or that play
# video or audio files
# …
# Media is translatable in the same way as labels and hints…
MEDIA_COLUMN_NAMES = ('audio', 'image', 'video')

# Export Settings
EXPORT_SETTING_FIELDS = 'fields'
EXPORT_SETTING_FIELDS_FROM_ALL_VERSIONS = 'fields_from_all_versions'
Expand Down
78 changes: 78 additions & 0 deletions src/formpack/utils/bugfix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# coding: utf-8

from copy import deepcopy

from ..constants import UNTRANSLATED


def repair_file_column_content_in_place(content) -> bool:
"""
#321 introduced a bug where the `file` column gets renamed to `media::file`
and treated as a translatable column (see #322). This function repairs that
damage.
This function is intended to be run by KPI, which should it to correct
`Asset` and `AssetVersion` content.
Returns `True` if any change was made
"""

# Store updates and apply them to `content` only of all conditions are met
updates = {}

try:
for key in 'translated', 'translations', 'survey':
updates[key] = deepcopy(content[key])
except KeyError:
# Do not proceed if content is incomplete
return False

try:
updates['translated'].remove('media::file')
except ValueError:
# The invalid column `media::file` inside `translated` is a hallmark of
# the problem this method is intended to fix. Do not proceed if it was
# not found
return False

max_label_list_length = 0
any_row_fixed = False
for row in updates['survey']:
max_label_list_length = max(
max_label_list_length, len(row.get('label', []))
)
bad_file_col = row.get('media::file')
if not bad_file_col:
continue
if not isinstance(bad_file_col, list):
# All problems of our own making (#322) will result in
# `media::file` being a list (or array when JSON)
continue
for val in bad_file_col:
if val is not None:
row['file'] = val
del row['media::file']
any_row_fixed = True
break
if not any_row_fixed:
return False

# Multi-language forms need an additional fix to remove a superfluous null
# translation added by the bogus `media::file` column
if len(updates['translations']) > max_label_list_length:
labels_translations_mismatch = True
if len(updates['translations']) == max_label_list_length + 1:
try:
updates['translations'].remove(UNTRANSLATED)
except ValueError:
pass
else:
labels_translations_mismatch = False # Fixed it!
if labels_translations_mismatch:
# This form has uncorrected problems. Bail out instead of modifying
# it at all
return False

# Success! Apply updates to the original content
content.update(updates)
return True
15 changes: 9 additions & 6 deletions src/formpack/utils/expand_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@

from .array_to_xpath import EXPANDABLE_FIELD_TYPES
from .iterator import get_first_occurrence
from .replace_aliases import MEDIA_TYPES, META_TYPES, selects
from .replace_aliases import META_TYPES, selects
from ..constants import (
MEDIA_COLUMN_NAMES,
OR_OTHER_COLUMN,
TAG_COLUMNS_AND_SEPARATORS,
UNTRANSLATED,
Expand Down Expand Up @@ -186,7 +187,7 @@ def _get_known_translated_cols(translated_cols: List[str]) -> List[str]:

_translated_cols = []
for col in translated_cols:
if col in MEDIA_TYPES:
if col in MEDIA_COLUMN_NAMES:
col = f'media::{col}'
_translated_cols.append(col)

Expand All @@ -207,7 +208,7 @@ def _get_special_survey_cols(
'hint::English',
For more examples, see tests.
"""
RE_MEDIA_TYPES = '|'.join(MEDIA_TYPES)
RE_MEDIA_COLUMN_NAMES = '|'.join(MEDIA_COLUMN_NAMES)

uniq_cols = OrderedDict()
special = OrderedDict()
Expand Down Expand Up @@ -238,14 +239,14 @@ def _mark_special(**kwargs: str) -> None:
column=column_name,
translation=UNTRANSLATED,
)
if ':' not in column_name and column_name not in MEDIA_TYPES:
if ':' not in column_name and column_name not in MEDIA_COLUMN_NAMES:
continue
if column_name.startswith('bind:'):
continue
if column_name.startswith('body:'):
continue
mtch = re.match(
rf'^(media\s*::?\s*)?({RE_MEDIA_TYPES})\s*::?\s*([^:]+)$',
rf'^(media\s*::?\s*)?({RE_MEDIA_COLUMN_NAMES})\s*::?\s*([^:]+)$',
column_name,
)
if mtch:
Expand All @@ -260,7 +261,9 @@ def _mark_special(**kwargs: str) -> None:
translation=translation,
)
continue
mtch = re.match(rf'^(media\s*::?\s*)?({RE_MEDIA_TYPES})$', column_name)
mtch = re.match(
rf'^(media\s*::?\s*)?({RE_MEDIA_COLUMN_NAMES})$', column_name
)
if mtch:
matched = mtch.groups()
media_type = matched[1]
Expand Down
32 changes: 32 additions & 0 deletions tests/test_bugfix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# coding: utf-8

from formpack.utils.bugfix import repair_file_column_content_in_place


def test_repair_file_column():
content = {
'schema': '1',
'settings': {},
'survey': [
{
'label': [
"Введіть ім'я співробітника:",
"Enter interviewer's name",
],
'name': 'interviewer_name_text',
'type': 'text',
},
{
'label': ['Область', 'Oblast'],
'media::file': [None, None, 'oblast.csv'],
'name': 'oblast',
'type': 'select_one_from_file',
},
],
'translated': ['label', 'media::file'],
'translations': ['Ukrainian', 'English', None],
}
assert repair_file_column_content_in_place(content)
assert content['survey'][1]['file'] == 'oblast.csv'
assert content['translated'] == ['label']
assert content['translations'] == ['Ukrainian', 'English']
39 changes: 39 additions & 0 deletions tests/test_exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,45 @@ def test_translations_labels_mismatch(self):
with self.assertRaises(TranslationError):
FormPack(schemas, title)

def test_translations_with_select_multiple_from_file(self):
"""
Make sure that exports do not crash if `select_multiple_from_file` is
used in a multi-language form. See #322
"""

title, schemas, submissions = restaurant_profile

# Add a `select_multiple_from_file` question to existing `rpV4` schema
assert schemas[-1]['version'] == 'rpV4'
schemas[-1]['content']['survey'].append(
{
'type': 'select_multiple_from_file suppliers.csv',
'name': 'suppliers',
'label::english': 'suppliers',
'label::french': 'fournisseurs',
}
)
fp = FormPack(schemas[-1:], title)

# Expect that a `file` column has been added; see
# `test_expand_content.test_expand_select_x_from_file()`
assert (
fp.versions['rpV4'].schema['content']['survey'][-1]['file']
== 'suppliers.csv'
)

# Feed this schema with `file` back into formpack again
fp = FormPack([fp.versions['rpV4'].schema], title)

# At this point, issue #322 would have already caused
# `TranslationError: Mismatched labels and translations`, but perform a
# simple export anyway, adding a response for `suppliers`
submissions[-1]['suppliers'] = 'SFG Ocsys'
export = fp.export(versions=fp.versions)
actual_dict = export.to_dict(submissions)
assert actual_dict['Restaurant profile']['fields'][-1] == 'suppliers'
assert actual_dict['Restaurant profile']['data'][-1][-1] == 'SFG Ocsys'

def test_simple_nested_grouped_repeatable(self):
title, schemas, submissions = build_fixture(
'simple_nested_grouped_repeatable'
Expand Down

0 comments on commit 451df4c

Please sign in to comment.