diff --git a/CHANGELOG.md b/CHANGELOG.md index 74834154a5b..1d530f6e285 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 () - Server-side validation for attribute specifications () +- \[API\] File downloading failures for filenames with special characters l() ### Security - TDB diff --git a/cvat/apps/engine/backup.py b/cvat/apps/engine/backup.py index 799b217c2d9..f33c789b074 100644 --- a/cvat/apps/engine/backup.py +++ b/cvat/apps/engine/backup.py @@ -26,7 +26,6 @@ from rest_framework.renderers import JSONRenderer from rest_framework.response import Response from rest_framework.exceptions import ValidationError, PermissionDenied, NotFound -from django_sendfile import sendfile from distutils.util import strtobool import cvat.apps.dataset_manager as dm @@ -37,7 +36,9 @@ LabeledDataSerializer, SegmentSerializer, SimpleJobSerializer, TaskReadSerializer, ProjectReadSerializer, ProjectFileSerializer, TaskFileSerializer, RqIdSerializer) from cvat.apps.engine.utils import ( - av_scan_paths, process_failed_job, configure_dependent_job, get_rq_job_meta, get_import_rq_id, import_resource_with_clean_up_after + av_scan_paths, process_failed_job, configure_dependent_job, + get_rq_job_meta, get_import_rq_id, import_resource_with_clean_up_after, + sendfile ) from cvat.apps.engine.models import ( StorageChoice, StorageMethodChoice, DataChoice, Task, Project, Location) diff --git a/cvat/apps/engine/utils.py b/cvat/apps/engine/utils.py index 63953fe58ac..adeb3bbc07b 100644 --- a/cvat/apps/engine/utils.py +++ b/cvat/apps/engine/utils.py @@ -14,6 +14,7 @@ import subprocess import os import urllib.parse +import re import logging import platform @@ -31,6 +32,7 @@ from multiprocessing import cpu_count from django.core.exceptions import ValidationError +from django_sendfile import sendfile as _sendfile Import = namedtuple("Import", ["module", "name", "alias"]) @@ -284,3 +286,46 @@ def get_cpu_number() -> int: # the number of cpu cannot be determined cpu_number = 1 return cpu_number + +def make_attachment_file_name(filename: str) -> str: + # Borrowed from sendfile() to minimize changes for users. + # Added whitespace conversion and squashing into a single space + # Added removal of control characters + + filename = str(filename).replace("\\", "\\\\").replace('"', r"\"") + filename = re.sub(r"\s+", " ", filename) + + # From https://github.com/encode/uvicorn/blob/cd18c3b14aa810a4a6ebb264b9a297d6f8afb9ac/uvicorn/protocols/http/httptools_impl.py#L51 + filename = re.sub(r"[\x00-\x1F\x7F]", "", filename) + + return filename + +def sendfile( + request, filename, + attachment=False, attachment_filename=None, mimetype=None, encoding=None +): + """ + Create a response to send file using backend configured in ``SENDFILE_BACKEND`` + + ``filename`` is the absolute path to the file to send. + + If ``attachment`` is ``True`` the ``Content-Disposition`` header will be set accordingly. + This will typically prompt the user to download the file, rather + than view it. But even if ``False``, the user may still be prompted, depending + on the browser capabilities and configuration. + + The ``Content-Disposition`` filename depends on the value of ``attachment_filename``: + + ``None`` (default): Same as ``filename`` + ``False``: No ``Content-Disposition`` filename + ``String``: Value used as filename + + If neither ``mimetype`` or ``encoding`` are specified, then they will be guessed via the + filename (using the standard Python mimetypes module) + """ + # A drop-in replacement for sendfile with extra filename cleaning + + if attachment_filename: + attachment_filename = make_attachment_file_name(attachment_filename) + + return _sendfile(request, filename, attachment, attachment_filename, mimetype, encoding) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 225fd729cd2..4d498d495a0 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -42,7 +42,6 @@ from rest_framework.permissions import SAFE_METHODS from rest_framework.response import Response from rest_framework.settings import api_settings -from django_sendfile import sendfile import cvat.apps.dataset_manager as dm import cvat.apps.dataset_manager.views # pylint: disable=unused-import @@ -75,7 +74,7 @@ from cvat.apps.engine.utils import ( av_scan_paths, process_failed_job, configure_dependent_job, parse_exception_message, get_rq_job_meta, get_import_rq_id, - import_resource_with_clean_up_after + import_resource_with_clean_up_after, sendfile ) from cvat.apps.engine import backup from cvat.apps.engine.mixins import PartialUpdateModelMixin, UploadMixin, AnnotationMixin, SerializeMixin diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index 90aca64db8b..07adc468570 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -15,10 +15,10 @@ from rest_framework import serializers, status from rest_framework.response import Response -from django_sendfile import sendfile from cvat.apps.dataset_manager.views import clear_export_cache, log_exception from cvat.apps.engine.log import slogger +from cvat.apps.engine.utils import sendfile DEFAULT_CACHE_TTL = timedelta(hours=1) diff --git a/cvat/apps/lambda_manager/views.py b/cvat/apps/lambda_manager/views.py index 370521964db..512977792b5 100644 --- a/cvat/apps/lambda_manager/views.py +++ b/cvat/apps/lambda_manager/views.py @@ -10,7 +10,6 @@ import os import textwrap import glob -from django_sendfile import sendfile from copy import deepcopy from enum import Enum from functools import wraps @@ -39,6 +38,7 @@ from cvat.apps.lambda_manager.serializers import ( FunctionCallRequestSerializer, FunctionCallSerializer ) +from cvat.apps.engine.utils import sendfile from cvat.utils.http import make_requests_session from cvat.apps.iam.permissions import LambdaPermission from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS diff --git a/cvat/apps/opencv/views.py b/cvat/apps/opencv/views.py index 3090ad684fb..40a0d06d814 100644 --- a/cvat/apps/opencv/views.py +++ b/cvat/apps/opencv/views.py @@ -1,7 +1,7 @@ import os import glob from django.conf import settings -from django_sendfile import sendfile +from cvat.apps.engine.utils import sendfile def OpenCVLibrary(request): dirname = os.path.join(settings.STATIC_ROOT, 'opencv', 'js') diff --git a/tests/python/rest_api/test_tasks.py b/tests/python/rest_api/test_tasks.py index 7c26b3b1229..0184f66e56c 100644 --- a/tests/python/rest_api/test_tasks.py +++ b/tests/python/rest_api/test_tasks.py @@ -635,6 +635,27 @@ def test_can_export_task_to_coco_format(self, admin_user, tid): assert annotations["tracks"][0]["shapes"][0]["frame"] == 0 assert annotations["tracks"][0]["elements"][0]["shapes"][0]["frame"] == 0 + @pytest.mark.usefixtures("restore_db_per_function") + def test_can_download_task_with_special_chars_in_name(self, admin_user): + # Control characters in filenames may conflict with the Content-Disposition header + # value restrictions, as it needs to include the downloaded file name. + + task_spec = { + "name": "test_special_chars_{}_in_name".format("".join(chr(c) for c in range(1, 127))), + "labels": [{"name": "cat"}], + } + + task_data = { + "image_quality": 75, + "client_files": generate_image_files(1), + } + + task_id, _ = create_task(admin_user, task_spec, task_data) + + response = self._test_export_task(admin_user, task_id, format="CVAT for images 1.1") + assert response.status == HTTPStatus.OK + assert zipfile.is_zipfile(io.BytesIO(response.data)) + @pytest.mark.usefixtures("restore_db_per_function") @pytest.mark.usefixtures("restore_cvat_data")