-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Issue 1291: Implement storage of ClamAV scan results #1321
Changes from 11 commits
e7f9633
647de7d
121d8c8
bfc04f8
a95300f
b643792
47bdc15
8acb8bc
4e1e545
5bc46a6
b7a534b
22b0f33
772cbe4
1cda44e
2a45e0d
92cf09e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,18 +4,19 @@ | |
|
||
from tdpservice.data_files.errors import ImmutabilityError | ||
from tdpservice.data_files.models import DataFile | ||
from tdpservice.data_files.validators import validate_data_file | ||
from tdpservice.data_files.validators import ( | ||
validate_file_extension, | ||
validate_file_infection, | ||
) | ||
from tdpservice.security.models import ClamAVFileScan | ||
from tdpservice.stts.models import STT | ||
from tdpservice.users.models import User | ||
|
||
|
||
class DataFileSerializer(serializers.ModelSerializer): | ||
"""Serializer for Data files.""" | ||
|
||
file = serializers.FileField( | ||
write_only=True, | ||
validators=[validate_data_file] | ||
) | ||
file = serializers.FileField(write_only=True) | ||
stt = serializers.PrimaryKeyRelatedField(queryset=STT.objects.all()) | ||
user = serializers.PrimaryKeyRelatedField(queryset=User.objects.all()) | ||
|
||
|
@@ -39,8 +40,28 @@ class Meta: | |
|
||
def create(self, validated_data): | ||
"""Create a new entry with a new version number.""" | ||
return DataFile.create_new_version(validated_data) | ||
data_file = DataFile.create_new_version(validated_data) | ||
|
||
# Determine the matching ClamAVFileScan for this DataFile. | ||
av_scan = ClamAVFileScan.objects.filter( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
file_name=data_file.original_filename, | ||
uploaded_by=data_file.user | ||
).last() | ||
|
||
# Link the newly created DataFile to the relevant ClamAVFileScan. | ||
if av_scan is not None: | ||
av_scan.data_file = data_file | ||
av_scan.save() | ||
|
||
return data_file | ||
|
||
def update(self, instance, validated_data): | ||
"""Throw an error if a user tries to update a data_file.""" | ||
raise ImmutabilityError(instance, validated_data) | ||
|
||
def validate_file(self, file): | ||
"""Perform all validation steps on a given file.""" | ||
user = self.context.get('user') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This context needs to be passed in by the implementing view(s) in the |
||
validate_file_extension(file.name) | ||
validate_file_infection(file, file.name, user) | ||
return file |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,25 +4,42 @@ | |
|
||
from tdpservice.data_files.errors import ImmutabilityError | ||
from tdpservice.data_files.serializers import DataFileSerializer | ||
from tdpservice.data_files.validators import validate_file_extension | ||
from tdpservice.data_files.validators import ( | ||
validate_file_extension, | ||
validate_file_infection | ||
) | ||
from tdpservice.security.clients import ClamAVClient | ||
|
||
|
||
@pytest.mark.django_db | ||
def test_serializer_with_valid_data(data_file_data): | ||
def test_serializer_with_valid_data(data_file_data, user): | ||
"""If a serializer has valid data it will return a valid object.""" | ||
create_serializer = DataFileSerializer(data=data_file_data) | ||
create_serializer = DataFileSerializer( | ||
context={'user': user}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We also need to manually pass this context in tests as it is used in validation steps. |
||
data=data_file_data | ||
) | ||
create_serializer.is_valid(raise_exception=True) | ||
assert create_serializer.is_valid() is True | ||
|
||
|
||
@pytest.mark.django_db | ||
def test_serializer_increment_create(data_file_data, other_data_file_data): | ||
def test_serializer_increment_create( | ||
data_file_data, | ||
other_data_file_data, | ||
user | ||
): | ||
"""Test serializer produces data_files with correct version.""" | ||
serializer_1 = DataFileSerializer(data=data_file_data) | ||
serializer_1 = DataFileSerializer( | ||
context={'user': user}, | ||
data=data_file_data | ||
) | ||
assert serializer_1.is_valid() is True | ||
data_file_1 = serializer_1.save() | ||
|
||
serializer_2 = DataFileSerializer(data=other_data_file_data) | ||
serializer_2 = DataFileSerializer( | ||
context={'user': user}, | ||
data=other_data_file_data | ||
) | ||
assert serializer_2.is_valid() is True | ||
data_file_2 = serializer_2.save() | ||
|
||
|
@@ -45,13 +62,44 @@ def test_immutability_of_data_file(data_file_instance): | |
|
||
|
||
@pytest.mark.django_db | ||
def test_created_at(data_file_data): | ||
def test_created_at(data_file_data, user): | ||
"""If a serializer has valid data it will return a valid object.""" | ||
create_serializer = DataFileSerializer(data=data_file_data) | ||
create_serializer = DataFileSerializer( | ||
context={'user': user}, | ||
data=data_file_data | ||
) | ||
assert create_serializer.is_valid() is True | ||
data_file = create_serializer.save() | ||
|
||
assert data_file.created_at | ||
assert data_file.av_scans.exists() | ||
|
||
|
||
@pytest.mark.django_db | ||
def test_data_file_still_created_if_av_scan_fails_to_create( | ||
data_file_data, | ||
mocker, | ||
user | ||
): | ||
"""Test valid DataFile is still created if ClamAV scan isn't recorded. | ||
|
||
In this scenario all validation would have already passed but in the event | ||
the ClamAVFileScan isn't found in the database due to an error or race | ||
condition we should still store the DataFile. | ||
""" | ||
mocker.patch( | ||
'tdpservice.security.models.ClamAVFileScanManager.record_scan', | ||
return_value=None | ||
) | ||
create_serializer = DataFileSerializer( | ||
context={'user': user}, | ||
data=data_file_data | ||
) | ||
assert create_serializer.is_valid() is True | ||
data_file = create_serializer.save() | ||
|
||
assert data_file is not None | ||
assert data_file.av_scans.count() == 0 | ||
|
||
|
||
@pytest.mark.parametrize("file_name", [ | ||
|
@@ -87,3 +135,26 @@ def test_rejects_invalid_file_extensions(file_name): | |
"""Test invalid file names are rejected by serializer validation.""" | ||
with pytest.raises(ValidationError): | ||
validate_file_extension(file_name) | ||
|
||
|
||
@pytest.mark.django_db | ||
def test_rejects_infected_file(infected_file, fake_file_name, user): | ||
"""Test infected files are rejected by serializer validation.""" | ||
with pytest.raises(ValidationError): | ||
validate_file_infection(infected_file, fake_file_name, user) | ||
|
||
|
||
@pytest.mark.django_db | ||
def test_rejects_uploads_on_clamav_connection_error( | ||
fake_file, | ||
fake_file_name, | ||
mocker, | ||
user | ||
): | ||
"""Test that DataFiles cannot pass validation if ClamAV is down.""" | ||
mocker.patch( | ||
'tdpservice.security.clients.ClamAVClient.scan_file', | ||
side_effect=ClamAVClient.ServiceUnavailable() | ||
) | ||
with pytest.raises(ValidationError): | ||
validate_file_infection(fake_file, fake_file_name, user) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -55,6 +55,12 @@ def filter_queryset(self, queryset): | |
self.filterset_class = None | ||
return super().filter_queryset(queryset) | ||
|
||
def get_serializer_context(self): | ||
"""Retrieve additional context required by serializer.""" | ||
context = super().get_serializer_context() | ||
context['user'] = self.request.user | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is what gives the |
||
return context | ||
|
||
@action(methods=["get"], detail=True) | ||
def download(self, request, pk=None): | ||
"""Retrieve a file from s3 then stream it to the client.""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
"""Admin classes for the tdpservice.security app.""" | ||
from django.contrib import admin | ||
|
||
from tdpservice.security.models import ClamAVFileScan | ||
|
||
|
||
@admin.register(ClamAVFileScan) | ||
class ClamAVFileScanAdmin(admin.ModelAdmin): | ||
"""Admin interface for Clam AV File Scan instances.""" | ||
|
||
list_display = [ | ||
'scanned_at', | ||
'result', | ||
'uploaded_by', | ||
'file_name', | ||
'file_size_human', | ||
'file_shasum', | ||
] | ||
|
||
list_filter = [ | ||
'result', | ||
'uploaded_by', | ||
] | ||
|
||
@admin.display(description='File Size') | ||
def file_size_human(self, obj): | ||
"""Return human friendly file size, converted to appropriate unit.""" | ||
return obj.file_size_humanized |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
"""App configuration for tdpservice.security.""" | ||
from django.apps import AppConfig | ||
|
||
|
||
class SecurityConfig(AppConfig): | ||
"""App configuration.""" | ||
|
||
default_auto_field = 'django.db.models.BigAutoField' | ||
name = 'tdpservice.security' | ||
verbose_name = 'Security' |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,15 @@ | ||
"""Globally available external client services.""" | ||
"""External client services related to security auditing.""" | ||
from requests.adapters import HTTPAdapter | ||
from requests.exceptions import ConnectionError | ||
from requests.packages.urllib3.util.retry import Retry | ||
from requests.sessions import Session | ||
import logging | ||
|
||
from django.conf import settings | ||
from django.core.files.base import File | ||
|
||
from tdpservice.security.models import ClamAVFileScan | ||
from tdpservice.users.models import User | ||
|
||
logger = logging.getLogger(__name__) | ||
logger.setLevel(logging.DEBUG) | ||
|
@@ -44,15 +48,18 @@ def init_session(self): | |
session.mount(self.endpoint_url, HTTPAdapter(max_retries=retries)) | ||
return session | ||
|
||
def scan_file(self, file, file_name) -> bool: | ||
def scan_file(self, file: File, file_name: str, uploaded_by: User) -> bool: | ||
"""Scan a file for virus infections. | ||
|
||
:param file: | ||
The file or file-like object that should be scanned | ||
:param file_name: | ||
The name of the target file (str). | ||
The string name of the file. | ||
:param uploaded_by: | ||
The User that uploaded the given file. | ||
:returns is_file_clean: | ||
A boolean indicating whether or not the file passed the ClamAV scan | ||
:raises ClamAVClient.ServiceUnavailable: | ||
""" | ||
logger.debug(f'Initiating virus scan for file: {file_name}') | ||
try: | ||
|
@@ -62,21 +69,31 @@ def scan_file(self, file, file_name) -> bool: | |
files={'file': file}, | ||
timeout=settings.AV_SCAN_TIMEOUT | ||
jorgegonzalez marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
|
||
except ConnectionError as err: | ||
logger.debug(f'Encountered error scanning file: {err}') | ||
logger.error(f'ClamAV connection failure: {err}') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the case we can't connect to ClamAV we will not store a result for the scan. |
||
raise self.ServiceUnavailable() | ||
|
||
if scan_response.status_code in self.SCAN_CODES['CLEAN']: | ||
logger.debug( | ||
f'File scan marked as CLEAN for file: {file_name}' | ||
) | ||
return True | ||
msg = f'File scan marked as CLEAN for file: {file_name}' | ||
scan_result = ClamAVFileScan.Result.CLEAN | ||
|
||
if scan_response.status_code in self.SCAN_CODES['INFECTED']: | ||
logger.debug( | ||
f'File scan marked as INFECTED for file: {file_name}' | ||
) | ||
return False | ||
elif scan_response.status_code in self.SCAN_CODES['INFECTED']: | ||
msg = f'File scan marked as INFECTED for file: {file_name}' | ||
scan_result = ClamAVFileScan.Result.INFECTED | ||
|
||
else: | ||
msg = f'Unable to scan file with name: {file_name}' | ||
scan_result = ClamAVFileScan.Result.ERROR | ||
|
||
# Log and create audit records with the results of this scan | ||
logger.debug(msg) | ||
ClamAVFileScan.objects.record_scan( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The execution flow was altered here so we only return at the end, after creating the |
||
file, | ||
file_name, | ||
msg, | ||
scan_result, | ||
uploaded_by | ||
) | ||
|
||
logger.debug(f'Unable to scan file with name: {file_name}') | ||
return False | ||
return True if scan_result == ClamAVFileScan.Result.CLEAN else False |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Generated by Django 3.2.5 on 2021-09-14 15:47 | ||
|
||
from django.conf import settings | ||
from django.db import migrations, models | ||
import django.db.models.deletion | ||
|
||
|
||
class Migration(migrations.Migration): | ||
|
||
initial = True | ||
|
||
dependencies = [ | ||
migrations.swappable_dependency(settings.AUTH_USER_MODEL), | ||
('data_files', '0009_update_log_entry_content_type'), | ||
] | ||
|
||
operations = [ | ||
migrations.CreateModel( | ||
name='ClamAVFileScan', | ||
fields=[ | ||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), | ||
('scanned_at', models.DateTimeField(auto_now_add=True)), | ||
('file_name', models.TextField()), | ||
('file_size', models.PositiveBigIntegerField(help_text='The file size in bytes')), | ||
('file_shasum', models.TextField(help_text='The SHA256 checksum of the uploaded file')), | ||
('result', models.CharField(choices=[('CLEAN', 'Clean'), ('INFECTED', 'Infected'), ('ERROR', 'Error')], help_text='Scan result for uploaded file', max_length=12)), | ||
('data_file', models.ForeignKey(blank=True, help_text='The resulting DataFile object if this scan was clean', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='av_scans', to='data_files.datafile')), | ||
('uploaded_by', models.ForeignKey(help_text='The user that uploaded the scanned file', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='av_scans', to=settings.AUTH_USER_MODEL)), | ||
], | ||
options={ | ||
'verbose_name': 'Clam AV File Scan', | ||
}, | ||
), | ||
] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This validation was moved to the serializers
validate_file
method instead so we could access serializer context.