Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 1291: Implement storage of ClamAV scan results #1321

Merged
merged 16 commits into from
Sep 24, 2021
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions tdrs-backend/tdpservice/core/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class LogEntryAdmin(admin.ModelAdmin):
'content_type',
'object_link',
'action_flag',
'change_message',
]

def object_link(self, obj):
Expand Down
33 changes: 27 additions & 6 deletions tdrs-backend/tdpservice/data_files/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,19 @@

from tdpservice.data_files.errors import ImmutabilityError
from tdpservice.data_files.models import DataFile
from tdpservice.data_files.validators import validate_data_file
from tdpservice.data_files.validators import (
validate_file_extension,
validate_file_infection,
)
from tdpservice.security.models import ClamAVFileScan
from tdpservice.stts.models import STT
from tdpservice.users.models import User


class DataFileSerializer(serializers.ModelSerializer):
"""Serializer for Data files."""

file = serializers.FileField(
write_only=True,
validators=[validate_data_file]
)
file = serializers.FileField(write_only=True)
Copy link
Author

@jtwillis92 jtwillis92 Sep 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This validation was moved to the serializers validate_file method instead so we could access serializer context.

stt = serializers.PrimaryKeyRelatedField(queryset=STT.objects.all())
user = serializers.PrimaryKeyRelatedField(queryset=User.objects.all())

Expand All @@ -39,8 +40,28 @@ class Meta:

def create(self, validated_data):
"""Create a new entry with a new version number."""
return DataFile.create_new_version(validated_data)
data_file = DataFile.create_new_version(validated_data)

# Determine the matching ClamAVFileScan for this DataFile.
av_scan = ClamAVFileScan.objects.filter(
Copy link
Author

@jtwillis92 jtwillis92 Sep 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The DataFile doesn't get created until after the scan is complete and recorded so we have to retroactively link these records if the creation was successful.

file_name=data_file.original_filename,
uploaded_by=data_file.user
).last()

# Link the newly created DataFile to the relevant ClamAVFileScan.
if av_scan is not None:
av_scan.data_file = data_file
av_scan.save()

return data_file

def update(self, instance, validated_data):
"""Throw an error if a user tries to update a data_file."""
raise ImmutabilityError(instance, validated_data)

def validate_file(self, file):
"""Perform all validation steps on a given file."""
user = self.context.get('user')
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This context needs to be passed in by the implementing view(s) in the get_serializer_context method.

validate_file_extension(file.name)
validate_file_infection(file, file.name, user)
return file
87 changes: 79 additions & 8 deletions tdrs-backend/tdpservice/data_files/test/test_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,42 @@

from tdpservice.data_files.errors import ImmutabilityError
from tdpservice.data_files.serializers import DataFileSerializer
from tdpservice.data_files.validators import validate_file_extension
from tdpservice.data_files.validators import (
validate_file_extension,
validate_file_infection
)
from tdpservice.security.clients import ClamAVClient


@pytest.mark.django_db
def test_serializer_with_valid_data(data_file_data):
def test_serializer_with_valid_data(data_file_data, user):
"""If a serializer has valid data it will return a valid object."""
create_serializer = DataFileSerializer(data=data_file_data)
create_serializer = DataFileSerializer(
context={'user': user},
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also need to manually pass this context in tests as it is used in validation steps.

data=data_file_data
)
create_serializer.is_valid(raise_exception=True)
assert create_serializer.is_valid() is True


@pytest.mark.django_db
def test_serializer_increment_create(data_file_data, other_data_file_data):
def test_serializer_increment_create(
data_file_data,
other_data_file_data,
user
):
"""Test serializer produces data_files with correct version."""
serializer_1 = DataFileSerializer(data=data_file_data)
serializer_1 = DataFileSerializer(
context={'user': user},
data=data_file_data
)
assert serializer_1.is_valid() is True
data_file_1 = serializer_1.save()

serializer_2 = DataFileSerializer(data=other_data_file_data)
serializer_2 = DataFileSerializer(
context={'user': user},
data=other_data_file_data
)
assert serializer_2.is_valid() is True
data_file_2 = serializer_2.save()

Expand All @@ -45,13 +62,44 @@ def test_immutability_of_data_file(data_file_instance):


@pytest.mark.django_db
def test_created_at(data_file_data):
def test_created_at(data_file_data, user):
"""If a serializer has valid data it will return a valid object."""
create_serializer = DataFileSerializer(data=data_file_data)
create_serializer = DataFileSerializer(
context={'user': user},
data=data_file_data
)
assert create_serializer.is_valid() is True
data_file = create_serializer.save()

assert data_file.created_at
assert data_file.av_scans.exists()


@pytest.mark.django_db
def test_data_file_still_created_if_av_scan_fails_to_create(
data_file_data,
mocker,
user
):
"""Test valid DataFile is still created if ClamAV scan isn't recorded.

In this scenario all validation would have already passed but in the event
the ClamAVFileScan isn't found in the database due to an error or race
condition we should still store the DataFile.
"""
mocker.patch(
'tdpservice.security.models.ClamAVFileScanManager.record_scan',
return_value=None
)
create_serializer = DataFileSerializer(
context={'user': user},
data=data_file_data
)
assert create_serializer.is_valid() is True
data_file = create_serializer.save()

assert data_file is not None
assert data_file.av_scans.count() == 0


@pytest.mark.parametrize("file_name", [
Expand Down Expand Up @@ -87,3 +135,26 @@ def test_rejects_invalid_file_extensions(file_name):
"""Test invalid file names are rejected by serializer validation."""
with pytest.raises(ValidationError):
validate_file_extension(file_name)


@pytest.mark.django_db
def test_rejects_infected_file(infected_file, fake_file_name, user):
"""Test infected files are rejected by serializer validation."""
with pytest.raises(ValidationError):
validate_file_infection(infected_file, fake_file_name, user)


@pytest.mark.django_db
def test_rejects_uploads_on_clamav_connection_error(
fake_file,
fake_file_name,
mocker,
user
):
"""Test that DataFiles cannot pass validation if ClamAV is down."""
mocker.patch(
'tdpservice.security.clients.ClamAVClient.scan_file',
side_effect=ClamAVClient.ServiceUnavailable()
)
with pytest.raises(ValidationError):
validate_file_infection(fake_file, fake_file_name, user)
13 changes: 3 additions & 10 deletions tdrs-backend/tdpservice/data_files/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from django.core.exceptions import ValidationError
from inflection import pluralize

from tdpservice.clients import ClamAVClient
from tdpservice.security.clients import ClamAVClient

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -46,11 +46,10 @@ def validate_file_extension(file_name: str):
raise ValidationError(msg)


def validate_file_infection(file):
def validate_file_infection(file, file_name, uploaded_by):
"""Validate file is not infected by scanning with ClamAV."""
av_client = ClamAVClient()
try:
is_file_clean = av_client.scan_file(file, file.name)
is_file_clean = ClamAVClient().scan_file(file, file_name, uploaded_by)
except ClamAVClient.ServiceUnavailable:
raise ValidationError(
'Unable to complete security inspection, please try again or '
Expand All @@ -61,9 +60,3 @@ def validate_file_infection(file):
raise ValidationError(
'Rejected: uploaded file did not pass security inspection'
)


def validate_data_file(file):
"""Perform all validation steps on a given file."""
validate_file_extension(file.name)
validate_file_infection(file)
6 changes: 6 additions & 0 deletions tdrs-backend/tdpservice/data_files/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ def filter_queryset(self, queryset):
self.filterset_class = None
return super().filter_queryset(queryset)

def get_serializer_context(self):
"""Retrieve additional context required by serializer."""
context = super().get_serializer_context()
context['user'] = self.request.user
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is what gives the validate_file method on the serializer access to the user.

return context

@action(methods=["get"], detail=True)
def download(self, request, pk=None):
"""Retrieve a file from s3 then stream it to the client."""
Expand Down
Empty file.
28 changes: 28 additions & 0 deletions tdrs-backend/tdpservice/security/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Admin classes for the tdpservice.security app."""
from django.contrib import admin

from tdpservice.security.models import ClamAVFileScan


@admin.register(ClamAVFileScan)
class ClamAVFileScanAdmin(admin.ModelAdmin):
"""Admin interface for Clam AV File Scan instances."""

list_display = [
'scanned_at',
'result',
'uploaded_by',
'file_name',
'file_size_human',
'file_shasum',
]

list_filter = [
'result',
'uploaded_by',
]

@admin.display(description='File Size')
def file_size_human(self, obj):
"""Return human friendly file size, converted to appropriate unit."""
return obj.file_size_humanized
10 changes: 10 additions & 0 deletions tdrs-backend/tdpservice/security/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""App configuration for tdpservice.security."""
from django.apps import AppConfig


class SecurityConfig(AppConfig):
"""App configuration."""

default_auto_field = 'django.db.models.BigAutoField'
name = 'tdpservice.security'
verbose_name = 'Security'
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
"""Globally available external client services."""
"""External client services related to security auditing."""
from requests.adapters import HTTPAdapter
from requests.exceptions import ConnectionError
from requests.packages.urllib3.util.retry import Retry
from requests.sessions import Session
import logging

from django.conf import settings
from django.core.files.base import File

from tdpservice.security.models import ClamAVFileScan
from tdpservice.users.models import User

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
Expand Down Expand Up @@ -44,15 +48,18 @@ def init_session(self):
session.mount(self.endpoint_url, HTTPAdapter(max_retries=retries))
return session

def scan_file(self, file, file_name) -> bool:
def scan_file(self, file: File, file_name: str, uploaded_by: User) -> bool:
"""Scan a file for virus infections.

:param file:
The file or file-like object that should be scanned
:param file_name:
The name of the target file (str).
The string name of the file.
:param uploaded_by:
The User that uploaded the given file.
:returns is_file_clean:
A boolean indicating whether or not the file passed the ClamAV scan
:raises ClamAVClient.ServiceUnavailable:
"""
logger.debug(f'Initiating virus scan for file: {file_name}')
try:
Expand All @@ -62,21 +69,31 @@ def scan_file(self, file, file_name) -> bool:
files={'file': file},
timeout=settings.AV_SCAN_TIMEOUT
jorgegonzalez marked this conversation as resolved.
Show resolved Hide resolved
)

except ConnectionError as err:
logger.debug(f'Encountered error scanning file: {err}')
logger.error(f'ClamAV connection failure: {err}')
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the case we can't connect to ClamAV we will not store a result for the scan.

raise self.ServiceUnavailable()

if scan_response.status_code in self.SCAN_CODES['CLEAN']:
logger.debug(
f'File scan marked as CLEAN for file: {file_name}'
)
return True
msg = f'File scan marked as CLEAN for file: {file_name}'
scan_result = ClamAVFileScan.Result.CLEAN

if scan_response.status_code in self.SCAN_CODES['INFECTED']:
logger.debug(
f'File scan marked as INFECTED for file: {file_name}'
)
return False
elif scan_response.status_code in self.SCAN_CODES['INFECTED']:
msg = f'File scan marked as INFECTED for file: {file_name}'
scan_result = ClamAVFileScan.Result.INFECTED

else:
msg = f'Unable to scan file with name: {file_name}'
scan_result = ClamAVFileScan.Result.ERROR

# Log and create audit records with the results of this scan
logger.debug(msg)
ClamAVFileScan.objects.record_scan(
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The execution flow was altered here so we only return at the end, after creating the ClamAVFileScan instance. This allows us to track all results from ClamAV - not just the CLEAN files.

file,
file_name,
msg,
scan_result,
uploaded_by
)

logger.debug(f'Unable to scan file with name: {file_name}')
return False
return True if scan_result == ClamAVFileScan.Result.CLEAN else False
34 changes: 34 additions & 0 deletions tdrs-backend/tdpservice/security/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Generated by Django 3.2.5 on 2021-09-14 15:47

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

initial = True

dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('data_files', '0009_update_log_entry_content_type'),
]

operations = [
migrations.CreateModel(
name='ClamAVFileScan',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('scanned_at', models.DateTimeField(auto_now_add=True)),
('file_name', models.TextField()),
('file_size', models.PositiveBigIntegerField(help_text='The file size in bytes')),
('file_shasum', models.TextField(help_text='The SHA256 checksum of the uploaded file')),
('result', models.CharField(choices=[('CLEAN', 'Clean'), ('INFECTED', 'Infected'), ('ERROR', 'Error')], help_text='Scan result for uploaded file', max_length=12)),
('data_file', models.ForeignKey(blank=True, help_text='The resulting DataFile object if this scan was clean', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='av_scans', to='data_files.datafile')),
('uploaded_by', models.ForeignKey(help_text='The user that uploaded the scanned file', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='av_scans', to=settings.AUTH_USER_MODEL)),
],
options={
'verbose_name': 'Clam AV File Scan',
},
),
]
Empty file.
Loading