Skip to content

Commit

Permalink
Adds custom CLAMSProvider faker class
Browse files Browse the repository at this point in the history
  • Loading branch information
mrharpo committed Apr 14, 2023
1 parent 64dfc61 commit a2d05a8
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 19 deletions.
15 changes: 10 additions & 5 deletions chowda/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,26 @@

from typing import Any, Dict, List, Optional
from pydantic import AnyHttpUrl, EmailStr, stricturl
from sqlalchemy import JSON, Column

from pydantic import Field as PydField
from pydantic.color import Color
from sqlalchemy import JSON, Column, DateTime, Enum, String, Text
from sqlalchemy import JSON, Column, DateTime, String, Text
from starlette.requests import Request
from sqlmodel import Field, Relationship, SQLModel

from enum import Enum

# Constrained URL type built with pydantic's ``stricturl``; the string literal
# that follows the assignment serves as its documentation.
MediaUrl = stricturl(allowed_schemes=['video', 'audio', 'text'], tld_required=False)
"""Media url validator. Must have prefix of video, audio, or text. No TLD required.
Example:
video://*
"""


class AppStatus(Enum):
    """Closed set of status values, e.g. for the ``status`` of a ClamsEvent.

    Each member's value is the lowercase string form of its name, so a member
    can be looked up from a stored string with ``AppStatus('pending')`` and
    turned back into that string with ``member.value``.
    """

    PENDING = 'pending'
    RUNNING = 'running'
    COMPLETE = 'complete'
    FAILED = 'failed'


class User(SQLModel, table=True):
"""User model
Expand Down
42 changes: 33 additions & 9 deletions tests/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,43 @@
Pipeline,
ClamsEvent,
User,
AppStatus,
)
import factory
from sqlalchemy import orm
from faker import Faker
from chowda.db import engine
from faker.providers import BaseProvider


class CLAMSProvider(BaseProvider):
    """Custom Faker provider that generates CLAMS-flavoured sample data."""

    def app_name(self):
        """Return an app name: ``app-`` followed by a generated noun."""
        noun = self.generator.word(part_of_speech="noun")
        return f'app-{noun}'

    def guid(self):
        """Return a GUID shaped like ``cpb-aacip-<int>-<8 hexified chars>``."""
        # Draw the integer before the hex suffix so the random stream is
        # consumed in the same left-to-right order as the formatted string.
        number = self.generator.random_int()
        hex_suffix = self.generator.hexify(8 * "^")
        return f'cpb-aacip-{number}-{hex_suffix}'

    def collection_name(self):
        """Return either a title or a generated name, plus `` Collection``."""
        use_title = self.generator.random.choice([True, False])
        prefix = self.title() if use_title else self.generator.name()
        return prefix + ' Collection'

    def batch_name(self):
        """Return a name shaped like ``Batch <int>: <title>``."""
        number = self.random_int()
        heading = self.title()
        return f'Batch {number}: {heading}'

    def title(self):
        """Return a title-cased sentence without its final character.

        The sentence is requested with a random word count between 1 and 10;
        the last character (the sentence terminator) is sliced off.
        """
        word_count = self.generator.random.randint(1, 10)
        sentence = self.generator.sentence(nb_words=word_count)
        return sentence.title()[:-1]


# Register the custom provider so that factory.Faker('<method name>') can
# resolve the CLAMSProvider methods (e.g. 'guid', 'app_name').
factory.Faker.add_provider(CLAMSProvider)

# Create a factory-specific scoped session for factory data. This can be used
# to modify factory-generated data (see seeds.py)
factory_session = orm.scoped_session(orm.sessionmaker(engine))

fake = Faker()


class ChowdaFactory(factory.alchemy.SQLAlchemyModelFactory):
class Meta:
Expand All @@ -39,7 +63,7 @@ class MediaFileFactory(ChowdaFactory):
class Meta:
model = MediaFile

guid = factory.Faker('hexify', text='cpb-aacip-^^^-^^^^^^^^')
guid = factory.Faker('guid')

@factory.post_generation
def batches(self, create, extracted, **kwargs):
Expand All @@ -64,15 +88,15 @@ class CollectionFactory(ChowdaFactory):
class Meta:
model = Collection

name = factory.Sequence(lambda n: 'Collection %d' % n)
name = factory.Faker('collection_name')
description = factory.Faker('text')


class BatchFactory(ChowdaFactory):
class Meta:
model = Batch

name = factory.Sequence(lambda n: 'Batch %d' % n)
name = factory.Faker('batch_name')
description = factory.Faker('text')

@factory.post_generation
Expand All @@ -89,7 +113,7 @@ class ClamsAppFactory(ChowdaFactory):
class Meta:
model = ClamsApp

name = factory.Faker("last_name_nonbinary")
name = factory.Faker('app_name')
description = factory.Faker('text')
endpoint = factory.Faker('url')

Expand Down Expand Up @@ -124,5 +148,5 @@ class ClamsEventFactory(ChowdaFactory):
class Meta:
model = ClamsEvent

status: str = "TODO: REPLACE WITH ENUM VAL"
response_json: dict = {"TODO": "REPLACE WITH EXPECTED RESPONSE"}
status: str = factory.Faker('random_element', elements=AppStatus)
response_json: dict = factory.Faker('json')
27 changes: 22 additions & 5 deletions tests/seeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,44 +6,61 @@
PipelineFactory,
ClamsEventFactory,
CollectionFactory,
UserFactory,
)
from chowda.models import AppStatus
from random import sample, randint, choice

# Materialize the enum members as a list so random.choice() can index into them.
status = list(AppStatus)


def seed(
    num_media_files: int = 1000,
    num_collections: int = 100,
    num_batches: int = 100,
    num_clams_apps: int = 10,
    num_pipelines: int = 10,
    num_clams_events: int = 1000,
    num_users: int = 10,
):
    """Seed the database with sample data.

    Args:
        num_media_files: Number of MediaFiles to create.
        num_collections: Number of Collections to create.
        num_batches: Number of Batches to create.
        num_clams_apps: Number of ClamsApps to create.
        num_pipelines: Number of Pipelines to create.
        num_clams_events: Number of ClamsEvents to create. None are created
            when no batch ended up with at least one media file.
        num_users: Number of Users to create.

    Raises:
        ValueError: If ``num_clams_apps`` is 0 while pipelines exist (there is
            nothing to sample from).
        IndexError: If ``num_pipelines`` is 0 while batches exist, or
            ``num_collections`` is 0 while media files exist.
    """
    # Create some sample Users
    UserFactory.create_batch(num_users)

    # Create some sample CLAMS Apps and Pipelines
    clams_apps = ClamsAppFactory.create_batch(num_clams_apps)
    pipelines = PipelineFactory.create_batch(num_pipelines)

    # Randomly assign CLAMS Apps to Pipelines (at least one app each)
    for pipeline in pipelines:
        pipeline.clams_apps = sample(clams_apps, randint(1, num_clams_apps))

    # Create the sample MediaFiles, Collections, and Batches
    media_files = MediaFileFactory.create_batch(num_media_files)
    collections = CollectionFactory.create_batch(num_collections)
    batches = BatchFactory.create_batch(num_batches)

    # Assign each batch to a random pipeline
    for batch in batches:
        batch.pipeline = choice(pipelines)

    # Randomly assign all MediaFiles to 0-3 batches and to 1 collection.
    # The upper bound is capped so sample() never asks for more batches than
    # exist (which would raise ValueError for num_batches < 3).
    max_batches_per_file = min(3, len(batches))
    for media_file in media_files:
        media_file.batches = sample(batches, randint(0, max_batches_per_file))
        media_file.collections = [choice(collections)]

    # Only batches that actually received media files can host an event;
    # choice() on an empty media_files list would raise IndexError.
    eligible_batches = [batch for batch in batches if batch.media_files]

    # Create some sample ClamsEvents on random batches, media files, and clams
    # apps from the batch's pipeline
    if eligible_batches:
        for _ in range(num_clams_events):
            batch = choice(eligible_batches)
            ClamsEventFactory.create(
                batch=batch,
                media_file=choice(batch.media_files),
                clams_app=choice(batch.pipeline.clams_apps),
                # Store the enum's value ('pending'), not its repr-like
                # str() form ('AppStatus.PENDING').
                status=choice(status).value,
            )

    factory_session.commit()


if __name__ == "__main__":
    # Executed as a script: populate the database using seed()'s defaults.
    seed()

0 comments on commit a2d05a8

Please sign in to comment.