From b6a6d5c1923ac94b3ea9d6d0334b4377434addd5 Mon Sep 17 00:00:00 2001 From: Harpo Harbert Date: Tue, 1 Aug 2023 09:44:28 -0700 Subject: [PATCH 1/2] Migrates to MediaFile.guid as primary key --- chowda/models.py | 35 +++- chowda/views.py | 5 +- .../versions/9a6ec8c9597c_sonyci_assets.py | 43 ----- ...36beb0_adds_mediafile_sonyciasset_links.py | 141 ++++++++++++++++ migrations/versions/d10392cd5788_init_db.py | 158 ------------------ 5 files changed, 169 insertions(+), 213 deletions(-) delete mode 100644 migrations/versions/9a6ec8c9597c_sonyci_assets.py create mode 100644 migrations/versions/9c426736beb0_adds_mediafile_sonyciasset_links.py delete mode 100644 migrations/versions/d10392cd5788_init_db.py diff --git a/chowda/models.py b/chowda/models.py index c2b01e09..c19bc358 100644 --- a/chowda/models.py +++ b/chowda/models.py @@ -61,8 +61,8 @@ async def __admin_repr__(self, request: Request): class MediaFileCollectionLink(SQLModel, table=True): - media_file_id: Optional[int] = Field( - default=None, foreign_key='media_files.id', primary_key=True + media_file_id: Optional[str] = Field( + default=None, foreign_key='media_files.guid', primary_key=True ) collection_id: Optional[int] = Field( default=None, foreign_key='collections.id', primary_key=True @@ -70,26 +70,40 @@ class MediaFileCollectionLink(SQLModel, table=True): class MediaFileBatchLink(SQLModel, table=True): - media_file_id: Optional[int] = Field( - default=None, foreign_key='media_files.id', primary_key=True + media_file_id: Optional[str] = Field( + default=None, foreign_key='media_files.guid', primary_key=True ) batch_id: Optional[int] = Field( default=None, foreign_key='batches.id', primary_key=True ) +class MediaFileSonyCiAssetLink(SQLModel, table=True): + media_file_id: Optional[str] = Field( + default=None, foreign_key='media_files.guid', primary_key=True + ) + sonyci_asset_id: Optional[str] = Field( + default=None, foreign_key='sonyci_assets.id', primary_key=True + ) + + class MediaFile(SQLModel, table=True): """Media file model Attributes: - id: SonyCi asset id - guid: asset guid + guid: MediaFile GUID + assets: List of SonyCiAssets + collections: List of Collections + batches: List of Batches + clams_events: List of ClamsEvents """ __tablename__ = 'media_files' - id: Optional[int] = Field(primary_key=True, default=None) - guid: str = Field(index=True) + guid: Optional[str] = Field(primary_key=True, default=None, index=True) mmif_json: Dict[str, Any] = Field(sa_column=Column(JSON), default=None) + assets: List['SonyCiAsset'] = Relationship( + back_populates='media_files', link_model=MediaFileSonyCiAssetLink + ) collections: List['Collection'] = Relationship( back_populates='media_files', link_model=MediaFileCollectionLink ) @@ -127,6 +141,9 @@ class SonyCiAsset(SQLModel, table=True): thumbnails: Optional[List[Dict[str, Any]]] = Field( sa_column=Column(postgresql.ARRAY(JSON)), default=None ) + media_files: List[MediaFile] = Relationship( + back_populates='assets', link_model=MediaFileSonyCiAssetLink + ) class Collection(SQLModel, table=True): @@ -217,7 +234,7 @@ class ClamsEvent(SQLModel, table=True): batch: Optional[Batch] = Relationship(back_populates='clams_events') clams_app_id: Optional[int] = Field(default=None, foreign_key='clams_apps.id') clams_app: Optional[ClamsApp] = Relationship(back_populates='clams_events') - media_file_id: Optional[int] = Field(default=None, foreign_key='media_files.id') + media_file_id: Optional[str] = Field(default=None, foreign_key='media_files.guid') media_file: Optional[MediaFile] = Relationship(back_populates='clams_events') async def __admin_repr__(self, request: Request): diff --git a/chowda/views.py b/chowda/views.py index 6a7b6ba3..3cae399b 100644 --- a/chowda/views.py +++ b/chowda/views.py @@ -2,6 +2,8 @@ from json import loads from typing import Any, ClassVar, Dict +from metaflow import Flow +from metaflow.exception import MetaflowNotFound from requests import Request from sqlmodel import Session, select from starlette.responses import Response @@ -15,9 +17,6 @@ from chowda.db import engine from chowda.models import MediaFile -from metaflow import Flow -from metaflow.exception import MetaflowNotFound - @dataclass class MediaFilesGuidLinkField(BaseField): diff --git a/migrations/versions/9a6ec8c9597c_sonyci_assets.py b/migrations/versions/9a6ec8c9597c_sonyci_assets.py deleted file mode 100644 index 491e00e6..00000000 --- a/migrations/versions/9a6ec8c9597c_sonyci_assets.py +++ /dev/null @@ -1,43 +0,0 @@ -"""SonyCi Assets - -Revision ID: 9a6ec8c9597c -Revises: d10392cd5788 -Create Date: 2023-07-07 10:35:18.523178 - -""" -from alembic import op -import sqlalchemy as sa -import sqlmodel -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = '9a6ec8c9597c' -down_revision = 'd10392cd5788' -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('sonyci_assets', - sa.Column('size', sa.BIGINT(), nullable=True), - sa.Column('type', sa.Enum('video', 'audio', name='mediatype'), nullable=True), - sa.Column('thumbnails', postgresql.ARRAY(sa.JSON()), nullable=True), - sa.Column('id', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column('name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column('format', sqlmodel.sql.sqltypes.AutoString(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_index(op.f('ix_sonyci_assets_format'), 'sonyci_assets', ['format'], unique=False) - op.create_index(op.f('ix_sonyci_assets_id'), 'sonyci_assets', ['id'], unique=False) - op.create_index(op.f('ix_sonyci_assets_name'), 'sonyci_assets', ['name'], unique=False) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_index(op.f('ix_sonyci_assets_name'), table_name='sonyci_assets') - op.drop_index(op.f('ix_sonyci_assets_id'), table_name='sonyci_assets') - op.drop_index(op.f('ix_sonyci_assets_format'), table_name='sonyci_assets') - op.drop_table('sonyci_assets') - # ### end Alembic commands ### diff --git a/migrations/versions/9c426736beb0_adds_mediafile_sonyciasset_links.py b/migrations/versions/9c426736beb0_adds_mediafile_sonyciasset_links.py new file mode 100644 index 00000000..218b57df --- /dev/null +++ b/migrations/versions/9c426736beb0_adds_mediafile_sonyciasset_links.py @@ -0,0 +1,141 @@ +"""Adds MediaFile <--> SonyCiAsset Links + +Revision ID: 9c426736beb0 +Revises: +Create Date: 2023-07-31 15:34:04.018312 + +""" +from alembic import op +import sqlalchemy as sa +import sqlmodel +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '9c426736beb0' +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('clams_apps', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('endpoint', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('description', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('collections', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('description', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('media_files', + sa.Column('mmif_json', sa.JSON(), nullable=True), + sa.Column('guid', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.PrimaryKeyConstraint('guid') + ) + op.create_index(op.f('ix_media_files_guid'), 'media_files', ['guid'], unique=False) + op.create_table('pipelines', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('description', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('sonyci_assets', + sa.Column('size', sa.BIGINT(), nullable=True), + sa.Column('type', sa.Enum('video', 'audio', name='mediatype'), nullable=True), + sa.Column('thumbnails', postgresql.ARRAY(sa.JSON()), nullable=True), + sa.Column('id', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('format', sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_sonyci_assets_format'), 'sonyci_assets', ['format'], unique=False) + op.create_index(op.f('ix_sonyci_assets_id'), 'sonyci_assets', ['id'], unique=False) + op.create_index(op.f('ix_sonyci_assets_name'), 'sonyci_assets', ['name'], unique=False) + op.create_table('users', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('email', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('first_name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('last_name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_users_email'), 'users', ['email'], unique=False) + op.create_index(op.f('ix_users_first_name'), 'users', ['first_name'], unique=False) + op.create_index(op.f('ix_users_last_name'), 'users', ['last_name'], unique=False) + op.create_table('batches', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('description', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('pipeline_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['pipeline_id'], ['pipelines.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('clamsapppipelinelink', + sa.Column('clams_app_id', sa.Integer(), nullable=False), + sa.Column('pipeline_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['clams_app_id'], ['clams_apps.id'], ), + sa.ForeignKeyConstraint(['pipeline_id'], ['pipelines.id'], ), + sa.PrimaryKeyConstraint('clams_app_id', 'pipeline_id') + ) + op.create_table('mediafilecollectionlink', + sa.Column('media_file_id', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('collection_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['collection_id'], ['collections.id'], ), + sa.ForeignKeyConstraint(['media_file_id'], ['media_files.guid'], ), + sa.PrimaryKeyConstraint('media_file_id', 'collection_id') + ) + op.create_table('mediafilesonyciassetlink', + sa.Column('media_file_id', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('sonyci_asset_id', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.ForeignKeyConstraint(['media_file_id'], ['media_files.guid'], ), + sa.ForeignKeyConstraint(['sonyci_asset_id'], ['sonyci_assets.id'], ), + sa.PrimaryKeyConstraint('media_file_id', 'sonyci_asset_id') + ) + op.create_table('clams_events', + sa.Column('response_json', sa.JSON(), nullable=True), + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('status', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('batch_id', sa.Integer(), nullable=True), + sa.Column('clams_app_id', sa.Integer(), nullable=True), + sa.Column('media_file_id', sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.ForeignKeyConstraint(['batch_id'], ['batches.id'], ), + sa.ForeignKeyConstraint(['clams_app_id'], ['clams_apps.id'], ), + sa.ForeignKeyConstraint(['media_file_id'], ['media_files.guid'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('mediafilebatchlink', + sa.Column('media_file_id', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('batch_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['batch_id'], ['batches.id'], ), + sa.ForeignKeyConstraint(['media_file_id'], ['media_files.guid'], ), + sa.PrimaryKeyConstraint('media_file_id', 'batch_id') + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('mediafilebatchlink') + op.drop_table('clams_events') + op.drop_table('mediafilesonyciassetlink') + op.drop_table('mediafilecollectionlink') + op.drop_table('clamsapppipelinelink') + op.drop_table('batches') + op.drop_index(op.f('ix_users_last_name'), table_name='users') + op.drop_index(op.f('ix_users_first_name'), table_name='users') + op.drop_index(op.f('ix_users_email'), table_name='users') + op.drop_table('users') + op.drop_index(op.f('ix_sonyci_assets_name'), table_name='sonyci_assets') + op.drop_index(op.f('ix_sonyci_assets_id'), table_name='sonyci_assets') + op.drop_index(op.f('ix_sonyci_assets_format'), table_name='sonyci_assets') + op.drop_table('sonyci_assets') + op.drop_table('pipelines') + op.drop_index(op.f('ix_media_files_guid'), table_name='media_files') + op.drop_table('media_files') + op.drop_table('collections') + op.drop_table('clams_apps') + # ### end Alembic commands ### diff --git a/migrations/versions/d10392cd5788_init_db.py b/migrations/versions/d10392cd5788_init_db.py deleted file mode 100644 index 3786f060..00000000 --- a/migrations/versions/d10392cd5788_init_db.py +++ /dev/null @@ -1,158 +0,0 @@ -"""init_db - -Revision ID: d10392cd5788 -Revises: -Create Date: 2023-06-27 15:22:20.903418 - -""" -from alembic import op -import sqlalchemy as sa -import sqlmodel - - -# revision identifiers, used by Alembic. -revision = 'd10392cd5788' -down_revision = None -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - 'clams_apps', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column('endpoint', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column('description', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.PrimaryKeyConstraint('id'), - ) - op.create_table( - 'collections', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column('description', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.PrimaryKeyConstraint('id'), - ) - op.create_table( - 'media_files', - sa.Column('mmif_json', sa.JSON(), nullable=True), - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('guid', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.PrimaryKeyConstraint('id'), - ) - op.create_index(op.f('ix_media_files_guid'), 'media_files', ['guid'], unique=False) - op.create_table( - 'pipelines', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column('description', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.PrimaryKeyConstraint('id'), - ) - op.create_table( - 'users', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('email', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column('first_name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column('last_name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.PrimaryKeyConstraint('id'), - ) - op.create_index(op.f('ix_users_email'), 'users', ['email'], unique=False) - op.create_index(op.f('ix_users_first_name'), 'users', ['first_name'], unique=False) - op.create_index(op.f('ix_users_last_name'), 'users', ['last_name'], unique=False) - op.create_table( - 'batches', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column('description', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column('pipeline_id', sa.Integer(), nullable=True), - sa.ForeignKeyConstraint( - ['pipeline_id'], - ['pipelines.id'], - ), - sa.PrimaryKeyConstraint('id'), - ) - op.create_table( - 'clamsapppipelinelink', - sa.Column('clams_app_id', sa.Integer(), nullable=False), - sa.Column('pipeline_id', sa.Integer(), nullable=False), - sa.ForeignKeyConstraint( - ['clams_app_id'], - ['clams_apps.id'], - ), - sa.ForeignKeyConstraint( - ['pipeline_id'], - ['pipelines.id'], - ), - sa.PrimaryKeyConstraint('clams_app_id', 'pipeline_id'), - ) - op.create_table( - 'mediafilecollectionlink', - sa.Column('media_file_id', sa.Integer(), nullable=False), - sa.Column('collection_id', sa.Integer(), nullable=False), - sa.ForeignKeyConstraint( - ['collection_id'], - ['collections.id'], - ), - sa.ForeignKeyConstraint( - ['media_file_id'], - ['media_files.id'], - ), - sa.PrimaryKeyConstraint('media_file_id', 'collection_id'), - ) - op.create_table( - 'clams_events', - sa.Column('response_json', sa.JSON(), nullable=True), - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('status', sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column('batch_id', sa.Integer(), nullable=True), - sa.Column('clams_app_id', sa.Integer(), nullable=True), - sa.Column('media_file_id', sa.Integer(), nullable=True), - sa.ForeignKeyConstraint( - ['batch_id'], - ['batches.id'], - ), - sa.ForeignKeyConstraint( - ['clams_app_id'], - ['clams_apps.id'], - ), - sa.ForeignKeyConstraint( - ['media_file_id'], - ['media_files.id'], - ), - sa.PrimaryKeyConstraint('id'), - ) - op.create_table( - 'mediafilebatchlink', - sa.Column('media_file_id', sa.Integer(), nullable=False), - sa.Column('batch_id', sa.Integer(), nullable=False), - sa.ForeignKeyConstraint( - ['batch_id'], - ['batches.id'], - ), - sa.ForeignKeyConstraint( - ['media_file_id'], - ['media_files.id'], - ), - sa.PrimaryKeyConstraint('media_file_id', 'batch_id'), - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('mediafilebatchlink') - op.drop_table('clams_events') - op.drop_table('mediafilecollectionlink') - op.drop_table('clamsapppipelinelink') - op.drop_table('batches') - op.drop_index(op.f('ix_users_last_name'), table_name='users') - op.drop_index(op.f('ix_users_first_name'), table_name='users') - op.drop_index(op.f('ix_users_email'), table_name='users') - op.drop_table('users') - op.drop_table('pipelines') - op.drop_index(op.f('ix_media_files_guid'), table_name='media_files') - op.drop_table('media_files') - op.drop_table('collections') - op.drop_table('clams_apps') - # ### end Alembic commands ### From e45364fdb0c99e28bce1ad46ff56797ee4485387 Mon Sep 17 00:00:00 2001 From: Harpo Harbert Date: Tue, 1 Aug 2023 10:42:03 -0700 Subject: [PATCH 2/2] Adds GUID resolution to IngestFlow and notebook --- chowda/flows/ingest.py | 26 +++++-- docs/examples/guids.ipynb | 142 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+), 4 deletions(-) create mode 100644 docs/examples/guids.ipynb diff --git a/chowda/flows/ingest.py b/chowda/flows/ingest.py index 864c7b34..73b1aa4b 100644 --- a/chowda/flows/ingest.py +++ b/chowda/flows/ingest.py @@ -58,18 +58,36 @@ def get_batch(self, n): )['items'] def batch_ingest_page(self, n): - from sqlmodel import Session + from re import search, split + + from sqlmodel import Session, select from chowda.db import engine - from chowda.models import SonyCiAsset + from chowda.models import MediaFile, SonyCiAsset from chowda.utils import upsert with Session(engine) as session: batch = self.get_batch(n) media = [SonyCiAsset(**asset) for asset in batch] results = [] - for m in media: - results.append(session.execute(upsert(SonyCiAsset, m, ['id']))) + for asset in media: + results.append(session.execute(upsert(SonyCiAsset, asset, ['id']))) + # If it's a GUID + if search('^cpb-aacip-', asset.name): + # Extract the GUID name + guid = split(r'_|\.|-dupe', asset.name)[0] + # Check for existing MediaFile + media_file = session.exec( + select(MediaFile).where(MediaFile.guid == guid) + ).first() + if not media_file: + # Create a new MediaFile with the new guid + media_file = MediaFile(guid=guid) + ci_asset = session.get(SonyCiAsset, asset.id) + # Add the asset to the existing MediaFile + media_file.assets.append(ci_asset) + session.add(media_file) + result = sum([r.rowcount for r in results]) session.commit() log.success(f'Ingested page {n} with {result} assets') diff --git a/docs/examples/guids.ipynb b/docs/examples/guids.ipynb new file mode 100644 index 00000000..ab3d827a --- /dev/null +++ b/docs/examples/guids.ipynb @@ -0,0 +1,142 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Resolve filenames to GUIDs\n", + "\n", + "In order to address MediaFiles by GUIDs, we need to extract the GUID from the `SonyCiAsset.name` field.\n", + "\n", + "## Steps\n", + "\n", + "1. Get the list of assets from the DB\n", + "1. Filter out any name that does not start with `cpb-aacip-`\n", + "1. Split the name on any of:\n", + " - `_` underscore\n", + " - `.` period\n", + " - `-dupe`\n", + "\n", + "There are 8 records with `-dupe` in the name. All other records are correctly filtered with `_` and `.`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from chowda.db import engine\n", + "from chowda.models import SonyCiAsset\n", + "from sqlmodel import Session, select\n", + "from re import search, split\n", + "\n", + "\n", + "def get_asset():\n", + " with Session(engine) as session:\n", + " statement = select(SonyCiAsset)\n", + " results = session.exec(statement)\n", + "\n", + " return [\n", + " (asset.id, split('_|\\.|-dupe', asset.name)[0])\n", + " for asset in results.all()\n", + " if search('^cpb-aacip-', asset.name)\n", + " ]\n", + "\n", + "\n", + "# assets = get_asset()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Check the list\n", + "\n", + "If needed, write the guid list to a file and check it manually.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# len(assets)\n", + "\n", + "\n", + "def write_assets():\n", + " with open('guids.txt', 'w') as f:\n", + " for asset in assets:\n", + " f.write(f'{asset[1]}\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insert MediaFiles\n", + "\n", + "Iterate through the list of assets:\n", + "\n", + "- Split the GUID from the filename.\n", + "- Search the database for a matching MediaFile object.\n", + " - Insert a new MediaFile object if it does not already exist.\n", + "- Find the SonyCiAsset object in the database.\n", + "- Link the MediaFile object to the SonyCiAsset object.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from chowda.models import MediaFile\n", + "\n", + "\n", + "def insert_media_files():\n", + " with Session(engine) as session:\n", + " for asset in assets:\n", + " # Extract the GUID name\n", + " guid = split(r'_|\\.|-dupe', asset[1])[0]\n", + " # media_file = session.get(MediaFile, guid)\n", + " media_file = session.exec(\n", + " select(MediaFile).where(MediaFile.guid == guid)\n", + " ).first()\n", + " if not media_file:\n", + " # Create a new MediaFile with the new guid\n", + " media_file = MediaFile(guid=guid)\n", + " # session.add(media_file)\n", + " ci_asset = session.get(SonyCiAsset, asset[0])\n", + " # Add the asset to the existing MediaFile\n", + " media_file.assets.append(ci_asset)\n", + " # asset.media_files.append(media_file)\n", + " session.add(media_file)\n", + " session.commit()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}