From adf97b41ad8f6495331d85d7d671b33c5f781cc7 Mon Sep 17 00:00:00 2001
From: Jordan Padams
Date: Fri, 24 Apr 2020 11:43:39 -0700
Subject: [PATCH] Update bundle processing to include README

Software now includes //File_Area_Text/File/file_name if it exists in the bundle.xml

Resolves #55
---
 src/pds/aipgen/sip.py   |  6 +++---
 src/pds/aipgen/utils.py | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/src/pds/aipgen/sip.py b/src/pds/aipgen/sip.py
index 8a7e662..9897b49 100644
--- a/src/pds/aipgen/sip.py
+++ b/src/pds/aipgen/sip.py
@@ -36,7 +36,7 @@
     PDS_SCHEMA_URL, AIP_PRODUCT_URI_PREFIX, PDS_LABEL_FILENAME_EXTENSION, HASH_ALGORITHMS, SIP_MANIFEST_URL
 )
 from .utils import (
-    getPrimariesAndOtherInfo, getMD5, getLogicalIdentifierAndFileInventory, parseXML, getDigest, addLoggingArguments
+    getPrimariesAndOtherInfo, getMD5, getLogicalIdentifierAndFileInventory, parseXML, getDigest, addLoggingArguments, getBundleFiles
 )
 from datetime import datetime
 from lxml import etree
@@ -222,8 +222,8 @@ def _getLocalFileInfo(bundle, primaries, bundleLidvid, allCollections, con):
         cursor.execute('''CREATE INDEX IF NOT EXISTS lidvidIndex ON lidvids (lidvid)''')
         cursor.execute('''CREATE UNIQUE INDEX lidvidPairing ON lidvids (lidvid, xmlFile)''')
 
-    # Add bundle to manifest
-    lidvidsToFiles[bundleLidvid] = {'file:' + bundle}
+    # Add bundle and associated files to manifest
+    lidvidsToFiles[bundleLidvid] = getBundleFiles(bundle)
 
     # OK, here we go
     root = os.path.dirname(bundle)
diff --git a/src/pds/aipgen/utils.py b/src/pds/aipgen/utils.py
index f6608d2..292bccf 100644
--- a/src/pds/aipgen/utils.py
+++ b/src/pds/aipgen/utils.py
@@ -153,5 +153,24 @@ def addLoggingArguments(parser):
     )
 
 
+def getBundleFiles(bundle):
+    '''Return the ``file:`` URLs for a bundle label and the files it names.
+
+    The result always contains ``bundle`` itself. Every non-empty
+    ``File_Area_Text/File/file_name`` found directly under the label's root
+    element (a README, for example) is added as a path in the label's folder.
+    '''
+    files = {'file:' + bundle}
+    folder = os.path.dirname(bundle)
+    root = parseXML(bundle).getroot()
+    path = f'./{{{PDS_NS_URI}}}File_Area_Text/{{{PDS_NS_URI}}}File/{{{PDS_NS_URI}}}file_name'
+    for match in root.findall(path):
+        # An empty <file_name/> has no text (None); skip it instead of crashing
+        name = (match.text or '').strip()
+        if name:
+            files.add('file:' + os.path.join(folder, name))
+    return files
+
+
 # Classes
 # -------