From e0bca34a257f4da33de98eccabea04e4c116867e Mon Sep 17 00:00:00 2001 From: Elron Bandel Date: Sun, 28 Jan 2024 06:36:13 -0500 Subject: [PATCH] Add catalog summary printing functionality Signed-off-by: Elron Bandel --- src/unitxt/catalog.py | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/src/unitxt/catalog.py b/src/unitxt/catalog.py index 477df9bd3..e58467cc4 100644 --- a/src/unitxt/catalog.py +++ b/src/unitxt/catalog.py @@ -2,12 +2,13 @@ import re from pathlib import Path from typing import Optional - +from collections import Counter import requests -from .artifact import Artifact, Artifactory, reset_artifacts_cache +from .artifact import Artifact, Artifactory, reset_artifacts_cache, Artifactories from .logging_utils import get_logger from .version import version +from .text_utils import print_dict logger = get_logger() COLLECTION_SEPARATOR = "." @@ -35,6 +36,7 @@ class Catalog(Artifactory): class LocalCatalog(Catalog): name: str = "local" location: str = default_catalog_path + is_local: bool = True def path(self, artifact_identifier: str): assert ( @@ -92,6 +94,7 @@ class GithubCatalog(LocalCatalog): repo = "unitxt" repo_dir = "src/unitxt/catalog" user = "IBM" + is_local: bool = False def prepare(self): tag = version @@ -133,3 +136,34 @@ def add_to_catalog( artifact, name, overwrite=overwrite, verbose=verbose ) # remove collection (its actually the dir). 
# verify name


def get_local_catalogs_paths() -> list:
    """Return the locations of the registered catalogs that live on local disk.

    Iterates over ``Artifactories()`` and keeps every ``LocalCatalog``
    instance whose ``is_local`` flag is true. The flag is checked in addition
    to the type because a ``LocalCatalog`` subclass may set ``is_local`` to
    ``False``.

    Returns:
        A list with the ``location`` attribute of each matching catalog, in
        the order the artifactories are iterated.
    """
    return [
        artifactory.location
        for artifactory in Artifactories()
        if isinstance(artifactory, LocalCatalog) and artifactory.is_local
    ]


def count_files_recursively(folder) -> int:
    """Count the files found under *folder*, including all nested directories.

    Args:
        folder: Path of the directory to walk with ``os.walk``.

    Returns:
        The total number of file entries reported by the walk.
    """
    return sum(len(files) for _, _, files in os.walk(folder))


def local_catalog_summary(catalog_path) -> dict:
    """Map each immediate sub-directory of *catalog_path* to its file count.

    Plain files located directly inside *catalog_path* are not counted; only
    directories become keys of the result.

    Args:
        catalog_path: Path of the catalog root directory.

    Returns:
        A dict of ``{sub_directory_name: recursive_file_count}``.

    Raises:
        OSError: If *catalog_path* cannot be listed (for example, it does not
            exist).
    """
    with os.scandir(catalog_path) as entries:
        return {
            entry.name: count_files_recursively(entry.path)
            for entry in entries
            if entry.is_dir()
        }


def summary() -> Counter:
    """Print and return the per-directory file counts of all local catalogs.

    The counts produced by ``local_catalog_summary`` for every path returned
    by ``get_local_catalogs_paths`` are summed key by key, so directories with
    the same name in different catalogs are reported together.

    Returns:
        A ``Counter`` mapping sub-directory name to the total number of files.
    """
    totals = Counter()
    for local_catalog_path in get_local_catalogs_paths():
        # Use update() rather than `+=`: Counter addition discards entries
        # whose count is not positive, which would silently hide empty
        # catalog directories from the summary.
        totals.update(local_catalog_summary(local_catalog_path))
    print_dict(totals)
    return totals