Skip to content

Commit

Permalink
Add catalog summary printing functionality
Browse files Browse the repository at this point in the history
Signed-off-by: Elron Bandel <elron.bandel@ibm.com>
  • Loading branch information
elronbandel committed Jan 28, 2024
1 parent aba3da2 commit e0bca34
Showing 1 changed file with 36 additions and 2 deletions.
38 changes: 36 additions & 2 deletions src/unitxt/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
import re
from pathlib import Path
from typing import Optional

from collections import Counter
import requests

from .artifact import Artifact, Artifactory, reset_artifacts_cache
from .artifact import Artifact, Artifactory, reset_artifacts_cache, Artifactories
from .logging_utils import get_logger
from .version import version
from .text_utils import print_dict

logger = get_logger()
COLLECTION_SEPARATOR = "."
Expand Down Expand Up @@ -35,6 +36,7 @@ class Catalog(Artifactory):
class LocalCatalog(Catalog):
name: str = "local"
location: str = default_catalog_path
is_local: bool = True

def path(self, artifact_identifier: str):
assert (
Expand Down Expand Up @@ -92,6 +94,7 @@ class GithubCatalog(LocalCatalog):
repo = "unitxt"
repo_dir = "src/unitxt/catalog"
user = "IBM"
is_local: bool = False

def prepare(self):
tag = version
Expand Down Expand Up @@ -133,3 +136,34 @@ def add_to_catalog(
artifact, name, overwrite=overwrite, verbose=verbose
) # remove collection (its actually the dir).
# verify name


def get_local_catalogs_paths():
    """Return the ``location`` of every registered on-disk catalog.

    Iterates over ``Artifactories()`` and keeps only entries that are
    ``LocalCatalog`` instances whose ``is_local`` flag is truthy.  The flag
    check matters because ``GithubCatalog`` subclasses ``LocalCatalog`` but
    sets ``is_local = False``, so it passes the ``isinstance`` test yet must
    not be reported as a local path.

    Returns:
        A list of catalog locations, in the order the artifactories are
        yielded.
    """
    return [
        catalog.location
        for catalog in Artifactories()
        if isinstance(catalog, LocalCatalog) and catalog.is_local
    ]

def count_files_recursively(folder):
    """Count the files found under ``folder``, descending into subfolders.

    Args:
        folder: Path of the directory tree to walk.

    Returns:
        The total number of file entries reported by ``os.walk`` across all
        visited directories (directories themselves are not counted).
    """
    # os.walk yields (dirpath, dirnames, filenames); only the third item matters.
    return sum(len(filenames) for _, _, filenames in os.walk(folder))

def local_catalog_summary(catalog_path):
    """Map each top-level subdirectory of a catalog to its file count.

    Args:
        catalog_path: Path of the catalog root directory.

    Returns:
        A dict keyed by the name of every immediate subdirectory of
        ``catalog_path``; each value is the recursive file count returned by
        ``count_files_recursively`` for that subdirectory.  Plain files
        sitting directly in ``catalog_path`` are ignored.
    """
    # Pair every entry name with its full path once, so the path is not
    # rebuilt separately for the directory test and for the count.
    named_paths = (
        (entry, os.path.join(catalog_path, entry))
        for entry in os.listdir(catalog_path)
    )
    return {
        entry: count_files_recursively(entry_path)
        for entry, entry_path in named_paths
        if os.path.isdir(entry_path)
    }

def summary():
    """Print and return the combined per-directory file counts of all local catalogs.

    The summaries produced by ``local_catalog_summary`` for every path from
    ``get_local_catalogs_paths`` are added together as ``Counter`` objects,
    so directories with the same name in different catalogs are summed.

    Note:
        ``Counter`` addition keeps only strictly positive totals, so a
        directory whose combined file count is zero does not appear in the
        result.

    Returns:
        The aggregated ``Counter``, after it has been passed to
        ``print_dict``.
    """
    per_catalog_counts = (
        Counter(local_catalog_summary(catalog_path))
        for catalog_path in get_local_catalogs_paths()
    )
    # An empty Counter as the start value makes the no-catalog case return
    # an empty Counter rather than the integer 0.
    totals = sum(per_catalog_counts, Counter())
    print_dict(totals)
    return totals

0 comments on commit e0bca34

Please sign in to comment.