From 9ab9ee7c7031ee81320de018e055869e90fd160d Mon Sep 17 00:00:00 2001 From: Roman Skurikhin Date: Mon, 25 Oct 2021 12:33:46 +0300 Subject: [PATCH] Add blob du command (#2372) * Add blob du command * Add changelog * Get bucket disk usage --- CHANGELOG.D/2372.feature | 1 + CLI.md | 22 +++++++++++++++++ neuro-cli/docs/blob.md | 23 +++++++++++++++++ neuro-cli/src/neuro_cli/blob_storage.py | 33 +++++++++++++++++++++++++ neuro-cli/src/neuro_cli/root.py | 33 ++----------------------- neuro-sdk/docs/buckets_reference.rst | 29 ++++++++++++++++++++++ neuro-sdk/src/neuro_sdk/buckets.py | 24 ++++++++++++++++++ 7 files changed, 134 insertions(+), 31 deletions(-) create mode 100644 CHANGELOG.D/2372.feature diff --git a/CHANGELOG.D/2372.feature b/CHANGELOG.D/2372.feature new file mode 100644 index 000000000..2bfc0e6eb --- /dev/null +++ b/CHANGELOG.D/2372.feature @@ -0,0 +1 @@ +Added `blob du BUCKET` command to get bucket's storage usage. diff --git a/CLI.md b/CLI.md index c8a9111f7..046163484 100644 --- a/CLI.md +++ b/CLI.md @@ -87,6 +87,7 @@ * [neuro blob glob](#neuro-blob-glob) * [neuro blob rm](#neuro-blob-rm) * [neuro blob sign-url](#neuro-blob-sign-url) + * [neuro blob du](#neuro-blob-du) * [neuro secret](#neuro-secret) * [neuro secret ls](#neuro-secret-ls) * [neuro secret add](#neuro-secret-add) @@ -2044,6 +2045,7 @@ Name | Description| | _[neuro blob glob](#neuro-blob-glob)_| List resources that match PATTERNS | | _[neuro blob rm](#neuro-blob-rm)_| Remove blobs from bucket | | _[neuro blob sign-url](#neuro-blob-sign-url)_| Make signed url for blob in bucket | +| _[neuro blob du](#neuro-blob-du)_| Get storage usage for BUCKET | @@ -2388,6 +2390,26 @@ Name | Description| +### neuro blob du + +Get storage usage for BUCKET. 
+ +**Usage:** + +```bash +neuro blob du [OPTIONS] BUCKET +``` + +**Options:** + +Name | Description| +|----|------------| +|_--help_|Show this message and exit.| +|_--cluster CLUSTER_|Look on a specified cluster \(the current cluster by default).| + + + + ## neuro secret Operations with secrets. diff --git a/neuro-cli/docs/blob.md b/neuro-cli/docs/blob.md index 0bc4dceba..76a0e9e01 100644 --- a/neuro-cli/docs/blob.md +++ b/neuro-cli/docs/blob.md @@ -28,6 +28,7 @@ Blob storage operations. | [_glob_](blob.md#glob) | List resources that match PATTERNS | | [_rm_](blob.md#rm) | Remove blobs from bucket | | [_sign-url_](blob.md#sign-url) | Make signed url for blob in bucket | +| [_du_](blob.md#du) | Get storage usage for BUCKET | ### lsbucket @@ -425,3 +426,25 @@ Make signed url for blob in bucket. | _--expires TIMEDELTA_ | Duration this signature will be valid in the format '1h2m3s' _\[default: 1h\]_ | + +### du + +Get storage usage for BUCKET + + +#### Usage + +```bash +neuro blob du [OPTIONS] BUCKET +``` + +Get storage usage for `BUCKET`. + +#### Options + +| Name | Description | +| :--- | :--- | +| _--help_ | Show this message and exit. | +| _--cluster CLUSTER_ | Look on a specified cluster \(the current cluster by default\). 
| + + diff --git a/neuro-cli/src/neuro_cli/blob_storage.py b/neuro-cli/src/neuro_cli/blob_storage.py index 81d51d3a1..a29981a5e 100644 --- a/neuro-cli/src/neuro_cli/blob_storage.py +++ b/neuro-cli/src/neuro_cli/blob_storage.py @@ -42,6 +42,7 @@ uri_formatter, ) from neuro_cli.parse_utils import parse_timedelta +from neuro_cli.utils import format_size from .const import EX_OSFILE from .formatters.blob_storage import ( @@ -340,6 +341,37 @@ async def statbucket( root.print(bucket_fmtr(bucket_obj)) +@command() +@option( + "--cluster", + type=CLUSTER, + help="Look on a specified cluster (the current cluster by default).", +) +@argument("bucket", type=BUCKET) +async def du(root: Root, cluster: Optional[str], bucket: str) -> None: + """ + Get storage usage for BUCKET. + """ + bucket_obj = await root.client.buckets.get(bucket, cluster_name=cluster) + + base_str = f"Calculating bucket {bucket_obj.name or bucket_obj.id} disk usage" + + with root.status(base_str) as status: + async with root.client.buckets.get_disk_usage( + bucket_obj.id, cluster + ) as usage_it: + async for usage in usage_it: + status.update( + f"{base_str}: total size {format_size(usage.total_bytes)}, " + f"objects count {usage.object_count}" + ) + root.print( + f"Bucket {bucket_obj.name or bucket_obj.id} disk usage:\n" + f"Total size: {format_size(usage.total_bytes)}\n" + f"Objects count: {usage.object_count}" + ) + + @command() @option( "--cluster", @@ -1001,3 +1033,4 @@ async def rmcredentials( blob_storage.add_command(glob) blob_storage.add_command(rm) blob_storage.add_command(sign_url) +blob_storage.add_command(du) diff --git a/neuro-cli/src/neuro_cli/root.py b/neuro-cli/src/neuro_cli/root.py index 3ef389845..70bd0fbde 100644 --- a/neuro-cli/src/neuro_cli/root.py +++ b/neuro-cli/src/neuro_cli/root.py @@ -21,11 +21,9 @@ import aiohttp import click -from rich.console import Console, PagerContext, RenderableType +from rich.console import Console, PagerContext from rich.pager import Pager -from 
rich.spinner import Spinner -from rich.status import Status as RichStatus -from rich.style import StyleType +from rich.status import Status from rich.text import Text as RichText from neuro_sdk import Client, ConfigError, Factory, gen_trace_id @@ -63,33 +61,6 @@ def show(self, content: str) -> None: print(content, end="") -class Status(RichStatus): - # Patched version of library class, avoid spinner animation - # reset on updates that do not change spinner style - - def update( - self, - status: Optional[RenderableType] = None, - *, - spinner: Optional[str] = None, - spinner_style: Optional[StyleType] = None, - speed: Optional[float] = None, - ) -> None: - if status is not None: - self.status = status - if spinner is not None: - self.spinner = spinner - if spinner_style is not None: - self.spinner_style = spinner_style - if speed is not None: - self.speed = speed - if spinner is not None or spinner_style is not None or speed is not None: - self._spinner = Spinner( - self.spinner, style=self.spinner_style, speed=self.speed - ) - self._live.update(self.renderable, refresh=True) - - @dataclass class Root: color: bool diff --git a/neuro-sdk/docs/buckets_reference.rst b/neuro-sdk/docs/buckets_reference.rst index 39ec0f11f..239093123 100644 --- a/neuro-sdk/docs/buckets_reference.rst +++ b/neuro-sdk/docs/buckets_reference.rst @@ -368,6 +368,19 @@ Buckets :return: Signed url (:class:`yarl.URL`) + .. comethod:: get_disk_usage(bucket_id_or_name: str, \ + cluster_name: Optional[str] = None, \ + bucket_owner: Optional[str] = None, \ + ) -> AsyncContextManager[AsyncIterator[BucketUsage]] + + Get disk space usage of a given bucket. The iterator yields partial results, as calculation + for the whole bucket can take time. + + :param str bucket_id_or_name: bucket's id or name. + :param str cluster_name: cluster to look for a bucket. Default is current cluster. + :param str bucket_owner: bucket owner's username. Used only if looking up the bucket by its name. + Default is current user. 
+ .. comethod:: persistent_credentials_list(cluster_name: Optional[str] = None) -> AsyncContextManager[AsyncIterator[PersistentBucketCredentials]] :async-for: @@ -590,3 +603,19 @@ BlobCommonPrefix An ancestor of :class:`BucketEntry` for describing common prefixes for blobs in non-recursive listing. You can treat it as a kind of *folder* on Blob Storage. + + +BucketUsage +=========== + +.. class:: BucketUsage + + An :class:`~dataclasses.dataclass` for describing bucket disk space usage. + + .. attribute:: total_bytes + + Total size of all objects in bytes, :class:`int`. + + .. attribute:: object_count + + Total number of objects, :class:`int`. diff --git a/neuro-sdk/src/neuro_sdk/buckets.py b/neuro-sdk/src/neuro_sdk/buckets.py index 99cec99a7..ad6882196 100644 --- a/neuro-sdk/src/neuro_sdk/buckets.py +++ b/neuro-sdk/src/neuro_sdk/buckets.py @@ -996,6 +996,12 @@ class Provider(str, enum.Enum): GCP = "gcp" +@dataclass(frozen=True) +class BucketUsage: + total_bytes: int + object_count: int + + @dataclass(frozen=True) class BucketCredentials: bucket_id: str @@ -1157,6 +1163,24 @@ async def request_tmp_credentials( payload = await resp.json() return self._parse_bucket_credentials_payload(payload) + @asyncgeneratorcontextmanager + async def get_disk_usage( + self, + bucket_id_or_name: str, + cluster_name: Optional[str] = None, + bucket_owner: Optional[str] = None, + ) -> AsyncIterator[BucketUsage]: + total_bytes = 0 + obj_count = 0 + async with self._get_provider( + bucket_id_or_name, cluster_name, bucket_owner + ) as provider: + async with provider.list_blobs("", recursive=True) as it: + async for obj in it: + total_bytes += obj.size + obj_count += 1 + yield BucketUsage(total_bytes, obj_count) + # Helper functions @asynccontextmanager