diff --git a/benchmarks/profiler.py b/benchmarks/profiler.py new file mode 100644 index 0000000..a5ee033 --- /dev/null +++ b/benchmarks/profiler.py @@ -0,0 +1,105 @@ +"""Profiler for titiler-xarray reader.""" + +import cProfile +import json +import logging +import pstats +import time +from io import StringIO +from typing import Callable, Dict, List, Optional + +from loguru import logger as log + + +# This code is copied from marblecutter +# https://github.com/mojodna/marblecutter/blob/master/marblecutter/stats.py +# License: +# Original work Copyright 2016 Stamen Design +# Modified work Copyright 2016-2017 Seth Fitzsimmons +# Modified work Copyright 2016 American Red Cross +# Modified work Copyright 2016-2017 Humanitarian OpenStreetMap Team +# Modified work Copyright 2017 Mapzen +class Timer(object): + """Time a code block.""" + + def __enter__(self): + """Start timer.""" + self.start = time.time() + return self + + def __exit__(self, ty, val, tb): + """Stop timer.""" + self.end = time.time() + self.elapsed = self.end - self.start + + +def parse_logs(logs: List[str]) -> Dict: + """Parse S3FS Logs.""" + s3_get = len([line for line in logs if "get_object" in line]) + # s3_head = len([line for line in logs if "head_object" in line]) + # s3_list = len([line for line in logs if "list_object" in line]) + return { + # "LIST": s3_list, + # "HEAD": s3_head, + "GET": s3_get, + } + + +def profile( + add_to_return: bool = False, + quiet: bool = False, + raw: bool = False, + cprofile: bool = False, + config: Optional[Dict] = None, +): + """Profiling.""" + + def wrapper(func: Callable): + """Wrap a function.""" + + def wrapped_f(*args, **kwargs): + """Wrapped function.""" + # Catch s3fs Logs + log_stream = StringIO() + logger = logging.getLogger("s3fs") + logger.setLevel(logging.DEBUG) + handler = logging.StreamHandler(log_stream) + logger.addHandler(handler) + + with Timer() as t: + # cProfile is a simple Python profiling + prof = cProfile.Profile() + retval = prof.runcall(func, *args, **kwargs) + profile_stream = StringIO() + ps = pstats.Stats(prof, stream=profile_stream) + ps.strip_dirs().sort_stats("time").print_stats() + + logger.removeHandler(handler) + handler.close() + + s3fs_lines = log_stream.getvalue().splitlines() + results = parse_logs(s3fs_lines) + + results["Timing"] = t.elapsed + + if cprofile: + profile_lines = [p for p in profile_stream.getvalue().splitlines() if p] + stats_to_print = [ + p for p in profile_lines[3:13] if float(p.split()[1]) > 0.0 + ] + results["cprofile"] = [profile_lines[2], *stats_to_print] + + if raw: + results["s3fs"] = s3fs_lines + + if not quiet: + log.info(json.dumps(results)) + + if add_to_return: + return retval, results + + return retval + + return wrapped_f + + return wrapper diff --git a/benchmarks/profiling.ipynb b/benchmarks/profiling.ipynb new file mode 100644 index 0000000..d766be8 --- /dev/null +++ b/benchmarks/profiling.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "fe3b2e76", + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Any\n", + "from rio_tiler.io import XarrayReader\n", + "from titiler.xarray.factory import xarray_open_dataset, update_dataset\n", + "\n", + "from profiler import profile" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "58d411cc", + "metadata": {}, + "outputs": [], + "source": [ + "import morecantile\n", + "from rio_tiler.io import XarrayReader\n", + "\n", + "\n", + "@profile(add_to_return=True, cprofile=True, quiet=True)\n", + "def get_tile(src_path: str, x: int, y: int, z: int, *, variable: str, **kwargs: Any):\n", + "\n", + " with xarray_open_dataset(\n", + " src_path,\n", + " z=z,\n", + " **kwargs,\n", + " ) as dataset:\n", + " \n", + " dataarray, _ = update_dataset(dataset, variable=variable)\n", + " \n", + " with XarrayReader(dataarray) as src_dst:\n", + " return src_dst.tile(x, y, z)\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "3a7aba89", + "metadata": {}, + "source": [ + "\n", + "s3://veda-data-store-staging/EIS/zarr/FWI-GEOS-5-Hourly (this is a protected data source but the titiler deployment I working with has access, configuring the domain name now...)\n", + "\n", + "s3://power-analysis-ready-datastore/power_901_monthly_meteorology_utc.zarr (publicly available, variable TS)\n", + "\n", + "s3://cmip6-pds/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/historical/r2i1p1f1/Amon/tas/gn/v20180827/ (publicly available, variable tas)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "e75492e0", + "metadata": {}, + "outputs": [], + "source": [ + "_, logs = get_tile(\n", + " \"s3://power-analysis-ready-datastore/power_901_monthly_meteorology_utc.zarr\", \n", + " 4, \n", + " 4, \n", + " 4, \n", + " variable=\"TS\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ca0a1933", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'GET': 52,\n", + " 'Timing': 5.186955213546753,\n", + " 'cprofile': [' ncalls tottime percall cumtime percall filename:lineno(function)',\n", + " \" 40 4.844 0.121 4.844 0.121 {method 'acquire' of '_thread.lock' objects}\",\n", + " \" 1 0.027 0.027 0.028 0.028 {method 'start' of 'rasterio._env.GDALEnv' objects}\",\n", + " ' 8 0.017 0.002 0.021 0.003 core.py:2142(_decode_chunk)',\n", + " ' 1 0.015 0.015 0.015 0.015 {rasterio._warp._reproject}',\n", + " ' 408 0.011 0.000 0.015 0.000 typing.py:1065(_get_protocol_attrs)',\n", + " '113732/112582 0.011 0.000 0.042 0.000 {built-in method builtins.isinstance}',\n", + " '2221/2189 0.010 0.000 0.014 0.000 indexing.py:512(shape)',\n", + " ' 9 0.007 0.001 0.007 0.001 crs.py:183(__init__)',\n", + " ' 2437 0.006 0.000 0.082 0.000 variable.py:194(as_compatible_data)',\n", + " '39365/33478 0.004 0.000 0.005 0.000 {built-in method builtins.len}']}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "logs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22efea90", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}