Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sketch profiling #4

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions benchmarks/profiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
"""Profiler for titiler-xarray reader."""

import cProfile
import json
import logging
import pstats
import time
from io import StringIO
from typing import Callable, Dict, List, Optional

from loguru import logger as log


# This code is copied from marblecutter
# https://github.com/mojodna/marblecutter/blob/master/marblecutter/stats.py
# License:
# Original work Copyright 2016 Stamen Design
# Modified work Copyright 2016-2017 Seth Fitzsimmons
# Modified work Copyright 2016 American Red Cross
# Modified work Copyright 2016-2017 Humanitarian OpenStreetMap Team
# Modified work Copyright 2017 Mapzen
class Timer(object):
"""Time a code block."""

def __enter__(self):
"""Start timer."""
self.start = time.time()
return self

def __exit__(self, ty, val, tb):
"""Stop timer."""
self.end = time.time()
self.elapsed = self.end - self.start


def parse_logs(logs: List[str]) -> Dict:
"""Parse S3FS Logs."""
s3_get = len([line for line in logs if "get_object" in line])
# s3_head = len([line for line in logs if "head_object" in line])
# s3_list = len([line for line in logs if "list_object" in line])
return {
# "LIST": s3_list,
# "HEAD": s3_head,
"GET": s3_get,
}


def profile(
add_to_return: bool = False,
quiet: bool = False,
raw: bool = False,
cprofile: bool = False,
config: Optional[Dict] = None,
):
"""Profiling."""

def wrapper(func: Callable):
"""Wrap a function."""

def wrapped_f(*args, **kwargs):
"""Wrapped function."""
# Catch s3fs Logs
log_stream = StringIO()
logger = logging.getLogger("s3fs")
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler(log_stream)
logger.addHandler(handler)

with Timer() as t:
# cProfile is a simple Python profiling
prof = cProfile.Profile()
retval = prof.runcall(func, *args, **kwargs)
profile_stream = StringIO()
ps = pstats.Stats(prof, stream=profile_stream)
ps.strip_dirs().sort_stats("time").print_stats()

logger.removeHandler(handler)
handler.close()

s3fs_lines = log_stream.getvalue().splitlines()
results = parse_logs(s3fs_lines)

results["Timing"] = t.elapsed

if cprofile:
profile_lines = [p for p in profile_stream.getvalue().splitlines() if p]
stats_to_print = [
p for p in profile_lines[3:13] if float(p.split()[1]) > 0.0
]
results["cprofile"] = [profile_lines[2], *stats_to_print]

if raw:
results["s3fs"] = s3fs_lines

if not quiet:
log.info(json.dumps(results))

if add_to_return:
return retval, results

return retval

return wrapped_f

return wrapper
136 changes: 136 additions & 0 deletions benchmarks/profiling.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "fe3b2e76",
"metadata": {},
"outputs": [],
"source": [
"from typing import Any\n",
"from rio_tiler.io import XarrayReader\n",
"from titiler.xarray.factory import xarray_open_dataset, update_dataset\n",
"\n",
"from profiler import profile"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "58d411cc",
"metadata": {},
"outputs": [],
"source": [
"import morecantile\n",
"from rio_tiler.io import XarrayReader\n",
"\n",
"\n",
"@profile(add_to_return=True, cprofile=True, quiet=True)\n",
"def get_tile(src_path: str, x: int, y: int, z: int, *, variable: str, **kwargs: Any):\n",
"\n",
" with xarray_open_dataset(\n",
" src_path,\n",
" z=z,\n",
" **kwargs,\n",
" ) as dataset:\n",
" \n",
" dataarray, _ = update_dataset(dataset, variable=variable)\n",
" \n",
" with XarrayReader(dataarray) as src_dst:\n",
" return src_dst.tile(x, y, z)\n",
" "
]
},
{
"cell_type": "markdown",
"id": "3a7aba89",
"metadata": {},
"source": [
"\n",
"s3://veda-data-store-staging/EIS/zarr/FWI-GEOS-5-Hourly (this is a protected data source but the titiler deployment I working with has access, configuring the domain name now...)\n",
"\n",
"s3://power-analysis-ready-datastore/power_901_monthly_meteorology_utc.zarr (publicly available, variable TS)\n",
"\n",
"s3://cmip6-pds/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/historical/r2i1p1f1/Amon/tas/gn/v20180827/ (publicly available, variable tas)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "e75492e0",
"metadata": {},
"outputs": [],
"source": [
"_, logs = get_tile(\n",
" \"s3://power-analysis-ready-datastore/power_901_monthly_meteorology_utc.zarr\", \n",
" 4, \n",
" 4, \n",
" 4, \n",
" variable=\"TS\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ca0a1933",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'GET': 52,\n",
" 'Timing': 5.186955213546753,\n",
" 'cprofile': [' ncalls tottime percall cumtime percall filename:lineno(function)',\n",
" \" 40 4.844 0.121 4.844 0.121 {method 'acquire' of '_thread.lock' objects}\",\n",
" \" 1 0.027 0.027 0.028 0.028 {method 'start' of 'rasterio._env.GDALEnv' objects}\",\n",
" ' 8 0.017 0.002 0.021 0.003 core.py:2142(_decode_chunk)',\n",
" ' 1 0.015 0.015 0.015 0.015 {rasterio._warp._reproject}',\n",
" ' 408 0.011 0.000 0.015 0.000 typing.py:1065(_get_protocol_attrs)',\n",
" '113732/112582 0.011 0.000 0.042 0.000 {built-in method builtins.isinstance}',\n",
" '2221/2189 0.010 0.000 0.014 0.000 indexing.py:512(shape)',\n",
" ' 9 0.007 0.001 0.007 0.001 crs.py:183(__init__)',\n",
" ' 2437 0.006 0.000 0.082 0.000 variable.py:194(as_compatible_data)',\n",
" '39365/33478 0.004 0.000 0.005 0.000 {built-in method builtins.len}']}"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"logs"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "22efea90",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}