Skip to content

Commit

Permalink
Merge pull request #387 from andreidenissov-cog/feature/386
Browse files Browse the repository at this point in the history
Implement local filesystem option for studioml experiment storage/database.
  • Loading branch information
andreidenissov-cog authored Feb 1, 2020
2 parents a8c0b9a + 0291be0 commit 863af5b
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 1 deletion.
45 changes: 45 additions & 0 deletions docs/local_filesystem_setup.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
Setting up experiment storage and database in local filesystem
==============================================================

This page describes how to set up StudioML to use the
local filesystem for storing experiment artifacts and metadata.
With this option, there is no need to set up any external
connection to S3, Minio, GCS, etc.

StudioML configuration
----------------------

::

"studio_ml_config": {

...

"database": {
"type": "local",
"endpoint": SOME_DB_LOCAL_PATH,
"bucket": DB_BUCKET_NAME,
"authentication": "none"
},
"storage": {
"type": "local",
"endpoint": SOME_ARTIFACTS_LOCAL_PATH,
"bucket": ARTIFACTS_BUCKET_NAME,
}

...
}


With StudioML database type set to "local",
all experiment meta-data will be stored locally under
directory: SOME_DB_LOCAL_PATH/DB_BUCKET_NAME.
Similarly, with storage type set to "local",
all experiment artifacts will be stored locally under
directory: SOME_ARTIFACTS_LOCAL_PATH/ARTIFACTS_BUCKET_NAME.

Note: if you use "local" mode, it is recommended to use it
for both the storage and the database configuration.
It is technically possible to mix modes, though: for example,
a local storage configuration combined with an S3-based database configuration.

4 changes: 3 additions & 1 deletion studio/artifact_store.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from .firebase_artifact_store import FirebaseArtifactStore
from .gcloud_artifact_store import GCloudArtifactStore
from .local_artifact_store import LocalArtifactStore
from .s3_artifact_store import S3ArtifactStore


def get_artifact_store(config, blocking_auth=True, verbose=10):
if config['type'].lower() == 'firebase':
return FirebaseArtifactStore(
Expand All @@ -11,5 +11,7 @@ def get_artifact_store(config, blocking_auth=True, verbose=10):
return GCloudArtifactStore(config, verbose=verbose)
elif config['type'].lower() == 's3':
return S3ArtifactStore(config, verbose=verbose)
elif config['type'].lower() == 'local':
return LocalArtifactStore(config, verbose=verbose)
else:
raise ValueError('Unknown storage type: ' + config['type'])
64 changes: 64 additions & 0 deletions studio/local_artifact_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import calendar
import os
import shutil

from .tartifact_store import TartifactStore

class LocalArtifactStore(TartifactStore):
    """Artifact store that keeps artifacts as files on the local filesystem.

    Every key is resolved to the path ``<endpoint>/<bucket>/<key>``, where
    ``endpoint`` must be an already existing directory.
    """

    def __init__(self, config,
                 bucket_name=None,
                 verbose=10,
                 measure_timestamp_diff=False,
                 compression=None):
        """Resolve the store root directory and initialise the base store.

        Args:
            config: mapping read with ``.get`` for the keys ``'endpoint'``
                (defaults to ``'~'``), ``'bucket'`` and ``'compression'``.
            bucket_name: bucket to use; when None, ``config['bucket']``
                is used instead.
            verbose: forwarded to the base class constructor.
            measure_timestamp_diff: forwarded to the base class constructor.
            compression: forwarded to the base class constructor; when None,
                ``config.get('compression')`` is used.

        Raises:
            ValueError: if the endpoint is not an existing directory, or if
                no bucket is given by either ``bucket_name`` or ``config``.
        """
        if compression is None:
            compression = config.get('compression')

        self.endpoint = config.get('endpoint', '~')
        self.store_root = os.path.realpath(os.path.expanduser(self.endpoint))
        # isdir() is False for missing paths too, so one check covers both.
        if not os.path.isdir(self.store_root):
            raise ValueError(
                "Local store root {} doesn't exist or not a directory!"
                .format(self.store_root))

        self.bucket = bucket_name
        if self.bucket is None:
            self.bucket = config.get('bucket')
        if self.bucket is None:
            # Fail with a clear message instead of the TypeError that
            # os.path.join(root, None) would raise.
            raise ValueError(
                "Local store bucket is not specified for root {}!"
                .format(self.store_root))

        self.store_root = os.path.join(self.store_root, self.bucket)
        # Create the bucket directory itself; _ensure_path_dirs_exist()
        # would only create its (already verified) parent.
        os.makedirs(self.store_root, mode=0o777, exist_ok=True)

        super(LocalArtifactStore, self).__init__(
            measure_timestamp_diff,
            compression=compression,
            verbose=verbose)

    def _ensure_path_dirs_exist(self, path):
        """Create the directory that is to contain the file ``path``."""
        dirs = os.path.dirname(path)
        # A bare file name has an empty dirname, and os.makedirs('') raises
        # FileNotFoundError; the current directory needs no creation.
        if dirs:
            os.makedirs(dirs, mode=0o777, exist_ok=True)

    def _upload_file(self, key, local_path):
        """Copy the file ``local_path`` into the store under ``key``."""
        target_path = os.path.join(self.store_root, key)
        self._ensure_path_dirs_exist(target_path)
        shutil.copyfile(local_path, target_path)

    def _download_file(self, key, local_path, bucket=None):
        """Copy the stored file for ``key`` to ``local_path``.

        ``bucket`` is accepted but not used by this implementation.
        """
        source_path = os.path.join(self.store_root, key)
        self._ensure_path_dirs_exist(local_path)
        shutil.copyfile(source_path, local_path)

    def _delete_file(self, key):
        """Remove the stored file for ``key``; raises OSError on failure."""
        os.remove(os.path.join(self.store_root, key))

    def _get_file_url(self, key, method='GET'):
        """Return the local path of ``key``; ``method`` is not used."""
        return str(os.path.join(self.store_root, key))

    def _get_file_post(self, key):
        """Return the local path of ``key``."""
        return str(os.path.join(self.store_root, key))

    def _get_file_timestamp(self, key):
        """Always return None: no timestamp is reported for local files."""
        return None

    def get_qualified_location(self, key):
        """Return a ``file:``-prefixed location string for ``key``."""
        # NOTE(review): store_root comes from realpath(), so on POSIX this
        # yields 'file://<path>' with two slashes rather than three.
        # Kept as is because callers may depend on the exact string.
        return 'file:/' + self.store_root + '/' + key

    def get_bucket(self):
        """Return the bucket name this store was configured with."""
        return self.bucket
51 changes: 51 additions & 0 deletions studio/local_db_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import os
import json

from .keyvalue_provider import KeyValueProvider
from .local_artifact_store import LocalArtifactStore

class LocalDbProvider(KeyValueProvider):
    """Key-value provider that stores each key as a JSON file on local disk.

    The value for a key is kept in the file ``<endpoint>/<bucket>/<key>``,
    where ``endpoint`` must be an already existing directory.
    """

    def __init__(self, config, blocking_auth=True, verbose=10, store=None):
        """Resolve the database root directory and initialise the base class.

        Args:
            config: mapping read with ``.get`` for the keys ``'endpoint'``
                (defaults to ``'~'``) and ``'bucket'`` (defaults to
                ``'studioml-meta'``).
            blocking_auth: forwarded to the base class constructor.
            verbose: forwarded to the base class constructor.
            store: forwarded to the base class constructor.

        Raises:
            ValueError: if the endpoint is not an existing directory.
        """
        self.config = config
        # Single lookup with a default. The original re-read the key without
        # a default afterwards, which discarded 'studioml-meta' and made
        # os.path.join fail with TypeError when no bucket was configured.
        self.bucket = config.get('bucket', 'studioml-meta')

        self.endpoint = config.get('endpoint', '~')
        self.db_root = os.path.realpath(os.path.expanduser(self.endpoint))
        # isdir() is False for missing paths too, so one check covers both.
        if not os.path.isdir(self.db_root):
            raise ValueError(
                "Local DB root {} doesn't exist or not a directory!"
                .format(self.db_root))

        self.db_root = os.path.join(self.db_root, self.bucket)
        # Create the bucket directory itself; _ensure_path_dirs_exist()
        # would only create its (already verified) parent.
        os.makedirs(self.db_root, mode=0o777, exist_ok=True)

        super(LocalDbProvider, self).__init__(
            config,
            blocking_auth,
            verbose,
            store)

    def _ensure_path_dirs_exist(self, path):
        """Create the directory that is to contain the file ``path``."""
        dirs = os.path.dirname(path)
        # A bare file name has an empty dirname, and os.makedirs('') raises
        # FileNotFoundError; the current directory needs no creation.
        if dirs:
            os.makedirs(dirs, mode=0o777, exist_ok=True)

    def _get(self, key, shallow=False):
        """Return the JSON-decoded value stored for ``key``.

        Returns None when no file exists for the key. ``shallow`` is
        accepted but not used by this implementation.
        """
        file_name = os.path.join(self.db_root, key)
        # Open directly instead of checking existence first, so a file
        # removed in between cannot cause a spurious error.
        try:
            with open(file_name, encoding='utf-8') as infile:
                return json.load(infile)
        except (FileNotFoundError, NotADirectoryError):
            return None

    def _delete(self, key):
        """Remove the file stored for ``key``; a missing file is ignored."""
        file_name = os.path.join(self.db_root, key)
        try:
            os.remove(file_name)
        except (FileNotFoundError, NotADirectoryError):
            pass

    def _set(self, key, value):
        """Serialise ``value`` as JSON into the file for ``key``.

        Parent directories are created as needed and any existing file is
        overwritten.
        """
        file_name = os.path.join(self.db_root, key)
        self._ensure_path_dirs_exist(file_name)
        with open(file_name, 'w', encoding='utf-8') as outfile:
            json.dump(value, outfile)

12 changes: 12 additions & 0 deletions studio/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from .artifact_store import get_artifact_store
from .http_provider import HTTPProvider
from .firebase_provider import FirebaseProvider
from .local_artifact_store import LocalArtifactStore
from .local_db_provider import LocalDbProvider
from .s3_provider import S3Provider
from .gs_provider import GSProvider
from .model_setup import setup_model
Expand Down Expand Up @@ -102,6 +104,16 @@ def get_db_provider(config=None, blocking_auth=True):
blocking_auth=blocking_auth)
artifact_store = db_provider.get_artifact_store()

elif db_config['type'].lower() == 'local':
if artifact_store is None:
artifact_store = LocalArtifactStore(db_config, "storage", verbose)

db_provider = LocalDbProvider(db_config,
verbose=verbose,
store=artifact_store,
blocking_auth=blocking_auth)
artifact_store = db_provider.get_artifact_store()

else:
_model_setup = None
raise ValueError('Unknown type of the database ' + db_config['type'])
Expand Down

0 comments on commit 863af5b

Please sign in to comment.