2 changes: 1 addition & 1 deletion .travis.yml
@@ -10,7 +10,7 @@ install:
- "pip install setuptools --upgrade; pip install -r test_requirements.txt; python setup.py install"
# command to run tests
env:
- AWS_ACCESS_KEY_ID=foo AWS_SECRET_ACCESS_KEY=bar TESTCASE=tests/tests.py
- AWS_ACCESS_KEY_ID=foo AWS_SECRET_ACCESS_KEY=bar TESTCASE=tests/unit_tests.py
script:
- nosetests $TESTCASE --with-coverage --cover-package=nodb --with-timer
# - coverage combine --append
110 changes: 88 additions & 22 deletions nodb/__init__.py
@@ -7,6 +7,7 @@
import uuid
from datetime import datetime
from io import BytesIO
import re

import boto3
import botocore
@@ -34,8 +35,10 @@ class NoDB(object):
encoding = 'utf8'
profile_name = None
bucket = None

s3 = boto3.resource('s3', config=botocore.client.Config(signature_version=signature_version))
invalid_s3_path_pattern = re.compile(r"[^0-9a-zA-Z!\-_.*'()/]")  # anything outside S3's "safe" key characters
custom_index_func = None  # optional callable(obj, index) that derives the index value
region = None
s3 = boto3.resource('s3', config=botocore.client.Config(signature_version=signature_version, region_name=region), region_name=region)

##
# Advanced config
@@ -57,7 +60,7 @@ def __init__(self, bucket=None, profile_name=None, session=None):
if self.profile_name:
session = boto3.session.Session(profile_name=self.profile_name)
if session:
self.s3 = session.resource('s3', config=botocore.client.Config(signature_version=self.signature_version))
self.s3 = session.resource('s3', config=botocore.client.Config(signature_version=self.signature_version), region_name=self.region)

def save(self, obj, index=None):
"""
@@ -82,6 +85,7 @@ def save(self, obj, index=None):

s3_object = self.s3.Object(self.bucket, self.prefix + real_index)
result = s3_object.put('rb', Body=bytesIO)
logging.debug("Put remote bytes: " + self.prefix + real_index)

if result['ResponseMetadata']['HTTPStatusCode'] == 200:
@@ -94,10 +98,11 @@

base_cache_path = self._get_base_cache_path()
cache_path = os.path.join(base_cache_path, real_index)
if not os.path.exists(cache_path):
open(cache_path, 'w+').close()
if not os.path.exists(os.path.dirname(os.path.abspath(cache_path))):
os.makedirs(os.path.dirname(os.path.abspath(cache_path)))
with open(cache_path, "wb") as in_file:
in_file.write(serialized.encode(self.encoding))
pickle.dump(serialized, in_file)
logging.debug("Wrote to cache file: " + cache_path)

return resp
@@ -120,7 +125,8 @@ def load(self, index, metainfo=False, default=None):
# Cache hit!
if os.path.isfile(cache_path):
with open(cache_path, "rb") as in_file:
serialized = in_file.read()
serialized = pickle.load(in_file)
cache_hit = True
logging.debug("Loaded bytes from cache file: " + cache_path)
else:
@@ -139,12 +145,12 @@
# Store the cache result
if self.cache:

if not os.path.exists(cache_path):
open(cache_path, 'w+').close()
if not os.path.exists(os.path.dirname(os.path.abspath(cache_path))):
os.makedirs(os.path.dirname(os.path.abspath(cache_path)))

with open(cache_path, "wb") as in_file:
in_file.write(serialized.encode(self.encoding))

pickle.dump(serialized, in_file)
logging.debug("Wrote to cache file: " + cache_path)

# Then read the data format
@@ -176,21 +182,74 @@ def delete(self, index):
else:
return False

def all(self, metainfo=False):
def all(self, metainfo=False, subpath=''):
"""
Retrieve all objects from the backend datastore, optionally restricted
to indexes that begin with 'subpath' (human-readable indexes only).
:return: list of all objects
"""
if subpath and not self.human_readable_indexes:
raise Exception("Subpath query only supported when human_readable_indexes=True")

serialized_objects = []

deserialized_objects = []

bucket = self.s3.Bucket(self.bucket)
for obj in bucket.objects.all():
serialized = obj.get()["Body"].read()
# deserialize and add to list
deserialized_objects.append(self._deserialize(serialized))
# If cache enabled, check local filestore for bytes
cache_hit = False
if self.cache:
index = (subpath or "all") + "-all"
real_index = self._format_index_value(index)
base_cache_path = self._get_base_cache_path()
cache_path = os.path.join(base_cache_path, real_index)
# Cache hit!
if os.path.isfile(cache_path):
with open(cache_path, "rb") as in_file:
serialized_objects = pickle.load(in_file)
deserialized_objects = [self._deserialize(o) for o in serialized_objects]
cache_hit = True
logging.debug("Loaded bytes from cache file: " + cache_path)
else:
cache_hit = False

# Next, get the bytes (if any)
if not self.cache or not cache_hit:

bucket = self.s3.Bucket(self.bucket)
if subpath:
if subpath.startswith("/"):
subpath = subpath[1:]
s3_prefix = self.prefix + subpath
bucket_enumerator = bucket.objects.filter(Prefix=s3_prefix)
else:
bucket_enumerator = bucket.objects.all()

try:
for obj in bucket_enumerator:
serialized = obj.get()["Body"].read()
serialized_objects.append(serialized)

# deserialize and add to list
deserialized_objects.append(self._deserialize(serialized))

except botocore.exceptions.ClientError:
# No objects found? Return an empty list.
logging.debug("No remote objects, returning empty list.")
return []

# sort by insert datetime
deserialized_objects.sort(key=lambda x: x['dt'])
# Store the cache result
if self.cache:
if not os.path.exists(os.path.dirname(os.path.abspath(cache_path))):
os.makedirs(os.path.dirname(os.path.abspath(cache_path)))

with open(cache_path, "wb") as in_file:
pickle.dump(serialized_objects, in_file)
logging.debug("Wrote to cache file: " + cache_path)

# sort by insert datetime
deserialized_objects.sort(key=lambda x: x['dt'])

if metainfo:
return deserialized_objects
@@ -266,7 +325,9 @@ def _get_object_index(self, obj, index):
"""

index_value = None
if type(obj) is dict:
if self.custom_index_func:
index_value = self.custom_index_func(obj, index)
elif type(obj) is dict:
if index in obj:
index_value = obj[index]
else:
@@ -287,11 +348,16 @@ def _format_index_value(self, index_value):
logging.debug("Formatting index value: " + str(index_value))

if self.human_readable_indexes:
# You are on your own here! This may not work!
return index_value
return self._escape_path_s3(index_value)
else:
return self.hash_function(index_value.encode(self.encoding)).hexdigest()

def _escape_path_s3(self, path):
"""Replace characters that are unsafe in S3 object keys with '-'."""
if self.invalid_s3_path_pattern.search(path):
logging.warning("Object path contains disallowed characters (replaced with '-'): " + path)
return self.invalid_s3_path_pattern.sub("-", path)
return path

def _get_base_cache_path(self):
"""
Make sure that the cache directory is real. Returns the path.
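
Taken together, the library changes above add prefix-style queries over human-readable indexes (all(subpath=...)), a pluggable custom_index_func for deriving index values, and automatic escaping of unsafe S3 key characters. A minimal usage sketch (the bucket name and the lambda are hypothetical; the index and human_readable_indexes settings follow the tests below):

    from nodb import NoDB

    nodb = NoDB('my-example-bucket')      # hypothetical bucket
    nodb.human_readable_indexes = True    # required: all(subpath=...) raises otherwise
    nodb.index = 'path'                   # index each object on its 'path' key

    # Optional: derive the index value yourself instead of the default obj[index] lookup.
    nodb.custom_index_func = lambda obj, index: obj[index].lower()

    nodb.save({'Name': 'Jeff', 'path': 'persons/jeff'})   # stored under <prefix> + 'persons/jeff'
    nodb.save({'Name': 'B@d', 'path': 'persons/b@d'})     # '@' is disallowed, escaped to 'persons/b-d'

    persons = nodb.all(subpath='persons/')  # lists only keys under <prefix> + 'persons/'

With cache enabled, all() also pickles its result set locally under a '<subpath>-all' key, and results come back sorted by insert datetime.
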
2 changes: 1 addition & 1 deletion setup.py
@@ -17,7 +17,7 @@

setup(
name='nodb',
version='0.4.0',
version='0.4.1',
packages=['nodb'],
install_requires=required,
tests_require=test_required,
30 changes: 30 additions & 0 deletions tests/integration_tests.py
@@ -0,0 +1,30 @@
# -*- coding: utf8 -*-
import unittest
from nodb import NoDB


class TestNoDBIntegration(unittest.TestCase):

def test_nodb_save_load_all_subpath(self):
bucket_name = 'noahonnumbers-blog'

nodb = NoDB(bucket_name)
nodb.human_readable_indexes = True
nodb.index = "path"

jeff = {"Name": "Jeff", "age": 19, "path": "persons/jeff", "type": "person"}
michael = {"Name": "Michael", "age": 19, "path": "persons/michael", "type": "person"}
car = {"Name": "Acura TSX", "path": "vehicles/car", "type": "vehicle"}

nodb.save(jeff)
nodb.save(michael)
nodb.save(car)

persons = nodb.all(subpath="persons/")
self.assertListEqual([jeff, michael], persons)


if __name__ == '__main__':
unittest.main()
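
Unlike the moto-backed unit test below, this test writes to a live bucket and defines no teardown, so the saved objects persist between runs. A cleanup sketch (assuming default AWS credentials; NoDB prepends its configured prefix to every key, as in the diff above):

    import boto3
    from nodb import NoDB

    def tearDown(self):
        # Sketch: delete the keys the test wrote, including NoDB's key prefix.
        bucket = boto3.resource('s3').Bucket('noahonnumbers-blog')
        for subpath in ('persons/', 'vehicles/'):
            bucket.objects.filter(Prefix=NoDB.prefix + subpath).delete()
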
22 changes: 22 additions & 0 deletions tests/tests.py → tests/unit_tests.py
@@ -146,6 +146,28 @@ def test_nodb_all(self):
all_objects = nodb.all()
self.assertListEqual([{"Name": "John", "age": 19}, {"Name": "Jane", "age": 20}], all_objects)

@moto.mock_s3
def test_nodb_all_subpath(self):
# create dummy bucket and store some objects
bucket_name = 'dummy_bucket_12345_qwerty'
self._create_mock_bucket(bucket_name)

nodb = NoDB(bucket_name)
nodb.human_readable_indexes = True
nodb.index = "path"

jeff = {"Name": "Jeff", "age": 19, "path": "persons/jeff", "type": "person"}
michael = {"Name": "Michael", "age": 19, "path": "persons/michael", "type": "person"}
car = {"Name": "Acura TSX", "path": "vehicles/car", "type": "vehicle"}

nodb.save(jeff)
nodb.save(michael)
nodb.save(car)

persons = nodb.all(subpath="persons/")
self.assertListEqual([jeff, michael], persons)


def _create_mock_bucket(self, bucket_name):
boto3.resource('s3').Bucket(bucket_name).create()

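
One caching caveat: all() now pickles its full result set locally under a '<subpath>-all' cache key, and save() does not invalidate it, so a cached all() call can return stale results after later writes. A sketch of clearing the local cache between writes and queries, using the _get_base_cache_path helper from the diff above:

    import shutil
    from nodb import NoDB

    nodb = NoDB('dummy_bucket_12345_qwerty')
    nodb.cache = True

    # ... nodb.save(...) calls and nodb.all(...) queries ...

    # Drop the local cache so the next all() call re-lists the bucket.
    shutil.rmtree(nodb._get_base_cache_path(), ignore_errors=True)
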