From e07d01e6301b0e06205bf1472d96f8639b8aea3f Mon Sep 17 00:00:00 2001 From: DROUARD Benjamin Date: Sun, 2 Oct 2016 01:48:04 +0200 Subject: [PATCH 01/15] Rewriting cache using SQLAlchemy ORM --- acd_cli.py | 2 +- acdcli/cache/cursors.py | 28 --- acdcli/cache/db.py | 131 +++-------- acdcli/cache/format.py | 14 +- acdcli/cache/keyvaluestorage.py | 21 ++ acdcli/cache/query.py | 350 ++++++++-------------------- acdcli/cache/schema.py | 155 +----------- acdcli/cache/sync.py | 150 ++++++------ acdcli/cache/templates/__init__.py | 1 + acdcli/cache/templates/base.py | 3 + acdcli/cache/templates/files.py | 9 + acdcli/cache/templates/labels.py | 8 + acdcli/cache/templates/metadata.py | 8 + acdcli/cache/templates/nodes.py | 54 +++++ acdcli/cache/templates/parentage.py | 8 + setup.py | 2 +- tests/test_cache.py | 2 +- 17 files changed, 327 insertions(+), 619 deletions(-) delete mode 100755 acdcli/cache/cursors.py create mode 100644 acdcli/cache/keyvaluestorage.py create mode 100644 acdcli/cache/templates/__init__.py create mode 100644 acdcli/cache/templates/base.py create mode 100644 acdcli/cache/templates/files.py create mode 100644 acdcli/cache/templates/labels.py create mode 100644 acdcli/cache/templates/metadata.py create mode 100644 acdcli/cache/templates/nodes.py create mode 100644 acdcli/cache/templates/parentage.py diff --git a/acd_cli.py b/acd_cli.py index 3003243..55ecfb8 100755 --- a/acd_cli.py +++ b/acd_cli.py @@ -1579,7 +1579,7 @@ def main(): if args.func not in nocache_actions: try: - cache = db.NodeCache(CACHE_PATH, SETTINGS_PATH, args.check) + cache = db.NodeCache(CACHE_PATH, SETTINGS_PATH) except: raise sys.exit(INIT_FAILED_RETVAL) diff --git a/acdcli/cache/cursors.py b/acdcli/cache/cursors.py deleted file mode 100755 index f9db7f2..0000000 --- a/acdcli/cache/cursors.py +++ /dev/null @@ -1,28 +0,0 @@ -"""Cursor context managers""" - -class cursor(object): - def __init__(self, conn): - self.conn = conn - - def __enter__(self): - self.cursor = self.conn.cursor() - return self.cursor - - def __exit__(self, exc_type, exc_val, exc_tb): - self.cursor.close() - - -class mod_cursor(object): - def __init__(self, conn): - self.conn = conn - - def __enter__(self): - self.cursor = self.conn.cursor() - return self.cursor - - def __exit__(self, exc_type, exc_val, exc_tb): - if exc_type is None: - self.conn.commit() - else: - self.conn.rollback() - self.cursor.close() diff --git a/acdcli/cache/db.py b/acdcli/cache/db.py index 994a925..aa9d803 100644 --- a/acdcli/cache/db.py +++ b/acdcli/cache/db.py @@ -1,31 +1,22 @@ -import configparser import logging -import os -import re -import sqlite3 -from threading import local +import configparser +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker from acdcli.utils.conf import get_conf -from .cursors import * -from .format import FormatterMixin -from .query import QueryMixin from .schema import SchemaMixin +from .query import QueryMixin +from .format import FormatterMixin from .sync import SyncMixin +from .templates.nodes import Nodes +from .keyvaluestorage import KeyValueStorage logger = logging.getLogger(__name__) -_ROOT_ID_SQL = 'SELECT id FROM nodes WHERE name IS NULL AND type == "folder" ORDER BY created' - - +_DB_DEFAULT = 'nodes.db' _SETTINGS_FILENAME = 'cache.ini' -_def_conf = configparser.ConfigParser() -_def_conf['sqlite'] = dict(filename='nodes.db', busy_timeout=30000, journal_mode='wal') -_def_conf['blacklist'] = dict(folders= []) - - - class IntegrityError(Exception): def __init__(self, msg): self.msg = msg @@ -33,100 +24,32 @@ def __init__(self, msg): def __str__(self): return repr(self.msg) - -def _create_conn(path: str) -> sqlite3.Connection: - c = sqlite3.connect(path) - c.row_factory = sqlite3.Row # allow dict-like access on rows with col name - return c - - -def _regex_match(pattern: str, cell: str) -> bool: - if cell is None: - return False - return re.match(pattern, cell, re.IGNORECASE) is not None - - -class NodeCache(SchemaMixin, QueryMixin, SyncMixin, FormatterMixin): +class NodeCache(SchemaMixin, QueryMixin, FormatterMixin, SyncMixin): IntegrityCheckType = dict(full=0, quick=1, none=2) - """types of SQLite integrity checks""" - - def __init__(self, cache_path: str='', settings_path='', check=IntegrityCheckType['full']): - self._conf = get_conf(settings_path, _SETTINGS_FILENAME, _def_conf) - self.db_path = os.path.join(cache_path, self._conf['sqlite']['filename']) - self.tl = local() - - self.integrity_check(check) + def __init__(self, cache_path: str='', settings_path=''): + self.init_config(cache_path, settings_path) + self._engine = create_engine(self._conf["database"]["url"]) self.init() - self._conn.create_function('REGEXP', _regex_match.__code__.co_argcount, _regex_match) - - with cursor(self._conn) as c: - c.execute(_ROOT_ID_SQL) - row = c.fetchone() - if not row: - self.root_id = '' - return - first_id = row['id'] - - if c.fetchone(): - raise IntegrityError('Could not uniquely identify root node.') + self._DBSession = sessionmaker(bind=self._engine) + self._session = self._DBSession() - self.root_id = first_id + self.KeyValueStorage = KeyValueStorage(self._session) - self._execute_pragma('busy_timeout', self._conf['sqlite']['busy_timeout']) - self._execute_pragma('journal_mode', self._conf['sqlite']['journal_mode']) + self.KeyValueStorage.__setitem__("Hello", "die") - @property - def _conn(self) -> sqlite3.Connection: - if not hasattr(self.tl, '_conn'): - self.tl._conn = _create_conn(self.db_path) - return self.tl._conn - - def _execute_pragma(self, key, value) -> str: - with cursor(self._conn) as c: - c.execute('PRAGMA %s=%s;' % (key, value)) - r = c.fetchone() - if r: - logger.debug('Set %s to %s. Result: %s.' % (key, value, r[0])) - return r[0] - - def remove_db_file(self) -> bool: - """Removes database file.""" - self._conn.close() - - import os - import random - import string - import tempfile - - tmp_name = ''.join(random.choice(string.ascii_lowercase) for _ in range(16)) - tmp_name = os.path.join(tempfile.gettempdir(), tmp_name) - - try: - os.rename(self.db_path, tmp_name) - except OSError: - logger.critical('Error renaming/removing database file "%s".' % self.db_path) - return False + rootNodes = self._session.query(Nodes).filter(Nodes.name == None).all() + if len(rootNodes) > 1: + raise IntegrityError('Could not uniquely identify root node.') + elif len(rootNodes) == 0: + self.root_id = '' else: - try: - os.remove(tmp_name) - except OSError: - logger.info('Database file was moved, but not deleted.') - return True + self.root_id = rootNodes[0].id - def integrity_check(self, type_: IntegrityCheckType): - """Performs a `self-integrity check - `_ on the database.""" + def init_config(self, cache_path, settings_path): + _def_conf = configparser.ConfigParser() + _def_conf['database'] = dict(url='sqlite:///' + cache_path + '/' + _DB_DEFAULT) + _def_conf['blacklist'] = dict(folders=[]) - with cursor(self._conn) as c: - if type_ == NodeCache.IntegrityCheckType['full']: - r = c.execute('PRAGMA integrity_check;') - elif type_ == NodeCache.IntegrityCheckType['quick']: - r = c.execute('PRAGMA quick_check;') - else: - return - r = c.fetchone() - if not r or r[0] != 'ok': - logger.warn('Sqlite database integrity check failed. ' - 'You may need to clear the cache if you encounter any errors.') + self._conf = get_conf(settings_path, _SETTINGS_FILENAME, _def_conf) diff --git a/acdcli/cache/format.py b/acdcli/cache/format.py index 19806f4..69ead39 100644 --- a/acdcli/cache/format.py +++ b/acdcli/cache/format.py @@ -7,7 +7,7 @@ import sys import datetime -from .cursors import cursor +from .templates.nodes import Status try: colors = filter(None, os.environ.get('LS_COLORS', '').split(':')) @@ -36,7 +36,7 @@ def init(color=ColorMode['auto']): global get_adfixes, color_path, color_status, seq_tpl, nor_fmt get_adfixes = lambda _: ('', '') color_path = lambda x: x - color_status = lambda x: x[0] + color_status = lambda x: x.value[0] seq_tpl = '%s' nor_fmt = '%s' @@ -64,11 +64,11 @@ def color_path(path: str) -> str: def color_status(status): """Creates a colored one-character status abbreviation.""" - if status == 'AVAILABLE': - return seq_tpl % '32' + status[0] + res # green - elif status == 'TRASH': - return seq_tpl % '31' + status[0] + res # red - return status[0] + if status == Status.AVAILABLE: + return seq_tpl % '32' + status.value[0] + res # green + elif status == Status.TRASH: + return seq_tpl % '31' + status.value[0] + res # red + return status.value[0] def date_str(time_: datetime.datetime) -> str: diff --git a/acdcli/cache/keyvaluestorage.py b/acdcli/cache/keyvaluestorage.py new file mode 100644 index 0000000..21e7bfb --- /dev/null +++ b/acdcli/cache/keyvaluestorage.py @@ -0,0 +1,21 @@ +from .templates.metadata import Metadata + + +class KeyValueStorage(object): + def __init__(self, session): + self._session = session + + def __getitem__(self, key: str): + return self._session.query(Metadata.value).filter(Metadata.key == key).scalar() + + def __setitem__(self, key: str, value: str): + self._session.merge(Metadata(key=key, value=value)) + self._session.commit() + + def get(self, key: str, default: str=None): + r = self._session.query(Metadata).filter(Metadata.key == key).first() + return r.value if r else default + + def update(self, dict_: dict): + for key in dict_.keys(): + self.__setitem__(key, dict_[key]) diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index 44ea869..556fd84 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -1,265 +1,111 @@ -import logging -from datetime import datetime -from .cursors import cursor +import re -logger = logging.getLogger(__name__) +from sqlalchemy import func - -def datetime_from_string(dt: str) -> datetime: - try: - dt = datetime.strptime(dt, '%Y-%m-%d %H:%M:%S.%f+00:00') - except ValueError: - dt = datetime.strptime(dt, '%Y-%m-%d %H:%M:%S+00:00') - return dt - - -CHILDREN_SQL = """SELECT n.*, f.* FROM nodes n - JOIN parentage p ON n.id = p.child - LEFT OUTER JOIN files f ON n.id = f.id - WHERE p.parent = (?) - ORDER BY n.name""" - -CHILDRENS_NAMES_SQL = """SELECT n.name FROM nodes n - JOIN parentage p ON n.id = p.child - WHERE p.parent = (?) AND n.status == 'AVAILABLE' - ORDER BY n.name""" - -NUM_CHILDREN_SQL = """SELECT COUNT(n.id) FROM nodes n - JOIN parentage p ON n.id = p.child - WHERE p.parent = (?) AND n.status == 'AVAILABLE'""" - -NUM_PARENTS_SQL = """SELECT COUNT(n.id) FROM nodes n - JOIN parentage p ON n.id = p.parent - WHERE p.child = (?) AND n.status == 'AVAILABLE'""" - -NUM_NODES_SQL = 'SELECT COUNT(*) FROM nodes' -NUM_FILES_SQL = 'SELECT COUNT(*) FROM files' -NUM_FOLDERS_SQL = 'SELECT COUNT(*) FROM nodes WHERE type == "folder"' - -CHILD_OF_SQL = """SELECT n.*, f.* FROM nodes n - JOIN parentage p ON n.id = p.child - LEFT OUTER JOIN files f ON n.id = f.id - WHERE n.name = (?) AND p.parent = (?) - ORDER BY n.status""" - -NODE_BY_ID_SQL = """SELECT n.*, f.* FROM nodes n LEFT OUTER JOIN files f ON n.id = f.id - WHERE n.id = (?)""" - -USAGE_SQL = 'SELECT SUM(size) FROM files' - -FIND_BY_NAME_SQL = """SELECT n.*, f.* FROM nodes n - LEFT OUTER JOIN files f ON n.id = f.id - WHERE n.name LIKE ? - ORDER BY n.name""" - -FIND_BY_REGEX_SQL = """SELECT n.*, f.* FROM nodes n - LEFT OUTER JOIN files f ON n.id = f.id - WHERE n.name REGEXP ? - ORDER BY n.name""" - -FIND_BY_MD5_SQL = """SELECT n.*, f.* FROM nodes n - LEFT OUTER JOIN files f ON n.id = f.id - WHERE f.md5 == (?) - ORDER BY n.name""" - -FIND_FIRST_PARENT_SQL = """SELECT n.* FROM nodes n - JOIN parentage p ON n.id = p.parent - WHERE p.child = (?) - ORDER BY n.status, n.id""" - -# TODO: exclude files in trashed folders?! -FILE_SIZE_EXISTS_SQL = """SELECT COUNT(*) FROM files f - JOIN nodes n ON n.id = f.id - WHERE f.size == (?) AND n.status == 'AVAILABLE'""" - - -class Node(object): - def __init__(self, row): - self.id = row['id'] - self.type = row['type'] - self.name = row['name'] - self.description = row['description'] - self.cre = row['created'] - self.mod = row['modified'] - self.updated = row['updated'] - self.status = row['status'] - - try: - self.md5 = row['md5'] - except IndexError: - self.md5 = None - try: - self.size = row['size'] - except IndexError: - self.size = 0 - - def __lt__(self, other): - return self.name < other.name - - def __hash__(self): - return hash(self.id) - - def __repr__(self): - return 'Node(%r, %r)' % (self.id, self.name) - - @property - def is_folder(self): - return self.type == 'folder' - - @property - def is_file(self): - return self.type == 'file' - - @property - def is_available(self): - return self.status == 'AVAILABLE' - - @property - def is_trashed(self): - return self.status == 'TRASH' - - @property - def created(self): - return datetime_from_string(self.cre) - - @property - def modified(self): - return datetime_from_string(self.mod) - - @property - def simple_name(self): - if self.is_file: - return self.name - return (self.name if self.name else '') + '/' +from .templates.nodes import Nodes, Status +from .templates.files import Files +from .templates.parentage import Parentage class QueryMixin(object): - def get_node(self, id) -> 'Union[Node|None]': - with cursor(self._conn) as c: - c.execute(NODE_BY_ID_SQL, [id]) - r = c.fetchone() - if r: - return Node(r) + def get_node(self, id) -> 'Union[Nodes|None]': + return self._session.query(Nodes).filter(Nodes.id == id) def get_root_node(self): return self.get_node(self.root_id) def get_conflicting_node(self, name: str, parent_id: str): - """Finds conflicting node in folder specified by *parent_id*, if one exists.""" folders, files = self.list_children(parent_id) for n in folders + files: if n.is_available and n.name.lower() == name.lower(): return n - def resolve(self, path: str, trash=False) -> 'Union[Node|None]': + def resolve(self, path: str, trash=False) -> 'Union[Nodes|None]': segments = list(filter(bool, path.split('/'))) if not segments: if not self.root_id: return - with cursor(self._conn) as c: - c.execute(NODE_BY_ID_SQL, [self.root_id]) - r = c.fetchone() - return Node(r) + return self._session.query(Nodes).filter(Nodes.id == self.root_id).first() parent = self.root_id for i, segment in enumerate(segments): - with cursor(self._conn) as c: - c.execute(CHILD_OF_SQL, [segment, parent]) - r = c.fetchone() - r2 = c.fetchone() + result = self._session.query(Nodes) \ + .join(Parentage, Parentage.child == Nodes.id) \ + .filter(Parentage.parent == parent) \ + .filter(Nodes.name == segment).first() - if not r: + if not result: return - r = Node(r) - - if not r.is_available: + if not result.is_available: if not trash: return - if r2: - logger.debug('None-unique trash name "%s" in %s.' %(segment, parent)) - return if i + 1 == segments.__len__(): - return r - if r.is_folder: - parent = r.id - continue + return result + if result.is_folder: + parent = result.id else: return def childrens_names(self, folder_id) -> 'List[str]': - with cursor(self._conn) as c: - c.execute(CHILDRENS_NAMES_SQL, [folder_id]) - kids = [] - row = c.fetchone() - while row: - kids.append(row['name']) - row = c.fetchone() - return kids + names = self._session.query(Nodes.name) \ + .join(Parentage, Parentage.child == Nodes.id) \ + .filter(Parentage.parent == folder_id) \ + .filter(Nodes.status == Status.AVAILABLE) \ + .all() + return list(list(zip(*names))[0]) def get_node_count(self) -> int: - with cursor(self._conn) as c: - c.execute(NUM_NODES_SQL) - r = c.fetchone()[0] - return r + return self._session.query(func.count(Nodes.id)).scalar() - def get_folder_count(self) -> int: - with cursor(self._conn) as c: - c.execute(NUM_FOLDERS_SQL) - r = c.fetchone()[0] - return r + def get_files_count(self) -> int: + return self._session.query(func.count(Files.id)).scalar() - def get_file_count(self) -> int: - with cursor(self._conn) as c: - c.execute(NUM_FILES_SQL) - r = c.fetchone()[0] - return r + def get_folders_count(self) -> int: + return self._session.query(func.count(Nodes.id)).filter(Nodes.type == "folder").scalar() def calculate_usage(self): - with cursor(self._conn) as c: - c.execute(USAGE_SQL) - r = c.fetchone() - return r[0] if r and r[0] else 0 + return self._session.query(func.sum(Files.size)).scalar() def num_children(self, folder_id) -> int: - with cursor(self._conn) as c: - c.execute(NUM_CHILDREN_SQL, [folder_id]) - num = c.fetchone()[0] - return num + return self._session.query(func.count(Nodes.name)) \ + .join(Parentage, Parentage.child == Nodes.id) \ + .filter(Parentage.parent == folder_id) \ + .filter(Nodes.status == Status.AVAILABLE) \ + .scalar() def num_parents(self, node_id) -> int: - with cursor(self._conn) as c: - c.execute(NUM_PARENTS_SQL, [node_id]) - num = c.fetchone()[0] - return num - - def get_child(self, folder_id, child_name) -> 'Union[Node|None]': - with cursor(self._conn) as c: - c.execute(CHILD_OF_SQL, [child_name, folder_id]) - r = c.fetchone() - if r: - r = Node(r) - if r.is_available: - return r - - def list_children(self, folder_id, trash=False) -> 'Tuple[List[Node], List[Node]]': + return self._session.query(func.count(Nodes.name)) \ + .join(Parentage, Parentage.parent == Nodes.id) \ + .filter(Parentage.child == node_id) \ + .filter(Nodes.status == Status.AVAILABLE) \ + .scalar() + + def get_child(self, folder_id, child_name) -> 'Union[Nodes|None]': + return self._session.query(Nodes) \ + .join(Parentage, Parentage.child == Nodes.id) \ + .filter(Parentage.parent == folder_id) \ + .filter(Nodes.name == child_name) \ + .filter(Nodes.status == Status.AVAILABLE) \ + .first() + + def list_children(self, folder_id, trash=False) -> 'Tuple[List[Nodes], List[Nodes]]': files = [] folders = [] - with cursor(self._conn) as c: - c.execute(CHILDREN_SQL, [folder_id]) - node = c.fetchone() - while node: - node = Node(node) - if node.is_available or trash: - if node.is_file: - files.append(node) - elif node.is_folder: - folders.append(node) - node = c.fetchone() + results = self._session.query(Nodes) \ + .join(Parentage, Parentage.child == Nodes.id) \ + .filter(Parentage.parent == folder_id).all() + + for result in results: + if result.is_available or trash: + if result.is_file: + files.append(result) + elif result.is_folder: + folders.append(result) return folders, files - def list_trashed_children(self, folder_id) -> 'Tuple[List[Node], List[Node]]': + def list_trashed_children(self, folder_id) -> 'Tuple[List[Nodes], List[Nodes]]': folders, files = self.list_children(folder_id, True) folders[:] = [f for f in folders if f.is_trashed] files[:] = [f for f in files if f.is_trashed] @@ -268,47 +114,45 @@ def list_trashed_children(self, folder_id) -> 'Tuple[List[Node], List[Node]]': def first_path(self, node_id: str) -> str: if node_id == self.root_id: return '/' - with cursor(self._conn) as c: - c.execute(FIND_FIRST_PARENT_SQL, (node_id,)) - r = c.fetchone() - node = Node(r) + node = self._session.query(Nodes) \ + .join(Parentage, Parentage.parent == Nodes.id) \ + .filter(Parentage.child == node_id) \ + .order_by(Nodes.status) \ + .order_by(Nodes.id) \ + .first() if node.id == self.root_id: return node.simple_name return self.first_path(node.id) + node.name + '/' - def find_by_name(self, name: str) -> 'List[Node]': - nodes = [] - with cursor(self._conn) as c: - c.execute(FIND_BY_NAME_SQL, ['%' + name + '%']) - r = c.fetchone() - while r: - nodes.append(Node(r)) - r = c.fetchone() - return nodes - - def find_by_md5(self, md5) -> 'List[Node]': - nodes = [] - with cursor(self._conn) as c: - c.execute(FIND_BY_MD5_SQL, (md5,)) - r = c.fetchone() - while r: - nodes.append(Node(r)) - r = c.fetchone() - return nodes - - def find_by_regex(self, regex) -> 'List[Node]': - nodes = [] - with cursor(self._conn) as c: - c.execute(FIND_BY_REGEX_SQL, (regex,)) - r = c.fetchone() - while r: - nodes.append(Node(r)) - r = c.fetchone() - return nodes + def find_by_name(self, name: str) -> 'List[Nodes]': + return self._session.query(Nodes) \ + .filter(Nodes.name.like("%" + name + "%")) \ + .order_by(Nodes.name) \ + .all() + + def find_by_md5(self, md5) -> 'List[Nodes]': + return self._session.query(Nodes) \ + .join(Files, Files.id == Nodes.id) \ + .filter(Files.md5 == md5) \ + .order_by(Nodes.name) \ + .all() + + def find_by_regex(self, regex) -> 'List[Nodes]': + if self._conf["database"]["url"].startswith("sqlite") or self._conf["database"]["url"].startswith("mysql"): + return self._session.query(Nodes) \ + .filter(Nodes.name.op("REGEXP")(regex)) \ + .order_by(Nodes.name) \ + .all() + + nodes = self._session.query(Nodes).order_by(Nodes.name).all() + match = [] + for n in nodes: + if re.match(regex, n.name): + match.append(n) + return match def file_size_exists(self, size) -> bool: - with cursor(self._conn) as c: - c.execute(FILE_SIZE_EXISTS_SQL, [size]) - no = c.fetchone()[0] - - return bool(no) + return bool(self._session.query(func.count(Files.id)) \ + .join(Nodes, Nodes.id == Files.id) \ + .filter(Files.size == size) \ + .filter(Nodes.status == Status.AVAILABLE).scalar()) diff --git a/acdcli/cache/schema.py b/acdcli/cache/schema.py index d5e138b..cd533e1 100644 --- a/acdcli/cache/schema.py +++ b/acdcli/cache/schema.py @@ -1,162 +1,15 @@ import logging -from sqlite3 import OperationalError -from .cursors import * - -logger = logging.getLogger(__name__) - -# _KeyValueStorage - - -_CREATION_SCRIPT = """ - CREATE TABLE metadata ( - "key" VARCHAR(64) NOT NULL, - value VARCHAR, - PRIMARY KEY ("key") - ); - - CREATE TABLE nodes ( - id VARCHAR(50) NOT NULL, - type VARCHAR(15), - name VARCHAR(256), - description VARCHAR(500), - created DATETIME, - modified DATETIME, - updated DATETIME, - status VARCHAR(9), - PRIMARY KEY (id), - UNIQUE (id), - CHECK (status IN ('AVAILABLE', 'TRASH', 'PURGED', 'PENDING')) - ); - - CREATE TABLE labels ( - id VARCHAR(50) NOT NULL, - name VARCHAR(256) NOT NULL, - PRIMARY KEY (id, name), - FOREIGN KEY(id) REFERENCES nodes (id) - ); - - CREATE TABLE files ( - id VARCHAR(50) NOT NULL, - md5 VARCHAR(32), - size BIGINT, - PRIMARY KEY (id), - UNIQUE (id), - FOREIGN KEY(id) REFERENCES nodes (id) - ); - - CREATE TABLE parentage ( - parent VARCHAR(50) NOT NULL, - child VARCHAR(50) NOT NULL, - PRIMARY KEY (parent, child), - FOREIGN KEY(parent) REFERENCES folders (id), - FOREIGN KEY(child) REFERENCES nodes (id) - ); - - CREATE INDEX ix_nodes_names ON nodes(name); - PRAGMA user_version = 2; - """ - -_GEN_DROP_TABLES_SQL = \ - 'SELECT "DROP TABLE " || name || ";" FROM sqlite_master WHERE type == "table"' - - -def _0_to_1(conn): - conn.executescript( - 'ALTER TABLE nodes ADD updated DATETIME;' - 'ALTER TABLE nodes ADD description VARCHAR(500);' - 'PRAGMA user_version = 1;' - ) - conn.commit() - - -def _1_to_2(conn): - conn.executescript( - 'DROP TABLE IF EXISTS folders;' - 'CREATE INDEX IF NOT EXISTS ix_nodes_names ON nodes(name);' - 'REINDEX;' - 'PRAGMA user_version = 2;' - ) - conn.commit() +from .templates import * -_migrations = [_0_to_1, _1_to_2] -"""list of all migrations from index -> index+1""" +logger = logging.getLogger(__name__) class SchemaMixin(object): _DB_SCHEMA_VER = 2 def init(self): - try: - self.create_tables() - except OperationalError: - pass - with cursor(self._conn) as c: - c.execute('PRAGMA user_version;') - r = c.fetchone() - ver = r[0] - - logger.info('DB schema version is %i.' % ver) - - if self._DB_SCHEMA_VER > ver: - self._migrate(ver) - - self.KeyValueStorage = _KeyValueStorage(self._conn) - - def create_tables(self): - self._conn.executescript(_CREATION_SCRIPT) - self._conn.commit() - - def _migrate(self, version): - for i, migration in enumerate(_migrations[version:]): - v = i + version - logger.info('Migrating from schema version %i to %i' % (v, v + 1)) - migration(self._conn) + base.Base.metadata.create_all(self._engine, checkfirst=True) def drop_all(self): - drop_sql = [] - with cursor(self._conn) as c: - c.execute(_GEN_DROP_TABLES_SQL) - dt = c.fetchone() - while dt: - drop_sql.append(dt[0]) - dt = c.fetchone() - - with mod_cursor(self._conn) as c: - for drop in drop_sql: - c.execute(drop) - self._conn.commit() - logger.info('Dropped all tables.') - return True - - -class _KeyValueStorage(object): - def __init__(self, conn): - self.conn = conn - - def __getitem__(self, key: str): - with cursor(self.conn) as c: - c.execute('SELECT value FROM metadata WHERE key = (?)', [key]) - r = c.fetchone() - if r: - return r['value'] - else: - raise KeyError - - def __setitem__(self, key: str, value: str): - with mod_cursor(self.conn) as c: - c.execute('INSERT OR REPLACE INTO metadata VALUES (?, ?)', [key, value]) - - # def __len__(self): - # return self.Session.query(Metadate).count() - - def get(self, key: str, default: str = None): - with cursor(self.conn) as c: - c.execute('SELECT value FROM metadata WHERE key == ?', [key]) - r = c.fetchone() - - return r['value'] if r else default - - def update(self, dict_: dict): - for key in dict_.keys(): - self.__setitem__(key, dict_[key]) + base.Base.metadata.drop_all(self._engine) diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index d6dbe80..d258237 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -1,17 +1,17 @@ -""" -Syncs Amazon Node API objects with SQLite database. -""" - import logging -from datetime import datetime from itertools import islice -from .cursors import mod_cursor + +from datetime import datetime import dateutil.parser as iso_date +from .templates.nodes import Nodes, Status +from .templates.files import Files +from .templates.parentage import Parentage +from .templates.labels import Labels + logger = logging.getLogger(__name__) -# prevent sqlite3 from throwing too many arguments errors (#145) def gen_slice(list_, length=100): it = iter(list_) while True: @@ -21,35 +21,39 @@ def gen_slice(list_, length=100): yield slice_ -def placeholders(args): - return '(%s)' % ','.join('?' * len(args)) - - class SyncMixin(object): - """Sync mixin to the :class:`NodeCache `""" - def remove_purged(self, purged: list): - """Removes purged nodes from database - - :param purged: list of purged node IDs""" - if not purged: return for slice_ in gen_slice(purged): - with mod_cursor(self._conn) as c: - c.execute('DELETE FROM nodes WHERE id IN %s' % placeholders(slice_), slice_) - c.execute('DELETE FROM files WHERE id IN %s' % placeholders(slice_), slice_) - c.execute('DELETE FROM parentage WHERE parent IN %s' % placeholders(slice_), slice_) - c.execute('DELETE FROM parentage WHERE child IN %s' % placeholders(slice_), slice_) - c.execute('DELETE FROM labels WHERE id IN %s' % placeholders(slice_), slice_) + self._session.query(Nodes) \ + .filter(Nodes.id.in_(slice_)) \ + .delete(synchronize_session=False) + self._session.query(Files) \ + .filter(Files.id.in_(slice_)) \ + .delete(synchronize_session=False) + + self._session.query(Parentage) \ + .filter(Parentage.parent.in_(slice_)) \ + .delete(synchronize_session=False) + + self._session.query(Parentage) \ + .filter(Parentage.child.in_(slice_)) \ + .delete(synchronize_session=False) + + self._session.query(Labels) \ + .filter(Labels.id.in_(slice_)) \ + .delete(synchronize_session=False) + + self._session.commit() logger.info('Purged %i node(s).' % len(purged)) def insert_nodes(self, nodes: list, partial=True): - """Inserts mixed list of files and folders into cache.""" files = [] folders = [] + for node in nodes: if node['status'] == 'PENDING': continue @@ -61,11 +65,9 @@ def insert_nodes(self, nodes: list, partial=True): files.append(node) elif kind == 'FOLDER': if (not 'name' in node or not node['name']) \ - and (not 'isRoot' in node or not node['isRoot']): - logger.warning('Skipping non-root folder %s because its name is empty.' - % node['id']) + and (not 'isRoot' in node or not node['isRoot']): + logger.warning('Skipping non-root folder %s because its name is empty.' % node['id']) continue - folders.append(node) elif kind != 'ASSET': logger.warning('Cannot insert unknown node type "%s".' % kind) self.insert_folders(folders) @@ -74,57 +76,54 @@ def insert_nodes(self, nodes: list, partial=True): self.insert_parentage(files + folders, partial) def insert_node(self, node: dict): - """Inserts single file or folder into cache.""" if not node: return self.insert_nodes([node]) def insert_folders(self, folders: list): - """ Inserts list of folders into cache. Sets 'update' column to current date. - - :param folders: list of raw dict-type folders""" - if not folders: return - with mod_cursor(self._conn) as c: - for f in folders: - c.execute( - 'INSERT OR REPLACE INTO nodes ' - '(id, type, name, description, created, modified, updated, status) ' - 'VALUES (?, "folder", ?, ?, ?, ?, ?, ?)', - [f['id'], f.get('name'), f.get('description'), - iso_date.parse(f['createdDate']), iso_date.parse(f['modifiedDate']), - datetime.utcnow(), - f['status'] - ] - ) - + for f in folders: + self._session.merge( + Nodes( + id=f['id'], + type="folder", + name=f.get('name'), + description=f.get('description'), + created=iso_date.parse(f['createdDate']), + modified=iso_date.parse(f['modifiedDate']), + updated=datetime.utcnow(), + status=Status(f['status']) + )) + + self._session.commit() logger.info('Inserted/updated %d folder(s).' % len(folders)) def insert_files(self, files: list): if not files: return - with mod_cursor(self._conn) as c: - for f in files: - c.execute('INSERT OR REPLACE INTO nodes ' - '(id, type, name, description, created, modified, updated, status)' - 'VALUES (?, "file", ?, ?, ?, ?, ?, ?)', - [f['id'], f.get('name'), f.get('description'), - iso_date.parse(f['createdDate']), iso_date.parse(f['modifiedDate']), - datetime.utcnow(), - f['status'] - ] - ) - c.execute('INSERT OR REPLACE INTO files (id, md5, size) VALUES (?, ?, ?)', - [f['id'], - f.get('contentProperties', {}).get('md5', - 'd41d8cd98f00b204e9800998ecf8427e'), - f.get('contentProperties', {}).get('size', 0) - ] - ) - + for f in files: + self._session.merge( + Nodes( + id=f['id'], + type="file", + name=f.get('name'), + description=f.get('description'), + created=iso_date.parse(f['createdDate']), + modified=iso_date.parse(f['modifiedDate']), + updated=datetime.utcnow(), + status=(Status(f['status'])) + )) + self._session.merge( + Files( + id=f['id'], + md5=f.get('contentProperties', {}).get('md5', 'd41d8cd98f00b204e9800998ecf8427e'), + size=f.get('contentProperties', {}).get('size', 0) + )) + + self._session.commit() logger.info('Inserted/updated %d file(s).' % len(files)) def insert_parentage(self, nodes: list, partial=True): @@ -132,14 +131,19 @@ def insert_parentage(self, nodes: list, partial=True): return if partial: - with mod_cursor(self._conn) as c: - for slice_ in gen_slice(nodes): - c.execute('DELETE FROM parentage WHERE child IN %s' % placeholders(slice_), - [n['id'] for n in slice_]) + for slice_ in gen_slice(nodes): + self._session.query(Parentage) \ + .filter(Parentage.child.in_([n['id'] for n in slice_])) \ + .delete(synchronize_session=False) + + self._session.commit() - with mod_cursor(self._conn) as c: - for n in nodes: - for p in n['parents']: - c.execute('INSERT OR IGNORE INTO parentage VALUES (?, ?)', [p, n['id']]) + for n in nodes: + for p in n['parents']: + self._session.merge(Parentage( + parent=p, + child=n['id'] + )) + self._session.commit() logger.info('Parented %d node(s).' % len(nodes)) diff --git a/acdcli/cache/templates/__init__.py b/acdcli/cache/templates/__init__.py new file mode 100644 index 0000000..53c46cd --- /dev/null +++ b/acdcli/cache/templates/__init__.py @@ -0,0 +1 @@ +__all__ = ["base", "metadata", "nodes", "labels", "files", "parentage"] diff --git a/acdcli/cache/templates/base.py b/acdcli/cache/templates/base.py new file mode 100644 index 0000000..860e542 --- /dev/null +++ b/acdcli/cache/templates/base.py @@ -0,0 +1,3 @@ +from sqlalchemy.ext.declarative import declarative_base + +Base = declarative_base() diff --git a/acdcli/cache/templates/files.py b/acdcli/cache/templates/files.py new file mode 100644 index 0000000..a2b0cee --- /dev/null +++ b/acdcli/cache/templates/files.py @@ -0,0 +1,9 @@ +from sqlalchemy import Column, String, ForeignKey, BigInteger +from .base import Base + + +class Files(Base): + __tablename__ = 'files' + id = Column(String(50), ForeignKey('nodes.id'), primary_key=True, nullable=False, unique=True) + md5 = Column(String(32)) + size = Column(BigInteger) diff --git a/acdcli/cache/templates/labels.py b/acdcli/cache/templates/labels.py new file mode 100644 index 0000000..76c34b5 --- /dev/null +++ b/acdcli/cache/templates/labels.py @@ -0,0 +1,8 @@ +from sqlalchemy import Column, String, ForeignKey +from .base import Base + + +class Labels(Base): + __tablename__ = 'labels' + id = Column(String(50), ForeignKey('nodes.id'), primary_key=True, nullable=False) + name = Column(String(256), primary_key=True, nullable=False) diff --git a/acdcli/cache/templates/metadata.py b/acdcli/cache/templates/metadata.py new file mode 100644 index 0000000..e26d9b6 --- /dev/null +++ b/acdcli/cache/templates/metadata.py @@ -0,0 +1,8 @@ +from sqlalchemy import Column, String +from .base import Base + + +class Metadata(Base): + __tablename__ = 'metadata' + key = Column(String(64), primary_key=True, nullable=False) + value = Column(String()) diff --git a/acdcli/cache/templates/nodes.py b/acdcli/cache/templates/nodes.py new file mode 100644 index 0000000..f12da19 --- /dev/null +++ b/acdcli/cache/templates/nodes.py @@ -0,0 +1,54 @@ +import enum + +from sqlalchemy import Column, String, DateTime, Enum +from .base import Base + + +class Status(enum.Enum): + AVAILABLE = "AVAILABLE" + TRASH = "TRASH" + PURGED = "PURGED" + PENDING = "PENDING" + + +class Nodes(Base): + __tablename__ = 'nodes' + id = Column(String(50), primary_key=True, nullable=False, unique=True) + type = Column(String(15)) + name = Column(String(256)) + description = Column(String(500)) + created = Column(DateTime()) + modified = Column(DateTime()) + updated = Column(DateTime()) + status = Column(Enum(Status)) + + def __lt__(self, other): + return self.name < other.name + + def __hash__(self): + return hash(self.id) + + def __repr__(self): + return 'Node(%r, %r)' % (self.id, self.name) + + @property + def is_folder(self): + return self.type == 'folder' + + @property + def is_file(self): + return self.type == 'file' + + @property + def is_available(self): + return self.status == Status.AVAILABLE + + @property + def is_trashed(self): + return self.status == Status.TRASH + + @property + def simple_name(self): + if self.is_file: + return self.name + return (self.name if self.name else '') + '/' diff --git a/acdcli/cache/templates/parentage.py b/acdcli/cache/templates/parentage.py new file mode 100644 index 0000000..709e46c --- /dev/null +++ b/acdcli/cache/templates/parentage.py @@ -0,0 +1,8 @@ +from sqlalchemy import Column, String, ForeignKey +from .base import Base + + +class Parentage(Base): + __tablename__ = 'parentage' + parent = Column(String(50), ForeignKey('nodes.id'), primary_key=True, nullable=False) + child = Column(String(50), ForeignKey('nodes.id'), primary_key=True, nullable=False) diff --git a/setup.py b/setup.py index 97d2a0f..91850ac 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ def read(fname: str) -> str: StrictVersion(version) dependencies = ['appdirs', 'colorama', 'fusepy', 'python_dateutil', - 'requests>=2.1.0,!=2.9.0', 'requests_toolbelt!=0.5.0'] + 'requests>=2.1.0,!=2.9.0', 'requests_toolbelt!=0.5.0', 'sqlalchemy>=1.1.0b3'] doc_dependencies = ['sphinx_paramlinks'] test_dependencies = ['httpretty<0.8.11', 'mock'] diff --git a/tests/test_cache.py b/tests/test_cache.py index bba08dc..9a9ef94 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -1,7 +1,7 @@ import unittest import os -from acdcli.cache import db, schema +from acdcli.cache import db from .test_helper import gen_file, gen_folder, gen_bunch_of_nodes From 3f0762373a55192e0deab2addc50719061e93d84 Mon Sep 17 00:00:00 2001 From: DROUARD Benjamin Date: Tue, 4 Oct 2016 14:02:06 +0200 Subject: [PATCH 02/15] Fix mysql --- acdcli/cache/db.py | 2 -- acdcli/cache/schema.py | 1 + acdcli/cache/templates/labels.py | 2 +- acdcli/cache/templates/metadata.py | 2 +- acdcli/cache/templates/parentage.py | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/acdcli/cache/db.py b/acdcli/cache/db.py index aa9d803..3bea237 100644 --- a/acdcli/cache/db.py +++ b/acdcli/cache/db.py @@ -37,8 +37,6 @@ def __init__(self, cache_path: str='', settings_path=''): self.KeyValueStorage = KeyValueStorage(self._session) - self.KeyValueStorage.__setitem__("Hello", "die") - rootNodes = self._session.query(Nodes).filter(Nodes.name == None).all() if len(rootNodes) > 1: raise IntegrityError('Could not uniquely identify root node.') diff --git a/acdcli/cache/schema.py b/acdcli/cache/schema.py index cd533e1..ac609a3 100644 --- a/acdcli/cache/schema.py +++ b/acdcli/cache/schema.py @@ -12,4 +12,5 @@ def init(self): base.Base.metadata.create_all(self._engine, checkfirst=True) def drop_all(self): + self._session.commit() base.Base.metadata.drop_all(self._engine) diff --git a/acdcli/cache/templates/labels.py b/acdcli/cache/templates/labels.py index 76c34b5..9db8017 100644 --- a/acdcli/cache/templates/labels.py +++ b/acdcli/cache/templates/labels.py @@ -5,4 +5,4 @@ class Labels(Base): __tablename__ = 'labels' id = Column(String(50), ForeignKey('nodes.id'), primary_key=True, nullable=False) - name = Column(String(256), primary_key=True, nullable=False) + name = Column(String(255), primary_key=True, nullable=False) diff --git a/acdcli/cache/templates/metadata.py b/acdcli/cache/templates/metadata.py index e26d9b6..fe37be5 100644 --- a/acdcli/cache/templates/metadata.py +++ b/acdcli/cache/templates/metadata.py @@ -5,4 +5,4 @@ class Metadata(Base): __tablename__ = 'metadata' key = Column(String(64), primary_key=True, nullable=False) - value = Column(String()) + value = Column(String(255)) diff --git a/acdcli/cache/templates/parentage.py b/acdcli/cache/templates/parentage.py index 709e46c..8f1d07e 100644 --- a/acdcli/cache/templates/parentage.py +++ b/acdcli/cache/templates/parentage.py @@ -4,5 +4,5 @@ class Parentage(Base): __tablename__ = 'parentage' - parent = Column(String(50), ForeignKey('nodes.id'), primary_key=True, nullable=False) + parent = Column(String(50), primary_key=True, nullable=False) child = Column(String(50), ForeignKey('nodes.id'), primary_key=True, nullable=False) From 5d5efb61c0b6000352b269a3b52c1f5afec51127 Mon Sep 17 00:00:00 2001 From: DROUARD Benjamin Date: Tue, 4 Oct 2016 14:34:18 +0200 Subject: [PATCH 03/15] Folder insertion --- acdcli/cache/sync.py | 1 + 1 file changed, 1 insertion(+) diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index d258237..7ccc857 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -68,6 +68,7 @@ def insert_nodes(self, nodes: list, partial=True): and (not 'isRoot' in node or not node['isRoot']): logger.warning('Skipping non-root folder %s because its name is empty.' % node['id']) continue + folders.append(node) elif kind != 'ASSET': logger.warning('Cannot insert unknown node type "%s".' % kind) self.insert_folders(folders) From 1041c6a1a5c18530ab109bb970d2546b2b0dac25 Mon Sep 17 00:00:00 2001 From: DROUARD Benjamin Date: Tue, 4 Oct 2016 14:49:43 +0200 Subject: [PATCH 04/15] Resolving file size --- acdcli/cache/query.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index 556fd84..208443c 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -40,6 +40,10 @@ def resolve(self, path: str, trash=False) -> 'Union[Nodes|None]': if not trash: return if i + 1 == segments.__len__(): + if result.is_file: + result.size = self._session.query(Files.size) \ + .filter(Files.id == result.id) \ + .scalar() return result if result.is_folder: parent = result.id From 7175e7f9e939b11205d1ee252e4d3b6c643e78b5 Mon Sep 17 00:00:00 2001 From: DROUARD Benjamin Date: Tue, 4 Oct 2016 15:03:00 +0200 Subject: [PATCH 05/15] Fix querry on empty folder --- acdcli/cache/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index 208443c..f95d339 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -56,7 +56,7 @@ def childrens_names(self, folder_id) -> 'List[str]': .filter(Parentage.parent == folder_id) \ .filter(Nodes.status == Status.AVAILABLE) \ .all() - return list(list(zip(*names))[0]) + return [name[0] for name in names] def get_node_count(self) -> int: return self._session.query(func.count(Nodes.id)).scalar() From a2efab5bbd35c4708ea6e20e0de6b4f1a7b34e09 Mon Sep 17 00:00:00 2001 From: DROUARD Benjamin Date: Tue, 4 Oct 2016 15:59:12 +0200 Subject: [PATCH 06/15] Fix file information --- acdcli/cache/query.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index f95d339..abd6df0 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -9,7 +9,15 @@ class QueryMixin(object): def get_node(self, id) -> 'Union[Nodes|None]': - return self._session.query(Nodes).filter(Nodes.id == id) + result = self._session.query(Nodes).filter(Nodes.id == id).first() + return self.resolve_files_properties(result) + + def resolve_files_properties(self, node): + if node.is_file: + file = self._session.query(Files).filter(Files.id == node.id).first() + node.md5 = file.md5 + node.size = file.size + return node def get_root_node(self): return self.get_node(self.root_id) @@ -40,11 +48,7 @@ def resolve(self, path: str, trash=False) -> 'Union[Nodes|None]': if not trash: return if i + 1 == segments.__len__(): - if result.is_file: - result.size = self._session.query(Files.size) \ - .filter(Files.id == result.id) \ - .scalar() - return result + return self.resolve_files_properties(result) if result.is_folder: parent = result.id else: From 6e1b3855710899f9da5e57f983bbc3024966cbbc Mon Sep 17 00:00:00 2001 From: DROUARD Benjamin Date: Wed, 5 Oct 2016 02:07:13 +0200 Subject: [PATCH 07/15] Fixing tests and removing sqlite db --- acdcli/cache/query.py | 9 +++++---- acdcli/cache/schema.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index abd6df0..0a29433 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -13,7 +13,7 @@ def get_node(self, id) -> 'Union[Nodes|None]': return self.resolve_files_properties(result) def resolve_files_properties(self, node): - if node.is_file: + if node is not None and node.is_file: file = self._session.query(Files).filter(Files.id == node.id).first() node.md5 = file.md5 node.size = file.size @@ -72,7 +72,8 @@ def get_folders_count(self) -> int: return self._session.query(func.count(Nodes.id)).filter(Nodes.type == "folder").scalar() def calculate_usage(self): - return self._session.query(func.sum(Files.size)).scalar() + usage = self._session.query(func.sum(Files.size)).scalar() + return 0 if usage is None else usage def num_children(self, folder_id) -> int: return self._session.query(func.count(Nodes.name)) \ @@ -82,8 +83,8 @@ def num_children(self, folder_id) -> int: .scalar() def num_parents(self, node_id) -> int: - return self._session.query(func.count(Nodes.name)) \ - .join(Parentage, Parentage.parent == Nodes.id) \ + return self._session.query(func.count(Parentage.parent)) \ + .join(Nodes, Nodes.id == Parentage.parent) \ .filter(Parentage.child == node_id) \ .filter(Nodes.status == Status.AVAILABLE) \ .scalar() diff --git a/acdcli/cache/schema.py b/acdcli/cache/schema.py index ac609a3..bc0dd7b 100644 --- a/acdcli/cache/schema.py +++ b/acdcli/cache/schema.py @@ -1,3 +1,4 @@ +import os import logging from .templates import * @@ -14,3 +15,17 @@ def init(self): def drop_all(self): self._session.commit() base.Base.metadata.drop_all(self._engine) + return True + + def remove_db_file(self): + if self._conf["database"]["url"].startswith("sqlite://"): + try: + os.remove(self._conf["database"]["url"][9:]) + logger.info('Database removed.') + except OSError: + logger.info('Database file was not deleted.') + return False + else: + logger.info('''Database is not sqlite, can't remove db file, droping all tables.''') + self.drop_all() + return True From 54347d1e156b80f292bfb29052d5518441ff6ca4 Mon Sep 17 00:00:00 2001 From: DROUARD Benjamin Date: Wed, 5 Oct 2016 14:22:46 +0200 Subject: [PATCH 08/15] Fix sqlite multithread --- acdcli/cache/db.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/acdcli/cache/db.py b/acdcli/cache/db.py index 3bea237..00e6d29 100644 --- a/acdcli/cache/db.py +++ b/acdcli/cache/db.py @@ -1,6 +1,7 @@ import logging import configparser from sqlalchemy import create_engine +from sqlalchemy.pool import StaticPool from sqlalchemy.orm import sessionmaker from acdcli.utils.conf import get_conf @@ -29,7 +30,12 @@ class NodeCache(SchemaMixin, QueryMixin, FormatterMixin, SyncMixin): def __init__(self, cache_path: str='', settings_path=''): self.init_config(cache_path, settings_path) - self._engine = create_engine(self._conf["database"]["url"]) + if self._conf["database"]["url"].startswith("sqlite"): + self._engine = create_engine(self._conf["database"]["url"], + connect_args={'check_same_thread': False}, + poolclass=StaticPool) + else: + self._engine = create_engine(self._conf["database"]["url"]) self.init() self._DBSession = sessionmaker(bind=self._engine) From 66616f88f256aa90079df57f3bb8cbfd9f366c00 Mon Sep 17 00:00:00 2001 From: DROUARD Benjamin Date: Wed, 5 Oct 2016 19:18:38 +0200 Subject: [PATCH 09/15] Using core SQLAlchemy request for sync --- acdcli/cache/sync.py | 75 +++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index 7ccc857..edf878f 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -85,46 +85,42 @@ def insert_folders(self, folders: list): if not folders: return - for f in folders: - self._session.merge( - Nodes( - id=f['id'], - type="folder", - name=f.get('name'), - description=f.get('description'), - created=iso_date.parse(f['createdDate']), - modified=iso_date.parse(f['modifiedDate']), - updated=datetime.utcnow(), - status=Status(f['status']) - )) - - self._session.commit() + self._engine.execute(Nodes.__table__.insert(), [ + { + "id": f['id'], + "type": "folder", + "name": f.get('name'), + "description": f.get('description'), + "created": iso_date.parse(f['createdDate']), + "modified": iso_date.parse(f['modifiedDate']), + "updated": datetime.utcnow(), + "status": Status(f['status']) + } for f in folders]) logger.info('Inserted/updated %d folder(s).' % len(folders)) def insert_files(self, files: list): if not files: return - for f in files: - self._session.merge( - Nodes( - id=f['id'], - type="file", - name=f.get('name'), - description=f.get('description'), - created=iso_date.parse(f['createdDate']), - modified=iso_date.parse(f['modifiedDate']), - updated=datetime.utcnow(), - status=(Status(f['status'])) - )) - self._session.merge( - Files( - id=f['id'], - md5=f.get('contentProperties', {}).get('md5', 'd41d8cd98f00b204e9800998ecf8427e'), - size=f.get('contentProperties', {}).get('size', 0) - )) + self._engine.execute(Nodes.__table__.insert(), [ + { + "id": f["id"], + "type": "file", + "name": f.get('name'), + "description": f.get('description'), + "created": iso_date.parse(f['createdDate']), + "modified": iso_date.parse(f['modifiedDate']), + "updated": datetime.utcnow(), + "status": Status(f['status']) + } for f in files]) + + self._engine.execute(Files.__table__.insert(), [ + { + "id": f['id'], + "md5": f.get('contentProperties', {}).get('md5', 'd41d8cd98f00b204e9800998ecf8427e'), + "size": f.get('contentProperties', {}).get('size', 0) + } for f in files]) - self._session.commit() logger.info('Inserted/updated %d file(s).' % len(files)) def insert_parentage(self, nodes: list, partial=True): @@ -139,12 +135,11 @@ def insert_parentage(self, nodes: list, partial=True): self._session.commit() - for n in nodes: - for p in n['parents']: - self._session.merge(Parentage( - parent=p, - child=n['id'] - )) + req = [{ + "parent": p, + "child": n['id'] + } for n in nodes for p in n['parents']] + if len(req) > 0: + self._engine.execute(Parentage.__table__.insert(), req) - self._session.commit() logger.info('Parented %d node(s).' % len(nodes)) From 0e26c7c1dcded8d5484c5c5fe24236e2c7d7a069 Mon Sep 17 00:00:00 2001 From: DROUARD Benjamin Date: Fri, 4 Nov 2016 16:54:12 +0100 Subject: [PATCH 10/15] Fix duplicate UNIQUE column on sync --- acdcli/cache/sync.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index edf878f..c1805de 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -85,6 +85,10 @@ def insert_folders(self, folders: list): if not folders: return + for i in range(0, len(folders), 500): + c = folders[i:i + 500] + self._session.execute(Nodes.__table__.delete().where(Nodes.id.in_([f["id"] for f in c]))) + self._engine.execute(Nodes.__table__.insert(), [ { "id": f['id'], @@ -102,6 +106,11 @@ def insert_files(self, files: list): if not files: return + for i in range(0, len(files), 500): + c = files[i:i + 500] + self._session.execute(Nodes.__table__.delete().where(Nodes.id.in_([f["id"] for f in c]))) + self._session.execute(Files.__table__.delete().where(Files.id.in_([f["id"] for f in c]))) + self._engine.execute(Nodes.__table__.insert(), [ { "id": f["id"], From 78653219700b0af19ab5bbc2c0b8b8339770a84e Mon Sep 17 00:00:00 2001 From: DROUARD Benjamin Date: Fri, 4 Nov 2016 17:53:28 +0100 Subject: [PATCH 11/15] Always use StaticPool --- acdcli/cache/db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acdcli/cache/db.py b/acdcli/cache/db.py index 00e6d29..d28f582 100644 --- a/acdcli/cache/db.py +++ b/acdcli/cache/db.py @@ -35,7 +35,7 @@ def __init__(self, cache_path: str='', settings_path=''): connect_args={'check_same_thread': False}, poolclass=StaticPool) else: - self._engine = create_engine(self._conf["database"]["url"]) + self._engine = create_engine(self._conf["database"]["url"], poolclass=StaticPool) self.init() self._DBSession = sessionmaker(bind=self._engine) From 12ae0688dfb83565f14543d2a7fceb81a0363b45 Mon Sep 17 00:00:00 2001 From: DROUARD Benjamin Date: Fri, 4 Nov 2016 17:55:20 +0100 Subject: [PATCH 12/15] Chunked sync insert --- acdcli/cache/sync.py | 59 ++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index c1805de..cbf692c 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -89,17 +89,18 @@ def insert_folders(self, folders: list): c = folders[i:i + 500] self._session.execute(Nodes.__table__.delete().where(Nodes.id.in_([f["id"] for f in c]))) - self._engine.execute(Nodes.__table__.insert(), [ - { - "id": f['id'], - "type": "folder", - "name": f.get('name'), - "description": f.get('description'), - "created": iso_date.parse(f['createdDate']), - "modified": iso_date.parse(f['modifiedDate']), - "updated": datetime.utcnow(), - "status": Status(f['status']) - } for f in folders]) + self._engine.execute(Nodes.__table__.insert(), [ + { + "id": f['id'], + "type": "folder", + "name": f.get('name'), + "description": f.get('description'), + "created": iso_date.parse(f['createdDate']), + "modified": iso_date.parse(f['modifiedDate']), + "updated": datetime.utcnow(), + "status": Status(f['status']) + } for f in c]) + logger.info('Inserted/updated %d folder(s).' % len(folders)) def insert_files(self, files: list): @@ -111,24 +112,24 @@ def insert_files(self, files: list): self._session.execute(Nodes.__table__.delete().where(Nodes.id.in_([f["id"] for f in c]))) self._session.execute(Files.__table__.delete().where(Files.id.in_([f["id"] for f in c]))) - self._engine.execute(Nodes.__table__.insert(), [ - { - "id": f["id"], - "type": "file", - "name": f.get('name'), - "description": f.get('description'), - "created": iso_date.parse(f['createdDate']), - "modified": iso_date.parse(f['modifiedDate']), - "updated": datetime.utcnow(), - "status": Status(f['status']) - } for f in files]) - - self._engine.execute(Files.__table__.insert(), [ - { - "id": f['id'], - "md5": f.get('contentProperties', {}).get('md5', 'd41d8cd98f00b204e9800998ecf8427e'), - "size": f.get('contentProperties', {}).get('size', 0) - } for f in files]) + self._engine.execute(Nodes.__table__.insert(), [ + { + "id": f["id"], + "type": "file", + "name": f.get('name'), + "description": f.get('description'), + "created": iso_date.parse(f['createdDate']), + "modified": iso_date.parse(f['modifiedDate']), + "updated": datetime.utcnow(), + "status": Status(f['status']) + } for f in c]) + + self._engine.execute(Files.__table__.insert(), [ + { + "id": f['id'], + "md5": f.get('contentProperties', {}).get('md5', 'd41d8cd98f00b204e9800998ecf8427e'), + "size": f.get('contentProperties', {}).get('size', 0) + } for f in c]) logger.info('Inserted/updated %d file(s).' % len(files)) From 177ed920ae292536c7be5500d6a46fcee50797d0 Mon Sep 17 00:00:00 2001 From: DROUARD Benjamin Date: Fri, 4 Nov 2016 23:13:34 +0100 Subject: [PATCH 13/15] First select available nodes --- acdcli/cache/query.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index 0a29433..38619ed 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -40,7 +40,9 @@ def resolve(self, path: str, trash=False) -> 'Union[Nodes|None]': result = self._session.query(Nodes) \ .join(Parentage, Parentage.child == Nodes.id) \ .filter(Parentage.parent == parent) \ - .filter(Nodes.name == segment).first() + .filter(Nodes.name == segment) \ + .order_by(Nodes.status) \ + .first() if not result: return From f7fa2730c0f8990c25604922e4425817d5e9e963 Mon Sep 17 00:00:00 2001 From: DROUARD Benjamin Date: Sat, 5 Nov 2016 00:04:55 +0100 Subject: [PATCH 14/15] Updating existing nodes --- acdcli/cache/sync.py | 86 ++++++++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 30 deletions(-) diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index cbf692c..834e434 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -85,21 +85,34 @@ def insert_folders(self, folders: list): if not folders: return - for i in range(0, len(folders), 500): - c = folders[i:i + 500] - self._session.execute(Nodes.__table__.delete().where(Nodes.id.in_([f["id"] for f in c]))) - - self._engine.execute(Nodes.__table__.insert(), [ - { - "id": f['id'], - "type": "folder", - "name": f.get('name'), - "description": f.get('description'), - "created": iso_date.parse(f['createdDate']), - "modified": iso_date.parse(f['modifiedDate']), - "updated": datetime.utcnow(), - "status": Status(f['status']) - } for f in c]) + for i in range(0, len(folders), 900): + c = folders[i:i + 900] + update = [id[0] for id in self._session.query(Nodes.id).filter(Nodes.id.in_([f["id"] for f in c])).all()] + + if len(update) > 0: + self._engine.execute(Nodes.__table__.update().where(Nodes.id.in_(update)), [ + { + "type": "folder", + "name": f.get('name'), + "description": f.get('description'), + "created": iso_date.parse(f['createdDate']), + "modified": iso_date.parse(f['modifiedDate']), + "updated": datetime.utcnow(), + "status": Status(f['status']) + } for f in c if f['id'] in update]) + + if len(update) < len(c): + self._engine.execute(Nodes.__table__.insert(), [ + { + "id": f['id'], + "type": "folder", + "name": f.get('name'), + "description": f.get('description'), + "created": iso_date.parse(f['createdDate']), + "modified": iso_date.parse(f['modifiedDate']), + "updated": datetime.utcnow(), + "status": Status(f['status']) + } for f in c if f['id'] not in update]) logger.info('Inserted/updated %d folder(s).' % len(folders)) @@ -107,22 +120,35 @@ def insert_files(self, files: list): if not files: return - for i in range(0, len(files), 500): - c = files[i:i + 500] - self._session.execute(Nodes.__table__.delete().where(Nodes.id.in_([f["id"] for f in c]))) + for i in range(0, len(files), 900): + c = files[i:i + 900] self._session.execute(Files.__table__.delete().where(Files.id.in_([f["id"] for f in c]))) - - self._engine.execute(Nodes.__table__.insert(), [ - { - "id": f["id"], - "type": "file", - "name": f.get('name'), - "description": f.get('description'), - "created": iso_date.parse(f['createdDate']), - "modified": iso_date.parse(f['modifiedDate']), - "updated": datetime.utcnow(), - "status": Status(f['status']) - } for f in c]) + update = [id[0] for id in self._session.query(Nodes.id).filter(Nodes.id.in_([f["id"] for f in c])).all()] + + if len(update) > 0: + self._engine.execute(Nodes.__table__.update().where(Nodes.id.in_(update)), [ + { + "type": "file", + "name": f.get('name'), + "description": f.get('description'), + "created": iso_date.parse(f['createdDate']), + "modified": iso_date.parse(f['modifiedDate']), + "updated": datetime.utcnow(), + "status": Status(f['status']) + } for f in c if f['id'] in update]) + + if len(update) < len(c): + self._engine.execute(Nodes.__table__.insert(), [ + { + "id": f["id"], + "type": "file", + "name": f.get('name'), + "description": f.get('description'), + "created": iso_date.parse(f['createdDate']), + "modified": iso_date.parse(f['modifiedDate']), + "updated": datetime.utcnow(), + "status": Status(f['status']) + } for f in c if f['id'] not in update]) self._engine.execute(Files.__table__.insert(), [ { From 9eca05033639ed72e41d7618c6ae0874db88505f Mon Sep 17 00:00:00 2001 From: DROUARD Benjamin Date: Sat, 5 Nov 2016 01:02:08 +0100 Subject: [PATCH 15/15] Update binding params --- acdcli/cache/sync.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index 834e434..75e6cc1 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -3,6 +3,7 @@ from datetime import datetime import dateutil.parser as iso_date +from sqlalchemy import bindparam from .templates.nodes import Nodes, Status from .templates.files import Files @@ -90,7 +91,7 @@ def insert_folders(self, folders: list): update = [id[0] for id in self._session.query(Nodes.id).filter(Nodes.id.in_([f["id"] for f in c])).all()] if len(update) > 0: - self._engine.execute(Nodes.__table__.update().where(Nodes.id.in_(update)), [ + self._engine.execute(Nodes.__table__.update().where(Nodes.id == bindparam("where_id")), [ { "type": "folder", "name": f.get('name'), @@ -98,7 +99,8 @@ def insert_folders(self, folders: list): "created": iso_date.parse(f['createdDate']), "modified": iso_date.parse(f['modifiedDate']), "updated": datetime.utcnow(), - "status": Status(f['status']) + "status": Status(f['status']), + "where_id": f["id"] } for f in c if f['id'] in update]) if len(update) < len(c): @@ -126,7 +128,7 @@ def insert_files(self, files: list): update = [id[0] for id in self._session.query(Nodes.id).filter(Nodes.id.in_([f["id"] for f in c])).all()] if len(update) > 0: - self._engine.execute(Nodes.__table__.update().where(Nodes.id.in_(update)), [ + self._engine.execute(Nodes.__table__.update().where(Nodes.id == bindparam("where_id")), [ { "type": "file", "name": f.get('name'), @@ -134,7 +136,8 @@ def insert_files(self, files: list): "created": iso_date.parse(f['createdDate']), "modified": iso_date.parse(f['modifiedDate']), "updated": datetime.utcnow(), - "status": Status(f['status']) + "status": Status(f['status']), + "where_id": f["id"] } for f in c if f['id'] in update]) if len(update) < len(c):