diff --git a/changelog.md b/changelog.md index 1ae07cfd..91e7a2cb 100644 --- a/changelog.md +++ b/changelog.md @@ -5,6 +5,8 @@ Features: --------- * Add ability to specify alternative myclirc file. (Thanks: [Dick Marinus]). +* Add new display formats for pretty printing query results. (Thanks: [Amjith + Ramanujam], [Dick Marinus], [Thomas Roten]). * Add logic to shorten the default prompt if it becomes too long once generated. (Thanks: [John Sterling]). Bug Fixes: @@ -16,6 +18,7 @@ Bug Fixes: * Fix requirements and remove old compatibility code (Thanks: [Dick Marinus]) * Fix bug where mycli would not start due to the thanks/credit intro text. (Thanks: [Thomas Roten]). +* Use pymysql default conversions (issue #375). (Thanks: [Dick Marinus]). Internal Changes: ----------------- diff --git a/mycli/completion_refresher.py b/mycli/completion_refresher.py index 33afa009..2bbe32d0 100644 --- a/mycli/completion_refresher.py +++ b/mycli/completion_refresher.py @@ -13,9 +13,8 @@ def __init__(self): self._completer_thread = None self._restart_refresh = threading.Event() - def refresh(self, executor, callbacks): - """ - Creates a SQLCompleter object and populates it with the relevant + def refresh(self, executor, callbacks, completer_options={}): + """Creates a SQLCompleter object and populates it with the relevant completion suggestions in a background thread. executor - SQLExecute object, used to extract the credentials to connect @@ -23,14 +22,17 @@ def refresh(self, executor, callbacks): callbacks - A function or a list of functions to call after the thread has completed the refresh. The newly created completion object will be passed in as an argument to each callback. + completer_options - dict of options to pass to SQLCompleter. 
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3

# Native text/binary types of the running interpreter.
if PY2:
    text_type, binary_type = unicode, str  # noqa: F821 (Python 2 only)
else:
    text_type, binary_type = str, bytes


def unicode2utf8(arg):
    """Encode a text string as UTF-8 bytes on Python 2.

    On Python 3, or when *arg* is not a text string, *arg* is returned
    untouched.

    """
    needs_encoding = PY2 and isinstance(arg, text_type)
    return arg.encode('utf-8') if needs_encoding else arg


def utf8tounicode(arg):
    """Decode UTF-8 bytes into a text string on Python 2.

    On Python 3, or when *arg* is not a byte string, *arg* is returned
    untouched.

    """
    needs_decoding = PY2 and isinstance(arg, binary_type)
    return arg.decode('utf-8') if needs_decoding else arg


def bytes_to_string(b):
    """Return *b* as text, falling back to a hex literal for binary data.

    Byte strings that are valid UTF-8 are decoded; byte strings that are not
    are rendered as ``0x`` followed by their hexadecimal digits. Anything
    that is not a byte string is returned unchanged.

    >>> print(bytes_to_string(b"\\xff"))
    0xff
    >>> print(bytes_to_string('abc'))
    abc
    >>> print(bytes_to_string('✌'))
    ✌

    """
    # Guard clause: only byte strings need any conversion.
    if not isinstance(b, binary_type):
        return b

    try:
        text = b.decode('utf8')
    except UnicodeDecodeError:
        # Not decodable: show the raw bytes as a hex literal instead.
        text = '0x' + binascii.hexlify(b).decode('ascii')
    return text
output_formatter from .encodingutils import utf8tounicode from .lexer import MyCliLexer from .__init__ import __version__ @@ -118,7 +107,8 @@ def __init__(self, sqlexecute=None, prompt=None, self.multi_line = c['main'].as_bool('multi_line') self.key_bindings = c['main']['key_bindings'] special.set_timing_enabled(c['main'].as_bool('timing')) - self.table_format = c['main']['table_format'] + self.formatter = output_formatter.OutputFormatter( + format_name=c['main']['table_format']) self.syntax_style = c['main']['syntax_style'] self.less_chatty = c['main'].as_bool('less_chatty') self.cli_style = c['colors'] @@ -157,7 +147,9 @@ def __init__(self, sqlexecute=None, prompt=None, # Initialize completer. self.smart_completion = c['main'].as_bool('smart_completion') - self.completer = SQLCompleter(self.smart_completion) + self.completer = SQLCompleter( + self.smart_completion, + supported_formats=self.formatter.supported_formats()) self._completer_lock = threading.Lock() # Register custom special commands. @@ -192,14 +184,16 @@ def register_special_commands(self): '\\R', 'Change prompt format.', aliases=('\\R',), case_sensitive=True) def change_table_format(self, arg, **_): - if not arg in table_formats(): - msg = "Table type %s not yet implemented. Allowed types:" % arg - for table_type in table_formats(): - msg += "\n\t%s" % table_type + try: + self.formatter.set_format_name(arg) + yield (None, None, None, + 'Changed table type to {}'.format(arg)) + except ValueError: + msg = 'Table type {} not yet implemented. 
def format_output(self, title, cur, headers, status, expanded=False,
                  max_width=None):
    """Build the list of output chunks for one query result.

    title - text printed before the result set; skipped when falsy.
    cur - iterable of result rows (e.g. a cursor); skipped when falsy.
    headers - column names passed to the formatter.
    status - text printed after the result set; skipped when falsy.
    expanded - force the vertical 'expanded' layout.
    max_width - when set, a result whose first row is too wide (as judged
                by content_exceeds_width) is shown in the 'expanded' layout
                instead of the configured one.

    Returns a list holding, in order, the title, the formatted result set
    and the status (each only when present).

    """
    expanded = expanded or self.formatter.get_format_name() == 'expanded'
    output = []

    if title:  # Only print the title if it's not None.
        output.append(title)

    if cur:
        rows = list(cur)

        # Decide on the layout *before* rendering, so a wide result set is
        # formatted once instead of being rendered in the default format
        # and then thrown away for the expanded one.
        # NOTE(review): rows[0] holds the raw cell values here, not
        # formatted strings -- confirm content_exceeds_width accepts them.
        if (not expanded and max_width and rows and
                content_exceeds_width(rows[0], max_width) and headers):
            expanded = True

        output.append(self.formatter.format_output(
            rows, headers, format_name='expanded' if expanded else None))

    if status:  # Only print the status if it's not None.
        output.append(status)

    return output
- output.append(title) - if cur: - headers = [utf8tounicode(x) for x in headers] - table_format = 'tsv' if table_format is None else table_format - - if expanded: - output.append(expanded_table(cur, headers)) - elif table_format == 'csv': - content = StringIO() - writer = csv.writer(content) - writer.writerow(headers) - - for row in cur: - row = ['null' if val is None else str(val) for val in row] - writer.writerow(row) - - output.append(content.getvalue()) - content.close() - else: - rows = list(cur) - tabulated, frows = tabulate(rows, headers, tablefmt=table_format, - missingval='') - if (max_width and rows and - content_exceeds_width(frows[0], max_width) and - headers): - output.append(expanded_table(rows, headers)) - else: - output.append(tabulated) - if status: # Only print the status if it's not None. - output.append(status) - - return output - def content_exceeds_width(row, width): # Account for 3 characters between each column separator_space = (len(row)*3) diff --git a/mycli/myclirc b/mycli/myclirc index baf50510..01a11426 100644 --- a/mycli/myclirc +++ b/mycli/myclirc @@ -30,10 +30,11 @@ log_level = INFO # Timing of sql statments and table rendering. timing = True -# Table format. Possible values: psql, plain, simple, grid, fancy_grid, pipe, -# orgtbl, rst, mediawiki, html, latex, latex_booktabs, tsv. -# Recommended: psql, fancy_grid and grid. -table_format = psql +# Table format. Possible values: ascii, double, github, +# psql, plain, simple, grid, fancy_grid, pipe, orgtbl, rst, mediawiki, html, +# latex, latex_booktabs, textile, moinmoin, jira, expanded, tsv, csv. +# Recommended: ascii +table_format = ascii # Syntax coloring style. 
def adapter(data, headers, table_format='csv', **_):
    """Render *headers* and *data* as delimiter-separated text.

    *table_format* selects the delimiter: 'csv' uses a comma and 'tsv' a
    tab. Any other value raises ValueError. Extra keyword arguments are
    accepted and ignored so the function fits the OutputFormatter calling
    convention.

    """
    with contextlib.closing(StringIO()) as buf:
        if table_format not in ('csv', 'tsv'):
            raise ValueError('Invalid table_format specified.')

        separator = ',' if table_format == 'csv' else '\t'
        writer = csv.writer(buf, delimiter=separator)

        # Header line first, then every data row in order.
        writer.writerow(headers)
        writer.writerows(data)

        return buf.getvalue()
def get_separator(num):
    """Get a row separator for row *num* (zero-based; shown one-based)."""
    return "{divider}[ {n}. row ]{divider}\n".format(
        divider='*' * 27, n=num + 1)


def format_row(headers, row):
    """Format a row as one 'header | value' line per column."""
    return '\n'.join(' | '.join(field) for field in zip(headers, row))


def expanded_table(rows, headers, **_):
    """Format *rows* and *headers* as an expanded table.

    Each row is rendered as a separator line followed by one
    'header | value' line per column, with the headers padded to a common
    width. The values in *rows* and *headers* must be strings. Extra keyword
    arguments are accepted and ignored.

    Returns the whole table as a single string ('' when there are no rows).

    """
    # `or [0]` keeps max() from raising ValueError when there are no
    # headers at all (e.g. an empty result description).
    header_len = max([len(x) for x in headers] or [0])
    padded_headers = [x.ljust(header_len) for x in headers]

    output = []
    for i, row in enumerate(rows):
        output.append(get_separator(i))
        output.append(format_row(padded_headers, row))
        output.append('\n')

    return ''.join(output)
OutputFormatHandler = namedtuple(
    'OutputFormatHandler',
    'format_name preprocessors formatter formatter_args')


class OutputFormatter(object):
    """A class with a standard interface for various formatting libraries."""

    # Registry shared by every instance: format name -> OutputFormatHandler.
    _output_formats = {}

    def __init__(self, format_name=None):
        """Set the default *format_name*."""
        self._format_name = format_name

    def set_format_name(self, format_name):
        """Set the OutputFormatter's default format.

        Raises ValueError when *format_name* has not been registered.

        """
        if format_name in self.supported_formats():
            self._format_name = format_name
        else:
            raise ValueError('unrecognized format_name: {}'.format(
                format_name))

    def get_format_name(self):
        """Get the OutputFormatter's default format."""
        return self._format_name

    def supported_formats(self):
        """Return the supported output format names."""
        return tuple(self._output_formats.keys())

    @classmethod
    def register_new_formatter(cls, format_name, handler, preprocessors=(),
                               kwargs=None):
        """Register a new formatter to format the output.

        format_name - name the formatter is selected by.
        handler - callable invoked as handler(data, headers, **kwargs).
        preprocessors - callables applied in order before the handler; each
                        is called as f(data, headers, **kwargs) and returns
                        a (data, headers) pair.
        kwargs - default keyword arguments for the handler and the
                 preprocessors.

        """
        # Store a private copy so later changes to the caller's dict (or a
        # shared default) cannot alter the registered defaults.
        cls._output_formats[format_name] = OutputFormatHandler(
            format_name, preprocessors, handler, dict(kwargs or {}))

    def format_output(self, data, headers, format_name=None, **kwargs):
        """Format the headers and data using a specific formatter.

        *format_name* must be a formatter available in `supported_formats()`;
        when omitted, the instance's default format is used. ValueError is
        raised for an unknown format.

        All keyword arguments are passed to the specified formatter, on top
        of the defaults given at registration time.

        """
        format_name = format_name or self._format_name
        if format_name not in self.supported_formats():
            raise ValueError('unrecognized format: {}'.format(format_name))

        (_, preprocessors, formatter,
         default_kwargs) = self._output_formats[format_name]

        # Merge into a fresh dict: updating the registered defaults in place
        # would make per-call arguments stick for every later call.
        fkwargs = dict(default_kwargs)
        fkwargs.update(kwargs)

        for f in preprocessors or ():
            data, headers = f(data, headers, **fkwargs)
        return formatter(data, headers, **fkwargs)
def override_missing_value(data, headers, missing_value='', **_):
    """Replace every None cell in *data* with *missing_value*.

    Returns a (data, headers) pair; *headers* is passed through unchanged.

    """
    filled = []
    for row in data:
        filled.append([missing_value if cell is None else cell
                       for cell in row])
    return filled, headers


def bytes_to_string(data, headers, **_):
    """Convert byte-string cells and headers to text.

    Every cell and header goes through encodingutils.bytes_to_string.
    Returns a (data, headers) pair of new lists.

    """
    decode = encodingutils.bytes_to_string
    converted_rows = [[decode(cell) for cell in row] for row in data]
    converted_headers = [decode(header) for header in headers]
    return converted_rows, converted_headers


def intlen(value):
    """Return the number of characters before the first '.' in *value*.

    When there is no '.', this is the full length of *value*.

    >>> intlen('11.1')
    2
    >>> intlen('11')
    2
    >>> intlen('1.1')
    1

    """
    integer_part = value.partition('.')[0]
    return len(integer_part)


def align_decimals(data, headers, **_):
    """Left-pad Decimal cells so the decimal points line up per column.

    Decimal cells are converted to text and padded with spaces; all other
    cells are passed through untouched. Returns a (data, headers) pair.

    """
    # First pass: widest integer part seen in each column.
    widths = [0] * len(headers)
    for row in data:
        for col, cell in enumerate(row):
            if isinstance(cell, Decimal):
                digits = intlen(encodingutils.text_type(cell))
                widths[col] = max(digits, widths[col])

    def pad(col, cell):
        """Return *cell* padded to its column's width if it is a Decimal."""
        if not isinstance(cell, Decimal):
            return cell
        text = encodingutils.text_type(cell)
        return (widths[col] - intlen(text)) * " " + text

    # Second pass: build the aligned rows.
    aligned = [[pad(col, cell) for col, cell in enumerate(row)]
               for row in data]
    return aligned, headers


def quote_whitespaces(data, headers, quotestyle="'", **_):
    """Quote whole columns that contain leading/trailing spaces.

    If any cell of a column starts or ends with a space, every cell of that
    column is wrapped in *quotestyle*. All cells are returned as formatted
    strings. Returns a (data, headers) pair.

    """
    # First pass: find the columns that need quoting.
    needs_quotes = [False] * len(headers)
    for row in data:
        for col, cell in enumerate(row):
            text = encodingutils.text_type(cell)
            if text.startswith(' ') or text.endswith(' '):
                needs_quotes[col] = True

    def wrap(col, cell):
        """Format *cell*, quoted when its column is flagged."""
        mark = quotestyle if needs_quotes[col] else ''
        return '{quotestyle}{value}{quotestyle}'.format(
            quotestyle=mark, value=cell)

    quoted = [[wrap(col, cell) for col, cell in enumerate(row)]
              for row in data]
    return quoted, headers
def adapter(data, headers, table_format=None, **_):
    """Render *headers* and *data* with a terminaltables table class.

    *table_format* must be 'ascii', 'double' or 'github'; any other value
    raises ValueError. Extra keyword arguments are accepted and ignored so
    the function fits the OutputFormatter calling convention.

    """
    renderers = {
        'ascii': terminaltables.AsciiTable,
        'double': terminaltables.DoubleTable,
        'github': terminaltables.GithubFlavoredMarkdownTable,
    }

    renderer = renderers.get(table_format)
    if renderer is None:
        raise ValueError('unrecognized table format: {}'.format(table_format))

    # The header row is simply the first row of the table data.
    return renderer([headers] + data).table
file mode 100644 index 128e9c69..00000000 --- a/mycli/packages/expanded.py +++ /dev/null @@ -1,49 +0,0 @@ -from .tabulate import _text_type -import binascii - -def pad(field, total, char=u" "): - return field + (char * (total - len(field))) - -def get_separator(num, header_len, data_len): - - sep = u"***************************[ %d. row ]***************************\n" % (num + 1) - return sep - -def format_field(value): - # Returns the field as a text type, otherwise will hexify the string - try: - if isinstance(value, bytes): - return _text_type(value, "ascii") - else: - return _text_type(value) - except UnicodeDecodeError: - return _text_type('0x' + binascii.hexlify(value).decode('ascii')) - -def expanded_table(rows, headers): - header_len = max([len(x) for x in headers]) - max_row_len = 0 - results = [] - - padded_headers = [pad(x, header_len) + u" |" for x in headers] - header_len += 2 - - for row in rows: - row = [format_field(x) for x in row] - row_len = max([len(x) for x in row]) - row_result = [] - if row_len > max_row_len: - max_row_len = row_len - - for header, value in zip(padded_headers, row): - if value is None: value = '' - row_result.append(u"%s %s" % (header, value)) - - results.append('\n'.join(row_result)) - - output = [] - for i, result in enumerate(results): - output.append(get_separator(i, header_len, max_row_len)) - output.append(result) - output.append('\n') - - return ''.join(output) diff --git a/mycli/packages/tabulate.py b/mycli/packages/tabulate.py index fa971826..1e67cea7 100644 --- a/mycli/packages/tabulate.py +++ b/mycli/packages/tabulate.py @@ -4,18 +4,16 @@ from __future__ import print_function from __future__ import unicode_literals -from collections import namedtuple -from decimal import Decimal +from collections import namedtuple, Iterable from platform import python_version_tuple -from wcwidth import wcswidth import re -import binascii if python_version_tuple()[0] < "3": from itertools import izip_longest from functools import 
partial _none_type = type(None) + _bool_type = bool _int_type = int _long_type = long _float_type = float @@ -29,23 +27,41 @@ def _is_file(f): from itertools import zip_longest as izip_longest from functools import reduce, partial _none_type = type(None) + _bool_type = bool _int_type = int _long_type = int _float_type = float _text_type = str _binary_type = bytes + basestring = str import io + def _is_file(f): return isinstance(f, io.IOBase) +try: + import wcwidth # optional wide-character (CJK) support +except ImportError: + wcwidth = None + -__all__ = ["tabulate", "tabulate_formats", "simple_separated_format"] -__version__ = "0.7.4" +__all__ = ["tabulate", "tabulate_formats"] +__version__ = "0.8.0" +# minimum extra space in headers MIN_PADDING = 2 +PRESERVE_WHITESPACE = False + +_DEFAULT_FLOATFMT = "g" +_DEFAULT_MISSINGVAL = "" + + +# if True, enable wide-character (CJK) support +WIDE_CHARS_MODE = wcwidth is not None + Line = namedtuple("Line", ["begin", "hline", "sep", "end"]) @@ -82,12 +98,13 @@ def _is_file(f): # with_header_hide: # # - either None, to display all table elements unconditionally, -# - or a list of elements not to be displayed if the table has column headers. +# - or a list of elements not to be displayed if the table has column +# headers. 
def _textile_row_with_attrs(cell_values, colwidths, colaligns):
    """Build one Textile table row with per-cell alignment markers.

    cell_values - the cell texts of the row.
    colwidths - unused; present to match the row-formatter signature.
    colaligns - alignment name per column ('left', 'right', 'center' or
                'decimal'); unknown names add no marker.

    Returns the row as a '|'-delimited string. An empty row yields '||'.

    """
    # Work on a copy: the original appended the space to the caller's list
    # in place, so reusing the same list would keep growing the first cell.
    cells = list(cell_values)
    if cells:  # an empty row has no first cell to pad
        cells[0] += ' '

    alignment = {"left": "<.", "right": ">.", "center": "=.", "decimal": ">."}
    values = (alignment.get(a, '') + v for a, v in zip(colaligns, cells))
    return '|' + '|'.join(values) + '|'
style="text-align: center;"', - "decimal": ' style="text-align: right;"' } - values_with_attrs = ["<{0}{1}>{2}".format(celltag, alignment.get(a, ''), c) + alignment = {"left": '', + "right": ' style="text-align: right;"', + "center": ' style="text-align: center;"', + "decimal": ' style="text-align: right;"'} + values_with_attrs = ["<{0}{1}>{2}".format( + celltag, alignment.get(a, ''), c) for c, a in + zip(cell_values, colaligns)] + rowhtml = "" + "".join(values_with_attrs).rstrip() + "" + if celltag == "th": # it's a header row, create a new table header + rowhtml = "\n".join(["
def _rst_escape_first_column(rows, headers):
    """Replace blank first-column cells with '..'.

    A first-column value (in the headers and in every row) that is a text or
    byte string consisting only of whitespace is replaced by '..'; all other
    values are kept. The inputs are not modified.

    Returns a (new_rows, new_headers) pair of lists.

    """
    def escape_empty(val):
        # Truthiness instead of `is ""`: identity against a literal only
        # works when the interpreter happens to reuse the empty-string
        # object, and never matches byte strings.
        if isinstance(val, (_text_type, _binary_type)) and not val.strip():
            return ".."
        else:
            return val

    new_headers = list(headers)
    new_rows = []
    if headers:
        new_headers[0] = escape_empty(headers[0])
    for row in rows:
        new_row = list(row)
        if new_row:  # empty rows have no first column to escape
            new_row[0] = escape_empty(row[0])
        new_rows.append(new_row)
    return new_rows, new_headers
"╡"), - linebetweenrows=Line("├", "─", "┼", "┤"), - linebelow=Line("╘", "═", "╧", "╛"), - headerrow=DataRow("│", "│", "│"), - datarow=DataRow("│", "│", "│"), - padding=1, with_header_hide=None, with_align=True), + TableFormat( + lineabove=Line("╒", "═", "╤", "╕"), + linebelowheader=Line("╞", "═", "╪", "╡"), + linebetweenrows=Line("├", "─", "┼", "┤"), + linebelow=Line("╘", "═", "╧", "╛"), + headerrow=DataRow("│", "│", "│"), + datarow=DataRow("│", "│", "│"), + padding=1, with_header_hide=None), "pipe": - TableFormat(lineabove=_pipe_line_with_colons, - linebelowheader=_pipe_line_with_colons, - linebetweenrows=None, - linebelow=None, - headerrow=DataRow("|", "|", "|"), - datarow=DataRow("|", "|", "|"), - padding=1, - with_header_hide=["lineabove"], with_align=True), + TableFormat( + lineabove=_pipe_line_with_colons, + linebelowheader=_pipe_line_with_colons, + linebetweenrows=None, + linebelow=None, + headerrow=DataRow("|", "|", "|"), + datarow=DataRow("|", "|", "|"), + padding=1, + with_header_hide=["lineabove"]), "orgtbl": - TableFormat(lineabove=None, - linebelowheader=Line("|", "-", "+", "|"), - linebetweenrows=None, - linebelow=None, - headerrow=DataRow("|", "|", "|"), - datarow=DataRow("|", "|", "|"), - padding=1, with_header_hide=None, with_align=True), + TableFormat( + lineabove=None, + linebelowheader=Line("|", "-", "+", "|"), + linebetweenrows=None, + linebelow=None, + headerrow=DataRow("|", "|", "|"), + datarow=DataRow("|", "|", "|"), + padding=1, with_header_hide=None), + "jira": + TableFormat( + lineabove=None, + linebelowheader=None, + linebetweenrows=None, + linebelow=None, + headerrow=DataRow("||", "||", "||"), + datarow=DataRow("|", "|", "|"), + padding=1, with_header_hide=None), "psql": - TableFormat(lineabove=Line("+", "-", "+", "+"), - linebelowheader=Line("|", "-", "+", "|"), - linebetweenrows=None, - linebelow=Line("+", "-", "+", "+"), - headerrow=DataRow("|", "|", "|"), - datarow=DataRow("|", "|", "|"), - padding=1, with_header_hide=None, 
with_align=True), + TableFormat( + lineabove=Line("+", "-", "+", "+"), + linebelowheader=Line("|", "-", "+", "|"), + linebetweenrows=None, + linebelow=Line("+", "-", "+", "+"), + headerrow=DataRow("|", "|", "|"), + datarow=DataRow("|", "|", "|"), + padding=1, with_header_hide=None), "rst": - TableFormat(lineabove=Line("", "=", " ", ""), - linebelowheader=Line("", "=", " ", ""), - linebetweenrows=None, - linebelow=Line("", "=", " ", ""), - headerrow=DataRow("", " ", ""), - datarow=DataRow("", " ", ""), - padding=0, with_header_hide=None, with_align=True), + TableFormat( + lineabove=Line("", "=", " ", ""), + linebelowheader=Line("", "=", " ", ""), + linebetweenrows=None, + linebelow=Line("", "=", " ", ""), + headerrow=DataRow("", " ", ""), + datarow=DataRow("", " ", ""), + padding=0, with_header_hide=None), "mediawiki": - TableFormat(lineabove=Line("{| class=\"wikitable\" style=\"text-align: left;\"", - "", "", "\n|+ \n|-"), - linebelowheader=Line("|-", "", "", ""), - linebetweenrows=Line("|-", "", "", ""), - linebelow=Line("|}", "", "", ""), - headerrow=partial(_mediawiki_row_with_attrs, "!"), - datarow=partial(_mediawiki_row_with_attrs, "|"), - padding=0, with_header_hide=None, with_align=True), + TableFormat(lineabove=Line( + "{| class=\"wikitable\" style=\"text-align: left;\"", + "", "", "\n|+ \n|-"), + linebelowheader=Line("|-", "", "", ""), + linebetweenrows=Line("|-", "", "", ""), + linebelow=Line("|}", "", "", ""), + headerrow=partial(_mediawiki_row_with_attrs, "!"), + datarow=partial(_mediawiki_row_with_attrs, "|"), + padding=0, with_header_hide=None), + "moinmoin": + TableFormat( + lineabove=None, + linebelowheader=None, + linebetweenrows=None, + linebelow=None, + headerrow=partial(_moin_row_with_attrs, "||", + header="'''"), + datarow=partial(_moin_row_with_attrs, "||"), + padding=1, with_header_hide=None), "html": - TableFormat(lineabove=Line("
", "", "", ""), - linebelowheader=None, - linebetweenrows=None, - linebelow=Line("
", "", "", ""), - headerrow=partial(_html_row_with_attrs, "th"), - datarow=partial(_html_row_with_attrs, "td"), - padding=0, with_header_hide=None, with_align=False), + TableFormat( + lineabove=_html_begin_table_without_header, + linebelowheader="", + linebetweenrows=None, + linebelow=Line("\n", "", "", ""), + headerrow=partial(_html_row_with_attrs, "th"), + datarow=partial(_html_row_with_attrs, "td"), + padding=0, with_header_hide=["lineabove"]), "latex": - TableFormat(lineabove=_latex_line_begin_tabular, - linebelowheader=Line("\\hline", "", "", ""), - linebetweenrows=None, - linebelow=Line("\\hline\n\\end{tabular}", "", "", ""), - headerrow=_latex_row, - datarow=_latex_row, - padding=1, with_header_hide=None, with_align=False), + TableFormat( + lineabove=_latex_line_begin_tabular, + linebelowheader=Line("\\hline", "", "", ""), + linebetweenrows=None, + linebelow=Line("\\hline\n\\end{tabular}", "", "", ""), + headerrow=_latex_row, + datarow=_latex_row, + padding=1, with_header_hide=None), + "latex_raw": + TableFormat( + lineabove=_latex_line_begin_tabular, + linebelowheader=Line("\\hline", "", "", ""), + linebetweenrows=None, + linebelow=Line("\\hline\n\\end{tabular}", "", "", ""), + headerrow=partial(_latex_row, escrules={}), + datarow=partial(_latex_row, escrules={}), + padding=1, with_header_hide=None), "latex_booktabs": - TableFormat(lineabove=partial(_latex_line_begin_tabular, booktabs=True), - linebelowheader=Line("\\midrule", "", "", ""), - linebetweenrows=None, - linebelow=Line("\\bottomrule\n\\end{tabular}", "", "", ""), - headerrow=_latex_row, - datarow=_latex_row, - padding=1, with_header_hide=None, with_align=False), - "tsv": - TableFormat(lineabove=None, linebelowheader=None, - linebetweenrows=None, linebelow=None, - headerrow=DataRow("", "\t", ""), - datarow=DataRow("", "\t", ""), - padding=0, with_header_hide=None, with_align=False)} + TableFormat( + lineabove=partial(_latex_line_begin_tabular, + booktabs=True), + linebelowheader=Line("\\midrule", 
"", "", ""), + linebetweenrows=None, + linebelow=Line("\\bottomrule\n\\end{tabular}", "", "", + ""), + headerrow=_latex_row, + datarow=_latex_row, + padding=1, with_header_hide=None), + "textile": + TableFormat( + lineabove=None, linebelowheader=None, + linebetweenrows=None, linebelow=None, + headerrow=DataRow("|_. ", "|_.", "|"), + datarow=_textile_row_with_attrs, + padding=1, with_header_hide=None)} tabulate_formats = list(sorted(_table_formats.keys())) -_invisible_codes = re.compile(r"\x1b\[\d*m|\x1b\[\d*\;\d*\;\d*m") # ANSI color codes -_invisible_codes_bytes = re.compile(b"\x1b\[\d*m|\x1b\[\d*\;\d*\;\d*m") # ANSI color codes - - -def simple_separated_format(separator): - """Construct a simple TableFormat with columns separated by a separator. - - >>> tsv = simple_separated_format("\\t") ; \ - print(tabulate([["foo", 1], ["spam", 23]], tablefmt=tsv)[0].replace('\\t', r'\\t')) - foo\\t1 - spam\\t23 - """ - return TableFormat(None, None, None, None, - headerrow=DataRow('', separator, ''), - datarow=DataRow('', separator, ''), - padding=0, with_header_hide=None, - with_align=False) +# ANSI color codes +_invisible_codes = re.compile(r"\x1b\[\d+[;\d]*m|\x1b\[\d*\;\d*\;\d*m") +_invisible_codes_bytes = re.compile(b"\x1b\[\d+[;\d]*m|\x1b\[\d*\;\d*\;\d*m") def _isconvertible(conv, string): @@ -301,19 +400,34 @@ def _isnumber(string): return _isconvertible(float, string) -def _isint(string): +def _isint(string, inttype=int): """ >>> _isint("123") True >>> _isint("123.45") False """ - return type(string) is _int_type or type(string) is _long_type or \ - (isinstance(string, _binary_type) or isinstance(string, _text_type)) and \ - _isconvertible(int, string) + return type(string) is inttype or\ + (isinstance(string, _binary_type) or isinstance(string, _text_type))\ + and\ + _isconvertible(inttype, string) -def _type(string, has_invisible=True): +def _isbool(string): + """ + >>> _isbool(True) + True + >>> _isbool("False") + True + >>> _isbool(1) + False + """ + return 
type(string) is _bool_type or\ + (isinstance(string, (_binary_type, _text_type)) and + string in ("True", "False")) + + +def _type(string, has_invisible=True, numparse=True): """The least generic type (type(None), int, float, str, unicode). >>> _type(None) is type(None) @@ -335,13 +449,15 @@ def _type(string, has_invisible=True): if string is None: return _none_type - if isinstance(string, (bool, Decimal,)): - return _text_type elif hasattr(string, "isoformat"): # datetime.datetime, date, and time return _text_type - elif _isint(string): + elif _isbool(string): + return _bool_type + elif _isint(string) and numparse: + return int + elif _isint(string, _long_type) and numparse: return int - elif _isnumber(string): + elif _isnumber(string) and numparse: return float elif isinstance(string, _binary_type): return _binary_type @@ -376,39 +492,37 @@ def _afterpoint(string): return -1 # not a number -def _padleft(width, s, has_invisible=True): +def _padleft(width, s): """Flush right. >>> _padleft(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430' True """ - lwidth = width - wcswidth(_strip_invisible(s) if has_invisible else s) - return ' ' * lwidth + s + fmt = "{0:>%ds}" % width + return fmt.format(s) -def _padright(width, s, has_invisible=True): +def _padright(width, s): """Flush left. >>> _padright(6, '\u044f\u0439\u0446\u0430') == '\u044f\u0439\u0446\u0430 ' True """ - rwidth = width - wcswidth(_strip_invisible(s) if has_invisible else s) - return s + ' ' * rwidth + fmt = "{0:<%ds}" % width + return fmt.format(s) -def _padboth(width, s, has_invisible=True): +def _padboth(width, s): """Center string. 
>>> _padboth(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430 ' True """ - xwidth = width - wcswidth(_strip_invisible(s) if has_invisible else s) - lwidth = xwidth // 2 - rwidth = 0 if xwidth <= 0 else lwidth + xwidth % 2 - return ' ' * lwidth + s + ' ' * rwidth + fmt = "{0:^%ds}" % width + return fmt.format(s) def _strip_invisible(s): @@ -426,16 +540,23 @@ def _visible_width(s): (5, 5) """ + # optional wide-character support + if wcwidth is not None and WIDE_CHARS_MODE: + len_fn = wcwidth.wcswidth + else: + len_fn = len if isinstance(s, _text_type) or isinstance(s, _binary_type): - return wcswidth(_strip_invisible(s)) + return len_fn(_strip_invisible(s)) else: - return wcswidth(_text_type(s)) + return len_fn(_text_type(s)) def _align_column(strings, alignment, minwidth=0, has_invisible=True): """[string] -> [padded_string] - >>> list(map(str,_align_column(["12.345", "-1234.5", "1.23", "1234.5", "1e+234", "1.0e234"], "decimal"))) + >>> list(map(str,_align_column( + ... ["12.345", "-1234.5", "1.23", "1234.5", "1e+234", "1.0e234"], + ... 
"decimal"))) [' 12.345 ', '-1234.5 ', ' 1.23 ', ' 1234.5 ', ' 1e+234 ', ' 1.0e234'] >>> list(map(str,_align_column(['123.4', '56.7890'], None))) @@ -443,11 +564,18 @@ def _align_column(strings, alignment, minwidth=0, has_invisible=True): """ if alignment == "right": + if not PRESERVE_WHITESPACE: + strings = [s.strip() for s in strings] padfn = _padleft elif alignment == "center": + if not PRESERVE_WHITESPACE: + strings = [s.strip() for s in strings] padfn = _padboth elif alignment == "decimal": - decimals = [_afterpoint(s) for s in strings] + if has_invisible: + decimals = [_afterpoint(_strip_invisible(s)) for s in strings] + else: + decimals = [_afterpoint(s) for s in strings] maxdecimals = max(decimals) strings = [s + (maxdecimals - decs) * " " for s, decs in zip(strings, decimals)] @@ -455,28 +583,46 @@ def _align_column(strings, alignment, minwidth=0, has_invisible=True): elif not alignment: return strings else: + if not PRESERVE_WHITESPACE: + strings = [s.strip() for s in strings] padfn = _padright + enable_widechars = wcwidth is not None and WIDE_CHARS_MODE if has_invisible: width_fn = _visible_width + elif enable_widechars: # optional wide-character support if available + width_fn = wcwidth.wcswidth else: - width_fn = wcswidth + width_fn = len - maxwidth = max(max(map(width_fn, strings)), minwidth) - padded_strings = [padfn(maxwidth, s, has_invisible) for s in strings] + s_lens = list(map(len, strings)) + s_widths = list(map(width_fn, strings)) + maxwidth = max(max(s_widths), minwidth) + if not enable_widechars and not has_invisible: + padded_strings = [padfn(maxwidth, s) for s in strings] + else: + # enable wide-character width corrections + visible_widths = [maxwidth - (w - l) for w, l in zip(s_widths, s_lens)] + # wcswidth and _visible_width don't count invisible characters; + # padfn doesn't need to apply another correction + padded_strings = [padfn(w, s) for s, w in zip(strings, visible_widths)] return padded_strings def _more_generic(type1, type2): - 
types = { _none_type: 0, int: 1, float: 2, _binary_type: 3, _text_type: 4 } - invtypes = { 4: _text_type, 3: _binary_type, 2: float, 1: int, 0: _none_type } - moregeneric = max(types.get(type1, 4), types.get(type2, 4)) + types = {_none_type: 0, _bool_type: 1, int: 2, float: 3, _binary_type: 4, + _text_type: 5} + invtypes = {5: _text_type, 4: _binary_type, 3: float, 2: int, + 1: _bool_type, 0: _none_type} + moregeneric = max(types.get(type1, 5), types.get(type2, 5)) return invtypes[moregeneric] -def _column_type(strings, has_invisible=True): +def _column_type(strings, has_invisible=True, numparse=True): """The least generic type all column values are convertible to. + >>> _column_type([True, False]) is _bool_type + True >>> _column_type(["1", "2"]) is _int_type True >>> _column_type(["1", "2.3"]) is _float_type @@ -494,22 +640,25 @@ def _column_type(strings, has_invisible=True): True """ - types = [_type(s, has_invisible) for s in strings ] - return reduce(_more_generic, types, int) + types = [_type(s, has_invisible, numparse) for s in strings] + return reduce(_more_generic, types, _bool_type) -def _format(val, valtype, floatfmt, missingval=""): - u"""Format a value accoding to its type. +def _format(val, valtype, floatfmt, missingval="", has_invisible=True): + """Format a value accoding to its type. Unicode is supported: - >>> hrow = ['\u0431\u0443\u043a\u0432\u0430', '\u0446\u0438\u0444\u0440\u0430'] ; \ - tbl = [['\u0430\u0437', 2], ['\u0431\u0443\u043a\u0438', 4]] ; \ - print(tabulate(tbl, headers=hrow)[0]) - буква цифра - ------- ------- - аз 2 - буки 4 + >>> hrow = ['\u0431\u0443\u043a\u0432\u0430', + ... '\u0446\u0438\u0444\u0440\u0430'] + >>> tbl = [['\u0430\u0437', 2], ['\u0431\u0443\u043a\u0438', 4]] + >>> good_result = ('\\u0431\\u0443\\u043a\\u0432\\u0430 ' + ... '\\u0446\\u0438\\u0444\\u0440\\u0430\\n------- ' + ... '-------\\n\\u0430\\u0437 ' + ... 
'2\\n\\u0431\\u0443\\u043a\\u0438 4') + >>> tabulate(tbl, headers=hrow) == good_result + True + """ if val is None: return missingval @@ -519,17 +668,25 @@ def _format(val, valtype, floatfmt, missingval=""): elif valtype is _binary_type: try: return _text_type(val, "ascii") - except UnicodeDecodeError: - return _text_type('0x' + binascii.hexlify(val).decode('ascii')) except TypeError: return _text_type(val) elif valtype is float: - return format(float(val), floatfmt) + is_a_colored_number = (has_invisible and + isinstance(val, (_text_type, _binary_type))) + if is_a_colored_number: + raw_val = _strip_invisible(val) + formatted_val = format(float(raw_val), floatfmt) + return val.replace(raw_val, formatted_val) + else: + return format(float(val), floatfmt) else: return "{0}".format(val) -def _align_header(header, alignment, width): +def _align_header(header, alignment, width, visible_width): + """Pad string header to width chars given known visible_width of the + header.""" + width += len(header) - visible_width if alignment == "left": return _padright(width, header) elif alignment == "center": @@ -540,8 +697,30 @@ def _align_header(header, alignment, width): return _padleft(width, header) -def _normalize_tabular_data(tabular_data, headers): - """Transform a supported data type to a list of lists, and a list of headers. 
+def _prepend_row_index(rows, index): + """Add a left-most index column.""" + if index is None or index is False: + return rows + if len(index) != len(rows): + print('index=', index) + print('rows=', rows) + raise ValueError('index must be as long as the number of data rows') + rows = [[v] + list(row) for v, row in zip(index, rows)] + return rows + + +def _bool(val): + """A wrapper around standard bool() which doesn't throw on NumPy + arrays.""" + try: + return bool(val) + except ValueError: # val is likely to be a numpy array with many elements + return False + + +def _normalize_tabular_data(tabular_data, headers, showindex="default"): + """Transform a supported data type to a list of lists, and a list of + headers. Supported tabular data types: @@ -564,32 +743,58 @@ def _normalize_tabular_data(tabular_data, headers): The first row can be used as headers if headers="firstrow", column indices can be used as headers if headers="keys". + If showindex="default", show row indices of the pandas.DataFrame. + If showindex="always", show row indices for all types of data. + If showindex="never", don't show row indices for all types of data. + If showindex is an iterable, show its values as row indices. + """ + try: + bool(headers) + is_headers2bool_broken = False + except ValueError: # numpy.ndarray, pandas.core.index.Index, ... + is_headers2bool_broken = True + headers = list(headers) + + index = None if hasattr(tabular_data, "keys") and hasattr(tabular_data, "values"): # dict-like and pandas.DataFrame? 
if hasattr(tabular_data.values, "__call__"): # likely a conventional dict keys = tabular_data.keys() - rows = list(izip_longest(*tabular_data.values())) # columns have to be transposed + # columns have to be transposed + rows = list(izip_longest(*tabular_data.values())) elif hasattr(tabular_data, "index"): - # values is a property, has .index => it's likely a pandas.DataFrame (pandas 0.11.0) - keys = tabular_data.keys() - vals = tabular_data.values # values matrix doesn't need to be transposed - names = tabular_data.index - rows = [[v]+list(row) for v,row in zip(names, vals)] + # values is a property, has .index => it's likely a + # pandas.DataFrame (pandas 0.11.0) + keys = list(tabular_data) + if tabular_data.index.name is not None: + if isinstance(tabular_data.index.name, list): + keys[:0] = tabular_data.index.name + else: + keys[:0] = [tabular_data.index.name] + # values matrix doesn't need to be transposed + vals = tabular_data.values + # for DataFrames add an index per default + index = list(tabular_data.index) + rows = [list(row) for row in vals] else: - raise ValueError("tabular data doesn't appear to be a dict or a DataFrame") + raise ValueError( + "tabular data doesn't appear to be a dict or a DataFrame") if headers == "keys": - headers = list(map(_text_type,keys)) # headers should be strings + headers = list(map(_text_type, keys)) # headers should be strings else: # it's a usual an iterable of iterables, or a NumPy array rows = list(tabular_data) - if (headers == "keys" and - hasattr(tabular_data, "dtype") and - getattr(tabular_data.dtype, "names")): + if (headers == "keys" and not rows): + # an empty table (issue #81) + headers = [] + elif (headers == "keys" and + hasattr(tabular_data, "dtype") and + getattr(tabular_data.dtype, "names")): # numpy record array headers = tabular_data.dtype.names elif (headers == "keys" @@ -601,8 +806,8 @@ def _normalize_tabular_data(tabular_data, headers): elif (len(rows) > 0 and isinstance(rows[0], dict)): # dict or 
OrderedDict - uniq_keys = set() # implements hashed lookup - keys = [] # storage for set + uniq_keys = set() # implements hashed lookup + keys = [] # storage for set if headers == "firstrow": firstdict = rows[0] if len(rows) > 0 else {} keys.extend(firstdict.keys()) @@ -610,7 +815,7 @@ def _normalize_tabular_data(tabular_data, headers): rows = rows[1:] for row in rows: for k in row.keys(): - #Save unique items in input order + # Save unique items in input order if k not in uniq_keys: keys.append(k) uniq_keys.add(k) @@ -627,38 +832,67 @@ def _normalize_tabular_data(tabular_data, headers): else: headers = [] elif headers: - raise ValueError('headers for a list of dicts is not a dict or a keyword') + raise ValueError( + 'headers for a list of dicts is not a dict or a keyword') rows = [[row.get(k) for k in keys] for row in rows] + + elif (headers == "keys" + and hasattr(tabular_data, "description") + and hasattr(tabular_data, "fetchone") + and hasattr(tabular_data, "rowcount")): + # Python Database API cursor object (PEP 0249) + # print tabulate(cursor, headers='keys') + headers = [column[0] for column in tabular_data.description] + elif headers == "keys" and len(rows) > 0: # keys are column indices headers = list(map(_text_type, range(len(rows[0])))) # take headers from the first row if necessary if headers == "firstrow" and len(rows) > 0: - headers = list(map(_text_type, rows[0])) # headers should be strings + if index is not None: + headers = [index[0]] + list(rows[0]) + index = index[1:] + else: + headers = rows[0] + headers = list(map(_text_type, headers)) # headers should be strings rows = rows[1:] - headers = list(map(_text_type,headers)) - rows = list(map(list,rows)) + headers = list(map(_text_type, headers)) + rows = list(map(list, rows)) + + # add or remove an index column + showindex_is_a_str = type(showindex) in [_text_type, _binary_type] + if showindex == "default" and index is not None: + rows = _prepend_row_index(rows, index) + elif isinstance(showindex, 
Iterable) and not showindex_is_a_str: + rows = _prepend_row_index(rows, list(showindex)) + elif (showindex == "always" or + (_bool(showindex) and not showindex_is_a_str)): + if index is None: + index = list(range(len(rows))) + rows = _prepend_row_index(rows, index) + elif (showindex == "never" or + (not _bool(showindex) and not showindex_is_a_str)): + pass # pad with empty headers for initial columns if necessary if headers and len(rows) > 0: - nhs = len(headers) - ncols = len(rows[0]) - if nhs < ncols: - headers = [""]*(ncols - nhs) + headers + nhs = len(headers) + ncols = len(rows[0]) + if nhs < ncols: + headers = [""] * (ncols - nhs) + headers return rows, headers -def table_formats(): - return _table_formats.keys() -def tabulate(tabular_data, headers=[], tablefmt="simple", - floatfmt="g", numalign="decimal", stralign="left", - missingval=""): +def tabulate(tabular_data, headers=(), tablefmt="simple", + floatfmt=_DEFAULT_FLOATFMT, numalign="decimal", stralign="left", + missingval=_DEFAULT_MISSINGVAL, showindex="default", + disable_numparse=False): """Format a fixed width table for pretty printing. - >>> print(tabulate([[1, 2.34], [-56, "8.999"], ["2", "10001"]])[0]) + >>> print(tabulate([[1, 2.34], [-56, "8.999"], ["2", "10001"]])) --- --------- 1 2.34 -56 8.999 @@ -688,12 +922,24 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", with the plain-text format of R and Pandas' dataframes. >>> print(tabulate([["sex","age"],["Alice","F",24],["Bob","M",19]], - ... headers="firstrow")[0]) + ... headers="firstrow")) sex age ----- ----- ----- Alice F 24 Bob M 19 + By default, pandas.DataFrame data have an additional column called + row index. To add a similar column to all other types of data, + use `showindex="always"` or `showindex=True`. To suppress row indices + for all types of data, pass `showindex="never" or `showindex=False`. + To add a custom row index column, pass `showindex=some_iterable`. 
+ + >>> print(tabulate([["F",24],["M",19]], showindex="always")) + - - -- + 0 F 24 + 1 M 19 + - - -- + Column alignment ---------------- @@ -710,13 +956,16 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", ------------- `floatfmt` is a format specification used for columns which - contain numeric data with a decimal point. + contain numeric data with a decimal point. This can also be + a list or tuple of format strings, one per column. - `None` values are replaced with a `missingval` string: + `None` values are replaced with a `missingval` string (like + `floatfmt`, this can also be a list of values for different + columns): >>> print(tabulate([["spam", 1, None], ... ["eggs", 42, 3.14], - ... ["other", None, 2.7]], missingval="?")[0]) + ... ["other", None, 2.7]], missingval="?")) ----- -- ---- spam 1 ? eggs 42 3.14 @@ -725,32 +974,34 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", Various plain-text table formats (`tablefmt`) are supported: 'plain', 'simple', 'grid', 'pipe', 'orgtbl', 'rst', 'mediawiki', - 'latex', and 'latex_booktabs'. Variable `tabulate_formats` contains the list of - currently supported formats. + 'latex', 'latex_raw' and 'latex_booktabs'. Variable `tabulate_formats` + contains the list of currently supported formats. "plain" format doesn't use any pseudographics to draw tables, it separates columns with a double space: >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], - ... ["strings", "numbers"], "plain")[0]) + ... ["strings", "numbers"], "plain")) strings numbers spam 41.9999 eggs 451 - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="plain")[0]) + >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], + ... tablefmt="plain")) spam 41.9999 eggs 451 "simple" format is like Pandoc simple_tables: >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], - ... ["strings", "numbers"], "simple")[0]) + ... 
["strings", "numbers"], "simple")) strings numbers --------- --------- spam 41.9999 eggs 451 - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="simple")[0]) + >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], + ... tablefmt="simple")) ---- -------- spam 41.9999 eggs 451 @@ -760,7 +1011,7 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", Pandoc grid_tables: >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], - ... ["strings", "numbers"], "grid")[0]) + ... ["strings", "numbers"], "grid")) +-----------+-----------+ | strings | numbers | +===========+===========+ @@ -769,7 +1020,8 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", | eggs | 451 | +-----------+-----------+ - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="grid")[0]) + >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], + ... tablefmt="grid")) +------+----------+ | spam | 41.9999 | +------+----------+ @@ -779,7 +1031,7 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", "fancy_grid" draws a grid using box-drawing characters: >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], - ... ["strings", "numbers"], "fancy_grid")[0]) + ... ["strings", "numbers"], "fancy_grid")) ╒═══════════╤═══════════╕ │ strings │ numbers │ ╞═══════════╪═══════════╡ @@ -792,13 +1044,14 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", pipe_tables: >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], - ... ["strings", "numbers"], "pipe")[0]) + ... ["strings", "numbers"], "pipe")) | strings | numbers | |:----------|----------:| | spam | 41.9999 | | eggs | 451 | - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="pipe")[0]) + >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], + ... 
tablefmt="pipe")) |:-----|---------:| | spam | 41.9999 | | eggs | 451 | @@ -809,14 +1062,15 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", intersections: >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], - ... ["strings", "numbers"], "orgtbl")[0]) + ... ["strings", "numbers"], "orgtbl")) | strings | numbers | |-----------+-----------| | spam | 41.9999 | | eggs | 451 | - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="orgtbl")[0]) + >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], + ... tablefmt="orgtbl")) | spam | 41.9999 | | eggs | 451 | @@ -824,7 +1078,7 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", note that reStructuredText accepts also "grid" tables: >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], - ... ["strings", "numbers"], "rst")[0]) + ... ["strings", "numbers"], "rst")) ========= ========= strings numbers ========= ========= @@ -832,7 +1086,7 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", eggs 451 ========= ========= - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="rst")[0]) + >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="rst")) ==== ======== spam 41.9999 eggs 451 @@ -841,8 +1095,9 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", "mediawiki" produces a table markup used in Wikipedia and on other MediaWiki-based sites: - >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]], - ... headers="firstrow", tablefmt="mediawiki")[0]) + >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], + ... ["eggs", "451.0"]], headers="firstrow", + ... tablefmt="mediawiki")) {| class="wikitable" style="text-align: left;" |+ |- @@ -855,85 +1110,162 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", "html" produces HTML markup: - >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]], - ... 
headers="firstrow", tablefmt="html")[0]) + >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], + ... ["eggs", "451.0"]], headers="firstrow", + ... tablefmt="html")) - - - + + + + + + +
stringsnumbers
spam41.9999
eggs451
strings numbers
spam 41.9999
eggs 451
"latex" produces a tabular environment of LaTeX document markup: - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex")[0]) - \\begin{tabular}{ll} + >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], + ... tablefmt="latex")) + \\begin{tabular}{lr} + \\hline + spam & 41.9999 \\\\ + eggs & 451 \\\\ + \\hline + \\end{tabular} + + "latex_raw" is similar to "latex", but doesn't escape special characters, + such as backslash and underscore, so LaTeX commands may embedded into + cells' values: + + >>> print(tabulate([["spam$_9$", 41.9999], ["\\\\emph{eggs}", "451.0"]], + ... tablefmt="latex_raw")) + \\begin{tabular}{lr} \\hline - spam & 41.9999 \\\\ - eggs & 451 \\\\ + spam$_9$ & 41.9999 \\\\ + \\emph{eggs} & 451 \\\\ \\hline \\end{tabular} "latex_booktabs" produces a tabular environment of LaTeX document markup using the booktabs.sty package: - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex_booktabs")[0]) - \\begin{tabular}{ll} + >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], + ... tablefmt="latex_booktabs")) + \\begin{tabular}{lr} \\toprule - spam & 41.9999 \\\\ - eggs & 451 \\\\ + spam & 41.9999 \\\\ + eggs & 451 \\\\ \\bottomrule \end{tabular} - Also returns a tuple of the raw rows pulled from tabular_data + Number parsing + -------------- + By default, anything which can be parsed as a number is a number. + This ensures numbers represented as strings are aligned properly. + This can lead to weird results for particular strings such as + specific git SHAs e.g. "42992e1" will be parsed into the number + 429920 and aligned as such. + + To completely disable number parsing (and alignment), use + `disable_numparse=True`. For more fine grained control, a list column + indices is used to disable number parsing only on those columns + e.g. `disable_numparse=[0, 2]` would disable number parsing only on the + first and third columns. 
+ """ if tabular_data is None: tabular_data = [] - list_of_lists, headers = _normalize_tabular_data(tabular_data, headers) + list_of_lists, headers = _normalize_tabular_data( + tabular_data, headers, showindex=showindex) - # format rows and columns, convert numeric values to strings - cols = list(zip(*list_of_lists)) - coltypes = list(map(_column_type, cols)) - cols = [[_format(v, ct, floatfmt, missingval) for v in c] - for c,ct in zip(cols, coltypes)] + # empty values in the first column of RST tables should be escaped + # (issue #82). "" should be escaped as "\\ " or ".." + if tablefmt == 'rst': + list_of_lists, headers = _rst_escape_first_column(list_of_lists, + headers) # optimization: look for ANSI control codes once, # enable smart width functions only if a control code is found - plain_text = '\n'.join(['\t'.join(map(_text_type, headers))] + \ - ['\t'.join(map(_text_type, row)) for row in cols]) + plain_text = '\n'.join(['\t'.join(map(_text_type, headers))] + + ['\t'.join(map(_text_type, row)) + for row in list_of_lists]) + has_invisible = re.search(_invisible_codes, plain_text) + enable_widechars = wcwidth is not None and WIDE_CHARS_MODE if has_invisible: width_fn = _visible_width + elif enable_widechars: # optional wide-character support if available + width_fn = wcwidth.wcswidth else: - width_fn = wcswidth + width_fn = len - if not isinstance(tablefmt, TableFormat): - tablefmt = _table_formats.get(tablefmt, _table_formats["simple"]) - - if tablefmt.with_align: - # align columns - aligns = [numalign if ct in [int,float] else stralign for ct in coltypes] + # format rows and columns, convert numeric values to strings + cols = list(izip_longest(*list_of_lists)) + numparses = _expand_numparse(disable_numparse, len(cols)) + coltypes = [_column_type(col, numparse=np) for col, np in + zip(cols, numparses)] + if isinstance(floatfmt, basestring): # old version + # just duplicate the string to use in each column + float_formats = len(cols) * [floatfmt] + else: # if 
floatfmt is list, tuple etc we have one per column + float_formats = list(floatfmt) + if len(float_formats) < len(cols): + float_formats.extend((len(cols) - len(float_formats)) * + [_DEFAULT_FLOATFMT]) + if isinstance(missingval, basestring): + missing_vals = len(cols) * [missingval] else: - aligns = [False for ct in coltypes] - minwidths = [width_fn(h) + MIN_PADDING for h in headers] if headers else [0]*len(cols) + missing_vals = list(missingval) + if len(missing_vals) < len(cols): + missing_vals.extend((len(cols) - len(missing_vals)) * + [_DEFAULT_MISSINGVAL]) + cols = [[_format(v, ct, fl_fmt, miss_v, has_invisible) for v in c] + for c, ct, fl_fmt, miss_v in zip(cols, coltypes, float_formats, + missing_vals)] + + # align columns + aligns = [numalign if ct in [int, float] else stralign for ct in coltypes] + minwidths = [width_fn(h) + MIN_PADDING + for h in headers] if headers else [0] * len(cols) cols = [_align_column(c, a, minw, has_invisible) for c, a, minw in zip(cols, aligns, minwidths)] if headers: # align headers and add headers t_cols = cols or [['']] * len(headers) - if tablefmt.with_align: - t_aligns = aligns or [stralign] * len(headers) - else: - t_aligns = [False for ct in coltypes] - minwidths = [max(minw, width_fn(c[0])) for minw, c in zip(minwidths, t_cols)] - headers = [_align_header(h, a, minw) + t_aligns = aligns or [stralign] * len(headers) + minwidths = [max(minw, width_fn(c[0])) + for minw, c in zip(minwidths, t_cols)] + headers = [_align_header(h, a, minw, width_fn(h)) for h, a, minw in zip(headers, t_aligns, minwidths)] rows = list(zip(*cols)) else: minwidths = [width_fn(c[0]) for c in cols] rows = list(zip(*cols)) - return _format_table(tablefmt, headers, rows, minwidths, aligns), rows + if not isinstance(tablefmt, TableFormat): + tablefmt = _table_formats.get(tablefmt, _table_formats["simple"]) + + return _format_table(tablefmt, headers, rows, minwidths, aligns) + + +def _expand_numparse(disable_numparse, column_count): + """Return a list 
of bools of length `column_count` which indicates whether + number parsing should be used on each column. + + If `disable_numparse` is a list of indices, each of those indices + are False, and everything else is True. If `disable_numparse` is a + bool, then the returned list is all the same. + + """ + if isinstance(disable_numparse, Iterable): + numparses = [True] * column_count + for index in disable_numparse: + numparses[index] = False + return numparses + else: + return [not disable_numparse] * column_count def _build_simple_row(padded_cells, rowfmt): @@ -988,32 +1320,41 @@ def _format_table(fmt, headers, rows, colwidths, colaligns): lines.append(_build_line(padded_widths, colaligns, fmt.lineabove)) if padded_headers: - lines.append(_build_row(padded_headers, padded_widths, colaligns, headerrow)) + lines.append(_build_row(padded_headers, padded_widths, colaligns, + headerrow)) if fmt.linebelowheader and "linebelowheader" not in hidden: - lines.append(_build_line(padded_widths, colaligns, fmt.linebelowheader)) + lines.append(_build_line(padded_widths, colaligns, + fmt.linebelowheader)) if padded_rows and fmt.linebetweenrows and "linebetweenrows" not in hidden: # initial rows with a line below for row in padded_rows[:-1]: - lines.append(_build_row(row, padded_widths, colaligns, fmt.datarow)) - lines.append(_build_line(padded_widths, colaligns, fmt.linebetweenrows)) + lines.append(_build_row(row, padded_widths, colaligns, + fmt.datarow)) + lines.append(_build_line(padded_widths, colaligns, + fmt.linebetweenrows)) # the last row without a line below - lines.append(_build_row(padded_rows[-1], padded_widths, colaligns, fmt.datarow)) + lines.append(_build_row(padded_rows[-1], padded_widths, colaligns, + fmt.datarow)) else: for row in padded_rows: - lines.append(_build_row(row, padded_widths, colaligns, fmt.datarow)) + lines.append(_build_row(row, padded_widths, colaligns, + fmt.datarow)) if fmt.linebelow and "linebelow" not in hidden: 
lines.append(_build_line(padded_widths, colaligns, fmt.linebelow)) - return "\n".join(lines) + if headers or rows: + return "\n".join(lines) + else: # a completely empty table + return "" def _main(): - """\ - Usage: tabulate [options] [FILE ...] + """\ Usage: tabulate [options] [FILE ...] - Pretty-print tabular data. See also https://bitbucket.org/astanin/python-tabulate + Pretty-print tabular data. + See also https://bitbucket.org/astanin/python-tabulate FILE a filename of the file with tabular data; if "-" or missing, read data from stdin. @@ -1022,30 +1363,40 @@ def _main(): -h, --help show this message -1, --header use the first row of data as a table header + -o FILE, --output FILE print table to FILE (default: stdout) -s REGEXP, --sep REGEXP use a custom column separator (default: whitespace) + -F FPFMT, --float FPFMT floating point number format (default: g) -f FMT, --format FMT set output table format; supported formats: plain, simple, grid, fancy_grid, pipe, orgtbl, - rst, mediawiki, html, latex, latex_booktabs, tsv + rst, mediawiki, html, latex, latex_raw, + latex_booktabs, tsv (default: simple) + """ import getopt import sys import textwrap usage = textwrap.dedent(_main.__doc__) try: - opts, args = getopt.getopt(sys.argv[1:], - "h1f:s:", - ["help", "header", "format", "separator"]) + opts, args = getopt.getopt( + sys.argv[1:], "h1o:s:F:f:", + ["help", "header", "output", "sep=", "float=", "format="]) except getopt.GetoptError as e: print(e) print(usage) sys.exit(2) headers = [] + floatfmt = _DEFAULT_FLOATFMT tablefmt = "simple" sep = r"\s+" + outfile = "-" for opt, value in opts: if opt in ["-1", "--header"]: headers = "firstrow" + elif opt in ["-o", "--output"]: + outfile = value + elif opt in ["-F", "--float"]: + floatfmt = value elif opt in ["-f", "--format"]: if value not in tabulate_formats: print("%s is not a supported table format" % value) @@ -1058,20 +1409,23 @@ def _main(): print(usage) sys.exit(0) files = [sys.stdin] if not args else args - 
for f in files: - if f == "-": - f = sys.stdin - if _is_file(f): - _pprint_file(f, headers=headers, tablefmt=tablefmt, sep=sep) - else: - with open(f) as fobj: - _pprint_file(fobj) + with (sys.stdout if outfile == "-" else open(outfile, "w")) as out: + for f in files: + if f == "-": + f = sys.stdin + if _is_file(f): + _pprint_file(f, headers=headers, tablefmt=tablefmt, + sep=sep, floatfmt=floatfmt, file=out) + else: + with open(f) as fobj: + _pprint_file(fobj, headers=headers, tablefmt=tablefmt, + sep=sep, floatfmt=floatfmt, file=out) -def _pprint_file(fobject, headers, tablefmt, sep): +def _pprint_file(fobject, headers, tablefmt, sep, floatfmt, file): rows = fobject.readlines() - table = [re.split(sep, r.rstrip()) for r in rows] - print(tabulate(table, headers, tablefmt)) + table = [re.split(sep, r.rstrip()) for r in rows if r.strip()] + print(tabulate(table, headers, tablefmt, floatfmt=floatfmt), file=file) if __name__ == "__main__": diff --git a/mycli/sqlcompleter.py b/mycli/sqlcompleter.py index 41364756..f01afb1d 100644 --- a/mycli/sqlcompleter.py +++ b/mycli/sqlcompleter.py @@ -1,13 +1,14 @@ from __future__ import print_function from __future__ import unicode_literals import logging +from re import compile, escape +from collections import Counter + from prompt_toolkit.completion import Completer, Completion + from .packages.completion_engine import suggest_type from .packages.parseutils import last_word from .packages.special.favoritequeries import favoritequeries -from re import compile, escape -from .packages.tabulate import table_formats -from collections import Counter _logger = logging.getLogger(__name__) @@ -48,7 +49,7 @@ class SQLCompleter(Completer): users = [] - def __init__(self, smart_completion=True): + def __init__(self, smart_completion=True, supported_formats=()): super(self.__class__, self).__init__() self.smart_completion = smart_completion self.reserved_words = set() @@ -57,7 +58,7 @@ def __init__(self, smart_completion=True): 
self.name_pattern = compile("^[_a-z][_a-z0-9\$]*$") self.special_commands = [] - self.table_formats = table_formats() + self.table_formats = supported_formats self.reset_completions() def escape_name(self, name): diff --git a/mycli/sqlexecute.py b/mycli/sqlexecute.py index 786c8f0e..4d5c0a09 100644 --- a/mycli/sqlexecute.py +++ b/mycli/sqlexecute.py @@ -4,7 +4,7 @@ from .packages import special from pymysql.constants import FIELD_TYPE from pymysql.converters import (convert_mysql_timestamp, convert_datetime, - convert_timedelta, convert_date) + convert_timedelta, convert_date, conversions) _logger = logging.getLogger(__name__) @@ -65,12 +65,13 @@ def connect(self, database=None, user=None, password=None, host=None, '\tlocal_infile: %r' '\tssl: %r', database, user, host, port, socket, charset, local_infile, ssl) - conv = { - FIELD_TYPE.TIMESTAMP: lambda obj: (convert_mysql_timestamp(obj) or obj), - FIELD_TYPE.DATETIME: lambda obj: (convert_datetime(obj) or obj), - FIELD_TYPE.TIME: lambda obj: (convert_timedelta(obj) or obj), - FIELD_TYPE.DATE: lambda obj: (convert_date(obj) or obj), - } + conv = conversions.copy() + conv.update({ + FIELD_TYPE.TIMESTAMP: lambda obj: (convert_mysql_timestamp(obj) or obj), + FIELD_TYPE.DATETIME: lambda obj: (convert_datetime(obj) or obj), + FIELD_TYPE.TIME: lambda obj: (convert_timedelta(obj) or obj), + FIELD_TYPE.DATE: lambda obj: (convert_date(obj) or obj), + }) conn = pymysql.connect(database=db, user=user, password=password, host=host, port=port, unix_socket=socket, diff --git a/setup.py b/setup.py index 82cdef6e..417929c6 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ 'sqlparse>=0.2.2,<0.3.0', 'configobj >= 5.0.5', 'pycryptodome >= 3', + 'terminaltables >= 3.0.0', ] setup( diff --git a/tests/features/fixture_data/help_commands.txt b/tests/features/fixture_data/help_commands.txt index 1c5a08bf..ee3d4ca4 100644 --- a/tests/features/fixture_data/help_commands.txt +++ b/tests/features/fixture_data/help_commands.txt @@ -1,6 
+1,6 @@ +-------------+-------------------+---------------------------------------------------------+ | Command | Shortcut | Description | -|-------------+-------------------+---------------------------------------------------------| ++-------------+-------------------+---------------------------------------------------------+ | \G | \G | Display results vertically. | | \dt | \dt [table] | List or describe tables. | | \e | \e | Edit command with editor. (uses $EDITOR) | diff --git a/tests/features/steps/crud_table.py b/tests/features/steps/crud_table.py index 72b4e080..b73ff0d6 100644 --- a/tests/features/steps/crud_table.py +++ b/tests/features/steps/crud_table.py @@ -91,7 +91,8 @@ def step_see_data_selected(context): """ Wait to see select output. """ - wrappers.expect_exact(context, '+-----+\r\n| x |\r\n|-----|\r\n| yyy |\r\n+-----+\r\n1 row in set\r\n', timeout=1) + wrappers.expect_exact( + context, '+-----+\r\n| x |\r\n+-----+\r\n| yyy |\r\n+-----+\r\n1 row in set\r\n', timeout=1) @then('we see record deleted') diff --git a/tests/test_completion_refresher.py b/tests/test_completion_refresher.py index a50ad749..8851eae6 100644 --- a/tests/test_completion_refresher.py +++ b/tests/test_completion_refresher.py @@ -37,7 +37,7 @@ def test_refresh_called_once(refresher): assert len(actual) == 1 assert len(actual[0]) == 4 assert actual[0][3] == 'Auto-completion refresh started in the background.' 
- bg_refresh.assert_called_with(sqlexecute, callbacks) + bg_refresh.assert_called_with(sqlexecute, callbacks, {}) def test_refresh_called_twice(refresher): diff --git a/tests/test_expanded.py b/tests/test_expanded.py index 9b2a6c5c..7233e91c 100644 --- a/tests/test_expanded.py +++ b/tests/test_expanded.py @@ -1,13 +1,19 @@ -from mycli.packages.expanded import expanded_table +"""Test the vertical, expanded table formatter.""" +from textwrap import dedent + +from mycli.output_formatter.expanded import expanded_table +from mycli.encodingutils import text_type + def test_expanded_table_renders(): - input = [("hello", 123), ("world", 456)] - - expected = """***************************[ 1. row ]*************************** -name | hello -age | 123 -***************************[ 2. row ]*************************** -name | world -age | 456 -""" - assert expected == expanded_table(input, ["name", "age"]) + results = [('hello', text_type(123)), ('world', text_type(456))] + + expected = dedent("""\ + ***************************[ 1. row ]*************************** + name | hello + age | 123 + ***************************[ 2. 
row ]*************************** + name | world + age | 456 + """) + assert expected == expanded_table(results, ('name', 'age')) diff --git a/tests/test_main.py b/tests/test_main.py index 14f1c10e..1271f18d 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -3,11 +3,13 @@ import click from click.testing import CliRunner -from mycli.main import (cli, confirm_destructive_query, format_output, +from mycli.main import (cli, confirm_destructive_query, is_destructive, query_starts_with, queries_start_with, thanks_picker, PACKAGE_ROOT) from utils import USER, HOST, PORT, PASSWORD, dbtest, run +from textwrap import dedent + try: text_type = basestring except NameError: @@ -16,32 +18,6 @@ CLI_ARGS = ['--user', USER, '--host', HOST, '--port', PORT, '--password', PASSWORD, '_test_db'] -def test_format_output(): - results = format_output('Title', [('abc', 'def')], ['head1', 'head2'], - 'test status', 'psql') - expected = ['Title', '+---------+---------+\n| head1 | head2 |\n|---------+---------|\n| abc | def |\n+---------+---------+', 'test status'] - assert results == expected - -def test_format_output_auto_expand(): - table_results = format_output('Title', [('abc', 'def')], - ['head1', 'head2'], 'test status', 'psql', - max_width=100) - table = ['Title', '+---------+---------+\n| head1 | head2 |\n|---------+---------|\n| abc | def |\n+---------+---------+', 'test status'] - assert table_results == table - - expanded_results = format_output('Title', [('abc', 'def')], - ['head1', 'head2'], 'test status', 'psql', - max_width=1) - expanded = ['Title', u'***************************[ 1. 
row ]***************************\nhead1 | abc\nhead2 | def\n', 'test status'] - assert expanded_results == expanded - -def test_format_output_no_table(): - results = format_output('Title', [('abc', 'def')], ['head1', 'head2'], - 'test status', None) - - expected = ['Title', u'head1\thead2\nabc\tdef', 'test status'] - assert results == expected - @dbtest def test_execute_arg(executor): run(executor, 'create table test (a text)') @@ -72,7 +48,7 @@ def test_execute_arg_with_table(executor): sql = 'select * from test;' runner = CliRunner() result = runner.invoke(cli, args=CLI_ARGS + ['-e', sql] + ['--table']) - expected = '+-----+\n| a |\n|-----|\n| abc |\n+-----+\n' + expected = '+-----+\n| a |\n+-----+\n| abc |\n+-----+\n' assert result.exit_code == 0 assert expected in result.output @@ -106,7 +82,7 @@ def test_batch_mode(executor): result = runner.invoke(cli, args=CLI_ARGS, input=sql) assert result.exit_code == 0 - assert 'count(*)\n3\na\nabc\n' in result.output + assert 'count(*)\n3\n\na\nabc\n' in result.output @dbtest def test_batch_mode_table(executor): @@ -121,10 +97,17 @@ def test_batch_mode_table(executor): runner = CliRunner() result = runner.invoke(cli, args=CLI_ARGS + ['-t'], input=sql) - expected = ( - '| count(*) |\n|------------|\n| 3 |\n+------------+\n' - '+-----+\n| a |\n|-----|\n| abc |\n+-----+' - ) + expected = (dedent("""\ + +----------+ + | count(*) | + +----------+ + | 3 | + +----------+ + +-----+ + | a | + +-----+ + | abc | + +-----+""")) assert result.exit_code == 0 assert expected in result.output diff --git a/tests/test_output_formatter.py b/tests/test_output_formatter.py new file mode 100644 index 00000000..9844c191 --- /dev/null +++ b/tests/test_output_formatter.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- +"""Test the generic output formatter interface.""" + +from __future__ import unicode_literals +from decimal import Decimal +from textwrap import dedent + +from mycli.output_formatter.preprocessors import (align_decimals, + 
bytes_to_string, + convert_to_string, + quote_whitespaces, + override_missing_value, + to_string) +from mycli.output_formatter.output_formatter import OutputFormatter +from mycli.output_formatter import delimited_output_adapter +from mycli.output_formatter import tabulate_adapter +from mycli.output_formatter import terminaltables_adapter + + +def test_to_string(): + """Test the *output_formatter.to_string()* function.""" + assert 'a' == to_string('a') + assert 'a' == to_string(b'a') + assert '1' == to_string(1) + assert '1.23' == to_string(1.23) + + +def test_convert_to_string(): + """Test the *output_formatter.convert_to_string()* function.""" + data = [[1, 'John'], [2, 'Jill']] + headers = [0, 'name'] + expected = ([['1', 'John'], ['2', 'Jill']], ['0', 'name']) + + assert expected == convert_to_string(data, headers) + + +def test_override_missing_values(): + """Test the *output_formatter.override_missing_values()* function.""" + data = [[1, None], [2, 'Jill']] + headers = [0, 'name'] + expected = ([[1, ''], [2, 'Jill']], [0, 'name']) + + assert expected == override_missing_value(data, headers, + missing_value='') + + +def test_bytes_to_string(): + """Test the *output_formatter.bytes_to_string()* function.""" + data = [[1, 'John'], [2, b'Jill']] + headers = [0, 'name'] + expected = ([[1, 'John'], [2, 'Jill']], [0, 'name']) + + assert expected == bytes_to_string(data, headers) + + +def test_align_decimals(): + """Test the *align_decimals()* function.""" + data = [[Decimal('200'), Decimal('1')], [ + Decimal('1.00002'), Decimal('1.0')]] + headers = ['num1', 'num2'] + expected = ([['200', '1'], [' 1.00002', '1.0']], ['num1', 'num2']) + + assert expected == align_decimals(data, headers) + + +def test_align_decimals_empty_result(): + """Test *align_decimals()* with no results.""" + data = [] + headers = ['num1', 'num2'] + expected = ([], ['num1', 'num2']) + + assert expected == align_decimals(data, headers) + + +def test_quote_whitespaces(): + """Test the 
*quote_whitespaces()* function.""" + data = [[" before", "after "], [" both ", "none"]] + headers = ['h1', 'h2'] + expected = ([["' before'", "'after '"], ["' both '", "'none'"]], + ['h1', 'h2']) + + assert expected == quote_whitespaces(data, headers) + + +def test_quote_whitespaces_empty_result(): + """Test the *quote_whitespaces()* function with no results.""" + data = [] + headers = ['h1', 'h2'] + expected = ([], ['h1', 'h2']) + + assert expected == quote_whitespaces(data, headers) + + +def test_tabulate_wrapper(): + """Test the *output_formatter.tabulate_wrapper()* function.""" + data = [['abc', 1], ['d', 456]] + headers = ['letters', 'number'] + output = tabulate_adapter.adapter(data, headers, table_format='psql') + assert output == dedent('''\ + +-----------+----------+ + | letters | number | + |-----------+----------| + | abc | 1 | + | d | 456 | + +-----------+----------+''') + + +def test_csv_wrapper(): + """Test the *output_formatter.csv_wrapper()* function.""" + # Test comma-delimited output. + data = [['abc', 1], ['d', 456]] + headers = ['letters', 'number'] + output = delimited_output_adapter.adapter(data, headers) + assert output == dedent('''\ + letters,number\r\n\ + abc,1\r\n\ + d,456\r\n''') + + # Test tab-delimited output. 
+ data = [['abc', 1], ['d', 456]] + headers = ['letters', 'number'] + output = delimited_output_adapter.adapter( + data, headers, table_format='tsv') + assert output == dedent('''\ + letters\tnumber\r\n\ + abc\t1\r\n\ + d\t456\r\n''') + + +def test_terminal_tables_wrapper(): + """Test the *output_formatter.terminal_tables_wrapper()* function.""" + data = [['abc', 1], ['d', 456]] + headers = ['letters', 'number'] + output = terminaltables_adapter.adapter( + data, headers, table_format='ascii') + assert output == dedent('''\ + +---------+--------+ + | letters | number | + +---------+--------+ + | abc | 1 | + | d | 456 | + +---------+--------+''') + + +def test_output_formatter(): + """Test the *output_formatter.OutputFormatter* class.""" + data = [['abc', Decimal(1)], ['defg', Decimal('11.1')], + ['hi', Decimal('1.1')]] + headers = ['text', 'numeric'] + expected = dedent('''\ + +------+---------+ + | text | numeric | + +------+---------+ + | abc | 1 | + | defg | 11.1 | + | hi | 1.1 | + +------+---------+''') + + assert expected == OutputFormatter().format_output(data, headers, + format_name='ascii') diff --git a/tests/test_sqlexecute.py b/tests/test_sqlexecute.py index e9929a70..a9b5fbec 100644 --- a/tests/test_sqlexecute.py +++ b/tests/test_sqlexecute.py @@ -15,7 +15,7 @@ def test_conn(executor): assert results == dedent("""\ +-----+ | a | - |-----| + +-----+ | abc | +-----+ 1 row in set""") @@ -26,11 +26,11 @@ def test_bools(executor): run(executor, '''insert into test values(True)''') results = run(executor, '''select * from test''', join=True) assert results == dedent("""\ - +-----+ - | a | - |-----| - | 1 | - +-----+ + +---+ + | a | + +---+ + | 1 | + +---+ 1 row in set""") @dbtest @@ -41,7 +41,7 @@ def test_binary(executor): assert results == dedent("""\ +----------------------------------------------------------------------------------------------+ | geom | - |----------------------------------------------------------------------------------------------| + 
+----------------------------------------------------------------------------------------------+ | 0x00000000010200000002000000397f130a11185d4034f44f70b1de43400000000000185d40423ee8d9acde4340 | +----------------------------------------------------------------------------------------------+ 1 row in set""") @@ -140,7 +140,7 @@ def test_favorite_query(executor): > select * from test where a like 'a%' +-----+ | a | - |-----| + +-----+ | abc | +-----+""") @@ -163,13 +163,13 @@ def test_favorite_query_multiple_statement(executor): > select * from test where a like 'a%' +-----+ | a | - |-----| + +-----+ | abc | +-----+ > select * from test where a like 'd%' +-----+ | a | - |-----| + +-----+ | def | +-----+""") @@ -261,13 +261,13 @@ def test_favorite_query_multiline_statement(executor): > select * from test where a like 'a%' +-----+ | a | - |-----| + +-----+ | abc | +-----+ > select * from test where a like 'd%' +-----+ | a | - |-----| + +-----+ | def | +-----+""") @@ -282,7 +282,7 @@ def test_timestamp_null(executor): assert results == dedent("""\ +---------------------+ | a | - |---------------------| + +---------------------+ | 0000-00-00 00:00:00 | +---------------------+ 1 row in set""") @@ -295,7 +295,7 @@ def test_datetime_null(executor): assert results == dedent("""\ +---------------------+ | a | - |---------------------| + +---------------------+ | 0000-00-00 00:00:00 | +---------------------+ 1 row in set""") @@ -308,7 +308,7 @@ def test_date_null(executor): assert results == dedent("""\ +------------+ | a | - |------------| + +------------+ | 0000-00-00 | +------------+ 1 row in set""") @@ -321,7 +321,7 @@ def test_time_null(executor): assert results == dedent("""\ +----------+ | a | - |----------| + +----------+ | 00:00:00 | +----------+ 1 row in set""") diff --git a/tests/test_tabulate.py b/tests/test_tabulate.py index e0ddc407..ae7c25ce 100644 --- a/tests/test_tabulate.py +++ b/tests/test_tabulate.py @@ -1,22 +1,17 @@ -from mycli.packages.tabulate import 
tabulate from textwrap import dedent +from mycli.packages import tabulate + +tabulate.PRESERVE_WHITESPACE = True + def test_dont_strip_leading_whitespace(): data = [[' abc']] headers = ['xyz'] - tbl, _ = tabulate(data, headers, tablefmt='psql') + tbl = tabulate.tabulate(data, headers, tablefmt='psql') assert tbl == dedent(''' +---------+ | xyz | |---------| | abc | +---------+ ''').strip() -def test_dont_add_whitespace(): - data = [[3, 4]] - headers = ['1', '2'] - tbl, _ = tabulate(data, headers, tablefmt='tsv') - assert tbl == dedent(''' - 1\t2 - 3\t4 - ''').strip() diff --git a/tests/utils.py b/tests/utils.py index c76cb7ad..b29e5e07 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,8 +1,10 @@ -import pytest -import pymysql -from mycli.main import format_output, special from os import getenv +import pymysql +import pytest + +from mycli.main import MyCli, special + PASSWORD = getenv('PYTEST_PASSWORD') USER = getenv('PYTEST_USER', 'root') HOST = getenv('PYTEST_HOST', 'localhost') @@ -37,8 +39,14 @@ def create_db(dbname): def run(executor, sql, join=False): " Return string output for the sql to be run " result = [] + + # TODO: this needs to go away. `run()` should not test formatted output. + # It should test raw results. + mycli = MyCli() for title, rows, headers, status in executor.run(sql): - result.extend(format_output(title, rows, headers, status, 'psql', special.is_expanded_output())) + result.extend(mycli.format_output(title, rows, headers, status, + special.is_expanded_output())) + if join: result = '\n'.join(result) return result