From a055c6dc1e3e80faadde90938d9c67d02c1b5608 Mon Sep 17 00:00:00 2001 From: Colin Zuo Date: Sat, 31 Jan 2026 08:49:31 +0800 Subject: [PATCH 1/4] add CLAUDE.md --- .gitignore | 2 ++ CLAUDE.md | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 CLAUDE.md diff --git a/.gitignore b/.gitignore index db4561ea..e031ebbe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +*.local.* + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..027cf071 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,96 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +pyhocon is a Python implementation of the HOCON (Human-Optimized Config Object Notation) parser. It parses HOCON configuration files into Python data structures and can convert to JSON, YAML, properties, and HOCON formats. 
+ +HOCON spec: https://github.com/typesafehub/config/blob/master/HOCON.md + +## Commands + +### Testing +```bash +# Run all tests +pytest tests/ + +# Run a specific test file +pytest tests/test_config_parser.py + +# Run a specific test +pytest tests/test_config_parser.py::TestConfigParser::test_parse_simple_value + +# Run with coverage +coverage run --source=pyhocon -m pytest tests/ +coverage report -m +``` + +### Linting +```bash +flake8 pyhocon tests setup.py +``` + +### Tox (multi-environment testing) +```bash +tox # Run all environments +tox -e flake8 # Run flake8 only +tox -e py312 # Run tests on Python 3.12 +``` + +### CLI Tool +```bash +# Convert HOCON to JSON +pyhocon -i input.conf -f json -o output.json +cat input.conf | pyhocon -f json + +# Other formats: json, yaml, properties, hocon +# Use -c for compact output (nested single-value dicts as a.b.c = 1) +``` + +## Architecture + +### Core Modules + +- **config_parser.py** - Main parsing engine using pyparsing library + - `ConfigFactory` - Public API for parsing (parse_file, parse_string, parse_URL, from_dict) + - `ConfigParser` - Internal parser with HOCON grammar rules + +- **config_tree.py** - Data structures + - `ConfigTree` - Hierarchical config storage (extends OrderedDict), supports dot notation access (`config['a.b.c']`) + - `ConfigList` - HOCON arrays + - `ConfigValues` - Concatenated values (handles array/string/dict merging) + - `ConfigSubstitution` - Represents `${var}` and `${?var}` substitutions + +- **converter.py** - `HOCONConverter` with to_json, to_yaml, to_properties, to_hocon methods + +- **period_parser.py / period_serializer.py** - Duration parsing and serialization (e.g., "5 days", "10 seconds") + +- **tool.py** - CLI entry point + +### Parsing Flow + +1. `ConfigFactory.parse_*()` receives input +2. `ConfigParser.parse()` applies pyparsing grammar rules +3. Produces `ConfigTree`/`ConfigList` with unresolved `ConfigSubstitution` tokens +4. 
`resolve_substitutions()` replaces `${var}` references from config or environment variables +5. Returns resolved `ConfigTree` + +### Key Features + +- Substitutions: `${key}` (required) and `${?key}` (optional); keys not found in the config fall back to environment variables +- Includes: `include "file.conf"`, `include url("http://...")`, `include required(file("..."))`, glob patterns +- Value access: `config['a.b.c']` or `config['a']['b']['c']` or `config.get_string('a.b.c')` +- Type-safe getters: `get_string()`, `get_int()`, `get_float()`, `get_bool()`, `get_list()`, `get_config()` + +## Dependencies + +- **pyparsing** (>=3, <4 on Python 3; ~=2.0 on Python 2) - Grammar parsing +- **python-dateutil** (>=2.8.0, optional) - For months/years in duration parsing + +## Test Dependencies + +- pytest +- mock +- python-dateutil +- coveralls (for CI coverage reporting) From e6ee294335cb933f3da1a5331fce42fd62de9171 Mon Sep 17 00:00:00 2001 From: Colin Zuo Date: Sat, 31 Jan 2026 09:03:54 +0800 Subject: [PATCH 2/4] add test install support --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7127c77b..f0397923 100755 --- a/setup.py +++ b/setup.py @@ -57,7 +57,8 @@ def run_tests(self): 'pyparsing>=2,<4;python_version>="3.0"', ], extras_require={ - 'Duration': ['python-dateutil>=2.8.0'] + 'Duration': ['python-dateutil>=2.8.0'], + 'test': ['pytest', 'mock==3.0.5'] }, tests_require=['pytest', 'mock==3.0.5'], entry_points={ From 9e93f7984c2d69276651d882d1cbb6d67a662b57 Mon Sep 17 00:00:00 2001 From: Colin Zuo Date: Sun, 1 Feb 2026 21:23:18 +0800 Subject: [PATCH 3/4] bump pyparsing to 3.0, then remove fix that's not needed anymore --- pyhocon/config_parser.py | 14 -------------- setup.py | 2 +- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/pyhocon/config_parser.py b/pyhocon/config_parser.py index 4936f1ee..7044f451 100644 --- a/pyhocon/config_parser.py +++ b/pyhocon/config_parser.py @@ -7,7 +7,6 @@ import socket import sys -import pyparsing from pyparsing import (Forward, Group, 
Keyword, Literal, Optional, ParserElement, ParseSyntaxException, QuotedString, Regex, SkipTo, StringEnd, Suppress, TokenConverter, @@ -16,19 +15,6 @@ from pyhocon.period_parser import get_period_expr -# Fix deepcopy issue with pyparsing -if sys.version_info >= (3, 8): - def fixed_get_attr(self, item): - if item == '__deepcopy__': - raise AttributeError(item) - try: - return self[item] - except KeyError: - return "" - - - pyparsing.ParseResults.__getattr__ = fixed_get_attr - from pyhocon.config_tree import (ConfigInclude, ConfigList, ConfigQuotedString, ConfigSubstitution, ConfigTree, ConfigUnquotedString, ConfigValues, NoneValue) diff --git a/setup.py b/setup.py index f0397923..8445148d 100755 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ def run_tests(self): ], install_requires=[ 'pyparsing~=2.0;python_version<"3.0"', - 'pyparsing>=2,<4;python_version>="3.0"', + 'pyparsing>=3,<4;python_version>="3.0"', ], extras_require={ 'Duration': ['python-dateutil>=2.8.0'], From 67792a726a685016758d168b0ec4e24aa8083b2f Mon Sep 17 00:00:00 2001 From: Colin Zuo Date: Tue, 3 Feb 2026 20:15:10 +0800 Subject: [PATCH 4/4] replace deprecated function with new ones --- pyhocon/config_parser.py | 34 +++++++++++++++++----------------- pyhocon/period_parser.py | 4 ++-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pyhocon/config_parser.py b/pyhocon/config_parser.py index 7044f451..ec60afe1 100644 --- a/pyhocon/config_parser.py +++ b/pyhocon/config_parser.py @@ -11,7 +11,7 @@ ParserElement, ParseSyntaxException, QuotedString, Regex, SkipTo, StringEnd, Suppress, TokenConverter, Word, ZeroOrMore, alphanums, alphas8bit, col, lineno, - replaceWith) + replace_with) from pyhocon.period_parser import get_period_expr @@ -128,7 +128,7 @@ def parse_file(cls, filename, encoding='utf-8', required=True, resolve=True, unr except IOError as e: if required: raise e - logger.warn('Cannot include file %s. 
File does not exist or cannot be read.', filename) + logger.warning('Cannot include file %s. File does not exist or cannot be read.', filename) return [] @classmethod @@ -153,7 +153,7 @@ def parse_URL(cls, url, timeout=None, resolve=True, required=False, unresolved_v content = fd.read() if use_urllib2 else fd.read().decode('utf-8') return cls.parse_string(content, os.path.dirname(url), resolve, unresolved_value) except (HTTPError, URLError) as e: - logger.warn('Cannot include url %s. Resource is inaccessible.', url) + logger.warning('Cannot include url %s. Resource is inaccessible.', url) if required: raise e else: @@ -362,17 +362,17 @@ def _merge(a, b): @contextlib.contextmanager def set_default_white_spaces(): default = ParserElement.DEFAULT_WHITE_CHARS - ParserElement.setDefaultWhitespaceChars(' \t') + ParserElement.set_default_whitespace_chars(' \t') yield - ParserElement.setDefaultWhitespaceChars(default) + ParserElement.set_default_whitespace_chars(default) with set_default_white_spaces(): assign_expr = Forward() - true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True)) - false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False)) - null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(NoneValue())) - key = QuotedString('"""', escChar='\\', unquoteResults=False) | \ - QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- /') + true_expr = Keyword("true", caseless=True).set_parse_action(replace_with(True)) + false_expr = Keyword("false", caseless=True).set_parse_action(replace_with(False)) + null_expr = Keyword("null", caseless=True).set_parse_action(replace_with(NoneValue())) + key = QuotedString('"""', esc_char='\\', unquote_results=False) | \ + QuotedString('"', esc_char='\\', unquote_results=False) | Word(alphanums + alphas8bit + '._- /') eol = Word('\n\r').suppress() eol_comma = Word('\n\r,').suppress() @@ -380,20 +380,20 @@ def set_default_white_spaces(): 
comment_eol = Suppress(Optional(eol_comma) + comment) comment_no_comma_eol = (comment | eol).suppress() number_expr = Regex(r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))', - re.DOTALL).setParseAction(convert_number) + re.DOTALL).set_parse_action(convert_number) # multi line string using """ # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969 - multiline_string = Regex('""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string) + multiline_string = Regex('""".*?"*"""', re.DOTALL | re.UNICODE).set_parse_action(parse_multi_string) # single quoted line string - quoted_string = Regex(r'"(?:[^"\\\n]|\\.)*"[ \t]*', re.UNICODE).setParseAction(create_quoted_string) + quoted_string = Regex(r'"(?:[^"\\\n]|\\.)*"[ \t]*', re.UNICODE).set_parse_action(create_quoted_string) # unquoted string that takes the rest of the line until an optional comment # we support .properties multiline support which is like this: # line1 \ # line2 \ # so a backslash precedes the \n - unquoted_string = Regex(r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*', re.UNICODE).setParseAction( + unquoted_string = Regex(r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*', re.UNICODE).set_parse_action( unescape_string) - substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution) + substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').set_parse_action(create_substitution) string_expr = multiline_string | quoted_string | unquoted_string value_expr = get_period_expr() | number_expr | true_expr | false_expr | null_expr | string_expr @@ -408,7 +408,7 @@ def set_default_white_spaces(): Keyword("required") - Literal('(').suppress() - include_content - Literal(')').suppress() ) ) - ).setParseAction(include_config) + ).set_parse_action(include_config) root_dict_expr = Forward() dict_expr = Forward() @@ -437,7 +437,7 @@ def set_default_white_spaces(): config_expr = ZeroOrMore(comment_eol | eol) + ( list_expr | root_dict_expr 
| inside_root_dict_expr) + ZeroOrMore( comment_eol | eol_comma) - config = config_expr.parseString(content, parseAll=True)[0] + config = config_expr.parse_string(content, parse_all=True)[0] if resolve: allow_unresolved = resolve and unresolved_value is not DEFAULT_SUBSTITUTION \ diff --git a/pyhocon/period_parser.py b/pyhocon/period_parser.py index efa7a480..fc25f1a2 100644 --- a/pyhocon/period_parser.py +++ b/pyhocon/period_parser.py @@ -64,8 +64,8 @@ def get_period_expr(): return Combine( Word(nums)('value') + ZeroOrMore(Literal(" ")).suppress() + Or(period_types)('unit') + WordEnd( alphanums).suppress() - ).setParseAction(convert_period) + ).set_parse_action(convert_period) def parse_period(content): - return get_period_expr().parseString(content, parseAll=True)[0] + return get_period_expr().parse_string(content, parse_all=True)[0]