diff --git a/.github/workflows/rich_codex.yml b/.github/workflows/rich_codex.yml index 3686def..ad75db9 100644 --- a/.github/workflows/rich_codex.yml +++ b/.github/workflows/rich_codex.yml @@ -14,6 +14,7 @@ jobs: runs-on: ubuntu-latest env: FORCE_COLOR: "true" + steps: - name: Check out the repo uses: actions/checkout@v3 diff --git a/CHANGELOG.md b/CHANGELOG.md index 037c0e0..6e0f0ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Mash sketch files are now validated using MD5 checksum to ensure integrity +- Added `--table-path` option to `search-pangenomes` to save TSV output to a file +- Added `--no-table` flag to disable table output in `search-pangenomes` +- `list-collections` now automatically detects terminal vs redirected output: displays formatted table in interactive terminals, plain TSV when redirected to files or pipes + +### Changed + +- `search-pangenomes` table output now defaults to stdout instead of a file (previous default was `pangenomes_information.tsv`) ## [0.1.1] - 2025-11-09 diff --git a/README.md b/README.md index 0a62f04..1ec84af 100644 --- a/README.md +++ b/README.md @@ -107,10 +107,10 @@ pangbank search-pangenomes --help pangbank list-collections ``` -Displays all pangenome collections available in PanGBank, along with their description and the number of pangenomes they contain. +Displays the list of all pangenome collections available in PanGBank, along with their description and the number of pangenomes they contain. + +Output is formatted as a rich table in the terminal, or as plain TSV when redirected (e.g., `pangbank list-collections > collections.tsv`). 
- -![`TERMINAL_WIDTH=110 pangbank list-collections`](docs/img/pangbank-list-collections.svg) ### Search for pangenomes @@ -118,10 +118,12 @@ Displays all pangenome collections available in PanGBank, along with their descr pangbank search-pangenomes --taxon "g__Escherichia" ``` -Searches PanGBank for pangenomes matching the given taxon. -Results are saved as a **TSV file** named 'pangenomes_information.tsv' by default containing summary metrics for the matching pangenomes. +![`pangbank search-pangenomes --taxon "g__Escherichia" --no-progress --table-path pangenome_information.tsv`](docs/img/pangbank-search-pangenomes_taxon_Escherichia.svg) + + +This command searches PanGBank for pangenomes matching the given taxon. +Results are printed to **stdout** as plain TSV by default (suitable for piping or redirection). Use `--table-path <path>` to save directly to a file (e.g., `--table-path pangenomes_information.tsv`), or `--no-table` to disable table output. -![`pangbank search-pangenomes --taxon "g__Escherichia" --no-progress`](docs/img/pangbank-search-pangenomes_taxon_Escherichia.svg) ### Download pangenomes @@ -132,10 +134,11 @@ pangbank search-pangenomes --taxon "g__Chlamydia" \ --download ``` -Searches for **Chlamydia** pangenomes in the `GTDB_refseq` collection, then downloads the corresponding pangenome files into `Chlamydia_pangenomes/`. - ![`pangbank search-pangenomes --taxon "g__Chlamydia" --collection GTDB_refseq --outdir Chlamydia_pangenomes/ --download --no-progress`](docs/img/pangbank-search-pangenomes_taxon_Chlamydia_download.svg) +This command searches for **Chlamydia** pangenomes in the `GTDB_refseq` collection, then downloads the corresponding pangenome files into `Chlamydia_pangenomes/`. 
+ + ### Match a genome to an existing pangenome ```bash diff --git a/docs/img/pangbank-list-collections.svg b/docs/img/pangbank-list-collections.svg deleted file mode 100644 index a393007..0000000 --- a/docs/img/pangbank-list-collections.svg +++ /dev/null @@ -1,133 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - [13:45:39]INFO     Found 2 collections in PanGBank.                 main.py:174 - -Available collections of PanGBank:                                                                             -┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ -Collection Description               Latest releaseRelease dateTaxonomy     Pangenome Count -┡━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ -GTDB_refseqA collection of pangenomes1.0.0         10 Jul 2025 GTDB:10-RS2261770            -built from GTDB species,   -each represented by at     -least 15 genomes from      -RefSeq.                    -├─────────────┼────────────────────────────┼────────────────┼──────────────┼───────────────┼─────────────────┤ -GTDB_all   A collection of pangenomes1.0.0         05 Aug 2025 GTDB:10-RS2264351            -built from GTDB species,   -each represented by at     -least 15 genomes from      -RefSeq or GenBank.         
-└─────────────┴────────────────────────────┴────────────────┴──────────────┴───────────────┴─────────────────┘ - - - - diff --git a/docs/img/pangbank-search-pangenomes_help.svg b/docs/img/pangbank-search-pangenomes_help.svg index d9deade..767be94 100644 --- a/docs/img/pangbank-search-pangenomes_help.svg +++ b/docs/img/pangbank-search-pangenomes_help.svg @@ -1,4 +1,4 @@ - + - - + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + + + + + + + - + - + - - $ pangbank search-pangenomes --help - -Usage: pangbank search-pangenomes [OPTIONS] - - Search for pangenomes.                                                          - -╭─ Options ────────────────────────────────────────────────────────────────────╮ ---help-h        Show this message and exit.                                 -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Search filters ─────────────────────────────────────────────────────────────╮ ---collection-cTEXT  Filter pangenomes by collection  -                                              name (e.g. 'GTDB_refseq').       ---taxon-tTEXT  Filter pangenomes by taxon name  -                                              (e.g. 'Escherichia').            ---genome-gTEXT  Filter pangenomes by genome      -                                              assembly identifier (e.g.        -                                              'GCF_000354175.2').              ---exact-match--no-exact-match  Use exact string matching        -                                              instead of partial matches.      -[default: no-exact-match]       -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Output and downloads ───────────────────────────────────────────────────────╮ ---download--no-download  Download HDF5 pangenome files.           
-[default: no-download]         ---outdirPATH  Output directory for downloaded          -                                      pangenomes.                              -[default: pangbank]                     ---details--no-details  Display summary information for each     -                                      matching pangenome.                      -[default: no-details]                   ---tablePATH  Save a TSV table summarizing the         -                                      matching pangenomes. Use '-' to print    -                                      the table to stdout.                     -[default: pangenomes_information.tsv]   -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Execution settings ─────────────────────────────────────────────────────────╮ ---api-urlVALIDATE_API_URL  URL of the PanGBank API.     -[env var: PANGBANK_API_URL] -[default:                   -https://pangbank-api.genos… ---verbose  Enable verbose logging.      ---progress--no-progress  Show progress bar while      -                                                  fetching pangenomes          -                                                  (disable with                ---no-progress).              -[default: progress]         -╰──────────────────────────────────────────────────────────────────────────────╯ - + + $ pangbank search-pangenomes --help + +Usage: pangbank search-pangenomes [OPTIONS] + + Search for pangenomes.                                                          + +╭─ Options ────────────────────────────────────────────────────────────────────╮ +--help-h        Show this message and exit.                                 +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Search filters ─────────────────────────────────────────────────────────────╮ +--collection-cTEXT  Filter pangenomes by collection  +                                              name (e.g. 'GTDB_refseq').       
+--taxon-tTEXT  Filter pangenomes by taxon name  +                                              (e.g. 'Escherichia').            +--genome-gTEXT  Filter pangenomes by genome      +                                              assembly identifier (e.g.        +                                              'GCF_000354175.2').              +--exact-match--no-exact-match  Use exact string matching        +                                              instead of partial matches.      +[default: no-exact-match]       +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Output and downloads ───────────────────────────────────────────────────────╮ +--download--no-download  Download HDF5 pangenome files.         +[default: no-download]         +--outdirPATH  Output directory for downloaded        +                                        pangenomes.                            +[default: pangbank]                   +--details--no-details  Display summary information for each   +                                        matching pangenome.                    +[default: no-details]                 +--table--no-table  Output a TSV table summarizing the     +                                        matching pangenomes to stdout.         +[default: table]                      +--table-pathPATH  Save TSV table to a file instead of    +                                        stdout (e.g.,                          +                                        pangenomes_information.tsv). Implies   +--table.                               +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Execution settings ─────────────────────────────────────────────────────────╮ +--api-urlVALIDATE_API_URL  URL of the PanGBank API.     +[env var: PANGBANK_API_URL] +[default:                   +https://pangbank-api.genos… +--verbose  Enable verbose logging.      
+--progress--no-progress  Show progress bar while      +                                                  fetching pangenomes          +                                                  (disable with                +--no-progress).              +[default: progress]         +╰──────────────────────────────────────────────────────────────────────────────╯ + diff --git a/docs/img/pangbank-search-pangenomes_taxon_Chlamydia_download.svg b/docs/img/pangbank-search-pangenomes_taxon_Chlamydia_download.svg index 6824f1d..b071a93 100644 --- a/docs/img/pangbank-search-pangenomes_taxon_Chlamydia_download.svg +++ b/docs/img/pangbank-search-pangenomes_taxon_Chlamydia_download.svg @@ -19,77 +19,77 @@ font-weight: 700; } - .terminal-4280735415-matrix { + .terminal-937873522-matrix { font-family: Fira Code, monospace; font-size: 20px; line-height: 24.4px; font-variant-east-asian: full-width; } - .terminal-4280735415-title { + .terminal-937873522-title { font-size: 18px; font-weight: bold; font-family: arial; } - .terminal-4280735415-r1 { fill: #c5c8c6 } -.terminal-4280735415-r2 { fill: #4e707b } -.terminal-4280735415-r3 { fill: #608ab1 } -.terminal-4280735415-r4 { fill: #868887 } -.terminal-4280735415-r5 { fill: #d0b344 } -.terminal-4280735415-r6 { fill: #98729f } -.terminal-4280735415-r7 { fill: #00823d;font-style: italic; } -.terminal-4280735415-r8 { fill: #68a0b3;font-weight: bold } -.terminal-4280735415-r9 { fill: #98a84b } + .terminal-937873522-r1 { fill: #c5c8c6 } +.terminal-937873522-r2 { fill: #4e707b } +.terminal-937873522-r3 { fill: #608ab1 } +.terminal-937873522-r4 { fill: #868887 } +.terminal-937873522-r5 { fill: #d0b344 } +.terminal-937873522-r6 { fill: #98729f } +.terminal-937873522-r7 { fill: #00823d;font-style: italic; } +.terminal-937873522-r8 { fill: #68a0b3;font-weight: bold } +.terminal-937873522-r9 { fill: #98a84b } - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -101,25 +101,25 @@ - + - - $ pangbank search-pangenomes --taxon "g__Chlamydia" 
--collection GTDB_refseq  ---outdir Chlamydia_pangenomes/ --download --no-progress -[13:45:44]INFO     Counting pangenomes for                    pangenomes.py:103 -taxon_name=g__Chlamydia &                   -substring_taxon_match=True &                -only_latest_release=True &                  -collection_name=GTDB_refseq -[13:45:45]INFO     Found 3 pangenomes matching search         pangenomes.py:112 -         criteria.                                   -INFO     Fetching information for the 3 pangenomes. pangenomes.py:114 -[13:45:46]INFO     The 3 pangenomes matching search criteria  pangenomes.py:152 -         are from 1 collection : 'GTDB_refseq' -INFO     Saving pangenomes information as TSV table to    main.py:289 -         file: pangenomes_information.tsv                  -INFO     Downloading 3 pangenome files to           pangenomes.py:480 -'Chlamydia_pangenomes/' + + $ pangbank search-pangenomes --taxon "g__Chlamydia" --collection GTDB_refseq  +--outdir Chlamydia_pangenomes/ --download --no-progress +[16:06:35]INFO     Counting pangenomes for                    pangenomes.py:103 +taxon_name=g__Chlamydia &                   +substring_taxon_match=True &                +only_latest_release=True &                  +collection_name=GTDB_refseq +[16:06:36]INFO     Found 3 pangenomes matching search         pangenomes.py:112 +         criteria.                                   +INFO     Fetching information for the 3 pangenomes. 
pangenomes.py:114 +[16:06:37]INFO     The 3 pangenomes matching search criteria  pangenomes.py:152 +         are from 1 collection : 'GTDB_refseq' +INFO     Printing pangenomes information as TSV table to  main.py:306 +         stdout                                            +INFO     Downloading 3 pangenome files to           pangenomes.py:480 +'Chlamydia_pangenomes/' diff --git a/docs/img/pangbank-search-pangenomes_taxon_Escherichia.svg b/docs/img/pangbank-search-pangenomes_taxon_Escherichia.svg index b112f52..2a80a16 100644 --- a/docs/img/pangbank-search-pangenomes_taxon_Escherichia.svg +++ b/docs/img/pangbank-search-pangenomes_taxon_Escherichia.svg @@ -1,4 +1,4 @@ - + - - + + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + - + - + - - $ pangbank search-pangenomes --taxon "g__Escherichia" --no-progress -[13:45:41]INFO     Counting pangenomes for                    pangenomes.py:103 -taxon_name=g__Escherichia &                 -substring_taxon_match=True &                -only_latest_release=True -[13:45:42]INFO     Found 14 pangenomes matching search        pangenomes.py:112 -         criteria.                                   -INFO     Fetching information for the 14pangenomes.py:114 -         pangenomes.                                 -[13:45:43]INFO     The 14 pangenomes matching search criteria pangenomes.py:152 -         are from 2 collections : 'GTDB_refseq',     -'GTDB_all' -INFO     Saving pangenomes information as TSV table to    main.py:289 -         file: pangenomes_information.tsv                  + + $ pangbank search-pangenomes --taxon "g__Escherichia" --no-progress --table-path +pangenome_information.tsv +[16:06:32]INFO     Counting pangenomes for                    pangenomes.py:103 +taxon_name=g__Escherichia &                 +substring_taxon_match=True &                +only_latest_release=True +[16:06:33]INFO     Found 14 pangenomes matching search        pangenomes.py:112 +         criteria.                                   
+INFO     Fetching information for the 14pangenomes.py:114 +         pangenomes.                                 +[16:06:34]INFO     The 14 pangenomes matching search criteria pangenomes.py:152 +         are from 2 collections : 'GTDB_all',        +'GTDB_refseq' +INFO     Saving pangenomes information as TSV table to    main.py:301 +         file: pangenome_information.tsv                   diff --git a/pangbank_cli/main.py b/pangbank_cli/main.py index 6f84cd4..582eb74 100644 --- a/pangbank_cli/main.py +++ b/pangbank_cli/main.py @@ -174,7 +174,13 @@ def list_collections( logger.info(f"Found {len(collections)} collections in PanGBank.") df = format_collections_to_dataframe(collections) - print_dataframe_as_rich_table(df, title="Available collections of PanGBank:") + + # Use rich formatting if interactive terminal, plain TSV if redirected + if sys.stdout.isatty(): + print_dataframe_as_rich_table(df, title="Available collections of PanGBank:") + else: + df.to_csv(sys.stdout, index=False, sep="\t") + print_yaml = False if print_yaml: yaml_collections = format_collections_to_yaml(collections) @@ -234,17 +240,24 @@ def search_pangenomes( rich_help_panel="Output and downloads", ), ] = False, + table: Annotated[ + bool, + typer.Option( + help="Output a TSV table summarizing the matching pangenomes to stdout.", + rich_help_panel="Output and downloads", + ), + ] = True, table_path: Annotated[ - Path, + Optional[Path], typer.Option( - "--table", + "--table-path", help=( - "Save a TSV table summarizing the matching pangenomes. " - "Use '-' to print the table to stdout." + "Save TSV table to a file instead of stdout (e.g., pangenomes_information.tsv). " + "Implies --table." 
), rich_help_panel="Output and downloads", ), - ] = Path("pangenomes_information.tsv"), + ] = None, # Execution settings api_url: HttpUrl = ApiUrlOption, verbose: bool = Verbose, @@ -282,14 +295,18 @@ def search_pangenomes( df = format_pangenomes_to_dataframe(pangenomes) - if str(table_path) == "-": - logger.info("Printing pangenomes information as TSV table to stdout") - output_handle: TextIO | Path = sys.stdout - else: - logger.info(f"Saving pangenomes information as TSV table to file: {table_path}") - output_handle: TextIO | Path = table_path + # Output table if enabled + if table or table_path is not None: + if table_path is not None: + logger.info( + f"Saving pangenomes information as TSV table to file: {table_path}" + ) + output_handle: TextIO | Path = table_path + else: + logger.info("Printing pangenomes information as TSV table to stdout") + output_handle: TextIO | Path = sys.stdout - df.to_csv(output_handle, index=False, sep="\t") + df.to_csv(output_handle, index=False, sep="\t") if details: display_pangenome_summary_by_collection(pangenomes, True) diff --git a/pangbank_cli/utils.py b/pangbank_cli/utils.py index 1f63d35..9935787 100644 --- a/pangbank_cli/utils.py +++ b/pangbank_cli/utils.py @@ -28,7 +28,7 @@ def print_dataframe_as_rich_table(df: pd.DataFrame, title: Optional[str] = None) except ValueError: pass # If not a valid integer, keep None - console = Console(stderr=True, width=terminal_width) + console = Console(width=terminal_width) table = Table( title=title, show_header=True,