Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ dist/
llmsql_workdir

evaluation_*
coverage.xml
2 changes: 1 addition & 1 deletion examples/inference_transformers.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "llmsql-benchmark-3.11",
"display_name": "llmsql-benchmark-3.11 (3.11.13)",
"language": "python",
"name": "python3"
},
Expand Down
85 changes: 85 additions & 0 deletions examples/inference_transformers_version_1.0.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e71979fc",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/pihul/Desktop/gdrive/Projects/llmsql-benchmark/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model from: EleutherAI/pythia-14m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"`torch_dtype` is deprecated! Use `dtype` instead!\n",
"2026-02-23 09:00:00,424 [INFO] llmsql-bench: Removing existing path: llmsql_workdir/questions.jsonl\n",
"2026-02-23 09:00:00,426 [INFO] llmsql-bench: Downloading questions.jsonl from Hugging Face Hub...\n",
"2026-02-23 09:00:01,433 [INFO] llmsql-bench: Downloaded questions.jsonl to: llmsql_workdir/questions.jsonl\n",
"2026-02-23 09:00:01,434 [INFO] llmsql-bench: Removing existing path: llmsql_workdir/tables.jsonl\n",
"2026-02-23 09:00:01,436 [INFO] llmsql-bench: Downloading tables.jsonl from Hugging Face Hub...\n",
"2026-02-23 09:00:02,117 [INFO] llmsql-bench: Downloaded tables.jsonl to: llmsql_workdir/tables.jsonl\n",
"2026-02-23 09:00:03,373 [INFO] llmsql-bench: Limiting evaluation to first 100 questions out of 80330\n",
"2026-02-23 09:00:03,383 [INFO] llmsql-bench: Writing results to test_output.jsonl\n",
"2026-02-23 09:00:03,384 [INFO] llmsql-bench: Using 5-shot prompt builder: build_prompt_5shot\n",
"Generating: 0%| | 0/4 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
"The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.\n",
"2026-02-23 09:00:10,474 [INFO] llmsql-bench: Saved batch 1: 25/100\n",
"Generating: 25%|██▌ | 1/4 [00:07<00:21, 7.09s/it]2026-02-23 09:00:19,324 [INFO] llmsql-bench: Saved batch 2: 50/100\n",
"Generating: 50%|█████ | 2/4 [00:15<00:16, 8.12s/it]2026-02-23 09:00:28,513 [INFO] llmsql-bench: Saved batch 3: 75/100\n",
"Generating: 75%|███████▌ | 3/4 [00:25<00:08, 8.61s/it]2026-02-23 09:00:37,861 [INFO] llmsql-bench: Saved batch 4: 100/100\n",
"Generating: 100%|██████████| 4/4 [00:34<00:00, 8.62s/it]\n",
"2026-02-23 09:00:37,864 [INFO] llmsql-bench: Generation complete — total: 100\n"
]
}
],
"source": [
"from llmsql import inference_transformers\n",
"\n",
"# Example 1: Basic usage (same as before)\n",
"results = inference_transformers(\n",
" model_or_model_name_or_path=\"EleutherAI/pythia-14m\",\n",
" output_file=\"test_output.jsonl\",\n",
" batch_size=25,\n",
" do_sample=False,\n",
" limit=100,\n",
" version=\"1.0\"\n",
" )\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "llmsql-benchmark-3.11 (3.11.13)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
154 changes: 5 additions & 149 deletions llmsql/__main__.py
Original file line number Diff line number Diff line change
@@ -1,155 +1,11 @@
import argparse
import inspect
import json
from llmsql._cli import ParserCLI


def main() -> None:
parser = argparse.ArgumentParser(prog="llmsql", description="LLMSQL CLI")
subparsers = parser.add_subparsers(dest="command")

# ================================================================
# Inference command
# ================================================================
inference_examples = r"""
Examples:

# 1️⃣ Run inference with Transformers backend
llmsql inference --method transformers \
--model-or-model-name-or-path Qwen/Qwen2.5-1.5B-Instruct \
--output-file outputs/preds_transformers.jsonl \
--batch-size 8 \
--num-fewshots 5

# 2️⃣ Run inference with vLLM backend
llmsql inference --method vllm \
--model-name Qwen/Qwen2.5-1.5B-Instruct \
--output-file outputs/preds_vllm.jsonl \
--batch-size 8 \
--num-fewshots 5

# 3️⃣ Pass model-specific kwargs (for Transformers)
llmsql inference --method transformers \
--model-or-model-name-or-path meta-llama/Llama-3-8b-instruct \
--output-file outputs/llama_preds.jsonl \
--model-kwargs '{"attn_implementation": "flash_attention_2", "torch_dtype": "bfloat16"}'

# 4️⃣ Pass LLM init kwargs (for vLLM)
llmsql inference --method vllm \
--model-name mistralai/Mixtral-8x7B-Instruct-v0.1 \
--output-file outputs/mixtral_preds.jsonl \
--llm-kwargs '{"max_model_len": 4096, "gpu_memory_utilization": 0.9}'

# 5️⃣ Override generation parameters dynamically
llmsql inference --method transformers \
--model-or-model-name-or-path Qwen/Qwen2.5-1.5B-Instruct \
--output-file outputs/temp_0.9.jsonl \
--temperature 0.9 \
--generation-kwargs '{"do_sample": true, "top_p": 0.9, "top_k": 40}'
"""

inf_parser = subparsers.add_parser(
"inference",
help="Run inference using either Transformers or vLLM backend.",
description="Run SQL generation using a chosen inference method "
"(either 'transformers' or 'vllm').",
epilog=inference_examples,
formatter_class=argparse.RawTextHelpFormatter,
)

inf_parser.add_argument(
"--method",
type=str,
required=True,
choices=["transformers", "vllm"],
help="Inference backend to use ('transformers' or 'vllm').",
)

# ================================================================
# Parse CLI
# ================================================================
args, extra = parser.parse_known_args()

# ------------------------------------------------
# Inference
# ------------------------------------------------
if args.command == "inference":
if args.method == "vllm":
from llmsql import inference_vllm as inference_fn
elif args.method == "transformers":
from llmsql import inference_transformers as inference_fn # type: ignore
else:
raise ValueError(f"Unknown inference method: {args.method}")

# Dynamically create parser from the function signature
fn_parser = argparse.ArgumentParser(
prog=f"llmsql inference --method {args.method}",
description=f"Run inference using {args.method} backend",
)

sig = inspect.signature(inference_fn)
for name, param in sig.parameters.items():
if param.kind == inspect.Parameter.VAR_KEYWORD:
fn_parser.add_argument(
"--llm-kwargs",
default="{}",
help="Additional LLM kwargs as a JSON string, e.g. '{\"top_p\": 0.9}'",
)
fn_parser.add_argument(
"--generate-kwargs",
default="{}",
help="",
)
continue
arg_name = f"--{name.replace('_', '-')}"
default = param.default
if default is inspect.Parameter.empty:
fn_parser.add_argument(arg_name, required=True)
else:
if isinstance(default, bool):
fn_parser.add_argument(
arg_name,
action="store_true" if not default else "store_false",
help=f"(default: {default})",
)
elif default is None:
fn_parser.add_argument(arg_name, type=str, default=None)
else:
fn_parser.add_argument(
arg_name, type=type(default), default=default
)

fn_args = fn_parser.parse_args(extra)
fn_kwargs = vars(fn_args)

if "llm_kwargs" in fn_kwargs and isinstance(fn_kwargs["llm_kwargs"], str):
try:
fn_kwargs["llm_kwargs"] = json.loads(fn_kwargs["llm_kwargs"])
except json.JSONDecodeError:
print("⚠️ Could not parse --llm-kwargs JSON, passing as string.")

if fn_kwargs.get("model_kwargs") is not None:
try:
fn_kwargs["model_kwargs"] = json.loads(fn_kwargs["model_kwargs"])
except json.JSONDecodeError:
raise

if fn_kwargs.get("generation_kwargs") is not None:
try:
fn_kwargs["generation_kwargs"] = json.loads(
fn_kwargs["generation_kwargs"]
)
except json.JSONDecodeError:
raise

print(f"🔹 Running {args.method} inference with arguments:")
for k, v in fn_kwargs.items():
print(f" {k}: {v}")

results = inference_fn(**fn_kwargs)
print(f"✅ Inference complete. Generated {len(results)} results.")

else:
parser.print_help()
"""Main CLI entry point."""
parser = ParserCLI()
args = parser.parse_args()
parser.execute(args)


if __name__ == "__main__":
Expand Down
7 changes: 7 additions & 0 deletions llmsql/_cli/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""
CLI subcommands to run from the terminal.
"""

from .llmsql_cli import ParserCLI

__all__ = ["ParserCLI"]
Empty file added llmsql/_cli/evaluate.py
Empty file.
Loading