Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions leaderboard/Llama-3.2-1B-Instruct/5fewshots/inference_script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os

from dotenv import load_dotenv

from llmsql import evaluate, inference_vllm

# Load HF_TOKEN (and any other secrets) from a local .env file.
load_dotenv()

MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"

# Use only the model's basename for the output file: keeping the full
# HF id would embed "meta-llama/" in the path and try to write into a
# subdirectory that may not exist. This also matches the uploaded
# artifact name recorded in run.yaml (Llama-3.2-1B-Instruct_outputs.jsonl).
OUTPUT_FILE = f"{MODEL_NAME.split('/')[-1]}_outputs.jsonl"

# Greedy decoding (do_sample=False, temperature=0.0) with 5 few-shot
# examples; seed fixed for reproducibility across runs.
results = inference_vllm(
    model_name=MODEL_NAME,
    output_file=OUTPUT_FILE,
    batch_size=20000,
    tensor_parallel_size=4,
    do_sample=False,
    hf_token=os.environ["HF_TOKEN"],  # fail fast (KeyError) if the token is missing
    max_new_tokens=256,
    temperature=0.0,
    num_fewshots=5,
    seed=42,
    llm_kwargs={"dtype": "bfloat16"},
)

# Score the generated SQL answers and report execution accuracy.
evaluate(results)
172 changes: 172 additions & 0 deletions leaderboard/Llama-3.2-1B-Instruct/5fewshots/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
accelerate==1.12.0
aiohappyeyeballs==2.6.1
aiohttp==3.13.3
aiosignal==1.4.0
annotated-doc==0.0.4
annotated-types==0.7.0
anthropic==0.83.0
anyio==4.12.1
apache-tvm-ffi==0.1.8.post2
astor==0.8.1
attrs==25.4.0
blake3==1.0.8
cachetools==7.0.1
cbor2==5.8.0
certifi==2026.1.4
cffi==2.0.0
charset-normalizer==3.4.4
click==8.3.1
cloudpickle==3.1.2
compressed-tensors==0.13.0
cryptography==46.0.5
cuda-bindings==13.1.1
cuda-pathfinder==1.3.5
cuda-python==13.1.1
cupy-cuda12x==14.0.1
datasets==4.5.0
depyf==0.20.0
dill==0.4.0
diskcache==5.6.3
distro==1.9.0
dnspython==2.8.0
docstring_parser==0.17.0
einops==0.8.2
email-validator==2.3.0
fastapi==0.132.0
fastapi-cli==0.0.23
fastapi-cloud-cli==0.13.0
fastar==0.8.0
filelock==3.24.3
flashinfer-python==0.6.1
frozenlist==1.8.0
fsspec==2025.10.0
gguf==0.17.1
grpcio==1.78.1
grpcio-reflection==1.78.1
h11==0.16.0
hf-xet==1.3.0
httpcore==1.0.9
httptools==0.7.1
httpx==0.28.1
httpx-sse==0.4.3
huggingface_hub==0.36.2
idna==3.11
ijson==3.5.0
interegular==0.3.3
Jinja2==3.1.6
jiter==0.13.0
jmespath==1.1.0
jsonschema==4.26.0
jsonschema-specifications==2025.9.1
lark==1.2.2
llguidance==1.3.0
llmsql==0.1.15
llvmlite==0.44.0
lm-format-enforcer==0.11.3
loguru==0.7.3
markdown-it-py==4.0.0
MarkupSafe==3.0.3
mcp==1.26.0
mdurl==0.1.2
mistral_common==1.9.1
model-hosting-container-standards==0.1.13
mpmath==1.3.0
msgpack==1.1.2
msgspec==0.20.0
multidict==6.7.1
multiprocess==0.70.18
networkx==3.6.1
ninja==1.13.0
numba==0.61.2
numpy==2.2.6
nvidia-cublas-cu12==12.8.4.1
nvidia-cuda-cupti-cu12==12.8.90
nvidia-cuda-nvrtc-cu12==12.8.93
nvidia-cuda-runtime-cu12==12.8.90
nvidia-cudnn-cu12==9.10.2.21
nvidia-cudnn-frontend==1.18.0
nvidia-cufft-cu12==11.3.3.83
nvidia-cufile-cu12==1.13.1.3
nvidia-curand-cu12==10.3.9.90
nvidia-cusolver-cu12==11.7.3.90
nvidia-cusparse-cu12==12.5.8.93
nvidia-cusparselt-cu12==0.7.1
nvidia-cutlass-dsl==4.4.0
nvidia-cutlass-dsl-libs-base==4.4.0
nvidia-ml-py==13.590.48
nvidia-nccl-cu12==2.27.5
nvidia-nvjitlink-cu12==12.8.93
nvidia-nvshmem-cu12==3.3.20
nvidia-nvtx-cu12==12.8.90
openai==2.23.0
openai-harmony==0.0.8
opencv-python-headless==4.13.0.92
outlines_core==0.2.11
packaging==26.0
pandas==3.0.1
partial-json-parser==0.2.1.1.post7
pillow==12.1.1
prometheus-fastapi-instrumentator==7.1.0
prometheus_client==0.24.1
propcache==0.4.1
protobuf==6.33.5
psutil==7.2.2
py-cpuinfo==9.0.0
pyarrow==23.0.1
pybase64==1.4.3
pycountry==26.2.16
pycparser==3.0
pydantic==2.12.5
pydantic-extra-types==2.11.0
pydantic-settings==2.13.1
pydantic_core==2.41.5
Pygments==2.19.2
PyJWT==2.11.0
python-dateutil==2.9.0.post0
python-dotenv==1.2.1
python-json-logger==4.0.0
python-multipart==0.0.22
PyYAML==6.0.3
pyzmq==27.1.0
ray==2.54.0
referencing==0.37.0
regex==2026.2.19
requests==2.32.5
rich==14.3.3
rich-toolkit==0.19.4
rignore==0.7.6
rpds-py==0.30.0
safetensors==0.7.0
sentencepiece==0.2.1
sentry-sdk==2.53.0
setproctitle==1.3.7
setuptools==80.10.2
shellingham==1.5.4
six==1.17.0
sniffio==1.3.1
sse-starlette==3.2.0
starlette==0.52.1
supervisor==4.3.0
sympy==1.14.0
tabulate==0.9.0
tiktoken==0.12.0
tokenizers==0.22.2
torch==2.9.1
torchaudio==2.9.1
torchvision==0.24.1
tqdm==4.67.3
transformers==4.57.6
triton==3.5.1
typer==0.24.1
typer-slim==0.24.0
typing-inspection==0.4.2
typing_extensions==4.15.0
urllib3==2.6.3
uvicorn==0.41.0
uvloop==0.22.1
vllm==0.15.1
watchfiles==1.1.1
websockets==16.0
xgrammar==0.1.29
xxhash==3.6.0
yarl==1.22.0
57 changes: 57 additions & 0 deletions leaderboard/Llama-3.2-1B-Instruct/5fewshots/run.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
date: 2026-02-24
# =====================
# Model Information
# =====================
model:
name: meta-llama/Llama-3.2-1B-Instruct
revision: main
commit_hash: 9213176726f574b556790deb65791e0c5aa438b6
parameter_count: 1B
dtype: bfloat16
thinking: false

type: open-source # open-source | proprietary

# =====================
# Package Information
# =====================
llmsql:
version: 0.1.15
commit_hash: 79175212c90b1fc094abd2c9666c23d903060014

# =====================
# Benchmark Information
# =====================
version: 2.0

# =====================
# Environment Information
# =====================
os_name: Ubuntu 24.04.3 LTS
python_version: 3.12.12
pip_freeze: requirements.txt
device: 4xH200

# =====================
# Function Inputs / Inference Backend
# =====================
inference:
backend: vllm # vllm | transformers
arguments:
batch_size: 20000
tensor_parallel_size: 4
do_sample: false
max_new_tokens: 256
temperature: 0.0
num_fewshots: 5
seed: 42
llm_kwargs:
dtype: bfloat16


# =====================
# Results
# =====================
results:
execution_accuracy: 0.2678
answers_path: https://huggingface.co/datasets/llmsql-bench/benchmark-evaluation-results/blob/main/Llama-3.2-1B-Instruct/5fewshots/Llama-3.2-1B-Instruct_outputs.jsonl
25 changes: 25 additions & 0 deletions leaderboard/Llama-3.2-3B-Instruct/5fewshots/inference_script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os

from dotenv import load_dotenv

from llmsql import evaluate, inference_vllm

# Load HF_TOKEN (and any other secrets) from a local .env file.
load_dotenv()

MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"

# Use only the model's basename for the output file: keeping the full
# HF id would embed "meta-llama/" in the path and try to write into a
# subdirectory that may not exist; the leaderboard artifact is named
# without the org prefix.
OUTPUT_FILE = f"{MODEL_NAME.split('/')[-1]}_outputs.jsonl"

# Greedy decoding (do_sample=False, temperature=0.0) with 5 few-shot
# examples; seed fixed for reproducibility across runs.
results = inference_vllm(
    model_name=MODEL_NAME,
    output_file=OUTPUT_FILE,
    batch_size=20000,
    tensor_parallel_size=4,
    do_sample=False,
    hf_token=os.environ["HF_TOKEN"],  # fail fast (KeyError) if the token is missing
    max_new_tokens=256,
    temperature=0.0,
    num_fewshots=5,
    seed=42,
    llm_kwargs={"dtype": "bfloat16"},
)

# Score the generated SQL answers and report execution accuracy.
evaluate(results)
Loading