diff --git a/compiler/CMakeLists.txt b/compiler/CMakeLists.txt index 216116f..bdae539 100644 --- a/compiler/CMakeLists.txt +++ b/compiler/CMakeLists.txt @@ -17,12 +17,14 @@ set(CMAKE_CXX_EXTENSIONS OFF) set(QBIN_COMPILER_SOURCES src/main.cpp src/compiler.cpp + src/tools.cpp src/qasm_frontend.cpp ) set(QBIN_COMPILER_HEADERS include/qbin_compiler/compiler.hpp include/qbin_compiler/qasm_frontend.hpp + include/qbin_compiler/tools.hpp ) add_executable(qbin-compile ${QBIN_COMPILER_SOURCES} ${QBIN_COMPILER_HEADERS}) diff --git a/compiler/include/qbin_compiler/qasm_frontend.hpp b/compiler/include/qbin_compiler/qasm_frontend.hpp index 8a1f055..81f2ef3 100644 --- a/compiler/include/qbin_compiler/qasm_frontend.hpp +++ b/compiler/include/qbin_compiler/qasm_frontend.hpp @@ -1,6 +1,4 @@ -#ifndef QBIN_COMPILER_QASM_FRONTEND_HPP -#define QBIN_COMPILER_QASM_FRONTEND_HPP - +#pragma once #include #include #include @@ -9,56 +7,32 @@ namespace qbin_compiler { namespace frontend { - // Core opcode values (aligned with spec draft) - enum class Opcode : uint8_t { - X = 0x01, Y = 0x02, Z = 0x03, H = 0x04, - S = 0x05, SDG = 0x06, T = 0x07, TDG = 0x08, - SX = 0x09, SXDG = 0x0A, + enum class Op : uint8_t { + X = 0x01, Y = 0x02, Z = 0x03, H = 0x04, S = 0x05, SDG = 0x06, T = 0x07, TDG = 0x08, SX = 0x09, SXDG = 0x0A, RX = 0x0B, RY = 0x0C, RZ = 0x0D, PHASE = 0x0E, - U = 0x0F, - CX = 0x10, CZ = 0x11, ECR = 0x12, SWAP = 0x13, CSX = 0x14, - CRX = 0x15, CRY = 0x16, CRZ = 0x17, CU = 0x18, + CX = 0x10, CZ = 0x11, ECR = 0x12, SWAP = 0x13, CSX = 0x14, CRX = 0x15, CRY = 0x16, CRZ = 0x17, CU = 0x18, RXX = 0x20, RYY = 0x21, RZZ = 0x22, - MEASURE = 0x30, RESET = 0x31, BARRIER = 0x32, - DELAY = 0x38, FRAME = 0x39, - // Structured control (MVP subset) - IF_EQ = 0x81, IF_NEQ = 0x82, ENDIF = 0x8F, - CALLG = 0x40 + MEASURE = 0x30, RESET = 0x31, BARRIER = 0x32, DELAY = 0x38, FRAME = 0x39, CALLG = 0x40, + IF_EQ = 0x81, IF_NEQ = 0x82, ENDIF = 0x8F }; struct Instr { - Opcode op; - int a = -1; // qubit a - int b = -1; // qubit b - int c = -1; // qubit c (unused here) - - // Angle slot 0 - bool has_angle0 = false; - bool angle0_is_param = false; // reserved - float angle0 = 0.0f; - - // Aux 32-bit payload (e.g., classical bit index for MEASURE/IF) - bool has_aux = false; - uint32_t aux_u32 = 0; - - // Small immediate byte (e.g., IF compare value 0/1) - bool has_imm8 = false; - uint8_t imm8 = 0; + Op op; + int a = -1, b = -1, c = -1; // qubit indices + bool has_angle = false; float angle = 0.0f; // for single-angle ops (rx/ry/rz/phase) + bool has_aux = false; uint32_t aux = 0; // for measure bit index or IF bit index + bool has_imm8 = false; uint8_t imm8 = 0; // for IF compare constant }; struct Program { - std::vector instrs; + std::vector code; + int max_qubit = -1; + int max_bit = -1; }; - // Parse minimal OpenQASM subset used by the MVP compiler: - // - h/x/y/z/s/sdg/t/tdg/sx/sxdg q[i]; - // - rx/ry/rz/phase() q[i]; - // - cx/cz/swap q[i], q[j]; - // - c[k] = measure q[i]; - // - if (c[k] == 1) { ; } (also supports != 0/1) - Program parse_qasm_subset(std::string_view text, bool verbose); + // Auto-detects QASM 2.0 or 3.0 and parses into our canonical IR. + // Supports QASM2 'gate' definitions with parameter substitution and 'u' decomposition. + Program parse_qasm_subset(std::string_view text, bool verbose = false); } // namespace frontend } // namespace qbin_compiler - -#endif // QBIN_COMPILER_QASM_FRONTEND_HPP diff --git a/compiler/include/qbin_compiler/tools.hpp b/compiler/include/qbin_compiler/tools.hpp new file mode 100644 index 0000000..1ccff1a --- /dev/null +++ b/compiler/include/qbin_compiler/tools.hpp @@ -0,0 +1,27 @@ +#pragma once +#include +#include +#include +#include + +namespace qbin_compiler { namespace util { + +// String helpers +std::string to_lower_ascii(std::string s); +std::string trim(std::string_view sv); + +// Split a comma-separated list, respecting parentheses depth if requested. +std::vector split_commas(const std::string& s, bool respect_parens = true); + +// Find the index of the matching ')' given an index of '(' in s. +// Returns std::string::npos if not found. +std::size_t find_matching_paren(const std::string& s, std::size_t open_pos); + +// Evaluate a numeric expression with: numbers, pi, + - * /, parentheses. +// Uses double precision; caller can cast to float if needed. +double eval_expr(const std::string& expr); + +// Very small logger: prints to stderr if enabled == true. +void vlog(bool enabled, const std::string& msg); + +}} // namespace diff --git a/compiler/src/compiler.cpp b/compiler/src/compiler.cpp index 4a2392b..90d2c87 100644 --- a/compiler/src/compiler.cpp +++ b/compiler/src/compiler.cpp @@ -1,138 +1,170 @@ -#include "qbin_compiler/compiler.hpp" #include "qbin_compiler/qasm_frontend.hpp" +#include #include #include +#include +#include #include +#include #include -namespace qbin_compiler { +namespace { - static inline void push_u32_le(std::vector& out, uint32_t v) { - out.push_back(static_cast(v & 0xFF)); - out.push_back(static_cast((v >> 8) & 0xFF)); - out.push_back(static_cast((v >> 16) & 0xFF)); - out.push_back(static_cast((v >> 24) & 0xFF)); + // ---- little-endian writers ---- + inline void push_u8(std::vector& out, uint8_t v) { + out.push_back(v); } - static inline void push_bytes(std::vector& out, const void* data, size_t n) { - const auto* p = static_cast(data); - out.insert(out.end(), p, p + n); + inline void push_u16(std::vector& out, uint16_t v) { + out.push_back(static_cast(v & 0xFF)); + out.push_back(static_cast((v >> 8) & 0xFF)); } - static inline void push_str(std::vector& out, const char* s) { - push_bytes(out, s, std::strlen(s)); + inline void push_u32(std::vector& out, uint32_t v) { + out.push_back(static_cast(v & 0xFF)); + out.push_back(static_cast((v >> 8) & 0xFF)); + out.push_back(static_cast((v >> 16) & 0xFF)); + out.push_back(static_cast((v >> 24) & 0xFF)); } - // ULEB128 - static inline void push_uleb128(std::vector& out, uint64_t n) { - do { - uint8_t b = static_cast(n & 0x7Fu); - n >>= 7; - if (n != 0) b |= 0x80u; - out.push_back(b); - } while (n != 0); + inline void push_bytes(std::vector& out, const void* data, size_t len) { + const auto* p = static_cast(data); + out.insert(out.end(), p, p + len); } - // Float32 (IEEE-754) write helper - static inline void push_f32_le(std::vector& out, float f) { + inline void push_f32le(std::vector& out, float f) { static_assert(sizeof(float) == 4, "float must be 32-bit"); uint32_t u; - std::memcpy(&u, &f, sizeof(u)); - push_u32_le(out, u); + std::memcpy(&u, &f, 4); + push_u32(out, u); } - // CRC32C (Castagnoli), reflected poly 0x82F63B78 - static inline uint32_t crc32c(const uint8_t* data, size_t len) { - static uint32_t table[256]; - static bool init = false; - if (!init) { - const uint32_t poly = 0x82F63B78u; - for (uint32_t i = 0; i < 256; ++i) { - uint32_t c = i; - for (int k = 0; k < 8; ++k) { - c = (c & 1u) ? (c >> 1) ^ poly : (c >> 1); - } - table[i] = c; - } - init = true; - } - uint32_t crc = 0xFFFFFFFFu; - for (size_t i = 0; i < len; ++i) { - crc = (crc >> 8) ^ table[(crc ^ data[i]) & 0xFFu]; + + // Unsigned LEB128 + inline void push_uleb128(std::vector& out, uint64_t val) { + for (;;) { + uint8_t byte = static_cast(val & 0x7F); + val >>= 7; + if (val != 0) { byte |= 0x80; out.push_back(byte); } + else { out.push_back(byte); break; } } - return crc ^ 0xFFFFFFFFu; } - static inline void encode_inst_section(const frontend::Program& prog, std::vector& out) { - // INST magic - push_str(out, "INST"); - // instr_count - push_uleb128(out, static_cast(prog.instrs.size())); - // encode instructions - for (const auto& I : prog.instrs) { - out.push_back(static_cast(I.op)); + // ---- QBIN v1 fixed header (20 bytes) ---- + // Layout (little-endian): + // 0: 'Q''B''I''N' + // 4: version (u16) = 1 + // 6: flags (u16) = 0 + // 8: header_sz (u32) = 20 + // 12: section_count (u32) = 1 + // 16: reserved (u32) = 0 + inline void write_qbin_header(std::vector& out, + uint16_t version, + uint16_t flags, + uint32_t section_count) { + constexpr uint32_t kHeaderSize = 20; + out.push_back('Q'); out.push_back('B'); out.push_back('I'); out.push_back('N'); + push_u16(out, version); + push_u16(out, flags); + push_u32(out, kHeaderSize); + push_u32(out, section_count); + push_u32(out, 0u); // reserved + } + +} // namespace + + +namespace qbin_compiler { + + // ---- INST section payload encoder ---- + // Format: + // "INST" 4 bytes + // instr_count uleb128 + // repeated instr { + // opcode u8 + // mask u8 (bit0=a, bit1=b, bit2=c, bit3=angle, bit7=aux) + // a,b,c uleb128 each if present + // if angle: tag=0x00 u8, angle_f32_le + // if aux: aux_u32_le + // if opcode in {IF_EQ, IF_NEQ}: imm8 (one byte), after aux if aux present + // } + static void encode_inst_payload(const frontend::Program& prog, + std::vector& payload) { + using frontend::Op; + + payload.clear(); + payload.reserve(16 + prog.code.size() * 8); + + // Tag + payload.push_back('I'); payload.push_back('N'); payload.push_back('S'); payload.push_back('T'); + + // Count + push_uleb128(payload, static_cast(prog.code.size())); + + // Body + for (const auto& I : prog.code) { + const uint8_t opcode = static_cast(I.op); uint8_t mask = 0; - if (I.a >= 0) mask |= 1u << 0; - if (I.b >= 0) mask |= 1u << 1; - if (I.c >= 0) mask |= 1u << 2; - if (I.has_angle0) mask |= 1u << 3; - if (I.has_aux) mask |= 1u << 7; - out.push_back(mask); - if (I.a >= 0) push_uleb128(out, static_cast(I.a)); - if (I.b >= 0) push_uleb128(out, static_cast(I.b)); - if (I.c >= 0) push_uleb128(out, static_cast(I.c)); - if (I.has_angle0) { out.push_back(0); push_f32_le(out, I.angle0); } // tag 0 = f32 - if (I.has_aux) { push_u32_le(out, I.aux_u32); } - // IF_* carry an extra imm8 after operands - uint8_t opc = static_cast(I.op); - if (opc == 0x81 || opc == 0x82) { - out.push_back(I.has_imm8 ? I.imm8 : 0); + if (I.a >= 0) mask |= 0x01; + if (I.b >= 0) mask |= 0x02; + if (I.c >= 0) mask |= 0x04; + if (I.has_angle) mask |= 0x08; + if (I.has_aux) mask |= 0x80; + + push_u8(payload, opcode); + push_u8(payload, mask); + + if (mask & 0x01) push_uleb128(payload, static_cast(I.a)); + if (mask & 0x02) push_uleb128(payload, static_cast(I.b)); + if (mask & 0x04) push_uleb128(payload, static_cast(I.c)); + + if (mask & 0x08) { + // angle tag 0x00 = f32 + push_u8(payload, 0x00); + push_f32le(payload, I.angle); + } + + if (mask & 0x80) { + push_u32(payload, I.aux); + } + + if (opcode == static_cast(Op::IF_EQ) || + opcode == static_cast(Op::IF_NEQ)) { + push_u8(payload, I.has_imm8 ? I.imm8 : 0); } } } - static inline std::vector encode_qbin_min(const frontend::Program& prog) { - // Build INST payload - std::vector inst; - encode_inst_section(prog, inst); - - // Layout - const uint32_t header_size = 24; - const uint32_t section_count = 1; - const uint32_t section_table_offset = header_size; - const uint32_t section_table_size = section_count * 16; // one entry - - // Header without CRC - std::vector header; - push_str(header, "QBIN"); // 0x00 - header.push_back(1); // major - header.push_back(0); // minor - header.push_back(0); // flags (LE, no table hash) - header.push_back(static_cast(header_size)); // header size - push_u32_le(header, section_count); // count - push_u32_le(header, section_table_offset); - push_u32_le(header, section_table_size); - // CRC32C over 0x00..0x13 - uint32_t crc = crc32c(header.data(), header.size()); - push_u32_le(header, crc); - - // Section table entry for INST - std::vector table; - uint32_t inst_id = 0; std::memcpy(&inst_id, "INST", 4); - push_u32_le(table, inst_id); - push_u32_le(table, section_table_offset + section_table_size); // offset 40 - push_u32_le(table, static_cast(inst.size())); - push_u32_le(table, 0); // flags - - // Assemble file - std::vector blob; - blob.reserve(header.size() + table.size() + inst.size()); - blob.insert(blob.end(), header.begin(), header.end()); - blob.insert(blob.end(), table.begin(), table.end()); - blob.insert(blob.end(), inst.begin(), inst.end()); - - return blob; + // ---- public API used by main.cpp ---- + + std::vector compile_qasm_to_qbin_min(const std::string& qasm_text, bool verbose) { + // 1) QASM -> IR + auto prog = frontend::parse_qasm_subset(qasm_text, verbose); + + // 2) Build file + std::vector out; + out.reserve(64); + + // Header (v1, flags=0, one section) + write_qbin_header(out, /*version*/ 1, /*flags*/ 0, /*section_count*/ 1); + + // INST payload + std::vector inst_payload; + encode_inst_payload(prog, inst_payload); + out.insert(out.end(), inst_payload.begin(), inst_payload.end()); + + return out; } - std::vector compile_qasm_to_qbin_min(const std::string& qasm_text, bool verbose) { - frontend::Program prog = frontend::parse_qasm_subset(qasm_text, verbose); - return encode_qbin_min(prog); + int compile_file_to_file(const std::string& in_path, const std::string& out_path, bool verbose) { + std::ifstream ifs(in_path, std::ios::binary); + if (!ifs) return 1; + std::string content((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); + + auto bytes = compile_qasm_to_qbin_min(content, verbose); + + std::ofstream ofs(out_path, std::ios::binary); + if (!ofs) return 2; + ofs.write(reinterpret_cast(bytes.data()), + static_cast(bytes.size())); + if (!ofs) return 3; + return 0; } } // namespace qbin_compiler diff --git a/compiler/src/qasm_frontend.cpp b/compiler/src/qasm_frontend.cpp index ee1169b..08635a8 100644 --- a/compiler/src/qasm_frontend.cpp +++ b/compiler/src/qasm_frontend.cpp @@ -1,271 +1,572 @@ #include "qbin_compiler/qasm_frontend.hpp" +#include "qbin_compiler/tools.hpp" -#include -#include -#include -#include +#include +#include #include +#include +#include +#include +#include + +using namespace std; +using qbin_compiler::util::to_lower_ascii; +using qbin_compiler::util::trim; +using qbin_compiler::util::split_commas; +using qbin_compiler::util::find_matching_paren; +using qbin_compiler::util::eval_expr; +using qbin_compiler::util::vlog; namespace qbin_compiler { namespace frontend { - static inline std::string trim_copy(std::string s) { - auto is_space = [](unsigned char ch) { return std::isspace(ch) != 0; }; - s.erase(s.begin(), std::find_if(s.begin(), s.end(), [&](char c) { return !is_space((unsigned char)c); })); - s.erase(std::find_if(s.rbegin(), s.rend(), [&](char c) { return !is_space((unsigned char)c); }).base(), s.end()); - return s; + // ---------- IR emit helpers ---------- + static inline void emit_1q(vector& out, Op op, int a) { + Instr i{}; i.op = op; i.a = a; out.push_back(i); + } + static inline void emit_2q(vector& out, Op op, int a, int b) { + Instr i{}; i.op = op; i.a = a; i.b = b; out.push_back(i); + } + static inline void emit_angle(vector& out, Op op, int a, double ang) { + Instr i{}; i.op = op; i.a = a; i.has_angle = true; i.angle = float(ang); out.push_back(i); + } + static inline void emit_measure(vector& out, int q, int c) { + Instr i{}; i.op = Op::MEASURE; i.a = q; i.has_aux = true; i.aux = (uint32_t)c; out.push_back(i); } - static bool parse_qubit_index(const std::string& tok, int& idx) { - // expects q[] - if (tok.size() < 4) return false; - if (tok[0] != 'q') return false; - size_t lb = tok.find('['); - size_t rb = tok.find(']'); - if (lb == std::string::npos || rb == std::string::npos || rb <= lb + 1) return false; - std::string inside = tok.substr(lb + 1, rb - lb - 1); - try { idx = std::stoi(inside); return true; } - catch (...) { return false; } + // ---------- IF matcher ---------- + // Matches: if (CREG[idx] == imm) { ; } + // if (CREG[idx] != imm) { ; } + // Semicolon after '}' is optional. + static bool match_if_one_stmt(const std::string& line, + std::string& creg_name, + int& cidx, + bool& is_eq, + int& imm, + std::string& body_stmt) + { + static const std::regex re( + R"(^\s*if\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\[(\d+)\]\s*(==|!=)\s*([0-9]+)\s*\)\s*\{\s*(.*?)\s*\}\s*;?\s*$)", + std::regex::icase); + + std::smatch m; + if (!std::regex_match(line, m, re)) return false; + creg_name = to_lower_ascii(m[1].str()); + cidx = std::stoi(m[2].str()); + is_eq = (m[3].str() == "=="); + imm = std::stoi(m[4].str()); + body_stmt = m[5].str(); + return true; } - static bool parse_bit_index(const std::string& tok, int& idx) { - // expects c[] - if (tok.size() < 4) return false; - if (tok[0] != 'c') return false; - size_t lb = tok.find('['); - size_t rb = tok.find(']'); - if (lb == std::string::npos || rb == std::string::npos || rb <= lb + 1) return false; - std::string inside = tok.substr(lb + 1, rb - lb - 1); - try { idx = std::stoi(inside); return true; } - catch (...) { return false; } + // ---------- QASM2 gate definitions ---------- + struct GateDef { + string name; // lower-case + vector qformals; // e.g., q0, q1 + vector pformals; // e.g., theta, phi, lambda + vector body; // statements without trailing ';' + }; + + static string substitute_idents(const string& s, const unordered_map& subs) { + string out; out.reserve(s.size()); + for (size_t i = 0; i < s.size();) { + unsigned char ch = static_cast(s[i]); + if (std::isalpha(ch) || s[i] == '_') { + size_t j = i + 1; + while (j < s.size()) { + unsigned char cj = static_cast(s[j]); + if (std::isalnum(cj) || s[j] == '_' || s[j] == '[' || s[j] == ']') ++j; + else break; + } + string tok = s.substr(i, j - i); + string key = to_lower_ascii(tok); + auto it = subs.find(key); + out += (it != subs.end()) ? it->second : tok; + i = j; + } + else { + out += s[i++]; + } + } + return out; } - static bool parse_angle_from(const std::string& t, float& val) { - size_t lp = t.find('('); - size_t rp = t.find(')'); - if (lp == std::string::npos || rp == std::string::npos || rp <= lp + 1) return false; - std::string a = t.substr(lp + 1, rp - lp - 1); - try { val = std::stof(a); return true; } - catch (...) { return false; } + // Expand one statement into canonical primitives, logging along the way. + static void expand_stmt_recursive(const string& stmt_in, + const unordered_map& subs, + const map& gates, + vector& out_stmts, + bool verbose) + { + string s = trim(stmt_in); + if (s.empty()) return; + + s = substitute_idents(s, subs); + string sl = to_lower_ascii(s); + + // ignore barrier/reset + if (sl.rfind("barrier", 0) == 0) { vlog(verbose, "skip barrier"); return; } + if (sl.rfind("reset", 0) == 0) { vlog(verbose, "skip reset"); return; } + + // U(theta,phi,lambda) q; + { + static regex reU(R"(^u\s*\(\s*([^,]+)\s*,\s*([^,]+)\s*,\s*([^)]+)\)\s+([A-Za-z_][A-Za-z0-9_\[\]]*)\s*;?$)", regex::icase); + smatch m; + if (regex_match(s, m, reU)) { + double th = eval_expr(m[1].str()); + double ph = eval_expr(m[2].str()); + double la = eval_expr(m[3].str()); + string q = m[4].str(); + out_stmts.push_back("rz(" + to_string(ph) + ") " + q + ";"); + out_stmts.push_back("ry(" + to_string(th) + ") " + q + ";"); + out_stmts.push_back("rz(" + to_string(la) + ") " + q + ";"); + vlog(verbose, "expand U(...) on " + q + " -> rz,ry,rz"); + return; + } + } + + // cx a,b; + { + static regex recx(R"(^cx\s+([A-Za-z_][A-Za-z0-9_\[\]]*)\s*,\s*([A-Za-z_][A-Za-z0-9_\[\]]*)\s*;?$)", regex::icase); + smatch m; + if (regex_match(s, m, recx)) { + out_stmts.push_back("cx " + m[1].str() + ", " + m[2].str() + ";"); + vlog(verbose, "emit cx " + m[1].str() + "," + m[2].str()); + return; + } + } + + // 1q with angle: rz/ry/rx/phase + { + static regex reang(R"(^\s*(rz|ry|rx|phase)\s*\(\s*([^)]+)\)\s+([A-Za-z_][A-Za-z0-9_\[\]]*)\s*;?$)", regex::icase); + smatch m; + if (regex_match(s, m, reang)) { + out_stmts.push_back(m[1].str() + "(" + m[2].str() + ") " + m[3].str() + ";"); + vlog(verbose, "emit angle1 " + to_lower_ascii(m[1].str()) + " " + m[3].str()); + return; + } + } + + // 1q no-angle + { + static regex re1q(R"(^\s*(x|y|z|h|s|sdg|t|tdg|sx|sxdg)\s+([A-Za-z_][A-Za-z0-9_\[\]]*)\s*;?$)", regex::icase); + smatch m; + if (regex_match(s, m, re1q)) { + out_stmts.push_back(to_lower_ascii(m[1].str()) + " " + m[2].str() + ";"); + vlog(verbose, "emit 1q " + to_lower_ascii(m[1].str()) + " " + m[2].str()); + return; + } + } + + // measure arrow or assignment + { + static regex rem1(R"(^\s*measure\s+([A-Za-z_][A-Za-z0-9_\[\]]*)\s*->\s*([A-Za-z_][A-Za-z0-9_\[\]]*)\s*;?$)", regex::icase); + static regex rem2(R"(^\s*([A-Za-z_][A-Za-z0-9_\[\]]*)\s*=\s*measure\s+([A-Za-z_][A-Za-z0-9_\[\]]*)\s*;?$)", regex::icase); + smatch m; + if (regex_match(s, m, rem1)) { out_stmts.push_back(m[2].str() + " = measure " + m[1].str() + ";"); vlog(verbose, "emit measure (arrow)"); return; } + if (regex_match(s, m, rem2)) { out_stmts.push_back(m[1].str() + " = measure " + m[2].str() + ";"); vlog(verbose, "emit measure (assign)"); return; } + } + + // Robust nested gate call: NAME[(params)] qargs; + { + static regex re_name_only(R"(^\s*([A-Za-z_][A-Za-z0-9_]*)\s*(.*?);?\s*$)"); + smatch m; + if (regex_match(s, m, re_name_only)) { + string name = to_lower_ascii(trim(m[1].str())); + auto it = gates.find(name); + if (it != gates.end()) { + string rest = trim(m[2].str()); + + // Extract "( ... )" at start if present + string param_str, qubits_str = rest; + if (!rest.empty() && rest[0] == '(') { + size_t close = find_matching_paren(rest, 0); + if (close != string::npos) { + param_str = rest.substr(1, close - 1); + qubits_str = trim(rest.substr(close + 1)); + } + } + + // Split qubits by comma + vector qargs = split_commas(qubits_str, /*respect_parens*/true); + + // Build substitutions + unordered_map subs2 = subs; + + // Map params if gate has formals + if (!it->second.pformals.empty() && !param_str.empty()) { + vector pvals = split_commas(param_str, /*respect_parens*/true); + for (size_t k = 0; k < it->second.pformals.size() && k < pvals.size(); ++k) { + subs2[to_lower_ascii(it->second.pformals[k])] = pvals[k]; + } + } + + // Map qubit formals + for (size_t i2 = 0; i2 < it->second.qformals.size() && i2 < qargs.size(); ++i2) { + subs2[to_lower_ascii(it->second.qformals[i2])] = qargs[i2]; + } + + vlog(verbose, string("expand call: ") + name + + " p=" + to_string(it->second.pformals.size()) + + " q=" + to_string(it->second.qformals.size()) + + " with args q=" + to_string(qargs.size())); + + // Recurse into body + for (const auto& st : it->second.body) { + expand_stmt_recursive(st, subs2, gates, out_stmts, verbose); + } + return; + } + else { + vlog(verbose, string("unknown gate call: ") + name + " (no def)"); + } + } + } + + // Pass-through (keep as-is, ensure it ends with ';') + out_stmts.push_back(s.back() == ';' ? s : s + ";"); + vlog(verbose, "pass-through stmt"); } + // ---------- main parser ---------- Program parse_qasm_subset(std::string_view text, bool verbose) { - Program P; - std::istringstream iss{ std::string(text) }; - std::string line; - size_t lineno = 0; - while (std::getline(iss, line)) { - ++lineno; - std::string s = trim_copy(line); - if (s.empty() || s[0] == '/' || s[0] == '#') continue; - - // Keep a lowercase copy for quick checks - std::string lower = s; - std::transform(lower.begin(), lower.end(), lower.begin(), [](unsigned char c) { return std::tolower(c); }); - - auto warn_skip = [&](const char* reason) { - if (verbose) std::fprintf(stderr, "[skip line %zu] %s: %s\n", lineno, reason, s.c_str()); - }; - - // Ignore declarations (we infer sizes) - if (lower.rfind("openqasm", 0) == 0) continue; - if (lower.rfind("include", 0) == 0) continue; - if (lower.rfind("qubit", 0) == 0) continue; - if (lower.rfind("bit", 0) == 0) continue; - - // MEASURE: c[k] = measure q[i]; + string src(text); + + // Normalize lines and strip // comments. + vector raw_lines; + raw_lines.reserve(src.size() / 16 + 8); + { + string cur; cur.reserve(256); + for (size_t i = 0; i < src.size(); ++i) { + char c = src[i]; + if (c == '\r') continue; + if (c == '/' && i + 1 < src.size() && src[i + 1] == '/') { + while (i < src.size() && src[i] != '\n') ++i; + } + if (i < src.size() && src[i] == '\n') { raw_lines.push_back(cur); cur.clear(); } + else if (i < src.size()) { cur.push_back(src[i]); } + } + raw_lines.push_back(cur); + } + + // Reg tables + struct Reg { int offset = 0; int size = 0; }; + map qregs, cregs; + int q_total = 0, c_total = 0; + + // Gate defs + map gates; + + // First pass: collect regs, gate defs, and keep non-definition lines + vector nondef_lines; + for (size_t li = 0; li < raw_lines.size(); ++li) { + string line = trim(raw_lines[li]); + if (line.empty()) continue; + string ll = to_lower_ascii(line); + + if (ll.rfind("openqasm", 0) == 0) { vlog(verbose, "header: " + line); continue; } + if (ll.rfind("include", 0) == 0) { vlog(verbose, "include: " + line); continue; } + + // qreg / creg { - // robust check for pattern - size_t eq = lower.find('='); - if (lower.rfind("c[", 0) == 0 && eq != std::string::npos && lower.find("measure", eq) != std::string::npos) { - std::string lhs = trim_copy(s.substr(0, eq)); - std::string rhs = trim_copy(s.substr(eq + 1)); - int bit_idx = -1, q_idx = -1; - if (!parse_bit_index(lhs, bit_idx)) { warn_skip("bad bit index on measure LHS"); goto after_measure; } - // rhs expected: "measure q[i];" (semicolon optional at this point) - // Normalize spaces and commas - for (char& c : rhs) if (c == ',') c = ' '; - std::istringstream rs(rhs); - std::string tok0, tok1; - if (!(rs >> tok0 >> tok1)) { warn_skip("measure RHS too short"); goto after_measure; } - std::string t0l = tok0; std::transform(t0l.begin(), t0l.end(), t0l.begin(), ::tolower); - if (t0l != "measure") { warn_skip("RHS missing 'measure'"); goto after_measure; } - if (!parse_qubit_index(tok1, q_idx)) { warn_skip("bad qubit on measure RHS"); goto after_measure; } - // Emit - Instr I{}; - I.op = Opcode::MEASURE; - I.a = q_idx; - I.has_aux = true; - I.aux_u32 = static_cast(bit_idx); - P.instrs.push_back(I); + static regex req(R"(^qreg\s+([A-Za-z_][A-Za-z0-9_]*)\[(\d+)\]\s*;?$)", regex::icase); + smatch m; + if (regex_match(line, m, req)) { + string name = to_lower_ascii(m[1].str()); + int n = stoi(m[2].str()); + qregs[name] = Reg{ q_total, n }; q_total += n; + vlog(verbose, "qreg " + name + "[" + to_string(n) + "] -> offset " + to_string(qregs[name].offset)); continue; } } - after_measure:; - - // IF single-line: if (c[k] == 1) { ; } - if (lower.rfind("if", 0) == 0) { - size_t lp = lower.find('('); - size_t rp = lower.find(')'); - size_t lb = lower.find('{'); - size_t rb = lower.rfind('}'); - if (lp == std::string::npos || rp == std::string::npos || lb == std::string::npos || rb == std::string::npos || rb < lb) { - warn_skip("unsupported if format"); + { + static regex rec(R"(^creg\s+([A-Za-z_][A-Za-z0-9_]*)\[(\d+)\]\s*;?$)", regex::icase); + smatch m; + if (regex_match(line, m, rec)) { + string name = to_lower_ascii(m[1].str()); + int n = stoi(m[2].str()); + cregs[name] = Reg{ c_total, n }; c_total += n; + vlog(verbose, "creg " + name + "[" + to_string(n) + "] -> offset " + to_string(cregs[name].offset)); continue; } - std::string cond = trim_copy(lower.substr(lp + 1, rp - lp - 1)); // e.g. c[1] == 1 - // parse lhs op rhs - bool is_eq = true; - size_t pos_eq = cond.find("=="); - size_t pos_neq = cond.find("!="); - if (pos_eq == std::string::npos && pos_neq == std::string::npos) { warn_skip("if condition missing ==/!="); continue; } - size_t pos_op = (pos_eq != std::string::npos) ? pos_eq : pos_neq; - if (pos_neq != std::string::npos) is_eq = false; - std::string lhs = trim_copy(cond.substr(0, pos_op)); - std::string rhs = trim_copy(cond.substr(pos_op + 2)); - int bit_idx = -1; - int val = 0; - if (!parse_bit_index(lhs, bit_idx)) { warn_skip("bad c[k] in if"); continue; } - try { val = std::stoi(rhs); } - catch (...) { warn_skip("bad compare value in if"); continue; } - // Emit IF - Instr IF{}; - IF.op = is_eq ? Opcode::IF_EQ : Opcode::IF_NEQ; - IF.has_aux = true; IF.aux_u32 = static_cast(bit_idx); - IF.has_imm8 = true; IF.imm8 = static_cast(val & 0xFF); - P.instrs.push_back(IF); - - // Body inside braces - std::string body = trim_copy(s.substr(lb + 1, rb - lb - 1)); - if (!body.empty() && body.back() == ';') body.pop_back(); - for (char& c : body) if (c == ',') c = ' '; - std::istringstream ts(body); - std::string tok0; - if (!(ts >> tok0)) { - // empty body, just ENDIF - Instr End{}; End.op = Opcode::ENDIF; P.instrs.push_back(End); - continue; + } + + // Gate definition: handle braces on same line and across lines + if (ll.rfind("gate ", 0) == 0) { + string header = line; + while (header.find('{') == string::npos && li + 1 < raw_lines.size()) { + header += " " + trim(raw_lines[++li]); } - float ang = 0.0f; - bool has_ang = parse_angle_from(tok0, ang); - std::string gate = tok0; - if (has_ang) gate = tok0.substr(0, tok0.find('(')); - - if (gate == "cx" || gate == "cz" || gate == "swap") { - std::string qa, qb; - if (!(ts >> qa >> qb)) { warn_skip("if-body expects two qubits"); } - else { - int ia = -1, ib = -1; - if (!parse_qubit_index(qa, ia) || !parse_qubit_index(qb, ib)) { warn_skip("if-body bad qubits"); } - else { - Instr I{}; - if (gate == "cx") I.op = Opcode::CX; - else if (gate == "cz") I.op = Opcode::CZ; - else I.op = Opcode::SWAP; - I.a = ia; I.b = ib; - P.instrs.push_back(I); - } + size_t brace_pos = header.find('{'); + if (brace_pos == string::npos) continue; + + // Parse head + static regex rehead(R"(^gate\s+([A-Za-z_][A-Za-z0-9_]*)\s*(\(([^)]*)\))?\s+(.+)$)", regex::icase); + smatch mh; + string head_part = header.substr(0, brace_pos); + if (!regex_match(head_part, mh, rehead)) continue; + + GateDef gd; + gd.name = to_lower_ascii(trim(mh[1].str())); + string params = mh[3].str(); + string qargs = trim(mh[4].str()); + + if (!params.empty()) { + for (auto& t : split_commas(params, /*respect_parens*/false)) gd.pformals.push_back(t); + } + if (!qargs.empty()) { + for (auto& t : split_commas(qargs, /*respect_parens*/false)) gd.qformals.push_back(t); + } + + // Collect body from remainder of header line and following lines + string body; + int depth = 1; + for (size_t k = brace_pos + 1; k < header.size(); ++k) { + char ch = header[k]; + if (ch == '{') { ++depth; continue; } + if (ch == '}') { --depth; if (depth == 0) break; else continue; } + body.push_back(ch); + } + while (depth > 0 && li + 1 < raw_lines.size()) { + string nxt = raw_lines[++li]; + for (size_t k = 0; k < nxt.size(); ++k) { + char ch = nxt[k]; + if (ch == '{') { ++depth; continue; } + if (ch == '}') { --depth; if (depth == 0) { continue; } } + if (depth >= 1) body.push_back(ch); } + if (depth >= 1) body.push_back('\n'); } - else if (gate == "h" || gate == "x" || gate == "y" || gate == "z" || - gate == "s" || gate == "sdg" || gate == "t" || gate == "tdg" || - gate == "sx" || gate == "sxdg" || gate == "rx" || gate == "ry" || - gate == "rz" || gate == "phase") { - std::string qa; - if (!(ts >> qa)) { warn_skip("if-body expects one qubit"); } - else { - int ia = -1; - if (!parse_qubit_index(qa, ia)) { warn_skip("if-body bad qubit"); } - else { - Instr I{}; - if (gate == "h") I.op = Opcode::H; - else if (gate == "x") I.op = Opcode::X; - else if (gate == "y") I.op = Opcode::Y; - else if (gate == "z") I.op = Opcode::Z; - else if (gate == "s") I.op = Opcode::S; - else if (gate == "sdg") I.op = Opcode::SDG; - else if (gate == "t") I.op = Opcode::T; - else if (gate == "tdg") I.op = Opcode::TDG; - else if (gate == "sx") I.op = Opcode::SX; - else if (gate == "sxdg") I.op = Opcode::SXDG; - else if (gate == "rx") { I.op = Opcode::RX; I.has_angle0 = has_ang; I.angle0 = ang; } - else if (gate == "ry") { I.op = Opcode::RY; I.has_angle0 = has_ang; I.angle0 = ang; } - else if (gate == "rz") { I.op = Opcode::RZ; I.has_angle0 = has_ang; I.angle0 = ang; } - else if (gate == "phase") { I.op = Opcode::PHASE; I.has_angle0 = has_ang; I.angle0 = ang; } - I.a = ia; - P.instrs.push_back(I); + + // Split body by ';' respecting parentheses + { + size_t p = 0, last = 0; int depthP = 0; + while (p <= body.size()) { + bool at_end = (p == body.size()); + char ch = at_end ? '\0' : body[p]; + if (!at_end && ch == '(') ++depthP; + else if (!at_end && ch == ')') --depthP; + if (at_end || (ch == ';' && depthP == 0)) { + string t = trim(string_view(body).substr(last, p - last)); + if (!t.empty()) gd.body.push_back(t); + last = p + 1; } + ++p; } } - else { - warn_skip("unsupported if-body statement"); - } - Instr End{}; End.op = Opcode::ENDIF; P.instrs.push_back(End); + vlog(verbose, "gate def: " + gd.name + + " p=" + to_string(gd.pformals.size()) + + " q=" + to_string(gd.qformals.size()) + + " stmts=" + to_string(gd.body.size())); + gates[gd.name] = std::move(gd); continue; } - // Regular statements - // Remove trailing ';' then normalize commas to spaces - if (!s.empty() && s.back() == ';') s.pop_back(); - for (char& c : s) if (c == ',') c = ' '; - std::istringstream ts(s); - std::string tok0; - if (!(ts >> tok0)) { warn_skip("empty"); continue; } - - float ang = 0.0f; - bool has_ang = parse_angle_from(tok0, ang); - std::string gate = tok0; - if (has_ang) gate = tok0.substr(0, tok0.find('(')); - - // Two-qubit without angles - if (gate == "cx" || gate == "cz" || gate == "swap") { - std::string qa, qb; - if (!(ts >> qa >> qb)) { warn_skip("expected two qubits"); continue; } - int ia = -1, ib = -1; - if (!parse_qubit_index(qa, ia) || !parse_qubit_index(qb, ib)) { warn_skip("bad qubit index"); continue; } - Instr I{}; - if (gate == "cx") I.op = Opcode::CX; - else if (gate == "cz") I.op = Opcode::CZ; - else I.op = Opcode::SWAP; - I.a = ia; I.b = ib; - P.instrs.push_back(I); - continue; + nondef_lines.push_back(line); + } + + Program prog; + prog.max_qubit = q_total - 1; + prog.max_bit = c_total - 1; + + auto resolve_qubit = [&](const string& token)->int { + static regex r(R"(^([A-Za-z_][A-Za-z0-9_]*)\[(\d+)\]$)"); + smatch m; + if (!regex_match(token, m, r)) return -1; + string reg = to_lower_ascii(m[1].str()); + int idx = stoi(m[2].str()); + auto it = qregs.find(reg); if (it == qregs.end()) return -1; + return it->second.offset + idx; + }; + auto resolve_bit = [&](const string& token)->int { + static regex r(R"(^([A-Za-z_][A-Za-z0-9_]*)\[(\d+)\]$)"); + smatch m; + if (!regex_match(token, m, r)) return -1; + string reg = to_lower_ascii(m[1].str()); + int idx = stoi(m[2].str()); + auto it = cregs.find(reg); if (it == cregs.end()) return -1; + return it->second.offset + idx; + }; + + // Second pass: expand and emit + vector canonical; + canonical.reserve(nondef_lines.size() * 2); + + // Small helper to emit a single canonical statement string directly to IR. + auto emit_from_stmt = [&](const std::string& st)->bool { + smatch m; + // measure + { + static regex r(R"(^([A-Za-z_][A-Za-z0-9_\[\]]*)\s*=\s*measure\s+([A-Za-z_][A-Za-z0-9_\[\]]*)\s*;?$)", regex::icase); + if (regex_match(st, m, r)) { + int q = resolve_qubit(m[2].str()); + int c = resolve_bit(m[1].str()); + if (q >= 0 && c >= 0) { emit_measure(prog.code, q, c); return true; } + vlog(verbose, "measure resolve failed: " + st); + return true; // consumed + } + } + // cx + { + static regex r(R"(^cx\s+([A-Za-z_][A-Za-z0-9_\[\]]*)\s*,\s*([A-Za-z_][A-Za-z0-9_\[\]]*)\s*;?$)", regex::icase); + if (regex_match(st, m, r)) { + int a = resolve_qubit(m[1].str()); + int b = resolve_qubit(m[2].str()); + if (a >= 0 && b >= 0) { emit_2q(prog.code, Op::CX, a, b); return true; } + vlog(verbose, "cx resolve failed: " + st); + return true; + } + } + // 1q with angle + { + static regex r(R"(^\s*(rz|ry|rx|phase)\s*\(\s*([^)]+)\)\s+([A-Za-z_][A-Za-z0-9_\[\]]*)\s*;?$)", regex::icase); + if (regex_match(st, m, r)) { + double ang = eval_expr(m[2].str()); + int a = resolve_qubit(m[3].str()); + if (a >= 0) { + string g = to_lower_ascii(m[1].str()); + Op op = Op::RZ; + if (g == "rz") op = Op::RZ; + else if (g == "ry") op = Op::RY; + else if (g == "rx") op = Op::RX; + else op = Op::PHASE; + emit_angle(prog.code, op, a, ang); + } + else { + vlog(verbose, "1q angle resolve failed: " + st); + } + return true; + } + } + // 1q no-angle + { + static regex r(R"(^\s*(x|y|z|h|s|sdg|t|tdg|sx|sxdg)\s+([A-Za-z_][A-Za-z0-9_\[\]]*)\s*;?$)", regex::icase); + if (regex_match(st, m, r)) { + int a = resolve_qubit(m[2].str()); + if (a >= 0) { + string g = to_lower_ascii(m[1].str()); + Op op = Op::X; + if (g == "x") op = Op::X; + else if (g == "y") op = Op::Y; + else if (g == "z") op = Op::Z; + else if (g == "h") op = Op::H; + else if (g == "s") op = Op::S; + else if (g == "sdg") op = Op::SDG; + else if (g == "t") op = Op::T; + else if (g == "tdg") op = Op::TDG; + else if (g == "sx") op = Op::SX; + else if (g == "sxdg")op = Op::SXDG; + emit_1q(prog.code, op, a); + } + else { + vlog(verbose, "1q resolve failed: " + st); + } + return true; + } + } + // ignore barrier/reset + { + static regex rb(R"(^\s*(barrier|reset)\b)", regex::icase); + if (regex_search(st, rb)) return true; } + return false; // not recognized; caller may log "ignored" + }; - // One-qubit gates - if (gate == "h" || gate == "x" || gate == "y" || gate == "z" || - gate == "s" || gate == "sdg" || gate == "t" || gate == "tdg" || - gate == "sx" || gate == "sxdg" || gate == "rx" || gate == "ry" || - gate == "rz" || gate == "phase") { - std::string qa; - if (!(ts >> qa)) { warn_skip("expected one qubit"); continue; } - int ia = -1; - if (!parse_qubit_index(qa, ia)) { warn_skip("bad qubit index"); continue; } - Instr I{}; - if (gate == "h") I.op = Opcode::H; - else if (gate == "x") I.op = Opcode::X; - else if (gate == "y") I.op = Opcode::Y; - else if (gate == "z") I.op = Opcode::Z; - else if (gate == "s") I.op = Opcode::S; - else if (gate == "sdg") I.op = Opcode::SDG; - else if (gate == "t") I.op = Opcode::T; - else if (gate == "tdg") I.op = Opcode::TDG; - else if (gate == "sx") I.op = Opcode::SX; - else if (gate == "sxdg") I.op = Opcode::SXDG; - else if (gate == "rx") { I.op = Opcode::RX; I.has_angle0 = has_ang; I.angle0 = ang; } - else if (gate == "ry") { I.op = Opcode::RY; I.has_angle0 = has_ang; I.angle0 = ang; } - else if (gate == "rz") { I.op = Opcode::RZ; I.has_angle0 = has_ang; I.angle0 = ang; } - else if (gate == "phase") { I.op = Opcode::PHASE; I.has_angle0 = has_ang; I.angle0 = ang; } - I.a = ia; - P.instrs.push_back(I); - continue; + for (const auto& line : nondef_lines) { + string s = trim(line); + if (s.empty()) continue; + + // QASM 3 style decls: qubit[N] name; bit[M] name; + { + static regex rq(R"(^qubit\s*\[(\d+)\]\s*([A-Za-z_][A-Za-z0-9_]*)\s*;?$)", regex::icase); + smatch m; + if (regex_match(s, m, rq)) { + string name = to_lower_ascii(m[2].str()); int n = stoi(m[1].str()); + if (!qregs.count(name)) { qregs[name] = { q_total, n }; q_total += n; prog.max_qubit = q_total - 1; } + vlog(verbose, "qubit decl: " + name + "[" + to_string(n) + "]"); + continue; + } } + { + static regex rb(R"(^bit\s*\[(\d+)\]\s*([A-Za-z_][A-Za-z0-9_]*)\s*;?$)", regex::icase); + smatch m; + if (regex_match(s, m, rb)) { + string name = to_lower_ascii(m[2].str()); int n = stoi(m[1].str()); + if (!cregs.count(name)) { cregs[name] = { c_total, n }; c_total += n; prog.max_bit = c_total - 1; } + vlog(verbose, "bit decl: " + name + "[" + to_string(n) + "]"); + continue; + } + } + + // measure arrow or assignment to canonical form + { + static regex rm1(R"(^measure\s+([A-Za-z_][A-Za-z0-9_\[\]]*)\s*->\s*([A-Za-z_][A-Za-z0-9_\[\]]*)\s*;?$)", regex::icase); + static regex rm2(R"(^([A-Za-z_][A-Za-z0-9_\[\]]*)\s*=\s*measure\s+([A-Za-z_][A-Za-z0-9_\[\]]*)\s*;?$)", regex::icase); + smatch m; + if (regex_match(s, m, rm1)) { canonical.push_back(m[2].str() + " = measure " + m[1].str() + ";"); vlog(verbose, "measure arrow -> canonical"); continue; } + if (regex_match(s, m, rm2)) { canonical.push_back(m[1].str() + " = measure " + m[2].str() + ";"); vlog(verbose, "measure assign canonical"); continue; } + } + - // Unsupported - warn_skip("unsupported"); + + // Expand everything else to canonical primitives + vector expanded; + expand_stmt_recursive(s, unordered_map{}, gates, expanded, verbose); + if (expanded.empty()) vlog(verbose, "expansion produced 0 statements for: " + (s.size() > 64 ? s.substr(0, 64) : s)); + canonical.insert(canonical.end(), expanded.begin(), expanded.end()); + } + + vlog(verbose, "canonical statements: " + to_string(canonical.size())); + + // Emit IR for canonical (non-IF) statements + for (const auto& st : canonical) { + if (emit_from_stmt(st)) continue; + + // --- IF handling here to preserve order --- +{ + std::string creg_name, body; + int cidx = -1, imm = 0; bool is_eq = true; + if (match_if_one_stmt(st, creg_name, cidx, is_eq, imm, body)) { + int c_abs = resolve_bit(creg_name + "[" + std::to_string(cidx) + "]"); + if (c_abs < 0) { + vlog(verbose, "IF cbit resolve failed: " + creg_name + "[" + std::to_string(cidx) + "]"); + continue; + } + + // Emit IF opcode + Instr ifi{}; + ifi.op = is_eq ? Op::IF_EQ : Op::IF_NEQ; + ifi.has_aux = true; ifi.aux = static_cast(c_abs); + ifi.has_imm8 = true; ifi.imm8 = static_cast(imm); + prog.code.push_back(ifi); + + // Expand IF body and emit + vector expanded; + expand_stmt_recursive(body, unordered_map{}, gates, expanded, verbose); + for (const auto& st2 : expanded) { + if (!emit_from_stmt(st2)) { + vlog(verbose, "ignored stmt in IF body: " + (st2.size() > 64 ? st2.substr(0, 64) : st2)); } - return P; + } + + // ENDIF + Instr endi{}; endi.op = Op::ENDIF; + prog.code.push_back(endi); + continue; + } +} + + // ignore barrier/reset + { + static regex rb(R"(^\s*(barrier|reset)\b)", regex::icase); + if (regex_search(st, rb)) continue; + } + vlog(verbose, "ignored stmt: " + (st.size() > 64 ? st.substr(0, 64) : st)); + } + + vlog(verbose, "emitted IR instructions: " + to_string(prog.code.size())); + return prog; } } // namespace frontend diff --git a/compiler/src/tools.cpp b/compiler/src/tools.cpp new file mode 100644 index 0000000..c210acf --- /dev/null +++ b/compiler/src/tools.cpp @@ -0,0 +1,150 @@ +#include "qbin_compiler/tools.hpp" + +#include +#include +#include +#include +#include +#include +#include + +namespace qbin_compiler { + namespace util { + + std::string to_lower_ascii(std::string s) { + for (auto& ch : s) ch = char(std::tolower(static_cast(ch))); + return s; + } + + std::string trim(std::string_view sv) { + std::size_t i = 0, j = sv.size(); + while (i < j && std::isspace(static_cast(sv[i]))) ++i; + while (j > i && std::isspace(static_cast(sv[j - 1]))) --j; + return std::string(sv.substr(i, j - i)); + } + + std::vector split_commas(const std::string& s, bool respect_parens) { + std::vector out; + std::size_t last = 0; + int depth = 0; + for (std::size_t i = 0; i <= s.size(); ++i) { + bool at_end = (i == s.size()); + char ch = at_end ? '\0' : s[i]; + if (respect_parens) { + if (!at_end && ch == '(') { ++depth; continue; } + if (!at_end && ch == ')') { --depth; continue; } + } + if (at_end || (ch == ',' && depth == 0)) { + std::string t = trim(std::string_view(s).substr(last, i - last)); + if (!t.empty()) out.push_back(t); + last = i + 1; + } + } + return out; + } + + std::size_t find_matching_paren(const std::string& s, std::size_t open_pos) { + if (open_pos >= s.size() || s[open_pos] != '(') return std::string::npos; + int depth = 1; + for (std::size_t i = open_pos + 1; i < s.size(); ++i) { + if (s[i] == '(') ++depth; + else if (s[i] == ')') { + --depth; + if (depth == 0) return i; + } + } + return std::string::npos; + } + + // ----- simple expression parser for angles ----- + struct ExprEval { + std::string s; + std::size_t i = 0; + + static double run(const std::string& expr) { + ExprEval ev; ev.s = expr; ev.i = 0; + return ev.parse_expr(); + } + + void skip() { while (i < s.size() && std::isspace(static_cast(s[i]))) ++i; } + + double parse_expr() { + double v = parse_term(); + for (;;) { + skip(); + if (i >= s.size()) return v; + char c = s[i]; + if (c == '+' || c == '-') { + ++i; + double t = parse_term(); + v = (c == '+') ? v + t : v - t; + } + else return v; + } + } + + double parse_term() { + double v = parse_factor(); + for (;;) { + skip(); + if (i >= s.size()) return v; + char c = s[i]; + if (c == '*' || c == '/') { + ++i; + double t = parse_factor(); + v = (c == '*') ? v * t : v / t; + } + else return v; + } + } + + double parse_factor() { + skip(); + if (i >= s.size()) return 0.0; + if (s[i] == '(') { + ++i; + double v = parse_expr(); + skip(); + if (i < s.size() && s[i] == ')') ++i; + return v; + } + if (s[i] == '+') { ++i; return parse_factor(); } + if (s[i] == '-') { ++i; return -parse_factor(); } + if (match_pi()) return M_PI; + return parse_number(); + } + + bool match_pi() { + skip(); + if (i + 1 < s.size()) { + char c1 = char(std::tolower(static_cast(s[i]))); + char c2 = char(std::tolower(static_cast(s[i + 1]))); + if (c1 == 'p' && c2 == 'i') { i += 2; return true; } + } + return false; + } + + double parse_number() { + skip(); + std::size_t j = i; + while (i < s.size()) { + unsigned char ch = static_cast(s[i]); + if (std::isdigit(ch) || s[i] == '.' || s[i] == 'e' || s[i] == 'E' || s[i] == '+' || s[i] == '-') { + ++i; + } + else break; + } + std::string t = s.substr(j, i - j); + if (t.empty()) return 0.0; + return std::strtod(t.c_str(), nullptr); + } + }; + + double eval_expr(const std::string& expr) { return ExprEval::run(expr); } + + void vlog(bool enabled, const std::string& msg) { + if (enabled) std::cerr << "[qbin] " << msg << "\n"; + } + + } +} // namespace diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt index caa455d..1fe72fe 100644 --- a/decompiler/CMakeLists.txt +++ b/decompiler/CMakeLists.txt @@ -16,10 +16,12 @@ set(CMAKE_CXX_EXTENSIONS OFF) # ---- Sources ---- set(QBIN_DECOMPILER_SOURCES src/main.cpp + src/tools.cpp src/decompiler.cpp ) set(QBIN_DECOMPILER_HEADERS + include/qbin_decompiler/tools.hpp include/qbin_decompiler/decompiler.hpp ) diff --git a/decompiler/include/qbin_decompiler/tools.hpp b/decompiler/include/qbin_decompiler/tools.hpp new file mode 100644 index 0000000..ef9afea --- /dev/null +++ b/decompiler/include/qbin_decompiler/tools.hpp @@ -0,0 +1,41 @@ +#ifndef QBIN_DECOMPILER_TOOLS_HPP +#define QBIN_DECOMPILER_TOOLS_HPP + +#include +#include +#include +#include // for std::memcmp + +namespace qbin_decompiler { +namespace tools { + +// ---- QBIN v1 header (fixed 20 bytes, no section table) ---- +struct FileHeader { + uint16_t version = 0; + uint16_t flags = 0; + uint32_t header_size = 0; + uint32_t section_count = 0; + uint32_t reserved = 0; +}; + +// Little-endian primitives +inline uint32_t rd_u32le(const uint8_t* p) { + return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24); +} + +uint16_t read_u16(const std::vector& b, size_t& i); +uint32_t read_u32(const std::vector& b, size_t& i); + +// ULEB128 with end bound +bool read_uleb128_bound(const std::vector& b, size_t& i, size_t end, uint64_t& v); + +// f32 (little-endian) with bound +bool read_f32le_bound(const std::vector& b, size_t& i, size_t end, float& out); + +// Parse QBIN v1 header, advance pos to the first section/tag after header padding +bool read_header_v1(const std::vector& b, size_t& pos, FileHeader& h, std::string& err, bool verbose); + +} // namespace tools +} // namespace qbin_decompiler + +#endif // QBIN_DECOMPILER_TOOLS_HPP diff --git a/decompiler/src/decompiler.cpp b/decompiler/src/decompiler.cpp index 2d3d64b..8b20abf 100644 --- a/decompiler/src/decompiler.cpp +++ b/decompiler/src/decompiler.cpp @@ -1,4 +1,5 @@ #include "qbin_decompiler/decompiler.hpp" +#include "qbin_decompiler/tools.hpp" #include #include @@ -7,89 +8,12 @@ #include #include #include +#include #include #include namespace qbin_decompiler { - static inline uint32_t rd_u32le(const uint8_t* p) { - return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24); - } - - struct SectionEntry { - uint32_t id; - uint32_t offset; - uint32_t size; - uint32_t flags; - }; - - static inline std::string id_to_ascii(uint32_t id) { - char s[5]; - s[0] = char(id & 0xFF); - s[1] = char((id >> 8) & 0xFF); - s[2] = char((id >> 16) & 0xFF); - s[3] = char((id >> 24) & 0xFF); - s[4] = 0; - return std::string(s); - } - - // ULEB128 with local end bound - static bool read_uleb128_bound(const std::vector& b, size_t& i, size_t end, uint64_t& v) { - v = 0; int shift = 0; - while (i < end) { - uint8_t byte = b[i++]; - v |= (uint64_t)(byte & 0x7F) << shift; - if ((byte & 0x80) == 0) return true; - shift += 7; - if (shift > 63) return false; - } - return false; - } - - static bool read_f32le_bound(const std::vector& b, size_t& i, size_t end, float& out) { - if (i + 4 > end) return false; - uint32_t u = rd_u32le(&b[i]); i += 4; - std::memcpy(&out, &u, 4); - return true; - } - - static bool decode_header_and_table(const std::vector& b, - uint8_t& major, uint8_t& minor, - uint32_t& table_off, uint32_t& table_size, - std::vector& table, - std::string& err, bool verbose) { - if (b.size() < 24) { err = "file too small for header"; return false; } - if (std::memcmp(b.data(), "QBIN", 4) != 0) { err = "bad magic"; return false; } - major = b[4]; - minor = b[5]; - uint8_t hdr_size = b[7]; - if (hdr_size != 24) { err = "unexpected header size"; return false; } - uint32_t section_count = rd_u32le(&b[8]); - table_off = rd_u32le(&b[12]); - table_size = rd_u32le(&b[16]); - if (table_off + table_size > b.size()) { err = "section table OOB"; return false; } - if (section_count == 0 || table_size != section_count * 16) { err = "table size mismatch"; return false; } - table.clear(); - table.reserve(section_count); - size_t p = table_off; - for (uint32_t i = 0; i < section_count; ++i) { - SectionEntry e; - e.id = rd_u32le(&b[p + 0]); - e.offset = rd_u32le(&b[p + 4]); - e.size = rd_u32le(&b[p + 8]); - e.flags = rd_u32le(&b[p + 12]); - if ((size_t)e.offset + (size_t)e.size > b.size()) { err = "section out of bounds"; return false; } - table.push_back(e); - p += 16; - } - if (verbose) { - for (const auto& e : table) { - std::fprintf(stderr, " [%s] off=%u size=%u flags=%u\n", id_to_ascii(e.id).c_str(), e.offset, e.size, e.flags); - } - } - return true; - } - struct DecodedInstr { uint8_t opcode = 0; int a = -1, b = -1, c = -1; @@ -101,22 +25,29 @@ namespace qbin_decompiler { uint8_t imm8 = 0; }; + // decode INST section (kept internal) static bool decode_inst_section(const std::vector& b, size_t off, size_t size, std::vector& out, std::string& err, bool verbose = false) { + + using namespace qbin_decompiler::tools; + if (off + size > b.size()) { err = "INST OOB"; return false; } size_t i = off, end = off + size; if (i + 4 > end) { err = "short INST"; return false; } if (std::memcmp(&b[i], "INST", 4) != 0) { err = "INST magic missing"; return false; } i += 4; + uint64_t n = 0; if (!read_uleb128_bound(b, i, end, n)) { err = "bad instr_count"; return false; } out.clear(); out.reserve((size_t)n); + for (uint64_t k = 0; k < n; ++k) { if (i + 2 > end) { err = "truncated instruction header"; return false; } DecodedInstr di{}; di.opcode = b[i++]; uint8_t mask = b[i++]; + if (verbose) std::fprintf(stderr, "idx=%llu: op=0x%02X mask=0x%02X @%zu\n", (unsigned long long)k, di.opcode, mask, i); @@ -137,7 +68,9 @@ namespace qbin_decompiler { uint64_t dummy; if (!read_uleb128_bound(b, i, end, dummy)) { err = "angle param_ref OOB"; return false; } di.has_angle0 = true; di.angle0 = 0.0f; } - else { err = "unknown angle tag"; return false; } + else { + err = "unknown angle tag"; return false; + } } // aux_u32 @@ -203,31 +136,37 @@ namespace qbin_decompiler { std::string& qasm_out, std::string& err, bool verbose) { - uint8_t major = 0, minor = 0; - uint32_t table_off = 0, table_size = 0; - std::vector table; - if (!decode_header_and_table(buf, major, minor, table_off, table_size, table, err, verbose)) { + + using namespace qbin_decompiler::tools; + + // Read v1 header (20 bytes), no section table + FileHeader hdr{}; + size_t pos = 0; + if (!read_header_v1(buf, pos, hdr, err, verbose)) { return false; } - // Find INST - uint32_t inst_id = 0; std::memcpy(&inst_id, "INST", 4); - const SectionEntry* inst = nullptr; - for (const auto& e : table) { if (e.id == inst_id) { inst = &e; break; } } - if (!inst) { err = "No INST section found"; return false; } + // After header, the single INST section/tag starts immediately and runs to EOF. + if (pos + 4 > buf.size()) { err = "no INST tag after header"; return false; } + if (std::memcmp(&buf[pos], "INST", 4) != 0) { + err = "expected INST tag after header"; + return false; + } + size_t inst_off = pos; + size_t inst_size = buf.size() - inst_off; std::vector instrs; - if (!decode_inst_section(buf, inst->offset, inst->size, instrs, err, verbose)) { + if (!decode_inst_section(buf, inst_off, inst_size, instrs, err, verbose)) { return false; } - // Infer sizes + // Infer register sizes int max_q = -1, max_c = -1; for (const auto& di : instrs) { max_q = std::max(max_q, di.a); max_q = std::max(max_q, di.b); max_q = std::max(max_q, di.c); - if (di.opcode == 0x30 /*MEASURE*/ && di.has_aux) max_c = std::max(max_c, int(di.aux)); + if (di.opcode == 0x30 /*measure*/ && di.has_aux) max_c = std::max(max_c, int(di.aux)); if ((di.opcode == 0x81 || di.opcode == 0x82) && di.has_aux) max_c = std::max(max_c, int(di.aux)); } int num_qubits = (max_q >= 0) ? (max_q + 1) : 0; @@ -237,7 +176,7 @@ namespace qbin_decompiler { std::ostringstream q; q << "OPENQASM 3.0;\n"; if (num_qubits > 0) q << "qubit[" << num_qubits << "] q;\n"; - if (num_bits > 0) q << "bit[" << num_bits << "] c;\n"; + if (num_bits > 0) q << "bit[" << num_bits << "] c;\n"; q << "\n"; q << std::setprecision(9); @@ -259,11 +198,12 @@ namespace qbin_decompiler { case 0x0D: q << "rz(" << (di.has_angle0 ? di.angle0 : 0.0f) << ") q[" << di.a << "];\n"; break; case 0x0E: q << "phase(" << (di.has_angle0 ? di.angle0 : 0.0f) << ") q[" << di.a << "];\n"; break; case 0x10: q << "cx q[" << di.a << "], q[" << di.b << "];\n"; break; - case 0x11: q << "cz " << "q[" << di.a << "], q[" << di.b << "];\n"; break; + case 0x11: q << "cz q[" << di.a << "], q[" << di.b << "];\n"; break; case 0x13: q << "swap q[" << di.a << "], q[" << di.b << "];\n"; break; - case 0x15: q << "crx q[" << di.a << "], q[" << di.b << "], (" << (di.has_angle0 ? di.angle0 : 0.0f) << ");\n"; break; - case 0x16: q << "cry q[" << di.a << "], q[" << di.b << "], (" << (di.has_angle0 ? di.angle0 : 0.0f) << ");\n"; break; - case 0x17: q << "crz q[" << di.a << "], q[" << di.b << "], (" << (di.has_angle0 ? di.angle0 : 0.0f) << ");\n"; break; + case 0x15: q << "crx(" << (di.has_angle0 ? di.angle0 : 0.0f) << ") q[" << di.a << "], q[" << di.b << "];\n"; break; + case 0x16: q << "cry(" << (di.has_angle0 ? di.angle0 : 0.0f) << ") q[" << di.a << "], q[" << di.b << "];\n"; break; + case 0x17: q << "crz(" << (di.has_angle0 ? di.angle0 : 0.0f) << ") q[" << di.a << "], q[" << di.b << "];\n"; break; + case 0x20: q << "rxx(" << (di.has_angle0 ? di.angle0 : 0.0f) << ") q[" << di.a << "], q[" << di.b << "];\n"; break; case 0x21: q << "ryy(" << (di.has_angle0 ? di.angle0 : 0.0f) << ") q[" << di.a << "], q[" << di.b << "];\n"; break; case 0x22: q << "rzz(" << (di.has_angle0 ? di.angle0 : 0.0f) << ") q[" << di.a << "], q[" << di.b << "];\n"; break; @@ -273,6 +213,7 @@ namespace qbin_decompiler { case 0x81: case 0x82: { int val = di.has_imm8 ? di.imm8 : 0; + // Try single-instruction inline if followed by body + endif if (idx + 2 < instrs.size() && instrs[idx + 2].opcode == 0x8F) { const auto& body = instrs[idx + 1]; std::ostringstream one; @@ -302,13 +243,13 @@ namespace qbin_decompiler { break; } } - // fallback multi-line + // Fallback multi-line q << "if (c[" << di.aux << "] " << (di.opcode == 0x81 ? "==" : "!=") << " " << val << ") {\n"; size_t j = idx + 1; for (; j < instrs.size(); ++j) { if (instrs[j].opcode == 0x8F) break; const auto& body = instrs[j]; - q << " " << opcode_name(body.opcode) << " ...\n"; // concise fallback + q << " " << opcode_name(body.opcode) << " ...\n"; } q << "}\n"; idx = (j < instrs.size()) ? j : instrs.size() - 1; diff --git a/decompiler/src/tools.cpp b/decompiler/src/tools.cpp new file mode 100644 index 0000000..e4c827a --- /dev/null +++ b/decompiler/src/tools.cpp @@ -0,0 +1,78 @@ +#include "qbin_decompiler/tools.hpp" + +#include +#include + +namespace qbin_decompiler { +namespace tools { + +uint16_t read_u16(const std::vector& b, size_t& i) { + if (i + 2 > b.size()) throw std::runtime_error("eof reading u16"); + uint16_t v = (uint16_t)b[i] | ((uint16_t)b[i + 1] << 8); + i += 2; + return v; +} + +uint32_t read_u32(const std::vector& b, size_t& i) { + if (i + 4 > b.size()) throw std::runtime_error("eof reading u32"); + uint32_t v = (uint32_t)b[i] + | ((uint32_t)b[i + 1] << 8) + | ((uint32_t)b[i + 2] << 16) + | ((uint32_t)b[i + 3] << 24); + i += 4; + return v; +} + +bool read_uleb128_bound(const std::vector& b, size_t& i, size_t end, uint64_t& v) { + v = 0; + int shift = 0; + while (i < end) { + uint8_t byte = b[i++]; + v |= (uint64_t)(byte & 0x7F) << shift; + if ((byte & 0x80) == 0) return true; + shift += 7; + if (shift > 63) return false; + } + return false; +} + +bool read_f32le_bound(const std::vector& b, size_t& i, size_t end, float& out) { + if (i + 4 > end) return false; + uint32_t u = rd_u32le(&b[i]); + i += 4; + std::memcpy(&out, &u, 4); + return true; +} + +bool read_header_v1(const std::vector& b, size_t& pos, FileHeader& h, std::string& err, bool verbose) { + if (b.size() < 20) { err = "file too small for header"; return false; } + if (std::memcmp(b.data(), "QBIN", 4) != 0) { err = "bad magic (not QBIN)"; return false; } + + pos = 4; + h.version = read_u16(b, pos); + h.flags = read_u16(b, pos); + h.header_size = read_u32(b, pos); + h.section_count = read_u32(b, pos); + h.reserved = read_u32(b, pos); + + const uint32_t kMinHeaderSize = 20; + if (h.header_size < kMinHeaderSize) { err = "header too small: " + std::to_string(h.header_size); return false; } + if (h.header_size > b.size()) { err = "header claims bigger than file: " + std::to_string(h.header_size); return false; } + + // Skip any extra header padding to land at first section/tag + if (h.header_size > kMinHeaderSize) { + size_t extra = h.header_size - kMinHeaderSize; + if (pos + extra > b.size()) { err = "header padding runs past file"; return false; } + pos += extra; + } + + if (verbose) { + std::fprintf(stderr, "[qbin] header: ver=%u flags=0x%04x header_size=%u sections=%u\n", + (unsigned)h.version, (unsigned)h.flags, + (unsigned)h.header_size, (unsigned)h.section_count); + } + return true; +} + +} // namespace tools +} // namespace qbin_decompiler