diff --git a/CHANGELOG.md b/CHANGELOG.md index 41e9780..8e8235d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,7 +35,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add compilation targets matrix documentation (`book/compilation_targets.md`) ([#97]) - 6-option matrix: Compile/Proof x Debug/Release x with/without non-det operations -- Document FxHashMap non-deterministic iteration bug in `arena.rs` ([#97]) ### Testing @@ -218,6 +217,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - ast: 98% memory reduction in `Location` struct by removing unused source field ([#69]) +### Fixed + +- Fix FxHashMap non-deterministic iteration in `Arena` — `filter_nodes()` and `list_nodes_cmp()` now sort by node ID, ensuring reproducible WASM function emission order +- Fix Drop instruction emission for nested non-det blocks — `parent_blocks_stack.last()` (innermost block) is now used instead of `.first()` (outermost block) +- Fix `lower_literal` to emit type-correct WASM const instructions — number literals now consult `TypedContext` and emit `i32.const` or `i64.const` based on inferred type instead of always emitting `i32.const` +- Fix `wasm_to_v` public API signature — parameter changed from `&Vec` to idiomatic `&[u8]` + --- ## [0.0.1-alpha] - 2026-01-03 diff --git a/core/ast/src/arena.rs b/core/ast/src/arena.rs index 2594459..8a2803f 100644 --- a/core/ast/src/arena.rs +++ b/core/ast/src/arena.rs @@ -190,11 +190,12 @@ impl Arena { .collect() } - // BUG(reproducibility): Same non-deterministic iteration issue as `list_nodes_cmp`. - // See comment on `list_nodes_cmp` for details and required fix. pub fn filter_nodes bool>(&self, fn_predicate: T) -> Vec { - self.nodes - .values() + let mut entries: Vec<_> = self.nodes.iter().collect(); + entries.sort_unstable_by_key(|(id, _)| *id); + entries + .into_iter() + .map(|(_, node)| node) .filter(|node| fn_predicate(node)) .cloned() .collect() @@ -216,28 +217,83 @@ impl Arena { .unwrap_or_default() } - // BUG(reproducibility): This method iterates over `FxHashMap`, whose iteration - // order is non-deterministic. Any caller that collects results into a Vec and - // processes them sequentially (e.g., `source_files()`, `functions()`, - // `list_type_definitions()`) will produce output in unpredictable order. - // - // This directly affects WASM output determinism: the codegen pipeline calls - // `typed_context.source_files()` → `source_file.function_definitions()` → - // `compiler.visit_function_definition()`, so function emission order in the - // WASM binary depends on HashMap iteration order. - // - // FIX REQUIRED: Either sort results by node ID before returning, use `IndexMap` - // to preserve insertion order, or use `BTreeMap` for guaranteed sorted iteration. - // Node IDs are assigned sequentially by the parser, so sorting by ID restores - // the source-order determinism needed for reproducible builds. - // - // See also: `filter_nodes()` which has the same issue. + /// Iterates over all nodes sorted by node ID, applying a filter-map function. + /// + /// Node IDs are assigned sequentially by the parser, so sorting by ID + /// restores source-order determinism needed for reproducible builds. fn list_nodes_cmp<'a, T, F>(&'a self, cmp: F) -> impl Iterator + 'a where - F: Fn(&AstNode) -> Option + Clone + 'a, - T: Clone + 'static, + F: Fn(&AstNode) -> Option + 'a, { - let cmp = cmp.clone(); - self.nodes.iter().filter_map(move |(_, node)| cmp(node)) + let mut ids: Vec = self.nodes.keys().copied().collect(); + ids.sort_unstable(); + ids.into_iter() + .filter_map(move |id| self.nodes.get(&id).and_then(&cmp)) + } +} + +#[cfg(test)] +mod arena_tests { + use super::*; + use crate::nodes::{Expression, Literal, Location, NumberLiteral}; + + fn make_number_literal_node(id: u32) -> AstNode { + AstNode::Expression(Expression::Literal(Literal::Number(Rc::new( + NumberLiteral { + id, + location: Location::default(), + value: id.to_string(), + }, + )))) + } + + #[test] + fn filter_nodes_returns_ascending_node_id_order() { + let mut arena = Arena::default(); + let ids: Vec = vec![50, 10, 40, 20, 30]; + for &id in &ids { + let node = make_number_literal_node(id); + arena.add_node(node, u32::MAX); + } + + let filtered = arena.filter_nodes(|_| true); + + let result_ids: Vec = filtered.iter().map(AstNode::id).collect(); + assert_eq!(result_ids, vec![10, 20, 30, 40, 50]); + } + + #[test] + fn filter_nodes_preserves_order_with_predicate() { + let mut arena = Arena::default(); + for id in [30, 10, 50, 20, 40] { + let node = make_number_literal_node(id); + arena.add_node(node, u32::MAX); + } + + let filtered = arena.filter_nodes(|node| node.id() > 20); + + let result_ids: Vec = filtered.iter().map(AstNode::id).collect(); + assert_eq!(result_ids, vec![30, 40, 50]); + } + + #[test] + fn list_nodes_cmp_returns_ascending_node_id_order() { + let mut arena = Arena::default(); + for id in [30, 10, 50, 20, 40] { + let node = make_number_literal_node(id); + arena.add_node(node, u32::MAX); + } + + let ids: Vec = arena + .list_nodes_cmp(|node| { + if let AstNode::Expression(Expression::Literal(Literal::Number(n))) = node { + Some(n.id) + } else { + None + } + }) + .collect(); + + assert_eq!(ids, vec![10, 20, 30, 40, 50]); } } diff --git a/core/cli/src/main.rs b/core/cli/src/main.rs index 4bc927d..74afaca 100644 --- a/core/cli/src/main.rs +++ b/core/cli/src/main.rs @@ -282,7 +282,7 @@ fn main() { println!("WASM generated at: {}", wasm_file_path.to_string_lossy()); } if args.generate_v_output { - match wasm_to_v(source_fname, &wasm_bytes.to_vec()) { + match wasm_to_v(source_fname, wasm_bytes) { Ok(v_output) => { let v_file_path = output_path.join(format!("{source_fname}.v")); if let Err(e) = fs::create_dir_all(&output_path) { diff --git a/core/inference/README.md b/core/inference/README.md index 28e4cef..e31bd26 100644 --- a/core/inference/README.md +++ b/core/inference/README.md @@ -48,7 +48,7 @@ The crate exposes five primary functions: | [`type_check`] | `Arena` | `TypedContext` | Type check and infer types | | [`analyze`] | `&TypedContext` | `()` | Semantic analysis (WIP) | | [`codegen`] | `&TypedContext` | `Vec` | Generate WebAssembly bytecode | -| [`wasm_to_v`] | `&str`, `&Vec` | `String` | Translate WASM to Rocq | +| [`wasm_to_v`] | `&str`, `&[u8]` | `String` | Translate WASM to Rocq | ## Compilation Pipeline diff --git a/core/inference/src/lib.rs b/core/inference/src/lib.rs index 9b362b8..81dda6d 100644 --- a/core/inference/src/lib.rs +++ b/core/inference/src/lib.rs @@ -125,7 +125,7 @@ //! let typed_context = type_check(arena)?; //! let codegen_output = codegen(&typed_context)?; //! // WASM bytes are directly available from codegen output: -//! // wasm_to_v("MyModule", &codegen_output.wasm().to_vec()) +//! // wasm_to_v("MyModule", codegen_output.wasm()) //! # Ok::<(), anyhow::Error>(()) //! ``` //! @@ -197,7 +197,7 @@ //! let arena = parse(source_code)?; //! let typed_context = type_check(arena)?; //! let codegen_output = codegen(&typed_context)?; -//! let rocq_code = wasm_to_v(module_name, &codegen_output.wasm().to_vec())?; +//! let rocq_code = wasm_to_v(module_name, codegen_output.wasm())?; //! Ok(rocq_code) //! } //! ``` @@ -598,7 +598,7 @@ pub fn codegen( /// let arena = parse(source)?; /// let typed_context = type_check(arena)?; /// let codegen_output = codegen(&typed_context)?; -/// let rocq_code = wasm_to_v("EvenChecker", &codegen_output.wasm().to_vec())?; +/// let rocq_code = wasm_to_v("EvenChecker", codegen_output.wasm())?; /// # Ok::<(), anyhow::Error>(()) /// ``` /// @@ -621,7 +621,7 @@ pub fn codegen( /// let typed_context = type_check(arena)?; /// let codegen_output = codegen(&typed_context)?; /// // WASM bytes are directly available from codegen output: -/// // wasm_to_v("CommutativityProof", &codegen_output.wasm().to_vec()) +/// // wasm_to_v("CommutativityProof", codegen_output.wasm()) /// # Ok::<(), anyhow::Error>(()) /// ``` /// @@ -691,9 +691,9 @@ pub fn codegen( /// - [WebAssembly Specification](https://webassembly.github.io/spec/) /// - [Inference Language Specification](https://github.com/Inferara/inference-language-spec) /// - [`inference_wasm_to_v_translator`] for implementation details -pub fn wasm_to_v(mod_name: &str, wasm: &Vec) -> anyhow::Result { +pub fn wasm_to_v(mod_name: &str, wasm: &[u8]) -> anyhow::Result { if let Ok(v) = - inference_wasm_to_v_translator::wasm_parser::translate_bytes(mod_name, wasm.as_slice()) + inference_wasm_to_v_translator::wasm_parser::translate_bytes(mod_name, wasm) { Ok(v) } else { diff --git a/core/wasm-codegen/src/compiler.rs b/core/wasm-codegen/src/compiler.rs index 1437cfe..f10a91d 100644 --- a/core/wasm-codegen/src/compiler.rs +++ b/core/wasm-codegen/src/compiler.rs @@ -269,7 +269,7 @@ impl Compiler { .expect("Constant definition must have a type info") .kind { - TypeInfoKind::Number(NumberType::I64) => ValType::I64, + TypeInfoKind::Number(NumberType::I64 | NumberType::U64) => ValType::I64, _ => ValType::I32, }; locals_map.insert(constant_definition.name(), (*local_idx, val_type)); @@ -400,8 +400,8 @@ impl Compiler { Statement::Expression(expression) => { self.lower_expression(&expression, ctx, func, locals_map); if statements_iterator.peek().is_none() - && parent_blocks_stack.first().unwrap().is_non_det() - && parent_blocks_stack.first().unwrap().is_void() + && parent_blocks_stack.last().unwrap().is_non_det() + && parent_blocks_stack.last().unwrap().is_void() { //TODO: once FunctionCall is implemented (which could call a void function), or Unit literal, //the Drop would become a bug — it'd try to pop from an empty stack. @@ -456,10 +456,80 @@ impl Compiler { ), }, NumberType::I64 => todo!(), - NumberType::U8 => todo!(), - NumberType::U16 => todo!(), - NumberType::U32 => todo!(), - NumberType::U64 => todo!(), + NumberType::U8 => match &constant_definition.value { + Literal::Number(number_literal) => { + let val = i32::from( + number_literal + .value + .parse::() + .expect("Failed to parse unsigned 8-bit integer literal"), + ); + func.instruction(&Instruction::I32Const(val)); + let (local_idx, _) = locals_map + .get(&constant_definition.name()) + .expect("Local not found in pre-scan"); + func.instruction(&Instruction::LocalSet(*local_idx)); + } + _ => panic!( + "Constant value for u8 should be a number literal. Found: {:?}", + constant_definition.value + ), + }, + NumberType::U16 => match &constant_definition.value { + Literal::Number(number_literal) => { + let val = i32::from( + number_literal + .value + .parse::() + .expect("Failed to parse unsigned 16-bit integer literal"), + ); + func.instruction(&Instruction::I32Const(val)); + let (local_idx, _) = locals_map + .get(&constant_definition.name()) + .expect("Local not found in pre-scan"); + func.instruction(&Instruction::LocalSet(*local_idx)); + } + _ => panic!( + "Constant value for u16 should be a number literal. Found: {:?}", + constant_definition.value + ), + }, + NumberType::U32 => match &constant_definition.value { + Literal::Number(number_literal) => { + let val = number_literal + .value + .parse::() + .expect("Failed to parse unsigned 32-bit integer literal") + .cast_signed(); + func.instruction(&Instruction::I32Const(val)); + let (local_idx, _) = locals_map + .get(&constant_definition.name()) + .expect("Local not found in pre-scan"); + func.instruction(&Instruction::LocalSet(*local_idx)); + } + _ => panic!( + "Constant value for u32 should be a number literal. Found: {:?}", + constant_definition.value + ), + }, + NumberType::U64 => match &constant_definition.value { + Literal::Number(number_literal) => { + let val = number_literal + .value + .parse::() + .expect("Failed to parse unsigned 64-bit integer literal") + .cast_signed(); + func.instruction(&Instruction::I64Const(val)); + let (local_idx, _) = locals_map + .get(&constant_definition.name()) + .expect("Local not found in pre-scan"); + func.instruction(&Instruction::LocalSet(*local_idx)); + } + _ => panic!( + "Constant value for u64 should be a number literal. Found: {:?}", + constant_definition.value + ), + }, } } TypeInfoKind::Custom(_) => todo!(), @@ -510,7 +580,7 @@ impl Compiler { Expression::Struct(_struct_expression) => todo!(), Expression::PrefixUnary(_prefix_unary_expression) => todo!(), Expression::Parenthesized(_parenthesized_expression) => todo!(), - Expression::Literal(literal) => self.lower_literal(literal, func), + Expression::Literal(literal) => self.lower_literal(literal, ctx, func), Expression::Identifier(identifier) => { let (local_idx, _) = locals_map .get(&identifier.name) @@ -540,14 +610,15 @@ impl Compiler { /// # Literal Types /// /// - **Bool** - Emitted as `i32.const` (0 for false, 1 for true) per WASM convention - /// - **Number** - Parsed from string and emitted as `i32.const` + /// - **Number** - Emitted as the appropriate const instruction based on inferred type /// /// # Parameters /// /// - `literal` - AST literal node to convert + /// - `ctx` - Typed context for type lookups /// - `func` - WASM function body being built #[allow(clippy::unused_self)] - fn lower_literal(&self, literal: &Literal, func: &mut Function) { + fn lower_literal(&self, literal: &Literal, ctx: &TypedContext, func: &mut Function) { match literal { Literal::Array(_array_literal) => todo!(), Literal::Bool(bool_literal) => { @@ -555,8 +626,60 @@ impl Compiler { } Literal::String(_string_literal) => todo!(), Literal::Number(number_literal) => { - let val = number_literal.value.parse::().unwrap_or(0); - func.instruction(&Instruction::I32Const(val)); + let type_info = ctx + .get_node_typeinfo(number_literal.id) + .expect("Number literal must have type info"); + match type_info.kind { + TypeInfoKind::Number(NumberType::I8 | NumberType::I16 | NumberType::I32) => { + let val = number_literal + .value + .parse::() + .expect("Failed to parse signed 32-bit integer literal"); + func.instruction(&Instruction::I32Const(val)); + } + TypeInfoKind::Number(NumberType::U8) => { + let val = i32::from( + number_literal + .value + .parse::() + .expect("Failed to parse unsigned 8-bit integer literal"), + ); + func.instruction(&Instruction::I32Const(val)); + } + TypeInfoKind::Number(NumberType::U16) => { + let val = i32::from( + number_literal + .value + .parse::() + .expect("Failed to parse unsigned 16-bit integer literal"), + ); + func.instruction(&Instruction::I32Const(val)); + } + TypeInfoKind::Number(NumberType::U32) => { + let val = number_literal + .value + .parse::() + .expect("Failed to parse unsigned 32-bit integer literal") + .cast_signed(); + func.instruction(&Instruction::I32Const(val)); + } + TypeInfoKind::Number(NumberType::I64) => { + let val = number_literal + .value + .parse::() + .expect("Failed to parse signed 64-bit integer literal"); + func.instruction(&Instruction::I64Const(val)); + } + TypeInfoKind::Number(NumberType::U64) => { + let val = number_literal + .value + .parse::() + .expect("Failed to parse unsigned 64-bit integer literal") + .cast_signed(); + func.instruction(&Instruction::I64Const(val)); + } + _ => panic!("Unsupported number literal type: {:?}", type_info.kind), + } } Literal::Unit(_unit_literal) => todo!(), } diff --git a/tests/src/codegen/wasm/validation.rs b/tests/src/codegen/wasm/validation.rs index 60f39c2..169c045 100644 --- a/tests/src/codegen/wasm/validation.rs +++ b/tests/src/codegen/wasm/validation.rs @@ -10,7 +10,8 @@ #[cfg(test)] mod codegen_validation_tests { use crate::utils::{ - codegen_output, codegen_output_with_mode, codegen_with_full_config, codegen_with_target_mode, + codegen_output, codegen_output_with_mode, codegen_with_full_config, + codegen_with_target_mode, }; use inference_wasm_codegen::{CompilationMode, OptLevel, Target}; @@ -277,6 +278,163 @@ mod codegen_validation_tests { .unwrap_or_else(|e| panic!("i64 return WASM is invalid: {e}")); } + // --- Nested non-det block tests (Bug #3: Drop emission uses parent_blocks_stack.last()) --- + + #[test] + fn nested_nondet_forall_inside_exists_produces_valid_wasm() { + let source = + r#"pub fn nested_forall_in_exists() { exists { forall { const a: i32 = 42; } } }"#; + let output = codegen_output(source); + let wasm = output.wasm(); + inf_wasmparser::validate(wasm) + .unwrap_or_else(|e| panic!("Nested non-det WASM is invalid: {e}")); + assert!( + wasm_contains_bytes(wasm, &[0xfc, 0x3b]), + "WASM should contain exists opcode (0xfc 0x3b)" + ); + assert!( + wasm_contains_bytes(wasm, &[0xfc, 0x3a]), + "WASM should contain forall opcode (0xfc 0x3a)" + ); + } + + #[test] + fn nested_nondet_forall_inside_forall_produces_valid_wasm() { + let source = r#"pub fn nested_forall() { forall { forall { const a: i32 = 42; } } }"#; + let output = codegen_output(source); + let wasm = output.wasm(); + inf_wasmparser::validate(wasm) + .unwrap_or_else(|e| panic!("Nested forall-in-forall WASM is invalid: {e}")); + let forall_count = wasm.windows(2).filter(|w| w == &[0xfc, 0x3a]).count(); + assert_eq!( + forall_count, 2, + "WASM should contain exactly 2 forall opcodes for nested forall blocks" + ); + } + + #[test] + fn nested_nondet_expression_drop_uses_innermost_block() { + let source = r#"pub fn nested_drop_test() -> i32 { exists { forall { const x: i32 = 99; } } return 0; }"#; + let output = codegen_output(source); + let wasm = output.wasm(); + inf_wasmparser::validate(wasm) + .unwrap_or_else(|e| panic!("Nested non-det with const WASM is invalid: {e}")); + assert!( + wasm_contains_bytes(wasm, &[0xfc, 0x3b]), + "WASM should contain exists opcode" + ); + assert!( + wasm_contains_bytes(wasm, &[0xfc, 0x3a]), + "WASM should contain forall opcode" + ); + } + + // --- i64 literal tests (Bug #4: lower_literal dispatches I64Const for i64/u64) --- + + #[test] + fn i64_literal_in_return_rejected_by_type_checker() { + //FIXME: The type checker infers bare number literals as i32, so `return 100` + // in an i64 function is rejected. Once the type checker supports i64 literal + // inference (e.g., via bidirectional type checking that propagates the expected + // return type into the literal), this test should be updated to verify that the + // WASM output contains i64.const (opcode 0x42) instead of i32.const (opcode 0x41). + // The lower_literal fix in compiler.rs correctly dispatches I64Const for i64-typed + // literals, but the type checker prevents this code path from being reached today. + let source = "pub fn get_i64_value() -> i64 { return 100; }"; + let arena = crate::utils::build_ast(source.to_string()); + let type_check_result = + inference_type_checker::TypeCheckerBuilder::build_typed_context(arena); + match type_check_result { + Ok(_) => panic!("Type checker should reject i32 literal in i64 return position"), + Err(err) => { + let err_msg = err.to_string(); + assert!( + err_msg.contains("type mismatch"), + "Error should be a type mismatch. Got: {err_msg}" + ); + } + } + } + + #[test] + fn i64_uzumaki_in_return_emits_i64_uzumaki_opcode() { + let source = "pub fn get_i64() -> i64 { return @; }"; + let output = codegen_output(source); + let wasm = output.wasm(); + inf_wasmparser::validate(wasm) + .unwrap_or_else(|e| panic!("i64 uzumaki return WASM is invalid: {e}")); + assert!( + wasm_contains_bytes(wasm, &[0xfc, 0x32]), + "WASM should contain i64.uzumaki opcode (0xfc 0x32)" + ); + } + + #[test] + fn nondet_void_block_trailing_expression_emits_drop() { + let source = r#"pub fn drop_test() { forall { const a: i32 = 42; a; } }"#; + let output = codegen_output(source); + let wasm = output.wasm(); + inf_wasmparser::validate(wasm).unwrap_or_else(|e| panic!("Drop-path WASM is invalid: {e}")); + assert!( + wasm_contains_bytes(wasm, &[0x1a]), + "WASM should contain Drop opcode (0x1a) for trailing expression in non-det void block" + ); + } + + // --- Unsigned integer literal tests (lower_literal U8/U16/U32/U64 arms) --- + + #[test] + fn u8_literal_emits_i32const() { + let source = r#"pub fn u8_test() { const x: u8 = 255; }"#; + let output = codegen_output(source); + let wasm = output.wasm(); + inf_wasmparser::validate(wasm) + .unwrap_or_else(|e| panic!("u8 literal WASM is invalid: {e}")); + assert!( + wasm_contains_bytes(wasm, &[0x41]), + "WASM should contain i32.const (0x41) for u8 literal" + ); + } + + #[test] + fn u16_literal_emits_i32const() { + let source = r#"pub fn u16_test() { const x: u16 = 60000; }"#; + let output = codegen_output(source); + let wasm = output.wasm(); + inf_wasmparser::validate(wasm) + .unwrap_or_else(|e| panic!("u16 literal WASM is invalid: {e}")); + assert!( + wasm_contains_bytes(wasm, &[0x41]), + "WASM should contain i32.const (0x41) for u16 literal" + ); + } + + #[test] + fn u32_literal_emits_i32const() { + let source = r#"pub fn u32_test() { const x: u32 = 3000000000; }"#; + let output = codegen_output(source); + let wasm = output.wasm(); + inf_wasmparser::validate(wasm) + .unwrap_or_else(|e| panic!("u32 literal WASM is invalid: {e}")); + assert!( + wasm_contains_bytes(wasm, &[0x41]), + "WASM should contain i32.const (0x41) for u32 literal" + ); + } + + #[test] + fn u64_literal_emits_i64const() { + let source = r#"pub fn u64_test() { const x: u64 = 9000000000000000000; }"#; + let output = codegen_output(source); + let wasm = output.wasm(); + inf_wasmparser::validate(wasm) + .unwrap_or_else(|e| panic!("u64 literal WASM is invalid: {e}")); + assert!( + wasm_contains_bytes(wasm, &[0x42]), + "WASM should contain i64.const (0x42) for u64 literal" + ); + } + // --- Helper functions --- /// Checks if a byte slice contains a given subsequence of bytes.