From 863de11b98ea6b614f2e482b2ebe14167f2e93f1 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Mon, 6 Feb 2023 13:53:34 -0800
Subject: [PATCH 01/26] feat: upgrade overall `halo2-base` API to support
 future multi-threaded assignments using our basic gate

* WIP: currently `gates::flex_gate` is updated and passes basic test
---
 CHANGELOG.md                      |    4 +
 Cargo.toml                        |    2 +-
 halo2-base/Cargo.toml             |    5 +-
 halo2-base/src/gates/flex_gate.rs | 1278 ++++++++++++++++-------------
 halo2-base/src/gates/mod.rs       |  911 +++++++-------------
 halo2-base/src/gates/tests.rs     |  171 ++--
 halo2-base/src/lib.rs             |  698 ++++++----------
 halo2-base/src/utils.rs           |    9 +-
 8 files changed, 1297 insertions(+), 1781 deletions(-)
 create mode 100644 CHANGELOG.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000..ab67d01e
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,4 @@
+# v0.3.0
+
+- Remove `PlonkPlus` strategy for `GateInstructions` to reduce code complexity.
+  - Because this strategy involved 1 selector AND 1 fixed column per advice column, it seems hard to justify that it will lead to better performance for the prover or verifier.
diff --git a/Cargo.toml b/Cargo.toml
index 4f01110c..a21fa775 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [workspace]
 members = [
     "halo2-base",
-    "halo2-ecc",
+    # "halo2-ecc",
     "hashes/zkevm-keccak",
 ]
 
diff --git a/halo2-base/Cargo.toml b/halo2-base/Cargo.toml
index 0046f2e0..caf56709 100644
--- a/halo2-base/Cargo.toml
+++ b/halo2-base/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "halo2-base"
-version = "0.2.2"
+version = "0.3.0"
 edition = "2021"
 
 [dependencies]
@@ -11,6 +11,9 @@ num-traits = "0.2"
 rand_chacha = "0.3"
 rustc-hash = "1.1"
 ff = "0.12"
+crossbeam = "0.8.2"
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
 
 # Use Axiom's custom halo2 monorepo for faster proving when feature = "halo2-axiom" is on
 halo2_proofs_axiom = { git = "https://github.com/axiom-crypto/halo2.git", tag = "v2023_01_17", package = "halo2_proofs", optional = true }
diff --git a/halo2-base/src/gates/flex_gate.rs b/halo2-base/src/gates/flex_gate.rs
index 0f266b8d..4f646cdd 100644
--- a/halo2-base/src/gates/flex_gate.rs
+++ b/halo2-base/src/gates/flex_gate.rs
@@ -1,29 +1,32 @@
+use serde::{Deserialize, Serialize};
+
 use super::{
-    AssignedValue, Context, GateInstructions,
-    QuantumCell::{self, Constant, Existing, Witness},
+    AssignedValue, Context,
+    QuantumCell::{self, Constant, Existing, Witness, WitnessFraction},
 };
-use crate::halo2_proofs::{
-    circuit::Value,
-    plonk::{
-        Advice, Assigned, Column, ConstraintSystem, FirstPhase, Fixed, SecondPhase, Selector,
-        ThirdPhase,
+use crate::utils::ScalarField;
+use crate::{
+    halo2_proofs::{
+        plonk::{
+            Advice, Assigned, Column, ConstraintSystem, FirstPhase, Fixed, SecondPhase, Selector,
+            ThirdPhase,
+        },
+        poly::Rotation,
     },
-    poly::Rotation,
+    ContextCell,
 };
-use crate::utils::ScalarField;
-use itertools::Itertools;
 use std::{
-    iter::{self, once},
+    iter::{self},
     marker::PhantomData,
 };
 
 /// The maximum number of phases halo2 currently supports
 pub const MAX_PHASE: usize = 3;
 
-#[derive(Clone, Copy, Debug, PartialEq)]
+// Currently there is only one strategy, but we may add more in the future
+#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
 pub enum GateStrategy {
     Vertical,
-    PlonkPlus,
 }
 
 #[derive(Clone, Debug)]
@@ -37,15 +40,7 @@ pub struct BasicGateConfig<F: ScalarField> {
     // * q = q_enable[0]
     // * q_i is either 0 or 1 so this is just a simple selector
     // We chose `a + b * c` instead of `a * b + c` to allow "chaining" of gates, i.e., the output of one gate because `a` in the next gate
-
-    // If strategy is PlonkPlus, then this is a slightly extended version of the vanilla plonk (vertical) gate
-    // `q_io * (a + q_left * b + q_right * c + q_mul * b * c - d)`
-    // where
-    // * a = value[0], b = value[1], c = value[2], d = value[3]
-    // * the q_{} can be any fixed values in F, placed in two fixed columns
-    // * it is crucial that q_io goes in its own selector column! we need it to be 0, 1 to turn on/off the gate
     pub q_enable: Selector,
-    pub q_enable_plus: Vec<Column<Fixed>>,
     // one column to store the inputs and outputs of the gate
     pub value: Column<Advice>,
     _marker: PhantomData<F>,
@@ -65,17 +60,10 @@ impl<F: ScalarField> BasicGateConfig<F> {
 
         match strategy {
             GateStrategy::Vertical => {
-                let config = Self { q_enable, q_enable_plus: vec![], value, _marker: PhantomData };
+                let config = Self { q_enable, value, _marker: PhantomData };
                 config.create_gate(meta);
                 config
             }
-            GateStrategy::PlonkPlus => {
-                let q_aux = meta.fixed_column();
-                let config =
-                    Self { q_enable, q_enable_plus: vec![q_aux], value, _marker: PhantomData };
-                config.create_plonk_gate(meta);
-                config
-            }
         }
     }
 
@@ -91,25 +79,6 @@ impl<F: ScalarField> BasicGateConfig<F> {
             vec![q * (a + b * c - out)]
         })
     }
-
-    fn create_plonk_gate(&self, meta: &mut ConstraintSystem<F>) {
-        meta.create_gate("plonk plus", |meta| {
-            // q_io * (a + q_left * b + q_right * c + q_mul * b * c - d)
-            // the gate is turned "off" as long as q_io = 0
-            let q_io = meta.query_selector(self.q_enable);
-
-            let q_mul = meta.query_fixed(self.q_enable_plus[0], Rotation::cur());
-            let q_left = meta.query_fixed(self.q_enable_plus[0], Rotation::next());
-            let q_right = meta.query_fixed(self.q_enable_plus[0], Rotation(2));
-
-            let a = meta.query_advice(self.value, Rotation::cur());
-            let b = meta.query_advice(self.value, Rotation::next());
-            let c = meta.query_advice(self.value, Rotation(2));
-            let d = meta.query_advice(self.value, Rotation(3));
-
-            vec![q_io * (a + q_left * b.clone() + q_right * c.clone() + q_mul * b * c - d)]
-        })
-    }
 }
 
 #[derive(Clone, Debug)]
@@ -118,14 +87,8 @@ pub struct FlexGateConfig<F: ScalarField> {
     // `constants` is a vector of fixed columns for allocating constant values
     pub constants: Vec<Column<Fixed>>,
     pub num_advice: [usize; MAX_PHASE],
-    strategy: GateStrategy,
-    gate_len: usize,
-    pub context_id: usize,
+    _strategy: GateStrategy,
     pub max_rows: usize,
-
-    pub pow_of_two: Vec<F>,
-    /// To avoid Montgomery conversion in `F::from` for common small numbers, we keep a cache of field elements
-    pub field_element_cache: Vec<F>,
 }
 
 impl<F: ScalarField> FlexGateConfig<F> {
@@ -134,7 +97,6 @@ impl<F: ScalarField> FlexGateConfig<F> {
         strategy: GateStrategy,
         num_advice: &[usize],
         num_fixed: usize,
-        context_id: usize,
         // log2_ceil(# rows in circuit)
         circuit_degree: usize,
     ) -> Self {
@@ -145,17 +107,9 @@ impl<F: ScalarField> FlexGateConfig<F> {
             // meta.enable_constant(c);
             constants.push(c);
         }
-        let mut pow_of_two = Vec::with_capacity(F::NUM_BITS as usize);
-        let two = F::from(2);
-        pow_of_two.push(F::one());
-        pow_of_two.push(two);
-        for _ in 2..F::NUM_BITS {
-            pow_of_two.push(two * pow_of_two.last().unwrap());
-        }
-        let field_element_cache = (0..1024).map(|i| F::from(i)).collect();
 
         match strategy {
-            GateStrategy::Vertical | GateStrategy::PlonkPlus => {
+            GateStrategy::Vertical => {
                 let mut basic_gates = [(); MAX_PHASE].map(|_| vec![]);
                 let mut num_advice_array = [0usize; MAX_PHASE];
                 for ((phase, &num_columns), gates) in
@@ -170,520 +124,693 @@ impl<F: ScalarField> FlexGateConfig<F> {
                     basic_gates,
                     constants,
                     num_advice: num_advice_array,
-                    strategy,
-                    gate_len: 4,
-                    context_id,
+                    _strategy: strategy,
                     /// Warning: this needs to be updated if you create more advice columns after this `FlexGateConfig` is created
                     max_rows: (1 << circuit_degree) - meta.minimum_rows(),
-                    pow_of_two,
-                    field_element_cache,
                 }
             }
         }
     }
+}
 
-    pub fn inner_product_simple<'a, 'b: 'a>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        b: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-    ) -> AssignedValue<'b, F> {
-        let mut sum;
-        let mut a = a.into_iter();
-        let mut b = b.into_iter().peekable();
+pub trait GateInstructions<F: ScalarField> {
+    fn strategy(&self) -> GateStrategy;
 
-        let cells = if matches!(b.peek(), Some(Constant(c)) if c == &F::one()) {
-            b.next();
-            let start_a = a.next().unwrap();
-            sum = start_a.value().copied();
-            iter::once(start_a)
-        } else {
-            sum = Value::known(F::zero());
-            iter::once(Constant(F::zero()))
-        }
-        .chain(a.zip(b).flat_map(|(a, b)| {
-            sum = sum + a.value().zip(b.value()).map(|(a, b)| *a * b);
-            [a, b, Witness(sum)]
-        }));
+    fn pow_of_two(&self) -> &[F];
+    fn get_field_element(&self, n: u64) -> F;
 
-        let (lo, hi) = cells.size_hint();
-        debug_assert_eq!(Some(lo), hi);
-        let len = lo / 3;
-        let gate_offsets = (0..len).map(|i| (3 * i as isize, None));
-        self.assign_region_last(ctx, cells, gate_offsets)
+    /// Copies a, b and constrains `a + b * 1 = out`
+    // | a | b | 1 | a + b |
+    fn add(
+        &self,
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+    ) -> AssignedValue<F> {
+        let a = a.into();
+        let b = b.into();
+        let out_val = *a.value() + b.value();
+        ctx.assign_region_last(vec![a, b, Constant(F::one()), Witness(out_val)], vec![0])
     }
 
-    pub fn inner_product_simple_with_assignments<'a, 'b: 'a>(
+    /// Copies a, b and constrains `out + b * 1 = a`, i.e. returns `out = a - b`
+    // | a - b | b | 1 | a |
+    fn sub(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        b: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-    ) -> (Vec<AssignedValue<'b, F>>, AssignedValue<'b, F>) {
-        let mut sum;
-        let mut a = a.into_iter();
-        let mut b = b.into_iter().peekable();
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+    ) -> AssignedValue<F> {
+        let a = a.into();
+        let b = b.into();
+        let out_val = *a.value() - b.value();
+        // slightly better to not have to compute -F::one() since F::one() is cached
+        ctx.assign_region(vec![Witness(out_val), b, Constant(F::one()), a], vec![0]);
+        ctx.get(-4)
+    }
 
-        let cells = if matches!(b.peek(), Some(Constant(c)) if c == &F::one()) {
-            b.next();
-            let start_a = a.next().unwrap();
-            sum = start_a.value().copied();
-            iter::once(start_a)
-        } else {
-            sum = Value::known(F::zero());
-            iter::once(Constant(F::zero()))
-        }
-        .chain(a.zip(b).flat_map(|(a, b)| {
-            sum = sum + a.value().zip(b.value()).map(|(a, b)| *a * b);
-            [a, b, Witness(sum)]
-        }));
+    // | a | -a | 1 | 0 |
+    fn neg(&self, ctx: &mut Context<F>, a: impl Into<QuantumCell<F>>) -> AssignedValue<F> {
+        let a = a.into();
+        let out_val = -*a.value();
+        ctx.assign_region(
+            vec![a, Witness(out_val), Constant(F::one()), Constant(F::zero())],
+            vec![0],
+        );
+        ctx.get(-3)
+    }
 
-        let (lo, hi) = cells.size_hint();
-        debug_assert_eq!(Some(lo), hi);
-        let len = lo / 3;
-        let gate_offsets = (0..len).map(|i| (3 * i as isize, None));
-        let mut assignments = self.assign_region(ctx, cells, gate_offsets);
-        let last = assignments.pop().unwrap();
-        (assignments, last)
+    /// Copies a, b and constrains `0 + a * b = out`
+    // | 0 | a | b | a * b |
+    fn mul(
+        &self,
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+    ) -> AssignedValue<F> {
+        let a = a.into();
+        let b = b.into();
+        let out_val = *a.value() * b.value();
+        ctx.assign_region_last(vec![Constant(F::zero()), a, b, Witness(out_val)], vec![0])
     }
 
-    fn inner_product_with_assignments<'a, 'b: 'a>(
+    /// a * b + c
+    fn mul_add(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        b: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-    ) -> (Vec<AssignedValue<'b, F>>, AssignedValue<'b, F>) {
-        // we will do special handling of the cases where one of the vectors is all constants
-        match self.strategy {
-            GateStrategy::PlonkPlus => {
-                let vec_a = a.into_iter().collect::<Vec<_>>();
-                let vec_b = b.into_iter().collect::<Vec<_>>();
-                if vec_b.iter().all(|b| matches!(b, Constant(_))) {
-                    let vec_b: Vec<F> = vec_b
-                        .into_iter()
-                        .map(|b| if let Constant(c) = b { c } else { unreachable!() })
-                        .collect();
-                    let k = vec_a.len();
-                    let gate_segment = self.gate_len - 2;
-
-                    // Say a = [a0, .., a4] for example
-                    // Then to compute <a, b> we use transpose of
-                    // | 0  | a0 | a1 | x | a2 | a3 | y | a4 | 0 | <a,b> |
-                    // while letting q_enable equal transpose of
-                    // | *  |    |    | * |    |    | * |    |   |       |
-                    // | 0  | b0 | b1 | 0 | b2 | b3 | 0 | b4 | 0 |
-
-                    // we effect a small optimization if we know the constant b0 == 1: then instead of starting from 0 we can start from a0
-                    // this is a peculiarity of our plonk-plus gate
-                    let start_ida: usize = (vec_b[0] == F::one()).into();
-                    if start_ida == 1 && k == 1 {
-                        // this is just a0 * 1 = a0; you're doing nothing, why are you calling this function?
-                        return (vec![], self.assign_region_last(ctx, vec_a, vec![]));
-                    }
-                    let k_chunks = (k - start_ida + gate_segment - 1) / gate_segment;
-                    let mut cells = Vec::with_capacity(1 + (gate_segment + 1) * k_chunks);
-                    let mut gate_offsets = Vec::with_capacity(k_chunks);
-                    let mut running_sum =
-                        if start_ida == 1 { vec_a[0].clone() } else { Constant(F::zero()) };
-                    cells.push(running_sum.clone());
-                    for i in 0..k_chunks {
-                        let window = (start_ida + i * gate_segment)
-                            ..std::cmp::min(k, start_ida + (i + 1) * gate_segment);
-                        // we add a 0 at the start for q_mul = 0
-                        let mut c_window = [&[F::zero()], &vec_b[window.clone()]].concat();
-                        c_window.extend((c_window.len()..(gate_segment + 1)).map(|_| F::zero()));
-                        // c_window should have length gate_segment + 1
-                        gate_offsets.push((
-                            (i * (gate_segment + 1)) as isize,
-                            Some(c_window.try_into().expect("q_coeff should be correct len")),
-                        ));
-
-                        cells.extend(window.clone().map(|j| vec_a[j].clone()));
-                        cells.extend((window.len()..gate_segment).map(|_| Constant(F::zero())));
-                        running_sum = Witness(
-                            window.into_iter().fold(running_sum.value().copied(), |sum, j| {
-                                sum + Value::known(vec_b[j]) * vec_a[j].value()
-                            }),
-                        );
-                        cells.push(running_sum.clone());
-                    }
-                    let mut assignments = self.assign_region(ctx, cells, gate_offsets);
-                    let last = assignments.pop().unwrap();
-                    (assignments, last)
-                } else if vec_a.iter().all(|a| matches!(a, Constant(_))) {
-                    self.inner_product_with_assignments(ctx, vec_b, vec_a)
-                } else {
-                    self.inner_product_simple_with_assignments(ctx, vec_a, vec_b)
-                }
-            }
-            _ => self.inner_product_simple_with_assignments(ctx, a, b),
-        }
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+        c: impl Into<QuantumCell<F>>,
+    ) -> AssignedValue<F> {
+        let a = a.into();
+        let b = b.into();
+        let c = c.into();
+        let out_val = *a.value() * b.value() + c.value();
+        ctx.assign_region_last(vec![c, a, b, Witness(out_val)], vec![0])
     }
-}
 
-impl<F: ScalarField> GateInstructions<F> for FlexGateConfig<F> {
-    fn strategy(&self) -> GateStrategy {
-        self.strategy
+    /// (1 - a) * b = b - a * b
+    fn mul_not(
+        &self,
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+    ) -> AssignedValue<F> {
+        let a = a.into();
+        let b = b.into();
+        let out_val = (F::one() - a.value()) * b.value();
+        ctx.assign_region_smart(vec![Witness(out_val), a, b, b], vec![0], vec![(2, 3)], []);
+        ctx.get(-4)
     }
-    fn context_id(&self) -> usize {
-        self.context_id
+
+    /// Constrains `x` to be 0 or 1 by enforcing `0 + x * x = x`.
+    fn assert_bit(&self, ctx: &mut Context<F>, x: AssignedValue<F>) {
+        ctx.assign_region(
+            vec![Constant(F::zero()), Existing(x), Existing(x), Existing(x)],
+            vec![0],
+        );
     }
-    fn pow_of_two(&self) -> &[F] {
-        &self.pow_of_two
+
+    fn div_unsafe(
+        &self,
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+    ) -> AssignedValue<F> {
+        let a = a.into();
+        let b = b.into();
+        // TODO: if really necessary, make `c` of type `Assigned<F>`
+        // this would require the API using `Assigned<F>` instead of `F` everywhere, so leave as last resort
+        let c = b.value().invert().unwrap() * a.value();
+        ctx.assign_region(vec![Constant(F::zero()), Witness(c), b, a], vec![0]);
+        ctx.get(-3)
     }
-    fn get_field_element(&self, n: u64) -> F {
-        let get = self.field_element_cache.get(n as usize);
-        if let Some(fe) = get {
-            *fe
-        } else {
-            F::from(n)
+
+    fn assert_is_const(&self, ctx: &mut Context<F>, a: &AssignedValue<F>, constant: &F) {
+        if !ctx.witness_gen_only {
+            let c_index = ctx.assign_fixed(*constant);
+            ctx.constant_equality_constraints.push((
+                ContextCell { context_id: ctx.context_id, offset: c_index },
+                a.cell.unwrap(),
+            ));
         }
     }
-    /// All indices in `gate_offsets` are with respect to `inputs` indices
-    /// * `gate_offsets` specifies indices to enable selector for the gate
-    /// * `gate_offsets` specifies (index, Option<[q_left, q_right, q_mul, q_const, q_out]>)
-    /// * second coordinate should only be set if using strategy PlonkPlus; if not set, default to [1, 0, 0]
-    /// * allow the index in `gate_offsets` to be negative in case we want to do advanced overlapping
-    /// * gate_index can either be set if you know the specific column you want to assign to, or None if you want to auto-select index
-    /// * only selects from advice columns in `ctx.current_phase`
-    // same as `assign_region` except you can specify the `phase` to assign in
-    fn assign_region_in<'a, 'b: 'a>(
+
+    /// Returns the inner product of `<a, b>`
+    fn inner_product<QA>(
+        &self,
+        ctx: &mut Context<F>,
+        a: impl IntoIterator<Item = QA>,
+        b: impl IntoIterator<Item = QuantumCell<F>>,
+    ) -> AssignedValue<F>
+    where
+        QA: Into<QuantumCell<F>>;
+
+    /// Returns the inner product of `<a, b>` and the last item of `a` after it is assigned
+    fn inner_product_left_last<QA>(
+        &self,
+        ctx: &mut Context<F>,
+        a: impl IntoIterator<Item = QA>,
+        b: impl IntoIterator<Item = QuantumCell<F>>,
+    ) -> (AssignedValue<F>, AssignedValue<F>)
+    where
+        QA: Into<QuantumCell<F>>;
+
+    /// Returns a vector with the partial sums `sum_{j=0..=i} a[j] * b[j]`.
+    fn inner_product_with_sums<'thread, QA>(
         &self,
-        ctx: &mut Context<'_, F>,
-        inputs: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        gate_offsets: impl IntoIterator<Item = (isize, Option<[F; 3]>)>,
-        phase: usize,
-    ) -> Vec<AssignedValue<'b, F>> {
-        // We enforce the pattern that you should assign everything in current phase at once and then move onto next phase
-        debug_assert_eq!(phase, ctx.current_phase());
-
-        let inputs = inputs.into_iter();
-        let (len, hi) = inputs.size_hint();
+        ctx: &'thread mut Context<F>,
+        a: impl IntoIterator<Item = QA>,
+        b: impl IntoIterator<Item = QuantumCell<F>>,
+    ) -> Box<dyn Iterator<Item = AssignedValue<F>> + 'thread>
+    where
+        QA: Into<QuantumCell<F>>;
+
+    fn sum<Q>(&self, ctx: &mut Context<F>, a: impl IntoIterator<Item = Q>) -> AssignedValue<F>
+    where
+        Q: Into<QuantumCell<F>>,
+    {
+        let mut a = a.into_iter().peekable();
+        let start = a.next();
+        if start.is_none() {
+            return ctx.load_zero();
+        }
+        let start = start.unwrap().into();
+        if a.peek().is_none() {
+            return ctx.assign_region_last([start], []);
+        }
+        let (len, hi) = a.size_hint();
         debug_assert_eq!(Some(len), hi);
-        // we index into `advice_alloc` twice so this assert should save a bound check
-        assert!(self.context_id < ctx.advice_alloc.len(), "context id out of bounds");
 
-        let (gate_index, row_offset) = {
-            let alloc = ctx.advice_alloc.get_mut(self.context_id).unwrap();
+        let mut sum = *start.value();
+        let cells = iter::once(start).chain(a.flat_map(|a| {
+            let a = a.into();
+            sum += a.value();
+            [a, Constant(F::one()), Witness(sum)]
+        }));
+        ctx.assign_region_last(cells, (0..len).map(|i| 3 * i as isize))
+    }
 
-            if alloc.1 + len >= ctx.max_rows {
-                alloc.1 = 0;
-                alloc.0 += 1;
-            }
-            *alloc
-        };
+    /// Returns the assignment trace where `output[i]` has the running sum `sum_{j=0..=i} a[j]`
+    fn partial_sums<'thread, Q>(
+        &self,
+        ctx: &'thread mut Context<F>,
+        a: impl IntoIterator<Item = Q>,
+    ) -> Box<dyn Iterator<Item = AssignedValue<F>> + 'thread>
+    where
+        Q: Into<QuantumCell<F>>,
+    {
+        let mut a = a.into_iter().peekable();
+        let start = a.next();
+        if start.is_none() {
+            return Box::new(iter::once(ctx.load_zero()));
+        }
+        let start = start.unwrap().into();
+        if a.peek().is_none() {
+            return Box::new(iter::once(ctx.assign_region_last([start], [])));
+        }
+        let (len, hi) = a.size_hint();
+        debug_assert_eq!(Some(len), hi);
 
-        let basic_gate = self.basic_gates[phase]
-            .get(gate_index)
-            .unwrap_or_else(|| panic!("NOT ENOUGH ADVICE COLUMNS IN PHASE {phase}"));
-        let column = basic_gate.value;
-        let assignments = inputs
-            .enumerate()
-            .map(|(i, input)| {
-                ctx.assign_cell(
-                    input,
-                    column,
-                    #[cfg(feature = "display")]
-                    self.context_id,
-                    row_offset + i,
-                    #[cfg(feature = "halo2-pse")]
-                    (phase as u8),
-                )
+        let mut sum = *start.value();
+        let cells = iter::once(start).chain(a.flat_map(|a| {
+            let a = a.into();
+            sum += a.value();
+            [a, Constant(F::one()), Witness(sum)]
+        }));
+        ctx.assign_region(cells, (0..len).map(|i| 3 * i as isize));
+        Box::new((0..=len).rev().map(|i| ctx.get(-1 - 3 * (i as isize))))
+    }
+
+    // requires b.len() == a.len() + 1
+    // returns
+    // x_i = b_1 * (a_1...a_{i - 1})
+    //     + b_2 * (a_2...a_{i - 1})
+    //     + ...
+    //     + b_i
+    // Returns [x_1, ..., x_{b.len()}]
+    fn accumulated_product<QA, QB>(
+        &self,
+        ctx: &mut Context<F>,
+        a: impl IntoIterator<Item = QA>,
+        b: impl IntoIterator<Item = QB>,
+    ) -> Vec<AssignedValue<F>>
+    where
+        QA: Into<QuantumCell<F>>,
+        QB: Into<QuantumCell<F>>,
+    {
+        let mut b = b.into_iter();
+        let mut a = a.into_iter();
+        let b_first = b.next();
+        if let Some(b_first) = b_first {
+            let b_first = ctx.assign_region_last([b_first], []);
+            std::iter::successors(Some(b_first), |x| {
+                a.next().zip(b.next()).map(|(a, b)| self.mul_add(ctx, Existing(*x), a, b))
             })
-            .collect::<Vec<_>>();
-
-        for (i, q_coeff) in gate_offsets.into_iter() {
-            basic_gate
-                .q_enable
-                .enable(&mut ctx.region, (row_offset as isize + i) as usize)
-                .expect("enable selector should not fail");
-
-            if self.strategy == GateStrategy::PlonkPlus {
-                let q_coeff = q_coeff.unwrap_or([F::one(), F::zero(), F::zero()]);
-                for (j, q_coeff) in q_coeff.into_iter().enumerate() {
-                    #[cfg(feature = "halo2-axiom")]
-                    {
-                        ctx.region.assign_fixed(
-                            basic_gate.q_enable_plus[0],
-                            ((row_offset as isize) + i) as usize + j,
-                            Assigned::Trivial(q_coeff),
-                        );
-                    }
-                    #[cfg(feature = "halo2-pse")]
-                    {
-                        ctx.region
-                            .assign_fixed(
-                                || "",
-                                basic_gate.q_enable_plus[0],
-                                ((row_offset as isize) + i) as usize + j,
-                                || Value::known(q_coeff),
-                            )
-                            .unwrap();
-                    }
-                }
-            }
+            .collect()
+        } else {
+            vec![]
         }
+    }
 
-        ctx.advice_alloc[self.context_id].1 += assignments.len();
+    fn sum_products_with_coeff_and_var(
+        &self,
+        ctx: &mut Context<F>,
+        values: impl IntoIterator<Item = (F, QuantumCell<F>, QuantumCell<F>)>,
+        var: QuantumCell<F>,
+    ) -> AssignedValue<F>;
 
-        #[cfg(feature = "display")]
-        {
-            ctx.total_advice += assignments.len();
-        }
+    // | 1 - b | 1 | b | 1 | b | a | 1 - b | out |
+    fn or(
+        &self,
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+    ) -> AssignedValue<F> {
+        let a = a.into();
+        let b = b.into();
+        let not_b_val = F::one() - b.value();
+        let out_val = *a.value() + b.value() - *a.value() * b.value();
+        let cells = vec![
+            Witness(not_b_val),
+            Constant(F::one()),
+            b,
+            Constant(F::one()),
+            b,
+            a,
+            Witness(not_b_val),
+            Witness(out_val),
+        ];
+        ctx.assign_region_smart(cells, vec![0, 4], vec![(0, 6), (2, 4)], vec![]);
+        ctx.last().unwrap()
+    }
+
+    // | 0 | a | b | out |
+    fn and(
+        &self,
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+    ) -> AssignedValue<F> {
+        self.mul(ctx, a, b)
+    }
 
-        assignments
+    fn not(&self, ctx: &mut Context<F>, a: impl Into<QuantumCell<F>>) -> AssignedValue<F> {
+        self.sub(ctx, Constant(F::one()), a)
     }
 
-    fn assign_region_last_in<'a, 'b: 'a>(
+    /// assumes sel is boolean
+    /// returns
+    ///   a * sel + b * (1 - sel)
+    fn select(
+        &self,
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+        sel: impl Into<QuantumCell<F>>,
+    ) -> AssignedValue<F>;
+
+    /// returns: a || (b && c)
+    // | 1 - b c | b | c | 1 | a - 1 | 1 - b c | out | a - 1 | 1 | 1 | a |
+    fn or_and(
         &self,
-        ctx: &mut Context<'_, F>,
-        inputs: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        gate_offsets: impl IntoIterator<Item = (isize, Option<[F; 3]>)>,
-        phase: usize,
-    ) -> AssignedValue<'b, F> {
-        // We enforce the pattern that you should assign everything in current phase at once and then move onto next phase
-        debug_assert_eq!(phase, ctx.current_phase());
-
-        let inputs = inputs.into_iter();
-        let (len, hi) = inputs.size_hint();
-        debug_assert_eq!(hi, Some(len));
-        debug_assert_ne!(len, 0);
-        // we index into `advice_alloc` twice so this assert should save a bound check
-        assert!(self.context_id < ctx.advice_alloc.len(), "context id out of bounds");
-
-        let (gate_index, row_offset) = {
-            let alloc = ctx.advice_alloc.get_mut(self.context_id).unwrap();
-
-            if alloc.1 + len >= ctx.max_rows {
-                alloc.1 = 0;
-                alloc.0 += 1;
-            }
-            *alloc
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+        c: impl Into<QuantumCell<F>>,
+    ) -> AssignedValue<F>;
+
+    /// assume bits has boolean values
+    /// returns vec[idx] with vec[idx] = 1 if and only if bits == idx as a binary number
+    fn bits_to_indicator(
+        &self,
+        ctx: &mut Context<F>,
+        bits: &[AssignedValue<F>],
+    ) -> Vec<AssignedValue<F>> {
+        let k = bits.len();
+
+        let (inv_last_bit, last_bit) = {
+            ctx.assign_region(
+                vec![
+                    Witness(F::one() - bits[k - 1].value()),
+                    Existing(bits[k - 1]),
+                    Constant(F::one()),
+                    Constant(F::one()),
+                ],
+                vec![0],
+            );
+            (ctx.get(-4), ctx.get(-3))
         };
+        let mut indicator = Vec::with_capacity(2 * (1 << k) - 2);
+        let mut offset = 0;
+        indicator.push(inv_last_bit);
+        indicator.push(last_bit);
+        for (idx, bit) in bits.iter().rev().enumerate().skip(1) {
+            for old_idx in 0..(1 << idx) {
+                let inv_prod_val = (F::one() - bit.value()) * indicator[offset + old_idx].value();
+                ctx.assign_region(
+                    vec![
+                        Witness(inv_prod_val),
+                        Existing(indicator[offset + old_idx]),
+                        Existing(*bit),
+                        Existing(indicator[offset + old_idx]),
+                    ],
+                    vec![0],
+                );
+                indicator.push(ctx.get(-4));
 
-        let basic_gate = self.basic_gates[phase]
-            .get(gate_index)
-            .unwrap_or_else(|| panic!("NOT ENOUGH ADVICE COLUMNS IN PHASE {phase}"));
-        let column = basic_gate.value;
-        let mut out = None;
-        for (i, input) in inputs.enumerate() {
-            out = Some(ctx.assign_cell(
-                input,
-                column,
-                #[cfg(feature = "display")]
-                self.context_id,
-                row_offset + i,
-                #[cfg(feature = "halo2-pse")]
-                (phase as u8),
-            ));
+                let prod = self.mul(ctx, Existing(indicator[offset + old_idx]), Existing(*bit));
+                indicator.push(prod);
+            }
+            offset += 1 << idx;
         }
+        indicator.split_off((1 << k) - 2)
+    }
 
-        for (i, q_coeff) in gate_offsets.into_iter() {
-            basic_gate
-                .q_enable
-                .enable(&mut ctx.region, (row_offset as isize + i) as usize)
-                .expect("selector enable should not fail");
-
-            if self.strategy == GateStrategy::PlonkPlus {
-                let q_coeff = q_coeff.unwrap_or([F::one(), F::zero(), F::zero()]);
-                for (j, q_coeff) in q_coeff.into_iter().enumerate() {
-                    #[cfg(feature = "halo2-axiom")]
-                    {
-                        ctx.region.assign_fixed(
-                            basic_gate.q_enable_plus[0],
-                            ((row_offset as isize) + i) as usize + j,
-                            Assigned::Trivial(q_coeff),
-                        );
-                    }
-                    #[cfg(feature = "halo2-pse")]
-                    {
-                        ctx.region
-                            .assign_fixed(
-                                || "",
-                                basic_gate.q_enable_plus[0],
-                                ((row_offset as isize) + i) as usize + j,
-                                || Value::known(q_coeff),
-                            )
-                            .unwrap();
-                    }
-                }
+    // returns vec with vec.len() == len such that:
+    //     vec[i] == 1{i == idx}
+    fn idx_to_indicator(
+        &self,
+        ctx: &mut Context<F>,
+        idx: impl Into<QuantumCell<F>>,
+        len: usize,
+    ) -> Vec<AssignedValue<F>> {
+        let mut idx = idx.into();
+        let mut ind = Vec::with_capacity(len);
+        let idx_val = idx.value().get_lower_32() as usize;
+        for i in 0..len {
+            // check ind[i] * (i - idx) == 0
+            let ind_val = F::from(idx_val == i);
+            let val = if idx_val == i { *idx.value() } else { F::zero() };
+            ctx.assign_region_smart(
+                vec![
+                    Constant(F::zero()),
+                    Witness(ind_val),
+                    idx,
+                    Witness(val),
+                    Constant(-F::from(i as u64)),
+                    Witness(ind_val),
+                    Constant(F::zero()),
+                ],
+                vec![0, 3],
+                vec![(1, 5)],
+                vec![],
+            );
+            ind.push(ctx.get(-2));
+            // need to use assigned idx after i > 0 so equality constraint holds
+            if i == 0 {
+                idx = Existing(ctx.get(-5));
             }
         }
+        ind
+    }
+
+    // performs inner product on a, indicator
+    // `indicator` values are all boolean
+    /// Assumes for witness generation that only one element of `indicator` has non-zero value and that value is `F::one()`.
+    fn select_by_indicator<Q>(
+        &self,
+        ctx: &mut Context<F>,
+        a: impl IntoIterator<Item = Q>,
+        indicator: impl IntoIterator<Item = AssignedValue<F>>,
+    ) -> AssignedValue<F>
+    where
+        Q: Into<QuantumCell<F>>,
+    {
+        let mut sum = F::zero();
+        let a = a.into_iter();
+        let (len, hi) = a.size_hint();
+        debug_assert_eq!(Some(len), hi);
 
-        ctx.advice_alloc[self.context_id].1 += len;
+        let cells = std::iter::once(Constant(F::zero())).chain(
+            a.zip(indicator.into_iter()).flat_map(|(a, ind)| {
+                let a = a.into();
+                sum = if ind.value().is_zero_vartime() { sum } else { *a.value() };
+                [a, Existing(ind), Witness(sum)]
+            }),
+        );
+        ctx.assign_region_last(cells, (0..len).map(|i| 3 * i as isize))
+    }
 
-        #[cfg(feature = "display")]
-        {
-            ctx.total_advice += len;
-        }
+    fn select_from_idx<Q>(
+        &self,
+        ctx: &mut Context<F>,
+        cells: impl IntoIterator<Item = Q>,
+        idx: impl Into<QuantumCell<F>>,
+    ) -> AssignedValue<F>
+    where
+        Q: Into<QuantumCell<F>>,
+    {
+        let cells = cells.into_iter();
+        let (len, hi) = cells.size_hint();
+        debug_assert_eq!(Some(len), hi);
 
-        out.unwrap()
+        let ind = self.idx_to_indicator(ctx, idx, len);
+        self.select_by_indicator(ctx, cells, ind)
     }
 
-    // Takes two vectors of `QuantumCell` and constrains a witness output to the inner product of `<vec_a, vec_b>`
-    // outputs are (assignments except last, out_cell)
-    // Currently the only places `assignments` is used are: `num_to_bits, range_check, carry_mod, check_carry_mod_to_zero`
-    fn inner_product<'a, 'b: 'a>(
+    // | out | a | inv | 1 | 0 | a | out | 0
+    fn is_zero(&self, ctx: &mut Context<F>, a: AssignedValue<F>) -> AssignedValue<F> {
+        let x = a.value();
+        let (is_zero, inv) = if x.is_zero_vartime() {
+            (F::one(), Assigned::Trivial(F::one()))
+        } else {
+            (F::zero(), Assigned::Rational(F::one(), *x))
+        };
+
+        let cells = vec![
+            Witness(is_zero),
+            Existing(a),
+            WitnessFraction(inv),
+            Constant(F::one()),
+            Constant(F::zero()),
+            Existing(a),
+            Witness(is_zero),
+            Constant(F::zero()),
+        ];
+        ctx.assign_region_smart(cells, vec![0, 4], vec![(0, 6)], []);
+        ctx.get(-2)
+    }
+
+    fn is_equal(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        b: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-    ) -> AssignedValue<'b, F> {
-        // we will do special handling of the cases where one of the vectors is all constants
-        match self.strategy {
-            GateStrategy::PlonkPlus => {
-                let (_, out) = self.inner_product_with_assignments(ctx, a, b);
-                out
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+    ) -> AssignedValue<F> {
+        let diff = self.sub(ctx, a, b);
+        self.is_zero(ctx, diff)
+    }
+
+    /// returns little-endian bit vectors
+    fn num_to_bits(
+        &self,
+        ctx: &mut Context<F>,
+        a: AssignedValue<F>,
+        range_bits: usize,
+    ) -> Vec<AssignedValue<F>>;
+
+    /// given pairs `coords[i] = (x_i, y_i)`, let `f` be the unique polynomial of degree at most `len(coords) - 1` such that `f(x_i) = y_i` for all `i`.
+    ///
+    /// input: coords, x
+    ///
+    /// output: (f(x), Prod_i (x - x_i))
+    ///
+    /// constrains that `x` and all the `x_i` are pairwise distinct
+    fn lagrange_and_eval(
+        &self,
+        ctx: &mut Context<F>,
+        coords: &[(AssignedValue<F>, AssignedValue<F>)],
+        x: AssignedValue<F>,
+    ) -> (AssignedValue<F>, AssignedValue<F>) {
+        let mut z = self.sub(ctx, Existing(x), Existing(coords[0].0));
+        for coord in coords.iter().skip(1) {
+            let sub = self.sub(ctx, Existing(x), Existing(coord.0));
+            z = self.mul(ctx, Existing(z), Existing(sub));
+        }
+        let mut eval = None;
+        for i in 0..coords.len() {
+            // compute (x - x_i) * Prod_{j != i} (x_i - x_j)
+            let mut denom = self.sub(ctx, Existing(x), Existing(coords[i].0));
+            for j in 0..coords.len() {
+                if i == j {
+                    continue;
+                }
+                let sub = self.sub(ctx, coords[i].0, coords[j].0);
+                denom = self.mul(ctx, denom, sub);
             }
-            _ => self.inner_product_simple(ctx, a, b),
+            // TODO: batch inversion
+            let is_zero = self.is_zero(ctx, denom);
+            self.assert_is_const(ctx, &is_zero, &F::zero());
+
+            // y_i / denom
+            let quot = self.div_unsafe(ctx, coords[i].1, denom);
+            eval = if let Some(eval) = eval {
+                let eval = self.add(ctx, eval, quot);
+                Some(eval)
+            } else {
+                Some(quot)
+            };
+        }
+        let out = self.mul(ctx, eval.unwrap(), z);
+        (out, z)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct GateChip<F: ScalarField> {
+    strategy: GateStrategy,
+    pub pow_of_two: Vec<F>,
+    /// To avoid Montgomery conversion in `F::from` for common small numbers, we keep a cache of field elements
+    pub field_element_cache: Vec<F>,
+}
+
+impl<F: ScalarField> Default for GateChip<F> {
+    fn default() -> Self {
+        Self::new(GateStrategy::Vertical)
+    }
+}
+
+impl<F: ScalarField> GateChip<F> {
+    pub fn new(strategy: GateStrategy) -> Self {
+        let mut pow_of_two = Vec::with_capacity(F::NUM_BITS as usize);
+        let two = F::from(2);
+        pow_of_two.push(F::one());
+        pow_of_two.push(two);
+        for _ in 2..F::NUM_BITS {
+            pow_of_two.push(two * pow_of_two.last().unwrap());
         }
+        let field_element_cache = (0..1024).map(|i| F::from(i)).collect();
+
+        Self { strategy, pow_of_two, field_element_cache }
     }
 
-    fn inner_product_with_sums<'a, 'b: 'a>(
+    fn inner_product_simple<QA>(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        b: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-    ) -> Box<dyn Iterator<Item = AssignedValue<'b, F>> + 'b> {
+        ctx: &mut Context<F>,
+        a: impl IntoIterator<Item = QA>,
+        b: impl IntoIterator<Item = QuantumCell<F>>,
+    ) -> bool
+    where
+        QA: Into<QuantumCell<F>>,
+    {
+        let mut sum;
+        let mut a = a.into_iter();
         let mut b = b.into_iter().peekable();
-        let flag = matches!(b.peek(), Some(&Constant(c)) if c == F::one());
-        let (assignments_without_last, last) =
-            self.inner_product_simple_with_assignments(ctx, a, b);
-        if flag {
-            Box::new(assignments_without_last.into_iter().step_by(3).chain(once(last)))
+
+        let b_starts_with_one = matches!(b.peek(), Some(Constant(c)) if c == &F::one());
+        let cells = if b_starts_with_one {
+            b.next();
+            let start_a = a.next().unwrap().into();
+            sum = *start_a.value();
+            iter::once(start_a)
         } else {
-            // in this case the first assignment is 0 so we skip it
-            Box::new(assignments_without_last.into_iter().step_by(3).skip(1).chain(once(last)))
+            sum = F::zero();
+            iter::once(Constant(F::zero()))
         }
+        .chain(a.zip(b).flat_map(|(a, b)| {
+            let a = a.into();
+            sum += *a.value() * b.value();
+            [a, b, Witness(sum)]
+        }));
+
+        let gate_offsets = if ctx.witness_gen_only() {
+            vec![]
+        } else {
+            let (lo, hi) = cells.size_hint();
+            debug_assert_eq!(Some(lo), hi);
+            let len = lo / 3;
+            (0..len).map(|i| 3 * i as isize).collect()
+        };
+        ctx.assign_region(cells, gate_offsets);
+        b_starts_with_one
     }
+}
 
-    fn inner_product_left<'a, 'b: 'a>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        b: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        a_assigned: &mut Vec<AssignedValue<'b, F>>,
-    ) -> AssignedValue<'b, F> {
-        match self.strategy {
-            GateStrategy::PlonkPlus => {
-                let a = a.into_iter();
-                let (len, _) = a.size_hint();
-                let (assignments, acc) = self.inner_product_with_assignments(ctx, a, b);
-                let mut assignments = assignments.into_iter();
-                a_assigned.clear();
-                assert!(a_assigned.capacity() >= len);
-                a_assigned.extend(
-                    iter::once(assignments.next().unwrap())
-                        .chain(
-                            assignments
-                                .chunks(3)
-                                .into_iter()
-                                .flat_map(|chunk| chunk.into_iter().take(2)),
-                        )
-                        .take(len),
-                );
-                acc
-            }
-            _ => {
-                let mut a = a.into_iter();
-                let mut b = b.into_iter().peekable();
-                let (len, hi) = b.size_hint();
-                debug_assert_eq!(Some(len), hi);
-                // we do not use `assign_region` and implement directly to avoid `collect`ing the vector of assignments
-                let phase = ctx.current_phase();
-                assert!(self.context_id < ctx.advice_alloc.len(), "context id out of bounds");
-
-                let (gate_index, mut row_offset) = {
-                    let alloc = ctx.advice_alloc.get_mut(self.context_id).unwrap();
-                    if alloc.1 + 3 * len + 1 >= ctx.max_rows {
-                        alloc.1 = 0;
-                        alloc.0 += 1;
-                    }
-                    *alloc
-                };
-                let basic_gate = self.basic_gates[phase]
-                    .get(gate_index)
-                    .unwrap_or_else(|| panic!("NOT ENOUGH ADVICE COLUMNS IN PHASE {phase}"));
-                let column = basic_gate.value;
-                let q_enable = basic_gate.q_enable;
-
-                let mut right_one = false;
-                let start = ctx.assign_cell(
-                    if matches!(b.peek(), Some(&Constant(x)) if x == F::one()) {
-                        right_one = true;
-                        b.next();
-                        a.next().unwrap()
-                    } else {
-                        Constant(F::zero())
-                    },
-                    column,
-                    #[cfg(feature = "display")]
-                    self.context_id,
-                    row_offset,
-                    #[cfg(feature = "halo2-pse")]
-                    (phase as u8),
-                );
+impl<F: ScalarField> GateInstructions<F> for GateChip<F> {
+    fn strategy(&self) -> GateStrategy {
+        self.strategy
+    }
+    fn pow_of_two(&self) -> &[F] {
+        &self.pow_of_two
+    }
+    fn get_field_element(&self, n: u64) -> F {
+        let get = self.field_element_cache.get(n as usize);
+        if let Some(fe) = get {
+            *fe
+        } else {
+            F::from(n)
+        }
+    }
 
-                row_offset += 1;
-                let mut acc = start.value().copied();
-                a_assigned.clear();
-                assert!(a_assigned.capacity() >= len);
-                if right_one {
-                    a_assigned.push(start);
-                }
-                let mut last = None;
-
-                for (a, b) in a.zip(b) {
-                    q_enable
-                        .enable(&mut ctx.region, row_offset - 1)
-                        .expect("enable selector should not fail");
-
-                    acc = acc + a.value().zip(b.value()).map(|(a, b)| *a * b);
-                    let [a, _, c] = [(a, 0), (b, 1), (Witness(acc), 2)].map(|(qcell, idx)| {
-                        ctx.assign_cell(
-                            qcell,
-                            column,
-                            #[cfg(feature = "display")]
-                            self.context_id,
-                            row_offset + idx,
-                            #[cfg(feature = "halo2-pse")]
-                            (phase as u8),
-                        )
-                    });
-                    last = Some(c);
-                    row_offset += 3;
-                    a_assigned.push(a);
-                }
-                ctx.advice_alloc[self.context_id].1 = row_offset;
+    fn inner_product<QA>(
+        &self,
+        ctx: &mut Context<F>,
+        a: impl IntoIterator<Item = QA>,
+        b: impl IntoIterator<Item = QuantumCell<F>>,
+    ) -> AssignedValue<F>
+    where
+        QA: Into<QuantumCell<F>>,
+    {
+        self.inner_product_simple(ctx, a, b);
+        ctx.last().unwrap()
+    }
 
-                #[cfg(feature = "display")]
-                {
-                    ctx.total_advice += 3 * (len - usize::from(right_one)) + 1;
-                }
-                last.unwrap_or_else(|| a_assigned[0].clone())
+    /// Returns the inner product of `<a, b>` and the last item of `a` after it is assigned
+    fn inner_product_left_last<QA>(
+        &self,
+        ctx: &mut Context<F>,
+        a: impl IntoIterator<Item = QA>,
+        b: impl IntoIterator<Item = QuantumCell<F>>,
+    ) -> (AssignedValue<F>, AssignedValue<F>)
+    where
+        QA: Into<QuantumCell<F>>,
+    {
+        let a = a.into_iter();
+        let (len, hi) = a.size_hint();
+        debug_assert_eq!(Some(len), hi);
+        let row_offset = ctx.advice.len();
+        let b_starts_with_one = self.inner_product_simple(ctx, a, b);
+        let a_last = if b_starts_with_one {
+            if len == 1 {
+                ctx.get(row_offset as isize)
+            } else {
+                ctx.get((row_offset + 1 + 3 * (len - 2)) as isize)
             }
+        } else {
+            ctx.get((row_offset + 1 + 3 * (len - 1)) as isize)
+        };
+        (ctx.last().unwrap(), a_last)
+    }
+
+    /// Returns an iterator over the partial sums `sum_{j=0..=i} a[j] * b[j]`.
+    fn inner_product_with_sums<'thread, QA>(
+        &self,
+        ctx: &'thread mut Context<F>,
+        a: impl IntoIterator<Item = QA>,
+        b: impl IntoIterator<Item = QuantumCell<F>>,
+    ) -> Box<dyn Iterator<Item = AssignedValue<F>> + 'thread>
+    where
+        QA: Into<QuantumCell<F>>,
+    {
+        let row_offset = ctx.advice.len();
+        let b_starts_with_one = self.inner_product_simple(ctx, a, b);
+        if b_starts_with_one {
+            Box::new((row_offset..ctx.advice.len()).step_by(3).map(|i| ctx.get(i as isize)))
+        } else {
+            // in this case the first assignment is 0 so we skip it
+            Box::new((row_offset..ctx.advice.len()).step_by(3).skip(1).map(|i| ctx.get(i as isize)))
         }
     }
 
-    fn sum_products_with_coeff_and_var<'a, 'b: 'a>(
+    fn sum_products_with_coeff_and_var(
         &self,
-        ctx: &mut Context<'_, F>,
-        values: impl IntoIterator<Item = (F, QuantumCell<'a, 'b, F>, QuantumCell<'a, 'b, F>)>,
-        var: QuantumCell<'a, 'b, F>,
-    ) -> AssignedValue<'b, F> {
+        ctx: &mut Context<F>,
+        values: impl IntoIterator<Item = (F, QuantumCell<F>, QuantumCell<F>)>,
+        var: QuantumCell<F>,
+    ) -> AssignedValue<F> {
         // TODO: optimize
         match self.strategy {
-            GateStrategy::PlonkPlus => {
-                let mut cells = Vec::new();
-                let mut gate_offsets = Vec::new();
-                let mut acc = var.value().copied();
-                cells.push(var);
-                for (i, (c, a, b)) in values.into_iter().enumerate() {
-                    acc = acc + Value::known(c) * a.value() * b.value();
-                    cells.append(&mut vec![a, b, Witness(acc)]);
-                    gate_offsets.push((3 * i as isize, Some([c, F::zero(), F::zero()])));
-                }
-                self.assign_region_last(ctx, cells, gate_offsets)
-            }
             GateStrategy::Vertical => {
                 let (a, b): (Vec<_>, Vec<_>) = std::iter::once((var, Constant(F::one())))
                     .chain(values.into_iter().filter_map(|(c, va, vb)| {
@@ -691,7 +818,7 @@ impl<F: ScalarField> GateInstructions<F> for FlexGateConfig<F> {
                             Some((va, vb))
                         } else if c != F::zero() {
                             let prod = self.mul(ctx, va, vb);
-                            Some((QuantumCell::ExistingOwned(prod), Constant(c)))
+                            Some((QuantumCell::Existing(prod), Constant(c)))
                         } else {
                             None
                         }
@@ -702,17 +829,17 @@ impl<F: ScalarField> GateInstructions<F> for FlexGateConfig<F> {
         }
     }
 
-    /// assumes sel is boolean
-    /// returns
-    ///   a * sel + b * (1 - sel)
-    fn select<'v>(
+    fn select(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: QuantumCell<'_, 'v, F>,
-        b: QuantumCell<'_, 'v, F>,
-        sel: QuantumCell<'_, 'v, F>,
-    ) -> AssignedValue<'v, F> {
-        let diff_val: Value<F> = a.value().zip(b.value()).map(|(a, b)| *a - b);
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+        sel: impl Into<QuantumCell<F>>,
+    ) -> AssignedValue<F> {
+        let a = a.into();
+        let b = b.into();
+        let sel = sel.into();
+        let diff_val = *a.value() - b.value();
         let out_val = diff_val * sel.value() + b.value();
         match self.strategy {
             // | a - b | 1 | b | a |
@@ -721,53 +848,34 @@ impl<F: ScalarField> GateInstructions<F> for FlexGateConfig<F> {
                 let cells = vec![
                     Witness(diff_val),
                     Constant(F::one()),
-                    b.clone(),
+                    b,
                     a,
                     b,
                     sel,
                     Witness(diff_val),
                     Witness(out_val),
                 ];
-                let mut assigned_cells =
-                    self.assign_region_smart(ctx, cells, vec![0, 4], vec![(0, 6), (2, 4)], vec![]);
-                assigned_cells.pop().unwrap()
-            }
-            // | 0 | a | a - b | b | sel | a - b | out |
-            // selectors
-            // | 1 | 0 | 0     | 1 | 0   | 0
-            // | 0 | 1 | -1    | 1 | 0   | 0
-            GateStrategy::PlonkPlus => {
-                let mut assignments = self.assign_region(
-                    ctx,
-                    vec![
-                        Constant(F::zero()),
-                        a,
-                        Witness(diff_val),
-                        b,
-                        sel,
-                        Witness(diff_val),
-                        Witness(out_val),
-                    ],
-                    vec![(0, Some([F::zero(), F::one(), -F::one()])), (3, None)],
-                );
-                ctx.region.constrain_equal(assignments[2].cell(), assignments[5].cell());
-                assignments.pop().unwrap()
+                ctx.assign_region_smart(cells, vec![0, 4], vec![(0, 6), (2, 4)], []);
+                ctx.last().unwrap()
             }
         }
     }
 
     /// returns: a || (b && c)
     // | 1 - b c | b | c | 1 | a - 1 | 1 - b c | out | a - 1 | 1 | 1 | a |
-    fn or_and<'v>(
+    fn or_and(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: QuantumCell<'_, 'v, F>,
-        b: QuantumCell<'_, 'v, F>,
-        c: QuantumCell<'_, 'v, F>,
-    ) -> AssignedValue<'v, F> {
-        let bc_val = b.value().zip(c.value()).map(|(b, c)| *b * c);
-        let not_bc_val = bc_val.map(|x| F::one() - x);
-        let not_a_val = a.value().map(|x| *x - F::one());
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+        c: impl Into<QuantumCell<F>>,
+    ) -> AssignedValue<F> {
+        let a = a.into();
+        let b = b.into();
+        let c = c.into();
+        let bc_val = *b.value() * c.value();
+        let not_bc_val = F::one() - bc_val;
+        let not_a_val = *a.value() - F::one();
         let out_val = bc_val + a.value() - bc_val * a.value();
         let cells = vec![
             Witness(not_bc_val),
@@ -782,53 +890,41 @@ impl<F: ScalarField> GateInstructions<F> for FlexGateConfig<F> {
             Constant(F::one()),
             a,
         ];
-        let assigned_cells =
-            self.assign_region_smart(ctx, cells, vec![0, 3, 7], vec![(4, 7), (0, 5)], vec![]);
-        assigned_cells.into_iter().nth(6).unwrap()
+        ctx.assign_region_smart(cells, vec![0, 3, 7], vec![(4, 7), (0, 5)], []);
+        ctx.get(-5)
     }
 
     // returns little-endian bit vectors
-    fn num_to_bits<'v>(
+    fn num_to_bits(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &AssignedValue<'v, F>,
+        ctx: &mut Context<F>,
+        a: AssignedValue<F>,
         range_bits: usize,
-    ) -> Vec<AssignedValue<'v, F>> {
-        let bits = a
-            .value()
-            .map(|a| {
-                a.to_repr()
-                    .as_ref()
-                    .iter()
-                    .flat_map(|byte| (0..8).map(|i| (*byte as u64 >> i) & 1))
-                    .take(range_bits)
-                    .into_iter()
-                    .map(|x| F::from(x))
-                    .collect::<Vec<_>>()
-            })
-            .transpose_vec(range_bits);
+    ) -> Vec<AssignedValue<F>> {
+        let a_bytes = a.value().to_repr();
+        let bits = a_bytes
+            .as_ref()
+            .iter()
+            .flat_map(|byte| (0..8).map(|i| (*byte as u64 >> i) & 1))
+            .take(range_bits)
+            .map(|x| F::from(x));
 
         let mut bit_cells = Vec::with_capacity(range_bits);
-
-        let acc = self.inner_product_left(
+        let row_offset = ctx.advice.len();
+        let acc = self.inner_product(
             ctx,
-            bits.into_iter().map(|x| Witness(x)),
+            bits.map(Witness),
             self.pow_of_two[..range_bits].iter().map(|c| Constant(*c)),
-            &mut bit_cells,
         );
-        ctx.region.constrain_equal(a.cell(), acc.cell());
+        ctx.constrain_equal(&a, &acc);
+        debug_assert!(range_bits > 0);
+        bit_cells.push(ctx.get(row_offset as isize)); // bit 0: `pow_of_two[0] == 1`, so it is assigned alone, first
+        for i in 1..range_bits { // bit i >= 1 opens the i-th `[bit, 2^i, partial sum]` triple after bit 0
+            bit_cells.push(ctx.get((row_offset + 1 + 3 * (i - 1)) as isize));
+        }
 
         for bit_cell in &bit_cells {
-            self.assign_region(
-                ctx,
-                vec![
-                    Constant(F::zero()),
-                    Existing(bit_cell),
-                    Existing(bit_cell),
-                    Existing(bit_cell),
-                ],
-                vec![(0, None)],
-            );
+            self.assert_bit(ctx, *bit_cell);
         }
         bit_cells
     }
diff --git a/halo2-base/src/gates/mod.rs b/halo2-base/src/gates/mod.rs
index 52706772..bfa89593 100644
--- a/halo2-base/src/gates/mod.rs
+++ b/halo2-base/src/gates/mod.rs
@@ -1,664 +1,312 @@
-use self::{flex_gate::GateStrategy, range::RangeStrategy};
+use self::flex_gate::{FlexGateConfig, GateStrategy, MAX_PHASE};
 use super::{
     utils::ScalarField,
     AssignedValue, Context,
-    QuantumCell::{self, Constant, Existing, ExistingOwned, Witness, WitnessFraction},
+    QuantumCell::{self, Constant, Existing, Witness, WitnessFraction},
 };
 use crate::{
-    halo2_proofs::{circuit::Value, plonk::Assigned},
+    halo2_proofs::{
+        circuit::{Layouter, Region, SimpleFloorPlanner, Value},
+        plonk::{Advice, Circuit, Column, ConstraintSystem, Error},
+    },
     utils::{biguint_to_fe, bit_length, fe_to_biguint, PrimeField},
+    ContextCell,
 };
 use core::iter;
 use num_bigint::BigUint;
 use num_integer::Integer;
 use num_traits::{One, Zero};
-use std::ops::Shl;
+use serde::{Deserialize, Serialize};
+use std::{collections::HashMap, ops::Shl, rc::Rc};
 
 pub mod flex_gate;
-pub mod range;
-
-pub trait GateInstructions<F: ScalarField> {
-    fn strategy(&self) -> GateStrategy;
-    fn context_id(&self) -> usize;
-
-    fn pow_of_two(&self) -> &[F];
-    fn get_field_element(&self, n: u64) -> F;
-
-    fn assign_region<'a, 'b: 'a>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        inputs: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        gate_offsets: impl IntoIterator<Item = (isize, Option<[F; 3]>)>,
-    ) -> Vec<AssignedValue<'b, F>> {
-        self.assign_region_in(ctx, inputs, gate_offsets, ctx.current_phase())
-    }
-
-    fn assign_region_in<'a, 'b: 'a>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        inputs: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        gate_offsets: impl IntoIterator<Item = (isize, Option<[F; 3]>)>,
-        phase: usize,
-    ) -> Vec<AssignedValue<'b, F>>;
-
-    /// Only returns the last assigned cell
-    ///
-    /// Does not collect the vec, saving heap allocation
-    fn assign_region_last<'a, 'b: 'a>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        inputs: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        gate_offsets: impl IntoIterator<Item = (isize, Option<[F; 3]>)>,
-    ) -> AssignedValue<'b, F> {
-        self.assign_region_last_in(ctx, inputs, gate_offsets, ctx.current_phase())
-    }
-
-    fn assign_region_last_in<'a, 'b: 'a>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        inputs: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        gate_offsets: impl IntoIterator<Item = (isize, Option<[F; 3]>)>,
-        phase: usize,
-    ) -> AssignedValue<'b, F>;
-
-    /// Only call this if ctx.region is not in shape mode, i.e., if not using simple layouter or ctx.first_pass = false
-    ///
-    /// All indices in `gate_offsets`, `equality_offsets`, `external_equality` are with respect to `inputs` indices
-    /// - `gate_offsets` specifies indices to enable selector for the gate; assume `gate_offsets` is sorted in increasing order
-    /// - `equality_offsets` specifies pairs of indices to constrain equality
-    /// - `external_equality` specifies an existing cell to constrain equality with the cell at a certain index
-    fn assign_region_smart<'a, 'b: 'a>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        inputs: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        gate_offsets: impl IntoIterator<Item = usize>,
-        equality_offsets: impl IntoIterator<Item = (usize, usize)>,
-        external_equality: Vec<(&AssignedValue<F>, usize)>,
-    ) -> Vec<AssignedValue<'b, F>> {
-        let assignments =
-            self.assign_region(ctx, inputs, gate_offsets.into_iter().map(|i| (i as isize, None)));
-        for (offset1, offset2) in equality_offsets.into_iter() {
-            ctx.region.constrain_equal(assignments[offset1].cell(), assignments[offset2].cell());
-        }
-        for (assigned, eq_offset) in external_equality.into_iter() {
-            ctx.region.constrain_equal(assigned.cell(), assignments[eq_offset].cell());
-        }
-        assignments
-    }
-
-    fn assign_witnesses<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        witnesses: impl IntoIterator<Item = Value<F>>,
-    ) -> Vec<AssignedValue<'v, F>> {
-        self.assign_region(ctx, witnesses.into_iter().map(Witness), [])
-    }
-
-    fn load_witness<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        witness: Value<F>,
-    ) -> AssignedValue<'v, F> {
-        self.assign_region_last(ctx, [Witness(witness)], [])
-    }
-
-    fn load_constant<'a>(&self, ctx: &mut Context<'_, F>, c: F) -> AssignedValue<'a, F> {
-        self.assign_region_last(ctx, [Constant(c)], [])
-    }
-
-    fn load_zero<'a>(&self, ctx: &mut Context<'a, F>) -> AssignedValue<'a, F> {
-        if let Some(zcell) = &ctx.zero_cell {
-            return zcell.clone();
-        }
-        let zero_cell = self.assign_region_last(ctx, [Constant(F::zero())], []);
-        ctx.zero_cell = Some(zero_cell.clone());
-        zero_cell
-    }
-
-    /// Copies a, b and constrains `a + b * 1 = out`
-    // | a | b | 1 | a + b |
-    fn add<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: QuantumCell<'_, 'v, F>,
-        b: QuantumCell<'_, 'v, F>,
-    ) -> AssignedValue<'v, F> {
-        let out_val = a.value().zip(b.value()).map(|(a, b)| *a + b);
-        self.assign_region_last(
-            ctx,
-            vec![a, b, Constant(F::one()), Witness(out_val)],
-            vec![(0, None)],
-        )
-    }
-
-    /// Copies a, b and constrains `a + b * (-1) = out`
-    // | a - b | b | 1 | a |
-    fn sub<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: QuantumCell<'_, 'v, F>,
-        b: QuantumCell<'_, 'v, F>,
-    ) -> AssignedValue<'v, F> {
-        let out_val = a.value().zip(b.value()).map(|(a, b)| *a - b);
-        // slightly better to not have to compute -F::one() since F::one() is cached
-        let assigned_cells = self.assign_region(
-            ctx,
-            vec![Witness(out_val), b, Constant(F::one()), a],
-            vec![(0, None)],
-        );
-        assigned_cells.into_iter().next().unwrap()
-    }
-
-    // | a | -a | 1 | 0 |
-    fn neg<'v>(&self, ctx: &mut Context<'_, F>, a: QuantumCell<'_, 'v, F>) -> AssignedValue<'v, F> {
-        let out_val = a.value().map(|v| -*v);
-        let assigned_cells = self.assign_region(
-            ctx,
-            vec![a, Witness(out_val), Constant(F::one()), Constant(F::zero())],
-            vec![(0, None)],
-        );
-        assigned_cells.into_iter().nth(1).unwrap()
-    }
-
-    /// Copies a, b and constrains `0 + a * b = out`
-    // | 0 | a | b | a * b |
-    fn mul<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: QuantumCell<'_, 'v, F>,
-        b: QuantumCell<'_, 'v, F>,
-    ) -> AssignedValue<'v, F> {
-        let out_val = a.value().zip(b.value()).map(|(a, b)| *a * b);
-        self.assign_region_last(
-            ctx,
-            vec![Constant(F::zero()), a, b, Witness(out_val)],
-            vec![(0, None)],
-        )
-    }
-
-    /// a * b + c
-    fn mul_add<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: QuantumCell<'_, 'v, F>,
-        b: QuantumCell<'_, 'v, F>,
-        c: QuantumCell<'_, 'v, F>,
-    ) -> AssignedValue<'v, F> {
-        let out_val = a.value().zip(b.value()).map(|(a, b)| *a * b) + c.value();
-        self.assign_region_last(ctx, vec![c, a, b, Witness(out_val)], vec![(0, None)])
-    }
-
-    /// (1 - a) * b = b - a * b
-    fn mul_not<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: QuantumCell<'_, 'v, F>,
-        b: QuantumCell<'_, 'v, F>,
-    ) -> AssignedValue<'v, F> {
-        let out_val = a.value().zip(b.value()).map(|(a, b)| (F::one() - a) * b);
-        let assignments =
-            self.assign_region(ctx, vec![Witness(out_val), a, b.clone(), b], vec![(0, None)]);
-        ctx.region.constrain_equal(assignments[2].cell(), assignments[3].cell());
-        assignments.into_iter().next().unwrap()
-    }
-
-    /// Constrain x is 0 or 1.
-    fn assert_bit(&self, ctx: &mut Context<'_, F>, x: &AssignedValue<F>) {
-        self.assign_region_last(
-            ctx,
-            [Constant(F::zero()), Existing(x), Existing(x), Existing(x)],
-            [(0, None)],
-        );
-    }
-
-    fn div_unsafe<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: QuantumCell<'_, 'v, F>,
-        b: QuantumCell<'_, 'v, F>,
-    ) -> AssignedValue<'v, F> {
-        // TODO: if really necessary, make `c` of type `Assigned<F>`
-        // this would require the API using `Assigned<F>` instead of `F` everywhere, so leave as last resort
-        let c = a.value().zip(b.value()).map(|(a, b)| b.invert().unwrap() * a);
-        let assignments =
-            self.assign_region(ctx, vec![Constant(F::zero()), Witness(c), b, a], vec![(0, None)]);
-        assignments.into_iter().nth(1).unwrap()
-    }
-
-    fn assert_equal(&self, ctx: &mut Context<'_, F>, a: QuantumCell<F>, b: QuantumCell<F>) {
-        if let (Existing(a), Existing(b)) = (&a, &b) {
-            ctx.region.constrain_equal(a.cell(), b.cell());
-        } else {
-            self.assign_region_smart(
-                ctx,
-                vec![Constant(F::zero()), a, Constant(F::one()), b],
-                vec![0],
-                vec![],
-                vec![],
-            );
-        }
-    }
-
-    fn assert_is_const(&self, ctx: &mut Context<'_, F>, a: &AssignedValue<F>, constant: F) {
-        let c_cell = ctx.assign_fixed(constant);
-        #[cfg(feature = "halo2-axiom")]
-        ctx.region.constrain_equal(a.cell(), &c_cell);
-        #[cfg(feature = "halo2-pse")]
-        ctx.region.constrain_equal(a.cell(), c_cell).unwrap();
-    }
-
-    /// Returns `(assignments, output)` where `output` is the inner product of `<a, b>`
-    ///
-    /// `assignments` is for internal use
-    fn inner_product<'a, 'b: 'a>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        b: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-    ) -> AssignedValue<'b, F>;
-
-    /// very specialized for optimal range check, not for general consumption
-    /// - `a_assigned` is expected to have capacity a.len()
-    /// - we re-use `a_assigned` to save memory allocation
-    fn inner_product_left<'a, 'b: 'a>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        b: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        a_assigned: &mut Vec<AssignedValue<'b, F>>,
-    ) -> AssignedValue<'b, F>;
-
-    /// Returns an iterator with the partial sums `sum_{j=0..=i} a[j] * b[j]`.
-    fn inner_product_with_sums<'a, 'b: 'a>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        b: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-    ) -> Box<dyn Iterator<Item = AssignedValue<'b, F>> + 'b>;
-
-    fn sum<'a, 'b: 'a>(
-        &self,
-        ctx: &mut Context<'b, F>,
-        a: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-    ) -> AssignedValue<'b, F> {
-        let mut a = a.into_iter().peekable();
-        let start = a.next();
-        if start.is_none() {
-            return self.load_zero(ctx);
-        }
-        let start = start.unwrap();
-        if a.peek().is_none() {
-            return self.assign_region_last(ctx, [start], []);
-        }
-        let (len, hi) = a.size_hint();
-        debug_assert_eq!(Some(len), hi);
-
-        let mut sum = start.value().copied();
-        let cells = iter::once(start).chain(a.flat_map(|a| {
-            sum = sum + a.value();
-            [a, Constant(F::one()), Witness(sum)]
-        }));
-        self.assign_region_last(ctx, cells, (0..len).map(|i| (3 * i as isize, None)))
-    }
-
-    /// Returns the assignment trace where `output[3 * i]` has the running sum `sum_{j=0..=i} a[j]`
-    fn sum_with_assignments<'a, 'b: 'a>(
-        &self,
-        ctx: &mut Context<'b, F>,
-        a: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-    ) -> Vec<AssignedValue<'b, F>> {
-        let mut a = a.into_iter().peekable();
-        let start = a.next();
-        if start.is_none() {
-            return vec![self.load_zero(ctx)];
-        }
-        let start = start.unwrap();
-        if a.peek().is_none() {
-            return self.assign_region(ctx, [start], []);
-        }
-        let (len, hi) = a.size_hint();
-        debug_assert_eq!(Some(len), hi);
-
-        let mut sum = start.value().copied();
-        let cells = iter::once(start).chain(a.flat_map(|a| {
-            sum = sum + a.value();
-            [a, Constant(F::one()), Witness(sum)]
-        }));
-        self.assign_region(ctx, cells, (0..len).map(|i| (3 * i as isize, None)))
-    }
-
-    // requires b.len() == a.len() + 1
-    // returns
-    // x_i = b_1 * (a_1...a_{i - 1})
-    //     + b_2 * (a_2...a_{i - 1})
-    //     + ...
-    //     + b_i
-    // Returns [x_1, ..., x_{b.len()}]
-    fn accumulated_product<'a, 'v: 'a>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: impl IntoIterator<Item = QuantumCell<'a, 'v, F>>,
-        b: impl IntoIterator<Item = QuantumCell<'a, 'v, F>>,
-    ) -> Vec<AssignedValue<'v, F>> {
-        let mut b = b.into_iter();
-        let mut a = a.into_iter();
-        let b_first = b.next();
-        if let Some(b_first) = b_first {
-            let b_first = self.assign_region_last(ctx, [b_first], []);
-            std::iter::successors(Some(b_first), |x| {
-                a.next().zip(b.next()).map(|(a, b)| self.mul_add(ctx, Existing(x), a, b))
-            })
-            .collect()
-        } else {
-            vec![]
-        }
-    }
-
-    fn sum_products_with_coeff_and_var<'a, 'b: 'a>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        values: impl IntoIterator<Item = (F, QuantumCell<'a, 'b, F>, QuantumCell<'a, 'b, F>)>,
-        var: QuantumCell<'a, 'b, F>,
-    ) -> AssignedValue<'b, F>;
-
-    // | 1 - b | 1 | b | 1 | b | a | 1 - b | out |
-    fn or<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: QuantumCell<'_, 'v, F>,
-        b: QuantumCell<'_, 'v, F>,
-    ) -> AssignedValue<'v, F> {
-        let not_b_val = b.value().map(|x| F::one() - x);
-        let out_val = a.value().zip(b.value()).map(|(a, b)| *a + b)
-            - a.value().zip(b.value()).map(|(a, b)| *a * b);
-        let cells = vec![
-            Witness(not_b_val),
-            Constant(F::one()),
-            b.clone(),
-            Constant(F::one()),
-            b,
-            a,
-            Witness(not_b_val),
-            Witness(out_val),
-        ];
-        let mut assigned_cells =
-            self.assign_region_smart(ctx, cells, vec![0, 4], vec![(0, 6), (2, 4)], vec![]);
-        assigned_cells.pop().unwrap()
-    }
-
-    // | 0 | a | b | out |
-    fn and<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: QuantumCell<'_, 'v, F>,
-        b: QuantumCell<'_, 'v, F>,
-    ) -> AssignedValue<'v, F> {
-        self.mul(ctx, a, b)
-    }
-
-    fn not<'v>(&self, ctx: &mut Context<'_, F>, a: QuantumCell<'_, 'v, F>) -> AssignedValue<'v, F> {
-        self.sub(ctx, Constant(F::one()), a)
-    }
-
-    fn select<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: QuantumCell<'_, 'v, F>,
-        b: QuantumCell<'_, 'v, F>,
-        sel: QuantumCell<'_, 'v, F>,
-    ) -> AssignedValue<'v, F>;
+// pub mod range;
+
+type ThreadBreakPoints = Vec<usize>;
+type MultiPhaseThreadBreakPoints = Vec<ThreadBreakPoints>;
+
+#[derive(Clone, Debug, Default)]
+pub struct GateThreadBuilder<F: ScalarField> {
+    /// `threads[phase]` holds the threads (one `Context` each) created for that challenge phase
+    pub threads: [Vec<Context<F>>; MAX_PHASE],
+    thread_count: usize, // threads created so far; doubles as the id handed to the next thread
+    witness_gen_only: bool, // passed on to every `Context::new`; `assign_all` asserts it is false
+    use_unknown: bool, // if set, `assign_all` assigns `Value::unknown()` in place of witness values
+}
 
-    fn or_and<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: QuantumCell<'_, 'v, F>,
-        b: QuantumCell<'_, 'v, F>,
-        c: QuantumCell<'_, 'v, F>,
-    ) -> AssignedValue<'v, F>;
-
-    /// assume bits has boolean values
-    /// returns vec[idx] with vec[idx] = 1 if and only if bits == idx as a binary number
-    fn bits_to_indicator<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        bits: &[AssignedValue<'v, F>],
-    ) -> Vec<AssignedValue<'v, F>> {
-        let k = bits.len();
-
-        let (inv_last_bit, last_bit) = {
-            let mut assignments = self
-                .assign_region(
-                    ctx,
-                    vec![
-                        Witness(bits[k - 1].value().map(|b| F::one() - b)),
-                        Existing(&bits[k - 1]),
-                        Constant(F::one()),
-                        Constant(F::one()),
-                    ],
-                    vec![(0, None)],
-                )
-                .into_iter();
-            (assignments.next().unwrap(), assignments.next().unwrap())
+impl<F: ScalarField> GateThreadBuilder<F> {
+    pub fn new(witness_gen_only: bool) -> Self {
+        let mut threads = [(); MAX_PHASE].map(|_| vec![]);
+        // only phase 0 starts with a (main) thread, created with id 0; hence `thread_count: 1` below
+        threads[0].push(Context::new(witness_gen_only, 0));
+        Self { threads, thread_count: 1, witness_gen_only, use_unknown: false }
+    }
+
+    pub fn unknown(self, use_unknown: bool) -> Self {
+        Self { use_unknown, ..self } // consumes the builder and returns it with only this flag replaced
+    }
+
+    pub fn main(&mut self, phase: usize) -> &mut Context<F> {
+        self.threads[phase].first_mut().unwrap() // panics if `phase` has no thread yet (`new` only fills phase 0)
+    }
+
+    pub fn new_thread(&mut self, phase: usize) -> &mut Context<F> {
+        let thread_id = self.thread_count; // one counter for all phases, so ids are unique builder-wide
+        self.thread_count += 1;
+        self.threads[phase].push(Context::new(self.witness_gen_only, thread_id));
+        self.threads[phase].last_mut().unwrap() // cannot fail: a thread was pushed on the line above
+    }
+
+    /// Auto-calculates configuration parameters for a circuit with `2^k` rows from current cell counts
+    pub fn config(&self, k: usize) -> FlexGateConfigParams {
+        let max_rows = 1 << k;
+        let total_advice_per_phase = self
+            .threads
+            .iter()
+            .map(|threads| threads.iter().map(|ctx| ctx.advice.len()).sum::<usize>())
+            .collect::<Vec<_>>();
+        // rough estimate: ceil(advice_cells_per_phase / 2^k) columns, ignoring rows skipped at column breaks;
+        // if this is too small, manual configuration will be needed
+        let num_advice_per_phase = total_advice_per_phase
+            .iter()
+            .map(|count| (count + max_rows - 1) >> k)
+            .collect::<Vec<_>>();
+
+        let total_lookup_advice_per_phase = self
+            .threads
+            .iter()
+            .map(|threads| threads.iter().map(|ctx| ctx.cells_to_lookup.len()).sum::<usize>())
+            .collect::<Vec<_>>();
+        let num_lookup_advice_per_phase = total_lookup_advice_per_phase
+            .iter()
+            .map(|count| (count + max_rows - 1) >> k)
+            .collect::<Vec<_>>();
+
+        let total_fixed: usize = self
+            .threads
+            .iter()
+            .map(|threads| threads.iter().map(|ctx| ctx.constants.len()).sum::<usize>())
+            .sum();
+        let num_fixed = (total_fixed + max_rows - 1) >> k; // ceil(total_fixed / 2^k)
+
+        let params = FlexGateConfigParams {
+            strategy: GateStrategy::Vertical,
+            num_advice_per_phase,
+            num_lookup_advice_per_phase,
+            num_fixed,
+            k,
         };
-        let mut indicator = Vec::with_capacity(2 * (1 << k) - 2);
-        let mut offset = 0;
-        indicator.push(inv_last_bit);
-        indicator.push(last_bit);
-        for (idx, bit) in bits.iter().rev().enumerate().skip(1) {
-            for old_idx in 0..(1 << idx) {
-                let inv_prod_val = indicator[offset + old_idx]
-                    .value()
-                    .zip(bit.value())
-                    .map(|(a, b)| (F::one() - b) * a);
-                let inv_prod = self
-                    .assign_region_smart(
-                        ctx,
-                        vec![
-                            Witness(inv_prod_val),
-                            Existing(&indicator[offset + old_idx]),
-                            Existing(bit),
-                            Existing(&indicator[offset + old_idx]),
-                        ],
-                        vec![0],
-                        vec![],
-                        vec![],
-                    )
-                    .into_iter()
-                    .next()
-                    .unwrap();
-                indicator.push(inv_prod);
-
-                let prod = self.mul(ctx, Existing(&indicator[offset + old_idx]), Existing(bit));
-                indicator.push(prod);
+        #[cfg(feature = "display")]
+        {
+            for phase in 0..MAX_PHASE {
+                if total_advice_per_phase[phase] != 0 || total_lookup_advice_per_phase[phase] != 0 {
+                    println!(
+                        "Gate Chip | Phase {}: {} advice cells , {} lookup advice cells",
+                        phase, total_advice_per_phase[phase], total_lookup_advice_per_phase[phase],
+                    );
+                }
             }
-            offset += 1 << idx;
+            println!("Total {total_fixed} fixed cells");
+            println!("Auto-calculated config params:\n {params:#?}");
         }
-        indicator.split_off((1 << k) - 2)
-    }
-
-    // returns vec with vec.len() == len such that:
-    //     vec[i] == 1{i == idx}
-    fn idx_to_indicator<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        mut idx: QuantumCell<'_, 'v, F>,
-        len: usize,
-    ) -> Vec<AssignedValue<'v, F>> {
-        let ind = self.assign_region(
-            ctx,
-            (0..len).map(|i| {
-                Witness(idx.value().map(|x| {
-                    if x.get_lower_32() == i as u32 {
-                        F::one()
-                    } else {
-                        F::zero()
+        std::env::set_var("FLEX_GATE_CONFIG_PARAMS", serde_json::to_string(&params).unwrap());
+        params // NOTE: also exported above, as JSON, in the `FLEX_GATE_CONFIG_PARAMS` env variable
+    }
+
+    /// Assigns all advice and fixed cells, enables selectors, imposes equality constraints. Keygen only:
+    /// asserts `!witness_gen_only`. Returns, per phase, the row offsets at which a new column was started.
+    pub fn assign_all(
+        self,
+        config: &FlexGateConfig<F>,
+        lookup_advice: &[Vec<Column<Advice>>],
+        region: &mut Region<F>,
+    ) -> MultiPhaseThreadBreakPoints {
+        assert!(!self.witness_gen_only);
+        let use_unknown = self.use_unknown;
+        let max_rows = config.max_rows;
+        let mut break_points = vec![];
+        let mut assigned_advices = HashMap::new(); // (thread id, index in thread) -> assigned cell
+        let mut assigned_constants = HashMap::new(); // (thread id, constant index) -> assigned cell
+        let mut fixed_col = 0; // fixed-cell cursor is shared by all phases and threads
+        let mut fixed_offset = 0;
+        for (phase, threads) in self.threads.into_iter().enumerate() {
+            let mut break_point = vec![];
+            let mut gate_index = 0; // index of the basic gate (advice column) being filled
+            let mut row_offset = 0;
+            let mut lookup_offset = 0;
+            let mut lookup_col = 0;
+            for ctx in threads {
+                for (i, (advice, q)) in ctx.advice.iter().zip(ctx.selector.into_iter()).enumerate()
+                {
+                    if (q && row_offset + 4 > max_rows) || row_offset >= max_rows {
+                        break_point.push(row_offset); // column is full or the gate would not fit
+                        row_offset = 0;
+                        gate_index += 1;
                     }
-                }))
-            }),
-            vec![],
-        );
-
-        // check ind[i] * (i - idx) == 0
-        for (i, ind) in ind.iter().enumerate() {
-            let val = ind.value().zip(idx.value()).map(|(ind, idx)| *ind * idx);
-            let assignments = self.assign_region(
-                ctx,
-                vec![
-                    Constant(F::zero()),
-                    Existing(ind),
-                    idx,
-                    Witness(val),
-                    Constant(-F::from(i as u64)),
-                    Existing(ind),
-                    Constant(F::zero()),
-                ],
-                vec![(0, None), (3, None)],
-            );
-            // need to use assigned idx after i > 0 so equality constraint holds
-            idx = ExistingOwned(assignments.into_iter().nth(2).unwrap());
-        }
-        ind
-    }
-
-    // performs inner product on a, indicator
-    // `indicator` values are all boolean
-    /// Assumes for witness generation that only one element of `indicator` has non-zero value and that value is `F::one()`.
-    fn select_by_indicator<'a, 'i, 'b: 'a + 'i>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: impl IntoIterator<Item = QuantumCell<'a, 'b, F>>,
-        indicator: impl IntoIterator<Item = &'i AssignedValue<'b, F>>,
-    ) -> AssignedValue<'b, F> {
-        let mut sum = Value::known(F::zero());
-        let a = a.into_iter();
-        let (len, hi) = a.size_hint();
-        debug_assert_eq!(Some(len), hi);
-
-        let cells =
-            std::iter::once(Constant(F::zero())).chain(a.zip(indicator).flat_map(|(a, ind)| {
-                sum = sum.zip(a.value().zip(ind.value())).map(|(sum, (a, ind))| {
-                    if ind.is_zero_vartime() {
-                        sum
-                    } else {
-                        *a
+                    let basic_gate = config.basic_gates[phase]
+                        .get(gate_index)
+                        .unwrap_or_else(|| panic!("NOT ENOUGH ADVICE COLUMNS IN PHASE {phase}"));
+                    let column = basic_gate.value;
+                    let value = if use_unknown { Value::unknown() } else { Value::known(advice) };
+                    #[cfg(feature = "halo2-axiom")]
+                    let cell = *region
+                        .assign_advice(column, row_offset, value)
+                        .expect("assign_advice should not fail")
+                        .cell();
+                    #[cfg(not(feature = "halo2-axiom"))]
+                    let cell = region
+                        .assign_advice(|| "", column, row_offset, || value)
+                        .expect("assign_advice should not fail")
+                        .cell();
+                    assigned_advices.insert((ctx.context_id, i), cell);
+
+                    if q {
+                        basic_gate
+                            .q_enable
+                            .enable(region, row_offset)
+                            .expect("enable selector should not fail");
+                    }
+                    row_offset += 1;
+                }
+                for (c, i) in ctx.constants.into_iter() {
+                    #[cfg(feature = "halo2-axiom")]
+                    let cell = region.assign_fixed(config.constants[fixed_col], fixed_offset, c);
+                    #[cfg(not(feature = "halo2-axiom"))]
+                    let cell = region
+                        .assign_fixed(
+                            || "",
+                            config.constants[fixed_col],
+                            fixed_offset,
+                            || Value::known(c),
+                        )
+                        .unwrap()
+                        .cell();
+                    assigned_constants.insert((ctx.context_id, i), cell);
+                    fixed_col += 1; // fill fixed columns round-robin: next column, same row
+                    if fixed_col >= config.constants.len() {
+                        fixed_col = 0;
+                        fixed_offset += 1;
                     }
-                });
-                [a, Existing(ind), Witness(sum)]
-            }));
-        self.assign_region_last(ctx, cells, (0..len).map(|i| (3 * i as isize, None)))
-    }
-
-    fn select_from_idx<'a, 'v: 'a>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        cells: impl IntoIterator<Item = QuantumCell<'a, 'v, F>>,
-        idx: QuantumCell<'_, 'v, F>,
-    ) -> AssignedValue<'v, F> {
-        let cells = cells.into_iter();
-        let (len, hi) = cells.size_hint();
-        debug_assert_eq!(Some(len), hi);
-
-        let ind = self.idx_to_indicator(ctx, idx, len);
-        let out = self.select_by_indicator(ctx, cells, &ind);
-        out
-    }
-
-    // | out | a | inv | 1 | 0 | a | out | 0
-    fn is_zero<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: &AssignedValue<'v, F>,
-    ) -> AssignedValue<'v, F> {
-        let (is_zero, inv) = a
-            .value()
-            .map(|x| {
-                if x.is_zero_vartime() {
-                    (F::one(), Assigned::Trivial(F::one()))
-                } else {
-                    (F::zero(), Assigned::Rational(F::one(), *x))
                 }
-            })
-            .unzip();
 
-        let cells = vec![
-            Witness(is_zero),
-            Existing(a),
-            WitnessFraction(inv),
-            Constant(F::one()),
-            Constant(F::zero()),
-            Existing(a),
-            Witness(is_zero),
-            Constant(F::zero()),
-        ];
-        let assigned_cells = self.assign_region_smart(ctx, cells, vec![0, 4], vec![(0, 6)], vec![]);
-        assigned_cells.into_iter().next().unwrap()
-    }
+                for (left, right) in ctx.advice_equality_constraints {
+                    let left = assigned_advices[&(left.context_id, left.offset)]; // panics if not assigned yet
+                    let right = assigned_advices[&(right.context_id, right.offset)];
+                    #[cfg(feature = "halo2-axiom")]
+                    region.constrain_equal(&left, &right);
+                    #[cfg(not(feature = "halo2-axiom"))]
+                    region.constrain_equal(left, right).unwrap();
+                }
+                for (left, right) in ctx.constant_equality_constraints {
+                    let left = assigned_constants[&(left.context_id, left.offset)]; // `left` is the fixed cell
+                    let right = assigned_advices[&(right.context_id, right.offset)];
+                    #[cfg(feature = "halo2-axiom")]
+                    region.constrain_equal(&left, &right);
+                    #[cfg(not(feature = "halo2-axiom"))]
+                    region.constrain_equal(left, right).unwrap();
+                }
 
-    fn is_equal<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: QuantumCell<'_, 'v, F>,
-        b: QuantumCell<'_, 'v, F>,
-    ) -> AssignedValue<'v, F> {
-        let diff = self.sub(ctx, a, b);
-        self.is_zero(ctx, &diff)
+                for index in ctx.cells_to_lookup {
+                    if lookup_offset >= max_rows {
+                        lookup_offset = 0;
+                        lookup_col += 1;
+                    }
+                    let value = ctx.advice[index];
+                    let acell = assigned_advices[&(ctx.context_id, index)];
+                    let value = if use_unknown { Value::unknown() } else { Value::known(value) };
+                    let column = lookup_advice[phase][lookup_col]; // index panics if lookup columns run out
+
+                    #[cfg(feature = "halo2-axiom")]
+                    {
+                        let bcell = *region
+                            .assign_advice(column, lookup_offset, value)
+                            .expect("assign_advice should not fail")
+                            .cell();
+                        region.constrain_equal(&acell, &bcell);
+                    }
+                    #[cfg(not(feature = "halo2-axiom"))]
+                    {
+                        let bcell = region
+                            .assign_advice(|| "", column, lookup_offset, || value)
+                            .expect("assign_advice should not fail")
+                            .cell();
+                        region.constrain_equal(acell, bcell).unwrap();
+                    }
+                    lookup_offset += 1;
+                }
+            }
+            break_points.push(break_point);
+        }
+        break_points
     }
+}
 
-    // returns little-endian bit vectors
-    fn num_to_bits<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        a: &AssignedValue<'v, F>,
-        range_bits: usize,
-    ) -> Vec<AssignedValue<'v, F>>;
-
-    /// given pairs `coords[i] = (x_i, y_i)`, let `f` be the unique degree `len(coords)` polynomial such that `f(x_i) = y_i` for all `i`.
-    ///
-    /// input: coords, x
-    ///
-    /// output: (f(x), Prod_i (x - x_i))
-    ///
-    /// constrains all x_i and x are distinct
-    fn lagrange_and_eval<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        coords: &[(AssignedValue<'v, F>, AssignedValue<'v, F>)],
-        x: &AssignedValue<'v, F>,
-    ) -> (AssignedValue<'v, F>, AssignedValue<'v, F>) {
-        let mut z = self.sub(ctx, Existing(x), Existing(&coords[0].0));
-        for coord in coords.iter().skip(1) {
-            let sub = self.sub(ctx, Existing(x), Existing(&coord.0));
-            z = self.mul(ctx, Existing(&z), Existing(&sub));
+/// Pure advice witness assignment in a single phase. Uses preprocessed `break_points` to determine when
+/// to split a thread into a new column.
+pub fn assign_threads_in<F: ScalarField>(
+    phase: usize,
+    threads: Vec<Context<F>>,
+    config: &FlexGateConfig<F>,
+    lookup_advice: &[Column<Advice>],
+    region: &mut Region<F>,
+    break_points: ThreadBreakPoints,
+) {
+    if config.basic_gates[phase].is_empty() {
+        assert!(threads.is_empty(), "Trying to assign threads in a phase with no columns");
+        return;
+    }
+    assert_eq!(break_points.len(), threads.len());
+
+    let mut break_points = break_points.into_iter();
+    let mut break_point = break_points.next();
+    let mut gate_index = 0;
+    let mut column = config.basic_gates[phase][gate_index].value;
+    let mut row_offset = 0;
+    let mut lookup_offset = 0;
+    let mut lookup_advice = lookup_advice.iter();
+    let mut lookup_column = lookup_advice.next();
+    for ctx in threads {
+        for index in ctx.cells_to_lookup {
+            if lookup_offset >= config.max_rows {
+                lookup_offset = 0;
+                lookup_column = lookup_advice.next();
+            }
+            let value = ctx.advice[index];
+            let column = *lookup_column.unwrap();
+            #[cfg(feature = "halo2-axiom")]
+            region.assign_advice(column, lookup_offset, Value::known(value)).unwrap();
+            #[cfg(not(feature = "halo2-axiom"))]
+            region.assign_advice(|| "", column, lookup_offset, || Value::known(value)).unwrap();
+
+            lookup_offset += 1;
         }
-        let mut eval = None;
-        for i in 0..coords.len() {
-            // compute (x - x_i) * Prod_{j != i} (x_i - x_j)
-            let mut denom = self.sub(ctx, Existing(x), Existing(&coords[i].0));
-            for j in 0..coords.len() {
-                if i == j {
-                    continue;
-                }
-                let sub = self.sub(ctx, Existing(&coords[i].0), Existing(&coords[j].0));
-                denom = self.mul(ctx, Existing(&denom), Existing(&sub));
+        for advice in ctx.advice {
+            if break_point == Some(row_offset) {
+                break_point = break_points.next();
+                row_offset = 0;
+                gate_index += 1;
+                column = config.basic_gates[phase][gate_index].value;
             }
-            // TODO: batch inversion
-            let is_zero = self.is_zero(ctx, &denom);
-            self.assert_is_const(ctx, &is_zero, F::zero());
-
-            // y_i / denom
-            let quot = self.div_unsafe(ctx, Existing(&coords[i].1), Existing(&denom));
-            eval = if let Some(eval) = eval {
-                let eval = self.add(ctx, Existing(&eval), Existing(&quot));
-                Some(eval)
-            } else {
-                Some(quot)
-            };
+            #[cfg(feature = "halo2-axiom")]
+            region.assign_advice(column, row_offset, Value::known(advice)).unwrap();
+            #[cfg(not(feature = "halo2-axiom"))]
+            region.assign_advice(|| "", column, row_offset, || Value::known(advice)).unwrap();
+
+            row_offset += 1;
         }
-        let out = self.mul(ctx, Existing(&eval.unwrap()), Existing(&z));
-        (out, z)
     }
 }
 
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct FlexGateConfigParams {
+    pub strategy: GateStrategy,
+    pub k: usize,
+    pub num_advice_per_phase: Vec<usize>,
+    pub num_lookup_advice_per_phase: Vec<usize>,
+    pub num_fixed: usize,
+}
+
+/*
 pub trait RangeInstructions<F: ScalarField> {
     type Gate: GateInstructions<F>;
 
@@ -859,6 +507,7 @@ pub trait RangeInstructions<F: ScalarField> {
         (assigned[3].clone(), assigned[4].clone())
     }
 }
+*/
 
 #[cfg(test)]
 pub mod tests;
diff --git a/halo2-base/src/gates/tests.rs b/halo2-base/src/gates/tests.rs
index c4e811a3..57b673ef 100644
--- a/halo2-base/src/gates/tests.rs
+++ b/halo2-base/src/gates/tests.rs
@@ -1,131 +1,113 @@
+use std::cell::RefCell;
+use std::rc::Rc;
+
+use super::flex_gate::{FlexGateConfig, GateChip, GateInstructions, GateStrategy, MAX_PHASE};
 use super::{
-    flex_gate::{FlexGateConfig, GateStrategy},
-    range, GateInstructions, RangeInstructions,
+    assign_threads_in, FlexGateConfigParams, GateThreadBuilder, MultiPhaseThreadBreakPoints,
+    ThreadBreakPoints,
 };
 use crate::halo2_proofs::{circuit::*, dev::MockProver, halo2curves::bn256::Fr, plonk::*};
+use crate::utils::ScalarField;
 use crate::{
-    Context, ContextParams,
+    Context,
     QuantumCell::{Constant, Existing, Witness},
     SKIP_FIRST_PASS,
 };
 
-#[derive(Default)]
-struct MyCircuit<F> {
-    a: Value<F>,
-    b: Value<F>,
-    c: Value<F>,
+struct MyCircuit<F: ScalarField> {
+    inputs: [F; 3],
+    builder: RefCell<GateThreadBuilder<F>>, // trick `synthesize` to take ownership of `builder`
+    break_points: RefCell<MultiPhaseThreadBreakPoints>,
 }
 
-const NUM_ADVICE: usize = 2;
-
-impl Circuit<Fr> for MyCircuit<Fr> {
-    type Config = FlexGateConfig<Fr>;
+impl<F: ScalarField> Circuit<F> for MyCircuit<F> {
+    type Config = FlexGateConfig<F>;
     type FloorPlanner = SimpleFloorPlanner;
 
     fn without_witnesses(&self) -> Self {
-        Self::default()
+        unimplemented!()
     }
 
-    fn configure(meta: &mut ConstraintSystem<Fr>) -> Self::Config {
-        FlexGateConfig::configure(
-            meta,
-            GateStrategy::Vertical,
-            &[NUM_ADVICE],
-            1,
-            0,
-            6, /* params K */
-        )
+    fn configure(meta: &mut ConstraintSystem<F>) -> FlexGateConfig<F> {
+        let FlexGateConfigParams {
+            strategy,
+            num_advice_per_phase,
+            num_lookup_advice_per_phase: _,
+            num_fixed,
+            k,
+        } = serde_json::from_str(&std::env::var("FLEX_GATE_CONFIG_PARAMS").unwrap()).unwrap();
+        FlexGateConfig::configure(meta, strategy, &num_advice_per_phase, num_fixed, k)
     }
 
     fn synthesize(
         &self,
         config: Self::Config,
-        mut layouter: impl Layouter<Fr>,
+        mut layouter: impl Layouter<F>,
     ) -> Result<(), Error> {
         let mut first_pass = SKIP_FIRST_PASS;
-
         layouter.assign_region(
             || "gate",
-            |region| {
+            |mut region| {
                 if first_pass {
                     first_pass = false;
                     return Ok(());
                 }
-
-                let mut aux = Context::new(
-                    region,
-                    ContextParams {
-                        max_rows: config.max_rows,
-                        num_context_ids: 1,
-                        fixed_columns: config.constants.clone(),
-                    },
-                );
-                let ctx = &mut aux;
-
-                let (a_cell, b_cell, c_cell) = {
-                    let cells = config.assign_region_smart(
-                        ctx,
-                        vec![Witness(self.a), Witness(self.b), Witness(self.c)],
-                        vec![],
-                        vec![],
-                        vec![],
-                    );
-                    (cells[0].clone(), cells[1].clone(), cells[2].clone())
-                };
-
-                // test add
-                {
-                    config.add(ctx, Existing(&a_cell), Existing(&b_cell));
+                let builder = self.builder.take();
+                if !builder.witness_gen_only {
+                    *self.break_points.borrow_mut() = builder.assign_all(&config, &[], &mut region);
+                } else {
+                    // only test first phase for now
+                    let mut threads = builder.threads.into_iter();
+                    assign_threads_in(
+                        0,
+                        threads.next().unwrap(),
+                        &config,
+                        &[],
+                        &mut region,
+                        self.break_points.borrow()[0].clone(),
+                    )
                 }
 
-                // test sub
-                {
-                    config.sub(ctx, Existing(&a_cell), Existing(&b_cell));
-                }
+                Ok(())
+            },
+        )
+    }
+}
 
-                // test multiply
-                {
-                    config.mul(ctx, Existing(&c_cell), Existing(&b_cell));
-                }
+fn gate_tests<F: ScalarField>(ctx: &mut Context<F>, inputs: [F; 3]) {
+    let [a, b, c]: [_; 3] = ctx.assign_witnesses(inputs).try_into().unwrap();
+    let chip = GateChip::default();
 
-                // test idx_to_indicator
-                {
-                    config.idx_to_indicator(ctx, Constant(Fr::from(3u64)), 4);
-                }
+    // test add
+    chip.add(ctx, a, b);
 
-                {
-                    let bits = config.assign_witnesses(
-                        ctx,
-                        vec![Value::known(Fr::zero()), Value::known(Fr::one())],
-                    );
-                    config.bits_to_indicator(ctx, &bits);
-                }
+    // test sub
+    chip.sub(ctx, a, b);
 
-                #[cfg(feature = "display")]
-                {
-                    println!("total advice cells: {}", ctx.total_advice);
-                    let const_rows = ctx.fixed_offset + 1;
-                    println!("maximum rows used by a fixed column: {const_rows}");
-                }
+    // test multiply
+    chip.mul(ctx, c, b);
 
-                Ok(())
-            },
-        )
-    }
+    // test idx_to_indicator
+    chip.idx_to_indicator(ctx, Constant(F::from(3u64)), 4);
+
+    let bits = ctx.assign_witnesses([F::zero(), F::one()]);
+    //chip.bits_to_indicator(ctx, &bits);
 }
 
 #[test]
 fn test_gates() {
     let k = 6;
-    let circuit = MyCircuit::<Fr> {
-        a: Value::known(Fr::from(10u64)),
-        b: Value::known(Fr::from(12u64)),
-        c: Value::known(Fr::from(120u64)),
-    };
+    let inputs = [10u64, 12u64, 120u64].map(Fr::from);
+    let mut builder = GateThreadBuilder::new(false);
+    gate_tests(builder.main(0), inputs);
 
-    let prover = MockProver::run(k, &circuit, vec![]).unwrap();
-    prover.assert_satisfied();
-    // assert_eq!(prover.verify(), Ok(()));
+    // auto-tune circuit
+    builder.config(k);
+    // create circuit
+    let circuit =
+        MyCircuit { inputs, builder: RefCell::new(builder), break_points: RefCell::default() };
+
+    MockProver::run(k as u32, &circuit, vec![]).unwrap().assert_satisfied();
 }
 
 #[cfg(feature = "dev-graph")]
@@ -138,10 +120,22 @@ fn plot_gates() {
     root.fill(&WHITE).unwrap();
     let root = root.titled("Gates Layout", ("sans-serif", 60)).unwrap();
 
-    let circuit = MyCircuit::<Fr>::default();
+    let inputs = [Fr::zero(); 3];
+    let builder = GateThreadBuilder::new(false);
+    gate_tests(builder.main(0), inputs);
+
+    // auto-tune circuit
+    builder.config(k);
+    // create circuit
+    let circuit = MyCircuit {
+        inputs,
+        builder: RefCell::new(builder.unknown(true)),
+        break_points: RefCell::default(),
+    };
     halo2_proofs::dev::CircuitLayout::default().render(k, &circuit, &root).unwrap();
 }
 
+/*
 #[derive(Default)]
 struct RangeTestCircuit<F> {
     range_bits: usize,
@@ -461,3 +455,4 @@ mod lagrange {
         Ok(())
     }
 }
+*/
diff --git a/halo2-base/src/lib.rs b/halo2-base/src/lib.rs
index 13fb664d..ce797a78 100644
--- a/halo2-base/src/lib.rs
+++ b/halo2-base/src/lib.rs
@@ -24,24 +24,17 @@ compile_error!(
 #[cfg(not(any(feature = "halo2-pse", feature = "halo2-axiom")))]
 compile_error!("Must enable exactly one of \"halo2-pse\" or \"halo2-axiom\" features to choose which halo2_proofs crate to use.");
 
-use gates::flex_gate::MAX_PHASE;
+// use gates::flex_gate::MAX_PHASE;
 #[cfg(feature = "halo2-pse")]
 pub use halo2_proofs;
 #[cfg(feature = "halo2-axiom")]
 pub use halo2_proofs_axiom as halo2_proofs;
 
-use halo2_proofs::{
-    circuit::{AssignedCell, Cell, Region, Value},
-    plonk::{Advice, Assigned, Column, Fixed},
-};
-use rustc_hash::FxHashMap;
-#[cfg(feature = "halo2-pse")]
-use std::marker::PhantomData;
-use std::{cell::RefCell, rc::Rc};
+use halo2_proofs::plonk::Assigned;
+use std::collections::HashMap;
 use utils::ScalarField;
 
 pub mod gates;
-// pub mod hashes;
 pub mod utils;
 
 #[cfg(feature = "halo2-axiom")]
@@ -49,527 +42,300 @@ pub const SKIP_FIRST_PASS: bool = false;
 #[cfg(feature = "halo2-pse")]
 pub const SKIP_FIRST_PASS: bool = true;
 
-#[derive(Clone, Debug)]
-pub enum QuantumCell<'a, 'b: 'a, F: ScalarField> {
-    Existing(&'a AssignedValue<'b, F>),
-    ExistingOwned(AssignedValue<'b, F>), // this is similar to the Cow enum
-    Witness(Value<F>),
-    WitnessFraction(Value<Assigned<F>>),
+#[derive(Clone, Copy, Debug)]
+pub enum QuantumCell<F: ScalarField> {
+    Existing(AssignedValue<F>),
+    /// This is a guard for witness values assigned after pkey generation. We do not use `Value` api anymore.
+    Witness(F),
+    WitnessFraction(Assigned<F>),
     Constant(F),
 }
 
-impl<F: ScalarField> QuantumCell<'_, '_, F> {
-    pub fn value(&self) -> Value<&F> {
+impl<F: ScalarField> From<AssignedValue<F>> for QuantumCell<F> {
+    fn from(a: AssignedValue<F>) -> Self {
+        Self::Existing(a)
+    }
+}
+
+impl<F: ScalarField> QuantumCell<F> {
+    pub fn value(&self) -> &F {
         match self {
             Self::Existing(a) => a.value(),
-            Self::ExistingOwned(a) => a.value(),
-            Self::Witness(a) => a.as_ref(),
+            Self::Witness(a) => a,
             Self::WitnessFraction(_) => {
                 panic!("Trying to get value of a fraction before batch inversion")
             }
-            Self::Constant(a) => Value::known(a),
+            Self::Constant(a) => a,
         }
     }
 }
 
-#[derive(Clone, Debug)]
-pub struct AssignedValue<'a, F: ScalarField> {
-    #[cfg(feature = "halo2-axiom")]
-    pub cell: AssignedCell<&'a Assigned<F>, F>,
-
-    #[cfg(feature = "halo2-pse")]
-    pub cell: Cell,
-    #[cfg(feature = "halo2-pse")]
-    pub value: Value<F>,
-    #[cfg(feature = "halo2-pse")]
-    pub row_offset: usize,
-    #[cfg(feature = "halo2-pse")]
-    pub _marker: PhantomData<&'a F>,
-
-    #[cfg(feature = "display")]
+#[derive(Clone, Copy, Debug)]
+pub struct ContextCell {
     pub context_id: usize,
+    pub offset: usize,
 }
 
-impl<'a, F: ScalarField> AssignedValue<'a, F> {
-    #[cfg(feature = "display")]
-    pub fn context_id(&self) -> usize {
-        self.context_id
-    }
-
-    pub fn row(&self) -> usize {
-        #[cfg(feature = "halo2-axiom")]
-        {
-            self.cell.row_offset()
-        }
-
-        #[cfg(feature = "halo2-pse")]
-        {
-            self.row_offset
-        }
-    }
-
-    #[cfg(feature = "halo2-axiom")]
-    pub fn cell(&self) -> &Cell {
-        self.cell.cell()
-    }
-    #[cfg(feature = "halo2-pse")]
-    pub fn cell(&self) -> Cell {
-        self.cell
-    }
+/// The object that you fetch from a context when you want to reference its value in later computations.
+/// This performs a copy of the value, so it should only be used when you are about to assign the value again elsewhere.
+#[derive(Clone, Copy, Debug)]
+pub struct AssignedValue<F: ScalarField> {
+    pub value: Assigned<F>, // we don't use reference to avoid issues with lifetimes (you can't safely borrow from vector and push to it at the same time)
+    // only needed during vkey, pkey gen to fetch the actual cell from the relevant context
+    pub cell: Option<ContextCell>,
+}
 
-    pub fn value(&self) -> Value<&F> {
-        #[cfg(feature = "halo2-axiom")]
-        {
-            self.cell.value().map(|a| match *a {
-                Assigned::Trivial(a) => a,
-                _ => unreachable!(),
-            })
-        }
-        #[cfg(feature = "halo2-pse")]
-        {
-            self.value.as_ref()
+impl<F: ScalarField> AssignedValue<F> {
+    pub fn value(&self) -> &F {
+        match &self.value {
+            Assigned::Trivial(a) => a,
+            _ => unreachable!(), // if trying to fetch an un-evaluated fraction, you will have to do something manual
         }
     }
-
-    #[cfg(feature = "halo2-axiom")]
-    pub fn copy_advice<'v>(
-        &'a self,
-        region: &mut Region<'_, F>,
-        column: Column<Advice>,
-        offset: usize,
-    ) -> AssignedCell<&'v Assigned<F>, F> {
-        let assigned_cell = region
-            .assign_advice(column, offset, self.cell.value().map(|v| **v))
-            .unwrap_or_else(|err| panic!("{err:?}"));
-        region.constrain_equal(assigned_cell.cell(), self.cell());
-
-        assigned_cell
-    }
-
-    #[cfg(feature = "halo2-pse")]
-    pub fn copy_advice(
-        &'a self,
-        region: &mut Region<'_, F>,
-        column: Column<Advice>,
-        offset: usize,
-    ) -> Cell {
-        let cell = region
-            .assign_advice(|| "", column, offset, || self.value)
-            .expect("assign copy advice should not fail")
-            .cell();
-        region.constrain_equal(cell, self.cell()).expect("constrain equal should not fail");
-
-        cell
-    }
 }
 
-// The reason we have a `Context` is that we will need to mutably borrow `advice_rows` (etc.) to update row count
-// The `Circuit` trait takes in `Config` as an input that is NOT mutable, so we must pass around &mut Context everywhere for function calls
-// We follow halo2wrong's convention of having `Context` also include the `Region` to be passed around, instead of a `Layouter`, so that everything happens within a single `layouter.assign_region` call. This allows us to circumvent the Halo2 layouter and use our own "pseudo-layouter", which is more specialized (and hence faster) for our specific gates
-#[derive(Debug)]
-pub struct Context<'a, F: ScalarField> {
-    pub region: Region<'a, F>, // I don't see a reason to use Box<Region<'a, F>> since we will pass mutable reference of `Context` anyways
-
-    pub max_rows: usize,
+/// A context should be thought of as a single thread of execution trace.
+/// We keep the naming `Context` for historical reasons
+#[derive(Clone, Debug)]
+pub struct Context<F: ScalarField> {
+    /// flag to determine whether we are doing pkey gen or only witness gen.
+    /// in the latter case many operations can be skipped for optimization
+    witness_gen_only: bool,
+    /// identifier to reference cells from this context later
+    pub context_id: usize,
 
-    // Assigning advice in a "horizontal" first fashion requires getting the column with min rows used each time `assign_region` is called, which takes a toll on witness generation speed, so instead we will just assigned a column all the way down until it reaches `max_rows` and then increment the column index
-    //
-    /// `advice_alloc[context_id] = (index, offset)` where `index` contains the current column index corresponding to `context_id`, and `offset` contains the current row offset within column `index`
-    ///
-    /// This assumes the phase is `ctx.current_phase()` to enforce the design pattern that advice should be assigned one phase at a time.
-    pub advice_alloc: Vec<(usize, usize)>, // [Vec<(usize, usize)>; MAX_PHASE],
+    /// this is the single column of advice cells exactly as they should be assigned
+    pub advice: Vec<Assigned<F>>,
+    /// `cells_to_lookup` is a vector keeping track of all cells that we want to enable lookup for. When there is more than 1 advice column we will copy_advice all of these cells to the single lookup enabled column and do lookups there
+    pub cells_to_lookup: Vec<usize>, // `i` in `cells_to_lookup` means we want to lookup `advice[i]`
 
-    #[cfg(feature = "display")]
-    pub total_advice: usize,
+    pub zero_cell: Option<AssignedValue<F>>,
 
     // To save time from re-allocating new temporary vectors that get quickly dropped (e.g., for some range checks), we keep a vector with high capacity around that we `clear` before use each time
+    // This is NOT THREAD SAFE
     // Need to use RefCell to avoid borrow rules
     // Need to use Rc to borrow this and mutably borrow self at same time
-    preallocated_vec_to_assign: Rc<RefCell<Vec<AssignedValue<'a, F>>>>,
-
-    // `assigned_constants` is a HashMap keeping track of all constants that we use throughout
-    // we assign them to fixed columns as we go, re-using a fixed cell if the constant value has been assigned previously
-    fixed_columns: Vec<Column<Fixed>>,
-    fixed_col: usize,
-    fixed_offset: usize,
-    // fxhash is faster than normal HashMap: https://nnethercote.github.io/perf-book/hashing.html
-    #[cfg(feature = "halo2-axiom")]
-    pub assigned_constants: FxHashMap<F, Cell>,
-    // PSE's halo2curves does not derive Hash
-    #[cfg(feature = "halo2-pse")]
-    pub assigned_constants: FxHashMap<Vec<u8>, Cell>,
-
-    pub zero_cell: Option<AssignedValue<'a, F>>,
-
-    // `cells_to_lookup` is a vector keeping track of all cells that we want to enable lookup for. When there is more than 1 advice column we will copy_advice all of these cells to the single lookup enabled column and do lookups there
-    pub cells_to_lookup: Vec<AssignedValue<'a, F>>,
-
-    current_phase: usize,
-
-    #[cfg(feature = "display")]
-    pub op_count: FxHashMap<String, usize>,
-    #[cfg(feature = "display")]
-    pub advice_alloc_cache: [Vec<(usize, usize)>; MAX_PHASE],
-    #[cfg(feature = "display")]
-    pub total_lookup_cells: [usize; MAX_PHASE],
-    #[cfg(feature = "display")]
-    pub total_fixed: usize,
-}
-
-//impl<'a, F: ScalarField> std::ops::Drop for Context<'a, F> {
-//    fn drop(&mut self) {
-//        assert!(
-//            self.cells_to_lookup.is_empty(),
-//            "THERE ARE STILL ADVICE CELLS THAT NEED TO BE LOOKED UP"
-//        );
-//    }
-//}
-
-impl<'a, F: ScalarField> std::fmt::Display for Context<'a, F> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{self:#?}")
-    }
-}
-
-// a single struct to package any configuration parameters we will need for constructing a new `Context`
-#[derive(Clone, Debug)]
-pub struct ContextParams {
-    pub max_rows: usize,
-    /// `num_advice[context_id][phase]` contains the number of advice columns that context `context_id` keeps track of in phase `phase`
-    pub num_context_ids: usize,
-    pub fixed_columns: Vec<Column<Fixed>>,
+    // preallocated_vec_to_assign: Rc<RefCell<Vec<AssignedValue<'a, F>>>>,
+
+    // ========================================
+    // General principle: we don't need to optimize anything specific to `witness_gen_only == false` because it is only done during keygen
+    // If `witness_gen_only == false`:
+    /// the constants used in this context
+    pub constants: HashMap<F, usize>,
+    /// one selector column accompanying each advice column, should have same length as `advice`
+    pub selector: Vec<bool>,
+    // TODO: gates that use fixed columns as selectors?
+    /// Pairs of context cells, both assumed to be in `advice`, that must be constrained equal
+    pub advice_equality_constraints: Vec<(ContextCell, ContextCell)>,
+    /// Pairs of context cells, where the first is an index into `constants` and the second is in `advice`, that must be constrained equal
+    pub constant_equality_constraints: Vec<(ContextCell, ContextCell)>,
 }
 
-impl<'a, F: ScalarField> Context<'a, F> {
-    pub fn new(region: Region<'a, F>, params: ContextParams) -> Self {
-        let advice_alloc = vec![(0, 0); params.num_context_ids];
-
+impl<F: ScalarField> Context<F> {
+    pub fn new(witness_gen_only: bool, context_id: usize) -> Self {
         Self {
-            region,
-            max_rows: params.max_rows,
-            advice_alloc,
-            #[cfg(feature = "display")]
-            total_advice: 0,
-            preallocated_vec_to_assign: Rc::new(RefCell::new(Vec::with_capacity(256))),
-            fixed_columns: params.fixed_columns,
-            fixed_col: 0,
-            fixed_offset: 0,
-            assigned_constants: FxHashMap::default(),
-            zero_cell: None,
+            witness_gen_only,
+            context_id,
+            advice: Vec::new(),
             cells_to_lookup: Vec::new(),
-            current_phase: 0,
-            #[cfg(feature = "display")]
-            op_count: FxHashMap::default(),
-            #[cfg(feature = "display")]
-            advice_alloc_cache: [(); MAX_PHASE].map(|_| vec![]),
-            #[cfg(feature = "display")]
-            total_lookup_cells: [0; MAX_PHASE],
-            #[cfg(feature = "display")]
-            total_fixed: 0,
+            zero_cell: None,
+            constants: HashMap::new(),
+            selector: Vec::new(),
+            advice_equality_constraints: Vec::new(),
+            constant_equality_constraints: Vec::new(),
         }
     }
 
-    pub fn preallocated_vec_to_assign(&self) -> Rc<RefCell<Vec<AssignedValue<'a, F>>>> {
-        Rc::clone(&self.preallocated_vec_to_assign)
+    pub fn witness_gen_only(&self) -> bool {
+        self.witness_gen_only
     }
 
-    pub fn next_phase(&mut self) {
-        assert!(
-            self.cells_to_lookup.is_empty(),
-            "THERE ARE STILL ADVICE CELLS THAT NEED TO BE LOOKED UP"
-        );
-        #[cfg(feature = "display")]
-        {
-            self.advice_alloc_cache[self.current_phase] = self.advice_alloc.clone();
-        }
-        #[cfg(feature = "halo2-axiom")]
-        self.region.next_phase();
-        self.current_phase += 1;
-        for advice_alloc in self.advice_alloc.iter_mut() {
-            *advice_alloc = (0, 0);
+    pub fn assign_fixed(&mut self, c: F) -> usize {
+        let index = self.constants.get(&c);
+        if let Some(index) = index {
+            *index
+        } else {
+            let index = self.constants.len();
+            self.constants.insert(c, index);
+            index
         }
-        assert!(self.current_phase < MAX_PHASE);
     }
 
-    pub fn current_phase(&self) -> usize {
-        self.current_phase
+    /// Push a `QuantumCell` onto the stack of advice cells to be assigned
+    pub fn assign_cell(&mut self, input: impl Into<QuantumCell<F>>) {
+        match input.into() {
+            QuantumCell::Existing(acell) => {
+                self.advice.push(acell.value);
+                if !self.witness_gen_only {
+                    let new_cell =
+                        ContextCell { context_id: self.context_id, offset: self.advice.len() - 1 };
+                    self.advice_equality_constraints.push((new_cell, acell.cell.unwrap()));
+                }
+            }
+            QuantumCell::Witness(val) => {
+                self.advice.push(Assigned::Trivial(val));
+            }
+            QuantumCell::WitnessFraction(val) => {
+                self.advice.push(val);
+            }
+            QuantumCell::Constant(c) => {
+                self.advice.push(Assigned::Trivial(c));
+                if !self.witness_gen_only {
+                    let c_cell =
+                        ContextCell { context_id: self.context_id, offset: self.assign_fixed(c) };
+                    let new_cell =
+                        ContextCell { context_id: self.context_id, offset: self.advice.len() - 1 };
+                    self.constant_equality_constraints.push((c_cell, new_cell));
+                }
+            }
+        }
     }
 
-    #[cfg(feature = "display")]
-    /// Returns (number of fixed columns used, total fixed cells used)
-    pub fn fixed_stats(&self) -> (usize, usize) {
-        // heuristic, fixed cells don't need to worry about blinding factors
-        ((self.total_fixed + self.max_rows - 1) / self.max_rows, self.total_fixed)
+    pub fn last(&self) -> Option<AssignedValue<F>> {
+        self.advice.last().map(|v| {
+            let cell = (!self.witness_gen_only).then_some(ContextCell {
+                context_id: self.context_id,
+                offset: self.advice.len() - 1,
+            });
+            AssignedValue { value: *v, cell }
+        })
     }
 
-    #[cfg(feature = "halo2-axiom")]
-    pub fn assign_fixed(&mut self, c: F) -> Cell {
-        let fixed = self.assigned_constants.get(&c);
-        if let Some(cell) = fixed {
-            *cell
+    pub fn get(&self, offset: isize) -> AssignedValue<F> {
+        let offset = if offset < 0 {
+            self.advice.len().wrapping_add_signed(offset)
         } else {
-            let cell = self.assign_fixed_without_caching(c);
-            self.assigned_constants.insert(c, cell);
-            cell
-        }
+            offset as usize
+        };
+        assert!(offset < self.advice.len());
+        let cell =
+            (!self.witness_gen_only).then_some(ContextCell { context_id: self.context_id, offset });
+        AssignedValue { value: self.advice[offset], cell }
     }
-    #[cfg(feature = "halo2-pse")]
-    pub fn assign_fixed(&mut self, c: F) -> Cell {
-        let fixed = self.assigned_constants.get(c.to_repr().as_ref());
-        if let Some(cell) = fixed {
-            *cell
-        } else {
-            let cell = self.assign_fixed_without_caching(c);
-            self.assigned_constants.insert(c.to_repr().as_ref().to_vec(), cell);
-            cell
+
+    pub fn constrain_equal(&mut self, a: &AssignedValue<F>, b: &AssignedValue<F>) {
+        if !self.witness_gen_only {
+            self.advice_equality_constraints.push((a.cell.unwrap(), b.cell.unwrap()));
         }
     }
 
-    /// Saving the assigned constant to the hashmap takes time.
+    /// Assigns multiple advice cells and the accompanying selector cells.
+    ///
+    /// Does not return the assigned cells; fetch them afterwards with `get` or `last`.
     ///
-    /// In situations where you don't expect to reuse the value, you can assign the fixed value directly using this function.
-    pub fn assign_fixed_without_caching(&mut self, c: F) -> Cell {
-        #[cfg(feature = "halo2-axiom")]
-        let cell = self.region.assign_fixed(
-            self.fixed_columns[self.fixed_col],
-            self.fixed_offset,
-            Assigned::Trivial(c),
-        );
-        #[cfg(feature = "halo2-pse")]
-        let cell = self
-            .region
-            .assign_fixed(
-                || "",
-                self.fixed_columns[self.fixed_col],
-                self.fixed_offset,
-                || Value::known(c),
-            )
-            .expect("assign fixed should not fail")
-            .cell();
-        #[cfg(feature = "display")]
-        {
-            self.total_fixed += 1;
+    /// All indices in `gate_offsets` are with respect to `inputs` indices
+    /// * `gate_offsets` specifies indices to enable selector for the gate
+    /// * allow the index in `gate_offsets` to be negative in case we want to do advanced overlapping
+    pub fn assign_region<Q>(
+        &mut self,
+        inputs: impl IntoIterator<Item = Q>,
+        gate_offsets: impl IntoIterator<Item = isize>,
+    ) where
+        Q: Into<QuantumCell<F>>,
+    {
+        let row_offset = self.advice.len(); // row of inputs[0]; selector.len() may lag advice.len()
+        for input in inputs {
+            self.assign_cell(input);
         }
-        self.fixed_col += 1;
-        if self.fixed_col == self.fixed_columns.len() {
-            self.fixed_col = 0;
-            self.fixed_offset += 1;
+
+        if !self.witness_gen_only {
+            self.selector.resize(self.advice.len(), false);
+            for offset in gate_offsets {
+                *self
+                    .selector
+                    .get_mut(row_offset.checked_add_signed(offset).expect("Invalid gate offset"))
+                    .expect("Gate offset out of bounds") = true;
+            }
         }
-        cell
     }
 
-    /// Assuming that this is only called if ctx.region is not in shape mode!
-    #[cfg(feature = "halo2-axiom")]
-    pub fn assign_cell<'v>(
+    /// Calls `assign_region` and returns the last assigned cell
+    pub fn assign_region_last<Q>(
         &mut self,
-        input: QuantumCell<'_, 'v, F>,
-        column: Column<Advice>,
-        #[cfg(feature = "display")] context_id: usize,
-        row_offset: usize,
-    ) -> AssignedValue<'v, F> {
-        match input {
-            QuantumCell::Existing(acell) => {
-                AssignedValue {
-                    cell: acell.copy_advice(
-                        // || "gate: copy advice",
-                        &mut self.region,
-                        column,
-                        row_offset,
-                    ),
-                    #[cfg(feature = "display")]
-                    context_id,
-                }
-            }
-            QuantumCell::ExistingOwned(acell) => {
-                AssignedValue {
-                    cell: acell.copy_advice(
-                        // || "gate: copy advice",
-                        &mut self.region,
-                        column,
-                        row_offset,
-                    ),
-                    #[cfg(feature = "display")]
-                    context_id,
-                }
-            }
-            QuantumCell::Witness(val) => AssignedValue {
-                cell: self
-                    .region
-                    .assign_advice(column, row_offset, val.map(Assigned::Trivial))
-                    .expect("assign advice should not fail"),
-                #[cfg(feature = "display")]
-                context_id,
-            },
-            QuantumCell::WitnessFraction(val) => AssignedValue {
-                cell: self
-                    .region
-                    .assign_advice(column, row_offset, val)
-                    .expect("assign advice should not fail"),
-                #[cfg(feature = "display")]
-                context_id,
-            },
-            QuantumCell::Constant(c) => {
-                let acell = self
-                    .region
-                    .assign_advice(column, row_offset, Value::known(Assigned::Trivial(c)))
-                    .expect("assign fixed advice should not fail");
-                let c_cell = self.assign_fixed(c);
-                self.region.constrain_equal(acell.cell(), &c_cell);
-                AssignedValue {
-                    cell: acell,
-                    #[cfg(feature = "display")]
-                    context_id,
-                }
-            }
-        }
+        inputs: impl IntoIterator<Item = Q>,
+        gate_offsets: impl IntoIterator<Item = isize>,
+    ) -> AssignedValue<F>
+    where
+        Q: Into<QuantumCell<F>>,
+    {
+        self.assign_region(inputs, gate_offsets);
+        self.last().unwrap()
     }
 
-    #[cfg(feature = "halo2-pse")]
-    pub fn assign_cell<'v>(
+    /// All indices in `gate_offsets`, `equality_offsets`, `external_equality` are with respect to `inputs` indices
+    /// - `gate_offsets` specifies indices to enable selector for the gate; assume `gate_offsets` is sorted in increasing order
+    /// - `equality_offsets` specifies pairs of indices to constrain equality
+    /// - `external_equality` specifies an existing cell to constrain equality with the cell at a certain index
+    pub fn assign_region_smart<Q>(
         &mut self,
-        input: QuantumCell<'_, 'v, F>,
-        column: Column<Advice>,
-        #[cfg(feature = "display")] context_id: usize,
-        row_offset: usize,
-        phase: u8,
-    ) -> AssignedValue<'v, F> {
-        match input {
-            QuantumCell::Existing(acell) => {
-                AssignedValue {
-                    cell: acell.copy_advice(
-                        // || "gate: copy advice",
-                        &mut self.region,
-                        column,
-                        row_offset,
-                    ),
-                    value: acell.value,
-                    row_offset,
-                    _marker: PhantomData,
-                    #[cfg(feature = "display")]
-                    context_id,
-                }
+        inputs: impl IntoIterator<Item = Q>,
+        gate_offsets: impl IntoIterator<Item = isize>,
+        equality_offsets: impl IntoIterator<Item = (isize, isize)>,
+        external_equality: impl IntoIterator<Item = (Option<ContextCell>, isize)>,
+    ) where
+        Q: Into<QuantumCell<F>>,
+    {
+        let row_offset = self.advice.len();
+        self.assign_region(inputs, gate_offsets);
+
+        if !self.witness_gen_only {
+            for (offset1, offset2) in equality_offsets {
+                self.advice_equality_constraints.push((
+                    ContextCell {
+                        context_id: self.context_id,
+                        offset: row_offset.wrapping_add_signed(offset1),
+                    },
+                    ContextCell {
+                        context_id: self.context_id,
+                        offset: row_offset.wrapping_add_signed(offset2),
+                    },
+                ));
             }
-            QuantumCell::ExistingOwned(acell) => {
-                AssignedValue {
-                    cell: acell.copy_advice(
-                        // || "gate: copy advice",
-                        &mut self.region,
-                        column,
-                        row_offset,
-                    ),
-                    value: acell.value,
-                    row_offset,
-                    _marker: PhantomData,
-                    #[cfg(feature = "display")]
-                    context_id,
-                }
-            }
-            QuantumCell::Witness(value) => AssignedValue {
-                cell: self
-                    .region
-                    .assign_advice(|| "", column, row_offset, || value)
-                    .expect("assign advice should not fail")
-                    .cell(),
-                value,
-                row_offset,
-                _marker: PhantomData,
-                #[cfg(feature = "display")]
-                context_id,
-            },
-            QuantumCell::WitnessFraction(val) => AssignedValue {
-                cell: self
-                    .region
-                    .assign_advice(|| "", column, row_offset, || val)
-                    .expect("assign advice should not fail")
-                    .cell(),
-                value: Value::unknown(),
-                row_offset,
-                _marker: PhantomData,
-                #[cfg(feature = "display")]
-                context_id,
-            },
-            QuantumCell::Constant(c) => {
-                let acell = self
-                    .region
-                    .assign_advice(|| "", column, row_offset, || Value::known(c))
-                    .expect("assign fixed advice should not fail")
-                    .cell();
-                let c_cell = self.assign_fixed(c);
-                self.region.constrain_equal(acell, c_cell).unwrap();
-                AssignedValue {
-                    cell: acell,
-                    value: Value::known(c),
-                    row_offset,
-                    _marker: PhantomData,
-                    #[cfg(feature = "display")]
-                    context_id,
-                }
+            for (cell, offset) in external_equality {
+                self.advice_equality_constraints.push((
+                    cell.unwrap(),
+                    ContextCell {
+                        context_id: self.context_id,
+                        offset: row_offset.wrapping_add_signed(offset),
+                    },
+                ));
             }
         }
     }
 
-    // convenience function to deal with rust warnings
-    pub fn constrain_equal(&mut self, a: &AssignedValue<F>, b: &AssignedValue<F>) {
-        #[cfg(feature = "halo2-axiom")]
-        self.region.constrain_equal(a.cell(), b.cell());
-        #[cfg(not(feature = "halo2-axiom"))]
-        self.region.constrain_equal(a.cell(), b.cell()).unwrap();
+    pub fn assign_witnesses(
+        &mut self,
+        witnesses: impl IntoIterator<Item = F>,
+    ) -> Vec<AssignedValue<F>> {
+        let row_offset = self.advice.len();
+        self.assign_region(witnesses.into_iter().map(QuantumCell::Witness), []);
+        self.advice[row_offset..]
+            .iter()
+            .enumerate()
+            .map(|(i, v)| {
+                let cell = (!self.witness_gen_only)
+                    .then_some(ContextCell { context_id: self.context_id, offset: row_offset + i });
+                AssignedValue { value: *v, cell }
+            })
+            .collect()
     }
 
-    /// Call this at the end of a phase
-    ///
-    /// assumes self.region is not in shape mode
-    pub fn copy_and_lookup_cells(&mut self, lookup_advice: Vec<Column<Advice>>) -> usize {
-        let total_cells = self.cells_to_lookup.len();
-        let mut cells_to_lookup = self.cells_to_lookup.iter().peekable();
-        for column in lookup_advice.into_iter() {
-            let mut offset = 0;
-            while offset < self.max_rows && cells_to_lookup.peek().is_some() {
-                let acell = cells_to_lookup.next().unwrap();
-                acell.copy_advice(&mut self.region, column, offset);
-                offset += 1;
-            }
-        }
-        if cells_to_lookup.peek().is_some() {
-            panic!("NOT ENOUGH ADVICE COLUMNS WITH LOOKUP ENABLED");
-        }
-        self.cells_to_lookup.clear();
-        #[cfg(feature = "display")]
-        {
-            self.total_lookup_cells[self.current_phase] = total_cells;
-        }
-        total_cells
+    pub fn load_witness(&mut self, witness: F) -> AssignedValue<F> {
+        self.assign_cell(QuantumCell::Witness(witness));
+        self.last().unwrap()
     }
 
-    #[cfg(feature = "display")]
-    pub fn print_stats(&mut self, context_names: &[&str]) {
-        let curr_phase = self.current_phase();
-        self.advice_alloc_cache[curr_phase] = self.advice_alloc.clone();
-        for phase in 0..=curr_phase {
-            for (context_name, alloc) in
-                context_names.iter().zip(self.advice_alloc_cache[phase].iter())
-            {
-                println!("Context \"{context_name}\" used {} advice columns and {} total advice cells in phase {phase}", alloc.0 + 1, alloc.0 * self.max_rows + alloc.1);
-            }
-            let num_lookup_advice_cells = self.total_lookup_cells[phase];
-            println!("Special lookup advice cells: optimal columns: {}, total {num_lookup_advice_cells} cells used in phase {phase}.",  (num_lookup_advice_cells + self.max_rows - 1)/self.max_rows);
-        }
-        let (fixed_cols, total_fixed) = self.fixed_stats();
-        println!("Fixed columns: {fixed_cols}, Total fixed cells: {total_fixed}");
+    pub fn load_constant(&mut self, c: F) -> AssignedValue<F> {
+        self.assign_cell(QuantumCell::Constant(c));
+        self.last().unwrap()
     }
-}
 
-#[derive(Clone, Debug)]
-pub struct AssignedPrimitive<'a, T: Into<u64> + Copy, F: ScalarField> {
-    pub value: Value<T>,
-
-    #[cfg(feature = "halo2-axiom")]
-    pub cell: AssignedCell<&'a Assigned<F>, F>,
-
-    #[cfg(feature = "halo2-pse")]
-    pub cell: Cell,
-    #[cfg(feature = "halo2-pse")]
-    row_offset: usize,
-    #[cfg(feature = "halo2-pse")]
-    _marker: PhantomData<&'a F>,
+    pub fn load_zero(&mut self) -> AssignedValue<F> {
+        if let Some(zcell) = &self.zero_cell {
+            return *zcell;
+        }
+        let zero_cell = self.load_constant(F::zero());
+        self.zero_cell = Some(zero_cell);
+        zero_cell
+    }
 }
diff --git a/halo2-base/src/utils.rs b/halo2-base/src/utils.rs
index bb07150a..253ec62d 100644
--- a/halo2-base/src/utils.rs
+++ b/halo2-base/src/utils.rs
@@ -52,10 +52,10 @@ where
 #[cfg(feature = "halo2-axiom")]
 pub trait PrimeField = BigPrimeField;
 #[cfg(feature = "halo2-pse")]
-pub trait PrimeField = FieldExt<Repr = [u8; 32]>;
+pub trait PrimeField = FieldExt<Repr = [u8; 32]> + Hash;
 
 #[cfg(feature = "halo2-pse")]
-pub trait ScalarField = FieldExt;
+pub trait ScalarField = FieldExt + Hash;
 
 #[inline(always)]
 pub(crate) fn decompose_u64_digits_to_limbs(
@@ -288,7 +288,10 @@ pub mod fs {
             bn256::{Bn256, G1Affine},
             CurveAffine,
         },
-        poly::{commitment::{Params, ParamsProver}, kzg::commitment::ParamsKZG},
+        poly::{
+            commitment::{Params, ParamsProver},
+            kzg::commitment::ParamsKZG,
+        },
     };
     use rand_chacha::{rand_core::SeedableRng, ChaCha20Rng};
 

From 941035a55641e556223af766d8504b220901a8d9 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Mon, 6 Feb 2023 14:26:31 -0800
Subject: [PATCH 02/26] BUG: `GateInstructions::idx_to_indicator` was missing a
 constraint to check that the indicator witness was equal to 1 when non-zero.
 * Previously the constraint ensured that `ind[i] = 0` when `idx != i`;
 however, `ind[idx]` itself could be anything.

---
 halo2-base/src/gates/flex_gate.rs |  4 +++-
 halo2-base/src/gates/mod.rs       | 12 ++++++------
 halo2-base/src/gates/tests.rs     |  4 ++--
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/halo2-base/src/gates/flex_gate.rs b/halo2-base/src/gates/flex_gate.rs
index 4f646cdd..e147eabf 100644
--- a/halo2-base/src/gates/flex_gate.rs
+++ b/halo2-base/src/gates/flex_gate.rs
@@ -516,11 +516,13 @@ pub trait GateInstructions<F: ScalarField> {
                 vec![(1, 5)],
                 vec![],
             );
-            ind.push(ctx.get(-2));
             // need to use assigned idx after i > 0 so equality constraint holds
             if i == 0 {
                 idx = Existing(ctx.get(-5));
             }
+            let ind_cell = ctx.get(-2);
+            self.assert_bit(ctx, ind_cell);
+            ind.push(ind_cell);
         }
         ind
     }
diff --git a/halo2-base/src/gates/mod.rs b/halo2-base/src/gates/mod.rs
index bfa89593..23e6369e 100644
--- a/halo2-base/src/gates/mod.rs
+++ b/halo2-base/src/gates/mod.rs
@@ -58,8 +58,8 @@ impl<F: ScalarField> GateThreadBuilder<F> {
     }
 
     /// Auto-calculate configuration parameters for the circuit
-    pub fn config(&self, k: usize) -> FlexGateConfigParams {
-        let max_rows = 1 << k;
+    pub fn config(&self, k: usize, minimum_rows: Option<usize>) -> FlexGateConfigParams {
+        let max_rows = (1 << k) - minimum_rows.unwrap_or(0);
         let total_advice_per_phase = self
             .threads
             .iter()
@@ -69,7 +69,7 @@ impl<F: ScalarField> GateThreadBuilder<F> {
         // if this is too small, manual configuration will be needed
         let num_advice_per_phase = total_advice_per_phase
             .iter()
-            .map(|count| (count + max_rows - 1) >> k)
+            .map(|count| (count + max_rows - 1) / max_rows)
             .collect::<Vec<_>>();
 
         let total_lookup_advice_per_phase = self
@@ -79,7 +79,7 @@ impl<F: ScalarField> GateThreadBuilder<F> {
             .collect::<Vec<_>>();
         let num_lookup_advice_per_phase = total_lookup_advice_per_phase
             .iter()
-            .map(|count| (count + max_rows - 1) >> k)
+            .map(|count| (count + max_rows - 1) / max_rows)
             .collect::<Vec<_>>();
 
         let total_fixed: usize = self
@@ -87,7 +87,7 @@ impl<F: ScalarField> GateThreadBuilder<F> {
             .iter()
             .map(|threads| threads.iter().map(|ctx| ctx.constants.len()).sum::<usize>())
             .sum();
-        let num_fixed = (total_fixed + max_rows - 1) >> k;
+        let num_fixed = (total_fixed + (1 << k) - 1) >> k;
 
         let params = FlexGateConfigParams {
             strategy: GateStrategy::Vertical,
@@ -145,7 +145,7 @@ impl<F: ScalarField> GateThreadBuilder<F> {
                     }
                     let basic_gate = config.basic_gates[phase]
                         .get(gate_index)
-                        .unwrap_or_else(|| panic!("NOT ENOUGH ADVICE COLUMNS IN PHASE {phase}"));
+                        .unwrap_or_else(|| panic!("NOT ENOUGH ADVICE COLUMNS IN PHASE {phase}. Perhaps blinding factors were not taken into account. The max non-poisoned rows is {max_rows}"));
                     let column = basic_gate.value;
                     let value = if use_unknown { Value::unknown() } else { Value::known(advice) };
                     #[cfg(feature = "halo2-axiom")]
diff --git a/halo2-base/src/gates/tests.rs b/halo2-base/src/gates/tests.rs
index 57b673ef..dd49fa7e 100644
--- a/halo2-base/src/gates/tests.rs
+++ b/halo2-base/src/gates/tests.rs
@@ -91,7 +91,7 @@ fn gate_tests<F: ScalarField>(ctx: &mut Context<F>, inputs: [F; 3]) {
     chip.idx_to_indicator(ctx, Constant(F::from(3u64)), 4);
 
     let bits = ctx.assign_witnesses([F::zero(), F::one()]);
-    //chip.bits_to_indicator(ctx, &bits);
+    chip.bits_to_indicator(ctx, &bits);
 }
 
 #[test]
@@ -102,7 +102,7 @@ fn test_gates() {
     gate_tests(builder.main(0), inputs);
 
     // auto-tune circuit
-    builder.config(k);
+    builder.config(k, Some(9));
     // create circuit
     let circuit =
         MyCircuit { inputs, builder: RefCell::new(builder), break_points: RefCell::default() };

From 4abc9cd2abd74c7d9bc9307196d75dd59ac0c692 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Mon, 6 Feb 2023 15:31:16 -0800
Subject: [PATCH 03/26] update: working benches for `mul` and `inner_product`

---
 halo2-base/benches/inner_product.rs |  95 +++------
 halo2-base/benches/mul.rs           | 112 +++-------
 halo2-base/src/gates/flex_gate.rs   |  12 +-
 halo2-base/src/gates/mod.rs         | 306 +---------------------------
 halo2-base/src/gates/tests.rs       |  81 +-------
 5 files changed, 71 insertions(+), 535 deletions(-)

diff --git a/halo2-base/benches/inner_product.rs b/halo2-base/benches/inner_product.rs
index e5fec21c..e43672b7 100644
--- a/halo2-base/benches/inner_product.rs
+++ b/halo2-base/benches/inner_product.rs
@@ -1,9 +1,7 @@
 #![allow(unused_imports)]
 #![allow(unused_variables)]
-use halo2_base::gates::{
-    flex_gate::{FlexGateConfig, GateStrategy},
-    GateInstructions,
-};
+use halo2_base::gates::builder::{GateCircuitBuilder, GateThreadBuilder};
+use halo2_base::gates::flex_gate::{FlexGateConfig, GateChip, GateInstructions, GateStrategy};
 use halo2_base::halo2_proofs::{
     arithmetic::Field,
     circuit::*,
@@ -16,7 +14,12 @@ use halo2_base::halo2_proofs::{
     },
     transcript::{Blake2bWrite, Challenge255, TranscriptWriterBuffer},
 };
-use halo2_base::{Context, ContextParams, QuantumCell::Witness, SKIP_FIRST_PASS};
+use halo2_base::utils::ScalarField;
+use halo2_base::{
+    Context,
+    QuantumCell::{Existing, Witness},
+    SKIP_FIRST_PASS,
+};
 use itertools::Itertools;
 use rand::rngs::OsRng;
 use std::marker::PhantomData;
@@ -28,82 +31,48 @@ use pprof::criterion::{Output, PProfProfiler};
 // Thanks to the example provided by @jebbow in his article
 // https://www.jibbow.com/posts/criterion-flamegraphs/
 
-#[derive(Clone, Default)]
-struct MyCircuit<F> {
-    _marker: PhantomData<F>,
-}
-
-const NUM_ADVICE: usize = 1;
 const K: u32 = 19;
 
-impl Circuit<Fr> for MyCircuit<Fr> {
-    type Config = FlexGateConfig<Fr>;
-    type FloorPlanner = SimpleFloorPlanner;
+fn inner_prod_bench<F: ScalarField>(ctx: &mut Context<F>, a: Vec<F>, b: Vec<F>) {
+    assert_eq!(a.len(), b.len());
+    let a = ctx.assign_witnesses(a);
+    let b = ctx.assign_witnesses(b);
 
-    fn without_witnesses(&self) -> Self {
-        Self::default()
-    }
-
-    fn configure(meta: &mut ConstraintSystem<Fr>) -> Self::Config {
-        FlexGateConfig::configure(meta, GateStrategy::Vertical, &[NUM_ADVICE], 1, 0, K as usize)
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<Fr>,
-    ) -> Result<(), Error> {
-        let mut first_pass = SKIP_FIRST_PASS;
-
-        layouter.assign_region(
-            || "gate",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let mut aux = Context::new(
-                    region,
-                    ContextParams {
-                        max_rows: config.max_rows,
-                        num_context_ids: 1,
-                        fixed_columns: config.constants.clone(),
-                    },
-                );
-                let ctx = &mut aux;
-
-                let a = (0..5).map(|_| Witness(Value::known(Fr::random(OsRng)))).collect_vec();
-                let b = (0..5).map(|_| Witness(Value::known(Fr::random(OsRng)))).collect_vec();
-
-                for _ in 0..(1 << K) / 16 - 10 {
-                    config.inner_product(ctx, a.clone(), b.clone());
-                }
-
-                Ok(())
-            },
-        )
+    let chip = GateChip::default();
+    for _ in 0..(1 << K) / 16 - 10 {
+        chip.inner_product(ctx, a.clone(), b.clone().into_iter().map(Existing));
     }
 }
 
 fn bench(c: &mut Criterion) {
-    let circuit = MyCircuit::<Fr> { _marker: PhantomData };
+    // create circuit for keygen
+    let mut builder = GateThreadBuilder::new(false);
+    inner_prod_bench(builder.main(0), vec![Fr::zero(); 5], vec![Fr::zero(); 5]);
+    builder.config(K as usize, Some(20));
+    let circuit = GateCircuitBuilder::mock(builder);
 
+    // check the circuit is correct just in case
     MockProver::run(K, &circuit, vec![]).unwrap().assert_satisfied();
 
     let params = ParamsKZG::<Bn256>::setup(K, OsRng);
     let vk = keygen_vk(&params, &circuit).expect("vk should not fail");
     let pk = keygen_pk(&params, vk, &circuit).expect("pk should not fail");
 
+    let break_points = circuit.break_points.take();
+
     let mut group = c.benchmark_group("plonk-prover");
     group.sample_size(10);
     group.bench_with_input(
         BenchmarkId::new("inner_product", K),
         &(&params, &pk),
-        |b, &(params, pk)| {
-            b.iter(|| {
-                let circuit = MyCircuit::<Fr> { _marker: PhantomData };
-                let rng = OsRng;
+        |bencher, &(params, pk)| {
+            bencher.iter(|| {
+                let mut builder = GateThreadBuilder::new(true);
+                let a = (0..5).map(|_| Fr::random(OsRng)).collect_vec();
+                let b = (0..5).map(|_| Fr::random(OsRng)).collect_vec();
+                inner_prod_bench(builder.main(0), a, b);
+                let circuit = GateCircuitBuilder::witness_gen(builder, break_points.clone());
+
                 let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
                 create_proof::<
                     KZGCommitmentScheme<Bn256>,
@@ -112,7 +81,7 @@ fn bench(c: &mut Criterion) {
                     _,
                     Blake2bWrite<Vec<u8>, G1Affine, Challenge255<_>>,
                     _,
-                >(params, pk, &[circuit], &[&[]], rng, &mut transcript)
+                >(params, pk, &[circuit], &[&[]], OsRng, &mut transcript)
                 .expect("prover should not fail");
             })
         },
diff --git a/halo2-base/benches/mul.rs b/halo2-base/benches/mul.rs
index 6698ae99..97514e47 100644
--- a/halo2-base/benches/mul.rs
+++ b/halo2-base/benches/mul.rs
@@ -1,9 +1,7 @@
-use halo2_base::gates::{
-    flex_gate::{FlexGateConfig, GateStrategy},
-    GateInstructions,
-};
+use ff::Field;
+use halo2_base::gates::builder::{GateCircuitBuilder, GateThreadBuilder};
+use halo2_base::gates::flex_gate::{GateChip, GateInstructions};
 use halo2_base::halo2_proofs::{
-    circuit::*,
     halo2curves::bn256::{Bn256, Fr, G1Affine},
     plonk::*,
     poly::kzg::{
@@ -12,11 +10,8 @@ use halo2_base::halo2_proofs::{
     },
     transcript::{Blake2bWrite, Challenge255, TranscriptWriterBuffer},
 };
-use halo2_base::{
-    Context, ContextParams,
-    QuantumCell::{Existing, Witness},
-    SKIP_FIRST_PASS,
-};
+use halo2_base::utils::ScalarField;
+use halo2_base::Context;
 use rand::rngs::OsRng;
 
 use criterion::{criterion_group, criterion_main};
@@ -26,92 +21,43 @@ use pprof::criterion::{Output, PProfProfiler};
 // Thanks to the example provided by @jebbow in his article
 // https://www.jibbow.com/posts/criterion-flamegraphs/
 
-#[derive(Clone, Default)]
-struct MyCircuit<F> {
-    a: Value<F>,
-    b: Value<F>,
-    c: Value<F>,
-}
-
-const NUM_ADVICE: usize = 1;
 const K: u32 = 9;
 
-impl Circuit<Fr> for MyCircuit<Fr> {
-    type Config = FlexGateConfig<Fr>;
-    type FloorPlanner = SimpleFloorPlanner;
+fn mul_bench<F: ScalarField>(ctx: &mut Context<F>, inputs: [F; 2]) {
+    let [a, b]: [_; 2] = ctx.assign_witnesses(inputs).try_into().unwrap();
+    let chip = GateChip::default();
 
-    fn without_witnesses(&self) -> Self {
-        Self::default()
-    }
-
-    fn configure(meta: &mut ConstraintSystem<Fr>) -> Self::Config {
-        FlexGateConfig::configure(meta, GateStrategy::PlonkPlus, &[NUM_ADVICE], 1, 0, K as usize)
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<Fr>,
-    ) -> Result<(), Error> {
-        let mut first_pass = SKIP_FIRST_PASS;
-
-        layouter.assign_region(
-            || "gate",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let mut aux = Context::new(
-                    region,
-                    ContextParams {
-                        max_rows: config.max_rows,
-                        num_context_ids: 1,
-                        fixed_columns: config.constants.clone(),
-                    },
-                );
-                let ctx = &mut aux;
-
-                let (_a_cell, b_cell, c_cell) = {
-                    let cells = config.assign_region_smart(
-                        ctx,
-                        vec![Witness(self.a), Witness(self.b), Witness(self.c)],
-                        vec![],
-                        vec![],
-                        vec![],
-                    );
-                    (cells[0].clone(), cells[1].clone(), cells[2].clone())
-                };
-
-                for _ in 0..120 {
-                    config.mul(ctx, Existing(&c_cell), Existing(&b_cell));
-                }
-
-                Ok(())
-            },
-        )
+    for _ in 0..120 {
+        chip.mul(ctx, a, b);
     }
 }
 
 fn bench(c: &mut Criterion) {
-    let circuit = MyCircuit::<Fr> {
-        a: Value::known(Fr::from(10u64)),
-        b: Value::known(Fr::from(12u64)),
-        c: Value::known(Fr::from(120u64)),
-    };
+    // create circuit for keygen
+    let mut builder = GateThreadBuilder::new(false);
+    mul_bench(builder.main(0), [Fr::zero(); 2]);
+    builder.config(K as usize, Some(9));
+    let circuit = GateCircuitBuilder::keygen(builder);
 
     let params = ParamsKZG::<Bn256>::setup(K, OsRng);
     let vk = keygen_vk(&params, &circuit).expect("vk should not fail");
     let pk = keygen_pk(&params, vk, &circuit).expect("pk should not fail");
 
+    let break_points = circuit.break_points.take();
+
+    let a = Fr::random(OsRng);
+    let b = Fr::random(OsRng);
     // native multiplication 120 times
     c.bench_with_input(
         BenchmarkId::new("native mul", K),
-        &(&params, &pk, &circuit),
-        |b, &(params, pk, circuit)| {
-            b.iter(|| {
-                let rng = OsRng;
+        &(&params, &pk, [a, b]),
+        |bencher, &(params, pk, inputs)| {
+            bencher.iter(|| {
+                let mut builder = GateThreadBuilder::new(true);
+                // do the computation
+                mul_bench(builder.main(0), inputs);
+                let circuit = GateCircuitBuilder::witness_gen(builder, break_points.clone());
+
                 let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
                 create_proof::<
                     KZGCommitmentScheme<Bn256>,
@@ -120,8 +66,8 @@ fn bench(c: &mut Criterion) {
                     _,
                     Blake2bWrite<Vec<u8>, G1Affine, Challenge255<_>>,
                     _,
-                >(params, pk, &[circuit.clone()], &[&[]], rng, &mut transcript)
-                .expect("prover should not fail");
+                >(params, pk, &[circuit], &[&[]], OsRng, &mut transcript)
+                .unwrap();
             })
         },
     );
diff --git a/halo2-base/src/gates/flex_gate.rs b/halo2-base/src/gates/flex_gate.rs
index e147eabf..c4bbc4b4 100644
--- a/halo2-base/src/gates/flex_gate.rs
+++ b/halo2-base/src/gates/flex_gate.rs
@@ -1,10 +1,3 @@
-use serde::{Deserialize, Serialize};
-
-use super::{
-    AssignedValue, Context,
-    QuantumCell::{self, Constant, Existing, Witness, WitnessFraction},
-};
-use crate::utils::ScalarField;
 use crate::{
     halo2_proofs::{
         plonk::{
@@ -13,8 +6,11 @@ use crate::{
         },
         poly::Rotation,
     },
-    ContextCell,
+    utils::ScalarField,
+    AssignedValue, Context, ContextCell,
+    QuantumCell::{self, Constant, Existing, Witness, WitnessFraction},
 };
+use serde::{Deserialize, Serialize};
 use std::{
     iter::{self},
     marker::PhantomData,
diff --git a/halo2-base/src/gates/mod.rs b/halo2-base/src/gates/mod.rs
index 23e6369e..4fc04edc 100644
--- a/halo2-base/src/gates/mod.rs
+++ b/halo2-base/src/gates/mod.rs
@@ -1,311 +1,7 @@
-use self::flex_gate::{FlexGateConfig, GateStrategy, MAX_PHASE};
-use super::{
-    utils::ScalarField,
-    AssignedValue, Context,
-    QuantumCell::{self, Constant, Existing, Witness, WitnessFraction},
-};
-use crate::{
-    halo2_proofs::{
-        circuit::{Layouter, Region, SimpleFloorPlanner, Value},
-        plonk::{Advice, Circuit, Column, ConstraintSystem, Error},
-    },
-    utils::{biguint_to_fe, bit_length, fe_to_biguint, PrimeField},
-    ContextCell,
-};
-use core::iter;
-use num_bigint::BigUint;
-use num_integer::Integer;
-use num_traits::{One, Zero};
-use serde::{Deserialize, Serialize};
-use std::{collections::HashMap, ops::Shl, rc::Rc};
-
+pub mod builder;
 pub mod flex_gate;
 // pub mod range;
 
-type ThreadBreakPoints = Vec<usize>;
-type MultiPhaseThreadBreakPoints = Vec<ThreadBreakPoints>;
-
-#[derive(Clone, Debug, Default)]
-pub struct GateThreadBuilder<F: ScalarField> {
-    /// Threads for each challenge phase
-    pub threads: [Vec<Context<F>>; MAX_PHASE],
-    thread_count: usize,
-    witness_gen_only: bool,
-    use_unknown: bool,
-}
-
-impl<F: ScalarField> GateThreadBuilder<F> {
-    pub fn new(witness_gen_only: bool) -> Self {
-        let mut threads = [(); MAX_PHASE].map(|_| vec![]);
-        // start with a main thread in phase 0
-        threads[0].push(Context::new(witness_gen_only, 0));
-        Self { threads, thread_count: 1, witness_gen_only, use_unknown: false }
-    }
-
-    pub fn unknown(self, use_unknown: bool) -> Self {
-        Self { use_unknown, ..self }
-    }
-
-    pub fn main(&mut self, phase: usize) -> &mut Context<F> {
-        self.threads[phase].first_mut().unwrap()
-    }
-
-    pub fn new_thread(&mut self, phase: usize) -> &mut Context<F> {
-        let thread_id = self.thread_count;
-        self.thread_count += 1;
-        self.threads[phase].push(Context::new(self.witness_gen_only, thread_id));
-        self.threads[phase].last_mut().unwrap()
-    }
-
-    /// Auto-calculate configuration parameters for the circuit
-    pub fn config(&self, k: usize, minimum_rows: Option<usize>) -> FlexGateConfigParams {
-        let max_rows = (1 << k) - minimum_rows.unwrap_or(0);
-        let total_advice_per_phase = self
-            .threads
-            .iter()
-            .map(|threads| threads.iter().map(|ctx| ctx.advice.len()).sum::<usize>())
-            .collect::<Vec<_>>();
-        // we do a rough estimate by taking ceil(advice_cells_per_phase / 2^k )
-        // if this is too small, manual configuration will be needed
-        let num_advice_per_phase = total_advice_per_phase
-            .iter()
-            .map(|count| (count + max_rows - 1) / max_rows)
-            .collect::<Vec<_>>();
-
-        let total_lookup_advice_per_phase = self
-            .threads
-            .iter()
-            .map(|threads| threads.iter().map(|ctx| ctx.cells_to_lookup.len()).sum::<usize>())
-            .collect::<Vec<_>>();
-        let num_lookup_advice_per_phase = total_lookup_advice_per_phase
-            .iter()
-            .map(|count| (count + max_rows - 1) / max_rows)
-            .collect::<Vec<_>>();
-
-        let total_fixed: usize = self
-            .threads
-            .iter()
-            .map(|threads| threads.iter().map(|ctx| ctx.constants.len()).sum::<usize>())
-            .sum();
-        let num_fixed = (total_fixed + (1 << k) - 1) >> k;
-
-        let params = FlexGateConfigParams {
-            strategy: GateStrategy::Vertical,
-            num_advice_per_phase,
-            num_lookup_advice_per_phase,
-            num_fixed,
-            k,
-        };
-        #[cfg(feature = "display")]
-        {
-            for phase in 0..MAX_PHASE {
-                if total_advice_per_phase[phase] != 0 || total_lookup_advice_per_phase[phase] != 0 {
-                    println!(
-                        "Gate Chip | Phase {}: {} advice cells , {} lookup advice cells",
-                        phase, total_advice_per_phase[phase], total_lookup_advice_per_phase[phase],
-                    );
-                }
-            }
-            println!("Total {total_fixed} fixed cells");
-            println!("Auto-calculated config params:\n {params:#?}");
-        }
-        std::env::set_var("FLEX_GATE_CONFIG_PARAMS", serde_json::to_string(&params).unwrap());
-        params
-    }
-
-    /// Assigns all advice and fixed cells, turns on selectors, imposes equality constraints.
-    /// This should only be called during keygen.
-    pub fn assign_all(
-        self,
-        config: &FlexGateConfig<F>,
-        lookup_advice: &[Vec<Column<Advice>>],
-        region: &mut Region<F>,
-    ) -> MultiPhaseThreadBreakPoints {
-        assert!(!self.witness_gen_only);
-        let use_unknown = self.use_unknown;
-        let max_rows = config.max_rows;
-        let mut break_points = vec![];
-        let mut assigned_advices = HashMap::new();
-        let mut assigned_constants = HashMap::new();
-        let mut fixed_col = 0;
-        let mut fixed_offset = 0;
-        for (phase, threads) in self.threads.into_iter().enumerate() {
-            let mut break_point = vec![];
-            let mut gate_index = 0;
-            let mut row_offset = 0;
-            let mut lookup_offset = 0;
-            let mut lookup_col = 0;
-            for ctx in threads {
-                for (i, (advice, q)) in ctx.advice.iter().zip(ctx.selector.into_iter()).enumerate()
-                {
-                    if (q && row_offset + 4 > max_rows) || row_offset >= max_rows {
-                        break_point.push(row_offset);
-                        row_offset = 0;
-                        gate_index += 1;
-                    }
-                    let basic_gate = config.basic_gates[phase]
-                        .get(gate_index)
-                        .unwrap_or_else(|| panic!("NOT ENOUGH ADVICE COLUMNS IN PHASE {phase}. Perhaps blinding factors were not taken into account. The max non-poisoned rows is {max_rows}"));
-                    let column = basic_gate.value;
-                    let value = if use_unknown { Value::unknown() } else { Value::known(advice) };
-                    #[cfg(feature = "halo2-axiom")]
-                    let cell = *region
-                        .assign_advice(column, row_offset, value)
-                        .expect("assign_advice should not fail")
-                        .cell();
-                    #[cfg(not(feature = "halo2-axiom"))]
-                    let cell = region
-                        .assign_advice(|| "", column, row_offset, || value)
-                        .expect("assign_advice should not fail")
-                        .cell();
-                    assigned_advices.insert((ctx.context_id, i), cell);
-
-                    if q {
-                        basic_gate
-                            .q_enable
-                            .enable(region, row_offset)
-                            .expect("enable selector should not fail");
-                    }
-                    row_offset += 1;
-                }
-                for (c, i) in ctx.constants.into_iter() {
-                    #[cfg(feature = "halo2-axiom")]
-                    let cell = region.assign_fixed(config.constants[fixed_col], fixed_offset, c);
-                    #[cfg(not(feature = "halo2-axiom"))]
-                    let cell = region
-                        .assign_fixed(
-                            || "",
-                            config.constants[fixed_col],
-                            fixed_offset,
-                            || Value::known(c),
-                        )
-                        .unwrap()
-                        .cell();
-                    assigned_constants.insert((ctx.context_id, i), cell);
-                    fixed_col += 1;
-                    if fixed_col >= config.constants.len() {
-                        fixed_col = 0;
-                        fixed_offset += 1;
-                    }
-                }
-
-                for (left, right) in ctx.advice_equality_constraints {
-                    let left = assigned_advices[&(left.context_id, left.offset)];
-                    let right = assigned_advices[&(right.context_id, right.offset)];
-                    #[cfg(feature = "halo2-axiom")]
-                    region.constrain_equal(&left, &right);
-                    #[cfg(not(feature = "halo2-axiom"))]
-                    region.constrain_equal(left, right).unwrap();
-                }
-                for (left, right) in ctx.constant_equality_constraints {
-                    let left = assigned_constants[&(left.context_id, left.offset)];
-                    let right = assigned_advices[&(right.context_id, right.offset)];
-                    #[cfg(feature = "halo2-axiom")]
-                    region.constrain_equal(&left, &right);
-                    #[cfg(not(feature = "halo2-axiom"))]
-                    region.constrain_equal(left, right).unwrap();
-                }
-
-                for index in ctx.cells_to_lookup {
-                    if lookup_offset >= max_rows {
-                        lookup_offset = 0;
-                        lookup_col += 1;
-                    }
-                    let value = ctx.advice[index];
-                    let acell = assigned_advices[&(ctx.context_id, index)];
-                    let value = if use_unknown { Value::unknown() } else { Value::known(value) };
-                    let column = lookup_advice[phase][lookup_col];
-
-                    #[cfg(feature = "halo2-axiom")]
-                    {
-                        let bcell = *region
-                            .assign_advice(column, lookup_offset, value)
-                            .expect("assign_advice should not fail")
-                            .cell();
-                        region.constrain_equal(&acell, &bcell);
-                    }
-                    #[cfg(not(feature = "halo2-axiom"))]
-                    {
-                        let bcell = region
-                            .assign_advice(|| "", column, lookup_offset, || value)
-                            .expect("assign_advice should not fail")
-                            .cell();
-                        region.constrain_equal(acell, bcell).unwrap();
-                    }
-                    lookup_offset += 1;
-                }
-            }
-            break_points.push(break_point);
-        }
-        break_points
-    }
-}
-
-/// Pure advice witness assignment in a single phase. Uses preprocessed `break_points` to determine when
-/// to split a thread into a new column.
-pub fn assign_threads_in<F: ScalarField>(
-    phase: usize,
-    threads: Vec<Context<F>>,
-    config: &FlexGateConfig<F>,
-    lookup_advice: &[Column<Advice>],
-    region: &mut Region<F>,
-    break_points: ThreadBreakPoints,
-) {
-    if config.basic_gates[phase].is_empty() {
-        assert!(threads.is_empty(), "Trying to assign threads in a phase with no columns");
-        return;
-    }
-    assert_eq!(break_points.len(), threads.len());
-
-    let mut break_points = break_points.into_iter();
-    let mut break_point = break_points.next();
-    let mut gate_index = 0;
-    let mut column = config.basic_gates[phase][gate_index].value;
-    let mut row_offset = 0;
-    let mut lookup_offset = 0;
-    let mut lookup_advice = lookup_advice.iter();
-    let mut lookup_column = lookup_advice.next();
-    for ctx in threads {
-        for index in ctx.cells_to_lookup {
-            if lookup_offset >= config.max_rows {
-                lookup_offset = 0;
-                lookup_column = lookup_advice.next();
-            }
-            let value = ctx.advice[index];
-            let column = *lookup_column.unwrap();
-            #[cfg(feature = "halo2-axiom")]
-            region.assign_advice(column, lookup_offset, Value::known(value)).unwrap();
-            #[cfg(not(feature = "halo2-axiom"))]
-            region.assign_advice(|| "", column, lookup_offset, || Value::known(value)).unwrap();
-
-            lookup_offset += 1;
-        }
-        for advice in ctx.advice {
-            if break_point == Some(row_offset) {
-                break_point = break_points.next();
-                row_offset = 0;
-                gate_index += 1;
-                column = config.basic_gates[phase][gate_index].value;
-            }
-            #[cfg(feature = "halo2-axiom")]
-            region.assign_advice(column, row_offset, Value::known(advice)).unwrap();
-            #[cfg(not(feature = "halo2-axiom"))]
-            region.assign_advice(|| "", column, row_offset, || Value::known(advice)).unwrap();
-
-            row_offset += 1;
-        }
-    }
-}
-
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct FlexGateConfigParams {
-    pub strategy: GateStrategy,
-    pub k: usize,
-    pub num_advice_per_phase: Vec<usize>,
-    pub num_lookup_advice_per_phase: Vec<usize>,
-    pub num_fixed: usize,
-}
-
 /*
 pub trait RangeInstructions<F: ScalarField> {
     type Gate: GateInstructions<F>;
diff --git a/halo2-base/src/gates/tests.rs b/halo2-base/src/gates/tests.rs
index dd49fa7e..ba35dbb9 100644
--- a/halo2-base/src/gates/tests.rs
+++ b/halo2-base/src/gates/tests.rs
@@ -1,78 +1,8 @@
-use std::cell::RefCell;
-use std::rc::Rc;
-
-use super::flex_gate::{FlexGateConfig, GateChip, GateInstructions, GateStrategy, MAX_PHASE};
-use super::{
-    assign_threads_in, FlexGateConfigParams, GateThreadBuilder, MultiPhaseThreadBreakPoints,
-    ThreadBreakPoints,
-};
-use crate::halo2_proofs::{circuit::*, dev::MockProver, halo2curves::bn256::Fr, plonk::*};
+use super::builder::{GateCircuitBuilder, GateThreadBuilder};
+use super::flex_gate::{GateChip, GateInstructions};
+use crate::halo2_proofs::{dev::MockProver, halo2curves::bn256::Fr};
 use crate::utils::ScalarField;
-use crate::{
-    Context,
-    QuantumCell::{Constant, Existing, Witness},
-    SKIP_FIRST_PASS,
-};
-
-struct MyCircuit<F: ScalarField> {
-    inputs: [F; 3],
-    builder: RefCell<GateThreadBuilder<F>>, // trick `synthesize` to take ownership of `builder`
-    break_points: RefCell<MultiPhaseThreadBreakPoints>,
-}
-
-impl<F: ScalarField> Circuit<F> for MyCircuit<F> {
-    type Config = FlexGateConfig<F>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        unimplemented!()
-    }
-
-    fn configure(meta: &mut ConstraintSystem<F>) -> FlexGateConfig<F> {
-        let FlexGateConfigParams {
-            strategy,
-            num_advice_per_phase,
-            num_lookup_advice_per_phase: _,
-            num_fixed,
-            k,
-        } = serde_json::from_str(&std::env::var("FLEX_GATE_CONFIG_PARAMS").unwrap()).unwrap();
-        FlexGateConfig::configure(meta, strategy, &num_advice_per_phase, num_fixed, k)
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<F>,
-    ) -> Result<(), Error> {
-        let mut first_pass = SKIP_FIRST_PASS;
-        layouter.assign_region(
-            || "gate",
-            |mut region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-                let builder = self.builder.take();
-                if !builder.witness_gen_only {
-                    *self.break_points.borrow_mut() = builder.assign_all(&config, &[], &mut region);
-                } else {
-                    // only test first phase for now
-                    let mut threads = builder.threads.into_iter();
-                    assign_threads_in(
-                        0,
-                        threads.next().unwrap(),
-                        &config,
-                        &[],
-                        &mut region,
-                        self.break_points.borrow()[0].clone(),
-                    )
-                }
-
-                Ok(())
-            },
-        )
-    }
-}
+use crate::{Context, QuantumCell::Constant};
 
 fn gate_tests<F: ScalarField>(ctx: &mut Context<F>, inputs: [F; 3]) {
     let [a, b, c]: [_; 3] = ctx.assign_witnesses(inputs).try_into().unwrap();
@@ -104,8 +34,7 @@ fn test_gates() {
     // auto-tune circuit
     builder.config(k, Some(9));
     // create circuit
-    let circuit =
-        MyCircuit { inputs, builder: RefCell::new(builder), break_points: RefCell::default() };
+    let circuit = GateCircuitBuilder::mock(builder);
 
     MockProver::run(k as u32, &circuit, vec![]).unwrap().assert_satisfied();
 }

From ec80e152ef7443c6ac333f5d12731b2e6750ee36 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Mon, 6 Feb 2023 16:00:30 -0800
Subject: [PATCH 04/26] feat: add `test_multithread_gates`

---
 halo2-base/Cargo.toml         |  2 +-
 halo2-base/src/gates/tests.rs | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/halo2-base/Cargo.toml b/halo2-base/Cargo.toml
index caf56709..cf9ededf 100644
--- a/halo2-base/Cargo.toml
+++ b/halo2-base/Cargo.toml
@@ -11,7 +11,6 @@ num-traits = "0.2"
 rand_chacha = "0.3"
 rustc-hash = "1.1"
 ff = "0.12"
-crossbeam = "0.8.2"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 
@@ -30,6 +29,7 @@ rand = "0.8"
 pprof = { version = "0.11", features = ["criterion", "flamegraph"] }
 criterion = "0.4"
 criterion-macro = "0.4"
+rayon = "1.6.1"
 
 # memory allocation
 [target.'cfg(not(target_env = "msvc"))'.dependencies]
diff --git a/halo2-base/src/gates/tests.rs b/halo2-base/src/gates/tests.rs
index ba35dbb9..e786f5dc 100644
--- a/halo2-base/src/gates/tests.rs
+++ b/halo2-base/src/gates/tests.rs
@@ -3,6 +3,9 @@ use super::flex_gate::{GateChip, GateInstructions};
 use crate::halo2_proofs::{dev::MockProver, halo2curves::bn256::Fr};
 use crate::utils::ScalarField;
 use crate::{Context, QuantumCell::Constant};
+use ff::Field;
+use rand::rngs::OsRng;
+use rayon::prelude::*;
 
 fn gate_tests<F: ScalarField>(ctx: &mut Context<F>, inputs: [F; 3]) {
     let [a, b, c]: [_; 3] = ctx.assign_witnesses(inputs).try_into().unwrap();
@@ -39,6 +42,32 @@ fn test_gates() {
     MockProver::run(k as u32, &circuit, vec![]).unwrap().assert_satisfied();
 }
 
+#[test]
+fn test_multithread_gates() {
+    let k = 6;
+    let inputs = [10u64, 12u64, 120u64].map(Fr::from);
+    let mut builder = GateThreadBuilder::new(false);
+    gate_tests(builder.main(0), inputs);
+
+    let thread_ids = (0..4).map(|_| builder.get_new_thread_id()).collect::<Vec<_>>();
+    let new_threads = thread_ids
+        .into_par_iter()
+        .map(|id| {
+            let mut ctx = Context::new(builder.witness_gen_only(), id);
+            gate_tests(&mut ctx, [(); 3].map(|_| Fr::random(OsRng)));
+            ctx
+        })
+        .collect::<Vec<_>>();
+    builder.threads[0].extend(new_threads);
+
+    // auto-tune circuit
+    builder.config(k, Some(9));
+    // create circuit
+    let circuit = GateCircuitBuilder::mock(builder);
+
+    MockProver::run(k as u32, &circuit, vec![]).unwrap().assert_satisfied();
+}
+
 #[cfg(feature = "dev-graph")]
 #[test]
 fn plot_gates() {

From 987853e1e310a5b81b66306ad7efe88e298310ba Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Mon, 6 Feb 2023 18:40:33 -0800
Subject: [PATCH 05/26] BUG: `get_last_bit` did not do an `assert_bit` check on
 the answer

* this function was not used anywhere
---
 halo2-base/src/gates/range.rs | 528 +++++++++++++++++++---------------
 1 file changed, 296 insertions(+), 232 deletions(-)

diff --git a/halo2-base/src/gates/range.rs b/halo2-base/src/gates/range.rs
index 07033ee7..bb23aa78 100644
--- a/halo2-base/src/gates/range.rs
+++ b/halo2-base/src/gates/range.rs
@@ -1,9 +1,9 @@
 use crate::{
-    gates::{
-        flex_gate::{FlexGateConfig, GateStrategy, MAX_PHASE},
-        GateInstructions,
+    gates::flex_gate::{FlexGateConfig, GateInstructions, GateStrategy, MAX_PHASE},
+    utils::{
+        biguint_to_fe, bit_length, decompose_fe_to_u64_limbs, fe_to_biguint, BigPrimeField,
+        ScalarField,
     },
-    utils::{decompose_fe_to_u64_limbs, value_to_option, ScalarField},
     AssignedValue,
     QuantumCell::{self, Constant, Existing, Witness},
 };
@@ -16,40 +16,34 @@ use crate::{
         poly::Rotation,
     },
     utils::PrimeField,
+    Context,
 };
-use std::cmp::Ordering;
+use num_bigint::BigUint;
+use num_integer::Integer;
+use num_traits::One;
+use std::{cmp::Ordering, ops::Shl};
 
-use super::{Context, RangeInstructions};
+use super::flex_gate::GateChip;
 
 #[derive(Clone, Copy, Debug, PartialEq)]
 pub enum RangeStrategy {
     Vertical, // vanilla implementation with vertical basic gate(s)
-    // CustomVerticalShort, // vertical basic gate(s) and vertical custom range gates of length 2,3
-    PlonkPlus,
-    // CustomHorizontal, // vertical basic gate and dedicated horizontal custom gate
 }
 
 #[derive(Clone, Debug)]
 pub struct RangeConfig<F: ScalarField> {
-    // `lookup_advice` are special advice columns only used for lookups
-    //
-    // If `strategy` is `Vertical` or `CustomVertical`:
-    // * If `gate` has only 1 advice column, enable lookups for that column, in which case `lookup_advice` is empty
-    // * Otherwise, add some user-specified number of `lookup_advice` columns
-    //   * In this case, we don't even need a selector so `q_lookup` is empty
-    // If `strategy` is `CustomHorizontal`:
-    // * TODO
+    pub gate: FlexGateConfig<F>,
+    /// `lookup_advice` are special advice columns only used for lookups
+    ///
+    /// If `strategy` is `Vertical`:
+    /// * If `gate` has only 1 advice column, enable lookups for that column, in which case `lookup_advice` is empty
+    /// * Otherwise, add some user-specified number of `lookup_advice` columns
+    ///   * In this case, we don't even need a selector so `q_lookup` is empty
     pub lookup_advice: [Vec<Column<Advice>>; MAX_PHASE],
     pub q_lookup: Vec<Option<Selector>>,
     pub lookup: TableColumn,
-    pub lookup_bits: usize,
-    pub limb_bases: Vec<QuantumCell<'static, 'static, F>>,
-    // selector for custom range gate
-    // `q_range[k][i]` stores the selector for a custom range gate of length `k`
-    // pub q_range: HashMap<usize, Vec<Selector>>,
-    pub gate: FlexGateConfig<F>,
-    strategy: RangeStrategy,
-    pub context_id: usize,
+    lookup_bits: usize,
+    _strategy: RangeStrategy,
 }
 
 impl<F: ScalarField> RangeConfig<F> {
@@ -60,7 +54,6 @@ impl<F: ScalarField> RangeConfig<F> {
         num_lookup_advice: &[usize],
         num_fixed: usize,
         lookup_bits: usize,
-        context_id: usize,
         // params.k()
         circuit_degree: usize,
     ) -> Self {
@@ -71,11 +64,9 @@ impl<F: ScalarField> RangeConfig<F> {
             meta,
             match range_strategy {
                 RangeStrategy::Vertical => GateStrategy::Vertical,
-                RangeStrategy::PlonkPlus => GateStrategy::PlonkPlus,
             },
             num_advice,
             num_fixed,
-            context_id,
             circuit_degree,
         );
 
@@ -101,31 +92,17 @@ impl<F: ScalarField> RangeConfig<F> {
             }
         }
 
-        let limb_base = F::from(1u64 << lookup_bits);
-        let mut running_base = limb_base;
-        let num_bases = F::NUM_BITS as usize / lookup_bits;
-        let mut limb_bases = Vec::with_capacity(num_bases + 1);
-        limb_bases.extend([Constant(F::one()), Constant(running_base)]);
-        for _ in 2..=num_bases {
-            running_base *= &limb_base;
-            limb_bases.push(Constant(running_base));
-        }
-
-        let config = Self {
-            lookup_advice,
-            q_lookup,
-            lookup,
-            lookup_bits,
-            limb_bases,
-            gate,
-            strategy: range_strategy,
-            context_id,
-        };
+        let config =
+            Self { lookup_advice, q_lookup, lookup, lookup_bits, gate, _strategy: range_strategy };
         config.create_lookup(meta);
 
         config
     }
 
+    pub fn lookup_bits(&self) -> usize {
+        self.lookup_bits
+    }
+
     fn create_lookup(&self, meta: &mut ConstraintSystem<F>) {
         for (phase, q_l) in self.q_lookup.iter().enumerate() {
             if let Some(q) = q_l {
@@ -163,158 +140,236 @@ impl<F: ScalarField> RangeConfig<F> {
         )?;
         Ok(())
     }
+}
+
+pub trait RangeInstructions<F: ScalarField> {
+    type Gate: GateInstructions<F>;
+
+    fn gate(&self) -> &Self::Gate;
+    fn strategy(&self) -> RangeStrategy;
+
+    fn lookup_bits(&self) -> usize;
+
+    /// Constrain that `a` lies in the range [0, 2<sup>range_bits</sup>).
+    fn range_check(&self, ctx: &mut Context<F>, a: AssignedValue<F>, range_bits: usize);
+
+    fn check_less_than(
+        &self,
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+        num_bits: usize,
+    );
 
-    /// Call this at the end of a phase to assign cells to special columns for lookup arguments
+    /// Checks that `a` is in `[0, b)`.
     ///
-    /// returns total number of lookup cells assigned
-    pub fn finalize(&self, ctx: &mut Context<'_, F>) -> usize {
-        ctx.copy_and_lookup_cells(self.lookup_advice[ctx.current_phase].clone())
+    /// Does not require bit assumptions on `a, b` because we range check that `a` has at most `range_bits` bits, where `range_bits` is `bit_length(b)` rounded up to a multiple of `lookup_bits`.
+    fn check_less_than_safe(&self, ctx: &mut Context<F>, a: AssignedValue<F>, b: u64) {
+        let range_bits =
+            (bit_length(b) + self.lookup_bits() - 1) / self.lookup_bits() * self.lookup_bits();
+
+        self.range_check(ctx, a, range_bits);
+        self.check_less_than(ctx, a, Constant(self.gate().get_field_element(b)), range_bits)
     }
 
-    /// assuming this is called when ctx.region is not in shape mode
-    /// `offset` is the offset of the cell in `ctx.region`
-    /// `offset` is only used if there is a single advice column
-    fn enable_lookup<'a>(&self, ctx: &mut Context<'a, F>, acell: AssignedValue<'a, F>) {
-        let phase = ctx.current_phase();
-        if let Some(q) = &self.q_lookup[phase] {
-            q.enable(&mut ctx.region, acell.row()).expect("enable selector should not fail");
-        } else {
-            ctx.cells_to_lookup.push(acell);
-        }
+    /// Checks that `a` is in `[0, b)`.
+    ///
+    /// Does not require bit assumptions on `a, b` because we range check that `a` has at most `range_bits` bits, where `range_bits` is the bit length of `b` rounded up to a multiple of `lookup_bits`.
+    fn check_big_less_than_safe(&self, ctx: &mut Context<F>, a: AssignedValue<F>, b: BigUint)
+    where
+        F: BigPrimeField,
+    {
+        let range_bits =
+            (b.bits() as usize + self.lookup_bits() - 1) / self.lookup_bits() * self.lookup_bits();
+
+        self.range_check(ctx, a, range_bits);
+        self.check_less_than(ctx, a, Constant(biguint_to_fe(&b)), range_bits)
     }
 
-    // returns the limbs
-    fn range_check_simple<'a>(
+    /// Returns whether `a` is in `[0, b)`.
+    ///
+    /// Warning: This may fail silently if `a` or `b` have more than `num_bits` bits
+    fn is_less_than(
         &self,
-        ctx: &mut Context<'a, F>,
-        a: &AssignedValue<'a, F>,
-        range_bits: usize,
-        limbs_assigned: &mut Vec<AssignedValue<'a, F>>,
-    ) {
-        let k = (range_bits + self.lookup_bits - 1) / self.lookup_bits;
-        // println!("range check {} bits {} len", range_bits, k);
-        let rem_bits = range_bits % self.lookup_bits;
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+        num_bits: usize,
+    ) -> AssignedValue<F>;
 
-        assert!(self.limb_bases.len() >= k);
-        if k == 1 {
-            limbs_assigned.clear();
-            limbs_assigned.push(a.clone())
-        } else {
-            let acc = match value_to_option(a.value()) {
-                Some(a) => {
-                    let limbs = decompose_fe_to_u64_limbs(a, k, self.lookup_bits)
-                        .into_iter()
-                        .map(|x| Witness(Value::known(F::from(x))));
-                    self.gate.inner_product_left(
-                        ctx,
-                        limbs,
-                        self.limb_bases[..k].iter().cloned(),
-                        limbs_assigned,
-                    )
-                }
-                _ => self.gate.inner_product_left(
-                    ctx,
-                    vec![Witness(Value::unknown()); k],
-                    self.limb_bases[..k].iter().cloned(),
-                    limbs_assigned,
-                ),
-            };
-            // the inner product above must equal `a`
-            ctx.region.constrain_equal(a.cell(), acc.cell());
-        };
-        assert_eq!(limbs_assigned.len(), k);
+    /// Returns whether `a` is in `[0, b)`.
+    ///
+    /// Does not require bit assumptions on `a, b` because we range check that `a` has at most `range_bits` bits.
+    fn is_less_than_safe(
+        &self,
+        ctx: &mut Context<F>,
+        a: AssignedValue<F>,
+        b: u64,
+    ) -> AssignedValue<F> {
+        let range_bits =
+            (bit_length(b) + self.lookup_bits() - 1) / self.lookup_bits() * self.lookup_bits();
 
-        // range check all the limbs
-        for limb in limbs_assigned.iter() {
-            self.enable_lookup(ctx, limb.clone());
-        }
+        self.range_check(ctx, a, range_bits);
+        self.is_less_than(ctx, a, Constant(self.gate().get_field_element(b)), range_bits)
+    }
 
-        // additional constraints for the last limb if rem_bits != 0
-        match rem_bits.cmp(&1) {
-            // we want to check x := limbs[k-1] is boolean
-            // we constrain x*(x-1) = 0 + x * x - x == 0
-            // | 0 | x | x | x |
-            Ordering::Equal => {
-                self.gate.assert_bit(ctx, &limbs_assigned[k - 1]);
-            }
-            Ordering::Greater => {
-                let mult_val = self.gate.get_field_element(1u64 << (self.lookup_bits - rem_bits));
-                let check = self.gate.assign_region_last(
-                    ctx,
-                    vec![
-                        Constant(F::zero()),
-                        Existing(&limbs_assigned[k - 1]),
-                        Constant(mult_val),
-                        Witness(limbs_assigned[k - 1].value().map(|limb| mult_val * limb)),
-                    ],
-                    vec![(0, None)],
-                );
-                self.enable_lookup(ctx, check);
-            }
-            _ => {}
-        }
+    /// Returns whether `a` is in `[0, b)`.
+    ///
+    /// Does not require bit assumptions on `a, b` because we range check that `a` has at most `range_bits` bits.
+    fn is_big_less_than_safe(
+        &self,
+        ctx: &mut Context<F>,
+        a: AssignedValue<F>,
+        b: BigUint,
+    ) -> AssignedValue<F>
+    where
+        F: BigPrimeField,
+    {
+        let range_bits =
+            (b.bits() as usize + self.lookup_bits() - 1) / self.lookup_bits() * self.lookup_bits();
+
+        self.range_check(ctx, a, range_bits);
+        self.is_less_than(ctx, a, Constant(biguint_to_fe(&b)), range_bits)
     }
 
-    /// breaks up `a` into smaller pieces to lookup and stores them in `limbs_assigned`
+    /// Returns `(c, r)` such that `a = b * c + r`.
     ///
-    /// this is an internal function to avoid memory re-allocation of `limbs_assigned`
-    pub fn range_check_limbs<'a>(
+    /// Assumes that `b != 0`.
+    fn div_mod(
         &self,
-        ctx: &mut Context<'a, F>,
-        a: &AssignedValue<'a, F>,
-        range_bits: usize,
-        limbs_assigned: &mut Vec<AssignedValue<'a, F>>,
-    ) {
-        assert_ne!(range_bits, 0);
-        #[cfg(feature = "display")]
-        {
-            let key = format!(
-                "range check length {}",
-                (range_bits + self.lookup_bits - 1) / self.lookup_bits
-            );
-            let count = ctx.op_count.entry(key).or_insert(0);
-            *count += 1;
-        }
-        match self.strategy {
-            RangeStrategy::Vertical | RangeStrategy::PlonkPlus => {
-                self.range_check_simple(ctx, a, range_bits, limbs_assigned)
-            }
-        }
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<BigUint>,
+        a_num_bits: usize,
+    ) -> (AssignedValue<F>, AssignedValue<F>)
+    where
+        F: PrimeField,
+    {
+        let a = a.into();
+        let b = b.into();
+        let a_val = fe_to_biguint(a.value());
+        let (div, rem) = a_val.div_mod_floor(&b);
+        let [div, rem] = [div, rem].map(|v| biguint_to_fe(&v));
+        ctx.assign_region(
+            vec![Witness(rem), Constant(biguint_to_fe(&b)), Witness(div), a],
+            vec![0],
+        );
+        let rem = ctx.get(-4);
+        let div = ctx.get(-2);
+        self.check_big_less_than_safe(
+            ctx,
+            div,
+            BigUint::one().shl(a_num_bits as u32) / &b + BigUint::one(),
+        );
+        self.check_big_less_than_safe(ctx, rem, b);
+        (div, rem)
     }
 
-    /// assume `a` has been range checked already to `limb_bits` bits
-    pub fn get_last_bit<'a>(
+    /// Returns `(c, r)` such that `a = b * c + r`.
+    ///
+    /// Assumes that `b != 0`.
+    ///
+    /// Let `X = 2 ** b_num_bits`.
+    /// Write `a = a1 * X + a0` and `c = c1 * X + c0`.
+    /// If we write `b * c0 + r = d1 * X + d0` then
+    ///     `b * c + r = (b * c1 + d1) * X + d0`
+    fn div_mod_var(
         &self,
-        ctx: &mut Context<'a, F>,
-        a: &AssignedValue<'a, F>,
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
+        a_num_bits: usize,
+        b_num_bits: usize,
+    ) -> (AssignedValue<F>, AssignedValue<F>)
+    where
+        F: BigPrimeField,
+    {
+        let a = a.into();
+        let b = b.into();
+        let a_val = fe_to_biguint(a.value());
+        let b_val = fe_to_biguint(b.value());
+        let (div, rem) = a_val.div_mod_floor(&b_val);
+        let x = BigUint::one().shl(b_num_bits as u32);
+        let (div_hi, div_lo) = div.div_mod_floor(&x);
+
+        let x_fe = self.gate().pow_of_two()[b_num_bits];
+        let [div, div_hi, div_lo, rem] = [div, div_hi, div_lo, rem].map(|v| biguint_to_fe(&v));
+        ctx.assign_region(
+            vec![Witness(div_lo), Witness(div_hi), Constant(x_fe), Witness(div), Witness(rem)],
+            vec![0],
+        );
+        let [div_lo, div_hi, div, rem] = [-5, -4, -2, -1].map(|i| ctx.get(i));
+        self.range_check(ctx, div_lo, b_num_bits);
+        self.range_check(ctx, div_hi, a_num_bits.saturating_sub(b_num_bits));
+
+        let (bcr0_hi, bcr0_lo) = {
+            let bcr0 = self.gate().mul_add(ctx, b, Existing(div_lo), Existing(rem));
+            self.div_mod(ctx, Existing(bcr0), x.clone(), a_num_bits)
+        };
+        let bcr_hi = self.gate().mul_add(ctx, b, Existing(div_hi), Existing(bcr0_hi));
+
+        let (a_hi, a_lo) = self.div_mod(ctx, a, x, a_num_bits);
+        ctx.constrain_equal(&bcr_hi, &a_hi);
+        ctx.constrain_equal(&bcr0_lo, &a_lo);
+
+        self.range_check(ctx, rem, b_num_bits);
+        self.check_less_than(ctx, Existing(rem), b, b_num_bits);
+        (div, rem)
+    }
+
+    /// Returns the least significant bit of `a` as a cell constrained to be boolean. Assume `a` has been range checked already to `limb_bits` bits
+    fn get_last_bit(
+        &self,
+        ctx: &mut Context<F>,
+        a: AssignedValue<F>,
         limb_bits: usize,
-    ) -> AssignedValue<'a, F> {
+    ) -> AssignedValue<F> {
         let a_v = a.value();
-        let bit_v = a_v.map(|a| {
-            let a = a.get_lower_32();
-            if a ^ 1 == 0 {
-                F::zero()
-            } else {
-                F::one()
-            }
+        // the last bit of `a` is the parity of its low 32 bits (`a ^ 1 != 0` would only test `a != 1`)
+        let is_odd = (a_v.get_lower_32() & 1) == 1;
+        let bit_v = F::from(is_odd);
+        // witness h = (a - bit) / 2 so that the row | bit | h | 2 | a | encodes bit + h * 2 = a
+        let two = self.gate().get_field_element(2u64);
+        let h_v = (*a_v - bit_v) * two.invert().unwrap();
+        ctx.assign_region(vec![Witness(bit_v), Witness(h_v), Constant(two), Existing(a)], vec![0]);
+
+        let half = ctx.get(-3);
+        self.range_check(ctx, half, limb_bits - 1);
+        let bit = ctx.get(-4);
+        self.gate().assert_bit(ctx, bit);
+        bit
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct RangeChip<F: ScalarField> {
+    strategy: RangeStrategy,
+    pub gate: GateChip<F>,
+    pub lookup_bits: usize,
+    pub limb_bases: Vec<QuantumCell<F>>,
+}
+
+impl<F: ScalarField> RangeChip<F> {
+    pub fn new(strategy: RangeStrategy, lookup_bits: usize) -> Self {
+        let limb_base = F::from(1u64 << lookup_bits);
+        let mut running_base = limb_base;
+        let num_bases = F::NUM_BITS as usize / lookup_bits;
+        let mut limb_bases = Vec::with_capacity(num_bases + 1);
+        limb_bases.extend([Constant(F::one()), Constant(running_base)]);
+        for _ in 2..=num_bases {
+            running_base *= &limb_base;
+            limb_bases.push(Constant(running_base));
+        }
+        let gate = GateChip::new(match strategy {
+            RangeStrategy::Vertical => GateStrategy::Vertical,
         });
-        let two = self.gate.get_field_element(2u64);
-        let h_v = a.value().zip(bit_v).map(|(a, b)| (*a - b) * two.invert().unwrap());
-        let assignments = self.gate.assign_region_smart(
-            ctx,
-            vec![Witness(bit_v), Witness(h_v), Constant(two), Existing(a)],
-            vec![0],
-            vec![],
-            vec![],
-        );
 
-        self.range_check(ctx, &assignments[1], limb_bits - 1);
-        assignments.into_iter().next().unwrap()
+        Self { strategy, gate, lookup_bits, limb_bases }
     }
 }
 
-impl<F: ScalarField> RangeInstructions<F> for RangeConfig<F> {
-    type Gate = FlexGateConfig<F>;
+impl<F: ScalarField> RangeInstructions<F> for RangeChip<F> {
+    type Gate = GateChip<F>;
 
     fn gate(&self) -> &Self::Gate {
         &self.gate
@@ -327,28 +382,63 @@ impl<F: ScalarField> RangeInstructions<F> for RangeConfig<F> {
         self.lookup_bits
     }
 
-    fn range_check<'a>(
-        &self,
-        ctx: &mut Context<'a, F>,
-        a: &AssignedValue<'a, F>,
-        range_bits: usize,
-    ) {
-        let tmp = ctx.preallocated_vec_to_assign();
-        self.range_check_limbs(ctx, a, range_bits, &mut tmp.as_ref().borrow_mut());
+    fn range_check(&self, ctx: &mut Context<F>, a: AssignedValue<F>, range_bits: usize) {
+        // the number of limbs
+        let k = (range_bits + self.lookup_bits - 1) / self.lookup_bits;
+        // println!("range check {} bits {} len", range_bits, k);
+        let rem_bits = range_bits % self.lookup_bits;
+
+        debug_assert!(self.limb_bases.len() >= k);
+
+        if k == 1 {
+            ctx.cells_to_lookup.push(a);
+        } else {
+            let limbs = decompose_fe_to_u64_limbs(a.value(), k, self.lookup_bits)
+                .into_iter()
+                .map(|x| Witness(F::from(x)));
+            let row_offset = ctx.advice.len() as isize;
+            let acc = self.gate.inner_product(ctx, limbs, self.limb_bases[..k].to_vec());
+            // the inner product above must equal `a`
+            ctx.constrain_equal(&a, &acc);
+            // we fetch the cells to lookup by getting the indices where `limbs` were assigned in `inner_product`. Because `limb_bases[0]` is 1, the progression of indices is 0,1,4,...,4+3*i
+            ctx.cells_to_lookup.push(ctx.get(row_offset));
+            for i in 0..k - 1 {
+                ctx.cells_to_lookup.push(ctx.get(row_offset + 1 + 3 * i as isize));
+            }
+        };
+
+        // additional constraints for the last limb if rem_bits != 0
+        match rem_bits.cmp(&1) {
+            // we want to check x := limbs[k-1] is boolean
+            // we constrain x*(x-1) = 0 + x * x - x == 0
+            // | 0 | x | x | x |
+            Ordering::Equal => {
+                self.gate.assert_bit(ctx, *ctx.cells_to_lookup.last().unwrap());
+            }
+            Ordering::Greater => {
+                let mult_val = self.gate.pow_of_two[self.lookup_bits - rem_bits];
+                let check =
+                    self.gate.mul(ctx, *ctx.cells_to_lookup.last().unwrap(), Constant(mult_val));
+                ctx.cells_to_lookup.push(check);
+            }
+            _ => {}
+        }
     }
 
     /// Warning: This may fail silently if a or b have more than num_bits
-    fn check_less_than<'a>(
+    fn check_less_than(
         &self,
-        ctx: &mut Context<'a, F>,
-        a: QuantumCell<'_, 'a, F>,
-        b: QuantumCell<'_, 'a, F>,
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
         num_bits: usize,
     ) {
+        let a = a.into();
+        let b = b.into();
         let pow_of_two = self.gate.pow_of_two[num_bits];
         let check_cell = match self.strategy {
             RangeStrategy::Vertical => {
-                let shift_a_val = a.value().map(|av| pow_of_two + av);
+                let shift_a_val = pow_of_two + a.value();
                 // | a + 2^(num_bits) - b | b | 1 | a + 2^(num_bits) | - 2^(num_bits) | 1 | a |
                 let cells = vec![
                     Witness(shift_a_val - b.value()),
@@ -359,47 +449,34 @@ impl<F: ScalarField> RangeInstructions<F> for RangeConfig<F> {
                     Constant(F::one()),
                     a,
                 ];
-                let assigned_cells =
-                    self.gate.assign_region(ctx, cells, vec![(0, None), (3, None)]);
-                assigned_cells.into_iter().next().unwrap()
-            }
-            RangeStrategy::PlonkPlus => {
-                // | a | 1 | b | a + 2^{num_bits} - b |
-                // selectors:
-                // | 1 | 0 | 0 |
-                // | 0 | 2^{num_bits} | -1 |
-                let out_val = Value::known(pow_of_two) + a.value() - b.value();
-                let assigned_cells = self.gate.assign_region(
-                    ctx,
-                    vec![a, Constant(F::one()), b, Witness(out_val)],
-                    vec![(0, Some([F::zero(), pow_of_two, -F::one()]))],
-                );
-                assigned_cells.into_iter().nth(3).unwrap()
+                ctx.assign_region(cells, vec![0, 3]);
+                ctx.get(-7)
             }
         };
 
-        self.range_check(ctx, &check_cell, num_bits);
+        self.range_check(ctx, check_cell, num_bits);
     }
 
     /// Warning: This may fail silently if a or b have more than num_bits
-    fn is_less_than<'a>(
+    fn is_less_than(
         &self,
-        ctx: &mut Context<'a, F>,
-        a: QuantumCell<'_, 'a, F>,
-        b: QuantumCell<'_, 'a, F>,
+        ctx: &mut Context<F>,
+        a: impl Into<QuantumCell<F>>,
+        b: impl Into<QuantumCell<F>>,
         num_bits: usize,
-    ) -> AssignedValue<'a, F> {
-        // TODO: optimize this for PlonkPlus strategy
+    ) -> AssignedValue<F> {
+        let a = a.into();
+        let b = b.into();
+
         let k = (num_bits + self.lookup_bits - 1) / self.lookup_bits;
         let padded_bits = k * self.lookup_bits;
         let pow_padded = self.gate.pow_of_two[padded_bits];
 
-        let shift_a_val = a.value().map(|av| pow_padded + av);
+        let shift_a_val = pow_padded + a.value();
         let shifted_val = shift_a_val - b.value();
         let shifted_cell = match self.strategy {
             RangeStrategy::Vertical => {
-                let assignments = self.gate.assign_region_smart(
-                    ctx,
+                ctx.assign_region(
                     vec![
                         Witness(shifted_val),
                         b,
@@ -410,28 +487,15 @@ impl<F: ScalarField> RangeInstructions<F> for RangeConfig<F> {
                         a,
                     ],
                     vec![0, 3],
-                    vec![],
-                    vec![],
                 );
-                assignments.into_iter().next().unwrap()
+                ctx.get(-7)
             }
-            RangeStrategy::PlonkPlus => self.gate.assign_region_last(
-                ctx,
-                vec![a, Constant(pow_padded), b, Witness(shifted_val)],
-                vec![(0, Some([F::zero(), F::one(), -F::one()]))],
-            ),
         };
 
         // check whether a - b + 2^padded_bits < 2^padded_bits ?
         // since assuming a, b < 2^padded_bits we are guaranteed a - b + 2^padded_bits < 2^{padded_bits + 1}
-        let limbs = ctx.preallocated_vec_to_assign();
-        self.range_check_limbs(
-            ctx,
-            &shifted_cell,
-            padded_bits + self.lookup_bits,
-            &mut limbs.borrow_mut(),
-        );
-        let res = self.gate().is_zero(ctx, limbs.borrow().get(k).unwrap());
-        res
+        self.range_check(ctx, shifted_cell, padded_bits + self.lookup_bits);
+        // ctx.cells_to_lookup.last() will have the (k + 1)-th limb of `a - b + 2^{k * limb_bits}`, which is zero iff `a < b`
+        self.gate.is_zero(ctx, *ctx.cells_to_lookup.last().unwrap())
     }
 }

From 631a71e3d71b0716ce9491f907700e167531fbe3 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Mon, 6 Feb 2023 19:37:35 -0800
Subject: [PATCH 06/26] fix: `builder::assign_*` was not handling cases where
 two gates overlap and there is a break point in that overlap * we need to
 copy a cell between columns to fix

---
 halo2-base/src/gates/builder.rs | 519 ++++++++++++++++++++++++++++++++
 1 file changed, 519 insertions(+)
 create mode 100644 halo2-base/src/gates/builder.rs

diff --git a/halo2-base/src/gates/builder.rs b/halo2-base/src/gates/builder.rs
new file mode 100644
index 00000000..c74369a4
--- /dev/null
+++ b/halo2-base/src/gates/builder.rs
@@ -0,0 +1,519 @@
+use super::{
+    flex_gate::{FlexGateConfig, GateStrategy, MAX_PHASE},
+    range::{RangeConfig, RangeStrategy},
+};
+use crate::{
+    halo2_proofs::{
+        circuit::{Layouter, Region, SimpleFloorPlanner, Value},
+        plonk::{Advice, Circuit, Column, ConstraintSystem, Error, Selector},
+    },
+    utils::ScalarField,
+    Context, SKIP_FIRST_PASS,
+};
+use serde::{Deserialize, Serialize};
+use std::{cell::RefCell, collections::HashMap};
+
+type ThreadBreakPoints = Vec<usize>;
+type MultiPhaseThreadBreakPoints = Vec<ThreadBreakPoints>;
+
+#[derive(Clone, Debug, Default)]
+pub struct GateThreadBuilder<F: ScalarField> {
+    /// Threads for each challenge phase
+    pub threads: [Vec<Context<F>>; MAX_PHASE],
+    thread_count: usize,
+    witness_gen_only: bool,
+    use_unknown: bool,
+}
+
+impl<F: ScalarField> GateThreadBuilder<F> {
+    pub fn new(witness_gen_only: bool) -> Self {
+        let mut threads = [(); MAX_PHASE].map(|_| vec![]);
+        // start with a main thread in phase 0
+        threads[0].push(Context::new(witness_gen_only, 0));
+        Self { threads, thread_count: 1, witness_gen_only, use_unknown: false }
+    }
+
+    pub fn unknown(self, use_unknown: bool) -> Self {
+        Self { use_unknown, ..self }
+    }
+
+    pub fn main(&mut self, phase: usize) -> &mut Context<F> {
+        self.threads[phase].first_mut().unwrap()
+    }
+
+    pub fn witness_gen_only(&self) -> bool {
+        self.witness_gen_only
+    }
+
+    pub fn thread_count(&self) -> usize {
+        self.thread_count
+    }
+
+    pub fn get_new_thread_id(&mut self) -> usize {
+        let thread_id = self.thread_count;
+        self.thread_count += 1;
+        thread_id
+    }
+
+    pub fn new_thread(&mut self, phase: usize) -> &mut Context<F> {
+        let thread_id = self.thread_count;
+        self.thread_count += 1;
+        self.threads[phase].push(Context::new(self.witness_gen_only, thread_id));
+        self.threads[phase].last_mut().unwrap()
+    }
+
+    /// Auto-calculate configuration parameters for the circuit. Also stores the JSON-serialized result in the `FLEX_GATE_CONFIG_PARAMS` environment variable.
+    pub fn config(&self, k: usize, minimum_rows: Option<usize>) -> FlexGateConfigParams {
+        let max_rows = (1 << k) - minimum_rows.unwrap_or(0);
+        let total_advice_per_phase = self
+            .threads
+            .iter()
+            .map(|threads| threads.iter().map(|ctx| ctx.advice.len()).sum::<usize>())
+            .collect::<Vec<_>>();
+        // we do a rough estimate by taking ceil(advice_cells_per_phase / max_rows), where max_rows = 2^k - minimum_rows
+        // if this is too small, manual configuration will be needed
+        let num_advice_per_phase = total_advice_per_phase
+            .iter()
+            .map(|count| (count + max_rows - 1) / max_rows)
+            .collect::<Vec<_>>();
+
+        let total_lookup_advice_per_phase = self
+            .threads
+            .iter()
+            .map(|threads| threads.iter().map(|ctx| ctx.cells_to_lookup.len()).sum::<usize>())
+            .collect::<Vec<_>>();
+        let num_lookup_advice_per_phase = total_lookup_advice_per_phase
+            .iter()
+            .map(|count| (count + max_rows - 1) / max_rows)
+            .collect::<Vec<_>>();
+
+        let total_fixed: usize = self
+            .threads
+            .iter()
+            .map(|threads| threads.iter().map(|ctx| ctx.constants.len()).sum::<usize>())
+            .sum();
+        let num_fixed = (total_fixed + (1 << k) - 1) >> k;
+
+        let params = FlexGateConfigParams {
+            strategy: GateStrategy::Vertical,
+            num_advice_per_phase,
+            num_lookup_advice_per_phase,
+            num_fixed,
+            k,
+        };
+        #[cfg(feature = "display")]
+        {
+            for phase in 0..MAX_PHASE {
+                if total_advice_per_phase[phase] != 0 || total_lookup_advice_per_phase[phase] != 0 {
+                    println!(
+                        "Gate Chip | Phase {}: {} advice cells , {} lookup advice cells",
+                        phase, total_advice_per_phase[phase], total_lookup_advice_per_phase[phase],
+                    );
+                }
+            }
+            println!("Total {total_fixed} fixed cells");
+            println!("Auto-calculated config params:\n {params:#?}");
+        }
+        std::env::set_var("FLEX_GATE_CONFIG_PARAMS", serde_json::to_string(&params).unwrap());
+        params
+    }
+
+    /// Assigns all advice and fixed cells, turns on selectors, imposes equality constraints.
+    /// This should only be called during keygen.
+    pub fn assign_all(
+        self,
+        config: &FlexGateConfig<F>,
+        lookup_advice: &[Vec<Column<Advice>>],
+        q_lookup: &[Option<Selector>],
+        region: &mut Region<F>,
+    ) -> MultiPhaseThreadBreakPoints {
+        assert!(!self.witness_gen_only);
+        let use_unknown = self.use_unknown;
+        let max_rows = config.max_rows;
+        let mut break_points = vec![];
+        let mut assigned_advices = HashMap::new();
+        let mut assigned_constants = HashMap::new();
+        let mut fixed_col = 0;
+        let mut fixed_offset = 0;
+        for (phase, threads) in self.threads.into_iter().enumerate() {
+            let mut break_point = vec![];
+            let mut gate_index = 0;
+            let mut row_offset = 0;
+            let mut lookup_offset = 0;
+            let mut lookup_col = 0;
+            for ctx in threads {
+                let mut basic_gate = config.basic_gates[phase]
+                        .get(gate_index)
+                        .unwrap_or_else(|| panic!("NOT ENOUGH ADVICE COLUMNS IN PHASE {phase}. Perhaps blinding factors were not taken into account. The max non-poisoned rows is {max_rows}"));
+
+                for (i, (advice, q)) in ctx.advice.iter().zip(ctx.selector.into_iter()).enumerate()
+                {
+                    let column = basic_gate.value;
+                    let value = if use_unknown { Value::unknown() } else { Value::known(advice) };
+                    #[cfg(feature = "halo2-axiom")]
+                    let cell = *region.assign_advice(column, row_offset, value).unwrap().cell();
+                    #[cfg(not(feature = "halo2-axiom"))]
+                    let cell =
+                        region.assign_advice(|| "", column, row_offset, || value).unwrap().cell();
+                    assigned_advices.insert((ctx.context_id, i), (cell, row_offset));
+
+                    if (q && row_offset + 4 > max_rows) || row_offset >= max_rows - 1 {
+                        break_point.push(row_offset);
+                        row_offset = 0;
+                        gate_index += 1;
+
+                        // when there is a break point, because we may have two gates that overlap at the current cell, we must copy the current cell to the next column for safety
+                        basic_gate = config.basic_gates[phase]
+                        .get(gate_index)
+                        .unwrap_or_else(|| panic!("NOT ENOUGH ADVICE COLUMNS IN PHASE {phase}. Perhaps blinding factors were not taken into account. The max non-poisoned rows is {max_rows}"));
+                        let column = basic_gate.value;
+
+                        #[cfg(feature = "halo2-axiom")]
+                        {
+                            let ncell =
+                                *region.assign_advice(column, row_offset, value).unwrap().cell();
+                            region.constrain_equal(&ncell, &cell);
+                        }
+                        #[cfg(not(feature = "halo2-axiom"))]
+                        {
+                            let ncell = region
+                                .assign_advice(|| "", column, row_offset, || value)
+                                .unwrap()
+                                .cell();
+                            region.constrain_equal(ncell, cell).unwrap();
+                        }
+                    }
+
+                    if q {
+                        basic_gate
+                            .q_enable
+                            .enable(region, row_offset)
+                            .expect("enable selector should not fail");
+                    }
+
+                    row_offset += 1;
+                }
+                for (c, i) in ctx.constants.into_iter() {
+                    #[cfg(feature = "halo2-axiom")]
+                    let cell = region.assign_fixed(config.constants[fixed_col], fixed_offset, c);
+                    #[cfg(not(feature = "halo2-axiom"))]
+                    let cell = region
+                        .assign_fixed(
+                            || "",
+                            config.constants[fixed_col],
+                            fixed_offset,
+                            || Value::known(c),
+                        )
+                        .unwrap()
+                        .cell();
+                    assigned_constants.insert((ctx.context_id, i), cell);
+                    fixed_col += 1;
+                    if fixed_col >= config.constants.len() {
+                        fixed_col = 0;
+                        fixed_offset += 1;
+                    }
+                }
+
+                for (left, right) in ctx.advice_equality_constraints {
+                    let (left, _) = assigned_advices[&(left.context_id, left.offset)];
+                    let (right, _) = assigned_advices[&(right.context_id, right.offset)];
+                    #[cfg(feature = "halo2-axiom")]
+                    region.constrain_equal(&left, &right);
+                    #[cfg(not(feature = "halo2-axiom"))]
+                    region.constrain_equal(left, right).unwrap();
+                }
+                for (left, right) in ctx.constant_equality_constraints {
+                    let left = assigned_constants[&(left.context_id, left.offset)];
+                    let (right, _) = assigned_advices[&(right.context_id, right.offset)];
+                    #[cfg(feature = "halo2-axiom")]
+                    region.constrain_equal(&left, &right);
+                    #[cfg(not(feature = "halo2-axiom"))]
+                    region.constrain_equal(left, right).unwrap();
+                }
+
+                for advice in ctx.cells_to_lookup {
+                    // if q_lookup is Some, that means there should be a single advice column and it has lookup enabled
+                    let cell = advice.cell.unwrap();
+                    let (acell, row_offset) = assigned_advices[&(cell.context_id, cell.offset)];
+                    if let Some(q_lookup) = q_lookup[phase] {
+                        assert_eq!(config.basic_gates[phase].len(), 1);
+                        q_lookup.enable(region, row_offset).unwrap();
+                        continue;
+                    }
+                    // otherwise, we copy the advice value to the special lookup_advice columns
+                    if lookup_offset >= max_rows {
+                        lookup_offset = 0;
+                        lookup_col += 1;
+                    }
+                    let value = advice.value;
+                    let value = if use_unknown { Value::unknown() } else { Value::known(value) };
+                    let column = lookup_advice[phase][lookup_col];
+
+                    #[cfg(feature = "halo2-axiom")]
+                    {
+                        let bcell = *region
+                            .assign_advice(column, lookup_offset, value)
+                            .expect("assign_advice should not fail")
+                            .cell();
+                        region.constrain_equal(&acell, &bcell);
+                    }
+                    #[cfg(not(feature = "halo2-axiom"))]
+                    {
+                        let bcell = region
+                            .assign_advice(|| "", column, lookup_offset, || value)
+                            .expect("assign_advice should not fail")
+                            .cell();
+                        region.constrain_equal(acell, bcell).unwrap();
+                    }
+                    lookup_offset += 1;
+                }
+            }
+            break_points.push(break_point);
+        }
+        break_points
+    }
+}
+
+/// Pure advice witness assignment in a single phase. Uses preprocessed `break_points` to determine when
+/// to split a thread into a new column. If `lookup_advice` is empty, cells to lookup are not copied: as in `assign_all`, this is taken to mean the single advice column itself has lookup enabled.
+pub fn assign_threads_in<F: ScalarField>(
+    phase: usize,
+    threads: Vec<Context<F>>,
+    config: &FlexGateConfig<F>,
+    lookup_advice: &[Column<Advice>],
+    region: &mut Region<F>,
+    break_points: ThreadBreakPoints,
+) {
+    if config.basic_gates[phase].is_empty() {
+        assert!(threads.is_empty(), "Trying to assign threads in a phase with no columns");
+        return;
+    }
+
+    let mut break_points = break_points.into_iter();
+    let mut break_point = break_points.next();
+    let mut gate_index = 0;
+    let mut column = config.basic_gates[phase][gate_index].value;
+    let mut row_offset = 0;
+    let mut lookup_offset = 0;
+    let mut lookup_advice = lookup_advice.iter();
+    let mut lookup_column = lookup_advice.next();
+    for ctx in threads {
+        if lookup_column.is_some() {
+            for advice in ctx.cells_to_lookup {
+                if lookup_offset >= config.max_rows {
+                    lookup_offset = 0;
+                    lookup_column = lookup_advice.next();
+                }
+                let (column, value) = (*lookup_column.unwrap(), advice.value);
+                #[cfg(feature = "halo2-axiom")]
+                region.assign_advice(column, lookup_offset, Value::known(value)).unwrap();
+                #[cfg(not(feature = "halo2-axiom"))]
+                region.assign_advice(|| "", column, lookup_offset, || Value::known(value)).unwrap();
+                lookup_offset += 1;
+            }
+        }
+        for advice in ctx.advice {
+            #[cfg(feature = "halo2-axiom")]
+            region.assign_advice(column, row_offset, Value::known(advice)).unwrap();
+            #[cfg(not(feature = "halo2-axiom"))]
+            region.assign_advice(|| "", column, row_offset, || Value::known(advice)).unwrap();
+
+            if break_point == Some(row_offset) {
+                break_point = break_points.next();
+                row_offset = 0;
+                gate_index += 1;
+                column = config.basic_gates[phase][gate_index].value;
+
+                #[cfg(feature = "halo2-axiom")]
+                region.assign_advice(column, row_offset, Value::known(advice)).unwrap();
+                #[cfg(not(feature = "halo2-axiom"))]
+                region.assign_advice(|| "", column, row_offset, || Value::known(advice)).unwrap();
+            }
+
+            row_offset += 1;
+        }
+    }
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct FlexGateConfigParams {
+    pub strategy: GateStrategy,
+    pub k: usize,
+    pub num_advice_per_phase: Vec<usize>,
+    pub num_lookup_advice_per_phase: Vec<usize>,
+    pub num_fixed: usize,
+}
+
+/// A wrapper struct to auto-build a circuit from a `GateThreadBuilder`.
+#[derive(Clone, Debug)]
+pub struct GateCircuitBuilder<F: ScalarField> {
+    pub builder: RefCell<GateThreadBuilder<F>>, // `RefCell` is just to trick circuit `synthesize` to take ownership of the inner builder
+    pub break_points: RefCell<MultiPhaseThreadBreakPoints>, // `RefCell` allows the circuit to record break points in a keygen call of `synthesize` for use in later witness gen
+}
+
+impl<F: ScalarField> GateCircuitBuilder<F> {
+    pub fn keygen(builder: GateThreadBuilder<F>) -> Self {
+        Self { builder: RefCell::new(builder.unknown(true)), break_points: RefCell::new(vec![]) }
+    }
+
+    pub fn mock(builder: GateThreadBuilder<F>) -> Self {
+        Self { builder: RefCell::new(builder.unknown(false)), break_points: RefCell::new(vec![]) }
+    }
+
+    pub fn witness_gen(
+        builder: GateThreadBuilder<F>,
+        break_points: MultiPhaseThreadBreakPoints,
+    ) -> Self {
+        Self { builder: RefCell::new(builder), break_points: RefCell::new(break_points) }
+    }
+}
+
+impl<F: ScalarField> Circuit<F> for GateCircuitBuilder<F> {
+    type Config = FlexGateConfig<F>;
+    type FloorPlanner = SimpleFloorPlanner;
+
+    fn without_witnesses(&self) -> Self {
+        unimplemented!()
+    }
+
+    fn configure(meta: &mut ConstraintSystem<F>) -> FlexGateConfig<F> {
+        let FlexGateConfigParams {
+            strategy,
+            num_advice_per_phase,
+            num_lookup_advice_per_phase: _,
+            num_fixed,
+            k,
+        } = serde_json::from_str(&std::env::var("FLEX_GATE_CONFIG_PARAMS").unwrap()).unwrap();
+        FlexGateConfig::configure(meta, strategy, &num_advice_per_phase, num_fixed, k)
+    }
+
+    fn synthesize(
+        &self,
+        config: Self::Config,
+        mut layouter: impl Layouter<F>,
+    ) -> Result<(), Error> {
+        let mut first_pass = SKIP_FIRST_PASS;
+        layouter.assign_region(
+            || "GateCircuitBuilder generated circuit",
+            |mut region| {
+                if first_pass {
+                    first_pass = false;
+                    return Ok(());
+                }
+                let builder = self.builder.take();
+                if !builder.witness_gen_only {
+                    *self.break_points.borrow_mut() =
+                        builder.assign_all(&config, &[], &[], &mut region);
+                } else {
+                    let break_points = self.break_points.take();
+                    for (phase, (threads, break_points)) in
+                        builder.threads.into_iter().zip(break_points.into_iter()).enumerate()
+                    {
+                        assign_threads_in(phase, threads, &config, &[], &mut region, break_points);
+                        #[cfg(feature = "halo2-axiom")]
+                        region.next_phase();
+                    }
+                }
+                Ok(())
+            },
+        )
+    }
+}
+
+/// A wrapper around `GateCircuitBuilder` to auto-build a circuit from a `GateThreadBuilder`, using `RangeConfig` as the circuit config.
+#[derive(Clone, Debug)]
+pub struct RangeCircuitBuilder<F: ScalarField>(pub GateCircuitBuilder<F>);
+
+impl<F: ScalarField> RangeCircuitBuilder<F> {
+    pub fn keygen(builder: GateThreadBuilder<F>) -> Self {
+        Self(GateCircuitBuilder::keygen(builder))
+    }
+
+    pub fn mock(builder: GateThreadBuilder<F>) -> Self {
+        Self(GateCircuitBuilder::mock(builder))
+    }
+
+    pub fn witness_gen(
+        builder: GateThreadBuilder<F>,
+        break_points: MultiPhaseThreadBreakPoints,
+    ) -> Self {
+        Self(GateCircuitBuilder::witness_gen(builder, break_points))
+    }
+}
+
+impl<F: ScalarField> Circuit<F> for RangeCircuitBuilder<F> {
+    type Config = RangeConfig<F>;
+    type FloorPlanner = SimpleFloorPlanner;
+
+    fn without_witnesses(&self) -> Self {
+        unimplemented!()
+    }
+
+    fn configure(meta: &mut ConstraintSystem<F>) -> Self::Config {
+        let FlexGateConfigParams {
+            strategy,
+            num_advice_per_phase,
+            num_lookup_advice_per_phase,
+            num_fixed,
+            k,
+        } = serde_json::from_str(&std::env::var("FLEX_GATE_CONFIG_PARAMS").unwrap()).unwrap();
+        let strategy = match strategy {
+            GateStrategy::Vertical => RangeStrategy::Vertical,
+        };
+        let lookup_bits = std::env::var("LOOKUP_BITS").unwrap().parse().unwrap();
+        RangeConfig::configure(
+            meta,
+            strategy,
+            &num_advice_per_phase,
+            &num_lookup_advice_per_phase,
+            num_fixed,
+            lookup_bits,
+            k,
+        )
+    }
+
+    fn synthesize(
+        &self,
+        config: Self::Config,
+        mut layouter: impl Layouter<F>,
+    ) -> Result<(), Error> {
+        config.load_lookup_table(&mut layouter).expect("load lookup table should not fail");
+
+        let mut first_pass = SKIP_FIRST_PASS;
+        layouter.assign_region(
+            || "RangeCircuitBuilder generated circuit",
+            |mut region| {
+                if first_pass {
+                    first_pass = false;
+                    return Ok(());
+                }
+                let builder = self.0.builder.take();
+                if !builder.witness_gen_only {
+                    *self.0.break_points.borrow_mut() = builder.assign_all(
+                        &config.gate,
+                        &config.lookup_advice,
+                        &config.q_lookup,
+                        &mut region,
+                    )
+                } else {
+                    let break_points = self.0.break_points.take();
+                    for (phase, (threads, break_points)) in
+                        builder.threads.into_iter().zip(break_points.into_iter()).enumerate()
+                    {
+                        assign_threads_in(
+                            phase,
+                            threads,
+                            &config.gate,
+                            &config.lookup_advice[phase],
+                            &mut region,
+                            break_points,
+                        );
+                        #[cfg(feature = "halo2-axiom")]
+                        region.next_phase();
+                    }
+                }
+                Ok(())
+            },
+        )
+    }
+}

From 22195f4cdc0ddb03d73f7d98bf69a59a088fd1d7 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Mon, 6 Feb 2023 20:37:58 -0800
Subject: [PATCH 07/26] feat: update `gates::range` to working tests and new
 API

* In keygen mode, the `CircuitBuilder` now clones the `ThreadBuilder`
  instead of `take`ing it, because the same circuit is used for both vk
  gen and pk gen. This may increase memory usage during pk gen.
---
 halo2-base/benches/inner_product.rs  |  10 +-
 halo2-base/examples/inner_product.rs |  95 +++++++
 halo2-base/src/gates/builder.rs      |  36 ++-
 halo2-base/src/gates/mod.rs          | 205 +--------------
 halo2-base/src/gates/range.rs        |  12 +-
 halo2-base/src/gates/tests.rs        | 369 +++++----------------------
 halo2-base/src/lib.rs                |   2 +-
 halo2-base/src/utils.rs              |   4 +-
 8 files changed, 202 insertions(+), 531 deletions(-)
 create mode 100644 halo2-base/examples/inner_product.rs

diff --git a/halo2-base/benches/inner_product.rs b/halo2-base/benches/inner_product.rs
index e43672b7..5d2902ae 100644
--- a/halo2-base/benches/inner_product.rs
+++ b/halo2-base/benches/inner_product.rs
@@ -45,25 +45,27 @@ fn inner_prod_bench<F: ScalarField>(ctx: &mut Context<F>, a: Vec<F>, b: Vec<F>)
 }
 
 fn bench(c: &mut Criterion) {
+    let k = 19u32;
     // create circuit for keygen
     let mut builder = GateThreadBuilder::new(false);
     inner_prod_bench(builder.main(0), vec![Fr::zero(); 5], vec![Fr::zero(); 5]);
-    builder.config(K as usize, Some(20));
+    builder.config(k as usize, Some(20));
     let circuit = GateCircuitBuilder::mock(builder);
 
     // check the circuit is correct just in case
-    MockProver::run(K, &circuit, vec![]).unwrap().assert_satisfied();
+    MockProver::run(k, &circuit, vec![]).unwrap().assert_satisfied();
 
-    let params = ParamsKZG::<Bn256>::setup(K, OsRng);
+    let params = ParamsKZG::<Bn256>::setup(k, OsRng);
     let vk = keygen_vk(&params, &circuit).expect("vk should not fail");
     let pk = keygen_pk(&params, vk, &circuit).expect("pk should not fail");
 
     let break_points = circuit.break_points.take();
+    drop(circuit);
 
     let mut group = c.benchmark_group("plonk-prover");
     group.sample_size(10);
     group.bench_with_input(
-        BenchmarkId::new("inner_product", K),
+        BenchmarkId::new("inner_product", k),
         &(&params, &pk),
         |bencher, &(params, pk)| {
             bencher.iter(|| {
diff --git a/halo2-base/examples/inner_product.rs b/halo2-base/examples/inner_product.rs
new file mode 100644
index 00000000..d7976f47
--- /dev/null
+++ b/halo2-base/examples/inner_product.rs
@@ -0,0 +1,95 @@
+#![allow(unused_imports)]
+#![allow(unused_variables)]
+use halo2_base::gates::builder::{GateCircuitBuilder, GateThreadBuilder};
+use halo2_base::gates::flex_gate::{FlexGateConfig, GateChip, GateInstructions, GateStrategy};
+use halo2_base::halo2_proofs::{
+    arithmetic::Field,
+    circuit::*,
+    dev::MockProver,
+    halo2curves::bn256::{Bn256, Fr, G1Affine},
+    plonk::*,
+    poly::kzg::multiopen::VerifierSHPLONK,
+    poly::kzg::strategy::SingleStrategy,
+    poly::kzg::{
+        commitment::{KZGCommitmentScheme, ParamsKZG},
+        multiopen::ProverSHPLONK,
+    },
+    transcript::{Blake2bRead, TranscriptReadBuffer},
+    transcript::{Blake2bWrite, Challenge255, TranscriptWriterBuffer},
+};
+use halo2_base::utils::ScalarField;
+use halo2_base::{
+    Context,
+    QuantumCell::{Existing, Witness},
+    SKIP_FIRST_PASS,
+};
+use itertools::Itertools;
+use rand::rngs::OsRng;
+use std::marker::PhantomData;
+
+use criterion::{criterion_group, criterion_main};
+use criterion::{BenchmarkId, Criterion};
+
+use pprof::criterion::{Output, PProfProfiler};
+// Thanks to the example provided by @jebbow in his article
+// https://www.jibbow.com/posts/criterion-flamegraphs/
+
+const K: u32 = 19;
+
+fn inner_prod_bench<F: ScalarField>(ctx: &mut Context<F>, a: Vec<F>, b: Vec<F>) {
+    assert_eq!(a.len(), b.len());
+    let a = ctx.assign_witnesses(a);
+    let b = ctx.assign_witnesses(b);
+
+    let chip = GateChip::default();
+    for _ in 0..(1 << K) / 16 - 10 {
+        chip.inner_product(ctx, a.clone(), b.clone().into_iter().map(Existing));
+    }
+}
+
+fn main() {
+    let k = 10u32;
+    // create circuit for keygen
+    let mut builder = GateThreadBuilder::new(false);
+    inner_prod_bench(builder.main(0), vec![Fr::zero(); 5], vec![Fr::zero(); 5]);
+    builder.config(k as usize, Some(20));
+    let circuit = GateCircuitBuilder::mock(builder);
+
+    // check the circuit is correct just in case
+    MockProver::run(k, &circuit, vec![]).unwrap().assert_satisfied();
+
+    let params = ParamsKZG::<Bn256>::setup(k, OsRng);
+    let vk = keygen_vk(&params, &circuit).expect("vk should not fail");
+    let pk = keygen_pk(&params, vk, &circuit).expect("pk should not fail");
+
+    let break_points = circuit.break_points.take();
+
+    let mut builder = GateThreadBuilder::new(true);
+    let a = (0..5).map(|_| Fr::random(OsRng)).collect_vec();
+    let b = (0..5).map(|_| Fr::random(OsRng)).collect_vec();
+    inner_prod_bench(builder.main(0), a, b);
+    let circuit = GateCircuitBuilder::witness_gen(builder, break_points);
+
+    let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
+    create_proof::<
+        KZGCommitmentScheme<Bn256>,
+        ProverSHPLONK<'_, Bn256>,
+        Challenge255<G1Affine>,
+        _,
+        Blake2bWrite<Vec<u8>, G1Affine, Challenge255<_>>,
+        _,
+    >(&params, &pk, &[circuit], &[&[]], OsRng, &mut transcript)
+    .expect("prover should not fail");
+
+    let strategy = SingleStrategy::new(&params);
+    let proof = transcript.finalize();
+    let mut transcript = Blake2bRead::<_, _, Challenge255<_>>::init(&proof[..]);
+    verify_proof::<
+        KZGCommitmentScheme<Bn256>,
+        VerifierSHPLONK<'_, Bn256>,
+        Challenge255<G1Affine>,
+        Blake2bRead<&[u8], G1Affine, Challenge255<G1Affine>>,
+        _,
+    >(&params, pk.get_vk(), strategy, &[&[]], &mut transcript)
+    .unwrap();
+}
diff --git a/halo2-base/src/gates/builder.rs b/halo2-base/src/gates/builder.rs
index c74369a4..c5601b18 100644
--- a/halo2-base/src/gates/builder.rs
+++ b/halo2-base/src/gates/builder.rs
@@ -291,9 +291,11 @@ pub fn assign_threads_in<F: ScalarField>(
 
     let mut break_points = break_points.into_iter();
     let mut break_point = break_points.next();
+
     let mut gate_index = 0;
     let mut column = config.basic_gates[phase][gate_index].value;
     let mut row_offset = 0;
+
     let mut lookup_offset = 0;
     let mut lookup_advice = lookup_advice.iter();
     let mut lookup_column = lookup_advice.next();
@@ -304,11 +306,13 @@ pub fn assign_threads_in<F: ScalarField>(
                 lookup_column = lookup_advice.next();
             }
             let value = advice.value;
-            let column = *lookup_column.unwrap();
+            let lookup_column = *lookup_column.unwrap();
             #[cfg(feature = "halo2-axiom")]
-            region.assign_advice(column, lookup_offset, Value::known(value)).unwrap();
+            region.assign_advice(lookup_column, lookup_offset, Value::known(value)).unwrap();
             #[cfg(not(feature = "halo2-axiom"))]
-            region.assign_advice(|| "", column, lookup_offset, || Value::known(value)).unwrap();
+            region
+                .assign_advice(|| "", lookup_column, lookup_offset, || Value::known(value))
+                .unwrap();
 
             lookup_offset += 1;
         }
@@ -400,18 +404,22 @@ impl<F: ScalarField> Circuit<F> for GateCircuitBuilder<F> {
                     first_pass = false;
                     return Ok(());
                 }
-                let builder = self.builder.take();
-                if !builder.witness_gen_only {
+                if !self.builder.borrow().witness_gen_only {
+                    // clone the builder so we can re-use the circuit for both vk and pk gen
+                    let builder = self.builder.borrow().clone();
                     *self.break_points.borrow_mut() =
                         builder.assign_all(&config, &[], &[], &mut region);
                 } else {
+                    let builder = self.builder.take();
                     let break_points = self.break_points.take();
                     for (phase, (threads, break_points)) in
                         builder.threads.into_iter().zip(break_points.into_iter()).enumerate()
                     {
-                        assign_threads_in(phase, threads, &config, &[], &mut region, break_points);
                         #[cfg(feature = "halo2-axiom")]
-                        region.next_phase();
+                        if phase != 0 && !threads.is_empty() {
+                            region.next_phase();
+                        }
+                        assign_threads_in(phase, threads, &config, &[], &mut region, break_points);
                     }
                 }
                 Ok(())
@@ -487,19 +495,25 @@ impl<F: ScalarField> Circuit<F> for RangeCircuitBuilder<F> {
                     first_pass = false;
                     return Ok(());
                 }
-                let builder = self.0.builder.take();
-                if !builder.witness_gen_only {
+                if !self.0.builder.borrow().witness_gen_only {
+                    // clone the builder so we can re-use the circuit for both vk and pk gen
+                    let builder = self.0.builder.borrow().clone();
                     *self.0.break_points.borrow_mut() = builder.assign_all(
                         &config.gate,
                         &config.lookup_advice,
                         &config.q_lookup,
                         &mut region,
-                    )
+                    );
                 } else {
+                    let builder = self.0.builder.take();
                     let break_points = self.0.break_points.take();
                     for (phase, (threads, break_points)) in
                         builder.threads.into_iter().zip(break_points.into_iter()).enumerate()
                     {
+                        #[cfg(feature = "halo2-axiom")]
+                        if phase != 0 && !threads.is_empty() {
+                            region.next_phase();
+                        }
                         assign_threads_in(
                             phase,
                             threads,
@@ -508,8 +522,6 @@ impl<F: ScalarField> Circuit<F> for RangeCircuitBuilder<F> {
                             &mut region,
                             break_points,
                         );
-                        #[cfg(feature = "halo2-axiom")]
-                        region.next_phase();
                     }
                 }
                 Ok(())
diff --git a/halo2-base/src/gates/mod.rs b/halo2-base/src/gates/mod.rs
index 4fc04edc..6bdde332 100644
--- a/halo2-base/src/gates/mod.rs
+++ b/halo2-base/src/gates/mod.rs
@@ -1,209 +1,6 @@
 pub mod builder;
 pub mod flex_gate;
-// pub mod range;
-
-/*
-pub trait RangeInstructions<F: ScalarField> {
-    type Gate: GateInstructions<F>;
-
-    fn gate(&self) -> &Self::Gate;
-    fn strategy(&self) -> RangeStrategy;
-
-    fn lookup_bits(&self) -> usize;
-
-    fn range_check<'a>(
-        &self,
-        ctx: &mut Context<'a, F>,
-        a: &AssignedValue<'a, F>,
-        range_bits: usize,
-    );
-
-    fn check_less_than<'a>(
-        &self,
-        ctx: &mut Context<'a, F>,
-        a: QuantumCell<'_, 'a, F>,
-        b: QuantumCell<'_, 'a, F>,
-        num_bits: usize,
-    );
-
-    /// Checks that `a` is in `[0, b)`.
-    ///
-    /// Does not require bit assumptions on `a, b` because we range check that `a` has at most `bit_length(b)` bits.
-    fn check_less_than_safe<'a>(&self, ctx: &mut Context<'a, F>, a: &AssignedValue<'a, F>, b: u64) {
-        let range_bits =
-            (bit_length(b) + self.lookup_bits() - 1) / self.lookup_bits() * self.lookup_bits();
-
-        self.range_check(ctx, a, range_bits);
-        self.check_less_than(
-            ctx,
-            Existing(a),
-            Constant(self.gate().get_field_element(b)),
-            range_bits,
-        )
-    }
-
-    /// Checks that `a` is in `[0, b)`.
-    ///
-    /// Does not require bit assumptions on `a, b` because we range check that `a` has at most `bit_length(b)` bits.
-    fn check_big_less_than_safe<'a>(
-        &self,
-        ctx: &mut Context<'a, F>,
-        a: &AssignedValue<'a, F>,
-        b: BigUint,
-    ) where
-        F: PrimeField,
-    {
-        let range_bits =
-            (b.bits() as usize + self.lookup_bits() - 1) / self.lookup_bits() * self.lookup_bits();
-
-        self.range_check(ctx, a, range_bits);
-        self.check_less_than(ctx, Existing(a), Constant(biguint_to_fe(&b)), range_bits)
-    }
-
-    /// Returns whether `a` is in `[0, b)`.
-    ///
-    /// Warning: This may fail silently if `a` or `b` have more than `num_bits` bits
-    fn is_less_than<'a>(
-        &self,
-        ctx: &mut Context<'a, F>,
-        a: QuantumCell<'_, 'a, F>,
-        b: QuantumCell<'_, 'a, F>,
-        num_bits: usize,
-    ) -> AssignedValue<'a, F>;
-
-    /// Returns whether `a` is in `[0, b)`.
-    ///
-    /// Does not require bit assumptions on `a, b` because we range check that `a` has at most `range_bits` bits.
-    fn is_less_than_safe<'a>(
-        &self,
-        ctx: &mut Context<'a, F>,
-        a: &AssignedValue<'a, F>,
-        b: u64,
-    ) -> AssignedValue<'a, F> {
-        let range_bits =
-            (bit_length(b) + self.lookup_bits() - 1) / self.lookup_bits() * self.lookup_bits();
-
-        self.range_check(ctx, a, range_bits);
-        self.is_less_than(ctx, Existing(a), Constant(F::from(b)), range_bits)
-    }
-
-    /// Returns whether `a` is in `[0, b)`.
-    ///
-    /// Does not require bit assumptions on `a, b` because we range check that `a` has at most `range_bits` bits.
-    fn is_big_less_than_safe<'a>(
-        &self,
-        ctx: &mut Context<'a, F>,
-        a: &AssignedValue<'a, F>,
-        b: BigUint,
-    ) -> AssignedValue<'a, F>
-    where
-        F: PrimeField,
-    {
-        let range_bits =
-            (b.bits() as usize + self.lookup_bits() - 1) / self.lookup_bits() * self.lookup_bits();
-
-        self.range_check(ctx, a, range_bits);
-        self.is_less_than(ctx, Existing(a), Constant(biguint_to_fe(&b)), range_bits)
-    }
-
-    /// Returns `(c, r)` such that `a = b * c + r`.
-    ///
-    /// Assumes that `b != 0`.
-    fn div_mod<'a>(
-        &self,
-        ctx: &mut Context<'a, F>,
-        a: QuantumCell<'_, 'a, F>,
-        b: impl Into<BigUint>,
-        a_num_bits: usize,
-    ) -> (AssignedValue<'a, F>, AssignedValue<'a, F>)
-    where
-        F: PrimeField,
-    {
-        let b = b.into();
-        let mut a_val = BigUint::zero();
-        a.value().map(|v| a_val = fe_to_biguint(v));
-        let (div, rem) = a_val.div_mod_floor(&b);
-        let [div, rem] = [div, rem].map(|v| biguint_to_fe(&v));
-        let assigned = self.gate().assign_region(
-            ctx,
-            vec![
-                Witness(Value::known(rem)),
-                Constant(biguint_to_fe(&b)),
-                Witness(Value::known(div)),
-                a,
-            ],
-            vec![(0, None)],
-        );
-        self.check_big_less_than_safe(
-            ctx,
-            &assigned[2],
-            BigUint::one().shl(a_num_bits as u32) / &b + BigUint::one(),
-        );
-        self.check_big_less_than_safe(ctx, &assigned[0], b);
-        (assigned[2].clone(), assigned[0].clone())
-    }
-
-    /// Returns `(c, r)` such that `a = b * c + r`.
-    ///
-    /// Assumes that `b != 0`.
-    ///
-    /// Let `X = 2 ** b_num_bits`.
-    /// Write `a = a1 * X + a0` and `c = c1 * X + c0`.
-    /// If we write `b * c0 + r = d1 * X + d0` then
-    ///     `b * c + r = (b * c1 + d1) * X + d0`
-    fn div_mod_var<'a>(
-        &self,
-        ctx: &mut Context<'a, F>,
-        a: QuantumCell<'_, 'a, F>,
-        b: QuantumCell<'_, 'a, F>,
-        a_num_bits: usize,
-        b_num_bits: usize,
-    ) -> (AssignedValue<'a, F>, AssignedValue<'a, F>)
-    where
-        F: PrimeField,
-    {
-        let mut a_val = BigUint::zero();
-        a.value().map(|v| a_val = fe_to_biguint(v));
-        let mut b_val = BigUint::one();
-        b.value().map(|v| b_val = fe_to_biguint(v));
-        let (div, rem) = a_val.div_mod_floor(&b_val);
-        let x = BigUint::one().shl(b_num_bits as u32);
-        let (div_hi, div_lo) = div.div_mod_floor(&x);
-
-        let x_fe = self.gate().pow_of_two()[b_num_bits];
-        let [div, div_hi, div_lo, rem] = [div, div_hi, div_lo, rem].map(|v| biguint_to_fe(&v));
-        let assigned = self.gate().assign_region(
-            ctx,
-            vec![
-                Witness(Value::known(div_lo)),
-                Witness(Value::known(div_hi)),
-                Constant(x_fe),
-                Witness(Value::known(div)),
-                Witness(Value::known(rem)),
-            ],
-            vec![(0, None)],
-        );
-        self.range_check(ctx, &assigned[0], b_num_bits);
-        self.range_check(ctx, &assigned[1], a_num_bits.saturating_sub(b_num_bits));
-
-        let (bcr0_hi, bcr0_lo) = {
-            let bcr0 =
-                self.gate().mul_add(ctx, b.clone(), Existing(&assigned[0]), Existing(&assigned[4]));
-            self.div_mod(ctx, Existing(&bcr0), x.clone(), a_num_bits)
-        };
-        let bcr_hi =
-            self.gate().mul_add(ctx, b.clone(), Existing(&assigned[1]), Existing(&bcr0_hi));
-
-        let (a_hi, a_lo) = self.div_mod(ctx, a, x, a_num_bits);
-        ctx.constrain_equal(&bcr_hi, &a_hi);
-        ctx.constrain_equal(&bcr0_lo, &a_lo);
-
-        self.range_check(ctx, &assigned[4], b_num_bits);
-        self.check_less_than(ctx, Existing(&assigned[4]), b, b_num_bits);
-        (assigned[3].clone(), assigned[4].clone())
-    }
-}
-*/
+pub mod range;
 
 #[cfg(test)]
 pub mod tests;
diff --git a/halo2-base/src/gates/range.rs b/halo2-base/src/gates/range.rs
index bb23aa78..6c41e8bb 100644
--- a/halo2-base/src/gates/range.rs
+++ b/halo2-base/src/gates/range.rs
@@ -92,9 +92,15 @@ impl<F: ScalarField> RangeConfig<F> {
             }
         }
 
-        let config =
+        let mut config =
             Self { lookup_advice, q_lookup, lookup, lookup_bits, gate, _strategy: range_strategy };
+
         config.create_lookup(meta);
+        config.gate.max_rows = (1 << circuit_degree) - meta.minimum_rows();
+        assert!(
+            (1 << lookup_bits) <= config.gate.max_rows,
+            "lookup table is too large for the circuit degree plus blinding factors!"
+        );
 
         config
     }
@@ -366,6 +372,10 @@ impl<F: ScalarField> RangeChip<F> {
 
         Self { strategy, gate, lookup_bits, limb_bases }
     }
+
+    pub fn default(lookup_bits: usize) -> Self {
+        Self::new(RangeStrategy::Vertical, lookup_bits)
+    }
 }
 
 impl<F: ScalarField> RangeInstructions<F> for RangeChip<F> {
diff --git a/halo2-base/src/gates/tests.rs b/halo2-base/src/gates/tests.rs
index e786f5dc..e6941afa 100644
--- a/halo2-base/src/gates/tests.rs
+++ b/halo2-base/src/gates/tests.rs
@@ -1,9 +1,11 @@
-use super::builder::{GateCircuitBuilder, GateThreadBuilder};
+use super::builder::{GateCircuitBuilder, GateThreadBuilder, RangeCircuitBuilder};
 use super::flex_gate::{GateChip, GateInstructions};
+use super::range::{RangeChip, RangeInstructions};
 use crate::halo2_proofs::{dev::MockProver, halo2curves::bn256::Fr};
-use crate::utils::ScalarField;
+use crate::utils::{BigPrimeField, ScalarField};
 use crate::{Context, QuantumCell::Constant};
 use ff::Field;
+use itertools::Itertools;
 use rand::rngs::OsRng;
 use rayon::prelude::*;
 
@@ -25,6 +27,10 @@ fn gate_tests<F: ScalarField>(ctx: &mut Context<F>, inputs: [F; 3]) {
 
     let bits = ctx.assign_witnesses([F::zero(), F::one()]);
     chip.bits_to_indicator(ctx, &bits);
+
+    chip.is_equal(ctx, b, a);
+
+    chip.is_zero(ctx, a);
 }
 
 #[test]
@@ -83,161 +89,62 @@ fn plot_gates() {
     gate_tests(builder.main(0), inputs);
 
     // auto-tune circuit
-    builder.config(k);
+    builder.config(k, Some(9));
     // create circuit
-    let circuit = MyCircuit {
-        inputs,
-        builder: RefCell::new(builder.unknown(true)),
-        break_points: RefCell::default(),
-    };
+    let circuit = GateCircuitBuilder::keygen(builder);
     halo2_proofs::dev::CircuitLayout::default().render(k, &circuit, &root).unwrap();
 }
 
-/*
-#[derive(Default)]
-struct RangeTestCircuit<F> {
+fn range_tests<F: BigPrimeField>(
+    ctx: &mut Context<F>,
+    lookup_bits: usize,
+    inputs: [F; 2],
     range_bits: usize,
     lt_bits: usize,
-    a: Value<F>,
-    b: Value<F>,
-}
+) {
+    let [a, b]: [_; 2] = ctx.assign_witnesses(inputs).try_into().unwrap();
+    let chip = RangeChip::default(lookup_bits);
+    std::env::set_var("LOOKUP_BITS", lookup_bits.to_string());
+
+    chip.range_check(ctx, a, range_bits);
 
-impl Circuit<Fr> for RangeTestCircuit<Fr> {
-    type Config = range::RangeConfig<Fr>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self {
-            range_bits: self.range_bits,
-            lt_bits: self.lt_bits,
-            a: Value::unknown(),
-            b: Value::unknown(),
-        }
-    }
-
-    fn configure(meta: &mut ConstraintSystem<Fr>) -> Self::Config {
-        range::RangeConfig::configure(
-            meta,
-            range::RangeStrategy::Vertical,
-            &[NUM_ADVICE],
-            &[1],
-            1,
-            3,
-            0,
-            11, /* params K */
-        )
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<Fr>,
-    ) -> Result<(), Error> {
-        config.load_lookup_table(&mut layouter)?;
-
-        /*
-        // let's try a separate layouter for loading private inputs
-        let (a, b) = layouter.assign_region(
-            || "load private inputs",
-            |region| {
-                let mut aux = Context::new(
-                    region,
-                    ContextParams {
-                        num_advice: vec![("default".to_string(), NUM_ADVICE)],
-                        fixed_columns: config.gate.constants.clone(),
-                    },
-                );
-                let cells = config.gate.assign_region_smart(
-                    &mut aux,
-                    vec![Witness(self.a), Witness(self.b)],
-                    vec![],
-                    vec![],
-                    vec![],
-                )?;
-                Ok((cells[0].clone(), cells[1].clone()))
-            },
-        )?; */
-
-        let mut first_pass = SKIP_FIRST_PASS;
-
-        layouter.assign_region(
-            || "range",
-            |region| {
-                // If we uncomment out the line below, get_shape will be empty and the layouter will try to assign at row 0, but "load private inputs" has already assigned to row 0, so this will panic and fail
-
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let mut aux = Context::new(
-                    region,
-                    ContextParams {
-                        max_rows: config.gate.max_rows,
-                        num_context_ids: 1,
-                        fixed_columns: config.gate.constants.clone(),
-                    },
-                );
-                let ctx = &mut aux;
-
-                let (a, b) = {
-                    let cells = config.gate.assign_region_smart(
-                        ctx,
-                        vec![Witness(self.a), Witness(self.b)],
-                        vec![],
-                        vec![],
-                        vec![],
-                    );
-                    (cells[0].clone(), cells[1].clone())
-                };
-
-                {
-                    config.range_check(ctx, &a, self.range_bits);
-                }
-                {
-                    config.check_less_than(ctx, Existing(&a), Existing(&b), self.lt_bits);
-                }
-                {
-                    config.is_less_than(ctx, Existing(&a), Existing(&b), self.lt_bits);
-                }
-                {
-                    config.is_less_than(ctx, Existing(&b), Existing(&a), self.lt_bits);
-                }
-                {
-                    config.gate().is_equal(ctx, Existing(&b), Existing(&a));
-                }
-                {
-                    config.gate().is_zero(ctx, &a);
-                }
-
-                config.finalize(ctx);
-
-                #[cfg(feature = "display")]
-                {
-                    println!("total advice cells: {}", ctx.total_advice);
-                    let const_rows = ctx.fixed_offset + 1;
-                    println!("maximum rows used by a fixed column: {const_rows}");
-                    println!("lookup cells used: {}", ctx.cells_to_lookup.len());
-                }
-                Ok(())
-            },
-        )
-    }
+    chip.check_less_than(ctx, a, b, lt_bits);
+
+    chip.is_less_than(ctx, a, b, lt_bits);
+
+    chip.is_less_than(ctx, b, a, lt_bits);
+
+    chip.div_mod(ctx, a, 7u64, lt_bits);
 }
 
 #[test]
-fn test_range() {
+fn test_range_single() {
     let k = 11;
-    let circuit = RangeTestCircuit::<Fr> {
-        range_bits: 8,
-        lt_bits: 8,
-        a: Value::known(Fr::from(100u64)),
-        b: Value::known(Fr::from(101u64)),
-    };
-
-    let prover = MockProver::run(k, &circuit, vec![]).unwrap();
-    prover.assert_satisfied();
-    //assert_eq!(prover.verify(), Ok(()));
+    let inputs = [100, 101].map(Fr::from);
+    let mut builder = GateThreadBuilder::new(false);
+    range_tests(builder.main(0), 3, inputs, 8, 8);
+
+    // auto-tune circuit
+    builder.config(k, Some(9));
+    // create circuit
+    let circuit = RangeCircuitBuilder::mock(builder);
+
+    MockProver::run(k as u32, &circuit, vec![]).unwrap().assert_satisfied();
+}
+
+#[test]
+fn test_range_multicolumn() {
+    let k = 5;
+    let inputs = [100, 101].map(Fr::from);
+    let mut builder = GateThreadBuilder::new(false);
+    range_tests(builder.main(0), 3, inputs, 8, 8);
+
+    // auto-tune circuit
+    builder.config(k, Some(9));
+    // create circuit
+    let circuit = RangeCircuitBuilder::mock(builder);
+
+    MockProver::run(k as u32, &circuit, vec![]).unwrap().assert_satisfied();
 }
 
 #[cfg(feature = "dev-graph")]
@@ -249,168 +156,14 @@ fn plot_range() {
     root.fill(&WHITE).unwrap();
     let root = root.titled("Range Layout", ("sans-serif", 60)).unwrap();
 
-    let circuit = RangeTestCircuit::<Fr> {
-        range_bits: 8,
-        lt_bits: 8,
-        a: Value::unknown(),
-        b: Value::unknown(),
-    };
+    let k = 11;
+    let inputs = [0, 0].map(Fr::from);
+    let mut builder = GateThreadBuilder::new(false);
+    range_tests(builder.main(0), 3, inputs, 8, 8);
 
+    // auto-tune circuit
+    builder.config(k, Some(9));
+    // create circuit
+    let circuit = RangeCircuitBuilder::keygen(builder);
     halo2_proofs::dev::CircuitLayout::default().render(7, &circuit, &root).unwrap();
 }
-
-mod lagrange {
-    use crate::halo2_proofs::{
-        arithmetic::Field,
-        halo2curves::bn256::{Bn256, G1Affine},
-        poly::{
-            commitment::{Params, ParamsProver},
-            kzg::{
-                commitment::{KZGCommitmentScheme, ParamsKZG},
-                multiopen::{ProverSHPLONK, VerifierSHPLONK},
-                strategy::SingleStrategy,
-            },
-        },
-        transcript::{
-            Blake2bRead, Blake2bWrite, Challenge255, TranscriptReadBuffer, TranscriptWriterBuffer,
-        },
-    };
-    use ark_std::{end_timer, start_timer};
-    use rand::rngs::OsRng;
-
-    use super::*;
-
-    #[derive(Default)]
-    struct MyCircuit<F> {
-        coords: Vec<Value<(F, F)>>,
-        a: Value<F>,
-    }
-
-    const NUM_ADVICE: usize = 6;
-
-    impl Circuit<Fr> for MyCircuit<Fr> {
-        type Config = FlexGateConfig<Fr>;
-        type FloorPlanner = SimpleFloorPlanner;
-
-        fn without_witnesses(&self) -> Self {
-            Self {
-                coords: self.coords.iter().map(|_| Value::unknown()).collect(),
-                a: Value::unknown(),
-            }
-        }
-
-        fn configure(meta: &mut ConstraintSystem<Fr>) -> Self::Config {
-            FlexGateConfig::configure(meta, GateStrategy::PlonkPlus, &[NUM_ADVICE], 1, 0, 14)
-        }
-
-        fn synthesize(
-            &self,
-            config: Self::Config,
-            mut layouter: impl Layouter<Fr>,
-        ) -> Result<(), Error> {
-            let mut first_pass = SKIP_FIRST_PASS;
-
-            layouter.assign_region(
-                || "gate",
-                |region| {
-                    if first_pass {
-                        first_pass = false;
-                        return Ok(());
-                    }
-
-                    let mut aux = Context::new(
-                        region,
-                        ContextParams {
-                            max_rows: config.max_rows,
-                            num_context_ids: 1,
-                            fixed_columns: config.constants.clone(),
-                        },
-                    );
-                    let ctx = &mut aux;
-
-                    let x =
-                        config.assign_witnesses(ctx, self.coords.iter().map(|c| c.map(|c| c.0)));
-                    let y =
-                        config.assign_witnesses(ctx, self.coords.iter().map(|c| c.map(|c| c.1)));
-
-                    let a = config.assign_witnesses(ctx, vec![self.a]).pop().unwrap();
-
-                    config.lagrange_and_eval(
-                        ctx,
-                        &x.into_iter().zip(y.into_iter()).collect::<Vec<_>>(),
-                        &a,
-                    );
-
-                    #[cfg(feature = "display")]
-                    {
-                        println!("total advice cells: {}", ctx.total_advice);
-                    }
-
-                    Ok(())
-                },
-            )
-        }
-    }
-
-    #[test]
-    fn test_lagrange() -> Result<(), Box<dyn std::error::Error>> {
-        let k = 14;
-        let mut rng = OsRng;
-        let circuit = MyCircuit::<Fr> {
-            coords: (0..100)
-                .map(|i: u64| Value::known((Fr::from(i), Fr::random(&mut rng))))
-                .collect(),
-            a: Value::known(Fr::from(100u64)),
-        };
-
-        let prover = MockProver::run(k, &circuit, vec![]).unwrap();
-        prover.assert_satisfied();
-
-        let fd = std::fs::File::open(format!("../halo2_ecc/params/kzg_bn254_{k}.srs").as_str());
-        let params = if let Ok(mut f) = fd {
-            println!("Found existing params file. Reading params...");
-            ParamsKZG::<Bn256>::read(&mut f).unwrap()
-        } else {
-            ParamsKZG::<Bn256>::setup(k, &mut rng)
-        };
-
-        let vk_time = start_timer!(|| "Generating vkey");
-        let vk = keygen_vk(&params, &circuit)?;
-        end_timer!(vk_time);
-
-        let pk_time = start_timer!(|| "Generating pkey");
-        let pk = keygen_pk(&params, vk, &circuit)?;
-        end_timer!(pk_time);
-
-        // create a proof
-        let proof_time = start_timer!(|| "Proving time");
-        let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
-        create_proof::<
-            KZGCommitmentScheme<Bn256>,
-            ProverSHPLONK<'_, Bn256>,
-            Challenge255<G1Affine>,
-            _,
-            Blake2bWrite<Vec<u8>, G1Affine, Challenge255<G1Affine>>,
-            _,
-        >(&params, &pk, &[circuit], &[&[]], rng, &mut transcript)?;
-        let proof = transcript.finalize();
-        end_timer!(proof_time);
-
-        let verify_time = start_timer!(|| "Verify time");
-        let verifier_params = params.verifier_params();
-        let strategy = SingleStrategy::new(&params);
-        let mut transcript = Blake2bRead::<_, _, Challenge255<_>>::init(&proof[..]);
-        assert!(verify_proof::<
-            KZGCommitmentScheme<Bn256>,
-            VerifierSHPLONK<'_, Bn256>,
-            Challenge255<G1Affine>,
-            Blake2bRead<&[u8], G1Affine, Challenge255<G1Affine>>,
-            SingleStrategy<'_, Bn256>,
-        >(verifier_params, pk.get_vk(), strategy, &[&[]], &mut transcript)
-        .is_ok());
-        end_timer!(verify_time);
-
-        Ok(())
-    }
-}
-*/
diff --git a/halo2-base/src/lib.rs b/halo2-base/src/lib.rs
index ce797a78..1bff40c8 100644
--- a/halo2-base/src/lib.rs
+++ b/halo2-base/src/lib.rs
@@ -107,7 +107,7 @@ pub struct Context<F: ScalarField> {
     /// this is the single column of advice cells exactly as they should be assigned
     pub advice: Vec<Assigned<F>>,
     /// `cells_to_lookup` is a vector keeping track of all cells that we want to enable lookup for. When there is more than 1 advice column we will copy_advice all of these cells to the single lookup enabled column and do lookups there
-    pub cells_to_lookup: Vec<usize>, // `i` in `cells_to_lookup` means we want to lookup `advice[i]`
+    pub cells_to_lookup: Vec<AssignedValue<F>>,
 
     pub zero_cell: Option<AssignedValue<F>>,
 
diff --git a/halo2-base/src/utils.rs b/halo2-base/src/utils.rs
index 253ec62d..5c0c0a47 100644
--- a/halo2-base/src/utils.rs
+++ b/halo2-base/src/utils.rs
@@ -52,7 +52,9 @@ where
 #[cfg(feature = "halo2-axiom")]
 pub trait PrimeField = BigPrimeField;
 #[cfg(feature = "halo2-pse")]
-pub trait PrimeField = FieldExt<Repr = [u8; 32]> + Hash;
+pub trait BigPrimeField = FieldExt<Repr = [u8; 32]> + Hash;
+#[cfg(feature = "halo2-pse")]
+pub trait PrimeField = BigPrimeField;
 
 #[cfg(feature = "halo2-pse")]
 pub trait ScalarField = FieldExt + Hash;

From eff200f78b88610919c4e3d0accc319f688d98a5 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Wed, 8 Feb 2023 10:50:33 -0800
Subject: [PATCH 08/26] fix: change `AssignedValue` type to
 `KeccakAssignedValue` for compatibility after halo2-base update

---
 halo2-base/src/gates/tests.rs                 |  1 -
 halo2-ecc/src/bigint/mod.rs                   |  2 +-
 halo2-ecc/src/lib.rs                          |  8 ++---
 .../zkevm-keccak/src/keccak_packed_multi.rs   | 33 ++++++++-----------
 .../src/keccak_packed_multi/tests.rs          |  3 ++
 5 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/halo2-base/src/gates/tests.rs b/halo2-base/src/gates/tests.rs
index e6941afa..01371c28 100644
--- a/halo2-base/src/gates/tests.rs
+++ b/halo2-base/src/gates/tests.rs
@@ -5,7 +5,6 @@ use crate::halo2_proofs::{dev::MockProver, halo2curves::bn256::Fr};
 use crate::utils::{BigPrimeField, ScalarField};
 use crate::{Context, QuantumCell::Constant};
 use ff::Field;
-use itertools::Itertools;
 use rand::rngs::OsRng;
 use rayon::prelude::*;
 
diff --git a/halo2-ecc/src/bigint/mod.rs b/halo2-ecc/src/bigint/mod.rs
index 473c5829..41e080d5 100644
--- a/halo2-ecc/src/bigint/mod.rs
+++ b/halo2-ecc/src/bigint/mod.rs
@@ -3,7 +3,7 @@ use crate::halo2_proofs::{
     plonk::ConstraintSystem,
 };
 use halo2_base::{
-    gates::{flex_gate::FlexGateConfig, GateInstructions},
+    gates::flex_gate::{FlexGateConfig, GateInstructions},
     utils::{biguint_to_fe, decompose_biguint, fe_to_biguint, PrimeField},
     AssignedValue, Context,
     QuantumCell::{Constant, Existing, Witness},
diff --git a/halo2-ecc/src/lib.rs b/halo2-ecc/src/lib.rs
index ddf2763d..cfa6e1f5 100644
--- a/halo2-ecc/src/lib.rs
+++ b/halo2-ecc/src/lib.rs
@@ -4,11 +4,11 @@
 #![feature(int_log)]
 
 pub mod bigint;
-pub mod ecc;
-pub mod fields;
+//pub mod ecc;
+//pub mod fields;
 
-pub mod bn254;
-pub mod secp256k1;
+//pub mod bn254;
+//pub mod secp256k1;
 
 pub use halo2_base;
 pub(crate) use halo2_base::halo2_proofs;
diff --git a/hashes/zkevm-keccak/src/keccak_packed_multi.rs b/hashes/zkevm-keccak/src/keccak_packed_multi.rs
index 085ff9c6..d474f962 100644
--- a/hashes/zkevm-keccak/src/keccak_packed_multi.rs
+++ b/hashes/zkevm-keccak/src/keccak_packed_multi.rs
@@ -16,7 +16,7 @@ use crate::halo2_proofs::{
     },
     poly::Rotation,
 };
-use halo2_base::AssignedValue;
+use halo2_base::halo2_proofs::{circuit::AssignedCell, plonk::Assigned};
 use itertools::Itertools;
 use log::{debug, info};
 use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
@@ -382,33 +382,26 @@ impl KeccakTable {
     }
 }
 
+#[cfg(feature = "halo2-axiom")]
+type KeccakAssignedValue<'v, F> = AssignedCell<&'v Assigned<F>, F>;
+#[cfg(not(feature = "halo2-axiom"))]
+type KeccakAssignedValue<'v, F> = AssignedCell<F, F>;
+
 pub fn assign_advice_custom<'v, F: Field>(
     region: &mut Region<F>,
     column: Column<Advice>,
     offset: usize,
     value: Value<F>,
-) -> AssignedValue<'v, F> {
+) -> KeccakAssignedValue<'v, F> {
     #[cfg(feature = "halo2-axiom")]
     {
-        AssignedValue {
-            cell: region.assign_advice(column, offset, value).unwrap(),
-            #[cfg(feature = "display")]
-            context_id: usize::MAX,
-        }
+        region.assign_advice(column, offset, value).unwrap()
     }
     #[cfg(feature = "halo2-pse")]
     {
-        AssignedValue {
-            cell: region
-                .assign_advice(|| format!("assign advice {}", offset), column, offset, || value)
-                .unwrap()
-                .cell(),
-            value,
-            row_offset: offset,
-            _marker: PhantomData,
-            #[cfg(feature = "display")]
-            context_id: usize::MAX,
-        }
+        region
+            .assign_advice(|| format!("assign advice {}", offset), column, offset, || value)
+            .unwrap()
     }
 }
 
@@ -1604,7 +1597,7 @@ pub fn keccak_phase1<'v, F: Field>(
     keccak_table: &KeccakTable,
     bytes: &[u8],
     challenge: Value<F>,
-    input_rlcs: &mut Vec<AssignedValue<'v, F>>,
+    input_rlcs: &mut Vec<KeccakAssignedValue<'v, F>>,
     offset: &mut usize,
 ) {
     let num_chunks = get_num_keccak_f(bytes.len());
@@ -1967,7 +1960,7 @@ pub fn multi_keccak_phase1<'a, 'v, F: Field>(
     bytes: impl IntoIterator<Item = &'a [u8]>,
     challenge: Value<F>,
     squeeze_digests: Vec<[F; NUM_WORDS_TO_SQUEEZE]>,
-) -> (Vec<AssignedValue<'v, F>>, Vec<AssignedValue<'v, F>>) {
+) -> (Vec<KeccakAssignedValue<'v, F>>, Vec<KeccakAssignedValue<'v, F>>) {
     let mut input_rlcs = Vec::with_capacity(squeeze_digests.len());
     let mut output_rlcs = Vec::with_capacity(squeeze_digests.len());
 
diff --git a/hashes/zkevm-keccak/src/keccak_packed_multi/tests.rs b/hashes/zkevm-keccak/src/keccak_packed_multi/tests.rs
index 7af3ba4d..4619a197 100644
--- a/hashes/zkevm-keccak/src/keccak_packed_multi/tests.rs
+++ b/hashes/zkevm-keccak/src/keccak_packed_multi/tests.rs
@@ -38,6 +38,9 @@ impl<F: Field> Circuit<F> for KeccakCircuit<F> {
     }
 
     fn configure(meta: &mut ConstraintSystem<F>) -> Self::Config {
+        // MockProver complains if you only have columns in SecondPhase, so let's just make an empty column in FirstPhase
+        meta.advice_column();
+
         let challenge = meta.challenge_usable_after(FirstPhase);
         KeccakCircuitConfig::new(meta, challenge)
     }

From 2ef4894e8da18ca0145ab2af3df73f4bb303f398 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Fri, 10 Feb 2023 01:00:45 -0800
Subject: [PATCH 09/26] feat: update halo2-ecc to v0.3.0

* add multi-threaded witness assignment support for `variable_base_msm`
  and `fixed_base_msm`
* witness generation for a batch-size-100 MSM went from 500ms to 100ms
---
 Cargo.toml                                    |   4 +-
 halo2-base/benches/inner_product.rs           |   2 +-
 halo2-base/benches/mul.rs                     |   2 +-
 halo2-base/examples/inner_product.rs          |   2 +-
 halo2-base/src/gates/builder.rs               |  40 +-
 halo2-base/src/gates/flex_gate.rs             |  61 +-
 halo2-base/src/gates/mod.rs                   |   3 +
 halo2-base/src/gates/range.rs                 |  37 +-
 halo2-base/src/gates/tests.rs                 |   8 +-
 halo2-base/src/lib.rs                         |  24 +-
 halo2-base/src/utils.rs                       |  29 +-
 halo2-ecc/Cargo.toml                          |   1 +
 halo2-ecc/benches/fixed_base_msm.rs           | 249 +++----
 halo2-ecc/benches/fp_mul.rs                   | 197 +++--
 halo2-ecc/benches/msm.rs                      | 343 +++------
 .../bn254}/bench_ec_add.config                |   0
 .../bn254}/bench_fixed_msm.config             |   0
 .../bn254}/bench_msm.config                   |   1 +
 .../bn254}/bench_pairing.config               |   0
 .../bn254}/ec_add_circuit.config              |   0
 .../bn254}/fixed_msm_circuit.config           |   0
 halo2-ecc/configs/bn254/msm_circuit.config    |   1 +
 .../bn254}/pairing_circuit.config             |   0
 .../secp256k1}/bench_ecdsa.config             |   0
 .../secp256k1}/ecdsa_circuit.config           |   0
 halo2-ecc/src/bigint/add_no_carry.rs          |  33 +-
 halo2-ecc/src/bigint/big_is_equal.rs          |  56 +-
 halo2-ecc/src/bigint/big_is_zero.rs           |  49 +-
 halo2-ecc/src/bigint/big_less_than.rs         |  12 +-
 halo2-ecc/src/bigint/carry_mod.rs             | 214 ++----
 .../src/bigint/check_carry_mod_to_zero.rs     | 138 +---
 halo2-ecc/src/bigint/check_carry_to_zero.rs   |  85 +--
 halo2-ecc/src/bigint/mod.rs                   | 183 ++---
 halo2-ecc/src/bigint/mul_no_carry.rs          |  47 +-
 halo2-ecc/src/bigint/negative.rs              |  12 +-
 .../src/bigint/scalar_mul_and_add_no_carry.rs |  49 +-
 halo2-ecc/src/bigint/scalar_mul_no_carry.rs   |  35 +-
 halo2-ecc/src/bigint/select.rs                |  50 +-
 halo2-ecc/src/bigint/select_by_indicator.rs   |  58 +-
 halo2-ecc/src/bigint/sub.rs                   |  63 +-
 halo2-ecc/src/bigint/sub_no_carry.rs          |  30 +-
 .../src/bn254/configs/msm_circuit.config      |   1 -
 halo2-ecc/src/bn254/final_exp.rs              | 105 ++-
 halo2-ecc/src/bn254/mod.rs                    |  10 +-
 halo2-ecc/src/bn254/pairing.rs                | 236 +++---
 .../src/bn254/results/msm_bench_internal.csv  |   7 -
 .../src/bn254/results/msm_bench_m2_simple.csv |   6 -
 .../results/msm_bench_m2_simple_plus.csv      |   6 -
 .../bn254/results/pairing_bench_results.txt   | 692 ------------------
 halo2-ecc/src/bn254/tests/ec_add.rs           | 317 ++------
 halo2-ecc/src/bn254/tests/fixed_base_msm.rs   | 385 +++-------
 halo2-ecc/src/bn254/tests/mod.rs              |  25 +-
 halo2-ecc/src/bn254/tests/msm.rs              | 456 ++++--------
 halo2-ecc/src/bn254/tests/pairing.rs          | 346 +++------
 halo2-ecc/src/ecc/ecdsa.rs                    |  45 +-
 halo2-ecc/src/ecc/fixed_base.rs               | 246 ++++---
 halo2-ecc/src/ecc/fixed_base_pippenger.rs     |  28 +-
 halo2-ecc/src/ecc/mod.rs                      | 511 +++++++------
 halo2-ecc/src/ecc/pippenger.rs                | 268 +++++--
 halo2-ecc/src/ecc/tests.rs                    | 191 ++---
 halo2-ecc/src/fields/fp.rs                    | 341 ++++-----
 halo2-ecc/src/fields/fp12.rs                  | 197 +++--
 halo2-ecc/src/fields/fp2.rs                   | 207 +++---
 halo2-ecc/src/fields/mod.rs                   | 227 +++---
 halo2-ecc/src/fields/tests.rs                 | 302 +++-----
 halo2-ecc/src/lib.rs                          |   7 +-
 .../src/secp256k1/results/ecdsa_bench_m1.csv  |  10 -
 .../secp256k1/results/ecdsa_bench_results.txt | 253 -------
 68 files changed, 2668 insertions(+), 4875 deletions(-)
 rename halo2-ecc/{src/bn254/configs => configs/bn254}/bench_ec_add.config (100%)
 rename halo2-ecc/{src/bn254/configs => configs/bn254}/bench_fixed_msm.config (100%)
 rename halo2-ecc/{src/bn254/configs => configs/bn254}/bench_msm.config (92%)
 rename halo2-ecc/{src/bn254/configs => configs/bn254}/bench_pairing.config (100%)
 rename halo2-ecc/{src/bn254/configs => configs/bn254}/ec_add_circuit.config (100%)
 rename halo2-ecc/{src/bn254/configs => configs/bn254}/fixed_msm_circuit.config (100%)
 create mode 100644 halo2-ecc/configs/bn254/msm_circuit.config
 rename halo2-ecc/{src/bn254/configs => configs/bn254}/pairing_circuit.config (100%)
 rename halo2-ecc/{src/secp256k1/configs => configs/secp256k1}/bench_ecdsa.config (100%)
 rename halo2-ecc/{src/secp256k1/configs => configs/secp256k1}/ecdsa_circuit.config (100%)
 delete mode 100644 halo2-ecc/src/bn254/configs/msm_circuit.config
 delete mode 100644 halo2-ecc/src/bn254/results/msm_bench_internal.csv
 delete mode 100644 halo2-ecc/src/bn254/results/msm_bench_m2_simple.csv
 delete mode 100644 halo2-ecc/src/bn254/results/msm_bench_m2_simple_plus.csv
 delete mode 100644 halo2-ecc/src/bn254/results/pairing_bench_results.txt
 delete mode 100644 halo2-ecc/src/secp256k1/results/ecdsa_bench_m1.csv
 delete mode 100644 halo2-ecc/src/secp256k1/results/ecdsa_bench_results.txt

diff --git a/Cargo.toml b/Cargo.toml
index a21fa775..9d8d2d5c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [workspace]
 members = [
     "halo2-base",
-    # "halo2-ecc",
+    "halo2-ecc",
     "hashes/zkevm-keccak",
 ]
 
@@ -31,7 +31,7 @@ debug-assertions = false
 lto = "fat" 
 # `codegen-units = 1` can lead to WORSE performance - always bench to find best profile for your machine!
 # codegen-units = 1
-panic = "abort"
+panic = "unwind"
 incremental = false
 
 # For performance profiling
diff --git a/halo2-base/benches/inner_product.rs b/halo2-base/benches/inner_product.rs
index 5d2902ae..9454faa3 100644
--- a/halo2-base/benches/inner_product.rs
+++ b/halo2-base/benches/inner_product.rs
@@ -73,7 +73,7 @@ fn bench(c: &mut Criterion) {
                 let a = (0..5).map(|_| Fr::random(OsRng)).collect_vec();
                 let b = (0..5).map(|_| Fr::random(OsRng)).collect_vec();
                 inner_prod_bench(builder.main(0), a, b);
-                let circuit = GateCircuitBuilder::witness_gen(builder, break_points.clone());
+                let circuit = GateCircuitBuilder::prover(builder, break_points.clone());
 
                 let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
                 create_proof::<
diff --git a/halo2-base/benches/mul.rs b/halo2-base/benches/mul.rs
index 97514e47..16687e08 100644
--- a/halo2-base/benches/mul.rs
+++ b/halo2-base/benches/mul.rs
@@ -56,7 +56,7 @@ fn bench(c: &mut Criterion) {
                 let mut builder = GateThreadBuilder::new(true);
                 // do the computation
                 mul_bench(builder.main(0), inputs);
-                let circuit = GateCircuitBuilder::witness_gen(builder, break_points.clone());
+                let circuit = GateCircuitBuilder::prover(builder, break_points.clone());
 
                 let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
                 create_proof::<
diff --git a/halo2-base/examples/inner_product.rs b/halo2-base/examples/inner_product.rs
index d7976f47..8572817e 100644
--- a/halo2-base/examples/inner_product.rs
+++ b/halo2-base/examples/inner_product.rs
@@ -68,7 +68,7 @@ fn main() {
     let a = (0..5).map(|_| Fr::random(OsRng)).collect_vec();
     let b = (0..5).map(|_| Fr::random(OsRng)).collect_vec();
     inner_prod_bench(builder.main(0), a, b);
-    let circuit = GateCircuitBuilder::witness_gen(builder, break_points);
+    let circuit = GateCircuitBuilder::prover(builder, break_points);
 
     let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
     create_proof::<
diff --git a/halo2-base/src/gates/builder.rs b/halo2-base/src/gates/builder.rs
index c5601b18..c049ba28 100644
--- a/halo2-base/src/gates/builder.rs
+++ b/halo2-base/src/gates/builder.rs
@@ -13,8 +13,8 @@ use crate::{
 use serde::{Deserialize, Serialize};
 use std::{cell::RefCell, collections::HashMap};
 
-type ThreadBreakPoints = Vec<usize>;
-type MultiPhaseThreadBreakPoints = Vec<ThreadBreakPoints>;
+pub type ThreadBreakPoints = Vec<usize>;
+pub type MultiPhaseThreadBreakPoints = Vec<ThreadBreakPoints>;
 
 #[derive(Clone, Debug, Default)]
 pub struct GateThreadBuilder<F: ScalarField> {
@@ -33,12 +33,28 @@ impl<F: ScalarField> GateThreadBuilder<F> {
         Self { threads, thread_count: 1, witness_gen_only, use_unknown: false }
     }
 
+    pub fn mock() -> Self {
+        Self::new(false)
+    }
+
+    pub fn keygen() -> Self {
+        Self::new(false)
+    }
+
+    pub fn prover() -> Self {
+        Self::new(true)
+    }
+
     pub fn unknown(self, use_unknown: bool) -> Self {
         Self { use_unknown, ..self }
     }
 
     pub fn main(&mut self, phase: usize) -> &mut Context<F> {
-        self.threads[phase].first_mut().unwrap()
+        if self.threads[phase].is_empty() {
+            self.new_thread(phase)
+        } else {
+            self.threads[phase].last_mut().unwrap()
+        }
     }
 
     pub fn witness_gen_only(&self) -> bool {
@@ -141,10 +157,11 @@ impl<F: ScalarField> GateThreadBuilder<F> {
             let mut row_offset = 0;
             let mut lookup_offset = 0;
             let mut lookup_col = 0;
-            for ctx in threads {
+            for mut ctx in threads {
                 let mut basic_gate = config.basic_gates[phase]
                         .get(gate_index)
                         .unwrap_or_else(|| panic!("NOT ENOUGH ADVICE COLUMNS IN PHASE {phase}. Perhaps blinding factors were not taken into account. The max non-poisoned rows is {max_rows}"));
+                ctx.selector.resize(ctx.advice.len(), false);
 
                 for (i, (advice, q)) in ctx.advice.iter().zip(ctx.selector.into_iter()).enumerate()
                 {
@@ -214,6 +231,8 @@ impl<F: ScalarField> GateThreadBuilder<F> {
                     }
                 }
 
+                // warning: currently we assume equality constraints in thread i only involves threads <= i
+                // I guess a fix is to just rerun this several times?
                 for (left, right) in ctx.advice_equality_constraints {
                     let (left, _) = assigned_advices[&(left.context_id, left.offset)];
                     let (right, _) = assigned_advices[&(right.context_id, right.offset)];
@@ -364,7 +383,7 @@ impl<F: ScalarField> GateCircuitBuilder<F> {
         Self { builder: RefCell::new(builder.unknown(false)), break_points: RefCell::new(vec![]) }
     }
 
-    pub fn witness_gen(
+    pub fn prover(
         builder: GateThreadBuilder<F>,
         break_points: MultiPhaseThreadBreakPoints,
     ) -> Self {
@@ -441,11 +460,11 @@ impl<F: ScalarField> RangeCircuitBuilder<F> {
         Self(GateCircuitBuilder::mock(builder))
     }
 
-    pub fn witness_gen(
+    pub fn prover(
         builder: GateThreadBuilder<F>,
         break_points: MultiPhaseThreadBreakPoints,
     ) -> Self {
-        Self(GateCircuitBuilder::witness_gen(builder, break_points))
+        Self(GateCircuitBuilder::prover(builder, break_points))
     }
 }
 
@@ -529,3 +548,10 @@ impl<F: ScalarField> Circuit<F> for RangeCircuitBuilder<F> {
         )
     }
 }
+
+#[derive(Clone, Copy, Debug)]
+pub enum CircuitBuilderStage {
+    Keygen,
+    Prover,
+    Mock,
+}
diff --git a/halo2-base/src/gates/flex_gate.rs b/halo2-base/src/gates/flex_gate.rs
index c4bbc4b4..a70de4b8 100644
--- a/halo2-base/src/gates/flex_gate.rs
+++ b/halo2-base/src/gates/flex_gate.rs
@@ -146,7 +146,7 @@ pub trait GateInstructions<F: ScalarField> {
         let a = a.into();
         let b = b.into();
         let out_val = *a.value() + b.value();
-        ctx.assign_region_last(vec![a, b, Constant(F::one()), Witness(out_val)], vec![0])
+        ctx.assign_region_last([a, b, Constant(F::one()), Witness(out_val)], [0])
     }
 
     /// Copies a, b and constrains `a + b * (-1) = out`
@@ -161,7 +161,7 @@ pub trait GateInstructions<F: ScalarField> {
         let b = b.into();
         let out_val = *a.value() - b.value();
         // slightly better to not have to compute -F::one() since F::one() is cached
-        ctx.assign_region(vec![Witness(out_val), b, Constant(F::one()), a], vec![0]);
+        ctx.assign_region([Witness(out_val), b, Constant(F::one()), a], [0]);
         ctx.get(-4)
     }
 
@@ -169,10 +169,7 @@ pub trait GateInstructions<F: ScalarField> {
     fn neg(&self, ctx: &mut Context<F>, a: impl Into<QuantumCell<F>>) -> AssignedValue<F> {
         let a = a.into();
         let out_val = -*a.value();
-        ctx.assign_region(
-            vec![a, Witness(out_val), Constant(F::one()), Constant(F::zero())],
-            vec![0],
-        );
+        ctx.assign_region([a, Witness(out_val), Constant(F::one()), Constant(F::zero())], [0]);
         ctx.get(-3)
     }
 
@@ -187,7 +184,7 @@ pub trait GateInstructions<F: ScalarField> {
         let a = a.into();
         let b = b.into();
         let out_val = *a.value() * b.value();
-        ctx.assign_region_last(vec![Constant(F::zero()), a, b, Witness(out_val)], vec![0])
+        ctx.assign_region_last([Constant(F::zero()), a, b, Witness(out_val)], [0])
     }
 
     /// a * b + c
@@ -202,7 +199,7 @@ pub trait GateInstructions<F: ScalarField> {
         let b = b.into();
         let c = c.into();
         let out_val = *a.value() * b.value() + c.value();
-        ctx.assign_region_last(vec![c, a, b, Witness(out_val)], vec![0])
+        ctx.assign_region_last([c, a, b, Witness(out_val)], [0])
     }
 
     /// (1 - a) * b = b - a * b
@@ -215,16 +212,13 @@ pub trait GateInstructions<F: ScalarField> {
         let a = a.into();
         let b = b.into();
         let out_val = (F::one() - a.value()) * b.value();
-        ctx.assign_region_smart(vec![Witness(out_val), a, b, b], vec![0], vec![(2, 3)], []);
+        ctx.assign_region_smart([Witness(out_val), a, b, b], [0], [(2, 3)], []);
         ctx.get(-4)
     }
 
     /// Constrain x is 0 or 1.
     fn assert_bit(&self, ctx: &mut Context<F>, x: AssignedValue<F>) {
-        ctx.assign_region(
-            vec![Constant(F::zero()), Existing(x), Existing(x), Existing(x)],
-            vec![0],
-        );
+        ctx.assign_region([Constant(F::zero()), Existing(x), Existing(x), Existing(x)], [0]);
     }
 
     fn div_unsafe(
@@ -238,7 +232,7 @@ pub trait GateInstructions<F: ScalarField> {
         // TODO: if really necessary, make `c` of type `Assigned<F>`
         // this would require the API using `Assigned<F>` instead of `F` everywhere, so leave as last resort
         let c = b.value().invert().unwrap() * a.value();
-        ctx.assign_region(vec![Constant(F::zero()), Witness(c), b, a], vec![0]);
+        ctx.assign_region([Constant(F::zero()), Witness(c), b, a], [0]);
         ctx.get(-3)
     }
 
@@ -387,7 +381,7 @@ pub trait GateInstructions<F: ScalarField> {
         let b = b.into();
         let not_b_val = F::one() - b.value();
         let out_val = *a.value() + b.value() - *a.value() * b.value();
-        let cells = vec![
+        let cells = [
             Witness(not_b_val),
             Constant(F::one()),
             b,
@@ -397,7 +391,7 @@ pub trait GateInstructions<F: ScalarField> {
             Witness(not_b_val),
             Witness(out_val),
         ];
-        ctx.assign_region_smart(cells, vec![0, 4], vec![(0, 6), (2, 4)], vec![]);
+        ctx.assign_region_smart(cells, [0, 4], [(0, 6), (2, 4)], []);
         ctx.last().unwrap()
     }
 
@@ -447,13 +441,13 @@ pub trait GateInstructions<F: ScalarField> {
 
         let (inv_last_bit, last_bit) = {
             ctx.assign_region(
-                vec![
+                [
                     Witness(F::one() - bits[k - 1].value()),
                     Existing(bits[k - 1]),
                     Constant(F::one()),
                     Constant(F::one()),
                 ],
-                vec![0],
+                [0],
             );
             (ctx.get(-4), ctx.get(-3))
         };
@@ -465,13 +459,13 @@ pub trait GateInstructions<F: ScalarField> {
             for old_idx in 0..(1 << idx) {
                 let inv_prod_val = (F::one() - bit.value()) * indicator[offset + old_idx].value();
                 ctx.assign_region(
-                    vec![
+                    [
                         Witness(inv_prod_val),
                         Existing(indicator[offset + old_idx]),
                         Existing(*bit),
                         Existing(indicator[offset + old_idx]),
                     ],
-                    vec![0],
+                    [0],
                 );
                 indicator.push(ctx.get(-4));
 
@@ -499,7 +493,7 @@ pub trait GateInstructions<F: ScalarField> {
             let ind_val = F::from(idx_val == i);
             let val = if idx_val == i { *idx.value() } else { F::zero() };
             ctx.assign_region_smart(
-                vec![
+                [
                     Constant(F::zero()),
                     Witness(ind_val),
                     idx,
@@ -508,9 +502,9 @@ pub trait GateInstructions<F: ScalarField> {
                     Witness(ind_val),
                     Constant(F::zero()),
                 ],
-                vec![0, 3],
-                vec![(1, 5)],
-                vec![],
+                [0, 3],
+                [(1, 5)],
+                [],
             );
             // need to use assigned idx after i > 0 so equality constraint holds
             if i == 0 {
@@ -576,7 +570,7 @@ pub trait GateInstructions<F: ScalarField> {
             (F::zero(), Assigned::Rational(F::one(), *x))
         };
 
-        let cells = vec![
+        let cells = [
             Witness(is_zero),
             Existing(a),
             WitnessFraction(inv),
@@ -586,7 +580,7 @@ pub trait GateInstructions<F: ScalarField> {
             Witness(is_zero),
             Constant(F::zero()),
         ];
-        ctx.assign_region_smart(cells, vec![0, 4], vec![(0, 6)], []);
+        ctx.assign_region_smart(cells, [0, 4], [(0, 6)], []);
         ctx.get(-2)
     }
 
@@ -843,7 +837,7 @@ impl<F: ScalarField> GateInstructions<F> for GateChip<F> {
             // | a - b | 1 | b | a |
             // | b | sel | a - b | out |
             GateStrategy::Vertical => {
-                let cells = vec![
+                let cells = [
                     Witness(diff_val),
                     Constant(F::one()),
                     b,
@@ -853,7 +847,7 @@ impl<F: ScalarField> GateInstructions<F> for GateChip<F> {
                     Witness(diff_val),
                     Witness(out_val),
                 ];
-                ctx.assign_region_smart(cells, vec![0, 4], vec![(0, 6), (2, 4)], []);
+                ctx.assign_region_smart(cells, [0, 4], [(0, 6), (2, 4)], []);
                 ctx.last().unwrap()
             }
         }
@@ -875,7 +869,7 @@ impl<F: ScalarField> GateInstructions<F> for GateChip<F> {
         let not_bc_val = F::one() - bc_val;
         let not_a_val = *a.value() - F::one();
         let out_val = bc_val + a.value() - bc_val * a.value();
-        let cells = vec![
+        let cells = [
             Witness(not_bc_val),
             b,
             c,
@@ -888,7 +882,7 @@ impl<F: ScalarField> GateInstructions<F> for GateChip<F> {
             Constant(F::one()),
             a,
         ];
-        ctx.assign_region_smart(cells, vec![0, 3, 7], vec![(4, 7), (0, 5)], []);
+        ctx.assign_region_smart(cells, [0, 3, 7], [(4, 7), (0, 5)], []);
         ctx.get(-5)
     }
 
@@ -904,21 +898,22 @@ impl<F: ScalarField> GateInstructions<F> for GateChip<F> {
             .as_ref()
             .iter()
             .flat_map(|byte| (0..8).map(|i| (*byte as u64 >> i) & 1))
+            .map(|x| Witness(F::from(x)))
             .take(range_bits)
-            .map(|x| F::from(x));
+            .collect::<Vec<_>>();
 
         let mut bit_cells = Vec::with_capacity(range_bits);
         let row_offset = ctx.advice.len();
         let acc = self.inner_product(
             ctx,
-            bits.map(Witness),
+            bits,
             self.pow_of_two[..range_bits].iter().map(|c| Constant(*c)),
         );
         ctx.constrain_equal(&a, &acc);
         debug_assert!(range_bits > 0);
         bit_cells.push(ctx.get(row_offset as isize));
         for i in 1..range_bits {
-            bit_cells.push(ctx.get((row_offset + 1 + 3 * (i - 2)) as isize));
+            bit_cells.push(ctx.get((row_offset + 1 + 3 * (i - 1)) as isize));
         }
 
         for bit_cell in &bit_cells {
diff --git a/halo2-base/src/gates/mod.rs b/halo2-base/src/gates/mod.rs
index 6bdde332..705542b1 100644
--- a/halo2-base/src/gates/mod.rs
+++ b/halo2-base/src/gates/mod.rs
@@ -4,3 +4,6 @@ pub mod range;
 
 #[cfg(test)]
 pub mod tests;
+
+pub use flex_gate::{GateChip, GateInstructions};
+pub use range::{RangeChip, RangeInstructions};
diff --git a/halo2-base/src/gates/range.rs b/halo2-base/src/gates/range.rs
index 6c41e8bb..ff5e4ae3 100644
--- a/halo2-base/src/gates/range.rs
+++ b/halo2-base/src/gates/range.rs
@@ -1,13 +1,5 @@
 use crate::{
     gates::flex_gate::{FlexGateConfig, GateInstructions, GateStrategy, MAX_PHASE},
-    utils::{
-        biguint_to_fe, bit_length, decompose_fe_to_u64_limbs, fe_to_biguint, BigPrimeField,
-        ScalarField,
-    },
-    AssignedValue,
-    QuantumCell::{self, Constant, Existing, Witness},
-};
-use crate::{
     halo2_proofs::{
         circuit::{Layouter, Value},
         plonk::{
@@ -15,8 +7,12 @@ use crate::{
         },
         poly::Rotation,
     },
-    utils::PrimeField,
-    Context,
+    utils::{
+        biguint_to_fe, bit_length, decompose_fe_to_u64_limbs, fe_to_biguint, BigPrimeField,
+        ScalarField,
+    },
+    AssignedValue, Context,
+    QuantumCell::{self, Constant, Existing, Witness},
 };
 use num_bigint::BigUint;
 use num_integer::Integer;
@@ -249,17 +245,14 @@ pub trait RangeInstructions<F: ScalarField> {
         a_num_bits: usize,
     ) -> (AssignedValue<F>, AssignedValue<F>)
     where
-        F: PrimeField,
+        F: BigPrimeField,
     {
         let a = a.into();
         let b = b.into();
         let a_val = fe_to_biguint(a.value());
         let (div, rem) = a_val.div_mod_floor(&b);
         let [div, rem] = [div, rem].map(|v| biguint_to_fe(&v));
-        ctx.assign_region(
-            vec![Witness(rem), Constant(biguint_to_fe(&b)), Witness(div), a],
-            vec![0],
-        );
+        ctx.assign_region([Witness(rem), Constant(biguint_to_fe(&b)), Witness(div), a], [0]);
         let rem = ctx.get(-4);
         let div = ctx.get(-2);
         self.check_big_less_than_safe(
@@ -301,8 +294,8 @@ pub trait RangeInstructions<F: ScalarField> {
         let x_fe = self.gate().pow_of_two()[b_num_bits];
         let [div, div_hi, div_lo, rem] = [div, div_hi, div_lo, rem].map(|v| biguint_to_fe(&v));
         ctx.assign_region(
-            vec![Witness(div_lo), Witness(div_hi), Constant(x_fe), Witness(div), Witness(rem)],
-            vec![0],
+            [Witness(div_lo), Witness(div_hi), Constant(x_fe), Witness(div), Witness(rem)],
+            [0],
         );
         let [div_lo, div_hi, div, rem] = [-5, -4, -2, -1].map(|i| ctx.get(i));
         self.range_check(ctx, div_lo, b_num_bits);
@@ -337,7 +330,7 @@ pub trait RangeInstructions<F: ScalarField> {
         };
         let two = self.gate().get_field_element(2u64);
         let h_v = (*a_v - bit_v) * two.invert().unwrap();
-        ctx.assign_region(vec![Witness(bit_v), Witness(h_v), Constant(two), Existing(a)], vec![0]);
+        ctx.assign_region([Witness(bit_v), Witness(h_v), Constant(two), Existing(a)], [0]);
 
         let half = ctx.get(-3);
         self.range_check(ctx, half, limb_bits - 1);
@@ -450,7 +443,7 @@ impl<F: ScalarField> RangeInstructions<F> for RangeChip<F> {
             RangeStrategy::Vertical => {
                 let shift_a_val = pow_of_two + a.value();
                 // | a + 2^(num_bits) - b | b | 1 | a + 2^(num_bits) | - 2^(num_bits) | 1 | a |
-                let cells = vec![
+                let cells = [
                     Witness(shift_a_val - b.value()),
                     b,
                     Constant(F::one()),
@@ -459,7 +452,7 @@ impl<F: ScalarField> RangeInstructions<F> for RangeChip<F> {
                     Constant(F::one()),
                     a,
                 ];
-                ctx.assign_region(cells, vec![0, 3]);
+                ctx.assign_region(cells, [0, 3]);
                 ctx.get(-7)
             }
         };
@@ -487,7 +480,7 @@ impl<F: ScalarField> RangeInstructions<F> for RangeChip<F> {
         let shifted_cell = match self.strategy {
             RangeStrategy::Vertical => {
                 ctx.assign_region(
-                    vec![
+                    [
                         Witness(shifted_val),
                         b,
                         Constant(F::one()),
@@ -496,7 +489,7 @@ impl<F: ScalarField> RangeInstructions<F> for RangeChip<F> {
                         Constant(F::one()),
                         a,
                     ],
-                    vec![0, 3],
+                    [0, 3],
                 );
                 ctx.get(-7)
             }
diff --git a/halo2-base/src/gates/tests.rs b/halo2-base/src/gates/tests.rs
index 01371c28..cf6a3cb6 100644
--- a/halo2-base/src/gates/tests.rs
+++ b/halo2-base/src/gates/tests.rs
@@ -36,7 +36,7 @@ fn gate_tests<F: ScalarField>(ctx: &mut Context<F>, inputs: [F; 3]) {
 fn test_gates() {
     let k = 6;
     let inputs = [10u64, 12u64, 120u64].map(Fr::from);
-    let mut builder = GateThreadBuilder::new(false);
+    let mut builder = GateThreadBuilder::mock();
     gate_tests(builder.main(0), inputs);
 
     // auto-tune circuit
@@ -51,7 +51,7 @@ fn test_gates() {
 fn test_multithread_gates() {
     let k = 6;
     let inputs = [10u64, 12u64, 120u64].map(Fr::from);
-    let mut builder = GateThreadBuilder::new(false);
+    let mut builder = GateThreadBuilder::mock();
     gate_tests(builder.main(0), inputs);
 
     let thread_ids = (0..4).map(|_| builder.get_new_thread_id()).collect::<Vec<_>>();
@@ -120,7 +120,7 @@ fn range_tests<F: BigPrimeField>(
 fn test_range_single() {
     let k = 11;
     let inputs = [100, 101].map(Fr::from);
-    let mut builder = GateThreadBuilder::new(false);
+    let mut builder = GateThreadBuilder::mock();
     range_tests(builder.main(0), 3, inputs, 8, 8);
 
     // auto-tune circuit
@@ -135,7 +135,7 @@ fn test_range_single() {
 fn test_range_multicolumn() {
     let k = 5;
     let inputs = [100, 101].map(Fr::from);
-    let mut builder = GateThreadBuilder::new(false);
+    let mut builder = GateThreadBuilder::mock();
     range_tests(builder.main(0), 3, inputs, 8, 8);
 
     // auto-tune circuit
diff --git a/halo2-base/src/lib.rs b/halo2-base/src/lib.rs
index 1bff40c8..ccf4f973 100644
--- a/halo2-base/src/lib.rs
+++ b/halo2-base/src/lib.rs
@@ -233,18 +233,22 @@ impl<F: ScalarField> Context<F> {
     ) where
         Q: Into<QuantumCell<F>>,
     {
-        for input in inputs {
-            self.assign_cell(input);
-        }
-
-        if !self.witness_gen_only {
-            let row_offset = self.selector.len();
+        if self.witness_gen_only {
+            for input in inputs {
+                self.assign_cell(input);
+            }
+        } else {
+            let row_offset = self.advice.len();
+            // note: row_offset may not equal self.selector.len() at this point if we previously used `load_constant` or `load_witness`
+            for input in inputs {
+                self.assign_cell(input);
+            }
             self.selector.resize(self.advice.len(), false);
             for offset in gate_offsets {
                 *self
                     .selector
                     .get_mut(row_offset.checked_add_signed(offset).expect("Invalid gate offset"))
-                    .expect("Gate offset out of bounds") = true;
+                    .expect("Invalid selector offset") = true;
             }
         }
     }
@@ -322,11 +326,17 @@ impl<F: ScalarField> Context<F> {
 
     pub fn load_witness(&mut self, witness: F) -> AssignedValue<F> {
         self.assign_cell(QuantumCell::Witness(witness));
+        if !self.witness_gen_only {
+            self.selector.resize(self.advice.len(), false);
+        }
         self.last().unwrap()
     }
 
     pub fn load_constant(&mut self, c: F) -> AssignedValue<F> {
         self.assign_cell(QuantumCell::Constant(c));
+        if !self.witness_gen_only {
+            self.selector.resize(self.advice.len(), false);
+        }
         self.last().unwrap()
     }
 
diff --git a/halo2-base/src/utils.rs b/halo2-base/src/utils.rs
index 5c0c0a47..6802b71c 100644
--- a/halo2-base/src/utils.rs
+++ b/halo2-base/src/utils.rs
@@ -48,13 +48,10 @@ where
     }
 }
 
-// Later: will need to separate PrimeField from ScalarField when Goldilocks is introduced
-#[cfg(feature = "halo2-axiom")]
-pub trait PrimeField = BigPrimeField;
+// Later: will need to separate BigPrimeField from ScalarField when Goldilocks is introduced
+
 #[cfg(feature = "halo2-pse")]
 pub trait BigPrimeField = FieldExt<Repr = [u8; 32]> + Hash;
-#[cfg(feature = "halo2-pse")]
-pub trait PrimeField = BigPrimeField;
 
 #[cfg(feature = "halo2-pse")]
 pub trait ScalarField = FieldExt + Hash;
@@ -105,16 +102,16 @@ pub fn log2_ceil(x: u64) -> usize {
     (u64::BITS - x.leading_zeros() - (x & (x - 1) == 0) as u32) as usize
 }
 
-pub fn modulus<F: PrimeField>() -> BigUint {
+pub fn modulus<F: BigPrimeField>() -> BigUint {
     fe_to_biguint(&-F::one()) + 1u64
 }
 
-pub fn power_of_two<F: PrimeField>(n: usize) -> F {
+pub fn power_of_two<F: BigPrimeField>(n: usize) -> F {
     biguint_to_fe(&(BigUint::one() << n))
 }
 
 /// assume `e` less than modulus of F
-pub fn biguint_to_fe<F: PrimeField>(e: &BigUint) -> F {
+pub fn biguint_to_fe<F: BigPrimeField>(e: &BigUint) -> F {
     #[cfg(feature = "halo2-axiom")]
     {
         F::from_u64_digits(&e.to_u64_digits())
@@ -130,7 +127,7 @@ pub fn biguint_to_fe<F: PrimeField>(e: &BigUint) -> F {
 }
 
 /// assume `|e|` less than modulus of F
-pub fn bigint_to_fe<F: PrimeField>(e: &BigInt) -> F {
+pub fn bigint_to_fe<F: BigPrimeField>(e: &BigInt) -> F {
     #[cfg(feature = "halo2-axiom")]
     {
         let (sign, digits) = e.to_u64_digits();
@@ -158,7 +155,7 @@ pub fn fe_to_biguint<F: ff::PrimeField>(fe: &F) -> BigUint {
     BigUint::from_bytes_le(fe.to_repr().as_ref())
 }
 
-pub fn fe_to_bigint<F: PrimeField>(fe: &F) -> BigInt {
+pub fn fe_to_bigint<F: BigPrimeField>(fe: &F) -> BigInt {
     // TODO: `F` should just have modulus as lazy_static or something
     let modulus = modulus::<F>();
     let e = fe_to_biguint(fe);
@@ -169,7 +166,7 @@ pub fn fe_to_bigint<F: PrimeField>(fe: &F) -> BigInt {
     }
 }
 
-pub fn decompose<F: PrimeField>(e: &F, number_of_limbs: usize, bit_len: usize) -> Vec<F> {
+pub fn decompose<F: BigPrimeField>(e: &F, number_of_limbs: usize, bit_len: usize) -> Vec<F> {
     if bit_len > 64 {
         decompose_biguint(&fe_to_biguint(e), number_of_limbs, bit_len)
     } else {
@@ -194,7 +191,11 @@ pub fn decompose_fe_to_u64_limbs<F: ScalarField>(
     }
 }
 
-pub fn decompose_biguint<F: PrimeField>(e: &BigUint, num_limbs: usize, bit_len: usize) -> Vec<F> {
+pub fn decompose_biguint<F: BigPrimeField>(
+    e: &BigUint,
+    num_limbs: usize,
+    bit_len: usize,
+) -> Vec<F> {
     debug_assert!(bit_len > 64 && bit_len <= 128);
     let mut e = e.iter_u64_digits();
 
@@ -224,7 +225,7 @@ pub fn decompose_biguint<F: PrimeField>(e: &BigUint, num_limbs: usize, bit_len:
         .collect()
 }
 
-pub fn decompose_bigint<F: PrimeField>(e: &BigInt, num_limbs: usize, bit_len: usize) -> Vec<F> {
+pub fn decompose_bigint<F: BigPrimeField>(e: &BigInt, num_limbs: usize, bit_len: usize) -> Vec<F> {
     if e.is_negative() {
         decompose_biguint::<F>(e.magnitude(), num_limbs, bit_len).into_iter().map(|x| -x).collect()
     } else {
@@ -232,7 +233,7 @@ pub fn decompose_bigint<F: PrimeField>(e: &BigInt, num_limbs: usize, bit_len: us
     }
 }
 
-pub fn decompose_bigint_option<F: PrimeField>(
+pub fn decompose_bigint_option<F: BigPrimeField>(
     value: Value<&BigInt>,
     number_of_limbs: usize,
     bit_len: usize,
diff --git a/halo2-ecc/Cargo.toml b/halo2-ecc/Cargo.toml
index a142200d..0d5041b2 100644
--- a/halo2-ecc/Cargo.toml
+++ b/halo2-ecc/Cargo.toml
@@ -13,6 +13,7 @@ rand = "0.8"
 rand_chacha = "0.3.1"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
+rayon = "1.6.1"
 
 # arithmetic
 ff = "0.12"
diff --git a/halo2-ecc/benches/fixed_base_msm.rs b/halo2-ecc/benches/fixed_base_msm.rs
index 0bdf7e12..5c9589e4 100644
--- a/halo2-ecc/benches/fixed_base_msm.rs
+++ b/halo2-ecc/benches/fixed_base_msm.rs
@@ -1,166 +1,98 @@
-use criterion::{criterion_group, criterion_main};
-use criterion::{BenchmarkId, Criterion};
-
-#[allow(unused_imports)]
-use ff::PrimeField as _;
-use halo2_base::utils::modulus;
-use pprof::criterion::{Output, PProfProfiler};
-
 use ark_std::{end_timer, start_timer};
-use halo2_base::SKIP_FIRST_PASS;
-use rand_core::OsRng;
-use serde::{Deserialize, Serialize};
-use std::marker::PhantomData;
-
+use halo2_base::gates::{
+    builder::{
+        CircuitBuilderStage, GateThreadBuilder, MultiPhaseThreadBreakPoints, RangeCircuitBuilder,
+    },
+    RangeChip,
+};
 use halo2_base::halo2_proofs::{
     arithmetic::Field,
-    circuit::{Layouter, SimpleFloorPlanner, Value},
-    halo2curves::bn256::{Bn256, Fq, Fr, G1Affine},
+    halo2curves::bn256::{Bn256, Fr, G1Affine},
     plonk::*,
     poly::kzg::{
         commitment::{KZGCommitmentScheme, ParamsKZG},
         multiopen::ProverSHPLONK,
     },
-    transcript::TranscriptWriterBuffer,
-    transcript::{Blake2bWrite, Challenge255},
-};
-use halo2_base::{gates::GateInstructions, utils::PrimeField};
-use halo2_ecc::{
-    ecc::EccChip,
-    fields::fp::{FpConfig, FpStrategy},
+    transcript::{Blake2bWrite, Challenge255, TranscriptWriterBuffer},
 };
+use halo2_ecc::{bn254::FpChip, ecc::EccChip, fields::PrimeField};
+use rand::rngs::OsRng;
+use std::sync::Mutex;
 
-type FpChip<F> = FpConfig<F, Fq>;
+use criterion::{criterion_group, criterion_main};
+use criterion::{BenchmarkId, Criterion};
 
-#[derive(Serialize, Deserialize, Debug)]
+use pprof::criterion::{Output, PProfProfiler};
+// Thanks to the example provided by @jibbow in his article
+// https://www.jibbow.com/posts/criterion-flamegraphs/
+
+#[derive(Clone, Copy, Debug)]
 struct MSMCircuitParams {
-    strategy: FpStrategy,
     degree: u32,
-    num_advice: usize,
-    num_lookup_advice: usize,
-    num_fixed: usize,
     lookup_bits: usize,
     limb_bits: usize,
     num_limbs: usize,
     batch_size: usize,
-    radix: usize,
-    clump_factor: usize,
 }
 
-const BEST_100_CONFIG: MSMCircuitParams = MSMCircuitParams {
-    strategy: FpStrategy::Simple,
-    degree: 20,
-    num_advice: 10,
-    num_lookup_advice: 1,
-    num_fixed: 1,
-    lookup_bits: 19,
-    limb_bits: 88,
-    num_limbs: 3,
-    batch_size: 100,
-    radix: 0,
-    clump_factor: 4,
-};
+const BEST_100_CONFIG: MSMCircuitParams =
+    MSMCircuitParams { degree: 20, lookup_bits: 19, limb_bits: 88, num_limbs: 3, batch_size: 100 };
 
 const TEST_CONFIG: MSMCircuitParams = BEST_100_CONFIG;
 
-#[derive(Clone, Debug)]
-struct MSMConfig<F: PrimeField> {
-    fp_chip: FpChip<F>,
-    clump_factor: usize,
-}
-
-impl<F: PrimeField> MSMConfig<F> {
-    #[allow(clippy::too_many_arguments)]
-    pub fn configure(meta: &mut ConstraintSystem<F>, params: MSMCircuitParams) -> Self {
-        let fp_chip = FpChip::<F>::configure(
-            meta,
-            params.strategy,
-            &[params.num_advice],
-            &[params.num_lookup_advice],
-            params.num_fixed,
-            params.lookup_bits,
-            params.limb_bits,
-            params.num_limbs,
-            modulus::<Fq>(),
-            0,
-            params.degree as usize,
-        );
-        MSMConfig { fp_chip, clump_factor: params.clump_factor }
-    }
-}
-
-struct MSMCircuit<F: PrimeField> {
+fn fixed_base_msm_bench(
+    thread_pool: &Mutex<GateThreadBuilder<Fr>>,
+    params: MSMCircuitParams,
     bases: Vec<G1Affine>,
-    scalars: Vec<Option<Fr>>,
-    _marker: PhantomData<F>,
+    scalars: Vec<Fr>,
+) {
+    std::env::set_var("LOOKUP_BITS", params.lookup_bits.to_string());
+    let range = RangeChip::<Fr>::default(params.lookup_bits);
+    let fp_chip = FpChip::<Fr>::new(&range, params.limb_bits, params.num_limbs);
+    let ecc_chip = EccChip::new(&fp_chip);
+
+    let mut builder = thread_pool.lock().unwrap();
+    let scalars_assigned = scalars
+        .iter()
+        .map(|scalar| vec![builder.main(0).load_witness(*scalar)])
+        .collect::<Vec<_>>();
+    drop(builder);
+
+    ecc_chip.fixed_base_msm(thread_pool, &bases, scalars_assigned, Fr::NUM_BITS as usize);
 }
 
-impl Circuit<Fr> for MSMCircuit<Fr> {
-    type Config = MSMConfig<Fr>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self {
-            bases: self.bases.clone(),
-            scalars: vec![None; self.scalars.len()],
-            _marker: PhantomData,
+fn fixed_base_msm_circuit(
+    params: MSMCircuitParams,
+    stage: CircuitBuilderStage,
+    bases: Vec<G1Affine>,
+    scalars: Vec<Fr>,
+    break_points: Option<MultiPhaseThreadBreakPoints>,
+) -> RangeCircuitBuilder<Fr> {
+    let k = params.degree as usize;
+    let builder = match stage {
+        CircuitBuilderStage::Mock => GateThreadBuilder::mock(),
+        CircuitBuilderStage::Prover => GateThreadBuilder::prover(),
+        CircuitBuilderStage::Keygen => GateThreadBuilder::keygen(),
+    };
+    let builder = Mutex::new(builder);
+
+    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
+    fixed_base_msm_bench(&builder, params, bases, scalars);
+
+    let builder = builder.into_inner().unwrap();
+    let circuit = match stage {
+        CircuitBuilderStage::Mock => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::mock(builder)
         }
-    }
-
-    fn configure(meta: &mut ConstraintSystem<Fr>) -> Self::Config {
-        let params = TEST_CONFIG;
-
-        MSMConfig::<Fr>::configure(meta, params)
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<Fr>,
-    ) -> Result<(), Error> {
-        config.fp_chip.load_lookup_table(&mut layouter)?;
-
-        let mut first_pass = SKIP_FIRST_PASS;
-        layouter.assign_region(
-            || "fixed base msm",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let mut aux = config.fp_chip.new_context(region);
-                let ctx = &mut aux;
-
-                let witness_time = start_timer!(|| "Witness generation");
-                let mut scalars_assigned = Vec::new();
-                for scalar in &self.scalars {
-                    let assignment = config
-                        .fp_chip
-                        .range
-                        .gate
-                        .assign_witnesses(ctx, vec![scalar.map_or(Value::unknown(), Value::known)]);
-                    scalars_assigned.push(assignment);
-                }
-
-                let ecc_chip = EccChip::construct(config.fp_chip.clone());
-
-                let _msm = ecc_chip.fixed_base_msm::<G1Affine>(
-                    ctx,
-                    &self.bases,
-                    &scalars_assigned,
-                    Fr::NUM_BITS as usize,
-                    0,
-                    config.clump_factor,
-                );
-
-                config.fp_chip.finalize(ctx);
-                end_timer!(witness_time);
-
-                Ok(())
-            },
-        )
-    }
+        CircuitBuilderStage::Keygen => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::keygen(builder)
+        }
+        CircuitBuilderStage::Prover => RangeCircuitBuilder::prover(builder, break_points.unwrap()),
+    };
+    end_timer!(start0);
+    circuit
 }
 
 fn bench(c: &mut Criterion) {
@@ -168,39 +100,36 @@ fn bench(c: &mut Criterion) {
 
     let k = config.degree;
     let mut rng = OsRng;
-    let mut bases = Vec::new();
-    let mut scalars = Vec::new();
-    for _ in 0..config.batch_size {
-        let new_pt = G1Affine::random(&mut rng);
-        bases.push(new_pt);
-
-        let new_scalar = Some(Fr::random(&mut rng));
-        scalars.push(new_scalar);
-    }
-    let circuit = MSMCircuit::<Fr> { bases, scalars, _marker: PhantomData };
+    let circuit = fixed_base_msm_circuit(
+        config,
+        CircuitBuilderStage::Keygen,
+        vec![G1Affine::generator(); config.batch_size],
+        vec![Fr::zero(); config.batch_size],
+        None,
+    );
 
     let params = ParamsKZG::<Bn256>::setup(k, &mut rng);
     let vk = keygen_vk(&params, &circuit).expect("vk should not fail");
     let pk = keygen_pk(&params, vk, &circuit).expect("pk should not fail");
+    let break_points = circuit.0.break_points.take();
+    drop(circuit);
 
+    let (bases, scalars): (Vec<_>, Vec<_>) =
+        (0..config.batch_size).map(|_| (G1Affine::random(&mut rng), Fr::random(&mut rng))).unzip();
     let mut group = c.benchmark_group("plonk-prover");
     group.sample_size(10);
     group.bench_with_input(
         BenchmarkId::new("fixed base msm", k),
-        &(&params, &pk),
-        |b, &(params, pk)| {
+        &(&params, &pk, &bases, &scalars),
+        |b, &(params, pk, bases, scalars)| {
             b.iter(|| {
-                let mut bases = Vec::new();
-                let mut scalars = Vec::new();
-                for _ in 0..config.batch_size {
-                    let new_pt = G1Affine::random(&mut rng);
-                    bases.push(new_pt);
-
-                    let new_scalar = Some(Fr::random(&mut rng));
-                    scalars.push(new_scalar);
-                }
-
-                let circuit = MSMCircuit::<Fr> { bases, scalars, _marker: PhantomData };
+                let circuit = fixed_base_msm_circuit(
+                    config,
+                    CircuitBuilderStage::Prover,
+                    bases.clone(),
+                    scalars.clone(),
+                    Some(break_points.clone()),
+                );
 
                 let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
                 create_proof::<
diff --git a/halo2-ecc/benches/fp_mul.rs b/halo2-ecc/benches/fp_mul.rs
index d49162e0..c2de04ce 100644
--- a/halo2-ecc/benches/fp_mul.rs
+++ b/halo2-ecc/benches/fp_mul.rs
@@ -1,25 +1,28 @@
-use std::marker::PhantomData;
-
-use halo2_base::halo2_proofs::{
-    arithmetic::Field,
-    circuit::*,
-    halo2curves::bn256::{Bn256, Fq, Fr, G1Affine},
-    plonk::*,
-    poly::kzg::{
-        commitment::{KZGCommitmentScheme, ParamsKZG},
-        multiopen::ProverSHPLONK,
+use ark_std::{end_timer, start_timer};
+use halo2_base::{
+    gates::{
+        builder::{
+            CircuitBuilderStage, GateThreadBuilder, MultiPhaseThreadBreakPoints,
+            RangeCircuitBuilder,
+        },
+        RangeChip,
     },
-    transcript::{Blake2bWrite, Challenge255, TranscriptWriterBuffer},
+    halo2_proofs::{
+        arithmetic::Field,
+        halo2curves::bn256::{Bn256, Fq, Fr, G1Affine},
+        plonk::*,
+        poly::kzg::{
+            commitment::{KZGCommitmentScheme, ParamsKZG},
+            multiopen::ProverSHPLONK,
+        },
+        transcript::{Blake2bWrite, Challenge255, TranscriptWriterBuffer},
+    },
+    Context,
 };
+use halo2_ecc::fields::fp::FpChip;
+use halo2_ecc::fields::{FieldChip, PrimeField};
 use rand::rngs::OsRng;
 
-use halo2_base::{
-    utils::{fe_to_bigint, modulus, PrimeField},
-    SKIP_FIRST_PASS,
-};
-use halo2_ecc::fields::fp::{FpConfig, FpStrategy};
-use halo2_ecc::fields::FieldChip;
-
 use criterion::{criterion_group, criterion_main};
 use criterion::{BenchmarkId, Criterion};
 
@@ -29,106 +32,88 @@ use pprof::criterion::{Output, PProfProfiler};
 
 const K: u32 = 19;
 
-#[derive(Default)]
-struct MyCircuit<F> {
-    a: Value<Fq>,
-    b: Value<Fq>,
-    _marker: PhantomData<F>,
-}
-
-const NUM_ADVICE: usize = 2;
-const NUM_FIXED: usize = 1;
-
-impl<F: PrimeField> Circuit<F> for MyCircuit<F> {
-    type Config = FpConfig<F, Fq>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self::default()
+fn fp_mul_bench<F: PrimeField>(
+    ctx: &mut Context<F>,
+    lookup_bits: usize,
+    limb_bits: usize,
+    num_limbs: usize,
+    _a: Fq,
+    _b: Fq,
+) {
+    std::env::set_var("LOOKUP_BITS", lookup_bits.to_string());
+    let range = RangeChip::<F>::default(lookup_bits);
+    let chip = FpChip::<F, Fq>::new(&range, limb_bits, num_limbs);
+
+    let [a, b] = [_a, _b].map(|x| chip.load_private(ctx, FpChip::<F, Fq>::fe_to_witness(&x)));
+    for _ in 0..2857 {
+        chip.mul(ctx, &a, &b);
     }
+}
 
-    fn configure(meta: &mut ConstraintSystem<F>) -> Self::Config {
-        FpConfig::<F, _>::configure(
-            meta,
-            FpStrategy::Simple,
-            &[NUM_ADVICE],
-            &[1],
-            NUM_FIXED,
-            K as usize - 1,
-            88,
-            3,
-            modulus::<Fq>(),
-            0,
-            K as usize,
-        )
-    }
-
-    fn synthesize(&self, chip: Self::Config, mut layouter: impl Layouter<F>) -> Result<(), Error> {
-        chip.load_lookup_table(&mut layouter)?;
-
-        let mut first_pass = SKIP_FIRST_PASS;
-
-        layouter.assign_region(
-            || "fp",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let mut aux = chip.new_context(region);
-                let ctx = &mut aux;
-
-                let a_assigned = chip.load_private(ctx, self.a.as_ref().map(fe_to_bigint));
-                let b_assigned = chip.load_private(ctx, self.b.as_ref().map(fe_to_bigint));
-
-                for _ in 0..2857 {
-                    chip.mul(ctx, &a_assigned, &b_assigned);
-                }
-
-                // IMPORTANT: this copies advice cells to enable lookup
-                // This is not optional.
-                chip.finalize(ctx);
-
-                Ok(())
-            },
-        )
-    }
+fn fp_mul_circuit(
+    stage: CircuitBuilderStage,
+    a: Fq,
+    b: Fq,
+    break_points: Option<MultiPhaseThreadBreakPoints>,
+) -> RangeCircuitBuilder<Fr> {
+    let k = K as usize;
+    let mut builder = match stage {
+        CircuitBuilderStage::Mock => GateThreadBuilder::mock(),
+        CircuitBuilderStage::Prover => GateThreadBuilder::prover(),
+        CircuitBuilderStage::Keygen => GateThreadBuilder::keygen(),
+    };
+
+    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
+    fp_mul_bench(builder.main(0), k - 1, 88, 3, a, b);
+
+    let circuit = match stage {
+        CircuitBuilderStage::Mock => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::mock(builder)
+        }
+        CircuitBuilderStage::Keygen => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::keygen(builder)
+        }
+        CircuitBuilderStage::Prover => RangeCircuitBuilder::prover(builder, break_points.unwrap()),
+    };
+    end_timer!(start0);
+    circuit
 }
 
 fn bench(c: &mut Criterion) {
-    let a = Fq::random(OsRng);
-    let b = Fq::random(OsRng);
-
-    let circuit = MyCircuit::<Fr> { a: Value::known(a), b: Value::known(b), _marker: PhantomData };
+    let circuit = fp_mul_circuit(CircuitBuilderStage::Keygen, Fq::zero(), Fq::zero(), None);
 
     let params = ParamsKZG::<Bn256>::setup(K, OsRng);
     let vk = keygen_vk(&params, &circuit).expect("vk should not fail");
     let pk = keygen_pk(&params, vk, &circuit).expect("pk should not fail");
+    let break_points = circuit.0.break_points.take();
 
+    let a = Fq::random(OsRng);
+    let b = Fq::random(OsRng);
     let mut group = c.benchmark_group("plonk-prover");
     group.sample_size(10);
-    group.bench_with_input(BenchmarkId::new("fp mul", K), &(&params, &pk), |b, &(params, pk)| {
-        b.iter(|| {
-            let rng = OsRng;
-            let a = Fq::random(OsRng);
-            let b = Fq::random(OsRng);
-
-            let circuit =
-                MyCircuit::<Fr> { a: Value::known(a), b: Value::known(b), _marker: PhantomData };
-
-            let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
-            create_proof::<
-                KZGCommitmentScheme<Bn256>,
-                ProverSHPLONK<'_, Bn256>,
-                Challenge255<G1Affine>,
-                _,
-                Blake2bWrite<Vec<u8>, G1Affine, Challenge255<_>>,
-                _,
-            >(params, pk, &[circuit], &[&[]], rng, &mut transcript)
-            .expect("prover should not fail");
-        })
-    });
+    group.bench_with_input(
+        BenchmarkId::new("fp mul", K),
+        &(&params, &pk, a, b),
+        |bencher, &(params, pk, a, b)| {
+            bencher.iter(|| {
+                let circuit =
+                    fp_mul_circuit(CircuitBuilderStage::Prover, a, b, Some(break_points.clone()));
+
+                let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
+                create_proof::<
+                    KZGCommitmentScheme<Bn256>,
+                    ProverSHPLONK<'_, Bn256>,
+                    Challenge255<G1Affine>,
+                    _,
+                    Blake2bWrite<Vec<u8>, G1Affine, Challenge255<_>>,
+                    _,
+                >(params, pk, &[circuit], &[&[]], OsRng, &mut transcript)
+                .expect("prover should not fail");
+            })
+        },
+    );
     group.finish()
 }
 
diff --git a/halo2-ecc/benches/msm.rs b/halo2-ecc/benches/msm.rs
index 22be806e..76141425 100644
--- a/halo2-ecc/benches/msm.rs
+++ b/halo2-ecc/benches/msm.rs
@@ -1,224 +1,112 @@
-use criterion::{criterion_group, criterion_main};
-use criterion::{BenchmarkId, Criterion};
-
-use halo2_base::utils::modulus;
-use pprof::criterion::{Output, PProfProfiler};
-
 use ark_std::{end_timer, start_timer};
-use halo2_base::SKIP_FIRST_PASS;
-use rand_core::OsRng;
-use serde::{Deserialize, Serialize};
-use std::marker::PhantomData;
-
+use halo2_base::gates::{
+    builder::{
+        CircuitBuilderStage, GateThreadBuilder, MultiPhaseThreadBreakPoints, RangeCircuitBuilder,
+    },
+    RangeChip,
+};
 use halo2_base::halo2_proofs::{
     arithmetic::Field,
-    circuit::{Layouter, SimpleFloorPlanner, Value},
-    halo2curves::bn256::{Bn256, Fq, Fr, G1Affine},
+    halo2curves::bn256::{Bn256, Fr, G1Affine},
     plonk::*,
     poly::kzg::{
         commitment::{KZGCommitmentScheme, ParamsKZG},
         multiopen::ProverSHPLONK,
     },
-    transcript::TranscriptWriterBuffer,
-    transcript::{Blake2bWrite, Challenge255},
-};
-use halo2_base::{
-    gates::GateInstructions,
-    utils::{biguint_to_fe, fe_to_biguint, PrimeField},
-    QuantumCell::Witness,
-};
-use halo2_ecc::{
-    ecc::EccChip,
-    fields::fp::{FpConfig, FpStrategy},
+    transcript::{Blake2bWrite, Challenge255, TranscriptWriterBuffer},
 };
-use num_bigint::BigUint;
+use halo2_ecc::{bn254::FpChip, ecc::EccChip, fields::PrimeField};
+use rand::rngs::OsRng;
+use std::sync::Mutex;
 
-type FpChip<F> = FpConfig<F, Fq>;
+use criterion::{criterion_group, criterion_main};
+use criterion::{BenchmarkId, Criterion};
 
-#[derive(Serialize, Deserialize, Debug)]
+use pprof::criterion::{Output, PProfProfiler};
+// Thanks to the example provided by @jibbow in his article
+// https://www.jibbow.com/posts/criterion-flamegraphs/
+
+#[derive(Clone, Copy, Debug)]
 struct MSMCircuitParams {
-    strategy: FpStrategy,
     degree: u32,
-    num_advice: usize,
-    num_lookup_advice: usize,
-    num_fixed: usize,
     lookup_bits: usize,
     limb_bits: usize,
     num_limbs: usize,
     batch_size: usize,
-    window_bits: usize,
+    clump_factor: usize,
 }
 
 const BEST_100_CONFIG: MSMCircuitParams = MSMCircuitParams {
-    strategy: FpStrategy::Simple,
     degree: 19,
-    num_advice: 20,
-    num_lookup_advice: 3,
-    num_fixed: 1,
     lookup_bits: 18,
     limb_bits: 90,
     num_limbs: 3,
     batch_size: 100,
-    window_bits: 4,
+    clump_factor: 4,
 };
-
 const TEST_CONFIG: MSMCircuitParams = BEST_100_CONFIG;
 
-#[derive(Clone, Debug)]
-struct MSMConfig<F: PrimeField> {
-    fp_chip: FpChip<F>,
-    batch_size: usize,
-    window_bits: usize,
-}
-
-impl<F: PrimeField> MSMConfig<F> {
-    #[allow(clippy::too_many_arguments)]
-    pub fn configure(
-        meta: &mut ConstraintSystem<F>,
-        strategy: FpStrategy,
-        num_advice: &[usize],
-        num_lookup_advice: &[usize],
-        num_fixed: usize,
-        lookup_bits: usize,
-        limb_bits: usize,
-        num_limbs: usize,
-        p: BigUint,
-        batch_size: usize,
-        window_bits: usize,
-        context_id: usize,
-        k: usize,
-    ) -> Self {
-        let fp_chip = FpChip::<F>::configure(
-            meta,
-            strategy,
-            num_advice,
-            num_lookup_advice,
-            num_fixed,
-            lookup_bits,
-            limb_bits,
-            num_limbs,
-            p,
-            context_id,
-            k,
-        );
-        MSMConfig { fp_chip, batch_size, window_bits }
-    }
-}
-
-struct MSMCircuit<F: PrimeField> {
-    bases: Vec<Option<G1Affine>>,
-    scalars: Vec<Option<Fr>>,
-    batch_size: usize,
-    _marker: PhantomData<F>,
+fn msm_bench(
+    thread_pool: &Mutex<GateThreadBuilder<Fr>>,
+    params: MSMCircuitParams,
+    bases: Vec<G1Affine>,
+    scalars: Vec<Fr>,
+) {
+    std::env::set_var("LOOKUP_BITS", params.lookup_bits.to_string());
+    let range = RangeChip::<Fr>::default(params.lookup_bits);
+    let fp_chip = FpChip::<Fr>::new(&range, params.limb_bits, params.num_limbs);
+    let ecc_chip = EccChip::new(&fp_chip);
+
+    let mut builder = thread_pool.lock().unwrap();
+    let ctx = builder.main(0);
+    let scalars_assigned =
+        scalars.iter().map(|scalar| vec![ctx.load_witness(*scalar)]).collect::<Vec<_>>();
+    let bases_assigned =
+        bases.iter().map(|base| ecc_chip.load_private(ctx, (base.x, base.y))).collect::<Vec<_>>();
+    drop(builder);
+
+    ecc_chip.variable_base_msm_in::<G1Affine>(
+        thread_pool,
+        &bases_assigned,
+        scalars_assigned,
+        Fr::NUM_BITS as usize,
+        params.clump_factor,
+        0,
+    );
 }
 
-impl<F: PrimeField> Default for MSMCircuit<F> {
-    fn default() -> Self {
-        Self {
-            bases: vec![None; 10],
-            scalars: vec![None; 10],
-            batch_size: 10,
-            _marker: PhantomData,
+fn msm_circuit(
+    params: MSMCircuitParams,
+    stage: CircuitBuilderStage,
+    bases: Vec<G1Affine>,
+    scalars: Vec<Fr>,
+    break_points: Option<MultiPhaseThreadBreakPoints>,
+) -> RangeCircuitBuilder<Fr> {
+    let k = params.degree as usize;
+    let builder = match stage {
+        CircuitBuilderStage::Mock => GateThreadBuilder::mock(),
+        CircuitBuilderStage::Prover => GateThreadBuilder::prover(),
+        CircuitBuilderStage::Keygen => GateThreadBuilder::keygen(),
+    };
+    let builder = Mutex::new(builder);
+
+    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
+    msm_bench(&builder, params, bases, scalars);
+
+    let builder = builder.into_inner().unwrap();
+    let circuit = match stage {
+        CircuitBuilderStage::Mock => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::mock(builder)
         }
-    }
-}
-
-impl Circuit<Fr> for MSMCircuit<Fr> {
-    type Config = MSMConfig<Fr>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self {
-            bases: vec![None; self.batch_size],
-            scalars: vec![None; self.batch_size],
-            batch_size: self.batch_size,
-            _marker: PhantomData,
+        CircuitBuilderStage::Keygen => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::keygen(builder)
         }
-    }
-
-    fn configure(meta: &mut ConstraintSystem<Fr>) -> Self::Config {
-        let params: MSMCircuitParams = TEST_CONFIG;
-
-        MSMConfig::<Fr>::configure(
-            meta,
-            params.strategy,
-            &[params.num_advice],
-            &[params.num_lookup_advice],
-            params.num_fixed,
-            params.lookup_bits,
-            params.limb_bits,
-            params.num_limbs,
-            modulus::<Fq>(),
-            params.batch_size,
-            params.window_bits,
-            0,
-            params.degree as usize,
-        )
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<Fr>,
-    ) -> Result<(), Error> {
-        assert_eq!(config.batch_size, self.scalars.len());
-        assert_eq!(config.batch_size, self.bases.len());
-
-        config.fp_chip.load_lookup_table(&mut layouter)?;
-
-        let mut first_pass = SKIP_FIRST_PASS;
-        layouter.assign_region(
-            || "MSM",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let witness_time = start_timer!(|| "Witness Generation");
-                let mut aux = config.fp_chip.new_context(region);
-                let ctx = &mut aux;
-
-                let mut scalars_assigned = Vec::new();
-                for scalar in &self.scalars {
-                    let assignment = config.fp_chip.range.gate.assign_region_last(
-                        ctx,
-                        vec![Witness(scalar.map_or(Value::unknown(), Value::known))],
-                        vec![],
-                    );
-                    scalars_assigned.push(vec![assignment]);
-                }
-
-                let ecc_chip = EccChip::construct(config.fp_chip.clone());
-                let mut bases_assigned = Vec::new();
-                for base in &self.bases {
-                    let base_assigned = ecc_chip.load_private(
-                        ctx,
-                        (
-                            base.map(|pt| Value::known(biguint_to_fe(&fe_to_biguint(&pt.x))))
-                                .unwrap_or(Value::unknown()),
-                            base.map(|pt| Value::known(biguint_to_fe(&fe_to_biguint(&pt.y))))
-                                .unwrap_or(Value::unknown()),
-                        ),
-                    );
-                    bases_assigned.push(base_assigned);
-                }
-
-                let _msm = ecc_chip.variable_base_msm::<G1Affine>(
-                    ctx,
-                    &bases_assigned,
-                    &scalars_assigned,
-                    254,
-                    config.window_bits,
-                );
-
-                config.fp_chip.finalize(ctx);
-                end_timer!(witness_time);
-
-                Ok(())
-            },
-        )
-    }
+        CircuitBuilderStage::Prover => RangeCircuitBuilder::prover(builder, break_points.unwrap()),
+    };
+    end_timer!(start0);
+    circuit
 }
 
 fn bench(c: &mut Criterion) {
@@ -226,55 +114,50 @@ fn bench(c: &mut Criterion) {
 
     let k = config.degree;
     let mut rng = OsRng;
-    let mut bases = Vec::new();
-    let mut scalars = Vec::new();
-    for _ in 0..config.batch_size {
-        let new_pt = Some(G1Affine::random(&mut rng));
-        bases.push(new_pt);
-
-        let new_scalar = Some(Fr::random(&mut rng));
-        scalars.push(new_scalar);
-    }
-    let circuit =
-        MSMCircuit::<Fr> { bases, scalars, batch_size: config.batch_size, _marker: PhantomData };
+    let circuit = msm_circuit(
+        config,
+        CircuitBuilderStage::Keygen,
+        vec![G1Affine::generator(); config.batch_size],
+        vec![Fr::one(); config.batch_size],
+        None,
+    );
 
     let params = ParamsKZG::<Bn256>::setup(k, &mut rng);
     let vk = keygen_vk(&params, &circuit).expect("vk should not fail");
     let pk = keygen_pk(&params, vk, &circuit).expect("pk should not fail");
+    let break_points = circuit.0.break_points.take();
+    drop(circuit);
 
+    let (bases, scalars): (Vec<_>, Vec<_>) =
+        (0..config.batch_size).map(|_| (G1Affine::random(&mut rng), Fr::random(&mut rng))).unzip();
     let mut group = c.benchmark_group("plonk-prover");
     group.sample_size(10);
-    group.bench_with_input(BenchmarkId::new("msm", k), &(&params, &pk), |b, &(params, pk)| {
-        b.iter(|| {
-            let mut bases = Vec::new();
-            let mut scalars = Vec::new();
-            for _ in 0..config.batch_size {
-                let new_pt = Some(G1Affine::random(&mut rng));
-                bases.push(new_pt);
-
-                let new_scalar = Some(Fr::random(&mut rng));
-                scalars.push(new_scalar);
-            }
-
-            let circuit = MSMCircuit::<Fr> {
-                bases,
-                scalars,
-                batch_size: config.batch_size,
-                _marker: PhantomData,
-            };
+    group.bench_with_input(
+        BenchmarkId::new("msm", k),
+        &(&params, &pk, &bases, &scalars),
+        |b, &(params, pk, bases, scalars)| {
+            b.iter(|| {
+                let circuit = msm_circuit(
+                    config,
+                    CircuitBuilderStage::Prover,
+                    bases.clone(),
+                    scalars.clone(),
+                    Some(break_points.clone()),
+                );
 
-            let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
-            create_proof::<
-                KZGCommitmentScheme<Bn256>,
-                ProverSHPLONK<'_, Bn256>,
-                Challenge255<G1Affine>,
-                _,
-                Blake2bWrite<Vec<u8>, G1Affine, Challenge255<_>>,
-                _,
-            >(params, pk, &[circuit], &[&[]], &mut rng, &mut transcript)
-            .expect("prover should not fail");
-        })
-    });
+                let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
+                create_proof::<
+                    KZGCommitmentScheme<Bn256>,
+                    ProverSHPLONK<'_, Bn256>,
+                    Challenge255<G1Affine>,
+                    _,
+                    Blake2bWrite<Vec<u8>, G1Affine, Challenge255<_>>,
+                    _,
+                >(params, pk, &[circuit], &[&[]], &mut rng, &mut transcript)
+                .expect("prover should not fail");
+            })
+        },
+    );
     group.finish()
 }
 
diff --git a/halo2-ecc/src/bn254/configs/bench_ec_add.config b/halo2-ecc/configs/bn254/bench_ec_add.config
similarity index 100%
rename from halo2-ecc/src/bn254/configs/bench_ec_add.config
rename to halo2-ecc/configs/bn254/bench_ec_add.config
diff --git a/halo2-ecc/src/bn254/configs/bench_fixed_msm.config b/halo2-ecc/configs/bn254/bench_fixed_msm.config
similarity index 100%
rename from halo2-ecc/src/bn254/configs/bench_fixed_msm.config
rename to halo2-ecc/configs/bn254/bench_fixed_msm.config
diff --git a/halo2-ecc/src/bn254/configs/bench_msm.config b/halo2-ecc/configs/bn254/bench_msm.config
similarity index 92%
rename from halo2-ecc/src/bn254/configs/bench_msm.config
rename to halo2-ecc/configs/bn254/bench_msm.config
index 1d1f769c..d665c0a8 100644
--- a/halo2-ecc/src/bn254/configs/bench_msm.config
+++ b/halo2-ecc/configs/bn254/bench_msm.config
@@ -1,3 +1,4 @@
+{"strategy":"Simple","degree":16,"num_advice":170,"num_lookup_advice":23,"num_fixed":1,"lookup_bits":15,"limb_bits":88,"num_limbs":3,"batch_size":100,"window_bits":4}
 {"strategy":"Simple","degree":17,"num_advice":84,"num_lookup_advice":11,"num_fixed":1,"lookup_bits":16,"limb_bits":88,"num_limbs":3,"batch_size":100,"window_bits":4}
 {"strategy":"Simple","degree":18,"num_advice":42,"num_lookup_advice":6,"num_fixed":1,"lookup_bits":17,"limb_bits":88,"num_limbs":3,"batch_size":100,"window_bits":4}
 {"strategy":"Simple","degree":19,"num_advice":20,"num_lookup_advice":3,"num_fixed":1,"lookup_bits":18,"limb_bits":90,"num_limbs":3,"batch_size":100,"window_bits":4}
diff --git a/halo2-ecc/src/bn254/configs/bench_pairing.config b/halo2-ecc/configs/bn254/bench_pairing.config
similarity index 100%
rename from halo2-ecc/src/bn254/configs/bench_pairing.config
rename to halo2-ecc/configs/bn254/bench_pairing.config
diff --git a/halo2-ecc/src/bn254/configs/ec_add_circuit.config b/halo2-ecc/configs/bn254/ec_add_circuit.config
similarity index 100%
rename from halo2-ecc/src/bn254/configs/ec_add_circuit.config
rename to halo2-ecc/configs/bn254/ec_add_circuit.config
diff --git a/halo2-ecc/src/bn254/configs/fixed_msm_circuit.config b/halo2-ecc/configs/bn254/fixed_msm_circuit.config
similarity index 100%
rename from halo2-ecc/src/bn254/configs/fixed_msm_circuit.config
rename to halo2-ecc/configs/bn254/fixed_msm_circuit.config
diff --git a/halo2-ecc/configs/bn254/msm_circuit.config b/halo2-ecc/configs/bn254/msm_circuit.config
new file mode 100644
index 00000000..f66f6077
--- /dev/null
+++ b/halo2-ecc/configs/bn254/msm_circuit.config
@@ -0,0 +1 @@
+{"strategy":"Simple","degree":17,"num_advice":84,"num_lookup_advice":11,"num_fixed":1,"lookup_bits":16,"limb_bits":88,"num_limbs":3,"batch_size":100,"window_bits":4}
\ No newline at end of file
diff --git a/halo2-ecc/src/bn254/configs/pairing_circuit.config b/halo2-ecc/configs/bn254/pairing_circuit.config
similarity index 100%
rename from halo2-ecc/src/bn254/configs/pairing_circuit.config
rename to halo2-ecc/configs/bn254/pairing_circuit.config
diff --git a/halo2-ecc/src/secp256k1/configs/bench_ecdsa.config b/halo2-ecc/configs/secp256k1/bench_ecdsa.config
similarity index 100%
rename from halo2-ecc/src/secp256k1/configs/bench_ecdsa.config
rename to halo2-ecc/configs/secp256k1/bench_ecdsa.config
diff --git a/halo2-ecc/src/secp256k1/configs/ecdsa_circuit.config b/halo2-ecc/configs/secp256k1/ecdsa_circuit.config
similarity index 100%
rename from halo2-ecc/src/secp256k1/configs/ecdsa_circuit.config
rename to halo2-ecc/configs/secp256k1/ecdsa_circuit.config
diff --git a/halo2-ecc/src/bigint/add_no_carry.rs b/halo2-ecc/src/bigint/add_no_carry.rs
index 8cc687d4..e7d920a8 100644
--- a/halo2-ecc/src/bigint/add_no_carry.rs
+++ b/halo2-ecc/src/bigint/add_no_carry.rs
@@ -1,34 +1,35 @@
 use super::{CRTInteger, OverflowInteger};
-use halo2_base::{gates::GateInstructions, utils::PrimeField, Context, QuantumCell::Existing};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, Context};
 use std::cmp::max;
 
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-    b: &OverflowInteger<'v, F>,
-) -> OverflowInteger<'v, F> {
-    assert_eq!(a.limbs.len(), b.limbs.len());
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+    b: &OverflowInteger<F>,
+) -> OverflowInteger<F> {
+    debug_assert_eq!(a.limbs.len(), b.limbs.len());
 
     let out_limbs = a
         .limbs
         .iter()
         .zip(b.limbs.iter())
-        .map(|(a_limb, b_limb)| gate.add(ctx, Existing(a_limb), Existing(b_limb)))
+        .map(|(&a_limb, &b_limb)| gate.add(ctx, a_limb, b_limb))
         .collect();
 
     OverflowInteger::construct(out_limbs, max(a.max_limb_bits, b.max_limb_bits) + 1)
 }
 
-pub fn crt<'v, F: PrimeField>(
+// Pass by reference to avoid cloning the `BigInt` in `CRTInteger`; unclear whether this is optimal.
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-    b: &CRTInteger<'v, F>,
-) -> CRTInteger<'v, F> {
-    assert_eq!(a.truncation.limbs.len(), b.truncation.limbs.len());
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
+) -> CRTInteger<F> {
+    debug_assert_eq!(a.truncation.limbs.len(), b.truncation.limbs.len());
     let out_trunc = assign::<F>(gate, ctx, &a.truncation, &b.truncation);
-    let out_native = gate.add(ctx, Existing(&a.native), Existing(&b.native));
-    let out_val = a.value.as_ref().zip(b.value.as_ref()).map(|(a, b)| a + b);
+    let out_native = gate.add(ctx, a.native, b.native);
+    let out_val = &a.value + &b.value;
     CRTInteger::construct(out_trunc, out_native, out_val)
 }
diff --git a/halo2-ecc/src/bigint/big_is_equal.rs b/halo2-ecc/src/bigint/big_is_equal.rs
index f963937f..f64a3fae 100644
--- a/halo2-ecc/src/bigint/big_is_equal.rs
+++ b/halo2-ecc/src/bigint/big_is_equal.rs
@@ -1,47 +1,45 @@
 use super::{CRTInteger, OverflowInteger};
-use halo2_base::{
-    gates::GateInstructions, utils::PrimeField, AssignedValue, Context, QuantumCell::Existing,
-};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, AssignedValue, Context};
 
-// given OverflowInteger<F>'s `a` and `b` of the same shape,
-// returns whether `a == b`
-pub fn assign<'v, F: PrimeField>(
+/// Given `OverflowInteger<F>`s `a` and `b` of the same shape,
+/// returns whether `a == b`.
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-    b: &OverflowInteger<'v, F>,
-) -> AssignedValue<'v, F> {
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+    b: &OverflowInteger<F>,
+) -> AssignedValue<F> {
     let k = a.limbs.len();
-    assert_eq!(k, b.limbs.len());
-    assert_ne!(k, 0);
+    debug_assert_eq!(k, b.limbs.len());
+    debug_assert_ne!(k, 0);
 
     let mut a_limbs = a.limbs.iter();
     let mut b_limbs = b.limbs.iter();
-    let mut partial =
-        gate.is_equal(ctx, Existing(a_limbs.next().unwrap()), Existing(b_limbs.next().unwrap()));
-    for (a_limb, b_limb) in a_limbs.zip(b_limbs) {
-        let eq_limb = gate.is_equal(ctx, Existing(a_limb), Existing(b_limb));
-        partial = gate.and(ctx, Existing(&eq_limb), Existing(&partial));
+    let mut partial = gate.is_equal(ctx, *a_limbs.next().unwrap(), *b_limbs.next().unwrap());
+    for (&a_limb, &b_limb) in a_limbs.zip(b_limbs) {
+        let eq_limb = gate.is_equal(ctx, a_limb, b_limb);
+        partial = gate.and(ctx, eq_limb, partial);
     }
     partial
 }
 
-pub fn wrapper<'v, F: PrimeField>(
+pub fn wrapper<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-    b: &CRTInteger<'v, F>,
-) -> AssignedValue<'v, F> {
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
+) -> AssignedValue<F> {
     assign(gate, ctx, &a.truncation, &b.truncation)
 }
 
-pub fn crt<'v, F: PrimeField>(
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-    b: &CRTInteger<'v, F>,
-) -> AssignedValue<'v, F> {
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
+) -> AssignedValue<F> {
+    // Do not assert `a.value == b.value`: unequal inputs are valid for an equality test.
     let out_trunc = assign::<F>(gate, ctx, &a.truncation, &b.truncation);
-    let out_native = gate.is_equal(ctx, Existing(&a.native), Existing(&b.native));
-    gate.and(ctx, Existing(&out_trunc), Existing(&out_native))
+    let out_native = gate.is_equal(ctx, a.native, b.native);
+    gate.and(ctx, out_trunc, out_native)
 }
diff --git a/halo2-ecc/src/bigint/big_is_zero.rs b/halo2-ecc/src/bigint/big_is_zero.rs
index 4ab84fa3..5014d194 100644
--- a/halo2-ecc/src/bigint/big_is_zero.rs
+++ b/halo2-ecc/src/bigint/big_is_zero.rs
@@ -1,46 +1,45 @@
 use super::{CRTInteger, OverflowInteger};
-use halo2_base::{
-    gates::GateInstructions, utils::PrimeField, AssignedValue, Context, QuantumCell::Existing,
-};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, AssignedValue, Context};
 
 /// assume you know that the limbs of `a` are all in [0, 2^{a.max_limb_bits})
-pub fn positive<'v, F: PrimeField>(
+pub fn positive<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'v, F>,
-    a: &OverflowInteger<'v, F>,
-) -> AssignedValue<'v, F> {
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+) -> AssignedValue<F> {
     let k = a.limbs.len();
-    assert_ne!(k, 0);
+    debug_assert_ne!(k, 0);
     debug_assert!(a.max_limb_bits as u32 + k.ilog2() < F::CAPACITY);
 
-    let sum = gate.sum(ctx, a.limbs.iter().map(Existing));
-    gate.is_zero(ctx, &sum)
+    let sum = gate.sum(ctx, a.limbs.iter().copied());
+    gate.is_zero(ctx, sum)
 }
 
 // given OverflowInteger<F> `a`, returns whether `a == 0`
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-) -> AssignedValue<'v, F> {
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+) -> AssignedValue<F> {
     let k = a.limbs.len();
-    assert_ne!(k, 0);
+    debug_assert_ne!(k, 0);
 
     let mut a_limbs = a.limbs.iter();
-    let mut partial = gate.is_zero(ctx, a_limbs.next().unwrap());
-    for a_limb in a_limbs {
+    let mut partial = gate.is_zero(ctx, *a_limbs.next().unwrap());
+    for &a_limb in a_limbs {
         let limb_is_zero = gate.is_zero(ctx, a_limb);
-        partial = gate.and(ctx, Existing(&limb_is_zero), Existing(&partial));
+        partial = gate.and(ctx, limb_is_zero, partial);
     }
     partial
 }
 
-pub fn crt<'v, F: PrimeField>(
+/// Returns whether `a == 0` by checking that the truncation limbs and native value are all zero.
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-) -> AssignedValue<'v, F> {
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+) -> AssignedValue<F> {
     let out_trunc = assign::<F>(gate, ctx, &a.truncation);
-    let out_native = gate.is_zero(ctx, &a.native);
-    gate.and(ctx, Existing(&out_trunc), Existing(&out_native))
+    let out_native = gate.is_zero(ctx, a.native);
+    gate.and(ctx, out_trunc, out_native)
 }
diff --git a/halo2-ecc/src/bigint/big_less_than.rs b/halo2-ecc/src/bigint/big_less_than.rs
index 52528870..276de18c 100644
--- a/halo2-ecc/src/bigint/big_less_than.rs
+++ b/halo2-ecc/src/bigint/big_less_than.rs
@@ -1,16 +1,16 @@
 use super::OverflowInteger;
-use halo2_base::{gates::RangeInstructions, utils::PrimeField, AssignedValue, Context};
+use halo2_base::{gates::RangeInstructions, utils::ScalarField, AssignedValue, Context};
 
 // given OverflowInteger<F>'s `a` and `b` of the same shape,
 // returns whether `a < b`
-pub fn assign<'a, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     range: &impl RangeInstructions<F>,
-    ctx: &mut Context<'a, F>,
-    a: &OverflowInteger<'a, F>,
-    b: &OverflowInteger<'a, F>,
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+    b: &OverflowInteger<F>,
     limb_bits: usize,
     limb_base: F,
-) -> AssignedValue<'a, F> {
+) -> AssignedValue<F> {
     // a < b iff a - b has underflow
     let (_, underflow) = super::sub::assign::<F>(range, ctx, a, b, limb_bits, limb_base);
     underflow
diff --git a/halo2-ecc/src/bigint/carry_mod.rs b/halo2-ecc/src/bigint/carry_mod.rs
index 111f31d5..4b266cf3 100644
--- a/halo2-ecc/src/bigint/carry_mod.rs
+++ b/halo2-ecc/src/bigint/carry_mod.rs
@@ -1,12 +1,11 @@
 use super::{check_carry_to_zero, CRTInteger, OverflowInteger};
-use crate::halo2_proofs::circuit::Value;
 use halo2_base::{
     gates::{range::RangeStrategy, GateInstructions, RangeInstructions},
-    utils::{biguint_to_fe, decompose_bigint_option, value_to_option, PrimeField},
+    utils::{decompose_bigint, BigPrimeField},
     AssignedValue, Context,
     QuantumCell::{Constant, Existing, Witness},
 };
-use num_bigint::{BigInt, BigUint};
+use num_bigint::BigInt;
 use num_integer::Integer;
 use num_traits::{One, Signed};
 use std::{cmp::max, iter};
@@ -20,11 +19,14 @@ use std::{cmp::max, iter};
 // We constrain `a = out + modulus * quotient` and range check `out` and `quotient`
 //
 // Assumption: the leading two bits (in big endian) are 1, and `abs(a) <= 2^{n * k - 1 + F::NUM_BITS - 2}` (A weaker assumption is also enough, but this is good enough for forseeable use cases)
-pub fn crt<'a, F: PrimeField>(
+
+// This is currently optimized for limbs greater than 64 bits, so we need `F` to be a `BigPrimeField`
+// In the future we'll need a slightly different implementation for limbs that fit in 32 or 64 bits (e.g., `F` is Goldilocks)
+pub fn crt<F: BigPrimeField>(
     range: &impl RangeInstructions<F>,
     // chip: &BigIntConfig<F>,
-    ctx: &mut Context<'a, F>,
-    a: &CRTInteger<'a, F>,
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
     k_bits: usize, // = a.len().bits()
     modulus: &BigInt,
     mod_vec: &[F],
@@ -32,22 +34,12 @@ pub fn crt<'a, F: PrimeField>(
     limb_bits: usize,
     limb_bases: &[F],
     limb_base_big: &BigInt,
-) -> CRTInteger<'a, F> {
+) -> CRTInteger<F> {
     let n = limb_bits;
     let k = a.truncation.limbs.len();
     let trunc_len = n * k;
 
-    #[cfg(feature = "display")]
-    {
-        let key = format!("carry_mod(crt) length {k}");
-        let count = ctx.op_count.entry(key).or_insert(0);
-        *count += 1;
-
-        // safety check:
-        a.value
-            .as_ref()
-            .map(|a| assert!(a.bits() as usize <= n * k - 1 + (F::NUM_BITS as usize) - 2));
-    }
+    debug_assert!(a.value.bits() as usize <= n * k - 1 + (F::NUM_BITS as usize) - 2);
 
     // in order for CRT method to work, we need `abs(out + modulus * quotient - a) < 2^{trunc_len - 1} * native_modulus::<F>`
     // this is ensured if `0 <= out < 2^{n*k}` and
@@ -55,7 +47,7 @@ pub fn crt<'a, F: PrimeField>(
     // which is ensured if
     // `abs(modulus * quotient) < 2^{trunc_len - 1 + F::NUM_BITS - 1} <= 2^{trunc_len - 1} * native_modulus::<F> - abs(a)` given our assumption `abs(a) <= 2^{n * k - 1 + F::NUM_BITS - 2}`
     let quot_max_bits = trunc_len - 1 + (F::NUM_BITS as usize) - 1 - (modulus.bits() as usize);
-    assert!(quot_max_bits < trunc_len);
+    debug_assert!(quot_max_bits < trunc_len);
     // Let n' <= quot_max_bits - n(k-1) - 1
     // If quot[i] <= 2^n for i < k - 1 and quot[k-1] <= 2^{n'} then
     // quot < 2^{n(k-1)+1} + 2^{n' + n(k-1)} = (2+2^{n'}) 2^{n(k-1)} < 2^{n'+1} * 2^{n(k-1)} <= 2^{quot_max_bits - n(k-1)} * 2^{n(k-1)}
@@ -69,26 +61,17 @@ pub fn crt<'a, F: PrimeField>(
     // we need to find `out_vec` as a proper BigInt with k limbs
     // we need to find `quot_vec` as a proper BigInt with k limbs
 
-    // we need to constrain that `sum_i out_vec[i] * 2^{n*i} = out_native` in `F`
-    // we need to constrain that `sum_i quot_vec[i] * 2^{n*i} = quot_native` in `F`
-    let (out_val, out_vec, quot_vec) = if let Some(a_big) = value_to_option(a.value.as_ref()) {
-        let (quot_val, out_val) = a_big.div_mod_floor(modulus);
+    let (quot_val, out_val) = a.value.div_mod_floor(modulus);
 
-        debug_assert!(out_val < (BigInt::one() << (n * k)));
-        debug_assert!(quot_val.abs() < (BigInt::one() << quot_max_bits));
+    debug_assert!(out_val < (BigInt::one() << (n * k)));
+    debug_assert!(quot_val.abs() < (BigInt::one() << quot_max_bits));
 
-        (
-            Value::known(out_val.clone()),
-            // decompose_bigint_option just throws away signed limbs in index >= k
-            decompose_bigint_option::<F>(Value::known(&out_val), k, n),
-            decompose_bigint_option::<F>(Value::known(&quot_val), k, n),
-        )
-    } else {
-        (Value::unknown(), vec![Value::unknown(); k], vec![Value::unknown(); k])
-    };
+    // decompose_bigint just throws away signed limbs in index >= k
+    let out_vec = decompose_bigint::<F>(&out_val, k, n);
+    let quot_vec = decompose_bigint::<F>(&quot_val, k, n);
 
-    // let out_native = out_val.as_ref().map(|a| bigint_to_fe::<F>(a));
-    // let quot_native = quot_val.map(|a| bigint_to_fe::<F>(&a));
+    // we need to constrain that `sum_i out_vec[i] * 2^{n*i} = out_native` in `F`
+    // we need to constrain that `sum_i quot_vec[i] * 2^{n*i} = quot_native` in `F`
 
     // assert!(modulus < &(BigUint::one() << (n * k)));
     assert_eq!(mod_vec.len(), k);
@@ -107,76 +90,46 @@ pub fn crt<'a, F: PrimeField>(
     let mut quot_assigned: Vec<AssignedValue<F>> = Vec::with_capacity(k);
     let mut out_assigned: Vec<AssignedValue<F>> = Vec::with_capacity(k);
     let mut check_assigned: Vec<AssignedValue<F>> = Vec::with_capacity(k);
-    let mut tmp_assigned: Vec<AssignedValue<F>> = Vec::with_capacity(k);
 
-    // match chip.strategy {
     // strategies where we carry out school-book multiplication in some form:
     //    BigIntStrategy::Simple => {
     for (i, (a_limb, (quot_v, out_v))) in
         a.truncation.limbs.iter().zip(quot_vec.into_iter().zip(out_vec.into_iter())).enumerate()
     {
-        let (quot_cell, out_cell, check_cell) = {
-            let prod = range.gate().inner_product_left(
-                ctx,
-                quot_assigned.iter().map(|a| Existing(a)).chain(iter::once(Witness(quot_v))),
-                mod_vec[..=i].iter().rev().map(|c| Constant(*c)),
-                &mut tmp_assigned,
-            );
-            // let gate_index = prod.column();
-
-            let quot_cell = tmp_assigned.pop().unwrap();
-            let out_cell;
-            let check_cell;
-            // perform step 2: compute prod - a + out
-            let temp1 = prod.value().zip(a_limb.value()).map(|(prod, a)| *prod - a);
-            let check_val = temp1 + out_v;
-
-            // This is to take care of edge case where we switch columns to handle overlap
-            let alloc = ctx.advice_alloc.get_mut(range.gate().context_id()).unwrap();
-            if alloc.1 + 6 >= ctx.max_rows {
-                // edge case, we need to copy the last `prod` cell
-                // dbg!(*alloc);
-                alloc.1 = 0;
-                alloc.0 += 1;
-                range.gate().assign_region_last(ctx, [Existing(&prod)], []);
-            }
-            match range.strategy() {
-                RangeStrategy::Vertical => {
-                    // transpose of:
-                    // | prod | -1 | a | prod - a | 1 | out | prod - a + out
-                    // where prod is at relative row `offset`
-                    let mut assignments = range.gate().assign_region(
-                        ctx,
-                        [
-                            Constant(-F::one()),
-                            Existing(a_limb),
-                            Witness(temp1),
-                            Constant(F::one()),
-                            Witness(out_v),
-                            Witness(check_val),
-                        ],
-                        [(-1, None), (2, None)],
-                    );
-                    check_cell = assignments.pop().unwrap();
-                    out_cell = assignments.pop().unwrap();
-                }
-                RangeStrategy::PlonkPlus => {
-                    // | prod | a | out | prod - a + out |
-                    // selector columns:
-                    // | 1    | 0 | 0   |
-                    // | 0    | -1| 1   |
-                    let mut assignments = range.gate().assign_region(
-                        ctx,
-                        [Existing(a_limb), Witness(out_v), Witness(check_val)],
-                        [(-1, Some([F::zero(), -F::one(), F::one()]))],
-                    );
-                    check_cell = assignments.pop().unwrap();
-                    out_cell = assignments.pop().unwrap();
-                }
+        let (prod, new_quot_cell) = range.gate().inner_product_left_last(
+            ctx,
+            quot_assigned.iter().map(|a| Existing(*a)).chain(iter::once(Witness(quot_v))),
+            mod_vec[..=i].iter().rev().map(|c| Constant(*c)),
+        );
+        // let gate_index = prod.column();
+
+        let out_cell;
+        let check_cell;
+        // perform step 2: compute prod - a + out
+        let temp1 = *prod.value() - a_limb.value();
+        let check_val = temp1 + out_v;
+
+        match range.strategy() {
+            RangeStrategy::Vertical => {
+                // transpose of:
+                // | prod | -1 | a | prod - a | 1 | out | prod - a + out
+                // where prod is at relative row `offset`
+                ctx.assign_region(
+                    [
+                        Constant(-F::one()),
+                        Existing(*a_limb),
+                        Witness(temp1),
+                        Constant(F::one()),
+                        Witness(out_v),
+                        Witness(check_val),
+                    ],
+                    [-1, 2], // note the NEGATIVE index! this is using gate overlapping with the previous inner product call
+                );
+                check_cell = ctx.last().unwrap();
+                out_cell = ctx.get(-2);
             }
-            (quot_cell, out_cell, check_cell)
-        };
-        quot_assigned.push(quot_cell);
+        }
+        quot_assigned.push(new_quot_cell);
         out_assigned.push(out_cell);
         check_assigned.push(check_cell);
     }
@@ -186,32 +139,21 @@ pub fn crt<'a, F: PrimeField>(
     // range check limbs of `out` are in [0, 2^n) except last limb should be in [0, 2^out_last_limb_bits)
     for (out_index, out_cell) in out_assigned.iter().enumerate() {
         let limb_bits = if out_index == k - 1 { out_last_limb_bits } else { n };
-        range.range_check(ctx, out_cell, limb_bits);
+        range.range_check(ctx, *out_cell, limb_bits);
     }
 
     // range check that quot_cell in quot_assigned is in [-2^n, 2^n) except for last cell check it's in [-2^quot_last_limb_bits, 2^quot_last_limb_bits)
     for (q_index, quot_cell) in quot_assigned.iter().enumerate() {
         let limb_bits = if q_index == k - 1 { quot_last_limb_bits } else { n };
-        let limb_base = if q_index == k - 1 {
-            biguint_to_fe(&(BigUint::one() << limb_bits))
-        } else {
-            limb_bases[1]
-        };
+        let limb_base =
+            if q_index == k - 1 { range.gate().pow_of_two()[limb_bits] } else { limb_bases[1] };
 
         // compute quot_cell + 2^n and range check with n + 1 bits
-        let quot_shift = {
-            let out_val = quot_cell.value().map(|a| limb_base + a);
-            // | quot_cell | 2^n | 1 | quot_cell + 2^n |
-            range.gate().assign_region_last(
-                ctx,
-                [Existing(quot_cell), Constant(limb_base), Constant(F::one()), Witness(out_val)],
-                [(0, None)],
-            )
-        };
-        range.range_check(ctx, &quot_shift, limb_bits + 1);
+        let quot_shift = range.gate().add(ctx, *quot_cell, Constant(limb_base));
+        range.range_check(ctx, quot_shift, limb_bits + 1);
     }
 
-    let check_overflow_int = &OverflowInteger::construct(
+    let check_overflow_int = OverflowInteger::construct(
         check_assigned,
         max(max(limb_bits, a.truncation.max_limb_bits) + 1, 2 * n + k_bits),
     );
@@ -226,40 +168,30 @@ pub fn crt<'a, F: PrimeField>(
         limb_base_big,
     );
 
-    // Constrain `out_native = sum_i out_assigned[i] * 2^{n*i}` in `F`
-    let out_native_assigned = OverflowInteger::<F>::evaluate(
+    // Constrain `quot_native = sum_i quot_assigned[i] * 2^{n*i}` in `F`
+    let quot_native = OverflowInteger::<F>::evaluate(
         range.gate(),
-        /*chip,*/ ctx,
-        &out_assigned,
-        limb_bases.iter().cloned(),
+        ctx,
+        quot_assigned,
+        limb_bases.iter().copied(),
     );
 
-    // Constrain `quot_native = sum_i quot_assigned[i] * 2^{n*i}` in `F`
-    let quot_native_assigned = OverflowInteger::<F>::evaluate(
+    // Constrain `out_native = sum_i out_assigned[i] * 2^{n*i}` in `F`
+    let out_native = OverflowInteger::<F>::evaluate(
         range.gate(),
-        /*chip,*/ ctx,
-        &quot_assigned,
-        limb_bases.iter().cloned(),
+        ctx,
+        out_assigned.iter().copied(),
+        limb_bases.iter().copied(),
     );
 
-    // TODO: we can save 1 cell by connecting `out_native_assigned` computation with the following:
+    // We save 1 cell by connecting `out_native` computation with the following:
 
     // Check `out + modulus * quotient - a = 0` in native field
     // | out | modulus | quotient | a |
-    let _native_computation = range.gate().assign_region_last(
-        ctx,
-        [
-            Existing(&out_native_assigned),
-            Constant(mod_native),
-            Existing(&quot_native_assigned),
-            Existing(&a.native),
-        ],
-        [(0, None)],
+    ctx.assign_region(
+        [Constant(mod_native), Existing(quot_native), Existing(a.native)],
+        [-1], // negative index because -1 relative offset is `out_native` assigned value
     );
 
-    CRTInteger::construct(
-        OverflowInteger::construct(out_assigned, limb_bits),
-        out_native_assigned,
-        out_val,
-    )
+    CRTInteger::construct(OverflowInteger::construct(out_assigned, limb_bits), out_native, out_val)
 }
diff --git a/halo2-ecc/src/bigint/check_carry_mod_to_zero.rs b/halo2-ecc/src/bigint/check_carry_mod_to_zero.rs
index 38453da0..db6f9084 100644
--- a/halo2-ecc/src/bigint/check_carry_mod_to_zero.rs
+++ b/halo2-ecc/src/bigint/check_carry_mod_to_zero.rs
@@ -1,12 +1,11 @@
 use super::{check_carry_to_zero, CRTInteger, OverflowInteger};
-use crate::halo2_proofs::circuit::Value;
 use halo2_base::{
     gates::{GateInstructions, RangeInstructions},
-    utils::{biguint_to_fe, decompose_bigint_option, value_to_option, PrimeField},
+    utils::{decompose_bigint, BigPrimeField},
     AssignedValue, Context,
     QuantumCell::{Constant, Existing, Witness},
 };
-use num_bigint::{BigInt, BigUint};
+use num_bigint::BigInt;
 use num_integer::Integer;
 use num_traits::{One, Signed, Zero};
 use std::{cmp::max, iter};
@@ -14,11 +13,10 @@ use std::{cmp::max, iter};
 // same as carry_mod::crt but `out = 0` so no need to range check
 //
 // Assumption: the leading two bits (in big endian) are 1, and `a.max_size <= 2^{n * k - 1 + F::NUM_BITS - 2}` (A weaker assumption is also enough)
-pub fn crt<'a, F: PrimeField>(
+pub fn crt<F: BigPrimeField>(
     range: &impl RangeInstructions<F>,
-    // chip: &BigIntConfig<F>,
-    ctx: &mut Context<'a, F>,
-    a: &CRTInteger<'a, F>,
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
     k_bits: usize, // = a.len().bits()
     modulus: &BigInt,
     mod_vec: &[F],
@@ -31,17 +29,7 @@ pub fn crt<'a, F: PrimeField>(
     let k = a.truncation.limbs.len();
     let trunc_len = n * k;
 
-    #[cfg(feature = "display")]
-    {
-        let key = format!("check_carry_mod(crt) length {k}");
-        let count = ctx.op_count.entry(key).or_insert(0);
-        *count += 1;
-
-        // safety check:
-        a.value
-            .as_ref()
-            .map(|a| assert!(a.bits() as usize <= n * k - 1 + (F::NUM_BITS as usize) - 2));
-    }
+    debug_assert!(a.value.bits() as usize <= n * k - 1 + (F::NUM_BITS as usize) - 2);
 
     // see carry_mod.rs for explanation
     let quot_max_bits = trunc_len - 1 + (F::NUM_BITS as usize) - 1 - (modulus.bits() as usize);
@@ -53,19 +41,15 @@ pub fn crt<'a, F: PrimeField>(
     // we need to find `quot_native` as a native F element
 
     // we need to constrain that `sum_i quot_vec[i] * 2^{n*i} = quot_native` in `F`
-    let quot_vec = if let Some(a_big) = value_to_option(a.value.as_ref()) {
-        let (quot_val, _out_val) = a_big.div_mod_floor(modulus);
+    let (quot_val, _out_val) = a.value.div_mod_floor(modulus);
 
-        // only perform safety checks in display mode so we can turn them off in production
-        debug_assert_eq!(_out_val, BigInt::zero());
-        debug_assert!(quot_val.abs() < (BigInt::one() << quot_max_bits));
+    // only perform safety checks in debug builds (`debug_assert!`) so they are compiled out in production
+    debug_assert_eq!(_out_val, BigInt::zero());
+    debug_assert!(quot_val.abs() < (BigInt::one() << quot_max_bits));
 
-        decompose_bigint_option::<F>(Value::known(&quot_val), k, n)
-    } else {
-        vec![Value::unknown(); k]
-    };
+    let quot_vec = decompose_bigint::<F>(&quot_val, k, n);
 
-    //assert!(modulus < &(BigUint::one() << (n * k)));
+    debug_assert!(modulus < &(BigInt::one() << (n * k)));
 
     // We need to show `modulus * quotient - a` is:
     // - congruent to `0 (mod 2^trunc_len)`
@@ -81,43 +65,24 @@ pub fn crt<'a, F: PrimeField>(
 
     let mut quot_assigned: Vec<AssignedValue<F>> = Vec::with_capacity(k);
     let mut check_assigned: Vec<AssignedValue<F>> = Vec::with_capacity(k);
-    let mut tmp_assigned: Vec<AssignedValue<F>> = Vec::with_capacity(k);
 
     // match chip.strategy {
     //    BigIntStrategy::Simple => {
     for (i, (a_limb, quot_v)) in a.truncation.limbs.iter().zip(quot_vec.into_iter()).enumerate() {
-        let (quot_cell, check_cell) = {
-            let prod = range.gate().inner_product_left(
-                ctx,
-                quot_assigned.iter().map(Existing).chain(iter::once(Witness(quot_v))),
-                mod_vec[0..=i].iter().rev().map(|c| Constant(*c)),
-                &mut tmp_assigned,
-            );
-
-            let quot_cell = tmp_assigned.pop().unwrap();
-            // perform step 2: compute prod - a + out
-            // transpose of:
-            // | prod | -1 | a | prod - a |
-
-            // This is to take care of edge case where we switch columns to handle overlap
-            let alloc = ctx.advice_alloc.get_mut(range.gate().context_id()).unwrap();
-            if alloc.1 + 3 >= ctx.max_rows {
-                // edge case, we need to copy the last `prod` cell
-                alloc.1 = 0;
-                alloc.0 += 1;
-                range.gate().assign_region_last(ctx, vec![Existing(&prod)], vec![]);
-            }
-
-            let check_val = prod.value().zip(a_limb.value()).map(|(prod, a)| *prod - a);
-            let check_cell = range.gate().assign_region_last(
-                ctx,
-                vec![Constant(-F::one()), Existing(a_limb), Witness(check_val)],
-                vec![(-1, None)],
-            );
-
-            (quot_cell, check_cell)
-        };
-        quot_assigned.push(quot_cell);
+        let (prod, new_quot_cell) = range.gate().inner_product_left_last(
+            ctx,
+            quot_assigned.iter().map(|x| Existing(*x)).chain(iter::once(Witness(quot_v))),
+            mod_vec[0..=i].iter().rev().map(|c| Constant(*c)),
+        );
+
+        // perform step 2: compute prod - a (here `out = 0`, so there is no `+ out` term)
+        // transpose of:
+        // | prod | -1 | a | prod - a |
+        let check_val = *prod.value() - a_limb.value();
+        let check_cell = ctx
+            .assign_region_last([Constant(-F::one()), Existing(*a_limb), Witness(check_val)], [-1]);
+
+        quot_assigned.push(new_quot_cell);
         check_assigned.push(check_cell);
     }
     //    }
@@ -126,35 +91,16 @@ pub fn crt<'a, F: PrimeField>(
     // range check that quot_cell in quot_assigned is in [-2^n, 2^n) except for last cell check it's in [-2^quot_last_limb_bits, 2^quot_last_limb_bits)
     for (q_index, quot_cell) in quot_assigned.iter().enumerate() {
         let limb_bits = if q_index == k - 1 { quot_last_limb_bits } else { n };
-        let limb_base = if q_index == k - 1 {
-            biguint_to_fe(&(BigUint::one() << limb_bits))
-        } else {
-            limb_bases[1]
-        };
+        let limb_base =
+            if q_index == k - 1 { range.gate().pow_of_two()[limb_bits] } else { limb_bases[1] };
 
         // compute quot_cell + 2^n and range check with n + 1 bits
-        let quot_shift = {
-            // TODO: unnecessary clone
-            let out_val = quot_cell.value().map(|a| limb_base + a);
-            // | quot_cell | 2^n | 1 | quot_cell + 2^n |
-            range.gate().assign_region_last(
-                ctx,
-                vec![
-                    Existing(quot_cell),
-                    Constant(limb_base),
-                    Constant(F::one()),
-                    Witness(out_val),
-                ],
-                vec![(0, None)],
-            )
-        };
-        range.range_check(ctx, &quot_shift, limb_bits + 1);
+        let quot_shift = range.gate().add(ctx, *quot_cell, Constant(limb_base));
+        range.range_check(ctx, quot_shift, limb_bits + 1);
     }
 
-    let check_overflow_int = &OverflowInteger::construct(
-        check_assigned,
-        max(a.truncation.max_limb_bits, 2 * n + k_bits),
-    );
+    let check_overflow_int =
+        OverflowInteger::construct(check_assigned, max(a.truncation.max_limb_bits, 2 * n + k_bits));
 
     // check that `modulus * quotient - a == 0 mod 2^{trunc_len}` after carry
     check_carry_to_zero::truncate::<F>(
@@ -167,23 +113,17 @@ pub fn crt<'a, F: PrimeField>(
     );
 
     // Constrain `quot_native = sum_i out_assigned[i] * 2^{n*i}` in `F`
-    let quot_native_assigned = OverflowInteger::<F>::evaluate(
+    let quot_native = OverflowInteger::<F>::evaluate(
         range.gate(),
-        /*chip,*/ ctx,
-        &quot_assigned,
-        limb_bases.iter().cloned(),
+        ctx,
+        quot_assigned,
+        limb_bases.iter().copied(),
     );
 
     // Check `0 + modulus * quotient - a = 0` in native field
     // | 0 | modulus | quotient | a |
-    let _native_computation = range.gate().assign_region(
-        ctx,
-        vec![
-            Constant(F::zero()),
-            Constant(mod_native),
-            Existing(&quot_native_assigned),
-            Existing(&a.native),
-        ],
-        vec![(0, None)],
+    ctx.assign_region(
+        [Constant(F::zero()), Constant(mod_native), Existing(quot_native), Existing(a.native)],
+        [0],
     );
 }
diff --git a/halo2-ecc/src/bigint/check_carry_to_zero.rs b/halo2-ecc/src/bigint/check_carry_to_zero.rs
index e718b128..fa2f5648 100644
--- a/halo2-ecc/src/bigint/check_carry_to_zero.rs
+++ b/halo2-ecc/src/bigint/check_carry_to_zero.rs
@@ -1,13 +1,11 @@
 use super::OverflowInteger;
-use crate::halo2_proofs::circuit::Value;
 use halo2_base::{
     gates::{GateInstructions, RangeInstructions},
-    utils::{bigint_to_fe, biguint_to_fe, fe_to_bigint, value_to_option, PrimeField},
+    utils::{bigint_to_fe, fe_to_bigint, BigPrimeField},
     Context,
     QuantumCell::{Constant, Existing, Witness},
 };
-use num_bigint::{BigInt, BigUint};
-use num_traits::One;
+use num_bigint::BigInt;
 
 // check that `a` carries to `0 mod 2^{a.limb_bits * a.limbs.len()}`
 // same as `assign` above except we need to provide `c_{k - 1}` witness as well
@@ -26,10 +24,10 @@ use num_traits::One;
 // a_i * 2^{n*w} + a_{i - 1} * 2^{n*(w-1)} + ... + a_{i - w} + c_{i - w - 1} = c_i * 2^{n*(w+1)}
 // which is valid as long as `(m - n + EPSILON) + n * (w+1) < native_modulus::<F>().bits() - 1`
 // so we only need to range check `c_i` every `w + 1` steps, starting with `i = w`
-pub fn truncate<'a, F: PrimeField>(
+pub fn truncate<F: BigPrimeField>(
     range: &impl RangeInstructions<F>,
-    ctx: &mut Context<'a, F>,
-    a: &OverflowInteger<'a, F>,
+    ctx: &mut Context<F>,
+    a: OverflowInteger<F>,
     limb_bits: usize,
     limb_base: F,
     limb_base_big: &BigInt,
@@ -37,27 +35,16 @@ pub fn truncate<'a, F: PrimeField>(
     let k = a.limbs.len();
     let max_limb_bits = a.max_limb_bits;
 
-    #[cfg(feature = "display")]
-    {
-        let key = format!("check_carry_to_zero(trunc) length {k}");
-        let count = ctx.op_count.entry(key).or_insert(0);
-        *count += 1;
-    }
-
-    let mut carries: Vec<Value<BigInt>> = Vec::with_capacity(k);
+    let mut carries = Vec::with_capacity(k);
 
     for a_limb in a.limbs.iter() {
-        let a_val = a_limb.value();
-        let carry = a_val.map(|a_fe| {
-            let a_val_big = fe_to_bigint(a_fe);
-            if carries.is_empty() {
-                // warning: using >> on negative integer produces undesired effect
-                a_val_big / limb_base_big
-            } else {
-                let carry_val = value_to_option(carries.last().unwrap().as_ref()).unwrap();
-                (a_val_big + carry_val) / limb_base_big
-            }
-        });
+        let a_val_big = fe_to_bigint(a_limb.value());
+        let carry = if let Some(carry_val) = carries.last() {
+            (a_val_big + carry_val) / limb_base_big
+        } else {
+            // warning: using >> on negative integer produces undesired effect
+            a_val_big / limb_base_big
+        };
         carries.push(carry);
     }
 
@@ -69,44 +56,30 @@ pub fn truncate<'a, F: PrimeField>(
     // `window = w + 1` valid as long as `range_bits + n * (w+1) < native_modulus::<F>().bits() - 1`
     // let window = (F::NUM_BITS as usize - 2 - range_bits) / limb_bits;
     // assert!(window > 0);
+    // In practice, we are currently always using window = 1 so the above is commented out
 
-    // TODO: maybe we can also cache these bigints
-    let shift_val = biguint_to_fe::<F>(&(BigUint::one() << range_bits));
+    let shift_val = range.gate().pow_of_two()[range_bits];
     // let num_windows = (k - 1) / window + 1; // = ((k - 1) - (window - 1) + window - 1) / window + 1;
 
     let mut previous = None;
-    for (a_limb, carry) in a.limbs.iter().zip(carries.iter()) {
-        let neg_carry_val = carry.as_ref().map(|c| bigint_to_fe::<F>(&-c));
-        let neg_carry = range
-            .gate()
-            .assign_region(
-                ctx,
-                vec![
-                    Existing(a_limb),
-                    Witness(neg_carry_val),
-                    Constant(limb_base),
-                    previous.as_ref().map(Existing).unwrap_or_else(|| Constant(F::zero())),
-                ],
-                vec![(0, None)],
-            )
-            .into_iter()
-            .nth(1)
-            .unwrap();
+    for (a_limb, carry) in a.limbs.into_iter().zip(carries.into_iter()) {
+        let neg_carry_val = bigint_to_fe(&-carry);
+        ctx.assign_region(
+            [
+                Existing(a_limb),
+                Witness(neg_carry_val),
+                Constant(limb_base),
+                previous.map(Existing).unwrap_or_else(|| Constant(F::zero())),
+            ],
+            [0],
+        );
+        let neg_carry = ctx.get(-3);
 
         // i in 0..num_windows {
         // let idx = std::cmp::min(window * i + window - 1, k - 1);
         // let carry_cell = &neg_carry_assignments[idx];
-        let shifted_carry = {
-            let shift_carry_val = Value::known(shift_val) + neg_carry.value();
-            let cells = vec![
-                Existing(&neg_carry),
-                Constant(F::one()),
-                Constant(shift_val),
-                Witness(shift_carry_val),
-            ];
-            range.gate().assign_region_last(ctx, cells, vec![(0, None)])
-        };
-        range.range_check(ctx, &shifted_carry, range_bits + 1);
+        let shifted_carry = range.gate().add(ctx, neg_carry, Constant(shift_val));
+        range.range_check(ctx, shifted_carry, range_bits + 1);
 
         previous = Some(neg_carry);
     }
diff --git a/halo2-ecc/src/bigint/mod.rs b/halo2-ecc/src/bigint/mod.rs
index 41e080d5..a8c93bd2 100644
--- a/halo2-ecc/src/bigint/mod.rs
+++ b/halo2-ecc/src/bigint/mod.rs
@@ -1,17 +1,12 @@
-use crate::halo2_proofs::{
-    circuit::{Cell, Value},
-    plonk::ConstraintSystem,
-};
+use crate::halo2_proofs::circuit::Cell;
 use halo2_base::{
-    gates::flex_gate::{FlexGateConfig, GateInstructions},
-    utils::{biguint_to_fe, decompose_biguint, fe_to_biguint, PrimeField},
+    gates::flex_gate::GateInstructions,
+    utils::{biguint_to_fe, decompose_biguint, fe_to_biguint, BigPrimeField, ScalarField},
     AssignedValue, Context,
-    QuantumCell::{Constant, Existing, Witness},
+    QuantumCell::Constant,
 };
-use itertools::Itertools;
 use num_bigint::{BigInt, BigUint};
 use num_traits::Zero;
-use std::{marker::PhantomData, rc::Rc};
 
 pub mod add_no_carry;
 pub mod big_is_equal;
@@ -45,51 +40,50 @@ impl Default for BigIntStrategy {
 }
 
 #[derive(Clone, Debug)]
-pub struct OverflowInteger<'v, F: PrimeField> {
-    pub limbs: Vec<AssignedValue<'v, F>>,
+pub struct OverflowInteger<F: ScalarField> {
+    pub limbs: Vec<AssignedValue<F>>,
     // max bits of a limb, ignoring sign
     pub max_limb_bits: usize,
     // the standard limb bit that we use for pow of two limb base - to reduce overhead we just assume this is inferred from context (e.g., the chip stores it), so we stop storing it here
     // pub limb_bits: usize,
 }
 
-impl<'v, F: PrimeField> OverflowInteger<'v, F> {
-    pub fn construct(limbs: Vec<AssignedValue<'v, F>>, max_limb_bits: usize) -> Self {
+impl<F: ScalarField> OverflowInteger<F> {
+    pub fn construct(limbs: Vec<AssignedValue<F>>, max_limb_bits: usize) -> Self {
         Self { limbs, max_limb_bits }
     }
 
     // convenience function for testing
     #[cfg(test)]
-    pub fn to_bigint(&self, limb_bits: usize) -> Value<BigInt> {
+    pub fn to_bigint(&self, limb_bits: usize) -> BigInt
+    where
+        F: BigPrimeField,
+    {
         use halo2_base::utils::fe_to_bigint;
 
-        self.limbs.iter().rev().fold(Value::known(BigInt::zero()), |acc, acell| {
-            acc.zip(acell.value()).map(|(acc, x)| (acc << limb_bits) + fe_to_bigint(x))
-        })
+        self.limbs
+            .iter()
+            .rev()
+            .fold(BigInt::zero(), |acc, acell| (acc << limb_bits) + fe_to_bigint(acell.value()))
     }
 
     pub fn evaluate(
         gate: &impl GateInstructions<F>,
-        // chip: &BigIntConfig<F>,
-        ctx: &mut Context<'_, F>,
-        limbs: &[AssignedValue<'v, F>],
+        ctx: &mut Context<F>,
+        limbs: impl IntoIterator<Item = AssignedValue<F>>,
         limb_bases: impl IntoIterator<Item = F>,
-    ) -> AssignedValue<'v, F> {
+    ) -> AssignedValue<F> {
         // Constrain `out_native = sum_i out_assigned[i] * 2^{n*i}` in `F`
-        gate.inner_product(
-            ctx,
-            limbs.iter().map(|a| Existing(a)),
-            limb_bases.into_iter().map(|c| Constant(c)),
-        )
+        gate.inner_product(ctx, limbs, limb_bases.into_iter().map(|c| Constant(c)))
     }
 }
 
 #[derive(Clone, Debug)]
-pub struct FixedOverflowInteger<F: PrimeField> {
+pub struct FixedOverflowInteger<F: ScalarField> {
     pub limbs: Vec<F>,
 }
 
-impl<F: PrimeField> FixedOverflowInteger<F> {
+impl<F: BigPrimeField> FixedOverflowInteger<F> {
     pub fn construct(limbs: Vec<F>) -> Self {
         Self { limbs }
     }
@@ -109,30 +103,25 @@ impl<F: PrimeField> FixedOverflowInteger<F> {
             .fold(BigUint::zero(), |acc, x| (acc << limb_bits) + fe_to_biguint(x))
     }
 
-    pub fn assign<'v>(
-        self,
-        gate: &impl GateInstructions<F>,
-        ctx: &mut Context<'_, F>,
-        limb_bits: usize,
-    ) -> OverflowInteger<'v, F> {
-        let assigned_limbs = gate.assign_region(ctx, self.limbs.into_iter().map(Constant), vec![]);
+    pub fn assign(self, ctx: &mut Context<F>, limb_bits: usize) -> OverflowInteger<F> {
+        let assigned_limbs = self.limbs.into_iter().map(|limb| ctx.load_constant(limb)).collect();
         OverflowInteger::construct(assigned_limbs, limb_bits)
     }
 
     /// only use case is when coeffs has only a single 1, rest are 0
-    pub fn select_by_indicator<'v>(
+    pub fn select_by_indicator(
         gate: &impl GateInstructions<F>,
-        ctx: &mut Context<'_, F>,
+        ctx: &mut Context<F>,
         a: &[Self],
-        coeffs: &[AssignedValue<'v, F>],
+        coeffs: &[AssignedValue<F>],
         limb_bits: usize,
-    ) -> OverflowInteger<'v, F> {
+    ) -> OverflowInteger<F> {
         let k = a[0].limbs.len();
 
         let out_limbs = (0..k)
             .map(|idx| {
                 let int_limbs = a.iter().map(|a| Constant(a.limbs[idx]));
-                gate.select_by_indicator(ctx, int_limbs, coeffs.iter())
+                gate.select_by_indicator(ctx, int_limbs, coeffs.iter().copied())
             })
             .collect();
 
@@ -141,7 +130,7 @@ impl<F: PrimeField> FixedOverflowInteger<F> {
 }
 
 #[derive(Clone, Debug)]
-pub struct CRTInteger<'v, F: PrimeField> {
+pub struct CRTInteger<F: ScalarField> {
     // keep track of an integer `a` using CRT as `a mod 2^t` and `a mod n`
     // where `t = truncation.limbs.len() * truncation.limb_bits`
     //       `n = modulus::<Fn>`
@@ -153,31 +142,31 @@ pub struct CRTInteger<'v, F: PrimeField> {
 
     // the IMPLICIT ASSUMPTION: `value (mod 2^t) = truncation` && `value (mod n) = native`
     // this struct should only be used if the implicit assumption above is satisfied
-    pub truncation: OverflowInteger<'v, F>,
-    pub native: AssignedValue<'v, F>,
-    pub value: Value<BigInt>,
+    pub truncation: OverflowInteger<F>,
+    pub native: AssignedValue<F>,
+    pub value: BigInt,
 }
 
-impl<'v, F: PrimeField> CRTInteger<'v, F> {
+impl<F: ScalarField> CRTInteger<F> {
     pub fn construct(
-        truncation: OverflowInteger<'v, F>,
-        native: AssignedValue<'v, F>,
-        value: Value<BigInt>,
+        truncation: OverflowInteger<F>,
+        native: AssignedValue<F>,
+        value: BigInt,
     ) -> Self {
         Self { truncation, native, value }
     }
 
-    pub fn native(&self) -> &AssignedValue<'v, F> {
+    pub fn native(&self) -> &AssignedValue<F> {
         &self.native
     }
 
-    pub fn limbs(&self) -> &[AssignedValue<'v, F>] {
+    pub fn limbs(&self) -> &[AssignedValue<F>] {
         self.truncation.limbs.as_slice()
     }
 }
 
 #[derive(Clone, Debug)]
-pub struct FixedCRTInteger<F: PrimeField> {
+pub struct FixedCRTInteger<F: ScalarField> {
     // keep track of an integer `a` using CRT as `a mod 2^t` and `a mod n`
     // where `t = truncation.limbs.len() * truncation.limb_bits`
     //       `n = modulus::<Fn>`
@@ -194,13 +183,13 @@ pub struct FixedCRTInteger<F: PrimeField> {
 }
 
 #[derive(Clone, Debug)]
-pub struct FixedAssignedCRTInteger<F: PrimeField> {
+pub struct FixedAssignedCRTInteger<F: ScalarField> {
     pub truncation: FixedOverflowInteger<F>,
     pub limb_fixed_cells: Vec<Cell>,
     pub value: BigUint,
 }
 
-impl<F: PrimeField> FixedCRTInteger<F> {
+impl<F: BigPrimeField> FixedCRTInteger<F> {
     pub fn construct(truncation: FixedOverflowInteger<F>, value: BigUint) -> Self {
         Self { truncation, value }
     }
@@ -212,90 +201,14 @@ impl<F: PrimeField> FixedCRTInteger<F> {
         Self { truncation, value }
     }
 
-    pub fn assign<'a>(
+    pub fn assign(
         self,
-        gate: &impl GateInstructions<F>,
-        ctx: &mut Context<'_, F>,
+        ctx: &mut Context<F>,
         limb_bits: usize,
         native_modulus: &BigUint,
-    ) -> CRTInteger<'a, F> {
-        let assigned_truncation = self.truncation.assign(gate, ctx, limb_bits);
-        let assigned_native = {
-            let native_cells = vec![Constant(biguint_to_fe(&(&self.value % native_modulus)))];
-            gate.assign_region_last(ctx, native_cells, vec![])
-        };
-        CRTInteger::construct(assigned_truncation, assigned_native, Value::known(self.value.into()))
-    }
-
-    pub fn assign_without_caching<'a>(
-        self,
-        gate: &impl GateInstructions<F>,
-        ctx: &mut Context<'_, F>,
-        limb_bits: usize,
-        native_modulus: &BigUint,
-    ) -> CRTInteger<'a, F> {
-        let fixed_cells = self
-            .truncation
-            .limbs
-            .iter()
-            .map(|limb| ctx.assign_fixed_without_caching(*limb))
-            .collect_vec();
-        let assigned_limbs = gate.assign_region(
-            ctx,
-            self.truncation.limbs.into_iter().map(|v| Witness(Value::known(v))),
-            vec![],
-        );
-        for (cell, acell) in fixed_cells.iter().zip(assigned_limbs.iter()) {
-            #[cfg(feature = "halo2-axiom")]
-            ctx.region.constrain_equal(cell, acell.cell());
-            #[cfg(feature = "halo2-pse")]
-            ctx.region.constrain_equal(*cell, acell.cell()).unwrap();
-        }
-        let assigned_native = {
-            let native_val = biguint_to_fe(&(&self.value % native_modulus));
-            let cell = ctx.assign_fixed_without_caching(native_val);
-            let acell =
-                gate.assign_region_last(ctx, vec![Witness(Value::known(native_val))], vec![]);
-
-            #[cfg(feature = "halo2-axiom")]
-            ctx.region.constrain_equal(&cell, acell.cell());
-            #[cfg(feature = "halo2-pse")]
-            ctx.region.constrain_equal(cell, acell.cell()).unwrap();
-
-            acell
-        };
-        CRTInteger::construct(
-            OverflowInteger::construct(assigned_limbs, limb_bits),
-            assigned_native,
-            Value::known(self.value.into()),
-        )
-    }
-}
-
-#[derive(Clone, Debug, Default)]
-#[allow(dead_code)]
-pub struct BigIntConfig<F: PrimeField> {
-    // everything is empty if strategy is `Simple` or `SimplePlus`
-    strategy: BigIntStrategy,
-    context_id: Rc<String>,
-    _marker: PhantomData<F>,
-}
-
-impl<F: PrimeField> BigIntConfig<F> {
-    pub fn configure(
-        _meta: &mut ConstraintSystem<F>,
-        strategy: BigIntStrategy,
-        _limb_bits: usize,
-        _num_limbs: usize,
-        _gate: &FlexGateConfig<F>,
-        context_id: String,
-    ) -> Self {
-        // let mut q_dot_constant = HashMap::new();
-        /*
-        match strategy {
-            _ => {}
-        }
-        */
-        Self { strategy, _marker: PhantomData, context_id: Rc::new(context_id) }
+    ) -> CRTInteger<F> {
+        let assigned_truncation = self.truncation.assign(ctx, limb_bits);
+        let assigned_native = ctx.load_constant(biguint_to_fe(&(&self.value % native_modulus)));
+        CRTInteger::construct(assigned_truncation, assigned_native, self.value.into())
     }
 }
diff --git a/halo2-ecc/src/bigint/mul_no_carry.rs b/halo2-ecc/src/bigint/mul_no_carry.rs
index 637c17e6..b6d5e745 100644
--- a/halo2-ecc/src/bigint/mul_no_carry.rs
+++ b/halo2-ecc/src/bigint/mul_no_carry.rs
@@ -1,35 +1,27 @@
 use super::{CRTInteger, OverflowInteger};
-use halo2_base::{gates::GateInstructions, utils::PrimeField, Context, QuantumCell::Existing};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, Context, QuantumCell::Existing};
 
-pub fn truncate<'v, F: PrimeField>(
+pub fn truncate<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    // _chip: &BigIntConfig<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-    b: &OverflowInteger<'v, F>,
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+    b: &OverflowInteger<F>,
     num_limbs_log2_ceil: usize,
-) -> OverflowInteger<'v, F> {
+) -> OverflowInteger<F> {
     let k = a.limbs.len();
-    assert!(k > 0);
     assert_eq!(k, b.limbs.len());
+    debug_assert!(k > 0);
 
-    #[cfg(feature = "display")]
-    {
-        let key = format!("mul_no_carry(truncate) length {k}");
-        let count = ctx.op_count.entry(key).or_insert(0);
-        *count += 1;
-
-        assert!(
-            num_limbs_log2_ceil + a.max_limb_bits + b.max_limb_bits <= F::NUM_BITS as usize - 2
-        );
-    }
+    debug_assert!(
+        num_limbs_log2_ceil + a.max_limb_bits + b.max_limb_bits <= F::NUM_BITS as usize - 2
+    );
 
     let out_limbs = (0..k)
         .map(|i| {
             gate.inner_product(
                 ctx,
-                a.limbs[..=i].iter().map(Existing),
-                b.limbs[..=i].iter().rev().map(Existing),
+                a.limbs[..=i].iter().copied(),
+                b.limbs[..=i].iter().rev().map(|x| Existing(*x)),
             )
         })
         .collect();
@@ -37,17 +29,16 @@ pub fn truncate<'v, F: PrimeField>(
     OverflowInteger::construct(out_limbs, num_limbs_log2_ceil + a.max_limb_bits + b.max_limb_bits)
 }
 
-pub fn crt<'v, F: PrimeField>(
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    // chip: &BigIntConfig<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-    b: &CRTInteger<'v, F>,
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
     num_limbs_log2_ceil: usize,
-) -> CRTInteger<'v, F> {
+) -> CRTInteger<F> {
     let out_trunc = truncate::<F>(gate, ctx, &a.truncation, &b.truncation, num_limbs_log2_ceil);
-    let out_native = gate.mul(ctx, Existing(&a.native), Existing(&b.native));
-    let out_val = a.value.as_ref() * b.value.as_ref();
+    let out_native = gate.mul(ctx, a.native, b.native);
+    let out_val = &a.value * &b.value;
 
     CRTInteger::construct(out_trunc, out_native, out_val)
 }
diff --git a/halo2-ecc/src/bigint/negative.rs b/halo2-ecc/src/bigint/negative.rs
index 60183c3f..45a7d817 100644
--- a/halo2-ecc/src/bigint/negative.rs
+++ b/halo2-ecc/src/bigint/negative.rs
@@ -1,11 +1,11 @@
 use super::OverflowInteger;
-use halo2_base::{gates::GateInstructions, utils::PrimeField, Context, QuantumCell::Existing};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, Context};
 
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-) -> OverflowInteger<'v, F> {
-    let out_limbs = a.limbs.iter().map(|limb| gate.neg(ctx, Existing(limb))).collect();
+    ctx: &mut Context<F>,
+    a: OverflowInteger<F>,
+) -> OverflowInteger<F> {
+    let out_limbs = a.limbs.into_iter().map(|limb| gate.neg(ctx, limb)).collect();
     OverflowInteger::construct(out_limbs, a.max_limb_bits)
 }
diff --git a/halo2-ecc/src/bigint/scalar_mul_and_add_no_carry.rs b/halo2-ecc/src/bigint/scalar_mul_and_add_no_carry.rs
index 1c64e24f..579aff01 100644
--- a/halo2-ecc/src/bigint/scalar_mul_and_add_no_carry.rs
+++ b/halo2-ecc/src/bigint/scalar_mul_and_add_no_carry.rs
@@ -1,49 +1,43 @@
 use super::{CRTInteger, OverflowInteger};
 use halo2_base::{
     gates::GateInstructions,
-    utils::{log2_ceil, PrimeField},
+    utils::{log2_ceil, ScalarField},
     Context,
-    QuantumCell::{Constant, Existing, Witness},
+    QuantumCell::Constant,
 };
 use std::cmp::max;
 
 /// compute a * c + b = b + a * c
 // this is uniquely suited for our simple gate
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-    b: &OverflowInteger<'v, F>,
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+    b: &OverflowInteger<F>,
     c_f: F,
     c_log2_ceil: usize,
-) -> OverflowInteger<'v, F> {
-    assert_eq!(a.limbs.len(), b.limbs.len());
+) -> OverflowInteger<F> {
+    debug_assert_eq!(a.limbs.len(), b.limbs.len());
 
     let out_limbs = a
         .limbs
         .iter()
         .zip(b.limbs.iter())
-        .map(|(a_limb, b_limb)| {
-            let out_val = a_limb.value().zip(b_limb.value()).map(|(a, b)| c_f * a + b);
-            gate.assign_region_last(
-                ctx,
-                vec![Existing(b_limb), Existing(a_limb), Constant(c_f), Witness(out_val)],
-                vec![(0, None)],
-            )
-        })
+        .map(|(&a_limb, &b_limb)| gate.mul_add(ctx, a_limb, Constant(c_f), b_limb))
         .collect();
 
     OverflowInteger::construct(out_limbs, max(a.max_limb_bits + c_log2_ceil, b.max_limb_bits) + 1)
 }
 
-pub fn crt<'v, F: PrimeField>(
+/// compute a * c + b = b + a * c
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-    b: &CRTInteger<'v, F>,
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
     c: i64,
-) -> CRTInteger<'v, F> {
-    assert_eq!(a.truncation.limbs.len(), b.truncation.limbs.len());
+) -> CRTInteger<F> {
+    debug_assert_eq!(a.truncation.limbs.len(), b.truncation.limbs.len());
 
     let (c_f, c_abs) = if c >= 0 {
         let c_abs = u64::try_from(c).unwrap();
@@ -54,14 +48,7 @@ pub fn crt<'v, F: PrimeField>(
     };
 
     let out_trunc = assign::<F>(gate, ctx, &a.truncation, &b.truncation, c_f, log2_ceil(c_abs));
-    let out_native = {
-        let out_val = b.native.value().zip(a.native.value()).map(|(b, a)| c_f * a + b);
-        gate.assign_region_last(
-            ctx,
-            vec![Existing(&b.native), Existing(&a.native), Constant(c_f), Witness(out_val)],
-            vec![(0, None)],
-        )
-    };
-    let out_val = a.value.as_ref().zip(b.value.as_ref()).map(|(a, b)| a * c + b);
+    let out_native = gate.mul_add(ctx, a.native, Constant(c_f), b.native);
+    let out_val = &a.value * c + &b.value;
     CRTInteger::construct(out_trunc, out_native, out_val)
 }
diff --git a/halo2-ecc/src/bigint/scalar_mul_no_carry.rs b/halo2-ecc/src/bigint/scalar_mul_no_carry.rs
index 4aff4b0c..60029e92 100644
--- a/halo2-ecc/src/bigint/scalar_mul_no_carry.rs
+++ b/halo2-ecc/src/bigint/scalar_mul_no_carry.rs
@@ -1,29 +1,28 @@
 use super::{CRTInteger, OverflowInteger};
 use halo2_base::{
     gates::GateInstructions,
-    utils::{log2_ceil, PrimeField},
+    utils::{log2_ceil, ScalarField},
     Context,
-    QuantumCell::{Constant, Existing},
+    QuantumCell::Constant,
 };
 
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
+    ctx: &mut Context<F>,
+    a: OverflowInteger<F>,
     c_f: F,
     c_log2_ceil: usize,
-) -> OverflowInteger<'v, F> {
-    let out_limbs =
-        a.limbs.iter().map(|limb| gate.mul(ctx, Existing(limb), Constant(c_f))).collect();
+) -> OverflowInteger<F> {
+    let out_limbs = a.limbs.into_iter().map(|limb| gate.mul(ctx, limb, Constant(c_f))).collect();
     OverflowInteger::construct(out_limbs, a.max_limb_bits + c_log2_ceil)
 }
 
-pub fn crt<'v, F: PrimeField>(
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
     c: i64,
-) -> CRTInteger<'v, F> {
+) -> CRTInteger<F> {
     let (c_f, c_abs) = if c >= 0 {
         let c_abs = u64::try_from(c).unwrap();
         (F::from(c_abs), c_abs)
@@ -32,15 +31,11 @@ pub fn crt<'v, F: PrimeField>(
         (-F::from(c_abs), c_abs)
     };
 
-    let out_limbs = a
-        .truncation
-        .limbs
-        .iter()
-        .map(|limb| gate.mul(ctx, Existing(limb), Constant(c_f)))
-        .collect();
+    let out_limbs =
+        a.truncation.limbs.iter().map(|limb| gate.mul(ctx, *limb, Constant(c_f))).collect();
 
-    let out_native = gate.mul(ctx, Existing(&a.native), Constant(c_f));
-    let out_val = a.value.as_ref().map(|a| a * c);
+    let out_native = gate.mul(ctx, a.native, Constant(c_f));
+    let out_val = &a.value * c;
 
     CRTInteger::construct(
         OverflowInteger::construct(out_limbs, a.truncation.max_limb_bits + log2_ceil(c_abs)),
diff --git a/halo2-ecc/src/bigint/select.rs b/halo2-ecc/src/bigint/select.rs
index aa296164..1146eeb5 100644
--- a/halo2-ecc/src/bigint/select.rs
+++ b/halo2-ecc/src/bigint/select.rs
@@ -1,41 +1,39 @@
 use super::{CRTInteger, OverflowInteger};
-use halo2_base::{
-    gates::GateInstructions, utils::PrimeField, AssignedValue, Context, QuantumCell::Existing,
-};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, AssignedValue, Context};
 use std::cmp::max;
 
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-    b: &OverflowInteger<'v, F>,
-    sel: &AssignedValue<'v, F>,
-) -> OverflowInteger<'v, F> {
-    assert_eq!(a.limbs.len(), b.limbs.len());
+    ctx: &mut Context<F>,
+    a: OverflowInteger<F>,
+    b: OverflowInteger<F>,
+    sel: AssignedValue<F>,
+) -> OverflowInteger<F> {
+    debug_assert_eq!(a.limbs.len(), b.limbs.len());
     let out_limbs = a
         .limbs
-        .iter()
-        .zip(b.limbs.iter())
-        .map(|(a_limb, b_limb)| gate.select(ctx, Existing(a_limb), Existing(b_limb), Existing(sel)))
+        .into_iter()
+        .zip(b.limbs.into_iter())
+        .map(|(a_limb, b_limb)| gate.select(ctx, a_limb, b_limb, sel))
         .collect();
 
     OverflowInteger::construct(out_limbs, max(a.max_limb_bits, b.max_limb_bits))
 }
 
-pub fn crt<'v, F: PrimeField>(
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-    b: &CRTInteger<'v, F>,
-    sel: &AssignedValue<'v, F>,
-) -> CRTInteger<'v, F> {
-    assert_eq!(a.truncation.limbs.len(), b.truncation.limbs.len());
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
+    sel: AssignedValue<F>,
+) -> CRTInteger<F> {
+    debug_assert_eq!(a.truncation.limbs.len(), b.truncation.limbs.len());
     let out_limbs = a
         .truncation
         .limbs
         .iter()
         .zip(b.truncation.limbs.iter())
-        .map(|(a_limb, b_limb)| gate.select(ctx, Existing(a_limb), Existing(b_limb), Existing(sel)))
+        .map(|(&a_limb, &b_limb)| gate.select(ctx, a_limb, b_limb, sel))
         .collect();
 
     let out_trunc = OverflowInteger::construct(
@@ -43,13 +41,7 @@ pub fn crt<'v, F: PrimeField>(
         max(a.truncation.max_limb_bits, b.truncation.max_limb_bits),
     );
 
-    let out_native = gate.select(ctx, Existing(&a.native), Existing(&b.native), Existing(sel));
-    let out_val = a.value.as_ref().zip(b.value.as_ref()).zip(sel.value()).map(|((a, b), s)| {
-        if s.is_zero_vartime() {
-            b.clone()
-        } else {
-            a.clone()
-        }
-    });
+    let out_native = gate.select(ctx, a.native, b.native, sel);
+    let out_val = if sel.value().is_zero_vartime() { b.value.clone() } else { a.value.clone() };
     CRTInteger::construct(out_trunc, out_native, out_val)
 }
diff --git a/halo2-ecc/src/bigint/select_by_indicator.rs b/halo2-ecc/src/bigint/select_by_indicator.rs
index 87597804..30aa5ab2 100644
--- a/halo2-ecc/src/bigint/select_by_indicator.rs
+++ b/halo2-ecc/src/bigint/select_by_indicator.rs
@@ -1,25 +1,22 @@
 use super::{CRTInteger, OverflowInteger};
-use crate::halo2_proofs::circuit::Value;
-use halo2_base::{
-    gates::GateInstructions, utils::PrimeField, AssignedValue, Context, QuantumCell::Existing,
-};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, AssignedValue, Context};
 use num_bigint::BigInt;
 use num_traits::Zero;
 use std::cmp::max;
 
 /// only use case is when coeffs has only a single 1, rest are 0
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &[OverflowInteger<'v, F>],
-    coeffs: &[AssignedValue<'v, F>],
-) -> OverflowInteger<'v, F> {
+    ctx: &mut Context<F>,
+    a: &[OverflowInteger<F>],
+    coeffs: &[AssignedValue<F>],
+) -> OverflowInteger<F> {
     let k = a[0].limbs.len();
 
     let out_limbs = (0..k)
         .map(|idx| {
-            let int_limbs = a.iter().map(|a| Existing(&a.limbs[idx]));
-            gate.select_by_indicator(ctx, int_limbs, coeffs.iter())
+            let int_limbs = a.iter().map(|a| a.limbs[idx]);
+            gate.select_by_indicator(ctx, int_limbs, coeffs.iter().copied())
         })
         .collect();
 
@@ -29,20 +26,20 @@ pub fn assign<'v, F: PrimeField>(
 }
 
 /// only use case is when coeffs has only a single 1, rest are 0
-pub fn crt<'v, F: PrimeField>(
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &[CRTInteger<'v, F>],
-    coeffs: &[AssignedValue<'v, F>],
+    ctx: &mut Context<F>,
+    a: &[CRTInteger<F>],
+    coeffs: &[AssignedValue<F>],
     limb_bases: &[F],
-) -> CRTInteger<'v, F> {
+) -> CRTInteger<F> {
     assert_eq!(a.len(), coeffs.len());
     let k = a[0].truncation.limbs.len();
 
     let out_limbs = (0..k)
         .map(|idx| {
-            let int_limbs = a.iter().map(|a| Existing(&a.truncation.limbs[idx]));
-            gate.select_by_indicator(ctx, int_limbs, coeffs.iter())
+            let int_limbs = a.iter().map(|a| a.truncation.limbs[idx]);
+            gate.select_by_indicator(ctx, int_limbs, coeffs.iter().copied())
         })
         .collect();
 
@@ -50,19 +47,22 @@ pub fn crt<'v, F: PrimeField>(
 
     let out_trunc = OverflowInteger::construct(out_limbs, max_limb_bits);
     let out_native = if a.len() > k {
-        OverflowInteger::<F>::evaluate(gate, ctx, &out_trunc.limbs, limb_bases[..k].iter().cloned())
+        OverflowInteger::<F>::evaluate(
+            gate,
+            ctx,
+            out_trunc.limbs.iter().copied(),
+            limb_bases[..k].iter().copied(),
+        )
     } else {
-        let a_native = a.iter().map(|x| Existing(&x.native));
-        gate.select_by_indicator(ctx, a_native, coeffs.iter())
+        let a_native = a.iter().map(|x| x.native);
+        gate.select_by_indicator(ctx, a_native, coeffs.iter().copied())
     };
-    let out_val = a.iter().zip(coeffs.iter()).fold(Value::known(BigInt::zero()), |acc, (x, y)| {
-        acc.zip(x.value.as_ref()).zip(y.value()).map(|((a, x), y)| {
-            if y.is_zero_vartime() {
-                a
-            } else {
-                x.clone()
-            }
-        })
+    let out_val = a.iter().zip(coeffs.iter()).fold(BigInt::zero(), |acc, (x, y)| {
+        if y.value().is_zero_vartime() {
+            acc
+        } else {
+            x.value.clone()
+        }
     });
 
     CRTInteger::construct(out_trunc, out_native, out_val)
diff --git a/halo2-ecc/src/bigint/sub.rs b/halo2-ecc/src/bigint/sub.rs
index 5e987f0c..2d4d83ff 100644
--- a/halo2-ecc/src/bigint/sub.rs
+++ b/halo2-ecc/src/bigint/sub.rs
@@ -1,61 +1,54 @@
 use super::{CRTInteger, OverflowInteger};
 use halo2_base::{
     gates::{GateInstructions, RangeInstructions},
-    utils::PrimeField,
+    utils::ScalarField,
     AssignedValue, Context,
     QuantumCell::{Constant, Existing, Witness},
 };
 
 /// Should only be called on integers a, b in proper representation with all limbs having at most `limb_bits` number of bits
-pub fn assign<'a, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     range: &impl RangeInstructions<F>,
-    ctx: &mut Context<'a, F>,
-    a: &OverflowInteger<'a, F>,
-    b: &OverflowInteger<'a, F>,
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+    b: &OverflowInteger<F>,
     limb_bits: usize,
     limb_base: F,
-) -> (OverflowInteger<'a, F>, AssignedValue<'a, F>) {
-    assert!(a.max_limb_bits <= limb_bits);
-    assert!(b.max_limb_bits <= limb_bits);
-    assert_eq!(a.limbs.len(), b.limbs.len());
+) -> (OverflowInteger<F>, AssignedValue<F>) {
+    debug_assert!(a.max_limb_bits <= limb_bits);
+    debug_assert!(b.max_limb_bits <= limb_bits);
+    debug_assert_eq!(a.limbs.len(), b.limbs.len());
     let k = a.limbs.len();
     let mut out_limbs = Vec::with_capacity(k);
 
     let mut borrow: Option<AssignedValue<F>> = None;
-    for (a_limb, b_limb) in a.limbs.iter().zip(b.limbs.iter()) {
+    for (&a_limb, &b_limb) in a.limbs.iter().zip(b.limbs.iter()) {
         let (bottom, lt) = match borrow {
             None => {
-                let lt = range.is_less_than(ctx, Existing(a_limb), Existing(b_limb), limb_bits);
-                (b_limb.clone(), lt)
+                let lt = range.is_less_than(ctx, a_limb, b_limb, limb_bits);
+                (b_limb, lt)
             }
             Some(borrow) => {
-                let b_plus_borrow = range.gate().add(ctx, Existing(b_limb), Existing(&borrow));
-                let lt = range.is_less_than(
-                    ctx,
-                    Existing(a_limb),
-                    Existing(&b_plus_borrow),
-                    limb_bits + 1,
-                );
+                let b_plus_borrow = range.gate().add(ctx, b_limb, borrow);
+                let lt = range.is_less_than(ctx, a_limb, b_plus_borrow, limb_bits + 1);
                 (b_plus_borrow, lt)
             }
         };
         let out_limb = {
             // | a | lt | 2^n | a + lt * 2^n | -1 | bottom | a + lt * 2^n - bottom
-            let a_with_borrow_val =
-                a_limb.value().zip(lt.value()).map(|(a, lt)| limb_base * lt + a);
-            let out_val = a_with_borrow_val.zip(bottom.value()).map(|(ac, b)| ac - b);
-            range.gate().assign_region_last(
-                ctx,
-                vec![
+            let a_with_borrow_val = limb_base * lt.value() + a_limb.value();
+            let out_val = a_with_borrow_val - bottom.value();
+            ctx.assign_region_last(
+                [
                     Existing(a_limb),
-                    Existing(&lt),
+                    Existing(lt),
                     Constant(limb_base),
                     Witness(a_with_borrow_val),
                     Constant(-F::one()),
-                    Existing(&bottom),
+                    Existing(bottom),
                     Witness(out_val),
                 ],
-                vec![(0, None), (3, None)],
+                [0, 3],
             )
         };
         out_limbs.push(out_limb);
@@ -65,17 +58,17 @@ pub fn assign<'a, F: PrimeField>(
 }
 
 // returns (a-b, underflow), where underflow is nonzero iff a < b
-pub fn crt<'a, F: PrimeField>(
+pub fn crt<F: ScalarField>(
     range: &impl RangeInstructions<F>,
-    ctx: &mut Context<'a, F>,
-    a: &CRTInteger<'a, F>,
-    b: &CRTInteger<'a, F>,
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
     limb_bits: usize,
     limb_base: F,
-) -> (CRTInteger<'a, F>, AssignedValue<'a, F>) {
+) -> (CRTInteger<F>, AssignedValue<F>) {
     let (out_trunc, underflow) =
         assign::<F>(range, ctx, &a.truncation, &b.truncation, limb_bits, limb_base);
-    let out_native = range.gate().sub(ctx, Existing(&a.native), Existing(&b.native));
-    let out_val = a.value.as_ref().zip(b.value.as_ref()).map(|(a, b)| a - b);
+    let out_native = range.gate().sub(ctx, a.native, b.native);
+    let out_val = &a.value - &b.value;
     (CRTInteger::construct(out_trunc, out_native, out_val), underflow)
 }
diff --git a/halo2-ecc/src/bigint/sub_no_carry.rs b/halo2-ecc/src/bigint/sub_no_carry.rs
index 2226027d..ae4bb8a3 100644
--- a/halo2-ecc/src/bigint/sub_no_carry.rs
+++ b/halo2-ecc/src/bigint/sub_no_carry.rs
@@ -1,32 +1,32 @@
 use super::{CRTInteger, OverflowInteger};
-use halo2_base::{gates::GateInstructions, utils::PrimeField, Context, QuantumCell::Existing};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, Context};
 use std::cmp::max;
 
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-    b: &OverflowInteger<'v, F>,
-) -> OverflowInteger<'v, F> {
-    assert_eq!(a.limbs.len(), b.limbs.len());
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+    b: &OverflowInteger<F>,
+) -> OverflowInteger<F> {
+    debug_assert_eq!(a.limbs.len(), b.limbs.len());
     let out_limbs = a
         .limbs
         .iter()
         .zip(b.limbs.iter())
-        .map(|(a_limb, b_limb)| gate.sub(ctx, Existing(a_limb), Existing(b_limb)))
+        .map(|(&a_limb, &b_limb)| gate.sub(ctx, a_limb, b_limb))
         .collect();
 
     OverflowInteger::construct(out_limbs, max(a.max_limb_bits, b.max_limb_bits) + 1)
 }
 
-pub fn crt<'v, F: PrimeField>(
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-    b: &CRTInteger<'v, F>,
-) -> CRTInteger<'v, F> {
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
+) -> CRTInteger<F> {
     let out_trunc = assign::<F>(gate, ctx, &a.truncation, &b.truncation);
-    let out_native = gate.sub(ctx, Existing(&a.native), Existing(&b.native));
-    let out_val = a.value.as_ref().zip(b.value.as_ref()).map(|(a, b)| a - b);
+    let out_native = gate.sub(ctx, a.native, b.native);
+    let out_val = &a.value - &b.value;
     CRTInteger::construct(out_trunc, out_native, out_val)
 }
diff --git a/halo2-ecc/src/bn254/configs/msm_circuit.config b/halo2-ecc/src/bn254/configs/msm_circuit.config
deleted file mode 100644
index 9246e19f..00000000
--- a/halo2-ecc/src/bn254/configs/msm_circuit.config
+++ /dev/null
@@ -1 +0,0 @@
-{"strategy":"Simple","degree":20,"num_advice":10,"num_lookup_advice":2,"num_fixed":1,"lookup_bits":19,"limb_bits":88,"num_limbs":3,"batch_size":100,"window_bits":4}
\ No newline at end of file
diff --git a/halo2-ecc/src/bn254/final_exp.rs b/halo2-ecc/src/bn254/final_exp.rs
index e131f7d5..9ab45daa 100644
--- a/halo2-ecc/src/bn254/final_exp.rs
+++ b/halo2-ecc/src/bn254/final_exp.rs
@@ -5,34 +5,34 @@ use crate::halo2_proofs::{
 };
 use crate::{
     ecc::get_naf,
-    fields::{fp12::mul_no_carry_w6, FieldChip, FieldExtPoint},
+    fields::{fp12::mul_no_carry_w6, FieldChip, FieldExtPoint, PrimeField},
 };
 use halo2_base::{
     gates::GateInstructions,
-    utils::{fe_to_biguint, modulus, PrimeField},
+    utils::{fe_to_biguint, modulus},
     Context,
-    QuantumCell::{Constant, Existing},
+    QuantumCell::Constant,
 };
 use num_bigint::BigUint;
 
 const XI_0: i64 = 9;
 
-impl<'a, F: PrimeField> Fp12Chip<'a, F> {
+impl<'chip, F: PrimeField> Fp12Chip<'chip, F> {
     // computes a ** (p ** power)
     // only works for p = 3 (mod 4) and p = 1 (mod 6)
-    pub fn frobenius_map<'v>(
+    pub fn frobenius_map(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &<Self as FieldChip<F>>::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &<Self as FieldChip<F>>::FieldPoint,
         power: usize,
-    ) -> <Self as FieldChip<F>>::FieldPoint<'v> {
+    ) -> <Self as FieldChip<F>>::FieldPoint {
         assert_eq!(modulus::<Fq>() % 4u64, BigUint::from(3u64));
         assert_eq!(modulus::<Fq>() % 6u64, BigUint::from(1u64));
         assert_eq!(a.coeffs.len(), 12);
         let pow = power % 12;
         let mut out_fp2 = Vec::with_capacity(6);
 
-        let fp2_chip = Fp2Chip::<F>::construct(self.fp_chip);
+        let fp2_chip = Fp2Chip::<F>::new(self.fp_chip);
         for i in 0..6 {
             let frob_coeff = FROBENIUS_COEFF_FQ12_C1[pow].pow_vartime([i as u64]);
             // possible optimization (not implemented): load `frob_coeff` as we multiply instead of loading first
@@ -68,12 +68,12 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
     }
 
     // exp is in little-endian
-    pub fn pow<'v>(
+    pub fn pow(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &<Self as FieldChip<F>>::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &<Self as FieldChip<F>>::FieldPoint,
         exp: Vec<u64>,
-    ) -> <Self as FieldChip<F>>::FieldPoint<'v> {
+    ) -> <Self as FieldChip<F>>::FieldPoint {
         let mut res = a.clone();
         let mut is_started = false;
         let naf = get_naf(exp);
@@ -106,10 +106,10 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
 
     /// in = g0 + g2 w + g4 w^2 + g1 w^3 + g3 w^4 + g5 w^5 where g_i = g_i0 + g_i1 * u are elements of Fp2
     /// out = Compress(in) = [ g2, g3, g4, g5 ]
-    pub fn cyclotomic_compress<'v>(
+    pub fn cyclotomic_compress(
         &self,
-        a: &FieldExtPoint<FpPoint<'v, F>>,
-    ) -> Vec<FieldExtPoint<FpPoint<'v, F>>> {
+        a: &FieldExtPoint<FpPoint<F>>,
+    ) -> Vec<FieldExtPoint<FpPoint<F>>> {
         let g2 = FieldExtPoint::construct(vec![a.coeffs[1].clone(), a.coeffs[1 + 6].clone()]);
         let g3 = FieldExtPoint::construct(vec![a.coeffs[4].clone(), a.coeffs[4 + 6].clone()]);
         let g4 = FieldExtPoint::construct(vec![a.coeffs[2].clone(), a.coeffs[2 + 6].clone()]);
@@ -129,14 +129,14 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
     ///     if g2 = 0:
     ///         g1 = (2 g4 * g5)/g3
     ///         g0 = (2 g1^2 - 3 g3 * g4) * c + 1    
-    pub fn cyclotomic_decompress<'v>(
+    pub fn cyclotomic_decompress(
         &self,
-        ctx: &mut Context<'v, F>,
-        compression: Vec<FieldExtPoint<FpPoint<'v, F>>>,
-    ) -> FieldExtPoint<FpPoint<'v, F>> {
-        let [g2, g3, g4, g5]: [FieldExtPoint<FpPoint<'v, F>>; 4] = compression.try_into().unwrap();
+        ctx: &mut Context<F>,
+        compression: Vec<FieldExtPoint<FpPoint<F>>>,
+    ) -> FieldExtPoint<FpPoint<F>> {
+        let [g2, g3, g4, g5]: [FieldExtPoint<FpPoint<F>>; 4] = compression.try_into().unwrap();
 
-        let fp2_chip = Fp2Chip::<F>::construct(self.fp_chip);
+        let fp2_chip = Fp2Chip::<F>::new(self.fp_chip);
         let g5_sq = fp2_chip.mul_no_carry(ctx, &g5, &g5);
         let g5_sq_c = mul_no_carry_w6::<F, FpChip<F>, XI_0>(fp2_chip.fp_chip, ctx, &g5_sq);
 
@@ -156,7 +156,7 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
 
         let g2_is_zero = fp2_chip.is_zero(ctx, &g2);
         // resulting `g1` is already in "carried" format (witness is in `[0, p)`)
-        let g1 = fp2_chip.select(ctx, &g1_0, &g1_1, &g2_is_zero);
+        let g1 = fp2_chip.select(ctx, &g1_0, &g1_1, g2_is_zero);
 
         // share the computation of 2 g1^2 between the two cases
         let g1_sq = fp2_chip.mul_no_carry(ctx, &g1, &g1);
@@ -166,20 +166,16 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
         let g3_g4 = fp2_chip.mul_no_carry(ctx, &g3, &g4);
         let g3_g4_3 = fp2_chip.scalar_mul_no_carry(ctx, &g3_g4, 3);
         let temp = fp2_chip.add_no_carry(ctx, &g1_sq_2, &g2_g5);
-        let temp = fp2_chip.select(ctx, &g1_sq_2, &temp, &g2_is_zero);
+        let temp = fp2_chip.select(ctx, &g1_sq_2, &temp, g2_is_zero);
         let temp = fp2_chip.sub_no_carry(ctx, &temp, &g3_g4_3);
         let mut g0 = mul_no_carry_w6::<F, FpChip<F>, XI_0>(fp2_chip.fp_chip, ctx, &temp);
 
         // compute `g0 + 1`
-        g0.coeffs[0].truncation.limbs[0] = fp2_chip.range().gate.add(
-            ctx,
-            Existing(&g0.coeffs[0].truncation.limbs[0]),
-            Constant(F::one()),
-        );
-        g0.coeffs[0].native =
-            fp2_chip.range().gate.add(ctx, Existing(&g0.coeffs[0].native), Constant(F::one()));
+        g0.coeffs[0].truncation.limbs[0] =
+            fp2_chip.gate().add(ctx, g0.coeffs[0].truncation.limbs[0], Constant(F::one()));
+        g0.coeffs[0].native = fp2_chip.gate().add(ctx, g0.coeffs[0].native, Constant(F::one()));
         g0.coeffs[0].truncation.max_limb_bits += 1;
-        g0.coeffs[0].value = g0.coeffs[0].value.as_ref().map(|v| v + 1usize);
+        g0.coeffs[0].value += 1usize;
 
         // finally, carry g0
         g0 = fp2_chip.carry_mod(ctx, &g0);
@@ -217,18 +213,18 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
     //  A_ij = (g_i + g_j)(g_i + c g_j)
     //  B_ij = g_i g_j
 
-    pub fn cyclotomic_square<'v>(
+    pub fn cyclotomic_square(
         &self,
-        ctx: &mut Context<'v, F>,
-        compression: &[FieldExtPoint<FpPoint<'v, F>>],
-    ) -> Vec<FieldExtPoint<FpPoint<'v, F>>> {
+        ctx: &mut Context<F>,
+        compression: &[FieldExtPoint<FpPoint<F>>],
+    ) -> Vec<FieldExtPoint<FpPoint<F>>> {
         assert_eq!(compression.len(), 4);
         let g2 = &compression[0];
         let g3 = &compression[1];
         let g4 = &compression[2];
         let g5 = &compression[3];
 
-        let fp2_chip = Fp2Chip::<F>::construct(self.fp_chip);
+        let fp2_chip = Fp2Chip::<F>::new(self.fp_chip);
 
         let g2_plus_g3 = fp2_chip.add_no_carry(ctx, g2, g3);
         let cg3 = mul_no_carry_w6::<F, FpChip<F>, XI_0>(fp2_chip.fp_chip, ctx, g3);
@@ -266,12 +262,12 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
     }
 
     // exp is in little-endian
-    pub fn cyclotomic_pow<'v>(
+    pub fn cyclotomic_pow(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: FieldExtPoint<FpPoint<'v, F>>,
+        ctx: &mut Context<F>,
+        a: FieldExtPoint<FpPoint<F>>,
         exp: Vec<u64>,
-    ) -> FieldExtPoint<FpPoint<'v, F>> {
+    ) -> FieldExtPoint<FpPoint<F>> {
         let mut compression = self.cyclotomic_compress(&a);
         let mut out = None;
         let mut is_started = false;
@@ -304,11 +300,11 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
 
     #[allow(non_snake_case)]
     // use equation for (p^4 - p^2 + 1)/r in Section 5 of https://eprint.iacr.org/2008/490.pdf for BN curves
-    pub fn hard_part_BN<'v>(
+    pub fn hard_part_BN(
         &self,
-        ctx: &mut Context<'v, F>,
-        m: <Self as FieldChip<F>>::FieldPoint<'v>,
-    ) -> <Self as FieldChip<F>>::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        m: <Self as FieldChip<F>>::FieldPoint,
+    ) -> <Self as FieldChip<F>>::FieldPoint {
         // x = BN_X
 
         // m^p
@@ -372,25 +368,24 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
     }
 
     // out = in^{ (q^6 - 1)*(q^2 + 1) }
-    pub fn easy_part<'v>(
+    pub fn easy_part(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &<Self as FieldChip<F>>::FieldPoint<'v>,
-    ) -> <Self as FieldChip<F>>::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &<Self as FieldChip<F>>::FieldPoint,
+    ) -> <Self as FieldChip<F>>::FieldPoint {
         // a^{q^6} = conjugate of a
         let f1 = self.conjugate(ctx, a);
         let f2 = self.divide(ctx, &f1, a);
         let f3 = self.frobenius_map(ctx, &f2, 2);
-        let f = self.mul(ctx, &f3, &f2);
-        f
+        self.mul(ctx, &f3, &f2)
     }
 
     // out = in^{(q^12 - 1)/r}
-    pub fn final_exp<'v>(
+    pub fn final_exp(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &<Self as FieldChip<F>>::FieldPoint<'v>,
-    ) -> <Self as FieldChip<F>>::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &<Self as FieldChip<F>>::FieldPoint,
+    ) -> <Self as FieldChip<F>>::FieldPoint {
         let f0 = self.easy_part(ctx, a);
         let f = self.hard_part_BN(ctx, f0);
         f
diff --git a/halo2-ecc/src/bn254/mod.rs b/halo2-ecc/src/bn254/mod.rs
index 5f5db57b..6640f729 100644
--- a/halo2-ecc/src/bn254/mod.rs
+++ b/halo2-ecc/src/bn254/mod.rs
@@ -7,11 +7,11 @@ use crate::{
 pub mod final_exp;
 pub mod pairing;
 
-type FpChip<F> = fp::FpConfig<F, Fq>;
-type FpPoint<'v, F> = CRTInteger<'v, F>;
-type FqPoint<'v, F> = FieldExtPoint<FpPoint<'v, F>>;
-type Fp2Chip<'a, F> = fp2::Fp2Chip<'a, F, FpChip<F>, Fq2>;
-type Fp12Chip<'a, F> = fp12::Fp12Chip<'a, F, FpChip<F>, Fq12, 9>;
+pub type FpChip<'range, F> = fp::FpChip<'range, F, Fq>;
+pub type FpPoint<F> = CRTInteger<F>;
+pub type FqPoint<F> = FieldExtPoint<FpPoint<F>>;
+pub type Fp2Chip<'chip, F> = fp2::Fp2Chip<'chip, F, FpChip<'chip, F>, Fq2>;
+pub type Fp12Chip<'chip, F> = fp12::Fp12Chip<'chip, F, FpChip<'chip, F>, Fq12, 9>;
 
 #[cfg(test)]
 pub(crate) mod tests;
diff --git a/halo2-ecc/src/bn254/pairing.rs b/halo2-ecc/src/bn254/pairing.rs
index 2502ea48..cc4c9a87 100644
--- a/halo2-ecc/src/bn254/pairing.rs
+++ b/halo2-ecc/src/bn254/pairing.rs
@@ -1,20 +1,14 @@
 #![allow(non_snake_case)]
 use super::{Fp12Chip, Fp2Chip, FpChip, FpPoint, FqPoint};
-use crate::halo2_proofs::{
-    circuit::Value,
-    halo2curves::bn256::{self, G1Affine, G2Affine, SIX_U_PLUS_2_NAF},
-    halo2curves::bn256::{Fq, Fq2, FROBENIUS_COEFF_FQ12_C1},
-    plonk::ConstraintSystem,
+use crate::halo2_proofs::halo2curves::bn256::{
+    G1Affine, G2Affine, FROBENIUS_COEFF_FQ12_C1, SIX_U_PLUS_2_NAF,
 };
 use crate::{
     ecc::{EcPoint, EccChip},
-    fields::{fp::FpStrategy, fp12::mul_no_carry_w6},
-    fields::{FieldChip, FieldExtPoint},
-};
-use halo2_base::{
-    utils::{biguint_to_fe, fe_to_biguint, PrimeField},
-    Context,
+    fields::fp12::mul_no_carry_w6,
+    fields::{FieldChip, FieldExtPoint, PrimeField},
 };
+use halo2_base::Context;
 use num_bigint::BigUint;
 
 const XI_0: i64 = 9;
@@ -27,12 +21,12 @@ const XI_0: i64 = 9;
 //  line_{Psi(Q0), Psi(Q1)}(P) where Psi(x,y) = (w^2 x, w^3 y)
 //  - equals w^3 (y_1 - y_2) X + w^2 (x_2 - x_1) Y + w^5 (x_1 y_2 - x_2 y_1) =: out3 * w^3 + out2 * w^2 + out5 * w^5 where out2, out3, out5 are Fp2 points
 // Output is [None, None, out2, out3, None, out5] as vector of `Option<FqPoint>`s
-pub fn sparse_line_function_unequal<'a, F: PrimeField>(
+pub fn sparse_line_function_unequal<F: PrimeField>(
     fp2_chip: &Fp2Chip<F>,
-    ctx: &mut Context<'a, F>,
-    Q: (&EcPoint<F, FqPoint<'a, F>>, &EcPoint<F, FqPoint<'a, F>>),
-    P: &EcPoint<F, FpPoint<'a, F>>,
-) -> Vec<Option<FqPoint<'a, F>>> {
+    ctx: &mut Context<F>,
+    Q: (&EcPoint<F, FqPoint<F>>, &EcPoint<F, FqPoint<F>>),
+    P: &EcPoint<F, FpPoint<F>>,
+) -> Vec<Option<FqPoint<F>>> {
     let (x_1, y_1) = (&Q.0.x, &Q.0.y);
     let (x_2, y_2) = (&Q.1.x, &Q.1.y);
     let (X, Y) = (&P.x, &P.y);
@@ -66,12 +60,12 @@ pub fn sparse_line_function_unequal<'a, F: PrimeField>(
 //  line_{Psi(Q), Psi(Q)}(P) where Psi(x,y) = (w^2 x, w^3 y)
 //  - equals (3x^3 - 2y^2)(XI_0 + u) + w^4 (-3 x^2 * Q.x) + w^3 (2 y * Q.y) =: out0 + out4 * w^4 + out3 * w^3 where out0, out3, out4 are Fp2 points
 // Output is [out0, None, None, out3, out4, None] as vector of `Option<FqPoint>`s
-pub fn sparse_line_function_equal<'a, F: PrimeField>(
+pub fn sparse_line_function_equal<F: PrimeField>(
     fp2_chip: &Fp2Chip<F>,
-    ctx: &mut Context<'a, F>,
-    Q: &EcPoint<F, FqPoint<'a, F>>,
-    P: &EcPoint<F, FpPoint<'a, F>>,
-) -> Vec<Option<FqPoint<'a, F>>> {
+    ctx: &mut Context<F>,
+    Q: &EcPoint<F, FqPoint<F>>,
+    P: &EcPoint<F, FpPoint<F>>,
+) -> Vec<Option<FqPoint<F>>> {
     let (x, y) = (&Q.x, &Q.y);
     assert_eq!(x.coeffs.len(), 2);
     assert_eq!(y.coeffs.len(), 2);
@@ -101,12 +95,12 @@ pub fn sparse_line_function_equal<'a, F: PrimeField>(
 
 // multiply Fp12 point `a` with Fp12 point `b` where `b` is len 6 vector of Fp2 points, where some are `None` to represent zero.
 // Assumes `b` is not vector of all `None`s
-pub fn sparse_fp12_multiply<'a, F: PrimeField>(
+pub fn sparse_fp12_multiply<F: PrimeField>(
     fp2_chip: &Fp2Chip<F>,
-    ctx: &mut Context<'a, F>,
-    a: &FqPoint<'a, F>,
-    b_fp2_coeffs: &Vec<Option<FqPoint<'a, F>>>,
-) -> FieldExtPoint<FpPoint<'a, F>> {
+    ctx: &mut Context<F>,
+    a: &FqPoint<F>,
+    b_fp2_coeffs: &Vec<Option<FqPoint<F>>>,
+) -> FieldExtPoint<FpPoint<F>> {
     assert_eq!(a.coeffs.len(), 12);
     assert_eq!(b_fp2_coeffs.len(), 6);
     let mut a_fp2_coeffs = Vec::with_capacity(6);
@@ -168,13 +162,13 @@ pub fn sparse_fp12_multiply<'a, F: PrimeField>(
 // - P is point in E(Fp)
 // Output:
 // - out = g * l_{Psi(Q0), Psi(Q1)}(P) as Fp12 point
-pub fn fp12_multiply_with_line_unequal<'a, F: PrimeField>(
+pub fn fp12_multiply_with_line_unequal<F: PrimeField>(
     fp2_chip: &Fp2Chip<F>,
-    ctx: &mut Context<'a, F>,
-    g: &FqPoint<'a, F>,
-    Q: (&EcPoint<F, FqPoint<'a, F>>, &EcPoint<F, FqPoint<'a, F>>),
-    P: &EcPoint<F, FpPoint<'a, F>>,
-) -> FqPoint<'a, F> {
+    ctx: &mut Context<F>,
+    g: &FqPoint<F>,
+    Q: (&EcPoint<F, FqPoint<F>>, &EcPoint<F, FqPoint<F>>),
+    P: &EcPoint<F, FpPoint<F>>,
+) -> FqPoint<F> {
     let line = sparse_line_function_unequal::<F>(fp2_chip, ctx, Q, P);
     sparse_fp12_multiply::<F>(fp2_chip, ctx, g, &line)
 }
@@ -185,13 +179,13 @@ pub fn fp12_multiply_with_line_unequal<'a, F: PrimeField>(
 // - P is point in E(Fp)
 // Output:
 // - out = g * l_{Psi(Q), Psi(Q)}(P) as Fp12 point
-pub fn fp12_multiply_with_line_equal<'a, F: PrimeField>(
+pub fn fp12_multiply_with_line_equal<F: PrimeField>(
     fp2_chip: &Fp2Chip<F>,
-    ctx: &mut Context<'a, F>,
-    g: &FqPoint<'a, F>,
-    Q: &EcPoint<F, FqPoint<'a, F>>,
-    P: &EcPoint<F, FpPoint<'a, F>>,
-) -> FqPoint<'a, F> {
+    ctx: &mut Context<F>,
+    g: &FqPoint<F>,
+    Q: &EcPoint<F, FqPoint<F>>,
+    P: &EcPoint<F, FpPoint<F>>,
+) -> FqPoint<F> {
     let line = sparse_line_function_equal::<F>(fp2_chip, ctx, Q, P);
     sparse_fp12_multiply::<F>(fp2_chip, ctx, g, &line)
 }
@@ -214,13 +208,13 @@ pub fn fp12_multiply_with_line_equal<'a, F: PrimeField>(
 //  - `0 <= loop_count < r` and `loop_count < p` (to avoid [loop_count]Q' = Frob_p(Q'))
 //  - x^3 + b = 0 has no solution in Fp2, i.e., the y-coordinate of Q cannot be 0.
 
-pub fn miller_loop_BN<'a, 'b, F: PrimeField>(
-    ecc_chip: &EccChip<F, Fp2Chip<'a, F>>,
-    ctx: &mut Context<'b, F>,
-    Q: &EcPoint<F, FqPoint<'b, F>>,
-    P: &EcPoint<F, FpPoint<'b, F>>,
+pub fn miller_loop_BN<F: PrimeField>(
+    ecc_chip: &EccChip<F, Fp2Chip<F>>,
+    ctx: &mut Context<F>,
+    Q: &EcPoint<F, FqPoint<F>>,
+    P: &EcPoint<F, FpPoint<F>>,
     pseudo_binary_encoding: &[i8],
-) -> FqPoint<'b, F> {
+) -> FqPoint<F> {
     let mut i = pseudo_binary_encoding.len() - 1;
     while pseudo_binary_encoding[i] == 0 {
         i -= 1;
@@ -257,7 +251,7 @@ pub fn miller_loop_BN<'a, 'b, F: PrimeField>(
 
     loop {
         if i != last_index - 1 {
-            let fp12_chip = Fp12Chip::<F>::construct(ecc_chip.field_chip.fp_chip);
+            let fp12_chip = Fp12Chip::<F>::new(ecc_chip.field_chip.fp_chip);
             let f_sq = fp12_chip.mul(ctx, &f, &f);
             f = fp12_multiply_with_line_equal::<F>(ecc_chip.field_chip(), ctx, &f_sq, &R, P);
         }
@@ -299,12 +293,12 @@ pub fn miller_loop_BN<'a, 'b, F: PrimeField>(
 
 // let pairs = [(a_i, b_i)], a_i in G_1, b_i in G_2
 // output is Prod_i e'(a_i, b_i), where e'(a_i, b_i) is the output of `miller_loop_BN(b_i, a_i)`
-pub fn multi_miller_loop_BN<'a, 'b, F: PrimeField>(
-    ecc_chip: &EccChip<F, Fp2Chip<'a, F>>,
-    ctx: &mut Context<'b, F>,
-    pairs: Vec<(&EcPoint<F, FpPoint<'b, F>>, &EcPoint<F, FqPoint<'b, F>>)>,
+pub fn multi_miller_loop_BN<F: PrimeField>(
+    ecc_chip: &EccChip<F, Fp2Chip<F>>,
+    ctx: &mut Context<F>,
+    pairs: Vec<(&EcPoint<F, FpPoint<F>>, &EcPoint<F, FqPoint<F>>)>,
     pseudo_binary_encoding: &[i8],
-) -> FqPoint<'b, F> {
+) -> FqPoint<F> {
     let mut i = pseudo_binary_encoding.len() - 1;
     while pseudo_binary_encoding[i] == 0 {
         i -= 1;
@@ -344,7 +338,7 @@ pub fn multi_miller_loop_BN<'a, 'b, F: PrimeField>(
 
     i -= 1;
     let mut r = pairs.iter().map(|pair| pair.1.clone()).collect::<Vec<_>>();
-    let fp12_chip = Fp12Chip::<F>::construct(ecc_chip.field_chip.fp_chip);
+    let fp12_chip = Fp12Chip::<F>::new(ecc_chip.field_chip.fp_chip);
     loop {
         if i != last_index - 1 {
             f = fp12_chip.mul(ctx, &f, &f);
@@ -401,13 +395,13 @@ pub fn multi_miller_loop_BN<'a, 'b, F: PrimeField>(
 // - coeff[1][2], coeff[1][3] as assigned cells: this is an optimization to avoid loading new constants
 // Output:
 // - (coeff[1][2] * x^p, coeff[1][3] * y^p) point in E(Fp2)
-pub fn twisted_frobenius<'a, 'b, F: PrimeField>(
-    ecc_chip: &EccChip<F, Fp2Chip<'a, F>>,
-    ctx: &mut Context<'b, F>,
-    Q: &EcPoint<F, FqPoint<'b, F>>,
-    c2: &FqPoint<'b, F>,
-    c3: &FqPoint<'b, F>,
-) -> EcPoint<F, FqPoint<'b, F>> {
+pub fn twisted_frobenius<F: PrimeField>(
+    ecc_chip: &EccChip<F, Fp2Chip<F>>,
+    ctx: &mut Context<F>,
+    Q: &EcPoint<F, FqPoint<F>>,
+    c2: &FqPoint<F>,
+    c3: &FqPoint<F>,
+) -> EcPoint<F, FqPoint<F>> {
     assert_eq!(c2.coeffs.len(), 2);
     assert_eq!(c3.coeffs.len(), 2);
 
@@ -424,13 +418,13 @@ pub fn twisted_frobenius<'a, 'b, F: PrimeField>(
 // - Q = (x, y) point in E(Fp2)
 // Output:
 // - (coeff[1][2] * x^p, coeff[1][3] * -y^p) point in E(Fp2)
-pub fn neg_twisted_frobenius<'a, 'b, F: PrimeField>(
-    ecc_chip: &EccChip<F, Fp2Chip<'a, F>>,
-    ctx: &mut Context<'b, F>,
-    Q: &EcPoint<F, FqPoint<'b, F>>,
-    c2: &FqPoint<'b, F>,
-    c3: &FqPoint<'b, F>,
-) -> EcPoint<F, FqPoint<'b, F>> {
+pub fn neg_twisted_frobenius<F: PrimeField>(
+    ecc_chip: &EccChip<F, Fp2Chip<F>>,
+    ctx: &mut Context<F>,
+    Q: &EcPoint<F, FqPoint<F>>,
+    c2: &FqPoint<F>,
+    c3: &FqPoint<F>,
+) -> EcPoint<F, FqPoint<F>> {
     assert_eq!(c2.coeffs.len(), 2);
     assert_eq!(c3.coeffs.len(), 2);
 
@@ -442,80 +436,38 @@ pub fn neg_twisted_frobenius<'a, 'b, F: PrimeField>(
 }
 
 // To avoid issues with mutably borrowing twice (not allowed in Rust), we only store fp_chip and construct g2_chip and fp12_chip in scope when needed for temporary mutable borrows
-pub struct PairingChip<'a, F: PrimeField> {
-    pub fp_chip: &'a FpChip<F>,
+pub struct PairingChip<'chip, F: PrimeField> {
+    pub fp_chip: &'chip FpChip<'chip, F>,
 }
 
-impl<'a, F: PrimeField> PairingChip<'a, F> {
-    pub fn construct(fp_chip: &'a FpChip<F>) -> Self {
+impl<'chip, F: PrimeField> PairingChip<'chip, F> {
+    pub fn new(fp_chip: &'chip FpChip<F>) -> Self {
         Self { fp_chip }
     }
 
-    pub fn configure(
-        meta: &mut ConstraintSystem<F>,
-        strategy: FpStrategy,
-        num_advice: &[usize],
-        num_lookup_advice: &[usize],
-        num_fixed: usize,
-        lookup_bits: usize,
-        limb_bits: usize,
-        num_limbs: usize,
-        context_id: usize,
-        k: usize,
-    ) -> FpChip<F> {
-        FpChip::<F>::configure(
-            meta,
-            strategy,
-            num_advice,
-            num_lookup_advice,
-            num_fixed,
-            lookup_bits,
-            limb_bits,
-            num_limbs,
-            halo2_base::utils::modulus::<Fq>(),
-            context_id,
-            k,
-        )
+    pub fn load_private_g1(&self, ctx: &mut Context<F>, point: G1Affine) -> EcPoint<F, FpPoint<F>> {
+        let g1_chip = EccChip::new(self.fp_chip);
+        g1_chip.load_private(ctx, (point.x, point.y))
     }
 
-    pub fn load_private_g1<'v>(
+    pub fn load_private_g2(
         &self,
-        ctx: &mut Context<'_, F>,
-        point: Value<G1Affine>,
-    ) -> EcPoint<F, FpPoint<'v, F>> {
-        // go from pse/pairing::bn256::Fq to forked Fq
-        let convert_fp = |x: bn256::Fq| biguint_to_fe(&fe_to_biguint(&x));
-        let g1_chip = EccChip::construct(self.fp_chip.clone());
-        g1_chip
-            .load_private(ctx, (point.map(|pt| convert_fp(pt.x)), point.map(|pt| convert_fp(pt.y))))
-    }
-
-    pub fn load_private_g2<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        point: Value<G2Affine>,
-    ) -> EcPoint<F, FieldExtPoint<FpPoint<'v, F>>> {
-        let fp2_chip = Fp2Chip::<F>::construct(self.fp_chip);
-        let g2_chip = EccChip::construct(fp2_chip);
-        // go from pse/pairing::bn256::Fq2 to forked public Fq2
-        let convert_fp2 = |c0: bn256::Fq, c1: bn256::Fq| Fq2 {
-            c0: biguint_to_fe(&fe_to_biguint(&c0)),
-            c1: biguint_to_fe(&fe_to_biguint(&c1)),
-        };
-        let x = point.map(|pt| convert_fp2(pt.x.c0, pt.x.c1));
-        let y = point.map(|pt| convert_fp2(pt.y.c0, pt.y.c1));
-
-        g2_chip.load_private(ctx, (x, y))
+        ctx: &mut Context<F>,
+        point: G2Affine,
+    ) -> EcPoint<F, FieldExtPoint<FpPoint<F>>> {
+        let fp2_chip = Fp2Chip::<F>::new(self.fp_chip);
+        let g2_chip = EccChip::new(&fp2_chip);
+        g2_chip.load_private(ctx, (point.x, point.y))
     }
 
-    pub fn miller_loop<'v>(
+    pub fn miller_loop(
         &self,
-        ctx: &mut Context<'v, F>,
-        Q: &EcPoint<F, FqPoint<'v, F>>,
-        P: &EcPoint<F, FpPoint<'v, F>>,
-    ) -> FqPoint<'v, F> {
-        let fp2_chip = Fp2Chip::<F>::construct(self.fp_chip);
-        let g2_chip = EccChip::construct(fp2_chip);
+        ctx: &mut Context<F>,
+        Q: &EcPoint<F, FqPoint<F>>,
+        P: &EcPoint<F, FpPoint<F>>,
+    ) -> FqPoint<F> {
+        let fp2_chip = Fp2Chip::<F>::new(self.fp_chip);
+        let g2_chip = EccChip::new(&fp2_chip);
         miller_loop_BN::<F>(
             &g2_chip,
             ctx,
@@ -525,13 +477,13 @@ impl<'a, F: PrimeField> PairingChip<'a, F> {
         )
     }
 
-    pub fn multi_miller_loop<'v>(
+    pub fn multi_miller_loop(
         &self,
-        ctx: &mut Context<'v, F>,
-        pairs: Vec<(&EcPoint<F, FpPoint<'v, F>>, &EcPoint<F, FqPoint<'v, F>>)>,
-    ) -> FqPoint<'v, F> {
-        let fp2_chip = Fp2Chip::<F>::construct(self.fp_chip);
-        let g2_chip = EccChip::construct(fp2_chip);
+        ctx: &mut Context<F>,
+        pairs: Vec<(&EcPoint<F, FpPoint<F>>, &EcPoint<F, FqPoint<F>>)>,
+    ) -> FqPoint<F> {
+        let fp2_chip = Fp2Chip::<F>::new(self.fp_chip);
+        let g2_chip = EccChip::new(&fp2_chip);
         multi_miller_loop_BN::<F>(
             &g2_chip,
             ctx,
@@ -540,20 +492,20 @@ impl<'a, F: PrimeField> PairingChip<'a, F> {
         )
     }
 
-    pub fn final_exp<'v>(&self, ctx: &mut Context<'v, F>, f: &FqPoint<'v, F>) -> FqPoint<'v, F> {
-        let fp12_chip = Fp12Chip::<F>::construct(self.fp_chip);
+    pub fn final_exp(&self, ctx: &mut Context<F>, f: &FqPoint<F>) -> FqPoint<F> {
+        let fp12_chip = Fp12Chip::<F>::new(self.fp_chip);
         fp12_chip.final_exp(ctx, f)
     }
 
     // optimal Ate pairing
-    pub fn pairing<'v>(
+    pub fn pairing(
         &self,
-        ctx: &mut Context<'v, F>,
-        Q: &EcPoint<F, FqPoint<'v, F>>,
-        P: &EcPoint<F, FpPoint<'v, F>>,
-    ) -> FqPoint<'v, F> {
+        ctx: &mut Context<F>,
+        Q: &EcPoint<F, FqPoint<F>>,
+        P: &EcPoint<F, FpPoint<F>>,
+    ) -> FqPoint<F> {
         let f0 = self.miller_loop(ctx, Q, P);
-        let fp12_chip = Fp12Chip::<F>::construct(self.fp_chip);
+        let fp12_chip = Fp12Chip::<F>::new(self.fp_chip);
         // final_exp implemented in final_exp module
         fp12_chip.final_exp(ctx, &f0)
     }
diff --git a/halo2-ecc/src/bn254/results/msm_bench_internal.csv b/halo2-ecc/src/bn254/results/msm_bench_internal.csv
deleted file mode 100644
index 173d5ce1..00000000
--- a/halo2-ecc/src/bn254/results/msm_bench_internal.csv
+++ /dev/null
@@ -1,7 +0,0 @@
-degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,batch_size,window_bits,proof_time,proof_size,verify_time
-17,89,12,1,16,88,3,100,4,20.523902161s,30976,20.769379ms
-18,45,6,1,17,88,3,100,4,14.604765528s,15808,63.163377ms
-19,22,3,1,18,90,3,100,4,9.98081942s,7936,9.676845ms
-20,11,2,1,19,90,3,100,4,10.668871495s,4352,6.639454ms
-21,6,1,1,20,88,3,100,4,13.530348447s,2496,5.640048ms
-21,21,3,1,20,88,3,400,4,35.876681956s,7712,8.85568ms
diff --git a/halo2-ecc/src/bn254/results/msm_bench_m2_simple.csv b/halo2-ecc/src/bn254/results/msm_bench_m2_simple.csv
deleted file mode 100644
index 49ab0447..00000000
--- a/halo2-ecc/src/bn254/results/msm_bench_m2_simple.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,batch_size,window_bits,proof_time,proof_size,verify_time
-17,96,12,1,16,88,3,100,4,56.599245791s,33056,20.015083ms
-18,48,6,1,17,88,3,100,4,58.546402708s,16736,11.798ms
-19,24,3,1,18,90,3,100,4,61.127382s,8512,6.766125ms
-20,12,2,1,19,90,3,100,4,72.688734375s,4704,5.345125ms
-21,6,1,1,20,88,3,100,4,84.217528875s,2496,3.600333ms
diff --git a/halo2-ecc/src/bn254/results/msm_bench_m2_simple_plus.csv b/halo2-ecc/src/bn254/results/msm_bench_m2_simple_plus.csv
deleted file mode 100644
index 9179220f..00000000
--- a/halo2-ecc/src/bn254/results/msm_bench_m2_simple_plus.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,batch_size,window_bits,proof_time,proof_size,verify_time
-17,80,12,1,16,88,3,100,4,50.274314958s,36128,17.630791ms
-18,40,6,1,17,88,3,100,4,50.396009708s,18272,10.080583ms
-19,20,3,1,18,90,3,100,4,51.876326291s,9280,6.106458ms
-20,10,2,1,19,90,3,100,4,63.421609541s,5088,4.518875ms
-21,5,1,1,20,88,3,100,4,81.70901675s,2752,4.345875ms
\ No newline at end of file
diff --git a/halo2-ecc/src/bn254/results/pairing_bench_results.txt b/halo2-ecc/src/bn254/results/pairing_bench_results.txt
deleted file mode 100644
index 09371a73..00000000
--- a/halo2-ecc/src/bn254/results/pairing_bench_results.txt
+++ /dev/null
@@ -1,692 +0,0 @@
----------------------- degree = 22 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 27.278246583s
-Time elapsed in generating vkey: 13.992930625s
-test bn254::tests::bench_pairing has been running for over 60 seconds
-Time elapsed in generating pkey: 45.861797958s
-Time elapsed in filling circuit: 243.584µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x23f8716681b3e55143936e24d3079b55ffe7c32514d3173641e2dfb3d8a3cf43,
-                c1: 0x0e9b64529161e90b93578425fba75c6bae408067d455b43a8d677ff96ff4b48a,
-            },
-            c1: Fq2 {
-                c0: 0x25c4d1d4420efbde924c9d584c8ee8849790b5cc8333bc367416c5ba8ae9b4a7,
-                c1: 0x1894c7927cfc56dd4f7bc27f8b0e738f83d49115289e028370a3650153e1382a,
-            },
-            c2: Fq2 {
-                c0: 0x12e2be8f7be66544d2d17c27ff5565254b007c19a106908d0c8c5ef2533527ad,
-                c1: 0x24dd9c30ab633691db6a9b8c18c55b778f4c8fe59e01ff9bbcb9f97047c1b43c,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x254654440f137e53b4493944308d8bd8e7fa497e13bceaf35403b43f77cbcc46,
-                c1: 0x0bf54ea3d4fb2ff28168286aec8186cd51fac9e3385d9e13cb76aa971b585927,
-            },
-            c1: Fq2 {
-                c0: 0x29a510bde714c164057fcbd4549c2dfc86676586259026478a91b446d2e7a6cc,
-                c1: 0x0da6ab5a9bb17606af566b4442deb87cafb60c4fba72c94bc138a0dd004c6aa2,
-            },
-            c2: Fq2 {
-                c0: 0x0f06348301fa8f811bfaa3fec3539035f469c57e94f92bee40c90d0c293f65ee,
-                c1: 0x0670aebbdf131f76e53f43db7d5fa877e422b9c01c06dd6d95c9508c397b1715,
-            },
-        },
-    },
-)
-circuit f: [
-    "23f8716681b3e55143936e24d3079b55ffe7c32514d3173641e2dfb3d8a3cf43",
-    "254654440f137e53b4493944308d8bd8e7fa497e13bceaf35403b43f77cbcc46",
-    "25c4d1d4420efbde924c9d584c8ee8849790b5cc8333bc367416c5ba8ae9b4a7",
-    "29a510bde714c164057fcbd4549c2dfc86676586259026478a91b446d2e7a6cc",
-    "12e2be8f7be66544d2d17c27ff5565254b007c19a106908d0c8c5ef2533527ad",
-    "f06348301fa8f811bfaa3fec3539035f469c57e94f92bee40c90d0c293f65ee",
-    "e9b64529161e90b93578425fba75c6bae408067d455b43a8d677ff96ff4b48a",
-    "bf54ea3d4fb2ff28168286aec8186cd51fac9e3385d9e13cb76aa971b585927",
-    "1894c7927cfc56dd4f7bc27f8b0e738f83d49115289e028370a3650153e1382a",
-    "da6ab5a9bb17606af566b4442deb87cafb60c4fba72c94bc138a0dd004c6aa2",
-    "24dd9c30ab633691db6a9b8c18c55b778f4c8fe59e01ff9bbcb9f97047c1b43c",
-    "670aebbdf131f76e53f43db7d5fa877e422b9c01c06dd6d95c9508c397b1715",
-]
-Using:
-advice columns: 1
-special lookup advice columns: 0
-fixed columns: 1
-lookup bits: 21
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 3103660
-minimum rows used by an advice column: 3103660
-total cells used: 3103660
-cells used in special lookup column: 0
-maximum rows used by a fixed column: 123
-Proving time: 108.858797333s
-Verify time: 6.111ms
----------------------- degree = 21 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 14.360272041s
-Time elapsed in generating vkey: 13.883735625s
-Time elapsed in generating pkey: 28.111832s
-Time elapsed in filling circuit: 177.542µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x22ad6747989f8e27f11c763a86a2ff7ba59143261d2b51847fe89f61236b6923,
-                c1: 0x01479c3ec4201f5ac83097e7450193d946cf98ceb6f0f4a393e507c00f5c30e3,
-            },
-            c1: Fq2 {
-                c0: 0x08155e00679fae3e1eead89cbd825fcc0fb8f69d37460501fc2d67aba3a4c965,
-                c1: 0x1d442215d46a21dd8971cb8d8bd1078a4f117191b6659afacaa3ed8dd5ceea58,
-            },
-            c2: Fq2 {
-                c0: 0x29eb579a4d76f02089a837062ea2d888395ff69bd3e18da435ca8f8dad3dd8a0,
-                c1: 0x173c006e100b050f7d14d432033b4e75834f142918ec2f1b3e53eabc01aeba82,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x0f50f5f331fc4cd6ffdf0be5d9155267f624f44921b1cbda440557e133e44ce4,
-                c1: 0x07bb802031009b543aeb5aab1f9ed97cc1e366fbe76c91121646aab734a09967,
-            },
-            c1: Fq2 {
-                c0: 0x1a4a1607542a0c80f8be2a58495f0de2ae09b3cb7498f8a3f2c38547c182bc67,
-                c1: 0x22c61e14a0bb8ec22e2bd50d098fd07607373942550d245ffbaf4c5c11184390,
-            },
-            c2: Fq2 {
-                c0: 0x242ad4b8a7720b04c984fa5d037197e052b46208deb39530e96987a2c7aff545,
-                c1: 0x10481d8523aba6e0874cca444df2b50124f97878ccb1ac380c003faf150a1e13,
-            },
-        },
-    },
-)
-circuit f: [
-    "22ad6747989f8e27f11c763a86a2ff7ba59143261d2b51847fe89f61236b6923",
-    "f50f5f331fc4cd6ffdf0be5d9155267f624f44921b1cbda440557e133e44ce4",
-    "8155e00679fae3e1eead89cbd825fcc0fb8f69d37460501fc2d67aba3a4c965",
-    "1a4a1607542a0c80f8be2a58495f0de2ae09b3cb7498f8a3f2c38547c182bc67",
-    "29eb579a4d76f02089a837062ea2d888395ff69bd3e18da435ca8f8dad3dd8a0",
-    "242ad4b8a7720b04c984fa5d037197e052b46208deb39530e96987a2c7aff545",
-    "1479c3ec4201f5ac83097e7450193d946cf98ceb6f0f4a393e507c00f5c30e3",
-    "7bb802031009b543aeb5aab1f9ed97cc1e366fbe76c91121646aab734a09967",
-    "1d442215d46a21dd8971cb8d8bd1078a4f117191b6659afacaa3ed8dd5ceea58",
-    "22c61e14a0bb8ec22e2bd50d098fd07607373942550d245ffbaf4c5c11184390",
-    "173c006e100b050f7d14d432033b4e75834f142918ec2f1b3e53eabc01aeba82",
-    "10481d8523aba6e0874cca444df2b50124f97878ccb1ac380c003faf150a1e13",
-]
-Using:
-advice columns: 2
-special lookup advice columns: 1
-fixed columns: 1
-lookup bits: 20
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 1551833
-minimum rows used by an advice column: 1551827
-total cells used: 3103660
-cells used in special lookup column: 308580
-maximum rows used by a fixed column: 122
-Proving time: 63.351649125s
-Verify time: 6.473708ms
----------------------- degree = 20 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 6.695991083s
-Time elapsed in generating vkey: 11.556116042s
-Time elapsed in generating pkey: 19.479236833s
-Time elapsed in filling circuit: 226.917µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x022bb48caccf55d91d55ff55d710e39558a25ba3b6930d92fec4127c265865aa,
-                c1: 0x04318ee2315b10db240f026aa76a2e4f9c1875965a46ca293cf0e276d65006a8,
-            },
-            c1: Fq2 {
-                c0: 0x0a0ca918d2d0b7111dc22357c18376922308df36a000928d4c02c5f388afbf4e,
-                c1: 0x23d5eaad802ba805dfcc6f005eef6d13ffaa64641cb4a205f7ba70b40cca8751,
-            },
-            c2: Fq2 {
-                c0: 0x14c2d1b10c7970409bf8c4bf564f17f059b81fd34ef7b86485626781d7c71b18,
-                c1: 0x038357ea89bfa04a495788198b3e3b155e3ccae084d7e7062a2fdcc4d1f4e922,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x2bbb58af82344ee1bf2a42ca85eb30461bd7d218c06b753460aa691e178d0200,
-                c1: 0x2ccf4d8ef48c734f1bb67de0b4dfe6c1ea9ccc09f2fc3ec83aebc0d728ee16bc,
-            },
-            c1: Fq2 {
-                c0: 0x1a4d77be8aacdd1579871582b7d546754be44f30f35f431e5f6e7e9d2eda0914,
-                c1: 0x0f367615d4dbc13994439c14b9165fbd9ef1c7d8dd4b8a38e6340ca763f3dab0,
-            },
-            c2: Fq2 {
-                c0: 0x24fbcd5185511db58ebfccb2e67089a10adba2187c08aba4326d72a04c1bc8f9,
-                c1: 0x18295c30d104fcc7cf9acbd5c867b6bb73d29e338f9a81a8e190366c7ee5c22d,
-            },
-        },
-    },
-)
-circuit f: [
-    "22bb48caccf55d91d55ff55d710e39558a25ba3b6930d92fec4127c265865aa",
-    "2bbb58af82344ee1bf2a42ca85eb30461bd7d218c06b753460aa691e178d0200",
-    "a0ca918d2d0b7111dc22357c18376922308df36a000928d4c02c5f388afbf4e",
-    "1a4d77be8aacdd1579871582b7d546754be44f30f35f431e5f6e7e9d2eda0914",
-    "14c2d1b10c7970409bf8c4bf564f17f059b81fd34ef7b86485626781d7c71b18",
-    "24fbcd5185511db58ebfccb2e67089a10adba2187c08aba4326d72a04c1bc8f9",
-    "4318ee2315b10db240f026aa76a2e4f9c1875965a46ca293cf0e276d65006a8",
-    "2ccf4d8ef48c734f1bb67de0b4dfe6c1ea9ccc09f2fc3ec83aebc0d728ee16bc",
-    "23d5eaad802ba805dfcc6f005eef6d13ffaa64641cb4a205f7ba70b40cca8751",
-    "f367615d4dbc13994439c14b9165fbd9ef1c7d8dd4b8a38e6340ca763f3dab0",
-    "38357ea89bfa04a495788198b3e3b155e3ccae084d7e7062a2fdcc4d1f4e922",
-    "18295c30d104fcc7cf9acbd5c867b6bb73d29e338f9a81a8e190366c7ee5c22d",
-]
-Using:
-advice columns: 4
-special lookup advice columns: 1
-fixed columns: 1
-lookup bits: 19
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 790857
-minimum rows used by an advice column: 790848
-total cells used: 3163402
-cells used in special lookup column: 328494
-maximum rows used by a fixed column: 124
-Proving time: 40.914635041s
-Verify time: 3.633167ms
----------------------- degree = 19 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 3.455247625s
-Time elapsed in generating vkey: 8.962057125s
-Time elapsed in generating pkey: 13.224039791s
-Time elapsed in filling circuit: 232.709µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x2ade60c33f9a7b1acd140334d69b2186701493103dfe9f2384e352d9796bc645,
-                c1: 0x124a21c731563d40b8e118d7ecfb3cdb50daec7ce2423378a1e7579267440be2,
-            },
-            c1: Fq2 {
-                c0: 0x0f6b8a821874a1860d72530a4d7df4f25f08ff55398eaafab066e8e1a84ba470,
-                c1: 0x1e6efc75b0312943b255eea0cffb66ba481e92a4330acbe7d05e3a885082b740,
-            },
-            c2: Fq2 {
-                c0: 0x28a05f0adde4bdc662f2b960ef376dc117320332195be6d676101462371505f8,
-                c1: 0x198b381f93299dff95093ec8d74fccc25f892594fa685d76f279ebc67f13813f,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x0045bbe8b6c32da3b11e8d815ded2f1b1ab50c7d68a5e3b3d1e44e21121383c4,
-                c1: 0x1be966a56fa5e2a21cbe45788431e40745255ff787adc64e035a97777791d867,
-            },
-            c1: Fq2 {
-                c0: 0x26ef726f580d504d182217ba7be760090f11ec28f4bdd2f1ac2b943e49ce120d,
-                c1: 0x14d6f5d9083460e35d2458b875867e1fee78fce756c89b49e721b6a1608a87fb,
-            },
-            c2: Fq2 {
-                c0: 0x12adfa0800a551b4a199a0f3ade37c3542fb1242edf5eafabfe7f498f48cf877,
-                c1: 0x22c04697e24856621b46040d887b21fca95c866a4d871597f374387f011c4edb,
-            },
-        },
-    },
-)
-circuit f: [
-    "2ade60c33f9a7b1acd140334d69b2186701493103dfe9f2384e352d9796bc645",
-    "45bbe8b6c32da3b11e8d815ded2f1b1ab50c7d68a5e3b3d1e44e21121383c4",
-    "f6b8a821874a1860d72530a4d7df4f25f08ff55398eaafab066e8e1a84ba470",
-    "26ef726f580d504d182217ba7be760090f11ec28f4bdd2f1ac2b943e49ce120d",
-    "28a05f0adde4bdc662f2b960ef376dc117320332195be6d676101462371505f8",
-    "12adfa0800a551b4a199a0f3ade37c3542fb1242edf5eafabfe7f498f48cf877",
-    "124a21c731563d40b8e118d7ecfb3cdb50daec7ce2423378a1e7579267440be2",
-    "1be966a56fa5e2a21cbe45788431e40745255ff787adc64e035a97777791d867",
-    "1e6efc75b0312943b255eea0cffb66ba481e92a4330acbe7d05e3a885082b740",
-    "14d6f5d9083460e35d2458b875867e1fee78fce756c89b49e721b6a1608a87fb",
-    "198b381f93299dff95093ec8d74fccc25f892594fa685d76f279ebc67f13813f",
-    "22c04697e24856621b46040d887b21fca95c866a4d871597f374387f011c4edb",
-]
-Using:
-advice columns: 7
-special lookup advice columns: 1
-fixed columns: 1
-lookup bits: 18
-limb bits: 90
-num limbs: 3
-maximum rows used by an advice column: 452273
-minimum rows used by an advice column: 452262
-total cells used: 3165880
-cells used in special lookup column: 309000
-maximum rows used by a fixed column: 121
-Proving time: 29.8487535s
-Verify time: 4.560708ms
----------------------- degree = 18 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 1.841088083s
-Time elapsed in generating vkey: 8.741611583s
-Time elapsed in generating pkey: 10.582710834s
-Time elapsed in filling circuit: 126.166µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x1dc9ee36016f9ea889bd56ba2b94df07e3e0daa5b9c00f3dc264e1a1efc21385,
-                c1: 0x0e919401ce3c8c939ba0cc7dc25ee919e38c2a8dff85985ba533670f0ca94410,
-            },
-            c1: Fq2 {
-                c0: 0x14a115385087b77e61873fe0db0615a8ff4f22a7b3a412e95203d36b10ad46d1,
-                c1: 0x27cf2ae2f8f8588f22d939f759ca37d0c77d5fdfc61c4194761bfc4dc22c5175,
-            },
-            c2: Fq2 {
-                c0: 0x1b530d147488378d4956ae1d570209cc2c05a8d9fbeb935b6a20b10185178092,
-                c1: 0x1fd389970c3bc02a1b57cfe114630127d34f35e8291e468205e4f999a4e4ffae,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x283c2a15ce4fb8ed8fc2df9071182a0df68789dfb1576008221d910e4deba67f,
-                c1: 0x286a770c6015d6451541ce952dc70b447f3742a75529957ca4155aa7d24d6f12,
-            },
-            c1: Fq2 {
-                c0: 0x2d951b3f59ce21e12cc7167473c3c47601758101c3693b3fc23b28321b6bb5a9,
-                c1: 0x1bae636562fa5fa38856b8dc4be9bd7ed46e89f72440da50ba266786250cfba6,
-            },
-            c2: Fq2 {
-                c0: 0x2a61ed99d11015c08d4b7fef72b520834460c6754eae89b5e3d0e668d95bc5cf,
-                c1: 0x25578eadd72707995b8311db6600f6eccfd03231175779bbbbff128bb28b1684,
-            },
-        },
-    },
-)
-circuit f: [
-    "1dc9ee36016f9ea889bd56ba2b94df07e3e0daa5b9c00f3dc264e1a1efc21385",
-    "283c2a15ce4fb8ed8fc2df9071182a0df68789dfb1576008221d910e4deba67f",
-    "14a115385087b77e61873fe0db0615a8ff4f22a7b3a412e95203d36b10ad46d1",
-    "2d951b3f59ce21e12cc7167473c3c47601758101c3693b3fc23b28321b6bb5a9",
-    "1b530d147488378d4956ae1d570209cc2c05a8d9fbeb935b6a20b10185178092",
-    "2a61ed99d11015c08d4b7fef72b520834460c6754eae89b5e3d0e668d95bc5cf",
-    "e919401ce3c8c939ba0cc7dc25ee919e38c2a8dff85985ba533670f0ca94410",
-    "286a770c6015d6451541ce952dc70b447f3742a75529957ca4155aa7d24d6f12",
-    "27cf2ae2f8f8588f22d939f759ca37d0c77d5fdfc61c4194761bfc4dc22c5175",
-    "1bae636562fa5fa38856b8dc4be9bd7ed46e89f72440da50ba266786250cfba6",
-    "1fd389970c3bc02a1b57cfe114630127d34f35e8291e468205e4f999a4e4ffae",
-    "25578eadd72707995b8311db6600f6eccfd03231175779bbbbff128bb28b1684",
-]
-Using:
-advice columns: 13
-special lookup advice columns: 2
-fixed columns: 1
-lookup bits: 17
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 251554
-minimum rows used by an advice column: 251543
-total cells used: 3270142
-cells used in special lookup column: 364074
-maximum rows used by a fixed column: 124
-Proving time: 25.221047792s
-Verify time: 6.036083ms
----------------------- degree = 17 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 902.645375ms
-Time elapsed in generating vkey: 9.427334041s
-Time elapsed in generating pkey: 9.44115625s
-Time elapsed in filling circuit: 157.709µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x2f6b5fb559355078b85623bc1314b30c03eee6d39fd0fa7a18c5dd7797610748,
-                c1: 0x1677069d40821974bdf767cf3d07dc7dbd9aa24a700d847928f28b8506ae0ee2,
-            },
-            c1: Fq2 {
-                c0: 0x28f407b2820778fe3ddb941c30ddbfb4444519b3a040cdc957c9b53f26c11514,
-                c1: 0x02d10f22fc47193dd4c87e6886adab94890ac7e7dc4797185db00bede7257f0a,
-            },
-            c2: Fq2 {
-                c0: 0x23d1256f3e68b2a4e459411e579f7b9003cec25940f501f513f94dbdc258a815,
-                c1: 0x211c94870a2d8ef5aa31f8d0f3370f7b5baee369d56c14d50094061593820895,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x132ecd2ac18921c2bf93fa25705efc55a028ac277381d61ddc2964d90e4b1b7d,
-                c1: 0x07cb8e1067f0249fea57384457aa428310f9f389e8206219bdec48af133e5ccf,
-            },
-            c1: Fq2 {
-                c0: 0x2e05ca4dced66f3951a3627bd7092b3e50abd7ad12380d231f27b4fe1b8c1081,
-                c1: 0x26d7f859fede048d5d7259e96644f01878ea3b010cea4c2a699506495916285b,
-            },
-            c2: Fq2 {
-                c0: 0x24137579a8c915ec245765a2d9466d9775a397ef714e233c2833ea31b935a002,
-                c1: 0x081076c36be6a1301eab412b3846e8551763b7240d637aff1c8414d5eb9534b6,
-            },
-        },
-    },
-)
-circuit f: [
-    "2f6b5fb559355078b85623bc1314b30c03eee6d39fd0fa7a18c5dd7797610748",
-    "132ecd2ac18921c2bf93fa25705efc55a028ac277381d61ddc2964d90e4b1b7d",
-    "28f407b2820778fe3ddb941c30ddbfb4444519b3a040cdc957c9b53f26c11514",
-    "2e05ca4dced66f3951a3627bd7092b3e50abd7ad12380d231f27b4fe1b8c1081",
-    "23d1256f3e68b2a4e459411e579f7b9003cec25940f501f513f94dbdc258a815",
-    "24137579a8c915ec245765a2d9466d9775a397ef714e233c2833ea31b935a002",
-    "1677069d40821974bdf767cf3d07dc7dbd9aa24a700d847928f28b8506ae0ee2",
-    "7cb8e1067f0249fea57384457aa428310f9f389e8206219bdec48af133e5ccf",
-    "2d10f22fc47193dd4c87e6886adab94890ac7e7dc4797185db00bede7257f0a",
-    "26d7f859fede048d5d7259e96644f01878ea3b010cea4c2a699506495916285b",
-    "211c94870a2d8ef5aa31f8d0f3370f7b5baee369d56c14d50094061593820895",
-    "81076c36be6a1301eab412b3846e8551763b7240d637aff1c8414d5eb9534b6",
-]
-Using:
-advice columns: 26
-special lookup advice columns: 3
-fixed columns: 1
-lookup bits: 16
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 127100
-minimum rows used by an advice column: 127084
-total cells used: 3304378
-cells used in special lookup column: 375486
-maximum rows used by a fixed column: 124
-Suggestions:
-Have you tried using 26 advice columns?
-Have you tried using 3 lookup columns?
-Have you tried using 1 fixed columns?
-Proving time: 21.984741416s
-Verify time: 9.293792ms
----------------------- degree = 16 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 480.439875ms
-Time elapsed in generating vkey: 9.627523708s
-test bn254::tests::bench_pairing has been running for over 60 seconds
-Time elapsed in generating pkey: 8.621027917s
-Time elapsed in filling circuit: 132.291µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x05fa25049d6fa380dd098e4db39cbc79ed45abb26bed7eec92affe6c56a7dfc0,
-                c1: 0x0f358196c1ec664b5dd6830ba49182f2105b80109d4ff41921c0d62c63d76c04,
-            },
-            c1: Fq2 {
-                c0: 0x04e06254d755c656aab953f287b774040901d00d2cb73217505b3b08035b3d24,
-                c1: 0x0c21194592f686402b84fed5a02784fc7cb276fb4f9bfd4402f403f528669395,
-            },
-            c2: Fq2 {
-                c0: 0x22e815ce69f4837104a94592dd4097c331f25b1622bc830196cd6f2edfd2975e,
-                c1: 0x10ca8d29eae1de72055333b9fafe49eda152f89f73538a61bc044fe764cb1e7a,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x1ad38e7cceecc2cab831f9140255c1386c697ba7bdc922b6792266e1dd864ddb,
-                c1: 0x29b5248c5b7567b81792b7811def89f1a69c98bbbe8c5c85cbf3b420c48c071b,
-            },
-            c1: Fq2 {
-                c0: 0x08aef22052f4b02f02590fcd20fc3d4a6a08c28969a77f97710238a6f02c5858,
-                c1: 0x29b2268d4f95279da44634aa984d6a68bdc92769bdaf488ef2463baa04fc8aab,
-            },
-            c2: Fq2 {
-                c0: 0x27a760074bc589429b62fbe7c91632148fca433c5d8bd3aaf6d9b22e21d6811a,
-                c1: 0x1ac058c71295b3f6539fe4b0248f05da1e8c42c3c8e19ab635e4fe279142f350,
-            },
-        },
-    },
-)
-circuit f: [
-    "5fa25049d6fa380dd098e4db39cbc79ed45abb26bed7eec92affe6c56a7dfc0",
-    "1ad38e7cceecc2cab831f9140255c1386c697ba7bdc922b6792266e1dd864ddb",
-    "4e06254d755c656aab953f287b774040901d00d2cb73217505b3b08035b3d24",
-    "8aef22052f4b02f02590fcd20fc3d4a6a08c28969a77f97710238a6f02c5858",
-    "22e815ce69f4837104a94592dd4097c331f25b1622bc830196cd6f2edfd2975e",
-    "27a760074bc589429b62fbe7c91632148fca433c5d8bd3aaf6d9b22e21d6811a",
-    "f358196c1ec664b5dd6830ba49182f2105b80109d4ff41921c0d62c63d76c04",
-    "29b5248c5b7567b81792b7811def89f1a69c98bbbe8c5c85cbf3b420c48c071b",
-    "c21194592f686402b84fed5a02784fc7cb276fb4f9bfd4402f403f528669395",
-    "29b2268d4f95279da44634aa984d6a68bdc92769bdaf488ef2463baa04fc8aab",
-    "10ca8d29eae1de72055333b9fafe49eda152f89f73538a61bc044fe764cb1e7a",
-    "1ac058c71295b3f6539fe4b0248f05da1e8c42c3c8e19ab635e4fe279142f350",
-]
-Using:
-advice columns: 51
-special lookup advice columns: 6
-fixed columns: 1
-lookup bits: 15
-limb bits: 90
-num limbs: 3
-maximum rows used by an advice column: 64994
-minimum rows used by an advice column: 64975
-total cells used: 3314074
-cells used in special lookup column: 358398
-maximum rows used by a fixed column: 121
-Suggestions:
-Have you tried using 51 advice columns?
-Have you tried using 6 lookup columns?
-Have you tried using 1 fixed columns?
-Proving time: 20.796019042s
-Verify time: 14.581125ms
----------------------- degree = 15 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 259.291208ms
-Time elapsed in generating vkey: 10.518665125s
-Time elapsed in generating pkey: 8.573772417s
-Time elapsed in filling circuit: 132.416µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x12667b6947e8758e8b58f334c77592aefcc7ed257ed4c5b89d06079a3767fa2c,
-                c1: 0x1d9d38b3ffae375807bf68c3ed7bc8721d31b48641913fc6b762f3c8aa8712c8,
-            },
-            c1: Fq2 {
-                c0: 0x001fbcce1b77e363c5b595e762b0ea8a97c40e0d7d773c7f5af88c458acabb36,
-                c1: 0x1a891a0f23f9b9afe9c825d227786fd0772cb1be65d98047b44bd84d34610ff4,
-            },
-            c2: Fq2 {
-                c0: 0x02cc1a3715d113e79d83d9e5a2f0c7c62d947ae0732e848c63af34c91b4dffdb,
-                c1: 0x25235641f460563a5a84b1d09bea2ae36c9da6e0658f4f69a3b260986fac0be6,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x24508435ac8500731c958875652476306507746838c6a6751ac7da75413bf448,
-                c1: 0x0fe0778ce2f8a314e97775fb7a6f597351058ee63b350a5dc475c008d44c59c8,
-            },
-            c1: Fq2 {
-                c0: 0x248ecfeb01938008b3f4358997c69e7b123e06505fe3b9e2a10a186848d4dc36,
-                c1: 0x02d4435962585d1b92b4ed87bf1629ec33ac31e89e0ad9c3900c485f4cc96ff5,
-            },
-            c2: Fq2 {
-                c0: 0x291792a23dcc866c0f4a807f19366ab587d62484fa0c8df5d79269661e880018,
-                c1: 0x203eed4d7cb2b3089f65a83773fa48e3a8c79305929c29788f32fab0bce17659,
-            },
-        },
-    },
-)
-circuit f: [
-    "12667b6947e8758e8b58f334c77592aefcc7ed257ed4c5b89d06079a3767fa2c",
-    "24508435ac8500731c958875652476306507746838c6a6751ac7da75413bf448",
-    "1fbcce1b77e363c5b595e762b0ea8a97c40e0d7d773c7f5af88c458acabb36",
-    "248ecfeb01938008b3f4358997c69e7b123e06505fe3b9e2a10a186848d4dc36",
-    "2cc1a3715d113e79d83d9e5a2f0c7c62d947ae0732e848c63af34c91b4dffdb",
-    "291792a23dcc866c0f4a807f19366ab587d62484fa0c8df5d79269661e880018",
-    "1d9d38b3ffae375807bf68c3ed7bc8721d31b48641913fc6b762f3c8aa8712c8",
-    "fe0778ce2f8a314e97775fb7a6f597351058ee63b350a5dc475c008d44c59c8",
-    "1a891a0f23f9b9afe9c825d227786fd0772cb1be65d98047b44bd84d34610ff4",
-    "2d4435962585d1b92b4ed87bf1629ec33ac31e89e0ad9c3900c485f4cc96ff5",
-    "25235641f460563a5a84b1d09bea2ae36c9da6e0658f4f69a3b260986fac0be6",
-    "203eed4d7cb2b3089f65a83773fa48e3a8c79305929c29788f32fab0bce17659",
-]
-Using:
-advice columns: 106
-special lookup advice columns: 14
-fixed columns: 1
-lookup bits: 14
-limb bits: 90
-num limbs: 3
-maximum rows used by an advice column: 32757
-minimum rows used by an advice column: 32734
-total cells used: 3470860
-cells used in special lookup column: 430980
-maximum rows used by a fixed column: 126
-Suggestions:
-Have you tried using 106 advice columns?
-Have you tried using 14 lookup columns?
-Have you tried using 1 fixed columns?
-Proving time: 23.438619875s
-Verify time: 25.235459ms
----------------------- degree = 14 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 121.8535ms
-Time elapsed in generating vkey: 12.176505416s
-Time elapsed in generating pkey: 9.08229325s
-Time elapsed in filling circuit: 225.417µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x058742387deecf38a640a1d950f087c4133e9b04c28c64107651788816d19e17,
-                c1: 0x2d300ef28ee6280b7daa508ff4a2c802e4c74daf963dd4da66b25c43cc01a890,
-            },
-            c1: Fq2 {
-                c0: 0x11ccac753ce7c2f373544f01ff370629ba335688e35a7f7831a8d43c99dfe063,
-                c1: 0x1208b3837c8829bddc9abd52e38f07e6cf847edb4124f820162b570b754b307a,
-            },
-            c2: Fq2 {
-                c0: 0x0478c3cd6061ccb56cba3d7dd9468db138fa76d8032dd3aaf3b96c4dae3cdf0e,
-                c1: 0x2497ad50d789c263a47310f084003a9b85e53c9e4859fbf25f67bdba9297c5e2,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x1014151950b5258db9fc15b68037b0a3945303a16c38f70d508c79bf948b8289,
-                c1: 0x2558754a750d53d5e33f7a199c4b539bf1628beaff1340b3a4f749d4a66ac2bf,
-            },
-            c1: Fq2 {
-                c0: 0x23c09080e8d84838d9a05c89c110a2d5788e18b1b67f2a8ac9b65584f4bf35dc,
-                c1: 0x172cee9f44a3964dd47ba733f384a2d4e92e44e21d54244adb89705c6abdb902,
-            },
-            c2: Fq2 {
-                c0: 0x2588706eaef78e17546dae15833ff04b2f0e728261134f7505bce4cca690c174,
-                c1: 0x0d9991fe897d2dc6b432bb9680658a197689bd50b2ce020af9d789b7b2893929,
-            },
-        },
-    },
-)
-circuit f: [
-    "58742387deecf38a640a1d950f087c4133e9b04c28c64107651788816d19e17",
-    "1014151950b5258db9fc15b68037b0a3945303a16c38f70d508c79bf948b8289",
-    "11ccac753ce7c2f373544f01ff370629ba335688e35a7f7831a8d43c99dfe063",
-    "23c09080e8d84838d9a05c89c110a2d5788e18b1b67f2a8ac9b65584f4bf35dc",
-    "478c3cd6061ccb56cba3d7dd9468db138fa76d8032dd3aaf3b96c4dae3cdf0e",
-    "2588706eaef78e17546dae15833ff04b2f0e728261134f7505bce4cca690c174",
-    "2d300ef28ee6280b7daa508ff4a2c802e4c74daf963dd4da66b25c43cc01a890",
-    "2558754a750d53d5e33f7a199c4b539bf1628beaff1340b3a4f749d4a66ac2bf",
-    "1208b3837c8829bddc9abd52e38f07e6cf847edb4124f820162b570b754b307a",
-    "172cee9f44a3964dd47ba733f384a2d4e92e44e21d54244adb89705c6abdb902",
-    "2497ad50d789c263a47310f084003a9b85e53c9e4859fbf25f67bdba9297c5e2",
-    "d9991fe897d2dc6b432bb9680658a197689bd50b2ce020af9d789b7b2893929",
-]
-Using:
-advice columns: 213
-special lookup advice columns: 26
-fixed columns: 1
-lookup bits: 13
-limb bits: 91
-num limbs: 3
-maximum rows used by an advice column: 16354
-minimum rows used by an advice column: 16329
-total cells used: 3480556
-cells used in special lookup column: 413892
-maximum rows used by a fixed column: 123
-Suggestions:
-Have you tried using 213 advice columns?
-Have you tried using 26 lookup columns?
-Have you tried using 1 fixed columns?
-Proving time: 24.405766042s
-Verify time: 60.342208ms
----------------------- degree = 13 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 72.068125ms
-Time elapsed in generating vkey: 14.526949125s
-Time elapsed in generating pkey: 9.846653125s
-Time elapsed in filling circuit: 130.083µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x1638bb2426c4d48ca6700f3e4e2cc368a080453bfb24354f1cc2993d623adebb,
-                c1: 0x1794dbdefc6661d4912240a3c6af6f96a4b6ce94f8de3d12c1be1a19a39f7c2f,
-            },
-            c1: Fq2 {
-                c0: 0x03d44155bf3610456062d73e1d5acc91b6396336fac651c0a4eb09f1911a0402,
-                c1: 0x240fcaffc3feb0322791e710fd0d557c3c7c0f82afe1766d35c84ac06c2fb175,
-            },
-            c2: Fq2 {
-                c0: 0x0ed1775fab5830bf0b46b1ab2dba21c2daeeeef48e82ed177c617984a292de8a,
-                c1: 0x276a48d858ed9858dc3c74addae7746c867e82ec1550d76ca3be4c57ad5a04e6,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x1f96d977e7d8e633b44eec985debfd5b687667dc54fd4fbca31f53f1dca5c5eb,
-                c1: 0x2bc6d16be77e9ceb4c077611a5570a53561e32ac82899d9cb390284bf060ab95,
-            },
-            c1: Fq2 {
-                c0: 0x0f7fb989442a1a81ca7656368988fe4a94f3a505d7d733b73afe723617e48481,
-                c1: 0x24d845ded32529b7caf59eb04951a6d8bf2fbe7960db5536286620d0b239847a,
-            },
-            c2: Fq2 {
-                c0: 0x118fa4c34c476ad206983f9cc1067fcfe9157113c1bf903a1ded48e3f6a0a171,
-                c1: 0x1a4768079c715ddce2adc1c1b13cb42b5c16a5d1c538362d0b940ba0cf428ee2,
-            },
-        },
-    },
-)
-circuit f: [
-    "1638bb2426c4d48ca6700f3e4e2cc368a080453bfb24354f1cc2993d623adebb",
-    "1f96d977e7d8e633b44eec985debfd5b687667dc54fd4fbca31f53f1dca5c5eb",
-    "3d44155bf3610456062d73e1d5acc91b6396336fac651c0a4eb09f1911a0402",
-    "f7fb989442a1a81ca7656368988fe4a94f3a505d7d733b73afe723617e48481",
-    "ed1775fab5830bf0b46b1ab2dba21c2daeeeef48e82ed177c617984a292de8a",
-    "118fa4c34c476ad206983f9cc1067fcfe9157113c1bf903a1ded48e3f6a0a171",
-    "1794dbdefc6661d4912240a3c6af6f96a4b6ce94f8de3d12c1be1a19a39f7c2f",
-    "2bc6d16be77e9ceb4c077611a5570a53561e32ac82899d9cb390284bf060ab95",
-    "240fcaffc3feb0322791e710fd0d557c3c7c0f82afe1766d35c84ac06c2fb175",
-    "24d845ded32529b7caf59eb04951a6d8bf2fbe7960db5536286620d0b239847a",
-    "276a48d858ed9858dc3c74addae7746c867e82ec1550d76ca3be4c57ad5a04e6",
-    "1a4768079c715ddce2adc1c1b13cb42b5c16a5d1c538362d0b940ba0cf428ee2",
-]
-Using:
-advice columns: 446
-special lookup advice columns: 60
-fixed columns: 1
-lookup bits: 12
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 8173
-minimum rows used by an advice column: 8147
-total cells used: 3637342
-cells used in special lookup column: 486474
-maximum rows used by a fixed column: 127
-Suggestions:
-Have you tried using 445 advice columns?
-Have you tried using 60 lookup columns?
-Have you tried using 1 fixed columns?
-Proving time: 28.738033583s
-Verify time: 104.588125ms
\ No newline at end of file
diff --git a/halo2-ecc/src/bn254/tests/ec_add.rs b/halo2-ecc/src/bn254/tests/ec_add.rs
index 08dc9fb1..30c52aa5 100644
--- a/halo2-ecc/src/bn254/tests/ec_add.rs
+++ b/halo2-ecc/src/bn254/tests/ec_add.rs
@@ -1,15 +1,19 @@
-use std::env::set_var;
 use std::fs;
-use std::{env::var, fs::File};
+use std::fs::File;
+use std::io::{BufRead, BufReader};
 
 use super::*;
-use crate::fields::FieldChip;
-use crate::halo2_proofs::halo2curves::{bn256::G2Affine, FieldExt};
+use crate::fields::{FieldChip, FpStrategy};
+use crate::halo2_proofs::halo2curves::bn256::G2Affine;
 use group::cofactor::CofactorCurveAffine;
-use halo2_base::SKIP_FIRST_PASS;
+use halo2_base::gates::builder::{GateThreadBuilder, RangeCircuitBuilder};
+use halo2_base::gates::RangeChip;
+use halo2_base::utils::fs::gen_srs;
+use halo2_base::Context;
+use itertools::Itertools;
 use rand_core::OsRng;
 
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
 struct CircuitParams {
     strategy: FpStrategy,
     degree: u32,
@@ -22,270 +26,95 @@ struct CircuitParams {
     batch_size: usize,
 }
 
-#[derive(Clone, Debug)]
-struct Config<F: PrimeField> {
-    fp_chip: FpChip<F>,
-    batch_size: usize,
-}
+fn g2_add_test<F: PrimeField>(ctx: &mut Context<F>, params: CircuitParams, _points: Vec<G2Affine>) {
+    std::env::set_var("LOOKUP_BITS", params.lookup_bits.to_string());
+    let range = RangeChip::<F>::default(params.lookup_bits);
+    let fp_chip = FpChip::<F>::new(&range, params.limb_bits, params.num_limbs);
+    let fp2_chip = Fp2Chip::<F>::new(&fp_chip);
+    let g2_chip = EccChip::new(&fp2_chip);
 
-impl<F: PrimeField> Config<F> {
-    pub fn configure(
-        meta: &mut ConstraintSystem<F>,
-        strategy: FpStrategy,
-        num_advice: &[usize],
-        num_lookup_advice: &[usize],
-        num_fixed: usize,
-        lookup_bits: usize,
-        limb_bits: usize,
-        num_limbs: usize,
-        p: BigUint,
-        batch_size: usize,
-        context_id: usize,
-        k: usize,
-    ) -> Self {
-        let fp_chip = FpChip::<F>::configure(
-            meta,
-            strategy,
-            num_advice,
-            num_lookup_advice,
-            num_fixed,
-            lookup_bits,
-            limb_bits,
-            num_limbs,
-            p,
-            context_id,
-            k,
-        );
-        Self { fp_chip, batch_size }
-    }
-}
+    let points = _points.iter().map(|pt| g2_chip.assign_point(ctx, *pt)).collect::<Vec<_>>();
 
-struct EcAddCircuit<F: PrimeField> {
-    points: Vec<Option<G2Affine>>,
-    batch_size: usize,
-    _marker: PhantomData<F>,
-}
+    let acc = g2_chip.sum::<G2Affine>(ctx, points.iter());
 
-impl<F: PrimeField> Default for EcAddCircuit<F> {
-    fn default() -> Self {
-        Self { points: vec![None; 100], batch_size: 100, _marker: PhantomData }
-    }
-}
-
-impl Circuit<Fr> for EcAddCircuit<Fr> {
-    type Config = Config<Fr>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self {
-            points: vec![None; self.batch_size],
-            batch_size: self.batch_size,
-            _marker: PhantomData,
-        }
-    }
-
-    fn configure(meta: &mut ConstraintSystem<Fr>) -> Self::Config {
-        let path = var("EC_ADD_CONFIG")
-            .unwrap_or_else(|_| "./src/bn254/configs/ec_add_circuit.config".to_string());
-        let params: CircuitParams = serde_json::from_reader(
-            File::open(&path).unwrap_or_else(|_| panic!("{path:?} file should exist")),
-        )
-        .unwrap();
-
-        Config::<Fr>::configure(
-            meta,
-            params.strategy,
-            &[params.num_advice],
-            &[params.num_lookup_advice],
-            params.num_fixed,
-            params.lookup_bits,
-            params.limb_bits,
-            params.num_limbs,
-            BigUint::from_str_radix(&Fq::MODULUS[2..], 16).unwrap(),
-            params.batch_size,
-            0,
-            params.degree as usize,
-        )
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<Fr>,
-    ) -> Result<(), Error> {
-        assert_eq!(config.batch_size, self.points.len());
-
-        config.fp_chip.load_lookup_table(&mut layouter)?;
-        let fp2_chip = Fp2Chip::<Fr>::construct(&config.fp_chip);
-        let g2_chip = EccChip::construct(fp2_chip.clone());
-
-        let mut first_pass = SKIP_FIRST_PASS;
-        layouter.assign_region(
-            || "G2 add",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-                let mut aux = config.fp_chip.new_context(region);
-                let ctx = &mut aux;
-
-                let display = self.points[0].is_some();
-                let points = self
-                    .points
-                    .iter()
-                    .cloned()
-                    .map(|pt| {
-                        g2_chip.assign_point(ctx, pt.map(Value::known).unwrap_or(Value::unknown()))
-                    })
-                    .collect::<Vec<_>>();
-
-                let acc = g2_chip.sum::<G2Affine>(ctx, points.iter());
-
-                #[cfg(feature = "display")]
-                if display {
-                    let answer = self
-                        .points
-                        .iter()
-                        .fold(G2Affine::identity(), |a, b| (a + b.unwrap()).to_affine());
-                    let x = fp2_chip.get_assigned_value(&acc.x);
-                    let y = fp2_chip.get_assigned_value(&acc.y);
-                    x.map(|x| assert_eq!(answer.x, x));
-                    y.map(|y| assert_eq!(answer.y, y));
-                }
-
-                config.fp_chip.finalize(ctx);
-
-                #[cfg(feature = "display")]
-                if display {
-                    ctx.print_stats(&["Range"]);
-                }
-                Ok(())
-            },
-        )
-    }
+    let answer = _points.iter().fold(G2Affine::identity(), |a, b| (a + b).to_affine());
+    let x = fp2_chip.get_assigned_value(&acc.x);
+    let y = fp2_chip.get_assigned_value(&acc.y);
+    assert_eq!(answer.x, x);
+    assert_eq!(answer.y, y);
 }
 
 #[test]
 fn test_ec_add() {
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-    folder.push("configs/ec_add_circuit.config");
-    set_var("EC_ADD_CONFIG", &folder);
-    let params_str = std::fs::read_to_string(folder.as_path())
-        .unwrap_or_else(|_| panic!("{folder:?} file should exist"));
-    let params: CircuitParams = serde_json::from_str(params_str.as_str()).unwrap();
-    let k = params.degree;
+    let path = "configs/bn254/ec_add_circuit.config";
+    let params: CircuitParams = serde_json::from_reader(
+        File::open(path).unwrap_or_else(|e| panic!("{path} does not exist: {e:?}")),
+    )
+    .unwrap();
 
-    let mut rng = OsRng;
-
-    let mut points = Vec::new();
-    for _ in 0..params.batch_size {
-        let new_pt = Some(G2Affine::random(&mut rng));
-        points.push(new_pt);
-    }
+    let k = params.degree;
+    let points = (0..params.batch_size).map(|_| G2Affine::random(OsRng)).collect_vec();
 
-    let circuit =
-        EcAddCircuit::<Fr> { points, batch_size: params.batch_size, _marker: PhantomData };
+    let mut builder = GateThreadBuilder::<Fr>::mock();
+    g2_add_test(builder.main(0), params, points);
 
-    let prover = MockProver::run(k, &circuit, vec![]).unwrap();
-    prover.assert_satisfied();
+    builder.config(k as usize, Some(20));
+    let circuit = RangeCircuitBuilder::mock(builder);
+    MockProver::run(k, &circuit, vec![]).unwrap().assert_satisfied();
 }
 
 #[test]
 fn bench_ec_add() -> Result<(), Box<dyn std::error::Error>> {
-    use std::io::BufRead;
-
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-
-    folder.push("configs/bench_ec_add.config");
-    let bench_params_file = std::fs::File::open(folder.as_path())?;
-    folder.pop();
-    folder.pop();
+    let config_path = "configs/bn254/bench_ec_add.config";
+    let bench_params_file =
+        File::open(config_path).unwrap_or_else(|e| panic!("{config_path} does not exist: {e:?}"));
+    fs::create_dir_all("results/bn254").unwrap();
 
-    folder.push("results/ec_add_bench.csv");
-    let mut fs_results = std::fs::File::create(folder.as_path()).unwrap();
-    folder.pop();
-    folder.pop();
+    let results_path = "results/bn254/ec_add_bench.csv";
+    let mut fs_results = File::create(results_path).unwrap();
     writeln!(fs_results, "degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,batch_size,proof_time,proof_size,verify_time")?;
-    folder.push("data");
-    if !folder.is_dir() {
-        std::fs::create_dir(folder.as_path())?;
-    }
-
-    let mut params_folder = std::path::PathBuf::new();
-    params_folder.push("./params");
-    if !params_folder.is_dir() {
-        std::fs::create_dir(params_folder.as_path())?;
-    }
+    fs::create_dir_all("data").unwrap();
 
-    let bench_params_reader = std::io::BufReader::new(bench_params_file);
+    let bench_params_reader = BufReader::new(bench_params_file);
     for line in bench_params_reader.lines() {
         let bench_params: CircuitParams = serde_json::from_str(line.unwrap().as_str()).unwrap();
-        println!(
-            "---------------------- degree = {} ------------------------------",
-            bench_params.degree
-        );
+        let k = bench_params.degree;
+        println!("---------------------- degree = {k} ------------------------------",);
         let mut rng = OsRng;
 
-        {
-            folder.pop();
-            folder.push("configs/ec_add_circuit.tmp.config");
-            set_var("EC_ADD_CONFIG", &folder);
-            let mut f = std::fs::File::create(folder.as_path())?;
-            write!(f, "{}", serde_json::to_string(&bench_params).unwrap())?;
-            folder.pop();
-            folder.pop();
-            folder.push("data");
-        }
         let params_time = start_timer!(|| "Params construction");
-        let params = {
-            params_folder.push(format!("kzg_bn254_{}.srs", bench_params.degree));
-            let fd = std::fs::File::open(params_folder.as_path());
-            let params = if let Ok(mut f) = fd {
-                println!("Found existing params file. Reading params...");
-                ParamsKZG::<Bn256>::read(&mut f).unwrap()
-            } else {
-                println!("Creating new params file...");
-                let mut f = std::fs::File::create(params_folder.as_path())?;
-                let params = ParamsKZG::<Bn256>::setup(bench_params.degree, &mut rng);
-                params.write(&mut f).unwrap();
-                params
-            };
-            params_folder.pop();
-            params
-        };
+        let params = gen_srs(k);
         end_timer!(params_time);
 
-        let circuit = EcAddCircuit::<Fr> {
-            points: vec![None; bench_params.batch_size],
-            batch_size: bench_params.batch_size,
-            _marker: PhantomData,
+        let start0 = start_timer!(|| "Witness generation for empty circuit");
+        let circuit = {
+            let points = vec![G2Affine::generator(); bench_params.batch_size];
+            let mut builder = GateThreadBuilder::<Fr>::keygen();
+            g2_add_test(builder.main(0), bench_params, points);
+            builder.config(k as usize, Some(20));
+            RangeCircuitBuilder::keygen(builder)
         };
+        end_timer!(start0);
 
         let vk_time = start_timer!(|| "Generating vkey");
         let vk = keygen_vk(&params, &circuit)?;
         end_timer!(vk_time);
-
         let pk_time = start_timer!(|| "Generating pkey");
         let pk = keygen_pk(&params, vk, &circuit)?;
         end_timer!(pk_time);
 
-        let mut points = Vec::new();
-        for _ in 0..bench_params.batch_size {
-            let new_pt = Some(G2Affine::random(&mut rng));
-            points.push(new_pt);
-        }
-
-        let proof_circuit = EcAddCircuit::<Fr> {
-            points,
-            batch_size: bench_params.batch_size,
-            _marker: PhantomData,
-        };
+        let break_points = circuit.0.break_points.take();
+        drop(circuit);
 
         // create a proof
+        let points = (0..bench_params.batch_size).map(|_| G2Affine::random(&mut rng)).collect_vec();
         let proof_time = start_timer!(|| "Proving time");
+        let proof_circuit = {
+            let mut builder = GateThreadBuilder::<Fr>::prover();
+            g2_add_test(builder.main(0), bench_params, points);
+            builder.config(k as usize, Some(20));
+            RangeCircuitBuilder::prover(builder, break_points)
+        };
         let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
         create_proof::<
             KZGCommitmentScheme<Bn256>,
@@ -299,8 +128,8 @@ fn bench_ec_add() -> Result<(), Box<dyn std::error::Error>> {
         end_timer!(proof_time);
 
         let proof_size = {
-            folder.push(format!(
-                "ec_add_circuit_proof_{}_{}_{}_{}_{}_{}_{}_{}.data",
+            let path = format!(
+                "data/ec_add_circuit_proof_{}_{}_{}_{}_{}_{}_{}_{}.data",
                 bench_params.degree,
                 bench_params.num_advice,
                 bench_params.num_lookup_advice,
@@ -309,27 +138,27 @@ fn bench_ec_add() -> Result<(), Box<dyn std::error::Error>> {
                 bench_params.limb_bits,
                 bench_params.num_limbs,
                 bench_params.batch_size,
-            ));
-            let mut fd = std::fs::File::create(folder.as_path()).unwrap();
-            folder.pop();
-            fd.write_all(&proof).unwrap();
-            fd.metadata().unwrap().len()
+            );
+            let mut fd = File::create(&path)?;
+            fd.write_all(&proof)?;
+            let size = fd.metadata().unwrap().len();
+            fs::remove_file(path)?;
+            size
         };
 
         let verify_time = start_timer!(|| "Verify time");
         let verifier_params = params.verifier_params();
         let strategy = SingleStrategy::new(&params);
         let mut transcript = Blake2bRead::<_, _, Challenge255<_>>::init(&proof[..]);
-        assert!(verify_proof::<
+        verify_proof::<
             KZGCommitmentScheme<Bn256>,
             VerifierSHPLONK<'_, Bn256>,
             Challenge255<G1Affine>,
             Blake2bRead<&[u8], G1Affine, Challenge255<G1Affine>>,
             SingleStrategy<'_, Bn256>,
         >(verifier_params, pk.get_vk(), strategy, &[&[]], &mut transcript)
-        .is_ok());
+        .unwrap();
         end_timer!(verify_time);
-        fs::remove_file(var("EC_ADD_CONFIG").unwrap())?;
 
         writeln!(
             fs_results,
diff --git a/halo2-ecc/src/bn254/tests/fixed_base_msm.rs b/halo2-ecc/src/bn254/tests/fixed_base_msm.rs
index c7239d9d..f16560f4 100644
--- a/halo2-ecc/src/bn254/tests/fixed_base_msm.rs
+++ b/halo2-ecc/src/bn254/tests/fixed_base_msm.rs
@@ -1,12 +1,30 @@
-use std::{env::var, fs::File};
+use std::{
+    fs::{self, File},
+    io::{BufRead, BufReader},
+    sync::Mutex,
+};
 
 #[allow(unused_imports)]
 use crate::ecc::fixed_base::FixedEcPoint;
+use crate::fields::{FpStrategy, PrimeField};
 
 use super::*;
-use halo2_base::{halo2_proofs::halo2curves::bn256::G1, SKIP_FIRST_PASS};
-
-#[derive(Serialize, Deserialize, Debug)]
+#[allow(unused_imports)]
+use ff::PrimeField as _;
+use halo2_base::{
+    gates::{
+        builder::{
+            CircuitBuilderStage, GateThreadBuilder, MultiPhaseThreadBreakPoints,
+            RangeCircuitBuilder,
+        },
+        RangeChip,
+    },
+    halo2_proofs::halo2curves::bn256::G1,
+    utils::fs::gen_srs,
+};
+use rand_core::OsRng;
+
+#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
 struct MSMCircuitParams {
     strategy: FpStrategy,
     degree: u32,
@@ -21,274 +39,108 @@ struct MSMCircuitParams {
     clump_factor: usize,
 }
 
-#[derive(Clone, Debug)]
-struct MSMConfig<F: PrimeField> {
-    fp_chip: FpChip<F>,
-    batch_size: usize,
-    _radix: usize,
-    _clump_factor: usize,
-}
-
-impl<F: PrimeField> MSMConfig<F> {
-    pub fn configure(
-        meta: &mut ConstraintSystem<F>,
-        strategy: FpStrategy,
-        num_advice: &[usize],
-        num_lookup_advice: &[usize],
-        num_fixed: usize,
-        lookup_bits: usize,
-        limb_bits: usize,
-        num_limbs: usize,
-        p: BigUint,
-        batch_size: usize,
-        _radix: usize,
-        _clump_factor: usize,
-        context_id: usize,
-        k: usize,
-    ) -> Self {
-        let fp_chip = FpChip::<F>::configure(
-            meta,
-            strategy,
-            num_advice,
-            num_lookup_advice,
-            num_fixed,
-            lookup_bits,
-            limb_bits,
-            num_limbs,
-            p,
-            context_id,
-            k,
-        );
-        MSMConfig { fp_chip, batch_size, _radix, _clump_factor }
+fn fixed_base_msm_test(
+    thread_pool: &Mutex<GateThreadBuilder<Fr>>,
+    params: MSMCircuitParams,
+    bases: Vec<G1Affine>,
+    scalars: Vec<Fr>,
+) {
+    std::env::set_var("LOOKUP_BITS", params.lookup_bits.to_string());
+    let range = RangeChip::<Fr>::default(params.lookup_bits);
+    let fp_chip = FpChip::<Fr>::new(&range, params.limb_bits, params.num_limbs);
+    let ecc_chip = EccChip::new(&fp_chip);
+
+    let mut builder = thread_pool.lock().unwrap();
+    let scalars_assigned = scalars
+        .iter()
+        .map(|scalar| vec![builder.main(0).load_witness(*scalar)])
+        .collect::<Vec<_>>();
+    drop(builder);
+
+    let msm = ecc_chip.fixed_base_msm(thread_pool, &bases, scalars_assigned, Fr::NUM_BITS as usize);
+
+    let mut elts: Vec<G1> = Vec::new();
+    for (base, scalar) in bases.iter().zip(scalars.iter()) {
+        elts.push(base * scalar);
     }
-}
+    let msm_answer = elts.into_iter().reduce(|a, b| a + b).unwrap().to_affine();
 
-struct MSMCircuit<F: PrimeField> {
-    bases: Vec<G1Affine>,
-    scalars: Vec<Option<Fr>>,
-    _marker: PhantomData<F>,
+    let msm_x = msm.x.value;
+    let msm_y = msm.y.value;
+    assert_eq!(msm_x, fe_to_biguint(&msm_answer.x).into());
+    assert_eq!(msm_y, fe_to_biguint(&msm_answer.y).into());
 }
 
-impl Circuit<Fr> for MSMCircuit<Fr> {
-    type Config = MSMConfig<Fr>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self {
-            bases: self.bases.clone(),
-            scalars: vec![None; self.scalars.len()],
-            _marker: PhantomData,
+fn random_fixed_base_msm_circuit(
+    params: MSMCircuitParams,
+    stage: CircuitBuilderStage,
+    break_points: Option<MultiPhaseThreadBreakPoints>,
+) -> RangeCircuitBuilder<Fr> {
+    let k = params.degree as usize;
+    let builder = match stage {
+        CircuitBuilderStage::Mock => GateThreadBuilder::mock(),
+        CircuitBuilderStage::Prover => GateThreadBuilder::prover(),
+        CircuitBuilderStage::Keygen => GateThreadBuilder::keygen(),
+    };
+    let builder = Mutex::new(builder);
+
+    let (bases, scalars): (Vec<_>, Vec<_>) =
+        (0..params.batch_size).map(|_| (G1Affine::random(OsRng), Fr::random(OsRng))).unzip();
+    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
+    fixed_base_msm_test(&builder, params, bases, scalars);
+
+    let builder = builder.into_inner().unwrap();
+    let circuit = match stage {
+        CircuitBuilderStage::Mock => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::mock(builder)
         }
-    }
-
-    fn configure(meta: &mut ConstraintSystem<Fr>) -> Self::Config {
-        let path = var("FIXED_MSM_CONFIG")
-            .unwrap_or_else(|_| "./src/bn254/configs/fixed_msm_circuit.config".to_string());
-        let params: MSMCircuitParams = serde_json::from_reader(
-            File::open(&path).unwrap_or_else(|_| panic!("{path:?} file should exist")),
-        )
-        .unwrap();
-
-        MSMConfig::<Fr>::configure(
-            meta,
-            params.strategy,
-            &[params.num_advice],
-            &[params.num_lookup_advice],
-            params.num_fixed,
-            params.lookup_bits,
-            params.limb_bits,
-            params.num_limbs,
-            BigUint::from_str_radix(&Fq::MODULUS[2..], 16).unwrap(),
-            params.batch_size,
-            params.radix,
-            params.clump_factor,
-            0,
-            params.degree as usize,
-        )
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<Fr>,
-    ) -> Result<(), Error> {
-        assert_eq!(config.batch_size, self.scalars.len());
-        assert_eq!(config.batch_size, self.bases.len());
-
-        config.fp_chip.load_lookup_table(&mut layouter)?;
-
-        let mut first_pass = SKIP_FIRST_PASS;
-        layouter.assign_region(
-            || "fixed base msm",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-                let witness_time = start_timer!(|| "Witness generation");
-
-                let mut aux = config.fp_chip.new_context(region);
-                let ctx = &mut aux;
-
-                let mut scalars_assigned = Vec::new();
-                for scalar in &self.scalars {
-                    let assignment = config
-                        .fp_chip
-                        .range
-                        .gate
-                        .assign_witnesses(ctx, vec![scalar.map_or(Value::unknown(), Value::known)]);
-                    scalars_assigned.push(assignment);
-                }
-
-                let ecc_chip = EccChip::construct(config.fp_chip.clone());
-
-                // baseline
-                /*
-                let msm = {
-                    let sm = self.bases.iter().zip(scalars_assigned.iter()).map(|(base, scalar)|
-                        ecc_chip.fixed_base_scalar_mult(ctx, &FixedEcPoint::<Fr, G1Affine>::from_g1(base, config.fp_chip.num_limbs, config.fp_chip.limb_bits), scalar, Fr::NUM_BITS as usize, 4)).collect::<Vec<_>>();
-                    ecc_chip.sum::<G1Affine>(ctx, sm.iter())
-                };
-                */
-
-                let msm = ecc_chip.fixed_base_msm::<G1Affine>(
-                    ctx,
-                    &self.bases,
-                    &scalars_assigned,
-                    Fr::NUM_BITS as usize,
-                    config._radix,
-                    config._clump_factor,
-                );
-
-                config.fp_chip.finalize(ctx);
-                end_timer!(witness_time);
-
-                #[cfg(feature = "display")]
-                if self.scalars[0].is_some() {
-                    let mut elts: Vec<G1> = Vec::new();
-                    for (base, scalar) in self.bases.iter().zip(&self.scalars) {
-                        elts.push(base * biguint_to_fe::<Fr>(&fe_to_biguint(&scalar.unwrap())));
-                    }
-                    let msm_answer = elts.into_iter().reduce(|a, b| a + b).unwrap().to_affine();
-
-                    let msm_x = value_to_option(msm.x.value).unwrap();
-                    let msm_y = value_to_option(msm.y.value).unwrap();
-                    assert_eq!(msm_x, fe_to_biguint(&msm_answer.x).into());
-                    assert_eq!(msm_y, fe_to_biguint(&msm_answer.y).into());
-                }
-
-                #[cfg(feature = "display")]
-                if self.scalars[0].is_some() {
-                    ctx.print_stats(&["Range"]);
-                }
-                Ok(())
-            },
-        )
-    }
+        CircuitBuilderStage::Keygen => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::keygen(builder)
+        }
+        CircuitBuilderStage::Prover => RangeCircuitBuilder::prover(builder, break_points.unwrap()),
+    };
+    end_timer!(start0);
+    circuit
 }
 
-#[cfg(test)]
 #[test]
 fn test_fixed_base_msm() {
-    use std::env::set_var;
-
-    use crate::halo2_proofs::arithmetic::Field;
-
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-    folder.push("configs/fixed_msm_circuit.config");
-    set_var("FIXED_MSM_CONFIG", &folder);
-    let params_str = std::fs::read_to_string(folder.as_path())
-        .expect("src/bn254/configs/fixed_msm_circuit.config file should exist");
-    let params: MSMCircuitParams = serde_json::from_str(params_str.as_str()).unwrap();
-    let k = params.degree;
-
-    let mut rng = rand::thread_rng();
-
-    let mut bases = Vec::new();
-    let mut scalars = Vec::new();
-    for _ in 0..params.batch_size {
-        bases.push(G1Affine::random(&mut rng));
-
-        let new_scalar = Some(Fr::random(&mut rng));
-        scalars.push(new_scalar);
-    }
-
-    let circuit = MSMCircuit::<Fr> { bases, scalars, _marker: PhantomData };
-
-    let prover = MockProver::run(k, &circuit, vec![]).unwrap();
-    prover.assert_satisfied();
+    let path = "configs/bn254/fixed_msm_circuit.config";
+    let params: MSMCircuitParams = serde_json::from_reader(
+        File::open(path).unwrap_or_else(|e| panic!("{path} does not exist: {e:?}")),
+    )
+    .unwrap();
+
+    let circuit = random_fixed_base_msm_circuit(params, CircuitBuilderStage::Mock, None);
+    MockProver::run(params.degree, &circuit, vec![]).unwrap().assert_satisfied();
 }
 
-#[cfg(test)]
 #[test]
 fn bench_fixed_base_msm() -> Result<(), Box<dyn std::error::Error>> {
-    use std::{
-        env::{set_var, var},
-        fs,
-        io::BufRead,
-    };
-
-    use halo2_base::utils::fs::gen_srs;
-    use rand_core::OsRng;
-
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-
-    folder.push("configs/bench_fixed_msm.config");
-    let bench_params_file = std::fs::File::open(folder.as_path())?;
-    folder.pop();
-    folder.pop();
-
-    folder.push("results/fixed_msm_bench.csv");
-    let mut fs_results = std::fs::File::create(folder.as_path()).unwrap();
-    folder.pop();
-    folder.pop();
+    let config_path = "configs/bn254/bench_fixed_msm.config";
+    let bench_params_file =
+        File::open(config_path).unwrap_or_else(|e| panic!("{config_path} does not exist: {e:?}"));
+    fs::create_dir_all("results/bn254").unwrap();
+    fs::create_dir_all("data").unwrap();
+
+    let results_path = "results/bn254/fixed_msm_bench.csv";
+    let mut fs_results = File::create(results_path).unwrap();
     writeln!(fs_results, "degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,batch_size,proof_time,proof_size,verify_time")?;
-    folder.push("data");
-    if !folder.is_dir() {
-        std::fs::create_dir(folder.as_path())?;
-    }
 
-    let mut params_folder = std::path::PathBuf::new();
-    params_folder.push("./params");
-    if !params_folder.is_dir() {
-        std::fs::create_dir(params_folder.as_path())?;
-    }
-
-    let bench_params_reader = std::io::BufReader::new(bench_params_file);
+    let bench_params_reader = BufReader::new(bench_params_file);
     for line in bench_params_reader.lines() {
         let bench_params: MSMCircuitParams = serde_json::from_str(line.unwrap().as_str()).unwrap();
-        println!(
-            "---------------------- degree = {} ------------------------------",
-            bench_params.degree
-        );
-        let mut rng = OsRng;
-
-        {
-            folder.pop();
-            folder.push("configs/fixed_msm_circuit.tmp.config");
-            set_var("FIXED_MSM_CONFIG", &folder);
-            let mut f = std::fs::File::create(folder.as_path())?;
-            write!(f, "{}", serde_json::to_string(&bench_params).unwrap())?;
-            folder.pop();
-            folder.pop();
-            folder.push("data");
-        }
-        let params = gen_srs(bench_params.degree);
+        let k = bench_params.degree;
+        println!("---------------------- degree = {k} ------------------------------",);
+        let rng = OsRng;
 
+        let params = gen_srs(k);
         println!("{bench_params:?}");
 
-        let mut bases = Vec::new();
-        let mut scalars = Vec::new();
-        for _idx in 0..bench_params.batch_size {
-            bases.push(G1Affine::random(&mut rng));
-
-            let new_scalar = Some(Fr::random(&mut rng));
-            scalars.push(new_scalar);
-        }
         let circuit =
-            MSMCircuit::<Fr> { bases, scalars: vec![None; scalars.len()], _marker: PhantomData };
+            random_fixed_base_msm_circuit(bench_params, CircuitBuilderStage::Keygen, None);
 
         let vk_time = start_timer!(|| "Generating vkey");
         let vk = keygen_vk(&params, &circuit)?;
@@ -298,9 +150,15 @@ fn bench_fixed_base_msm() -> Result<(), Box<dyn std::error::Error>> {
         let pk = keygen_pk(&params, vk, &circuit)?;
         end_timer!(pk_time);
 
-        let circuit = MSMCircuit::<Fr> { scalars, ..circuit };
+        let break_points = circuit.0.break_points.take();
+        drop(circuit);
         // create a proof
         let proof_time = start_timer!(|| "Proving time");
+        let circuit = random_fixed_base_msm_circuit(
+            bench_params,
+            CircuitBuilderStage::Prover,
+            Some(break_points),
+        );
         let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
         create_proof::<
             KZGCommitmentScheme<Bn256>,
@@ -308,14 +166,15 @@ fn bench_fixed_base_msm() -> Result<(), Box<dyn std::error::Error>> {
             Challenge255<G1Affine>,
             _,
             Blake2bWrite<Vec<u8>, G1Affine, Challenge255<G1Affine>>,
-            MSMCircuit<Fr>,
+            _,
         >(&params, &pk, &[circuit], &[&[]], rng, &mut transcript)?;
         let proof = transcript.finalize();
         end_timer!(proof_time);
 
         let proof_size = {
-            folder.push(format!(
-                "msm_circuit_proof_{}_{}_{}_{}_{}_{}_{}_{}.data",
+            let path = format!(
+                "data/\
+                msm_circuit_proof_{}_{}_{}_{}_{}_{}_{}_{}.data",
                 bench_params.degree,
                 bench_params.num_advice,
                 bench_params.num_lookup_advice,
@@ -324,27 +183,27 @@ fn bench_fixed_base_msm() -> Result<(), Box<dyn std::error::Error>> {
                 bench_params.limb_bits,
                 bench_params.num_limbs,
                 bench_params.batch_size,
-            ));
-            let mut fd = std::fs::File::create(folder.as_path()).unwrap();
-            folder.pop();
-            fd.write_all(&proof).unwrap();
-            fd.metadata().unwrap().len()
+            );
+            let mut fd = File::create(&path)?;
+            fd.write_all(&proof)?;
+            let size = fd.metadata().unwrap().len();
+            fs::remove_file(path)?;
+            size
         };
 
         let verify_time = start_timer!(|| "Verify time");
         let verifier_params = params.verifier_params();
         let strategy = SingleStrategy::new(&params);
         let mut transcript = Blake2bRead::<_, _, Challenge255<_>>::init(&proof[..]);
-        assert!(verify_proof::<
+        verify_proof::<
             KZGCommitmentScheme<Bn256>,
             VerifierSHPLONK<'_, Bn256>,
             Challenge255<G1Affine>,
             Blake2bRead<&[u8], G1Affine, Challenge255<G1Affine>>,
             SingleStrategy<'_, Bn256>,
         >(verifier_params, pk.get_vk(), strategy, &[&[]], &mut transcript)
-        .is_ok());
+        .unwrap();
         end_timer!(verify_time);
-        fs::remove_file(var("FIXED_MSM_CONFIG").unwrap())?;
 
         writeln!(
             fs_results,
diff --git a/halo2-ecc/src/bn254/tests/mod.rs b/halo2-ecc/src/bn254/tests/mod.rs
index 763bd127..b373d51e 100644
--- a/halo2-ecc/src/bn254/tests/mod.rs
+++ b/halo2-ecc/src/bn254/tests/mod.rs
@@ -1,34 +1,25 @@
 #![allow(non_snake_case)]
-use ark_std::{end_timer, start_timer};
-use group::Curve;
-use serde::{Deserialize, Serialize};
-use std::io::Write;
-use std::marker::PhantomData;
-
 use super::pairing::PairingChip;
 use super::*;
 use crate::halo2_proofs::{
-    circuit::{Layouter, SimpleFloorPlanner, Value},
     dev::MockProver,
     halo2curves::bn256::{pairing, Bn256, Fr, G1Affine},
     plonk::*,
-    poly::commitment::{Params, ParamsProver},
+    poly::commitment::ParamsProver,
     poly::kzg::{
-        commitment::{KZGCommitmentScheme, ParamsKZG},
+        commitment::KZGCommitmentScheme,
         multiopen::{ProverSHPLONK, VerifierSHPLONK},
         strategy::SingleStrategy,
     },
     transcript::{Blake2bRead, Blake2bWrite, Challenge255},
     transcript::{TranscriptReadBuffer, TranscriptWriterBuffer},
 };
-use crate::{ecc::EccChip, fields::fp::FpStrategy};
-use halo2_base::{
-    gates::GateInstructions,
-    utils::{biguint_to_fe, fe_to_biguint, value_to_option, PrimeField},
-    QuantumCell::Witness,
-};
-use num_bigint::BigUint;
-use num_traits::Num;
+use crate::{ecc::EccChip, fields::PrimeField};
+use ark_std::{end_timer, start_timer};
+use group::Curve;
+use halo2_base::utils::fe_to_biguint;
+use serde::{Deserialize, Serialize};
+use std::io::Write;
 
 pub mod ec_add;
 pub mod fixed_base_msm;
diff --git a/halo2-ecc/src/bn254/tests/msm.rs b/halo2-ecc/src/bn254/tests/msm.rs
index 4195c0f8..269c757c 100644
--- a/halo2-ecc/src/bn254/tests/msm.rs
+++ b/halo2-ecc/src/bn254/tests/msm.rs
@@ -1,11 +1,25 @@
-use std::{env::var, fs::File};
-
-use crate::halo2_proofs::arithmetic::FieldExt;
-use halo2_base::SKIP_FIRST_PASS;
+use crate::fields::FpStrategy;
+use ff::{Field, PrimeField};
+use halo2_base::{
+    gates::{
+        builder::{
+            CircuitBuilderStage, GateThreadBuilder, MultiPhaseThreadBreakPoints,
+            RangeCircuitBuilder,
+        },
+        RangeChip,
+    },
+    utils::fs::gen_srs,
+};
+use rand_core::OsRng;
+use std::{
+    fs::{self, File},
+    io::{BufRead, BufReader},
+    sync::Mutex,
+};
 
 use super::*;
 
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
 struct MSMCircuitParams {
     strategy: FpStrategy,
     degree: u32,
@@ -19,346 +33,133 @@ struct MSMCircuitParams {
     window_bits: usize,
 }
 
-#[derive(Clone, Debug)]
-struct MSMConfig<F: PrimeField> {
-    fp_chip: FpChip<F>,
-    batch_size: usize,
+fn msm_test(
+    thread_pool: &Mutex<GateThreadBuilder<Fr>>,
+    params: MSMCircuitParams,
+    bases: Vec<G1Affine>,
+    scalars: Vec<Fr>,
     window_bits: usize,
+) {
+    std::env::set_var("LOOKUP_BITS", params.lookup_bits.to_string());
+    let range = RangeChip::<Fr>::default(params.lookup_bits);
+    let fp_chip = FpChip::<Fr>::new(&range, params.limb_bits, params.num_limbs);
+    let ecc_chip = EccChip::new(&fp_chip);
+
+    let mut builder = thread_pool.lock().unwrap();
+    let ctx = builder.main(0);
+    let scalars_assigned =
+        scalars.iter().map(|scalar| vec![ctx.load_witness(*scalar)]).collect::<Vec<_>>();
+    let bases_assigned =
+        bases.iter().map(|base| ecc_chip.load_private(ctx, (base.x, base.y))).collect::<Vec<_>>();
+    drop(builder);
+
+    let msm = ecc_chip.variable_base_msm_in::<G1Affine>(
+        thread_pool,
+        &bases_assigned,
+        scalars_assigned,
+        Fr::NUM_BITS as usize,
+        window_bits,
+        0,
+    );
+
+    let msm_answer = bases
+        .iter()
+        .zip(scalars.iter())
+        .map(|(base, scalar)| base * scalar)
+        .reduce(|a, b| a + b)
+        .unwrap()
+        .to_affine();
+
+    let msm_x = msm.x.value;
+    let msm_y = msm.y.value;
+    assert_eq!(msm_x, fe_to_biguint(&msm_answer.x).into());
+    assert_eq!(msm_y, fe_to_biguint(&msm_answer.y).into());
 }
 
-impl<F: PrimeField> MSMConfig<F> {
-    pub fn configure(
-        meta: &mut ConstraintSystem<F>,
-        strategy: FpStrategy,
-        num_advice: &[usize],
-        num_lookup_advice: &[usize],
-        num_fixed: usize,
-        lookup_bits: usize,
-        limb_bits: usize,
-        num_limbs: usize,
-        p: BigUint,
-        batch_size: usize,
-        window_bits: usize,
-        context_id: usize,
-        k: usize,
-    ) -> Self {
-        let fp_chip = FpChip::<F>::configure(
-            meta,
-            strategy,
-            num_advice,
-            num_lookup_advice,
-            num_fixed,
-            lookup_bits,
-            limb_bits,
-            num_limbs,
-            p,
-            context_id,
-            k,
-        );
-        MSMConfig { fp_chip, batch_size, window_bits }
-    }
-}
-
-struct MSMCircuit<F: PrimeField> {
-    bases: Vec<Option<G1Affine>>,
-    scalars: Vec<Option<Fr>>,
-    batch_size: usize,
-    _marker: PhantomData<F>,
-}
-
-impl<F: PrimeField> Default for MSMCircuit<F> {
-    fn default() -> Self {
-        Self {
-            bases: vec![None; 10],
-            scalars: vec![None; 10],
-            batch_size: 10,
-            _marker: PhantomData,
+fn random_msm_circuit(
+    params: MSMCircuitParams,
+    stage: CircuitBuilderStage,
+    break_points: Option<MultiPhaseThreadBreakPoints>,
+) -> RangeCircuitBuilder<Fr> {
+    let k = params.degree as usize;
+    let builder = match stage {
+        CircuitBuilderStage::Mock => GateThreadBuilder::mock(),
+        CircuitBuilderStage::Prover => GateThreadBuilder::prover(),
+        CircuitBuilderStage::Keygen => GateThreadBuilder::keygen(),
+    };
+    let builder = Mutex::new(builder);
+
+    let (bases, scalars): (Vec<_>, Vec<_>) =
+        (0..params.batch_size).map(|_| (G1Affine::random(OsRng), Fr::random(OsRng))).unzip();
+    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
+    msm_test(&builder, params, bases, scalars, params.window_bits);
+
+    let builder = builder.into_inner().unwrap();
+    let circuit = match stage {
+        CircuitBuilderStage::Mock => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::mock(builder)
         }
-    }
-}
-
-impl Circuit<Fr> for MSMCircuit<Fr> {
-    type Config = MSMConfig<Fr>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self {
-            bases: vec![None; self.batch_size],
-            scalars: vec![None; self.batch_size],
-            batch_size: self.batch_size,
-            _marker: PhantomData,
+        CircuitBuilderStage::Keygen => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::keygen(builder)
         }
-    }
-
-    fn configure(meta: &mut ConstraintSystem<Fr>) -> Self::Config {
-        let path = var("MSM_CONFIG")
-            .unwrap_or_else(|_| "./src/bn254/configs/msm_circuit.config".to_string());
-        let params: MSMCircuitParams = serde_json::from_reader(
-            File::open(&path).unwrap_or_else(|_| panic!("{path:?} file should exist")),
-        )
-        .unwrap();
-
-        MSMConfig::<Fr>::configure(
-            meta,
-            params.strategy,
-            &[params.num_advice],
-            &[params.num_lookup_advice],
-            params.num_fixed,
-            params.lookup_bits,
-            params.limb_bits,
-            params.num_limbs,
-            BigUint::from_str_radix(&Fq::MODULUS[2..], 16).unwrap(),
-            params.batch_size,
-            params.window_bits,
-            0,
-            params.degree as usize,
-        )
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<Fr>,
-    ) -> Result<(), Error> {
-        assert_eq!(config.batch_size, self.scalars.len());
-        assert_eq!(config.batch_size, self.bases.len());
-
-        config.fp_chip.load_lookup_table(&mut layouter)?;
-
-        let mut first_pass = SKIP_FIRST_PASS;
-        layouter.assign_region(
-            || "MSM",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let mut aux = config.fp_chip.new_context(region);
-                let ctx = &mut aux;
-
-                let witness_time = start_timer!(|| "Witness generation");
-                let mut scalars_assigned = Vec::new();
-                for scalar in &self.scalars {
-                    let assignment = config.fp_chip.range.gate.assign_region_smart(
-                        ctx,
-                        vec![Witness(scalar.map_or(Value::unknown(), Value::known))],
-                        vec![],
-                        vec![],
-                        vec![],
-                    );
-                    scalars_assigned.push(vec![assignment.last().unwrap().clone()]);
-                }
-
-                let ecc_chip = EccChip::construct(config.fp_chip.clone());
-                let mut bases_assigned = Vec::new();
-                for base in &self.bases {
-                    let base_assigned = ecc_chip.load_private(
-                        ctx,
-                        (
-                            base.map(|pt| Value::known(biguint_to_fe(&fe_to_biguint(&pt.x))))
-                                .unwrap_or(Value::unknown()),
-                            base.map(|pt| Value::known(biguint_to_fe(&fe_to_biguint(&pt.y))))
-                                .unwrap_or(Value::unknown()),
-                        ),
-                    );
-                    bases_assigned.push(base_assigned);
-                }
-
-                let msm = ecc_chip.variable_base_msm::<G1Affine>(
-                    ctx,
-                    &bases_assigned,
-                    &scalars_assigned,
-                    254,
-                    config.window_bits,
-                );
-
-                ecc_chip.field_chip.finalize(ctx);
-                end_timer!(witness_time);
-
-                if self.scalars[0].is_some() {
-                    let mut elts = Vec::new();
-                    for (base, scalar) in self.bases.iter().zip(&self.scalars) {
-                        elts.push(base.unwrap() * scalar.unwrap());
-                    }
-                    let msm_answer = elts.into_iter().reduce(|a, b| a + b).unwrap().to_affine();
-
-                    let msm_x = value_to_option(msm.x.value).unwrap();
-                    let msm_y = value_to_option(msm.y.value).unwrap();
-                    assert_eq!(msm_x, fe_to_biguint(&msm_answer.x).into());
-                    assert_eq!(msm_y, fe_to_biguint(&msm_answer.y).into());
-                }
-
-                #[cfg(feature = "display")]
-                if self.bases[0].is_some() {
-                    ctx.print_stats(&["Range"]);
-                }
-                Ok(())
-            },
-        )
-    }
+        CircuitBuilderStage::Prover => RangeCircuitBuilder::prover(builder, break_points.unwrap()),
+    };
+    end_timer!(start0);
+    circuit
 }
 
-#[cfg(test)]
 #[test]
 fn test_msm() {
-    use std::env::set_var;
-
-    use crate::halo2_proofs::arithmetic::Field;
-
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-    folder.push("configs/msm_circuit.config");
-    set_var("MSM_CONFIG", &folder);
-    let params_str = std::fs::read_to_string(folder.as_path())
-        .expect("src/bn254/configs/msm_circuit.config file should exist");
-    let params: MSMCircuitParams = serde_json::from_str(params_str.as_str()).unwrap();
-    let k = params.degree;
-
-    let mut rng = rand::thread_rng();
-
-    let mut bases = Vec::new();
-    let mut scalars = Vec::new();
-    for _ in 0..params.batch_size {
-        let new_pt = Some(G1Affine::random(&mut rng));
-        bases.push(new_pt);
-
-        let new_scalar = Some(Fr::random(&mut rng));
-        scalars.push(new_scalar);
-    }
-
-    let circuit =
-        MSMCircuit::<Fr> { bases, scalars, batch_size: params.batch_size, _marker: PhantomData };
-
-    let prover = MockProver::run(k, &circuit, vec![]).unwrap();
-    prover.assert_satisfied();
+    let path = "configs/bn254/msm_circuit.config";
+    let params: MSMCircuitParams = serde_json::from_reader(
+        File::open(path).unwrap_or_else(|e| panic!("{path} does not exist: {e:?}")),
+    )
+    .unwrap();
+
+    let circuit = random_msm_circuit(params, CircuitBuilderStage::Mock, None);
+    MockProver::run(params.degree, &circuit, vec![]).unwrap().assert_satisfied();
 }
 
-#[cfg(test)]
 #[test]
 fn bench_msm() -> Result<(), Box<dyn std::error::Error>> {
-    use std::{env::set_var, fs, io::BufRead};
-
-    use rand_core::OsRng;
-
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-
-    folder.push("configs/bench_msm.config");
-    let bench_params_file = std::fs::File::open(folder.as_path())?;
-    folder.pop();
-    folder.pop();
-
-    folder.push("results/msm_bench.csv");
-    let mut fs_results = std::fs::File::create(folder.as_path()).unwrap();
-    folder.pop();
-    folder.pop();
+    let config_path = "configs/bn254/bench_msm.config";
+    let bench_params_file =
+        File::open(config_path).unwrap_or_else(|e| panic!("{config_path} does not exist: {e:?}"));
+    fs::create_dir_all("results/bn254").unwrap();
+    fs::create_dir_all("data").unwrap();
+
+    let results_path = "results/bn254/msm_bench.csv";
+    let mut fs_results = File::create(results_path).unwrap();
     writeln!(fs_results, "degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,batch_size,window_bits,proof_time,proof_size,verify_time")?;
-    folder.push("data");
-    if !folder.is_dir() {
-        std::fs::create_dir(folder.as_path())?;
-    }
 
-    let mut params_folder = std::path::PathBuf::new();
-    params_folder.push("./params");
-    if !params_folder.is_dir() {
-        std::fs::create_dir(params_folder.as_path())?;
-    }
-
-    let bench_params_reader = std::io::BufReader::new(bench_params_file);
+    let bench_params_reader = BufReader::new(bench_params_file);
     for line in bench_params_reader.lines() {
         let bench_params: MSMCircuitParams = serde_json::from_str(line.unwrap().as_str()).unwrap();
-        println!(
-            "---------------------- degree = {} ------------------------------",
-            bench_params.degree
-        );
-        let mut rng = OsRng;
+        let k = bench_params.degree;
+        println!("---------------------- degree = {k} ------------------------------",);
+        let rng = OsRng;
 
-        {
-            folder.pop();
-            folder.push("configs/msm_circuit.tmp.config");
-            set_var("MSM_CONFIG", &folder);
-            let mut f = std::fs::File::create(folder.as_path())?;
-            write!(f, "{}", serde_json::to_string(&bench_params).unwrap())?;
-            folder.pop();
-            folder.pop();
-            folder.push("data");
-        }
-        let params_time = start_timer!(|| "Params construction");
-        let params = {
-            params_folder.push(format!("kzg_bn254_{}.srs", bench_params.degree));
-            let fd = std::fs::File::open(params_folder.as_path());
-            let params = if let Ok(mut f) = fd {
-                println!("Found existing params file. Reading params...");
-                ParamsKZG::<Bn256>::read(&mut f).unwrap()
-            } else {
-                println!("Creating new params file...");
-                let mut f = std::fs::File::create(params_folder.as_path())?;
-                let params = ParamsKZG::<Bn256>::setup(bench_params.degree, &mut rng);
-                params.write(&mut f).unwrap();
-                params
-            };
-            params_folder.pop();
-            params
-        };
-        end_timer!(params_time);
+        let params = gen_srs(k);
+        println!("{bench_params:?}");
 
-        let circuit = MSMCircuit::<Fr> {
-            bases: vec![None; bench_params.batch_size],
-            scalars: vec![None; bench_params.batch_size],
-            batch_size: bench_params.batch_size,
-            _marker: PhantomData,
-        };
+        let circuit = random_msm_circuit(bench_params, CircuitBuilderStage::Keygen, None);
 
         let vk_time = start_timer!(|| "Generating vkey");
         let vk = keygen_vk(&params, &circuit)?;
         end_timer!(vk_time);
 
-        /*
-        let vk_size = {
-            folder.push(format!(
-                "msm_circuit_{}_{}_{}_{}_{}_{}_{}_{}_{}.vkey",
-                bench_params.degree,
-                bench_params.num_advice,
-                bench_params.num_lookup_advice,
-                bench_params.num_fixed,
-                bench_params.lookup_bits,
-                bench_params.limb_bits,
-                bench_params.num_limbs,
-                bench_params.batch_size,
-                bench_params.window_bits,
-            ));
-            let mut fd = std::fs::File::create(folder.as_path()).unwrap();
-            folder.pop();
-            vk.write(&mut fd).unwrap();
-            fd.metadata().unwrap().len()
-        };
-        */
-
         let pk_time = start_timer!(|| "Generating pkey");
         let pk = keygen_pk(&params, vk, &circuit)?;
         end_timer!(pk_time);
 
-        let mut bases = Vec::new();
-        let mut scalars = Vec::new();
-        for _idx in 0..bench_params.batch_size {
-            let new_pt = Some(G1Affine::random(&mut rng));
-            bases.push(new_pt);
-
-            let new_scalar = Some(Fr::random(&mut rng));
-            scalars.push(new_scalar);
-        }
-
-        println!("{bench_params:?}");
-        let proof_circuit = MSMCircuit::<Fr> {
-            bases,
-            scalars,
-            batch_size: bench_params.batch_size,
-            _marker: PhantomData,
-        };
-
+        let break_points = circuit.0.break_points.take();
+        drop(circuit);
         // create a proof
         let proof_time = start_timer!(|| "Proving time");
+        let circuit =
+            random_msm_circuit(bench_params, CircuitBuilderStage::Prover, Some(break_points));
         let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
         create_proof::<
             KZGCommitmentScheme<Bn256>,
@@ -366,14 +167,14 @@ fn bench_msm() -> Result<(), Box<dyn std::error::Error>> {
             Challenge255<G1Affine>,
             _,
             Blake2bWrite<Vec<u8>, G1Affine, Challenge255<G1Affine>>,
-            MSMCircuit<Fr>,
-        >(&params, &pk, &[proof_circuit], &[&[]], rng, &mut transcript)?;
+            _,
+        >(&params, &pk, &[circuit], &[&[]], rng, &mut transcript)?;
         let proof = transcript.finalize();
         end_timer!(proof_time);
 
         let proof_size = {
-            folder.push(format!(
-                "msm_circuit_proof_{}_{}_{}_{}_{}_{}_{}_{}_{}.data",
+            let path = format!(
+                "data/msm_circuit_proof_{}_{}_{}_{}_{}_{}_{}_{}_{}.data",
                 bench_params.degree,
                 bench_params.num_advice,
                 bench_params.num_lookup_advice,
@@ -383,29 +184,28 @@ fn bench_msm() -> Result<(), Box<dyn std::error::Error>> {
                 bench_params.num_limbs,
                 bench_params.batch_size,
                 bench_params.window_bits
-            ));
-            let mut fd = std::fs::File::create(folder.as_path()).unwrap();
-            folder.pop();
-            fd.write_all(&proof).unwrap();
-            fd.metadata().unwrap().len()
+            );
+            let mut fd = File::create(&path)?;
+            fd.write_all(&proof)?;
+            let size = fd.metadata().unwrap().len();
+            fs::remove_file(path)?;
+            size
         };
 
         let verify_time = start_timer!(|| "Verify time");
         let verifier_params = params.verifier_params();
         let strategy = SingleStrategy::new(&params);
         let mut transcript = Blake2bRead::<_, _, Challenge255<_>>::init(&proof[..]);
-        assert!(verify_proof::<
+        verify_proof::<
             KZGCommitmentScheme<Bn256>,
             VerifierSHPLONK<'_, Bn256>,
             Challenge255<G1Affine>,
             Blake2bRead<&[u8], G1Affine, Challenge255<G1Affine>>,
             SingleStrategy<'_, Bn256>,
         >(verifier_params, pk.get_vk(), strategy, &[&[]], &mut transcript)
-        .is_ok());
+        .unwrap();
         end_timer!(verify_time);
 
-        fs::remove_file(var("MSM_CONFIG").unwrap())?;
-
         writeln!(
             fs_results,
             "{},{},{},{},{},{},{},{},{},{:?},{},{:?}",
diff --git a/halo2-ecc/src/bn254/tests/pairing.rs b/halo2-ecc/src/bn254/tests/pairing.rs
index 20e5be89..e8194f58 100644
--- a/halo2-ecc/src/bn254/tests/pairing.rs
+++ b/halo2-ecc/src/bn254/tests/pairing.rs
@@ -1,14 +1,26 @@
 use std::{
-    env::{set_var, var},
     fs::{self, File},
+    io::{BufRead, BufReader},
 };
 
 use super::*;
-use crate::halo2_proofs::halo2curves::bn256::G2Affine;
-use halo2_base::SKIP_FIRST_PASS;
+use crate::fields::FieldChip;
+use crate::{fields::FpStrategy, halo2_proofs::halo2curves::bn256::G2Affine};
+use halo2_base::{
+    gates::{
+        builder::{
+            CircuitBuilderStage, GateThreadBuilder, MultiPhaseThreadBreakPoints,
+            RangeCircuitBuilder,
+        },
+        RangeChip,
+    },
+    halo2_proofs::poly::kzg::multiopen::{ProverGWC, VerifierGWC},
+    utils::fs::gen_srs,
+    Context,
+};
 use rand_core::OsRng;
 
-#[derive(Serialize, Deserialize)]
+#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
 struct PairingCircuitParams {
     strategy: FpStrategy,
     degree: u32,
@@ -20,257 +32,111 @@ struct PairingCircuitParams {
     num_limbs: usize,
 }
 
-#[derive(Default)]
-struct PairingCircuit<F: PrimeField> {
-    P: Option<G1Affine>,
-    Q: Option<G2Affine>,
-    _marker: PhantomData<F>,
+fn pairing_test<F: PrimeField>(
+    ctx: &mut Context<F>,
+    params: PairingCircuitParams,
+    P: G1Affine,
+    Q: G2Affine,
+) {
+    std::env::set_var("LOOKUP_BITS", params.lookup_bits.to_string());
+    let range = RangeChip::<F>::default(params.lookup_bits);
+    let fp_chip = FpChip::<F>::new(&range, params.limb_bits, params.num_limbs);
+    let chip = PairingChip::new(&fp_chip);
+
+    let P_assigned = chip.load_private_g1(ctx, P);
+    let Q_assigned = chip.load_private_g2(ctx, Q);
+
+    // test optimal ate pairing
+    let f = chip.pairing(ctx, &Q_assigned, &P_assigned);
+
+    let actual_f = pairing(&P, &Q);
+    let fp12_chip = Fp12Chip::new(&fp_chip);
+    // cannot directly compare f and actual_f because `Gt` has private field `Fq12`
+    assert_eq!(format!("Gt({:?})", fp12_chip.get_assigned_value(&f)), format!("{actual_f:?}"));
 }
 
-impl<F: PrimeField> Circuit<F> for PairingCircuit<F> {
-    type Config = FpChip<F>;
-    type FloorPlanner = SimpleFloorPlanner; // V1;
-
-    fn without_witnesses(&self) -> Self {
-        Self::default()
-    }
-
-    fn configure(meta: &mut ConstraintSystem<F>) -> Self::Config {
-        let path = var("PAIRING_CONFIG")
-            .unwrap_or_else(|_| "./src/bn254/configs/pairing_circuit.config".to_string());
-        let params: PairingCircuitParams = serde_json::from_reader(
-            File::open(&path).unwrap_or_else(|_| panic!("{path:?} file should exist")),
-        )
-        .unwrap();
-
-        PairingChip::<F>::configure(
-            meta,
-            params.strategy,
-            &[params.num_advice],
-            &[params.num_lookup_advice],
-            params.num_fixed,
-            params.lookup_bits,
-            params.limb_bits,
-            params.num_limbs,
-            0,
-            params.degree as usize,
-        )
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<F>,
-    ) -> Result<(), Error> {
-        config.range.load_lookup_table(&mut layouter)?;
-        let chip = PairingChip::<F>::construct(&config);
-
-        let mut first_pass = SKIP_FIRST_PASS;
-
-        layouter.assign_region(
-            || "pairing",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let mut aux = config.new_context(region);
-                let ctx = &mut aux;
-
-                let P_assigned =
-                    chip.load_private_g1(ctx, self.P.map(Value::known).unwrap_or(Value::unknown()));
-                let Q_assigned =
-                    chip.load_private_g2(ctx, self.Q.map(Value::known).unwrap_or(Value::unknown()));
-
-                /*
-                // test miller loop without final exp
-                {
-                    let f = chip.miller_loop(ctx, &Q_assigned, &P_assigned)?;
-                    for fc in &f.coeffs {
-                        assert_eq!(fc.value, fc.truncation.to_bigint());
-                    }
-                    if self.P != None {
-                        let actual_f = multi_miller_loop(&[(
-                            &self.P.unwrap(),
-                            &G2Prepared::from_affine(self.Q.unwrap()),
-                        )]);
-                        let f_val: Vec<String> =
-                            f.coeffs.iter().map(|x| x.value.clone().unwrap().to_str_radix(16)).collect();
-                        println!("single miller loop:");
-                        println!("actual f: {:#?}", actual_f);
-                        println!("circuit f: {:#?}", f_val);
-                    }
-                }
-                */
-
-                // test optimal ate pairing
-                {
-                    let f = chip.pairing(ctx, &Q_assigned, &P_assigned);
-                    #[cfg(feature = "display")]
-                    for fc in &f.coeffs {
-                        assert_eq!(
-                            value_to_option(fc.value.clone()),
-                            value_to_option(fc.truncation.to_bigint(chip.fp_chip.limb_bits))
-                        );
-                    }
-                    #[cfg(feature = "display")]
-                    if self.P.is_some() {
-                        let actual_f = pairing(&self.P.unwrap(), &self.Q.unwrap());
-                        let f_val: Vec<String> = f
-                            .coeffs
-                            .iter()
-                            .map(|x| value_to_option(x.value.clone()).unwrap().to_str_radix(16))
-                            //.map(|x| x.to_bigint().clone().unwrap().to_str_radix(16))
-                            .collect();
-                        println!("optimal ate pairing:");
-                        println!("actual f: {actual_f:#?}");
-                        println!("circuit f: {f_val:#?}");
-                    }
-                }
-
-                // IMPORTANT: this copies cells to the lookup advice column to perform range check lookups
-                // This is not optional.
-                config.finalize(ctx);
-
-                #[cfg(feature = "display")]
-                if self.P.is_some() {
-                    ctx.print_stats(&["Range"]);
-                }
-                Ok(())
-            },
-        )
-    }
+fn random_pairing_circuit(
+    params: PairingCircuitParams,
+    stage: CircuitBuilderStage,
+    break_points: Option<MultiPhaseThreadBreakPoints>,
+) -> RangeCircuitBuilder<Fr> {
+    let k = params.degree as usize;
+    let mut builder = match stage {
+        CircuitBuilderStage::Mock => GateThreadBuilder::mock(),
+        CircuitBuilderStage::Prover => GateThreadBuilder::prover(),
+        CircuitBuilderStage::Keygen => GateThreadBuilder::keygen(),
+    };
+
+    let P = G1Affine::random(OsRng);
+    let Q = G2Affine::random(OsRng);
+
+    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
+    pairing_test::<Fr>(builder.main(0), params, P, Q);
+
+    let circuit = match stage {
+        CircuitBuilderStage::Mock => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::mock(builder)
+        }
+        CircuitBuilderStage::Keygen => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::keygen(builder)
+        }
+        CircuitBuilderStage::Prover => RangeCircuitBuilder::prover(builder, break_points.unwrap()),
+    };
+    end_timer!(start0);
+    circuit
 }
 
 #[test]
 fn test_pairing() {
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-    folder.push("configs/pairing_circuit.config");
-    set_var("PAIRING_CONFIG", &folder);
-    let params_str = std::fs::read_to_string(folder.as_path())
-        .expect("src/bn254/configs/pairing_circuit.config file should exist");
-    let params: PairingCircuitParams = serde_json::from_str(params_str.as_str()).unwrap();
-    let k = params.degree;
-
-    let mut rng = OsRng;
-
-    let P = Some(G1Affine::random(&mut rng));
-    let Q = Some(G2Affine::random(&mut rng));
-
-    let circuit = PairingCircuit::<Fr> { P, Q, _marker: PhantomData };
-
-    let prover = MockProver::run(k, &circuit, vec![]).unwrap();
-    prover.assert_satisfied();
+    let path = "configs/bn254/pairing_circuit.config";
+    let params: PairingCircuitParams = serde_json::from_reader(
+        File::open(path).unwrap_or_else(|e| panic!("{path} does not exist: {e:?}")),
+    )
+    .unwrap();
+
+    let circuit = random_pairing_circuit(params, CircuitBuilderStage::Mock, None);
+    MockProver::run(params.degree, &circuit, vec![]).unwrap().assert_satisfied();
 }
 
 #[test]
 fn bench_pairing() -> Result<(), Box<dyn std::error::Error>> {
-    use std::io::BufRead;
-
-    use crate::halo2_proofs::poly::kzg::multiopen::{ProverGWC, VerifierGWC};
-
-    let mut rng = OsRng;
-
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-
-    folder.push("configs/bench_pairing.config");
-    let bench_params_file = std::fs::File::open(folder.as_path())?;
-    folder.pop();
-    folder.pop();
-
-    folder.push("results/pairing_bench.csv");
-    let mut fs_results = std::fs::File::create(folder.as_path()).unwrap();
-    folder.pop();
-    folder.pop();
+    let rng = OsRng;
+    let config_path = "configs/bn254/bench_pairing.config";
+    let bench_params_file =
+        File::open(config_path).unwrap_or_else(|e| panic!("{config_path} does not exist: {e:?}"));
+    fs::create_dir_all("results/bn254").unwrap();
+    fs::create_dir_all("data").unwrap();
+
+    let results_path = "results/bn254/pairing_bench.csv";
+    let mut fs_results = File::create(results_path).unwrap();
     writeln!(fs_results, "degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,vk_size,proof_time,proof_size,verify_time")?;
-    folder.push("data");
-    if !folder.is_dir() {
-        std::fs::create_dir(folder.as_path())?;
-    }
 
-    let mut params_folder = std::path::PathBuf::new();
-    params_folder.push("./params");
-    if !params_folder.is_dir() {
-        std::fs::create_dir(params_folder.as_path())?;
-    }
-
-    let bench_params_reader = std::io::BufReader::new(bench_params_file);
+    let bench_params_reader = BufReader::new(bench_params_file);
     for line in bench_params_reader.lines() {
         let bench_params: PairingCircuitParams =
             serde_json::from_str(line.unwrap().as_str()).unwrap();
-        println!(
-            "---------------------- degree = {} ------------------------------",
-            bench_params.degree
-        );
-
-        {
-            folder.pop();
-            folder.push("configs/pairing_circuit.tmp.config");
-            set_var("PAIRING_CONFIG", &folder);
-            let mut f = std::fs::File::create(folder.as_path())?;
-            write!(f, "{}", serde_json::to_string(&bench_params).unwrap())?;
-            folder.pop();
-            folder.pop();
-            folder.push("data");
-        }
-        let params_time = start_timer!(|| "Params construction");
-        let params = {
-            params_folder.push(format!("kzg_bn254_{}.srs", bench_params.degree));
-            let fd = std::fs::File::open(params_folder.as_path());
-            let params = if let Ok(mut f) = fd {
-                println!("Found existing params file. Reading params...");
-                ParamsKZG::<Bn256>::read(&mut f).unwrap()
-            } else {
-                println!("Creating new params file...");
-                let mut f = std::fs::File::create(params_folder.as_path())?;
-                let params = ParamsKZG::<Bn256>::setup(bench_params.degree, &mut rng);
-                params.write(&mut f).unwrap();
-                params
-            };
-            params_folder.pop();
-            params
-        };
+        let k = bench_params.degree;
+        println!("---------------------- degree = {k} ------------------------------",);
 
-        let circuit = PairingCircuit::<Fr>::default();
-        end_timer!(params_time);
+        let params = gen_srs(k);
+        let circuit = random_pairing_circuit(bench_params, CircuitBuilderStage::Keygen, None);
 
         let vk_time = start_timer!(|| "Generating vkey");
         let vk = keygen_vk(&params, &circuit)?;
         end_timer!(vk_time);
 
-        /*
-        let vk_size = {
-            folder.push(format!(
-                "pairing_circuit_{}_{}_{}_{}_{}_{}_{}.vkey",
-                bench_params.degree,
-                bench_params.num_advice,
-                bench_params.num_lookup_advice,
-                bench_params.num_fixed,
-                bench_params.lookup_bits,
-                bench_params.limb_bits,
-                bench_params.num_limbs
-            ));
-            let mut fd = std::fs::File::create(folder.as_path()).unwrap();
-            folder.pop();
-            vk.write(&mut fd).unwrap();
-            fd.metadata().unwrap().len()
-        };
-        */
-
         let pk_time = start_timer!(|| "Generating pkey");
         let pk = keygen_pk(&params, vk, &circuit)?;
         end_timer!(pk_time);
 
-        let mut rng = OsRng;
-        let P = Some(G1Affine::random(&mut rng));
-        let Q = Some(G2Affine::random(&mut rng));
-        let proof_circuit = PairingCircuit::<Fr> { P, Q, _marker: PhantomData };
-
+        let break_points = circuit.0.break_points.take();
+        drop(circuit);
         // create a proof
         let proof_time = start_timer!(|| "Proving time");
+        let circuit =
+            random_pairing_circuit(bench_params, CircuitBuilderStage::Prover, Some(break_points));
         let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
         create_proof::<
             KZGCommitmentScheme<Bn256>,
@@ -278,14 +144,14 @@ fn bench_pairing() -> Result<(), Box<dyn std::error::Error>> {
             Challenge255<G1Affine>,
             _,
             Blake2bWrite<Vec<u8>, G1Affine, Challenge255<G1Affine>>,
-            PairingCircuit<Fr>,
-        >(&params, &pk, &[proof_circuit], &[&[]], rng, &mut transcript)?;
+            _,
+        >(&params, &pk, &[circuit], &[&[]], rng, &mut transcript)?;
         let proof = transcript.finalize();
         end_timer!(proof_time);
 
         let proof_size = {
-            folder.push(format!(
-                "pairing_circuit_proof_{}_{}_{}_{}_{}_{}_{}.data",
+            let path = format!(
+                "data/pairing_circuit_proof_{}_{}_{}_{}_{}_{}_{}.data",
                 bench_params.degree,
                 bench_params.num_advice,
                 bench_params.num_lookup_advice,
@@ -293,27 +159,27 @@ fn bench_pairing() -> Result<(), Box<dyn std::error::Error>> {
                 bench_params.lookup_bits,
                 bench_params.limb_bits,
                 bench_params.num_limbs
-            ));
-            let mut fd = std::fs::File::create(folder.as_path()).unwrap();
-            folder.pop();
-            fd.write_all(&proof).unwrap();
-            fd.metadata().unwrap().len()
+            );
+            let mut fd = File::create(&path)?;
+            fd.write_all(&proof)?;
+            let size = fd.metadata().unwrap().len();
+            fs::remove_file(path)?;
+            size
         };
 
         let verify_time = start_timer!(|| "Verify time");
         let verifier_params = params.verifier_params();
         let strategy = SingleStrategy::new(&params);
         let mut transcript = Blake2bRead::<_, _, Challenge255<_>>::init(&proof[..]);
-        assert!(verify_proof::<
+        verify_proof::<
             KZGCommitmentScheme<Bn256>,
             VerifierGWC<'_, Bn256>,
             Challenge255<G1Affine>,
             Blake2bRead<&[u8], G1Affine, Challenge255<G1Affine>>,
             SingleStrategy<'_, Bn256>,
         >(verifier_params, pk.get_vk(), strategy, &[&[]], &mut transcript)
-        .is_ok());
+        .unwrap();
         end_timer!(verify_time);
-        fs::remove_file(var("PAIRING_CONFIG").unwrap())?;
 
         writeln!(
             fs_results,
diff --git a/halo2-ecc/src/ecc/ecdsa.rs b/halo2-ecc/src/ecc/ecdsa.rs
index 005f5c39..874c185f 100644
--- a/halo2-ecc/src/ecc/ecdsa.rs
+++ b/halo2-ecc/src/ecc/ecdsa.rs
@@ -1,10 +1,9 @@
 use crate::bigint::{big_less_than, CRTInteger};
-use crate::fields::{fp::FpConfig, FieldChip};
+use crate::fields::{fp::FpChip, FieldChip, PrimeField};
 use halo2_base::{
     gates::{GateInstructions, RangeInstructions},
-    utils::{modulus, CurveAffineExt, PrimeField},
+    utils::CurveAffineExt,
     AssignedValue, Context,
-    QuantumCell::Existing,
 };
 
 use super::fixed_base;
@@ -14,25 +13,21 @@ use super::{ec_add_unequal, scalar_multiply, EcPoint};
 // p = coordinate field modulus
 // n = scalar field modulus
 // Only valid when p is very close to n in size (e.g. for Secp256k1)
-pub fn ecdsa_verify_no_pubkey_check<'v, F: PrimeField, CF: PrimeField, SF: PrimeField, GA>(
-    base_chip: &FpConfig<F, CF>,
-    ctx: &mut Context<'v, F>,
-    pubkey: &EcPoint<F, <FpConfig<F, CF> as FieldChip<F>>::FieldPoint<'v>>,
-    r: &CRTInteger<'v, F>,
-    s: &CRTInteger<'v, F>,
-    msghash: &CRTInteger<'v, F>,
+pub fn ecdsa_verify_no_pubkey_check<F: PrimeField, CF: PrimeField, SF: PrimeField, GA>(
+    base_chip: &FpChip<F, CF>,
+    ctx: &mut Context<F>,
+    pubkey: &EcPoint<F, <FpChip<F, CF> as FieldChip<F>>::FieldPoint>,
+    r: &CRTInteger<F>,
+    s: &CRTInteger<F>,
+    msghash: &CRTInteger<F>,
     var_window_bits: usize,
     fixed_window_bits: usize,
-) -> AssignedValue<'v, F>
+) -> AssignedValue<F>
 where
     GA: CurveAffineExt<Base = CF, ScalarExt = SF>,
 {
-    let scalar_chip = FpConfig::<F, SF>::construct(
-        base_chip.range.clone(),
-        base_chip.limb_bits,
-        base_chip.num_limbs,
-        modulus::<SF>(),
-    );
+    let scalar_chip =
+        FpChip::<F, SF>::new(base_chip.range, base_chip.limb_bits, base_chip.num_limbs);
     let n = scalar_chip.load_constant(ctx, scalar_chip.p.to_biguint().unwrap());
 
     // check r,s are in [1, n - 1]
@@ -50,7 +45,7 @@ where
         base_chip,
         ctx,
         &GA::generator(),
-        &u1.truncation.limbs,
+        u1.truncation.limbs.clone(),
         base_chip.limb_bits,
         fixed_window_bits,
     );
@@ -58,7 +53,7 @@ where
         base_chip,
         ctx,
         pubkey,
-        &u2.truncation.limbs,
+        u2.truncation.limbs.clone(),
         base_chip.limb_bits,
         var_window_bits,
     );
@@ -69,7 +64,7 @@ where
     // coordinates of u1_mul and u2_mul are in proper bigint form, and lie in but are not constrained to [0, n)
     // we therefore need hard inequality here
     let u1_u2_x_eq = base_chip.is_equal(ctx, &u1_mul.x, &u2_mul.x);
-    let u1_u2_not_neg = base_chip.range.gate().not(ctx, Existing(&u1_u2_x_eq));
+    let u1_u2_not_neg = base_chip.range.gate().not(ctx, u1_u2_x_eq);
 
     // compute (x1, y1) = u1 * G + u2 * pubkey and check (r mod n) == x1 as integers
     // WARNING: For optimization reasons, does not reduce x1 mod n, which is
@@ -98,10 +93,10 @@ where
     );
 
     // check (r in [1, n - 1]) and (s in [1, n - 1]) and (u1_mul != - u2_mul) and (r == x1 mod n)
-    let res1 = base_chip.range.gate().and(ctx, Existing(&r_valid), Existing(&s_valid));
-    let res2 = base_chip.range.gate().and(ctx, Existing(&res1), Existing(&u1_small));
-    let res3 = base_chip.range.gate().and(ctx, Existing(&res2), Existing(&u2_small));
-    let res4 = base_chip.range.gate().and(ctx, Existing(&res3), Existing(&u1_u2_not_neg));
-    let res5 = base_chip.range.gate().and(ctx, Existing(&res4), Existing(&equal_check));
+    let res1 = base_chip.gate().and(ctx, r_valid, s_valid);
+    let res2 = base_chip.gate().and(ctx, res1, u1_small);
+    let res3 = base_chip.gate().and(ctx, res2, u2_small);
+    let res4 = base_chip.gate().and(ctx, res3, u1_u2_not_neg);
+    let res5 = base_chip.gate().and(ctx, res4, equal_check);
     res5
 }
diff --git a/halo2-ecc/src/ecc/fixed_base.rs b/halo2-ecc/src/ecc/fixed_base.rs
index 4b9bedb6..440f6993 100644
--- a/halo2-ecc/src/ecc/fixed_base.rs
+++ b/halo2-ecc/src/ecc/fixed_base.rs
@@ -3,17 +3,18 @@ use super::{ec_add_unequal, ec_select, ec_select_from_bits, EcPoint, EccChip};
 use crate::halo2_proofs::arithmetic::CurveAffine;
 use crate::{
     bigint::{CRTInteger, FixedCRTInteger},
-    fields::{PrimeFieldChip, Selectable},
+    fields::{PrimeField, PrimeFieldChip, Selectable},
 };
 use group::Curve;
+use halo2_base::gates::builder::GateThreadBuilder;
 use halo2_base::{
-    gates::{GateInstructions, RangeInstructions},
-    utils::{fe_to_biguint, CurveAffineExt, PrimeField},
+    gates::GateInstructions,
+    utils::{fe_to_biguint, CurveAffineExt},
     AssignedValue, Context,
-    QuantumCell::Existing,
 };
 use itertools::Itertools;
-use num_bigint::BigUint;
+use rayon::prelude::*;
+use std::sync::Mutex;
 use std::{cmp::min, marker::PhantomData};
 
 // this only works for curves GA with base field of prime order
@@ -39,41 +40,12 @@ where
         Self::construct(x, y)
     }
 
-    pub fn assign<'v, FC>(
-        self,
-        chip: &FC,
-        ctx: &mut Context<'_, F>,
-        native_modulus: &BigUint,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+    pub fn assign<FC>(self, chip: &FC, ctx: &mut Context<F>) -> EcPoint<F, FC::FieldPoint>
     where
-        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint<'v> = CRTInteger<'v, F>>,
+        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint = CRTInteger<F>>,
     {
-        let assigned_x = self.x.assign(chip.range().gate(), ctx, chip.limb_bits(), native_modulus);
-        let assigned_y = self.y.assign(chip.range().gate(), ctx, chip.limb_bits(), native_modulus);
-        EcPoint::construct(assigned_x, assigned_y)
-    }
-
-    pub fn assign_without_caching<'v, FC>(
-        self,
-        chip: &FC,
-        ctx: &mut Context<'_, F>,
-        native_modulus: &BigUint,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
-    where
-        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint<'v> = CRTInteger<'v, F>>,
-    {
-        let assigned_x = self.x.assign_without_caching(
-            chip.range().gate(),
-            ctx,
-            chip.limb_bits(),
-            native_modulus,
-        );
-        let assigned_y = self.y.assign_without_caching(
-            chip.range().gate(),
-            ctx,
-            chip.limb_bits(),
-            native_modulus,
-        );
+        let assigned_x = self.x.assign(ctx, chip.limb_bits(), chip.native_modulus());
+        let assigned_y = self.y.assign(ctx, chip.limb_bits(), chip.native_modulus());
         EcPoint::construct(assigned_x, assigned_y)
     }
 }
@@ -86,27 +58,27 @@ where
 // - `scalar_i < 2^{max_bits} for all i` (constrained by num_to_bits)
 // - `max_bits <= modulus::<F>.bits()`
 
-pub fn scalar_multiply<'v, F, FC, C>(
+pub fn scalar_multiply<F, FC, C>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
+    ctx: &mut Context<F>,
     point: &C,
-    scalar: &[AssignedValue<'v, F>],
+    scalar: Vec<AssignedValue<F>>,
     max_bits: usize,
     window_bits: usize,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+) -> EcPoint<F, FC::FieldPoint>
 where
     F: PrimeField,
     C: CurveAffineExt,
     C::Base: PrimeField,
-    FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint<'v> = CRTInteger<'v, F>>
-        + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint = CRTInteger<F>>
+        + Selectable<F, Point = FC::FieldPoint>,
 {
     if point.is_identity().into() {
         let point = FixedEcPoint::from_curve(*point, chip.num_limbs(), chip.limb_bits());
-        return FixedEcPoint::assign(point, chip, ctx, chip.native_modulus());
+        return FixedEcPoint::assign(point, chip, ctx);
     }
-    assert!(!scalar.is_empty());
-    assert!((max_bits as u32) <= F::NUM_BITS);
+    debug_assert!(!scalar.is_empty());
+    debug_assert!((max_bits as u32) <= F::NUM_BITS);
 
     let total_bits = max_bits * scalar.len();
     let num_windows = (total_bits + window_bits - 1) / window_bits;
@@ -142,12 +114,12 @@ where
         .into_iter()
         .map(|point| {
             let point = FixedEcPoint::from_curve(point, chip.num_limbs(), chip.limb_bits());
-            FixedEcPoint::assign(point, chip, ctx, chip.native_modulus())
+            FixedEcPoint::assign(point, chip, ctx)
         })
         .collect_vec();
 
     let bits = scalar
-        .iter()
+        .into_iter()
         .flat_map(|scalar_chunk| chip.gate().num_to_bits(ctx, scalar_chunk, max_bits))
         .collect::<Vec<_>>();
 
@@ -155,29 +127,24 @@ where
     let bit_window_rev = bits.chunks(window_bits).into_iter().rev();
     let mut curr_point = None;
     // `is_started` is just a way to deal with if `curr_point` is actually identity
-    let mut is_started = chip.gate().load_zero(ctx);
+    let mut is_started = ctx.load_zero();
     for (cached_point_window, bit_window) in cached_point_window_rev.zip(bit_window_rev) {
-        let bit_sum = chip.gate().sum(ctx, bit_window.iter().map(Existing));
+        let bit_sum = chip.gate().sum(ctx, bit_window.iter().copied());
         // are we just adding a window of all 0s? if so, skip
-        let is_zero_window = chip.gate().is_zero(ctx, &bit_sum);
+        let is_zero_window = chip.gate().is_zero(ctx, bit_sum);
         let add_point = ec_select_from_bits::<F, _>(chip, ctx, cached_point_window, bit_window);
         curr_point = if let Some(curr_point) = curr_point {
             let sum = ec_add_unequal(chip, ctx, &curr_point, &add_point, false);
-            let zero_sum = ec_select(chip, ctx, &curr_point, &sum, &is_zero_window);
-            Some(ec_select(chip, ctx, &zero_sum, &add_point, &is_started))
+            let zero_sum = ec_select(chip, ctx, &curr_point, &sum, is_zero_window);
+            Some(ec_select(chip, ctx, &zero_sum, &add_point, is_started))
         } else {
             Some(add_point)
         };
         is_started = {
             // is_started || !is_zero_window
             // (a || !b) = (1-b) + a*b
-            let not_zero_window = chip.gate().not(ctx, Existing(&is_zero_window));
-            chip.gate().mul_add(
-                ctx,
-                Existing(&is_started),
-                Existing(&is_zero_window),
-                Existing(&not_zero_window),
-            )
+            let not_zero_window = chip.gate().not(ctx, is_zero_window);
+            chip.gate().mul_add(ctx, is_started, is_zero_window, not_zero_window)
         };
     }
     curr_point.unwrap()
@@ -185,20 +152,20 @@ where
 
 // basically just adding up individual fixed_base::scalar_multiply except that we do all batched normalization of cached points at once to further save inversion time during witness generation
 // we also use the random accumulator for some extra efficiency (which also works in scalar multiply case but that is TODO)
-pub fn msm<'v, F, FC, C>(
+pub fn msm<F, FC, C>(
     chip: &EccChip<F, FC>,
-    ctx: &mut Context<'v, F>,
+    ctx: &mut Context<F>,
     points: &[C],
-    scalars: &[Vec<AssignedValue<'v, F>>],
+    scalars: Vec<Vec<AssignedValue<F>>>,
     max_scalar_bits_per_cell: usize,
     window_bits: usize,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+) -> EcPoint<F, FC::FieldPoint>
 where
     F: PrimeField,
     C: CurveAffineExt,
     C::Base: PrimeField,
-    FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint<'v> = CRTInteger<'v, F>>
-        + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint = CRTInteger<F>>
+        + Selectable<F, Point = FC::FieldPoint>,
 {
     assert!((max_scalar_bits_per_cell as u32) <= F::NUM_BITS);
     let scalar_len = scalars[0].len();
@@ -242,16 +209,16 @@ where
         .map(|point| {
             let point =
                 FixedEcPoint::from_curve(point, field_chip.num_limbs(), field_chip.limb_bits());
-            point.assign_without_caching(field_chip, ctx, field_chip.native_modulus())
+            point.assign(field_chip, ctx)
         })
         .collect_vec();
 
     let bits = scalars
-        .iter()
+        .into_iter()
         .flat_map(|scalar| {
             assert_eq!(scalar.len(), scalar_len);
             scalar
-                .iter()
+                .into_iter()
                 .flat_map(|scalar_chunk| {
                     field_chip.gate().num_to_bits(ctx, scalar_chunk, max_scalar_bits_per_cell)
                 })
@@ -269,32 +236,26 @@ where
             let bit_window_rev = bits.chunks(window_bits).into_iter().rev();
             let mut curr_point = None;
             // `is_started` is just a way to deal with if `curr_point` is actually identity
-            let mut is_started = field_chip.gate().load_zero(ctx);
+            let mut is_started = ctx.load_zero();
             for (cached_point_window, bit_window) in cached_point_window_rev.zip(bit_window_rev) {
                 let is_zero_window = {
-                    let sum = field_chip.gate().sum(ctx, bit_window.iter().map(Existing));
-                    field_chip.gate().is_zero(ctx, &sum)
+                    let sum = field_chip.gate().sum(ctx, bit_window.iter().copied());
+                    field_chip.gate().is_zero(ctx, sum)
                 };
                 let add_point =
                     ec_select_from_bits::<F, _>(field_chip, ctx, cached_point_window, bit_window);
                 curr_point = if let Some(curr_point) = curr_point {
                     let sum = ec_add_unequal(field_chip, ctx, &curr_point, &add_point, false);
-                    let zero_sum = ec_select(field_chip, ctx, &curr_point, &sum, &is_zero_window);
-                    Some(ec_select(field_chip, ctx, &zero_sum, &add_point, &is_started))
+                    let zero_sum = ec_select(field_chip, ctx, &curr_point, &sum, is_zero_window);
+                    Some(ec_select(field_chip, ctx, &zero_sum, &add_point, is_started))
                 } else {
                     Some(add_point)
                 };
                 is_started = {
                     // is_started || !is_zero_window
                     // (a || !b) = (1-b) + a*b
-                    let not_zero_window =
-                        field_chip.range().gate().not(ctx, Existing(&is_zero_window));
-                    field_chip.range().gate().mul_add(
-                        ctx,
-                        Existing(&is_started),
-                        Existing(&is_zero_window),
-                        Existing(&not_zero_window),
-                    )
+                    let not_zero_window = field_chip.gate().not(ctx, is_zero_window);
+                    field_chip.gate().mul_add(ctx, is_started, is_zero_window, not_zero_window)
                 };
             }
             curr_point.unwrap()
@@ -302,3 +263,122 @@ where
         .collect_vec();
     chip.sum::<C>(ctx, sm.iter())
 }
+
+pub fn msm_par<F, FC, C>(
+    chip: &EccChip<F, FC>,
+    thread_pool: &Mutex<GateThreadBuilder<F>>,
+    points: &[C],
+    scalars: Vec<Vec<AssignedValue<F>>>,
+    max_scalar_bits_per_cell: usize,
+    window_bits: usize,
+    phase: usize,
+) -> EcPoint<F, FC::FieldPoint>
+where
+    F: PrimeField,
+    C: CurveAffineExt,
+    C::Base: PrimeField,
+    FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint = CRTInteger<F>>
+        + Selectable<F, Point = FC::FieldPoint>,
+{
+    assert!((max_scalar_bits_per_cell as u32) <= F::NUM_BITS);
+    let scalar_len = scalars[0].len();
+    let total_bits = max_scalar_bits_per_cell * scalar_len;
+    let num_windows = (total_bits + window_bits - 1) / window_bits;
+
+    // `cached_points` is a flattened 2d vector
+    // first we compute all cached points in Jacobian coordinates since it's fastest
+    let cached_points_jacobian = points
+        .par_iter()
+        .flat_map(|point| {
+            let base_pt = point.to_curve();
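+            // cached_points[idx][i * 2^w + j] holds `[j * 2^(i * w)] * points[idx]` for j in {1, ..., 2^w - 1}; the j = 0 slot cannot hold the identity in affine form, so it stores `[2^(i * w)] * points[idx]` as a placeholder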
+            let mut increment = base_pt;
+            (0..num_windows)
+                .flat_map(|i| {
+                    let mut curr = increment;
+                    let cache_vec = std::iter::once(increment)
+                        .chain((1..(1usize << min(window_bits, total_bits - i * window_bits))).map(
+                            |_| {
+                                let prev = curr;
+                                curr += increment;
+                                prev
+                            },
+                        ))
+                        .collect_vec();
+                    increment = curr;
+                    cache_vec
+                })
+                .collect_vec()
+        })
+        .collect::<Vec<_>>();
+    // for use in circuits we need affine coordinates, so we do a batch normalize: this is much more efficient than calling `to_affine` one by one since field inversion is very expensive
+    // initialize to all 0s
+    let mut cached_points_affine = vec![C::default(); cached_points_jacobian.len()];
+    C::Curve::batch_normalize(&cached_points_jacobian, &mut cached_points_affine);
+
+    let field_chip = chip.field_chip();
+    let witness_gen_only = thread_pool.lock().unwrap().witness_gen_only();
+
+    let (new_threads, scalar_mults): (Vec<_>, Vec<_>) = cached_points_affine
+        .par_chunks(cached_points_affine.len() / points.len())
+        .zip(scalars.into_par_iter())
+        .map(|(cached_points, scalar)| {
+            let thread_id = thread_pool.lock().unwrap().get_new_thread_id();
+            // the temporary guard returned by `lock()` is dropped at the end of the statement above, so `thread_pool` is unlocked again here
+            let mut thread = Context::new(witness_gen_only, thread_id);
+            let ctx = &mut thread;
+
+            let cached_points = cached_points
+                .iter()
+                .map(|point| {
+                    let point = FixedEcPoint::from_curve(
+                        *point,
+                        field_chip.num_limbs(),
+                        field_chip.limb_bits(),
+                    );
+                    point.assign(field_chip, ctx)
+                })
+                .collect_vec();
+            let cached_point_window_rev =
+                cached_points.chunks(1usize << window_bits).into_iter().rev();
+
+            debug_assert_eq!(scalar.len(), scalar_len);
+            let bits = scalar
+                .into_iter()
+                .flat_map(|scalar_chunk| {
+                    field_chip.gate().num_to_bits(ctx, scalar_chunk, max_scalar_bits_per_cell)
+                })
+                .collect_vec();
+            let bit_window_rev = bits.chunks(window_bits).into_iter().rev();
+            let mut curr_point = None;
+            // `is_started` is just a way to deal with if `curr_point` is actually identity
+            let mut is_started = ctx.load_zero();
+            for (cached_point_window, bit_window) in cached_point_window_rev.zip(bit_window_rev) {
+                let is_zero_window = {
+                    let sum = field_chip.gate().sum(ctx, bit_window.iter().copied());
+                    field_chip.gate().is_zero(ctx, sum)
+                };
+                let add_point =
+                    ec_select_from_bits::<F, _>(field_chip, ctx, cached_point_window, bit_window);
+                curr_point = if let Some(curr_point) = curr_point {
+                    let sum = ec_add_unequal(field_chip, ctx, &curr_point, &add_point, false);
+                    let zero_sum = ec_select(field_chip, ctx, &curr_point, &sum, is_zero_window);
+                    Some(ec_select(field_chip, ctx, &zero_sum, &add_point, is_started))
+                } else {
+                    Some(add_point)
+                };
+                is_started = {
+                    // is_started || !is_zero_window
+                    // (a || !b) = (1-b) + a*b
+                    let not_zero_window = field_chip.gate().not(ctx, is_zero_window);
+                    field_chip.gate().mul_add(ctx, is_started, is_zero_window, not_zero_window)
+                };
+            }
+            (thread, curr_point.unwrap())
+        })
+        .unzip();
+    let mut builder = thread_pool.lock().unwrap();
+    builder.threads[phase].extend(new_threads);
+    let ctx = builder.main(phase);
+    chip.sum::<C>(ctx, scalar_mults.iter())
+}
diff --git a/halo2-ecc/src/ecc/fixed_base_pippenger.rs b/halo2-ecc/src/ecc/fixed_base_pippenger.rs
index 1e36bfd1..05d7cf3e 100644
--- a/halo2-ecc/src/ecc/fixed_base_pippenger.rs
+++ b/halo2-ecc/src/ecc/fixed_base_pippenger.rs
@@ -20,14 +20,14 @@ use rand_chacha::ChaCha20Rng;
 // Output:
 // * new_points: length `points.len() * radix`
 // * new_bool_scalars: 2d array `ceil(scalar_bits / radix)` by `points.len() * radix`
-pub fn decompose<'v, F, C>(
+pub fn decompose<F, C>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'v, F>,
+    ctx: &mut Context<F>,
     points: &[C],
-    scalars: &Vec<Vec<AssignedValue<'v, F>>>,
+    scalars: &Vec<Vec<AssignedValue<F>>>,
     max_scalar_bits_per_cell: usize,
     radix: usize,
-) -> (Vec<C::Curve>, Vec<Vec<AssignedValue<'v, F>>>)
+) -> (Vec<C::Curve>, Vec<Vec<AssignedValue<F>>>)
 where
     F: PrimeField,
     C: CurveAffine,
@@ -66,15 +66,15 @@ where
 // Given points[i] and bool_scalars[j][i],
 // compute G'[j] = sum_{i=0..points.len()} points[i] * bool_scalars[j][i]
 // output is [ G'[j] + rand_point ]_{j=0..bool_scalars.len()}, rand_point
-pub fn multi_product<'v, F: PrimeField, FC, C>(
+pub fn multi_product<F: PrimeField, FC, C>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
+    ctx: &mut Context<F>,
     points: Vec<C::CurveExt>,
-    bool_scalars: Vec<Vec<AssignedValue<'v, F>>>,
+    bool_scalars: Vec<Vec<AssignedValue<F>>>,
     clumping_factor: usize,
-) -> (Vec<EcPoint<F, FC::FieldPoint<'v>>>, EcPoint<F, FC::FieldPoint<'v>>)
+) -> (Vec<EcPoint<F, FC::FieldPoint>>, EcPoint<F, FC::FieldPoint>)
 where
-    FC: PrimeFieldChip<F, FieldPoint<'v> = CRTInteger<'v, F>>,
+    FC: PrimeFieldChip<F, FieldPoint = CRTInteger<F>>,
     FC::FieldType: PrimeField,
     C: CurveAffine<Base = FC::FieldType>,
 {
@@ -187,17 +187,17 @@ where
     (acc, rand_point)
 }
 
-pub fn multi_exp<'v, F: PrimeField, FC, C>(
+pub fn multi_exp<F: PrimeField, FC, C>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
+    ctx: &mut Context<F>,
     points: &[C],
-    scalars: &Vec<Vec<AssignedValue<'v, F>>>,
+    scalars: &Vec<Vec<AssignedValue<F>>>,
     max_scalar_bits_per_cell: usize,
     radix: usize,
     clump_factor: usize,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+) -> EcPoint<F, FC::FieldPoint>
 where
-    FC: PrimeFieldChip<F, FieldPoint<'v> = CRTInteger<'v, F>>,
+    FC: PrimeFieldChip<F, FieldPoint = CRTInteger<F>>,
     FC::FieldType: PrimeField,
     C: CurveAffine<Base = FC::FieldType>,
 {
diff --git a/halo2-ecc/src/ecc/mod.rs b/halo2-ecc/src/ecc/mod.rs
index 7e8baf80..6b1c6655 100644
--- a/halo2-ecc/src/ecc/mod.rs
+++ b/halo2-ecc/src/ecc/mod.rs
@@ -1,18 +1,19 @@
 #![allow(non_snake_case)]
 use crate::bigint::CRTInteger;
-use crate::fields::{fp::FpConfig, FieldChip, PrimeFieldChip, Selectable};
-use crate::halo2_proofs::{arithmetic::CurveAffine, circuit::Value};
+use crate::fields::{fp::FpChip, FieldChip, PrimeField, PrimeFieldChip, Selectable};
+use crate::halo2_proofs::arithmetic::CurveAffine;
 use group::{Curve, Group};
+use halo2_base::gates::builder::GateThreadBuilder;
 use halo2_base::{
     gates::{GateInstructions, RangeInstructions},
-    utils::{modulus, CurveAffineExt, PrimeField},
+    utils::{modulus, CurveAffineExt},
     AssignedValue, Context,
-    QuantumCell::Existing,
 };
 use itertools::Itertools;
 use rand::SeedableRng;
 use rand_chacha::ChaCha20Rng;
 use std::marker::PhantomData;
+use std::sync::Mutex;
 
 pub mod ecdsa;
 pub mod fixed_base;
@@ -21,7 +22,7 @@ pub mod pippenger;
 
 // EcPoint and EccChip take in a generic `FieldChip` to implement generic elliptic curve operations on arbitrary field extensions (provided chip exists) for short Weierstrass curves (currently further assuming a4 = 0 for optimization purposes)
 #[derive(Debug)]
-pub struct EcPoint<F: PrimeField, FieldPoint: Clone> {
+pub struct EcPoint<F: PrimeField, FieldPoint> {
     pub x: FieldPoint,
     pub y: FieldPoint,
     _marker: PhantomData<F>,
@@ -33,7 +34,7 @@ impl<F: PrimeField, FieldPoint: Clone> Clone for EcPoint<F, FieldPoint> {
     }
 }
 
-impl<F: PrimeField, FieldPoint: Clone> EcPoint<F, FieldPoint> {
+impl<F: PrimeField, FieldPoint> EcPoint<F, FieldPoint> {
     pub fn construct(x: FieldPoint, y: FieldPoint) -> Self {
         Self { x, y, _marker: PhantomData }
     }
@@ -57,18 +58,18 @@ impl<F: PrimeField, FieldPoint: Clone> EcPoint<F, FieldPoint> {
 //  x_3 = lambda^2 - x_1 - x_2 (mod p)
 //  y_3 = lambda (x_1 - x_3) - y_1 mod p
 //
-/// For optimization reasons, we assume that if you are using this with `is_strict = true`, then you have already called `chip.enforce_less_than_p` on both `P.x` and `P.y`
-pub fn ec_add_unequal<'v, F: PrimeField, FC: FieldChip<F>>(
+/// For optimization reasons, we assume that if you are using this with `is_strict = true`, then you have already called `chip.enforce_less_than_p` on both `P.x` and `Q.x`
+pub fn ec_add_unequal<F: PrimeField, FC: FieldChip<F>>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    P: &EcPoint<F, FC::FieldPoint<'v>>,
-    Q: &EcPoint<F, FC::FieldPoint<'v>>,
+    ctx: &mut Context<F>,
+    P: &EcPoint<F, FC::FieldPoint>,
+    Q: &EcPoint<F, FC::FieldPoint>,
     is_strict: bool,
-) -> EcPoint<F, FC::FieldPoint<'v>> {
+) -> EcPoint<F, FC::FieldPoint> {
     if is_strict {
         // constrains that P.x != Q.x
         let x_is_equal = chip.is_equal_unenforced(ctx, &P.x, &Q.x);
-        chip.range().gate().assert_is_const(ctx, &x_is_equal, F::zero());
+        chip.range().gate().assert_is_const(ctx, &x_is_equal, &F::zero());
     }
 
     let dx = chip.sub_no_carry(ctx, &Q.x, &P.x);
@@ -99,18 +100,18 @@ pub fn ec_add_unequal<'v, F: PrimeField, FC: FieldChip<F>>(
 //  y_3 = lambda (x_1 - x_3) - y_1 mod p
 //  Assumes that P !=Q and Q != (P - Q)
 //
-/// For optimization reasons, we assume that if you are using this with `is_strict = true`, then you have already called `chip.enforce_less_than_p` on both `P.x` and `P.y`
-pub fn ec_sub_unequal<'v, F: PrimeField, FC: FieldChip<F>>(
+/// For optimization reasons, we assume that if you are using this with `is_strict = true`, then you have already called `chip.enforce_less_than_p` on both `P.x` and `Q.x`
+pub fn ec_sub_unequal<F: PrimeField, FC: FieldChip<F>>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    P: &EcPoint<F, FC::FieldPoint<'v>>,
-    Q: &EcPoint<F, FC::FieldPoint<'v>>,
+    ctx: &mut Context<F>,
+    P: &EcPoint<F, FC::FieldPoint>,
+    Q: &EcPoint<F, FC::FieldPoint>,
     is_strict: bool,
-) -> EcPoint<F, FC::FieldPoint<'v>> {
+) -> EcPoint<F, FC::FieldPoint> {
     if is_strict {
         // constrains that P.x != Q.x
         let x_is_equal = chip.is_equal_unenforced(ctx, &P.x, &Q.x);
-        chip.range().gate().assert_is_const(ctx, &x_is_equal, F::zero());
+        chip.range().gate().assert_is_const(ctx, &x_is_equal, &F::zero());
     }
 
     let dx = chip.sub_no_carry(ctx, &Q.x, &P.x);
@@ -150,11 +151,11 @@ pub fn ec_sub_unequal<'v, F: PrimeField, FC: FieldChip<F>>(
 // we precompute lambda and constrain (2y) * lambda = 3 x^2 (mod p)
 // then we compute x_3 = lambda^2 - 2 x (mod p)
 //                 y_3 = lambda (x - x_3) - y (mod p)
-pub fn ec_double<'v, F: PrimeField, FC: FieldChip<F>>(
+pub fn ec_double<F: PrimeField, FC: FieldChip<F>>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    P: &EcPoint<F, FC::FieldPoint<'v>>,
-) -> EcPoint<F, FC::FieldPoint<'v>> {
+    ctx: &mut Context<F>,
+    P: &EcPoint<F, FC::FieldPoint>,
+) -> EcPoint<F, FC::FieldPoint> {
     // removed optimization that computes `2 * lambda` while assigning witness to `lambda` simultaneously, in favor of readability. The difference is just copying `lambda` once
     let two_y = chip.scalar_mul_no_carry(ctx, &P.y, 2);
     let three_x = chip.scalar_mul_no_carry(ctx, &P.x, 3);
@@ -176,15 +177,74 @@ pub fn ec_double<'v, F: PrimeField, FC: FieldChip<F>>(
     EcPoint::construct(x_3, y_3)
 }
 
-pub fn ec_select<'v, F: PrimeField, FC>(
+/// Implements:
+/// computing 2P + Q = (P + Q) + P for P = (x_0, y_0), Q = (x_1, y_1)
+// using the Montgomery ladder(?) trick to skip computing the y-coordinate of the intermediate point P + Q
+// from halo2wrong: https://hackmd.io/ncuKqRXzR-Cw-Au2fGzsMg?view
+// lambda_0 = (y_1 - y_0) / (x_1 - x_0)
+// x_2 = lambda_0^2 - x_0 - x_1
+// lambda_1 = lambda_0 + 2 * y_0 / (x_2 - x_0)
+// x_res = lambda_1^2 - x_0 - x_2
+// y_res = lambda_1 * (x_res - x_0) - y_0
+pub fn ec_double_and_add_unequal<F: PrimeField, FC: FieldChip<F>>(
     chip: &FC,
-    ctx: &mut Context<'_, F>,
-    P: &EcPoint<F, FC::FieldPoint<'v>>,
-    Q: &EcPoint<F, FC::FieldPoint<'v>>,
-    sel: &AssignedValue<'v, F>,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+    ctx: &mut Context<F>,
+    P: &EcPoint<F, FC::FieldPoint>,
+    Q: &EcPoint<F, FC::FieldPoint>,
+    is_strict: bool,
+) -> EcPoint<F, FC::FieldPoint> {
+    if is_strict {
+        // constrains that P.x != Q.x
+        let x_is_equal = chip.is_equal_unenforced(ctx, &P.x, &Q.x);
+        chip.range().gate().assert_is_const(ctx, &x_is_equal, &F::zero());
+    }
+
+    let dx = chip.sub_no_carry(ctx, &Q.x, &P.x);
+    let dy = chip.sub_no_carry(ctx, &Q.y, &P.y);
+    let lambda_0 = chip.divide(ctx, &dy, &dx);
+
+    //  x_2 = lambda_0^2 - x_0 - x_1 (mod p)
+    let lambda_0_sq = chip.mul_no_carry(ctx, &lambda_0, &lambda_0);
+    let lambda_0_sq_minus_x_0 = chip.sub_no_carry(ctx, &lambda_0_sq, &P.x);
+    let x_2_no_carry = chip.sub_no_carry(ctx, &lambda_0_sq_minus_x_0, &Q.x);
+    let x_2 = chip.carry_mod(ctx, &x_2_no_carry);
+
+    if is_strict {
+        // TODO: when can we remove this check?
+        // constrains that x_2 != x_0
+        let x_is_equal = chip.is_equal_unenforced(ctx, &P.x, &x_2);
+        chip.range().gate().assert_is_const(ctx, &x_is_equal, &F::zero());
+    }
+    // lambda_1 = lambda_0 + 2 * y_0 / (x_2 - x_0)
+    let two_y_0 = chip.scalar_mul_no_carry(ctx, &P.y, 2);
+    let x_2_minus_x_0 = chip.sub_no_carry(ctx, &x_2, &P.x);
+    let lambda_1_minus_lambda_0 = chip.divide(ctx, &two_y_0, &x_2_minus_x_0);
+    let lambda_1_no_carry = chip.add_no_carry(ctx, &lambda_0, &lambda_1_minus_lambda_0);
+
+    // x_res = lambda_1^2 - x_0 - x_2
+    let lambda_1_sq_nc = chip.mul_no_carry(ctx, &lambda_1_no_carry, &lambda_1_no_carry);
+    let lambda_1_sq_minus_x_0 = chip.sub_no_carry(ctx, &lambda_1_sq_nc, &P.x);
+    let x_res_no_carry = chip.sub_no_carry(ctx, &lambda_1_sq_minus_x_0, &x_2);
+    let x_res = chip.carry_mod(ctx, &x_res_no_carry);
+
+    // y_res = lambda_1 * (x_res - x_0) - y_0
+    let x_res_minus_x_0 = chip.sub_no_carry(ctx, &x_res, &P.x);
+    let lambda_1_x_res_minus_x_0 = chip.mul_no_carry(ctx, &lambda_1_no_carry, &x_res_minus_x_0);
+    let y_res_no_carry = chip.sub_no_carry(ctx, &lambda_1_x_res_minus_x_0, &P.y);
+    let y_res = chip.carry_mod(ctx, &y_res_no_carry);
+
+    EcPoint::construct(x_res, y_res)
+}
+
+pub fn ec_select<F: PrimeField, FC>(
+    chip: &FC,
+    ctx: &mut Context<F>,
+    P: &EcPoint<F, FC::FieldPoint>,
+    Q: &EcPoint<F, FC::FieldPoint>,
+    sel: AssignedValue<F>,
+) -> EcPoint<F, FC::FieldPoint>
 where
-    FC: FieldChip<F> + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
 {
     let Rx = chip.select(ctx, &P.x, &Q.x, sel);
     let Ry = chip.select(ctx, &P.y, &Q.y, sel);
@@ -193,14 +253,14 @@ where
 
 // takes the dot product of points with sel, where each is intepreted as
 // a _vector_
-pub fn ec_select_by_indicator<'v, F: PrimeField, FC>(
+pub fn ec_select_by_indicator<F: PrimeField, FC>(
     chip: &FC,
-    ctx: &mut Context<'_, F>,
-    points: &[EcPoint<F, FC::FieldPoint<'v>>],
-    coeffs: &[AssignedValue<'v, F>],
-) -> EcPoint<F, FC::FieldPoint<'v>>
+    ctx: &mut Context<F>,
+    points: &[EcPoint<F, FC::FieldPoint>],
+    coeffs: &[AssignedValue<F>],
+) -> EcPoint<F, FC::FieldPoint>
 where
-    FC: FieldChip<F> + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
 {
     let x_coords = points.iter().map(|P| P.x.clone()).collect::<Vec<_>>();
     let y_coords = points.iter().map(|P| P.y.clone()).collect::<Vec<_>>();
@@ -210,14 +270,14 @@ where
 }
 
 // `sel` is little-endian binary
-pub fn ec_select_from_bits<'v, F: PrimeField, FC>(
+pub fn ec_select_from_bits<F: PrimeField, FC>(
     chip: &FC,
-    ctx: &mut Context<'_, F>,
-    points: &[EcPoint<F, FC::FieldPoint<'v>>],
-    sel: &[AssignedValue<'v, F>],
-) -> EcPoint<F, FC::FieldPoint<'v>>
+    ctx: &mut Context<F>,
+    points: &[EcPoint<F, FC::FieldPoint>],
+    sel: &[AssignedValue<F>],
+) -> EcPoint<F, FC::FieldPoint>
 where
-    FC: FieldChip<F> + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
 {
     let w = sel.len();
     let num_points = points.len();
@@ -234,16 +294,16 @@ where
 // - `scalar_i < 2^{max_bits} for all i` (constrained by num_to_bits)
 // - `max_bits <= modulus::<F>.bits()`
 //   * P has order given by the scalar field modulus
-pub fn scalar_multiply<'v, F: PrimeField, FC>(
+pub fn scalar_multiply<F: PrimeField, FC>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    P: &EcPoint<F, FC::FieldPoint<'v>>,
-    scalar: &Vec<AssignedValue<'v, F>>,
+    ctx: &mut Context<F>,
+    P: &EcPoint<F, FC::FieldPoint>,
+    scalar: Vec<AssignedValue<F>>,
     max_bits: usize,
     window_bits: usize,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+) -> EcPoint<F, FC::FieldPoint>
 where
-    FC: FieldChip<F> + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
 {
     assert!(!scalar.is_empty());
     assert!((max_bits as u64) <= modulus::<F>().bits());
@@ -258,24 +318,15 @@ where
         bits.append(&mut new_bits);
     }
     let mut rounded_bits = bits;
-    let zero_cell = chip.gate().load_zero(ctx);
-    for _ in 0..(rounded_bitlen - total_bits) {
-        rounded_bits.push(zero_cell.clone());
-    }
+    let zero_cell = ctx.load_zero();
+    rounded_bits.resize(rounded_bitlen, zero_cell);
 
     // is_started[idx] holds whether there is a 1 in bits with index at least (rounded_bitlen - idx)
     let mut is_started = Vec::with_capacity(rounded_bitlen);
-    for _ in 0..(rounded_bitlen - total_bits) {
-        is_started.push(zero_cell.clone());
-    }
-    is_started.push(zero_cell.clone());
+    is_started.resize(rounded_bitlen - total_bits + 1, zero_cell);
     for idx in 1..total_bits {
-        let or = chip.gate().or(
-            ctx,
-            Existing(&is_started[rounded_bitlen - total_bits + idx - 1]),
-            Existing(&rounded_bits[total_bits - idx]),
-        );
-        is_started.push(or.clone());
+        let or = chip.gate().or(ctx, *is_started.last().unwrap(), rounded_bits[total_bits - idx]);
+        is_started.push(or);
     }
 
     // is_zero_window[idx] is 0/1 depending on whether bits [rounded_bitlen - window_bits * (idx + 1), rounded_bitlen - window_bits * idx) are all 0
@@ -284,10 +335,10 @@ where
         let temp_bits = rounded_bits
             [rounded_bitlen - window_bits * (idx + 1)..rounded_bitlen - window_bits * idx]
             .iter()
-            .map(|x| Existing(x));
+            .copied();
         let bit_sum = chip.gate().sum(ctx, temp_bits);
-        let is_zero = chip.gate().is_zero(ctx, &bit_sum);
-        is_zero_window.push(is_zero.clone());
+        let is_zero = chip.gate().is_zero(ctx, bit_sum);
+        is_zero_window.push(is_zero);
     }
 
     // cached_points[idx] stores idx * P, with cached_points[0] = P
@@ -298,10 +349,10 @@ where
     for idx in 2..cache_size {
         if idx == 2 {
             let double = ec_double(chip, ctx, P /*, b*/);
-            cached_points.push(double.clone());
+            cached_points.push(double);
         } else {
             let new_point = ec_add_unequal(chip, ctx, &cached_points[idx - 1], P, false);
-            cached_points.push(new_point.clone());
+            cached_points.push(new_point);
         }
     }
 
@@ -327,19 +378,16 @@ where
         );
         let mult_and_add = ec_add_unequal(chip, ctx, &mult_point, &add_point, false);
         let is_started_point =
-            ec_select(chip, ctx, &mult_point, &mult_and_add, &is_zero_window[idx]);
+            ec_select(chip, ctx, &mult_point, &mult_and_add, is_zero_window[idx]);
 
         curr_point =
-            ec_select(chip, ctx, &is_started_point, &add_point, &is_started[window_bits * idx]);
+            ec_select(chip, ctx, &is_started_point, &add_point, is_started[window_bits * idx]);
     }
     curr_point
 }
 
-pub fn is_on_curve<'v, F, FC, C>(
-    chip: &FC,
-    ctx: &mut Context<'v, F>,
-    P: &EcPoint<F, FC::FieldPoint<'v>>,
-) where
+pub fn is_on_curve<F, FC, C>(chip: &FC, ctx: &mut Context<F>, P: &EcPoint<F, FC::FieldPoint>)
+where
     F: PrimeField,
     FC: FieldChip<F>,
     C: CurveAffine<Base = FC::FieldType>,
@@ -354,10 +402,7 @@ pub fn is_on_curve<'v, F, FC, C>(
     chip.check_carry_mod_to_zero(ctx, &diff)
 }
 
-pub fn load_random_point<'v, F, FC, C>(
-    chip: &FC,
-    ctx: &mut Context<'v, F>,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+pub fn load_random_point<F, FC, C>(chip: &FC, ctx: &mut Context<F>) -> EcPoint<F, FC::FieldPoint>
 where
     F: PrimeField,
     FC: FieldChip<F>,
@@ -365,8 +410,8 @@ where
 {
     let base_point: C = C::CurveExt::random(ChaCha20Rng::from_entropy()).to_affine();
     let (x, y) = base_point.into_coordinates();
-    let pt_x = FC::fe_to_witness(&Value::known(x));
-    let pt_y = FC::fe_to_witness(&Value::known(y));
+    let pt_x = FC::fe_to_witness(&x);
+    let pt_y = FC::fe_to_witness(&y);
     let base = {
         let x_overflow = chip.load_private(ctx, pt_x);
         let y_overflow = chip.load_private(ctx, pt_y);
@@ -383,16 +428,16 @@ where
 // Input:
 // - `scalars` is vector of same length as `P`
 // - each `scalar` in `scalars` satisfies same assumptions as in `scalar_multiply` above
-pub fn multi_scalar_multiply<'v, F: PrimeField, FC, C>(
+pub fn multi_scalar_multiply<F: PrimeField, FC, C>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    P: &[EcPoint<F, FC::FieldPoint<'v>>],
-    scalars: &[Vec<AssignedValue<'v, F>>],
+    ctx: &mut Context<F>,
+    P: &[EcPoint<F, FC::FieldPoint>],
+    scalars: Vec<Vec<AssignedValue<F>>>,
     max_bits: usize,
     window_bits: usize,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+) -> EcPoint<F, FC::FieldPoint>
 where
-    FC: FieldChip<F> + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
     C: CurveAffineExt<Base = FC::FieldType>,
 {
     let k = P.len();
@@ -406,17 +451,15 @@ where
     let num_windows = (total_bits + window_bits - 1) / window_bits;
     let rounded_bitlen = num_windows * window_bits;
 
-    let zero_cell = chip.gate().load_zero(ctx);
+    let zero_cell = ctx.load_zero();
     let rounded_bits = scalars
-        .iter()
+        .into_iter()
         .flat_map(|scalar| {
-            assert_eq!(scalar.len(), scalar_len);
+            debug_assert_eq!(scalar.len(), scalar_len);
             scalar
-                .iter()
+                .into_iter()
                 .flat_map(|scalar_chunk| chip.gate().num_to_bits(ctx, scalar_chunk, max_bits))
-                .chain(
-                    std::iter::repeat_with(|| zero_cell.clone()).take(rounded_bitlen - total_bits),
-                )
+                .chain(std::iter::repeat(zero_cell).take(rounded_bitlen - total_bits))
                 .collect_vec()
         })
         .collect_vec();
@@ -457,7 +500,7 @@ where
             // adversary could pick `A` so add equal case occurs, so we must use strict add_unequal
             let mut new_point = ec_add_unequal(chip, ctx, prev, point, true);
             // special case for when P[idx] = O
-            new_point = ec_select(chip, ctx, prev, &new_point, &is_infinity);
+            new_point = ec_select(chip, ctx, prev, &new_point, is_infinity);
             chip.enforce_less_than(ctx, new_point.x());
             cached_points.push(new_point);
         }
@@ -547,31 +590,32 @@ pub fn get_naf(mut exp: Vec<u64>) -> Vec<i8> {
     naf
 }
 
-pub type BaseFieldEccChip<C> = EccChip<
+pub type BaseFieldEccChip<'chip, C> = EccChip<
+    'chip,
     <C as CurveAffine>::ScalarExt,
-    FpConfig<<C as CurveAffine>::ScalarExt, <C as CurveAffine>::Base>,
+    FpChip<'chip, <C as CurveAffine>::ScalarExt, <C as CurveAffine>::Base>,
 >;
 
 #[derive(Clone, Debug)]
-pub struct EccChip<F: PrimeField, FC: FieldChip<F>> {
-    pub field_chip: FC,
+pub struct EccChip<'chip, F: PrimeField, FC: FieldChip<F>> {
+    pub field_chip: &'chip FC,
     _marker: PhantomData<F>,
 }
 
-impl<F: PrimeField, FC: FieldChip<F>> EccChip<F, FC> {
-    pub fn construct(field_chip: FC) -> Self {
+impl<'chip, F: PrimeField, FC: FieldChip<F>> EccChip<'chip, F, FC> {
+    pub fn new(field_chip: &'chip FC) -> Self {
         Self { field_chip, _marker: PhantomData }
     }
 
     pub fn field_chip(&self) -> &FC {
-        &self.field_chip
+        self.field_chip
     }
 
-    pub fn load_private<'v>(
+    pub fn load_private(
         &self,
-        ctx: &mut Context<'_, F>,
-        point: (Value<FC::FieldType>, Value<FC::FieldType>),
-    ) -> EcPoint<F, FC::FieldPoint<'v>> {
+        ctx: &mut Context<F>,
+        point: (FC::FieldType, FC::FieldType),
+    ) -> EcPoint<F, FC::FieldPoint> {
         let (x, y) = (FC::fe_to_witness(&point.0), FC::fe_to_witness(&point.1));
 
         let x_assigned = self.field_chip.load_private(ctx, x);
@@ -581,23 +625,15 @@ impl<F: PrimeField, FC: FieldChip<F>> EccChip<F, FC> {
     }
 
     /// Does not constrain witness to lie on curve
-    pub fn assign_point<'v, C>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        g: Value<C>,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+    pub fn assign_point<C>(&self, ctx: &mut Context<F>, g: C) -> EcPoint<F, FC::FieldPoint>
     where
         C: CurveAffineExt<Base = FC::FieldType>,
     {
-        let (x, y) = g.map(|g| g.into_coordinates()).unzip();
+        let (x, y) = g.into_coordinates();
         self.load_private(ctx, (x, y))
     }
 
-    pub fn assign_constant_point<'v, C>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        g: C,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+    pub fn assign_constant_point<C>(&self, ctx: &mut Context<F>, g: C) -> EcPoint<F, FC::FieldPoint>
     where
         C: CurveAffineExt<Base = FC::FieldType>,
     {
@@ -609,31 +645,25 @@ impl<F: PrimeField, FC: FieldChip<F>> EccChip<F, FC> {
         EcPoint::construct(x, y)
     }
 
-    pub fn load_random_point<'v, C>(
-        &self,
-        ctx: &mut Context<'v, F>,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+    pub fn load_random_point<C>(&self, ctx: &mut Context<F>) -> EcPoint<F, FC::FieldPoint>
     where
         C: CurveAffineExt<Base = FC::FieldType>,
     {
         load_random_point::<F, FC, C>(self.field_chip(), ctx)
     }
 
-    pub fn assert_is_on_curve<'v, C>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-    ) where
+    pub fn assert_is_on_curve<C>(&self, ctx: &mut Context<F>, P: &EcPoint<F, FC::FieldPoint>)
+    where
         C: CurveAffine<Base = FC::FieldType>,
     {
-        is_on_curve::<F, FC, C>(&self.field_chip, ctx, P)
+        is_on_curve::<F, FC, C>(self.field_chip, ctx, P)
     }
 
-    pub fn is_on_curve_or_infinity<'v, C>(
+    pub fn is_on_curve_or_infinity<C>(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-    ) -> AssignedValue<'v, F>
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+    ) -> AssignedValue<F>
     where
         C: CurveAffine<Base = FC::FieldType>,
         C::Base: ff::PrimeField,
@@ -652,71 +682,66 @@ impl<F: PrimeField, FC: FieldChip<F>> EccChip<F, FC> {
         let x_is_zero = self.field_chip.is_zero(ctx, &P.x);
         let y_is_zero = self.field_chip.is_zero(ctx, &P.y);
 
-        self.field_chip.range().gate().or_and(
-            ctx,
-            Existing(&is_on_curve),
-            Existing(&x_is_zero),
-            Existing(&y_is_zero),
-        )
+        self.field_chip.range().gate().or_and(ctx, is_on_curve, x_is_zero, y_is_zero)
     }
 
-    pub fn negate<'v>(
+    pub fn negate(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-    ) -> EcPoint<F, FC::FieldPoint<'v>> {
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+    ) -> EcPoint<F, FC::FieldPoint> {
         EcPoint::construct(P.x.clone(), self.field_chip.negate(ctx, &P.y))
     }
 
     /// Assumes that P.x != Q.x
     /// If `is_strict == true`, then actually constrains that `P.x != Q.x`
-    pub fn add_unequal<'v>(
+    pub fn add_unequal(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-        Q: &EcPoint<F, FC::FieldPoint<'v>>,
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+        Q: &EcPoint<F, FC::FieldPoint>,
         is_strict: bool,
-    ) -> EcPoint<F, FC::FieldPoint<'v>> {
-        ec_add_unequal(&self.field_chip, ctx, P, Q, is_strict)
+    ) -> EcPoint<F, FC::FieldPoint> {
+        ec_add_unequal(self.field_chip, ctx, P, Q, is_strict)
     }
 
     /// Assumes that P.x != Q.x
     /// Otherwise will panic
-    pub fn sub_unequal<'v>(
+    pub fn sub_unequal(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-        Q: &EcPoint<F, FC::FieldPoint<'v>>,
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+        Q: &EcPoint<F, FC::FieldPoint>,
         is_strict: bool,
-    ) -> EcPoint<F, FC::FieldPoint<'v>> {
-        ec_sub_unequal(&self.field_chip, ctx, P, Q, is_strict)
+    ) -> EcPoint<F, FC::FieldPoint> {
+        ec_sub_unequal(self.field_chip, ctx, P, Q, is_strict)
     }
 
-    pub fn double<'v>(
+    pub fn double(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-    ) -> EcPoint<F, FC::FieldPoint<'v>> {
-        ec_double(&self.field_chip, ctx, P)
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+    ) -> EcPoint<F, FC::FieldPoint> {
+        ec_double(self.field_chip, ctx, P)
     }
 
-    pub fn is_equal<'v>(
+    pub fn is_equal(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-        Q: &EcPoint<F, FC::FieldPoint<'v>>,
-    ) -> AssignedValue<'v, F> {
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+        Q: &EcPoint<F, FC::FieldPoint>,
+    ) -> AssignedValue<F> {
         // TODO: optimize
         let x_is_equal = self.field_chip.is_equal(ctx, &P.x, &Q.x);
         let y_is_equal = self.field_chip.is_equal(ctx, &P.y, &Q.y);
-        self.field_chip.range().gate().and(ctx, Existing(&x_is_equal), Existing(&y_is_equal))
+        self.field_chip.range().gate().and(ctx, x_is_equal, y_is_equal)
     }
 
-    pub fn assert_equal<'v>(
+    pub fn assert_equal(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-        Q: &EcPoint<F, FC::FieldPoint<'v>>,
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+        Q: &EcPoint<F, FC::FieldPoint>,
     ) {
         self.field_chip.assert_equal(ctx, &P.x, &Q.x);
         self.field_chip.assert_equal(ctx, &P.y, &Q.y);
@@ -724,12 +749,12 @@ impl<F: PrimeField, FC: FieldChip<F>> EccChip<F, FC> {
 
     pub fn sum<'b, 'v: 'b, C>(
         &self,
-        ctx: &mut Context<'v, F>,
-        points: impl Iterator<Item = &'b EcPoint<F, FC::FieldPoint<'v>>>,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+        ctx: &mut Context<F>,
+        points: impl Iterator<Item = &'b EcPoint<F, FC::FieldPoint>>,
+    ) -> EcPoint<F, FC::FieldPoint>
     where
         C: CurveAffineExt<Base = FC::FieldType>,
-        FC::FieldPoint<'v>: 'b,
+        FC::FieldPoint: 'b,
     {
         let rand_point = self.load_random_point::<C>(ctx);
         self.field_chip.enforce_less_than(ctx, rand_point.x());
@@ -743,40 +768,57 @@ impl<F: PrimeField, FC: FieldChip<F>> EccChip<F, FC> {
     }
 }
 
-impl<F: PrimeField, FC: FieldChip<F>> EccChip<F, FC>
+impl<'chip, F: PrimeField, FC: FieldChip<F>> EccChip<'chip, F, FC>
 where
-    for<'v> FC: Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: Selectable<F, Point = FC::FieldPoint>,
 {
-    pub fn select<'v>(
+    pub fn select(
         &self,
-        ctx: &mut Context<'_, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-        Q: &EcPoint<F, FC::FieldPoint<'v>>,
-        condition: &AssignedValue<'v, F>,
-    ) -> EcPoint<F, FC::FieldPoint<'v>> {
-        ec_select(&self.field_chip, ctx, P, Q, condition)
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+        Q: &EcPoint<F, FC::FieldPoint>,
+        condition: AssignedValue<F>,
+    ) -> EcPoint<F, FC::FieldPoint> {
+        ec_select(self.field_chip, ctx, P, Q, condition)
     }
 
-    pub fn scalar_mult<'v>(
+    pub fn scalar_mult(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-        scalar: &Vec<AssignedValue<'v, F>>,
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+        scalar: Vec<AssignedValue<F>>,
         max_bits: usize,
         window_bits: usize,
-    ) -> EcPoint<F, FC::FieldPoint<'v>> {
-        scalar_multiply::<F, FC>(&self.field_chip, ctx, P, scalar, max_bits, window_bits)
+    ) -> EcPoint<F, FC::FieldPoint> {
+        scalar_multiply::<F, FC>(self.field_chip, ctx, P, scalar, max_bits, window_bits)
+    }
+
+    // default for most purposes
+    pub fn variable_base_msm<C>(
+        &self,
+        thread_pool: &Mutex<GateThreadBuilder<F>>,
+        P: &[EcPoint<F, FC::FieldPoint>],
+        scalars: Vec<Vec<AssignedValue<F>>>,
+        max_bits: usize,
+    ) -> EcPoint<F, FC::FieldPoint>
+    where
+        C: CurveAffineExt<Base = FC::FieldType>,
+        C::Base: ff::PrimeField,
+    {
+        // window_bits = 4 is optimal from empirical observations
+        self.variable_base_msm_in::<C>(thread_pool, P, scalars, max_bits, 4, 0)
     }
 
     // TODO: put a check in place that scalar is < modulus of C::Scalar
-    pub fn variable_base_msm<'v, C>(
+    pub fn variable_base_msm_in<C>(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &[EcPoint<F, FC::FieldPoint<'v>>],
-        scalars: &[Vec<AssignedValue<'v, F>>],
+        thread_pool: &Mutex<GateThreadBuilder<F>>,
+        P: &[EcPoint<F, FC::FieldPoint>],
+        scalars: Vec<Vec<AssignedValue<F>>>,
         max_bits: usize,
         window_bits: usize,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+        phase: usize,
+    ) -> EcPoint<F, FC::FieldPoint>
     where
         C: CurveAffineExt<Base = FC::FieldType>,
         C::Base: ff::PrimeField,
@@ -785,8 +827,10 @@ where
         println!("computing length {} MSM", P.len());
 
         if P.len() <= 25 {
+            let mut builder = thread_pool.lock().unwrap();
+            let ctx = builder.main(phase);
             multi_scalar_multiply::<F, FC, C>(
-                &self.field_chip,
+                self.field_chip,
                 ctx,
                 P,
                 scalars,
@@ -801,40 +845,40 @@ where
             if radix == 0 {
                 radix = 1;
             }*/
-            let radix = 1;
-            pippenger::multi_exp::<F, FC, C>(
-                &self.field_chip,
-                ctx,
+            // guessing that it is always better to use parallelism for >25 points
+            pippenger::multi_exp_par::<F, FC, C>(
+                self.field_chip,
+                thread_pool,
                 P,
                 scalars,
                 max_bits,
-                radix,
-                window_bits,
+                window_bits, // clump_factor := window_bits
+                phase,
             )
         }
     }
 }
 
-impl<F: PrimeField, FC: PrimeFieldChip<F>> EccChip<F, FC>
+impl<'chip, F: PrimeField, FC: PrimeFieldChip<F>> EccChip<'chip, F, FC>
 where
     FC::FieldType: PrimeField,
 {
     // TODO: put a check in place that scalar is < modulus of C::Scalar
-    pub fn fixed_base_scalar_mult<'v, C>(
+    pub fn fixed_base_scalar_mult<C>(
         &self,
-        ctx: &mut Context<'v, F>,
+        ctx: &mut Context<F>,
         point: &C,
-        scalar: &[AssignedValue<'v, F>],
+        scalar: Vec<AssignedValue<F>>,
         max_bits: usize,
         window_bits: usize,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+    ) -> EcPoint<F, FC::FieldPoint>
     where
         C: CurveAffineExt,
-        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint<'v> = CRTInteger<'v, F>>
-            + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint = CRTInteger<F>>
+            + Selectable<F, Point = FC::FieldPoint>,
     {
         fixed_base::scalar_multiply::<F, _, _>(
-            &self.field_chip,
+            self.field_chip,
             ctx,
             point,
             scalar,
@@ -843,30 +887,61 @@ where
         )
     }
 
-    /// `radix = 0` means auto-calculate
-    ///
+    // default for most purposes
+    pub fn fixed_base_msm<C>(
+        &self,
+        thread_pool: &Mutex<GateThreadBuilder<F>>,
+        points: &[C],
+        scalars: Vec<Vec<AssignedValue<F>>>,
+        max_scalar_bits_per_cell: usize,
+    ) -> EcPoint<F, FC::FieldPoint>
+    where
+        C: CurveAffineExt,
+        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint = CRTInteger<F>>
+            + Selectable<F, Point = FC::FieldPoint>,
+    {
+        self.fixed_base_msm_in::<C>(thread_pool, points, scalars, max_scalar_bits_per_cell, 4, 0)
+    }
+
+    // `radix = 0` means auto-calculate
+    //
     /// `clump_factor = 0` means auto-calculate
     ///
     /// The user should filter out base points that are identity beforehand; we do not separately do this here
-    pub fn fixed_base_msm<'v, C>(
+    pub fn fixed_base_msm_in<C>(
         &self,
-        ctx: &mut Context<'v, F>,
+        thread_pool: &Mutex<GateThreadBuilder<F>>,
         points: &[C],
-        scalars: &[Vec<AssignedValue<'v, F>>],
+        scalars: Vec<Vec<AssignedValue<F>>>,
         max_scalar_bits_per_cell: usize,
-        _radix: usize,
         clump_factor: usize,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+        phase: usize,
+    ) -> EcPoint<F, FC::FieldPoint>
     where
         C: CurveAffineExt,
-        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint<'v> = CRTInteger<'v, F>>
-            + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint = CRTInteger<F>>
+            + Selectable<F, Point = FC::FieldPoint>,
     {
-        assert_eq!(points.len(), scalars.len());
+        debug_assert_eq!(points.len(), scalars.len());
         #[cfg(feature = "display")]
         println!("computing length {} fixed base msm", points.len());
 
-        fixed_base::msm(self, ctx, points, scalars, max_scalar_bits_per_cell, clump_factor)
+        // heuristic to decide when to use parallelism
+        if points.len() < rayon::current_num_threads() {
+            let mut builder = thread_pool.lock().unwrap();
+            let ctx = builder.main(phase);
+            fixed_base::msm(self, ctx, points, scalars, max_scalar_bits_per_cell, clump_factor)
+        } else {
+            fixed_base::msm_par(
+                self,
+                thread_pool,
+                points,
+                scalars,
+                max_scalar_bits_per_cell,
+                clump_factor,
+                phase,
+            )
+        }
 
         // Empirically does not seem like pippenger is any better for fixed base msm right now, because of the cost of `select_by_indicator`
         // Cell usage becomes around comparable when `points.len() > 100`, and `clump_factor` should always be 4
diff --git a/halo2-ecc/src/ecc/pippenger.rs b/halo2-ecc/src/ecc/pippenger.rs
index bb60f9c2..11ada696 100644
--- a/halo2-ecc/src/ecc/pippenger.rs
+++ b/halo2-ecc/src/ecc/pippenger.rs
@@ -2,12 +2,14 @@ use super::{
     ec_add_unequal, ec_double, ec_select, ec_select_from_bits, ec_sub_unequal, load_random_point,
     EcPoint,
 };
-use crate::fields::{FieldChip, Selectable};
+use crate::fields::{FieldChip, PrimeField, Selectable};
 use halo2_base::{
-    gates::GateInstructions,
-    utils::{CurveAffineExt, PrimeField},
+    gates::{builder::GateThreadBuilder, GateInstructions},
+    utils::CurveAffineExt,
     AssignedValue, Context,
 };
+use rayon::prelude::*;
+use std::sync::Mutex;
 
 // Reference: https://jbootle.github.io/Misc/pippenger.pdf
 
@@ -15,14 +17,17 @@ use halo2_base::{
 // Output:
 // * new_points: length `points.len() * radix`
 // * new_bool_scalars: 2d array `ceil(scalar_bits / radix)` by `points.len() * radix`
-pub fn decompose<'v, F, FC>(
+//
+// Empirically `radix = 1` is best, so we don't use this function for now
+/*
+pub fn decompose<F, FC>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    points: &[EcPoint<F, FC::FieldPoint<'v>>],
-    scalars: &[Vec<AssignedValue<'v, F>>],
+    ctx: &mut Context<F>,
+    points: &[EcPoint<F, FC::FieldPoint>],
+    scalars: &[Vec<AssignedValue<F>>],
     max_scalar_bits_per_cell: usize,
     radix: usize,
-) -> (Vec<EcPoint<F, FC::FieldPoint<'v>>>, Vec<Vec<AssignedValue<'v, F>>>)
+) -> (Vec<EcPoint<F, FC::FieldPoint>>, Vec<Vec<AssignedValue<F>>>)
 where
     F: PrimeField,
     FC: FieldChip<F>,
@@ -34,7 +39,7 @@ where
     let mut new_points = Vec::with_capacity(radix * points.len());
     let mut new_bool_scalars = vec![Vec::with_capacity(radix * points.len()); t];
 
-    let zero_cell = chip.gate().load_zero(ctx);
+    let zero_cell = ctx.load_zero();
     for (point, scalar) in points.iter().zip(scalars.iter()) {
         assert_eq!(scalars[0].len(), scalar.len());
         let mut g = point.clone();
@@ -46,7 +51,7 @@ where
         }
         let mut bits = Vec::with_capacity(scalar_bits);
         for x in scalar {
-            let mut new_bits = chip.gate().num_to_bits(ctx, x, max_scalar_bits_per_cell);
+            let mut new_bits = chip.gate().num_to_bits(ctx, *x, max_scalar_bits_per_cell);
             bits.append(&mut new_bits);
         }
         for k in 0..t {
@@ -58,19 +63,20 @@ where
 
     (new_points, new_bool_scalars)
 }
+*/
 
 // Given points[i] and bool_scalars[j][i],
 // compute G'[j] = sum_{i=0..points.len()} points[i] * bool_scalars[j][i]
 // output is [ G'[j] + rand_point ]_{j=0..bool_scalars.len()}, rand_point
-pub fn multi_product<'v, F: PrimeField, FC, C>(
+pub fn multi_product<F: PrimeField, FC, C>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    points: &[EcPoint<F, FC::FieldPoint<'v>>],
-    bool_scalars: &[Vec<AssignedValue<'v, F>>],
+    ctx: &mut Context<F>,
+    points: &[EcPoint<F, FC::FieldPoint>],
+    bool_scalars: &[Vec<AssignedValue<F>>],
     clumping_factor: usize,
-) -> (Vec<EcPoint<F, FC::FieldPoint<'v>>>, EcPoint<F, FC::FieldPoint<'v>>)
+) -> (Vec<EcPoint<F, FC::FieldPoint>>, EcPoint<F, FC::FieldPoint>)
 where
-    FC: FieldChip<F> + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
     C: CurveAffineExt<Base = FC::FieldType>,
 {
     let c = clumping_factor; // this is `b` in Section 3 of Bootle
@@ -107,7 +113,7 @@ where
             for j in 0..(1 << i) {
                 let mut new_point = ec_add_unequal(chip, ctx, &bucket[j], point, true);
                 // if points[i] is point at infinity, do nothing
-                new_point = ec_select(chip, ctx, &bucket[j], &new_point, &is_infinity);
+                new_point = ec_select(chip, ctx, &bucket[j], &new_point, is_infinity);
                 chip.enforce_less_than(ctx, new_point.x());
                 bucket.push(new_point);
             }
@@ -138,68 +144,220 @@ where
     (acc, rand_point)
 }
 
-pub fn multi_exp<'v, F: PrimeField, FC, C>(
+/// Currently does not support the case where the final answer is the point at infinity
+pub fn multi_exp<F: PrimeField, FC, C>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    points: &[EcPoint<F, FC::FieldPoint<'v>>],
-    scalars: &[Vec<AssignedValue<'v, F>>],
+    ctx: &mut Context<F>,
+    points: &[EcPoint<F, FC::FieldPoint>],
+    scalars: Vec<Vec<AssignedValue<F>>>,
     max_scalar_bits_per_cell: usize,
-    radix: usize,
+    // radix: usize, // specialize to radix = 1
     clump_factor: usize,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+) -> EcPoint<F, FC::FieldPoint>
 where
-    FC: FieldChip<F> + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
     C: CurveAffineExt<Base = FC::FieldType>,
 {
-    let (points, bool_scalars) =
-        decompose::<F, _>(chip, ctx, points, scalars, max_scalar_bits_per_cell, radix);
-
-    /*
-    let t = bool_scalars.len();
-    let c = {
-        let m = points.len();
-        let cost = |b: usize| -> usize { (m + b - 1) / b * ((1 << b) + t) };
-        let c_max: usize = f64::from(points.len() as u32).log2().ceil() as usize;
-        let mut c_best = c_max;
-        for b in 1..c_max {
-            if cost(b) <= cost(c_best) {
-                c_best = b;
+    // let (points, bool_scalars) = decompose::<F, _>(chip, ctx, points, scalars, max_scalar_bits_per_cell, radix);
+
+    debug_assert_eq!(points.len(), scalars.len());
+    let scalar_bits = max_scalar_bits_per_cell * scalars[0].len();
+    // bool_scalars: 2d array `scalar_bits` by `points.len()`
+    let mut bool_scalars = vec![Vec::with_capacity(points.len()); scalar_bits];
+    for scalar in scalars {
+        for (scalar_chunk, bool_chunk) in
+            scalar.into_iter().zip(bool_scalars.chunks_mut(max_scalar_bits_per_cell))
+        {
+            let bits = chip.gate().num_to_bits(ctx, scalar_chunk, max_scalar_bits_per_cell);
+            for (bit, bool_bit) in bits.into_iter().zip(bool_chunk.iter_mut()) {
+                bool_bit.push(bit);
             }
         }
-        c_best
-    };
-    #[cfg(feature = "display")]
-    dbg!(clump_factor);
-    */
+    }
 
     let (mut agg, rand_point) =
-        multi_product::<F, FC, C>(chip, ctx, &points, &bool_scalars, clump_factor);
+        multi_product::<F, FC, C>(chip, ctx, points, &bool_scalars, clump_factor);
     // everything in agg has been enforced
 
     // compute sum_{k=0..t} agg[k] * 2^{radix * k} - (sum_k 2^{radix * k}) * rand_point
-    // (sum_{k=0..t} 2^{radix * k}) * rand_point = (2^{radix * t} - 1)/(2^radix - 1)
+    // (sum_{k=0..t} 2^{radix * k}) = (2^{radix * t} - 1)/(2^radix - 1)
     let mut sum = agg.pop().unwrap();
     let mut rand_sum = rand_point.clone();
     for g in agg.iter().rev() {
-        for _ in 0..radix {
-            sum = ec_double(chip, ctx, &sum);
-            rand_sum = ec_double(chip, ctx, &rand_sum);
-        }
-        sum = ec_add_unequal(chip, ctx, &sum, g, true);
+        rand_sum = ec_double(chip, ctx, &rand_sum);
+        // cannot use ec_double_and_add_unequal because you cannot guarantee that `sum != g`
+        sum = ec_double(chip, ctx, &sum);
         chip.enforce_less_than(ctx, sum.x());
+        sum = ec_add_unequal(chip, ctx, &sum, g, true);
+    }
+
+    rand_sum = ec_double(chip, ctx, &rand_sum);
+    // assume 2^scalar_bits != +-1 mod modulus::<F>()
+    rand_sum = ec_sub_unequal(chip, ctx, &rand_sum, &rand_point, false);
+
+    chip.enforce_less_than(ctx, sum.x());
+    chip.enforce_less_than(ctx, rand_sum.x());
+    ec_sub_unequal(chip, ctx, &sum, &rand_sum, true)
+}
+
+/// Multi-thread witness generation for multi-scalar multiplication.
+/// Should give exact same circuit as `multi_exp`.
+///
+/// Currently does not support the case where the final answer is the point at infinity
+pub fn multi_exp_par<F: PrimeField, FC, C>(
+    chip: &FC,
+    // we use a Mutex guard to synchronize adding threads to the thread pool
+    // these are the threads within a single Phase
+    thread_pool: &Mutex<GateThreadBuilder<F>>,
+    points: &[EcPoint<F, FC::FieldPoint>],
+    scalars: Vec<Vec<AssignedValue<F>>>,
+    max_scalar_bits_per_cell: usize,
+    // radix: usize, // specialize to radix = 1
+    clump_factor: usize,
+    phase: usize,
+) -> EcPoint<F, FC::FieldPoint>
+where
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
+    C: CurveAffineExt<Base = FC::FieldType>,
+{
+    // let (points, bool_scalars) = decompose::<F, _>(chip, ctx, points, scalars, max_scalar_bits_per_cell, radix);
 
-        if radix != 1 {
-            // Can use non-strict as long as some property of the prime is true?
-            rand_sum = ec_add_unequal(chip, ctx, &rand_sum, &rand_point, false);
+    debug_assert_eq!(points.len(), scalars.len());
+    let scalar_bits = max_scalar_bits_per_cell * scalars[0].len();
+    // bool_scalars: 2d array `scalar_bits` by `points.len()`
+    let mut bool_scalars = vec![Vec::with_capacity(points.len()); scalar_bits];
+
+    // get a main thread
+    let mut builder = thread_pool.lock().unwrap();
+    let ctx = builder.main(phase);
+    let witness_gen_only = ctx.witness_gen_only();
+    // single-threaded computation:
+    for scalar in scalars {
+        for (scalar_chunk, bool_chunk) in
+            scalar.into_iter().zip(bool_scalars.chunks_mut(max_scalar_bits_per_cell))
+        {
+            let bits = chip.gate().num_to_bits(ctx, scalar_chunk, max_scalar_bits_per_cell);
+            for (bit, bool_bit) in bits.into_iter().zip(bool_chunk.iter_mut()) {
+                bool_bit.push(bit);
+            }
         }
     }
+    // see multi-product comments for explanation of below
+
+    let c = clump_factor;
+    let num_rounds = (points.len() + c - 1) / c;
+    let rand_base = load_random_point::<F, FC, C>(chip, ctx);
+    let mut rand_points = Vec::with_capacity(num_rounds);
+    rand_points.push(rand_base);
+    for _ in 1..num_rounds {
+        rand_points.push(ec_double(chip, ctx, rand_points.last().unwrap()));
+    }
+    // we will use a different thread per round
+    // to prevent concurrency issues with context id, we generate all the ids first
+    let thread_ids = (0..num_rounds).map(|_| builder.get_new_thread_id()).collect::<Vec<_>>();
+    drop(builder);
+    // now begins multi-threading
+
+    // multi_prods is 2d vector of size `num_rounds` by `scalar_bits`
+    let (new_threads, multi_prods): (Vec<_>, Vec<_>) = points
+        .par_chunks(c)
+        .zip(rand_points.par_iter())
+        .zip(thread_ids.into_par_iter())
+        .enumerate()
+        .map(|(round, ((points_clump, rand_point), thread_id))| {
+            // compute all possible multi-products of elements in points[round * c .. (round + 1) * c]
+            // create new thread
+            let mut thread = Context::new(witness_gen_only, thread_id);
+            let ctx = &mut thread;
+            // stores { rand_point, rand_point + points[0], rand_point + points[1], rand_point + points[0] + points[1] , ... }
+            let mut bucket = Vec::with_capacity(1 << c);
+            chip.enforce_less_than(ctx, rand_point.x());
+            bucket.push(rand_point.clone());
+            for (i, point) in points_clump.iter().enumerate() {
+                // we allow for points[i] to be the point at infinity, represented by (0, 0) in affine coordinates
+                // this can be checked by points[i].y == 0 iff points[i] == O
+                let is_infinity = chip.is_zero(ctx, &point.y);
+                chip.enforce_less_than(ctx, point.x());
+
+                for j in 0..(1 << i) {
+                    let mut new_point = ec_add_unequal(chip, ctx, &bucket[j], point, true);
+                    // if points[i] is point at infinity, do nothing
+                    new_point = ec_select(chip, ctx, &bucket[j], &new_point, is_infinity);
+                    chip.enforce_less_than(ctx, new_point.x());
+                    bucket.push(new_point);
+                }
+            }
+            let multi_prods = bool_scalars
+                .iter()
+                .map(|bits| {
+                    ec_select_from_bits::<F, _>(
+                        chip,
+                        ctx,
+                        &bucket,
+                        &bits[round * c..round * c + points_clump.len()],
+                    )
+                })
+                .collect::<Vec<_>>();
+
+            (thread, multi_prods)
+        })
+        .unzip();
+    // we collect the new threads to ensure they are in a FIXED order, otherwise later `assign_threads_in` will get confused
+    thread_pool.lock().unwrap().threads[phase].extend(new_threads);
 
-    if radix == 1 {
+    // agg[j] = sum_{i=0..num_rounds} multi_prods[i][j] for j = 0..scalar_bits
+    // get a main thread
+    let mut builder = thread_pool.lock().unwrap();
+    let thread_ids = (0..scalar_bits).map(|_| builder.get_new_thread_id()).collect::<Vec<_>>();
+    drop(builder);
+    let (new_threads, mut agg): (Vec<_>, Vec<_>) = thread_ids
+        .into_par_iter()
+        .enumerate()
+        .map(|(i, thread_id)| {
+            let mut thread = Context::new(witness_gen_only, thread_id);
+            let ctx = &mut thread;
+            let mut acc = if multi_prods.len() == 1 {
+                multi_prods[0][i].clone()
+            } else {
+                ec_add_unequal(chip, ctx, &multi_prods[0][i], &multi_prods[1][i], true)
+            };
+            chip.enforce_less_than(ctx, acc.x());
+            for multi_prod in multi_prods.iter().skip(2) {
+                acc = ec_add_unequal(chip, ctx, &acc, &multi_prod[i], true);
+                chip.enforce_less_than(ctx, acc.x());
+            }
+            (thread, acc)
+        })
+        .unzip();
+    thread_pool.lock().unwrap().threads[phase].extend(new_threads);
+
+    // gets the LAST thread for single threaded work
+    // warning: don't get any earlier threads, because currently we assume equality constraints in thread i only involve threads <= i
+    let mut builder = thread_pool.lock().unwrap();
+    let ctx = builder.main(phase);
+    // we have agg[j] = G'[j] + (2^num_rounds - 1) * rand_base
+    // let rand_point = (2^num_rounds - 1) * rand_base
+    // TODO: can we remove all these random point operations somehow?
+    let mut rand_point = ec_double(chip, ctx, rand_points.last().unwrap());
+    rand_point = ec_sub_unequal(chip, ctx, &rand_point, &rand_points[0], false);
+
+    // compute sum_{k=0..scalar_bits} agg[k] * 2^k - (sum_{k=0..scalar_bits} 2^k) * rand_point
+    // (sum_{k=0..scalar_bits} 2^k) = (2^scalar_bits - 1)
+    let mut sum = agg.pop().unwrap();
+    let mut rand_sum = rand_point.clone();
+    for g in agg.iter().rev() {
         rand_sum = ec_double(chip, ctx, &rand_sum);
-        // assume 2^t != +-1 mod modulus::<F>()
-        rand_sum = ec_sub_unequal(chip, ctx, &rand_sum, &rand_point, false);
+        // cannot use ec_double_and_add_unequal because you cannot guarantee that `sum != g`
+        sum = ec_double(chip, ctx, &sum);
+        chip.enforce_less_than(ctx, sum.x());
+        sum = ec_add_unequal(chip, ctx, &sum, g, true);
     }
 
+    rand_sum = ec_double(chip, ctx, &rand_sum);
+    // assume 2^scalar_bits != +-1 mod modulus::<F>()
+    rand_sum = ec_sub_unequal(chip, ctx, &rand_sum, &rand_point, false);
+
+    chip.enforce_less_than(ctx, sum.x());
     chip.enforce_less_than(ctx, rand_sum.x());
     ec_sub_unequal(chip, ctx, &sum, &rand_sum, true)
 }
diff --git a/halo2-ecc/src/ecc/tests.rs b/halo2-ecc/src/ecc/tests.rs
index fa9d6ed5..fb9d7abf 100644
--- a/halo2-ecc/src/ecc/tests.rs
+++ b/halo2-ecc/src/ecc/tests.rs
@@ -1,6 +1,5 @@
 #![allow(unused_assignments, unused_imports, unused_variables)]
 use super::*;
-use crate::fields::fp::{FpConfig, FpStrategy};
 use crate::fields::fp2::Fp2Chip;
 use crate::halo2_proofs::{
     circuit::*,
@@ -9,158 +8,73 @@ use crate::halo2_proofs::{
     plonk::*,
 };
 use group::Group;
+use halo2_base::gates::builder::RangeCircuitBuilder;
+use halo2_base::gates::RangeChip;
 use halo2_base::utils::bigint_to_fe;
 use halo2_base::SKIP_FIRST_PASS;
-use halo2_base::{
-    gates::range::RangeStrategy, utils::value_to_option, utils::PrimeField, ContextParams,
-};
+use halo2_base::{gates::range::RangeStrategy, utils::value_to_option};
 use num_bigint::{BigInt, RandBigInt};
+use rand_core::OsRng;
 use std::marker::PhantomData;
 use std::ops::Neg;
 
-#[derive(Default)]
-pub struct MyCircuit<F> {
-    pub P: Option<G1Affine>,
-    pub Q: Option<G1Affine>,
-    pub _marker: PhantomData<F>,
-}
-
-const NUM_ADVICE: usize = 2;
-const NUM_FIXED: usize = 2;
-
-impl<F: PrimeField> Circuit<F> for MyCircuit<F> {
-    type Config = FpConfig<F, Fq>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self { P: None, Q: None, _marker: PhantomData }
+fn basic_g1_tests<F: PrimeField>(
+    ctx: &mut Context<F>,
+    lookup_bits: usize,
+    limb_bits: usize,
+    num_limbs: usize,
+    P: G1Affine,
+    Q: G1Affine,
+) {
+    std::env::set_var("LOOKUP_BITS", lookup_bits.to_string());
+    let range = RangeChip::<F>::default(lookup_bits);
+    let fp_chip = FpChip::<F, Fq>::new(&range, limb_bits, num_limbs);
+    let chip = EccChip::new(&fp_chip);
+
+    let P_assigned = chip.load_private(ctx, (P.x, P.y));
+    let Q_assigned = chip.load_private(ctx, (Q.x, Q.y));
+
+    // test add_unequal
+    chip.field_chip.enforce_less_than(ctx, P_assigned.x());
+    chip.field_chip.enforce_less_than(ctx, Q_assigned.x());
+    let sum = chip.add_unequal(ctx, &P_assigned, &Q_assigned, false);
+    assert_eq!(sum.x.truncation.to_bigint(limb_bits), sum.x.value);
+    assert_eq!(sum.y.truncation.to_bigint(limb_bits), sum.y.value);
+    {
+        let actual_sum = G1Affine::from(P + Q);
+        assert_eq!(bigint_to_fe::<Fq>(&sum.x.value), actual_sum.x);
+        assert_eq!(bigint_to_fe::<Fq>(&sum.y.value), actual_sum.y);
     }
-
-    fn configure(meta: &mut ConstraintSystem<F>) -> Self::Config {
-        FpConfig::<F, _>::configure(
-            meta,
-            FpStrategy::Simple,
-            &[NUM_ADVICE],
-            &[1],
-            NUM_FIXED,
-            22,
-            88,
-            3,
-            modulus::<Fq>(),
-            0,
-            23,
-        )
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<F>,
-    ) -> Result<(), Error> {
-        config.load_lookup_table(&mut layouter)?;
-        let chip = EccChip::construct(config.clone());
-
-        let mut first_pass = SKIP_FIRST_PASS;
-
-        layouter.assign_region(
-            || "ecc",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let mut aux = chip.field_chip().new_context(region);
-                let ctx = &mut aux;
-
-                let P_assigned = chip.load_private(
-                    ctx,
-                    match self.P {
-                        Some(P) => (Value::known(P.x), Value::known(P.y)),
-                        None => (Value::unknown(), Value::unknown()),
-                    },
-                );
-                let Q_assigned = chip.load_private(
-                    ctx,
-                    match self.Q {
-                        Some(Q) => (Value::known(Q.x), Value::known(Q.y)),
-                        None => (Value::unknown(), Value::unknown()),
-                    },
-                );
-
-                // test add_unequal
-                {
-                    chip.field_chip.enforce_less_than(ctx, P_assigned.x());
-                    chip.field_chip.enforce_less_than(ctx, Q_assigned.x());
-                    let sum = chip.add_unequal(ctx, &P_assigned, &Q_assigned, false);
-                    assert_eq!(
-                        value_to_option(sum.x.truncation.to_bigint(config.limb_bits)),
-                        value_to_option(sum.x.value.clone())
-                    );
-                    assert_eq!(
-                        value_to_option(sum.y.truncation.to_bigint(config.limb_bits)),
-                        value_to_option(sum.y.value.clone())
-                    );
-                    if self.P.is_some() {
-                        let actual_sum = G1Affine::from(self.P.unwrap() + self.Q.unwrap());
-                        sum.x.value.map(|v| assert_eq!(bigint_to_fe::<Fq>(&v), actual_sum.x));
-                        sum.y.value.map(|v| assert_eq!(bigint_to_fe::<Fq>(&v), actual_sum.y));
-                    }
-                    println!("add unequal witness OK");
-                }
-
-                // test double
-                {
-                    let doub = chip.double(ctx, &P_assigned);
-                    assert_eq!(
-                        value_to_option(doub.x.truncation.to_bigint(config.limb_bits)),
-                        value_to_option(doub.x.value.clone())
-                    );
-                    assert_eq!(
-                        value_to_option(doub.y.truncation.to_bigint(config.limb_bits)),
-                        value_to_option(doub.y.value.clone())
-                    );
-                    if self.P.is_some() {
-                        let actual_doub = G1Affine::from(self.P.unwrap() * Fr::from(2u64));
-                        doub.x.value.map(|v| assert_eq!(bigint_to_fe::<Fq>(&v), actual_doub.x));
-                        doub.y.value.map(|v| assert_eq!(bigint_to_fe::<Fq>(&v), actual_doub.y));
-                    }
-                    println!("double witness OK");
-                }
-
-                chip.field_chip.finalize(ctx);
-
-                #[cfg(feature = "display")]
-                {
-                    println!("Using {NUM_ADVICE} advice columns and {NUM_FIXED} fixed columns");
-                    println!("total advice cells: {}", ctx.total_advice);
-                    let (const_rows, _) = ctx.fixed_stats();
-                    println!("maximum rows used by a fixed column: {const_rows}");
-                }
-
-                Ok(())
-            },
-        )
+    println!("add unequal witness OK");
+
+    // test double
+    let doub = chip.double(ctx, &P_assigned);
+    assert_eq!(doub.x.truncation.to_bigint(limb_bits), doub.x.value);
+    assert_eq!(doub.y.truncation.to_bigint(limb_bits), doub.y.value);
+    {
+        let actual_doub = G1Affine::from(P * Fr::from(2u64));
+        assert_eq!(bigint_to_fe::<Fq>(&doub.x.value), actual_doub.x);
+        assert_eq!(bigint_to_fe::<Fq>(&doub.y.value), actual_doub.y);
     }
+    println!("double witness OK");
 }
 
-#[cfg(test)]
 #[test]
 fn test_ecc() {
     let k = 23;
-    let mut rng = rand::thread_rng();
+    let P = G1Affine::random(OsRng);
+    let Q = G1Affine::random(OsRng);
 
-    let P = Some(G1Affine::random(&mut rng));
-    let Q = Some(G1Affine::random(&mut rng));
+    let mut builder = GateThreadBuilder::<Fr>::mock();
+    basic_g1_tests(builder.main(0), k - 1, 88, 3, P, Q);
 
-    let circuit = MyCircuit::<Fr> { P, Q, _marker: PhantomData };
+    builder.config(k, Some(20));
+    let circuit = RangeCircuitBuilder::mock(builder);
 
-    let prover = MockProver::run(k, &circuit, vec![]).unwrap();
-    prover.assert_satisfied();
+    MockProver::run(k as u32, &circuit, vec![]).unwrap().assert_satisfied();
 }
 
 #[cfg(feature = "dev-graph")]
-#[cfg(test)]
 #[test]
 fn plot_ecc() {
     let k = 10;
@@ -170,7 +84,14 @@ fn plot_ecc() {
     root.fill(&WHITE).unwrap();
     let root = root.titled("Ecc Layout", ("sans-serif", 60)).unwrap();
 
-    let circuit = MyCircuit::<Fr>::default();
+    let P = G1Affine::random(OsRng);
+    let Q = G1Affine::random(OsRng);
+
+    let mut builder = GateThreadBuilder::<Fr>::keygen();
+    basic_g1_tests(builder.main(0), 22, 88, 3, P, Q);
+
+    builder.config(k, Some(10));
+    let circuit = RangeCircuitBuilder::mock(builder);
 
     halo2_proofs::dev::CircuitLayout::default().render(k, &circuit, &root).unwrap();
 }
diff --git a/halo2-ecc/src/fields/fp.rs b/halo2-ecc/src/fields/fp.rs
index 1329726a..a97f1d11 100644
--- a/halo2-ecc/src/fields/fp.rs
+++ b/halo2-ecc/src/fields/fp.rs
@@ -1,43 +1,33 @@
-use super::{FieldChip, PrimeFieldChip, Selectable};
+use super::{FieldChip, PrimeField, PrimeFieldChip, Selectable};
 use crate::bigint::{
     add_no_carry, big_is_equal, big_is_zero, carry_mod, check_carry_mod_to_zero, mul_no_carry,
     scalar_mul_and_add_no_carry, scalar_mul_no_carry, select, select_by_indicator, sub,
     sub_no_carry, CRTInteger, FixedCRTInteger, OverflowInteger,
 };
-use crate::halo2_proofs::{
-    circuit::{Layouter, Region, Value},
-    halo2curves::CurveAffine,
-    plonk::{ConstraintSystem, Error},
-};
+use crate::halo2_proofs::halo2curves::CurveAffine;
+use halo2_base::gates::RangeChip;
+use halo2_base::utils::decompose_bigint;
 use halo2_base::{
-    gates::{
-        range::{RangeConfig, RangeStrategy},
-        GateInstructions, RangeInstructions,
-    },
-    utils::{
-        bigint_to_fe, biguint_to_fe, bit_length, decompose_bigint_option, decompose_biguint,
-        fe_to_biguint, modulus, PrimeField,
-    },
-    AssignedValue, Context, ContextParams,
+    gates::{range::RangeConfig, GateInstructions, RangeInstructions},
+    utils::{bigint_to_fe, biguint_to_fe, bit_length, decompose_biguint, fe_to_biguint, modulus},
+    AssignedValue, Context,
     QuantumCell::{Constant, Existing},
 };
 use num_bigint::{BigInt, BigUint};
 use num_traits::One;
-use serde::{Deserialize, Serialize};
 use std::{cmp::max, marker::PhantomData};
 
-#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
-pub enum FpStrategy {
-    Simple,
-    SimplePlus,
-}
+pub type BaseFieldChip<'range, C> =
+    FpChip<'range, <C as CurveAffine>::ScalarExt, <C as CurveAffine>::Base>;
 
-pub type BaseFieldChip<C> = FpConfig<<C as CurveAffine>::ScalarExt, <C as CurveAffine>::Base>;
+pub type FpConfig<F> = RangeConfig<F>;
+
+// `Fp` always needs to be `BigPrimeField`. We may later want to support `F` being just `ScalarField`, but for optimization reasons we assume it is also `BigPrimeField` for now.
 
 #[derive(Clone, Debug)]
-pub struct FpConfig<F: PrimeField, Fp: PrimeField> {
-    pub range: RangeConfig<F>,
-    // pub bigint_chip: BigIntConfig<F>,
+pub struct FpChip<'range, F: PrimeField, Fp: PrimeField> {
+    pub range: &'range RangeChip<F>,
+
     pub limb_bits: usize,
     pub num_limbs: usize,
 
@@ -55,45 +45,10 @@ pub struct FpConfig<F: PrimeField, Fp: PrimeField> {
     _marker: PhantomData<Fp>,
 }
 
-impl<F: PrimeField, Fp: PrimeField> FpConfig<F, Fp> {
-    pub fn configure(
-        meta: &mut ConstraintSystem<F>,
-        strategy: FpStrategy,
-        num_advice: &[usize],
-        num_lookup_advice: &[usize],
-        num_fixed: usize,
-        lookup_bits: usize,
-        limb_bits: usize,
-        num_limbs: usize,
-        p: BigUint,
-        gate_context_id: usize,
-        k: usize,
-    ) -> Self {
-        let range = RangeConfig::<F>::configure(
-            meta,
-            match strategy {
-                FpStrategy::Simple => RangeStrategy::Vertical,
-                FpStrategy::SimplePlus => RangeStrategy::PlonkPlus,
-            },
-            num_advice,
-            num_lookup_advice,
-            num_fixed,
-            lookup_bits,
-            gate_context_id,
-            k,
-        );
-
-        Self::construct(range, limb_bits, num_limbs, p)
-    }
-
-    pub fn construct(
-        range: RangeConfig<F>,
-        // bigint_chip: BigIntConfig<F>,
-        limb_bits: usize,
-        num_limbs: usize,
-        p: BigUint,
-    ) -> Self {
+impl<'range, F: PrimeField, Fp: PrimeField> FpChip<'range, F, Fp> {
+    pub fn new(range: &'range RangeChip<F>, limb_bits: usize, num_limbs: usize) -> Self {
         let limb_mask = (BigUint::from(1u64) << limb_bits) - 1usize;
+        let p = modulus::<Fp>();
         let p_limbs = decompose_biguint(&p, num_limbs, limb_bits);
         let native_modulus = modulus::<F>();
         let p_native = biguint_to_fe(&(&p % &native_modulus));
@@ -105,9 +60,8 @@ impl<F: PrimeField, Fp: PrimeField> FpConfig<F, Fp> {
             limb_bases.push(limb_base * limb_bases.last().unwrap());
         }
 
-        FpConfig {
+        Self {
             range,
-            // bigint_chip,
             limb_bits,
             num_limbs,
             num_limbs_bits: bit_length(num_limbs as u64),
@@ -123,54 +77,29 @@ impl<F: PrimeField, Fp: PrimeField> FpConfig<F, Fp> {
         }
     }
 
-    pub fn new_context<'a, 'b>(&'b self, region: Region<'a, F>) -> Context<'a, F> {
-        Context::new(
-            region,
-            ContextParams {
-                max_rows: self.range.gate.max_rows,
-                num_context_ids: 1,
-                fixed_columns: self.range.gate.constants.clone(),
-            },
-        )
-    }
-
-    pub fn load_lookup_table(&self, layouter: &mut impl Layouter<F>) -> Result<(), Error> {
-        self.range.load_lookup_table(layouter)
-    }
-
-    pub fn enforce_less_than_p<'v>(&self, ctx: &mut Context<'v, F>, a: &CRTInteger<'v, F>) {
+    pub fn enforce_less_than_p(&self, ctx: &mut Context<F>, a: &CRTInteger<F>) {
         // a < p iff a - p has underflow
         let mut borrow: Option<AssignedValue<F>> = None;
-        for (p_limb, a_limb) in self.p_limbs.iter().zip(a.truncation.limbs.iter()) {
+        for (&p_limb, &a_limb) in self.p_limbs.iter().zip(a.truncation.limbs.iter()) {
             let lt = match borrow {
-                None => self.range.is_less_than(
-                    ctx,
-                    Existing(a_limb),
-                    Constant(*p_limb),
-                    self.limb_bits,
-                ),
+                None => self.range.is_less_than(ctx, a_limb, Constant(p_limb), self.limb_bits),
                 Some(borrow) => {
-                    let plus_borrow =
-                        self.range.gate.add(ctx, Constant(*p_limb), Existing(&borrow));
+                    let plus_borrow = self.range.gate.add(ctx, Constant(p_limb), borrow);
                     self.range.is_less_than(
                         ctx,
                         Existing(a_limb),
-                        Existing(&plus_borrow),
+                        Existing(plus_borrow),
                         self.limb_bits,
                     )
                 }
             };
             borrow = Some(lt);
         }
-        self.range.gate.assert_is_const(ctx, &borrow.unwrap(), F::one())
-    }
-
-    pub fn finalize(&self, ctx: &mut Context<'_, F>) -> usize {
-        self.range.finalize(ctx)
+        self.range.gate.assert_is_const(ctx, &borrow.unwrap(), &F::one());
     }
 }
 
-impl<F: PrimeField, Fp: PrimeField> PrimeFieldChip<F> for FpConfig<F, Fp> {
+impl<'range, F: PrimeField, Fp: PrimeField> PrimeFieldChip<F> for FpChip<'range, F, Fp> {
     fn num_limbs(&self) -> usize {
         self.num_limbs
     }
@@ -182,46 +111,45 @@ impl<F: PrimeField, Fp: PrimeField> PrimeFieldChip<F> for FpConfig<F, Fp> {
     }
 }
 
-impl<F: PrimeField, Fp: PrimeField> FieldChip<F> for FpConfig<F, Fp> {
+impl<'range, F: PrimeField, Fp: PrimeField> FieldChip<F> for FpChip<'range, F, Fp> {
     const PRIME_FIELD_NUM_BITS: u32 = Fp::NUM_BITS;
     type ConstantType = BigUint;
-    type WitnessType = Value<BigInt>;
-    type FieldPoint<'v> = CRTInteger<'v, F>;
+    type WitnessType = BigInt;
+    type FieldPoint = CRTInteger<F>;
     type FieldType = Fp;
-    type RangeChip = RangeConfig<F>;
+    type RangeChip = RangeChip<F>;
 
     fn native_modulus(&self) -> &BigUint {
         &self.native_modulus
     }
-    fn range(&self) -> &Self::RangeChip {
-        &self.range
+    fn range(&self) -> &'range Self::RangeChip {
+        self.range
     }
     fn limb_bits(&self) -> usize {
         self.limb_bits
     }
 
-    fn get_assigned_value(&self, x: &CRTInteger<F>) -> Value<Fp> {
-        x.value.as_ref().map(|x| bigint_to_fe::<Fp>(&(x % &self.p)))
+    fn get_assigned_value(&self, x: &CRTInteger<F>) -> Fp {
+        bigint_to_fe(&(&x.value % &self.p))
     }
 
     fn fe_to_constant(x: Fp) -> BigUint {
         fe_to_biguint(&x)
     }
 
-    fn fe_to_witness(x: &Value<Fp>) -> Value<BigInt> {
-        x.map(|x| BigInt::from(fe_to_biguint(&x)))
+    fn fe_to_witness(x: &Fp) -> BigInt {
+        BigInt::from(fe_to_biguint(x))
     }
 
-    fn load_private<'v>(&self, ctx: &mut Context<'_, F>, a: Value<BigInt>) -> CRTInteger<'v, F> {
-        let a_vec = decompose_bigint_option::<F>(a.as_ref(), self.num_limbs, self.limb_bits);
-        let limbs = self.range.gate().assign_witnesses(ctx, a_vec);
+    fn load_private(&self, ctx: &mut Context<F>, a: BigInt) -> CRTInteger<F> {
+        let a_vec = decompose_bigint::<F>(&a, self.num_limbs, self.limb_bits);
+        let limbs = ctx.assign_witnesses(a_vec);
 
         let a_native = OverflowInteger::<F>::evaluate(
             self.range.gate(),
-            //&self.bigint_chip,
             ctx,
-            &limbs,
-            self.limb_bases.iter().cloned(),
+            limbs.iter().copied(),
+            self.limb_bases.iter().copied(),
         );
 
         let a_loaded =
@@ -232,62 +160,57 @@ impl<F: PrimeField, Fp: PrimeField> FieldChip<F> for FpConfig<F, Fp> {
         a_loaded
     }
 
-    fn load_constant<'v>(&self, ctx: &mut Context<'_, F>, a: BigUint) -> CRTInteger<'v, F> {
-        let a_native = self.range.gate.assign_region_last(
-            ctx,
-            vec![Constant(biguint_to_fe(&(&a % modulus::<F>())))],
-            vec![],
-        );
-        let a_limbs = self.range.gate().assign_region(
-            ctx,
-            decompose_biguint::<F>(&a, self.num_limbs, self.limb_bits).into_iter().map(Constant),
-            vec![],
-        );
+    fn load_constant(&self, ctx: &mut Context<F>, a: BigUint) -> CRTInteger<F> {
+        let a_native = ctx.load_constant(biguint_to_fe(&(&a % self.native_modulus())));
+        let a_limbs = decompose_biguint::<F>(&a, self.num_limbs, self.limb_bits)
+            .into_iter()
+            .map(|c| ctx.load_constant(c))
+            .collect();
 
         CRTInteger::construct(
             OverflowInteger::construct(a_limbs, self.limb_bits),
             a_native,
-            Value::known(BigInt::from(a)),
+            BigInt::from(a),
         )
     }
 
     // signed overflow BigInt functions
-    fn add_no_carry<'v>(
+    fn add_no_carry(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &CRTInteger<'v, F>,
-        b: &CRTInteger<'v, F>,
-    ) -> CRTInteger<'v, F> {
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
+        b: &CRTInteger<F>,
+    ) -> CRTInteger<F> {
         add_no_carry::crt::<F>(self.range.gate(), ctx, a, b)
     }
 
-    fn add_constant_no_carry<'v>(
+    fn add_constant_no_carry(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &CRTInteger<'v, F>,
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
         c: BigUint,
-    ) -> CRTInteger<'v, F> {
+    ) -> CRTInteger<F> {
         let c = FixedCRTInteger::from_native(c, self.num_limbs, self.limb_bits);
         let c_native = biguint_to_fe::<F>(&(&c.value % modulus::<F>()));
         let mut limbs = Vec::with_capacity(a.truncation.limbs.len());
         for (a_limb, c_limb) in a.truncation.limbs.iter().zip(c.truncation.limbs.into_iter()) {
-            let limb = self.range.gate.add(ctx, Existing(a_limb), Constant(c_limb));
+            let limb = self.range.gate.add(ctx, *a_limb, Constant(c_limb));
             limbs.push(limb);
         }
-        let native = self.range.gate.add(ctx, Existing(&a.native), Constant(c_native));
+        let native = self.range.gate.add(ctx, a.native, Constant(c_native));
         let trunc =
             OverflowInteger::construct(limbs, max(a.truncation.max_limb_bits, self.limb_bits) + 1);
-        let value = a.value.as_ref().map(|a| a + BigInt::from(c.value));
+        let value = &a.value + BigInt::from(c.value);
 
         CRTInteger::construct(trunc, native, value)
     }
 
-    fn sub_no_carry<'v>(
+    fn sub_no_carry(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &CRTInteger<'v, F>,
-        b: &CRTInteger<'v, F>,
-    ) -> CRTInteger<'v, F> {
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
+        b: &CRTInteger<F>,
+    ) -> CRTInteger<F> {
         sub_no_carry::crt::<F>(self.range.gate(), ctx, a, b)
     }
 
@@ -295,47 +218,47 @@ impl<F: PrimeField, Fp: PrimeField> FieldChip<F> for FpConfig<F, Fp> {
     // Output: p - a if a != 0, else a
     // Assume the actual value of `a` equals `a.truncation`
     // Constrains a.truncation <= p using subtraction with carries
-    fn negate<'v>(&self, ctx: &mut Context<'v, F>, a: &CRTInteger<'v, F>) -> CRTInteger<'v, F> {
+    fn negate(&self, ctx: &mut Context<F>, a: &CRTInteger<F>) -> CRTInteger<F> {
         // Compute p - a.truncation using carries
         let p = self.load_constant(ctx, self.p.to_biguint().unwrap());
         let (out_or_p, underflow) =
             sub::crt::<F>(self.range(), ctx, &p, a, self.limb_bits, self.limb_bases[1]);
         // constrain underflow to equal 0
-        self.range.gate.assert_is_const(ctx, &underflow, F::zero());
+        self.range.gate.assert_is_const(ctx, &underflow, &F::zero());
 
         let a_is_zero = big_is_zero::assign::<F>(self.gate(), ctx, &a.truncation);
-        select::crt::<F>(self.range.gate(), ctx, a, &out_or_p, &a_is_zero)
+        select::crt::<F>(self.range.gate(), ctx, a, &out_or_p, a_is_zero)
     }
 
-    fn scalar_mul_no_carry<'v>(
+    fn scalar_mul_no_carry(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &CRTInteger<'v, F>,
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
         c: i64,
-    ) -> CRTInteger<'v, F> {
+    ) -> CRTInteger<F> {
         scalar_mul_no_carry::crt::<F>(self.range.gate(), ctx, a, c)
     }
 
-    fn scalar_mul_and_add_no_carry<'v>(
+    fn scalar_mul_and_add_no_carry(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &CRTInteger<'v, F>,
-        b: &CRTInteger<'v, F>,
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
+        b: &CRTInteger<F>,
         c: i64,
-    ) -> CRTInteger<'v, F> {
+    ) -> CRTInteger<F> {
         scalar_mul_and_add_no_carry::crt::<F>(self.range.gate(), ctx, a, b, c)
     }
 
-    fn mul_no_carry<'v>(
+    fn mul_no_carry(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &CRTInteger<'v, F>,
-        b: &CRTInteger<'v, F>,
-    ) -> CRTInteger<'v, F> {
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
+        b: &CRTInteger<F>,
+    ) -> CRTInteger<F> {
         mul_no_carry::crt::<F>(self.range.gate(), ctx, a, b, self.num_limbs_log2_ceil)
     }
 
-    fn check_carry_mod_to_zero<'v>(&self, ctx: &mut Context<'v, F>, a: &CRTInteger<'v, F>) {
+    fn check_carry_mod_to_zero(&self, ctx: &mut Context<F>, a: &CRTInteger<F>) {
         check_carry_mod_to_zero::crt::<F>(
             self.range(),
             // &self.bigint_chip,
@@ -351,7 +274,7 @@ impl<F: PrimeField, Fp: PrimeField> FieldChip<F> for FpConfig<F, Fp> {
         )
     }
 
-    fn carry_mod<'v>(&self, ctx: &mut Context<'v, F>, a: &CRTInteger<'v, F>) -> CRTInteger<'v, F> {
+    fn carry_mod(&self, ctx: &mut Context<F>, a: &CRTInteger<F>) -> CRTInteger<F> {
         carry_mod::crt::<F>(
             self.range(),
             // &self.bigint_chip,
@@ -367,10 +290,10 @@ impl<F: PrimeField, Fp: PrimeField> FieldChip<F> for FpConfig<F, Fp> {
         )
     }
 
-    fn range_check<'v>(
+    fn range_check(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &CRTInteger<'v, F>,
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
         max_bits: usize, // the maximum bits that a.value could take
     ) {
         let n = self.limb_bits;
@@ -379,111 +302,97 @@ impl<F: PrimeField, Fp: PrimeField> FieldChip<F> for FpConfig<F, Fp> {
         let last_limb_bits = max_bits - n * (k - 1);
 
         #[cfg(debug_assertions)]
-        a.value.as_ref().map(|v| {
-            debug_assert!(v.bits() as usize <= max_bits);
-        });
+        debug_assert!(a.value.bits() as usize <= max_bits);
 
         // range check limbs of `a` are in [0, 2^n) except last limb should be in [0, 2^last_limb_bits)
         for (i, cell) in a.truncation.limbs.iter().enumerate() {
             let limb_bits = if i == k - 1 { last_limb_bits } else { n };
-            self.range.range_check(ctx, cell, limb_bits);
+            self.range.range_check(ctx, *cell, limb_bits);
         }
     }
 
-    fn enforce_less_than<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>) {
+    fn enforce_less_than(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) {
         self.enforce_less_than_p(ctx, a)
     }
 
-    fn is_soft_zero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &CRTInteger<'v, F>,
-    ) -> AssignedValue<'v, F> {
-        let is_zero = big_is_zero::crt::<F>(self.gate(), ctx, a);
+    fn is_soft_zero(&self, ctx: &mut Context<F>, a: &CRTInteger<F>) -> AssignedValue<F> {
+        big_is_zero::crt::<F>(self.gate(), ctx, a)
 
+        // CHECK: I don't think this is necessary:
         // underflow != 0 iff carry < p
-        let p = self.load_constant(ctx, self.p.to_biguint().unwrap());
-        let (_, underflow) =
-            sub::crt::<F>(self.range(), ctx, a, &p, self.limb_bits, self.limb_bases[1]);
-        let is_underflow_zero = self.gate().is_zero(ctx, &underflow);
-        let range_check = self.gate().not(ctx, Existing(&is_underflow_zero));
+        // let p = self.load_constant(ctx, self.p.to_biguint().unwrap());
+        // let (_, underflow) =
+        //     sub::crt::<F>(self.range(), ctx, a, &p, self.limb_bits, self.limb_bases[1]);
+        // let is_underflow_zero = self.gate().is_zero(ctx, &underflow);
+        // let range_check = self.gate().not(ctx, Existing(&is_underflow_zero));
 
-        self.gate().and(ctx, Existing(&is_zero), Existing(&range_check))
+        // self.gate().and(ctx, is_zero, range_check)
     }
 
-    fn is_soft_nonzero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &CRTInteger<'v, F>,
-    ) -> AssignedValue<'v, F> {
+    fn is_soft_nonzero(&self, ctx: &mut Context<F>, a: &CRTInteger<F>) -> AssignedValue<F> {
         let is_zero = big_is_zero::crt::<F>(self.gate(), ctx, a);
-        let is_nonzero = self.gate().not(ctx, Existing(&is_zero));
+        let is_nonzero = self.gate().not(ctx, is_zero);
 
         // underflow != 0 iff carry < p
         let p = self.load_constant(ctx, self.p.to_biguint().unwrap());
         let (_, underflow) =
             sub::crt::<F>(self.range(), ctx, a, &p, self.limb_bits, self.limb_bases[1]);
-        let is_underflow_zero = self.gate().is_zero(ctx, &underflow);
-        let range_check = self.gate().not(ctx, Existing(&is_underflow_zero));
+        let is_underflow_zero = self.gate().is_zero(ctx, underflow);
+        let range_check = self.gate().not(ctx, is_underflow_zero);
 
-        self.gate().and(ctx, Existing(&is_nonzero), Existing(&range_check))
+        self.gate().and(ctx, is_nonzero, range_check)
     }
 
     // assuming `a` has been range checked to be a proper BigInt
     // constrain the witness `a` to be `< p`
     // then check if `a` is 0
-    fn is_zero<'v>(&self, ctx: &mut Context<'v, F>, a: &CRTInteger<'v, F>) -> AssignedValue<'v, F> {
+    fn is_zero(&self, ctx: &mut Context<F>, a: &CRTInteger<F>) -> AssignedValue<F> {
         self.enforce_less_than_p(ctx, a);
         // just check truncated limbs are all 0 since they determine the native value
         big_is_zero::positive::<F>(self.gate(), ctx, &a.truncation)
     }
 
-    fn is_equal_unenforced<'v>(
+    fn is_equal_unenforced(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> AssignedValue<F> {
         big_is_equal::assign::<F>(self.gate(), ctx, &a.truncation, &b.truncation)
     }
 
     // assuming `a, b` have been range checked to be a proper BigInt
     // constrain the witnesses `a, b` to be `< p`
     // then assert `a == b` as BigInts
-    fn assert_equal<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) {
+    fn assert_equal(&self, ctx: &mut Context<F>, a: &Self::FieldPoint, b: &Self::FieldPoint) {
         self.enforce_less_than_p(ctx, a);
         self.enforce_less_than_p(ctx, b);
         // a.native and b.native are derived from `a.truncation, b.truncation`, so no need to check if they're equal
-        for (limb_a, limb_b) in a.truncation.limbs.iter().zip(a.truncation.limbs.iter()) {
-            self.range.gate.assert_equal(ctx, Existing(limb_a), Existing(limb_b));
+        for (limb_a, limb_b) in a.truncation.limbs.iter().zip(b.truncation.limbs.iter()) {
+            ctx.constrain_equal(limb_a, limb_b);
         }
     }
 }
 
-impl<F: PrimeField, Fp: PrimeField> Selectable<F> for FpConfig<F, Fp> {
-    type Point<'v> = CRTInteger<'v, F>;
+impl<'range, F: PrimeField, Fp: PrimeField> Selectable<F> for FpChip<'range, F, Fp> {
+    type Point = CRTInteger<F>;
 
-    fn select<'v>(
+    fn select(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &CRTInteger<'v, F>,
-        b: &CRTInteger<'v, F>,
-        sel: &AssignedValue<'v, F>,
-    ) -> CRTInteger<'v, F> {
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
+        b: &CRTInteger<F>,
+        sel: AssignedValue<F>,
+    ) -> CRTInteger<F> {
         select::crt::<F>(self.range.gate(), ctx, a, b, sel)
     }
 
-    fn select_by_indicator<'v>(
+    fn select_by_indicator(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &[CRTInteger<'v, F>],
-        coeffs: &[AssignedValue<'v, F>],
-    ) -> CRTInteger<'v, F> {
+        ctx: &mut Context<F>,
+        a: &[CRTInteger<F>],
+        coeffs: &[AssignedValue<F>],
+    ) -> CRTInteger<F> {
         select_by_indicator::crt::<F>(self.range.gate(), ctx, a, coeffs, &self.limb_bases)
     }
 }
diff --git a/halo2-ecc/src/fields/fp12.rs b/halo2-ecc/src/fields/fp12.rs
index f130fd52..b82305ca 100644
--- a/halo2-ecc/src/fields/fp12.rs
+++ b/halo2-ecc/src/fields/fp12.rs
@@ -1,10 +1,9 @@
-use super::{FieldChip, FieldExtConstructor, FieldExtPoint, PrimeFieldChip};
-use crate::halo2_proofs::{arithmetic::Field, circuit::Value};
+use super::{FieldChip, FieldExtConstructor, FieldExtPoint, PrimeField, PrimeFieldChip};
+use crate::halo2_proofs::arithmetic::Field;
 use halo2_base::{
     gates::{GateInstructions, RangeInstructions},
-    utils::{fe_to_biguint, value_to_option, PrimeField},
+    utils::fe_to_biguint,
     AssignedValue, Context,
-    QuantumCell::Existing,
 };
 use num_bigint::{BigInt, BigUint};
 use std::marker::PhantomData;
@@ -15,6 +14,7 @@ use std::marker::PhantomData;
 /// be irreducible over Fp; i.e., in order for -1 to not be a square (quadratic residue) in Fp
-/// This means we store an Fp12 point as `\sum_{i = 0}^6 (a_{i0} + a_{i1} * u) * w^i`
+/// This means we store an Fp12 point as `\sum_{i = 0}^5 (a_{i0} + a_{i1} * u) * w^i`
 /// This is encoded in an FqPoint of degree 12 as `(a_{00}, ..., a_{50}, a_{01}, ..., a_{51})`
+#[derive(Clone, Copy, Debug)]
 pub struct Fp12Chip<'a, F: PrimeField, FpChip: PrimeFieldChip<F>, Fp12: Field, const XI_0: i64>
 where
     FpChip::FieldType: PrimeField,
@@ -34,16 +34,16 @@ where
     Fp12: Field + FieldExtConstructor<FpChip::FieldType, 12>,
 {
     /// User must construct an `FpChip` first using a config. This is intended so everything shares a single `FlexGateChip`, which is needed for the column allocation to work.
-    pub fn construct(fp_chip: &'a FpChip) -> Self {
+    pub fn new(fp_chip: &'a FpChip) -> Self {
         Self { fp_chip, _f: PhantomData, _fp12: PhantomData }
     }
 
-    pub fn fp2_mul_no_carry<'v>(
+    pub fn fp2_mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-        fp2_pt: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-    ) -> FieldExtPoint<FpChip::FieldPoint<'v>> {
+        ctx: &mut Context<F>,
+        a: &FieldExtPoint<FpChip::FieldPoint>,
+        fp2_pt: &FieldExtPoint<FpChip::FieldPoint>,
+    ) -> FieldExtPoint<FpChip::FieldPoint> {
         assert_eq!(a.coeffs.len(), 12);
         assert_eq!(fp2_pt.coeffs.len(), 2);
 
@@ -64,11 +64,11 @@ where
     }
 
     // for \sum_i (a_i + b_i u) w^i, returns \sum_i (-1)^i (a_i + b_i u) w^i
-    pub fn conjugate<'v>(
+    pub fn conjugate(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-    ) -> FieldExtPoint<FpChip::FieldPoint<'v>> {
+        ctx: &mut Context<F>,
+        a: &FieldExtPoint<FpChip::FieldPoint>,
+    ) -> FieldExtPoint<FpChip::FieldPoint> {
         assert_eq!(a.coeffs.len(), 12);
 
         let coeffs = a
@@ -82,11 +82,11 @@ where
 }
 
 /// multiply (a0 + a1 * u) * (XI0 + u) without carry
-pub fn mul_no_carry_w6<'v, F: PrimeField, FC: FieldChip<F>, const XI_0: i64>(
+pub fn mul_no_carry_w6<F: PrimeField, FC: FieldChip<F>, const XI_0: i64>(
     fp_chip: &FC,
-    ctx: &mut Context<'v, F>,
-    a: &FieldExtPoint<FC::FieldPoint<'v>>,
-) -> FieldExtPoint<FC::FieldPoint<'v>> {
+    ctx: &mut Context<F>,
+    a: &FieldExtPoint<FC::FieldPoint>,
+) -> FieldExtPoint<FC::FieldPoint> {
     assert_eq!(a.coeffs.len(), 2);
     let (a0, a1) = (&a.coeffs[0], &a.coeffs[1]);
     // (a0 + a1 u) * (XI_0 + u) = (a0 * XI_0 - a1) + (a1 * XI_0 + a0) u     with u^2 = -1
@@ -97,17 +97,18 @@ pub fn mul_no_carry_w6<'v, F: PrimeField, FC: FieldChip<F>, const XI_0: i64>(
     FieldExtPoint::construct(vec![out0_0_nocarry, out0_1_nocarry])
 }
 
+// A lot of this is common to any field extension (lots of for loops), but due to the way Rust traits work, it is hard to create a common generic trait that does this. The main problem is that if you had a `FieldExtCommon` trait and wanted to implement `FieldChip` for every type implementing `FieldExtCommon`, Rust would reject it, because someone could implement both `FieldExtCommon` and `FieldChip` for the same type, causing conflicting implementations.
 impl<'a, F, FpChip, Fp12, const XI_0: i64> FieldChip<F> for Fp12Chip<'a, F, FpChip, Fp12, XI_0>
 where
     F: PrimeField,
-    FpChip: PrimeFieldChip<F, WitnessType = Value<BigInt>, ConstantType = BigUint>,
+    FpChip: PrimeFieldChip<F, WitnessType = BigInt, ConstantType = BigUint>,
     FpChip::FieldType: PrimeField,
     Fp12: Field + FieldExtConstructor<FpChip::FieldType, 12>,
 {
     const PRIME_FIELD_NUM_BITS: u32 = FpChip::FieldType::NUM_BITS;
     type ConstantType = Fp12;
-    type WitnessType = Vec<Value<BigInt>>;
-    type FieldPoint<'v> = FieldExtPoint<FpChip::FieldPoint<'v>>;
+    type WitnessType = Vec<BigInt>;
+    type FieldPoint = FieldExtPoint<FpChip::FieldPoint>;
     type FieldType = Fp12;
     type RangeChip = FpChip::RangeChip;
 
@@ -122,30 +123,21 @@ where
         self.fp_chip.limb_bits()
     }
 
-    fn get_assigned_value(&self, x: &Self::FieldPoint<'_>) -> Value<Fp12> {
+    fn get_assigned_value(&self, x: &Self::FieldPoint) -> Fp12 {
         assert_eq!(x.coeffs.len(), 12);
-        let values = x.coeffs.iter().map(|v| self.fp_chip.get_assigned_value(v));
-        let values_collected: Value<Vec<FpChip::FieldType>> = values.into_iter().collect();
-        values_collected.map(|c| Fp12::new(c.try_into().unwrap()))
+        let values =
+            x.coeffs.iter().map(|v| self.fp_chip.get_assigned_value(v)).collect::<Vec<_>>();
+        Fp12::new(values.try_into().unwrap())
     }
 
     fn fe_to_constant(x: Self::FieldType) -> Self::ConstantType {
         x
     }
-    fn fe_to_witness(x: &Value<Fp12>) -> Vec<Value<BigInt>> {
-        match value_to_option(*x) {
-            Some(x) => {
-                x.coeffs().iter().map(|c| Value::known(BigInt::from(fe_to_biguint(c)))).collect()
-            }
-            None => vec![Value::unknown(); 12],
-        }
+    fn fe_to_witness(x: &Fp12) -> Vec<BigInt> {
+        x.coeffs().iter().map(|c| BigInt::from(fe_to_biguint(c))).collect()
     }
 
-    fn load_private<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        coeffs: Vec<Value<BigInt>>,
-    ) -> Self::FieldPoint<'v> {
+    fn load_private(&self, ctx: &mut Context<F>, coeffs: Vec<BigInt>) -> Self::FieldPoint {
         assert_eq!(coeffs.len(), 12);
         let mut assigned_coeffs = Vec::with_capacity(12);
         for a in coeffs {
@@ -155,7 +147,7 @@ where
         Self::FieldPoint::construct(assigned_coeffs)
     }
 
-    fn load_constant<'v>(&self, ctx: &mut Context<'_, F>, c: Fp12) -> Self::FieldPoint<'v> {
+    fn load_constant(&self, ctx: &mut Context<F>, c: Fp12) -> Self::FieldPoint {
         let mut assigned_coeffs = Vec::with_capacity(12);
         for a in &c.coeffs() {
             let assigned_coeff = self.fp_chip.load_constant(ctx, fe_to_biguint(a));
@@ -165,12 +157,12 @@ where
     }
 
     // signed overflow BigInt functions
-    fn add_no_carry<'v>(
+    fn add_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         assert_eq!(a.coeffs.len(), b.coeffs.len());
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
@@ -180,12 +172,12 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn add_constant_no_carry<'v>(
+    fn add_constant_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
         c: Self::ConstantType,
-    ) -> Self::FieldPoint<'v> {
+    ) -> Self::FieldPoint {
         let c_coeffs = c.coeffs();
         assert_eq!(a.coeffs.len(), c_coeffs.len());
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
@@ -196,12 +188,12 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn sub_no_carry<'v>(
+    fn sub_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         assert_eq!(a.coeffs.len(), b.coeffs.len());
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
@@ -211,11 +203,7 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn negate<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+    fn negate(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for a_coeff in &a.coeffs {
             let out_coeff = self.fp_chip.negate(ctx, a_coeff);
@@ -224,12 +212,12 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn scalar_mul_no_carry<'v>(
+    fn scalar_mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
         c: i64,
-    ) -> Self::FieldPoint<'v> {
+    ) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
             let coeff = self.fp_chip.scalar_mul_no_carry(ctx, &a.coeffs[i], c);
@@ -238,13 +226,13 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn scalar_mul_and_add_no_carry<'v>(
+    fn scalar_mul_and_add_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
         c: i64,
-    ) -> Self::FieldPoint<'v> {
+    ) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
             let coeff =
@@ -255,12 +243,12 @@ where
     }
 
     // w^6 = u + xi for xi = 9
-    fn mul_no_carry<'v>(
+    fn mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         assert_eq!(a.coeffs.len(), 12);
         assert_eq!(b.coeffs.len(), 12);
 
@@ -341,17 +329,13 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn check_carry_mod_to_zero<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>) {
+    fn check_carry_mod_to_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) {
         for coeff in &a.coeffs {
             self.fp_chip.check_carry_mod_to_zero(ctx, coeff);
         }
     }
 
-    fn carry_mod<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+    fn carry_mod(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.carry_mod(ctx, a_coeff);
@@ -360,28 +344,24 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn range_check<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>, max_bits: usize) {
+    fn range_check(&self, ctx: &mut Context<F>, a: &Self::FieldPoint, max_bits: usize) {
         for a_coeff in &a.coeffs {
             self.fp_chip.range_check(ctx, a_coeff, max_bits);
         }
     }
 
-    fn enforce_less_than<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>) {
+    fn enforce_less_than(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) {
         for a_coeff in &a.coeffs {
             self.fp_chip.enforce_less_than(ctx, a_coeff)
         }
     }
 
-    fn is_soft_zero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+    fn is_soft_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F> {
         let mut prev = None;
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.is_soft_zero(ctx, a_coeff);
             if let Some(p) = prev {
-                let new = self.fp_chip.range().gate().and(ctx, Existing(&coeff), Existing(&p));
+                let new = self.fp_chip.range().gate().and(ctx, coeff, p);
                 prev = Some(new);
             } else {
                 prev = Some(coeff);
@@ -390,16 +370,12 @@ where
         prev.unwrap()
     }
 
-    fn is_soft_nonzero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+    fn is_soft_nonzero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F> {
         let mut prev = None;
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.is_soft_nonzero(ctx, a_coeff);
             if let Some(p) = prev {
-                let new = self.fp_chip.range().gate().or(ctx, Existing(&coeff), Existing(&p));
+                let new = self.gate().or(ctx, coeff, p);
                 prev = Some(new);
             } else {
                 prev = Some(coeff);
@@ -408,16 +384,12 @@ where
         prev.unwrap()
     }
 
-    fn is_zero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+    fn is_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F> {
         let mut prev = None;
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.is_zero(ctx, a_coeff);
             if let Some(p) = prev {
-                let new = self.fp_chip.range().gate().and(ctx, Existing(&coeff), Existing(&p));
+                let new = self.gate().and(ctx, coeff, p);
                 prev = Some(new);
             } else {
                 prev = Some(coeff);
@@ -426,17 +398,17 @@ where
         prev.unwrap()
     }
 
-    fn is_equal<'v>(
+    fn is_equal(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> AssignedValue<F> {
         let mut acc = None;
         for (a_coeff, b_coeff) in a.coeffs.iter().zip(b.coeffs.iter()) {
             let coeff = self.fp_chip.is_equal(ctx, a_coeff, b_coeff);
             if let Some(c) = acc {
-                acc = Some(self.fp_chip.range().gate().and(ctx, Existing(&coeff), Existing(&c)));
+                acc = Some(self.gate().and(ctx, coeff, c));
             } else {
                 acc = Some(coeff);
             }
@@ -444,17 +416,17 @@ where
         acc.unwrap()
     }
 
-    fn is_equal_unenforced<'v>(
+    fn is_equal_unenforced(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> AssignedValue<F> {
         let mut acc = None;
         for (a_coeff, b_coeff) in a.coeffs.iter().zip(b.coeffs.iter()) {
             let coeff = self.fp_chip.is_equal_unenforced(ctx, a_coeff, b_coeff);
             if let Some(c) = acc {
-                acc = Some(self.fp_chip.range().gate().and(ctx, Existing(&coeff), Existing(&c)));
+                acc = Some(self.gate().and(ctx, coeff, c));
             } else {
                 acc = Some(coeff);
             }
@@ -462,12 +434,7 @@ where
         acc.unwrap()
     }
 
-    fn assert_equal<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) {
+    fn assert_equal(&self, ctx: &mut Context<F>, a: &Self::FieldPoint, b: &Self::FieldPoint) {
         for (a_coeff, b_coeff) in a.coeffs.iter().zip(b.coeffs.iter()) {
             self.fp_chip.assert_equal(ctx, a_coeff, b_coeff);
         }
diff --git a/halo2-ecc/src/fields/fp2.rs b/halo2-ecc/src/fields/fp2.rs
index 633ae6fa..aed390fa 100644
--- a/halo2-ecc/src/fields/fp2.rs
+++ b/halo2-ecc/src/fields/fp2.rs
@@ -1,11 +1,8 @@
-use super::{FieldChip, FieldExtConstructor, FieldExtPoint, PrimeFieldChip, Selectable};
-use crate::halo2_proofs::{arithmetic::Field, circuit::Value};
-use halo2_base::{
-    gates::{GateInstructions, RangeInstructions},
-    utils::{fe_to_biguint, value_to_option, PrimeField},
-    AssignedValue, Context,
-    QuantumCell::Existing,
+use super::{
+    FieldChip, FieldExtConstructor, FieldExtPoint, PrimeField, PrimeFieldChip, Selectable,
 };
+use crate::halo2_proofs::arithmetic::Field;
+use halo2_base::{gates::GateInstructions, utils::fe_to_biguint, AssignedValue, Context};
 use num_bigint::{BigInt, BigUint};
 use std::marker::PhantomData;
 
@@ -13,7 +10,7 @@ use std::marker::PhantomData;
 /// `Fp2 = Fp[u] / (u^2 + 1)`
 /// This implementation assumes p = 3 (mod 4) in order for the polynomial u^2 + 1 to be irreducible over Fp; i.e., in order for -1 to not be a square (quadratic residue) in Fp
 /// This means we store an Fp2 point as `a_0 + a_1 * u` where `a_0, a_1 in Fp`
-#[derive(Clone, Debug)]
+#[derive(Clone, Copy, Debug)]
 pub struct Fp2Chip<'a, F: PrimeField, FpChip: PrimeFieldChip<F>, Fp2: Field>
 where
     FpChip::FieldType: PrimeField,
@@ -33,16 +30,16 @@ where
     Fp2: Field + FieldExtConstructor<FpChip::FieldType, 2>,
 {
     /// User must construct an `FpChip` first using a config. This is intended so everything shares a single `FlexGateChip`, which is needed for the column allocation to work.
-    pub fn construct(fp_chip: &'a FpChip) -> Self {
+    pub fn new(fp_chip: &'a FpChip) -> Self {
         Self { fp_chip, _f: PhantomData, _fp2: PhantomData }
     }
 
-    pub fn fp_mul_no_carry<'v>(
+    pub fn fp_mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-        fp_point: &FpChip::FieldPoint<'v>,
-    ) -> FieldExtPoint<FpChip::FieldPoint<'v>> {
+        ctx: &mut Context<F>,
+        a: &FieldExtPoint<FpChip::FieldPoint>,
+        fp_point: &FpChip::FieldPoint,
+    ) -> FieldExtPoint<FpChip::FieldPoint> {
         assert_eq!(a.coeffs.len(), 2);
 
         let mut out_coeffs = Vec::with_capacity(2);
@@ -53,37 +50,37 @@ where
         FieldExtPoint::construct(out_coeffs)
     }
 
-    pub fn conjugate<'v>(
+    pub fn conjugate(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-    ) -> FieldExtPoint<FpChip::FieldPoint<'v>> {
+        ctx: &mut Context<F>,
+        a: &FieldExtPoint<FpChip::FieldPoint>,
+    ) -> FieldExtPoint<FpChip::FieldPoint> {
         assert_eq!(a.coeffs.len(), 2);
 
         let neg_a1 = self.fp_chip.negate(ctx, &a.coeffs[1]);
         FieldExtPoint::construct(vec![a.coeffs[0].clone(), neg_a1])
     }
 
-    pub fn neg_conjugate<'v>(
+    pub fn neg_conjugate(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-    ) -> FieldExtPoint<FpChip::FieldPoint<'v>> {
+        ctx: &mut Context<F>,
+        a: &FieldExtPoint<FpChip::FieldPoint>,
+    ) -> FieldExtPoint<FpChip::FieldPoint> {
         assert_eq!(a.coeffs.len(), 2);
 
         let neg_a0 = self.fp_chip.negate(ctx, &a.coeffs[0]);
         FieldExtPoint::construct(vec![neg_a0, a.coeffs[1].clone()])
     }
 
-    pub fn select<'v>(
+    pub fn select(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-        b: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-        sel: &AssignedValue<'v, F>,
-    ) -> FieldExtPoint<FpChip::FieldPoint<'v>>
+        ctx: &mut Context<F>,
+        a: &FieldExtPoint<FpChip::FieldPoint>,
+        b: &FieldExtPoint<FpChip::FieldPoint>,
+        sel: AssignedValue<F>,
+    ) -> FieldExtPoint<FpChip::FieldPoint>
     where
-        FpChip: Selectable<F, Point<'v> = FpChip::FieldPoint<'v>>,
+        FpChip: Selectable<F, Point = FpChip::FieldPoint>,
     {
         let coeffs: Vec<_> = a
             .coeffs
@@ -99,13 +96,13 @@ impl<'a, F, FpChip, Fp2> FieldChip<F> for Fp2Chip<'a, F, FpChip, Fp2>
 where
     F: PrimeField,
     FpChip::FieldType: PrimeField,
-    FpChip: PrimeFieldChip<F, WitnessType = Value<BigInt>, ConstantType = BigUint>,
+    FpChip: PrimeFieldChip<F, WitnessType = BigInt, ConstantType = BigUint>,
     Fp2: Field + FieldExtConstructor<FpChip::FieldType, 2>,
 {
     const PRIME_FIELD_NUM_BITS: u32 = FpChip::FieldType::NUM_BITS;
     type ConstantType = Fp2;
-    type WitnessType = Vec<Value<BigInt>>;
-    type FieldPoint<'v> = FieldExtPoint<FpChip::FieldPoint<'v>>;
+    type WitnessType = Vec<BigInt>;
+    type FieldPoint = FieldExtPoint<FpChip::FieldPoint>;
     type FieldType = Fp2;
     type RangeChip = FpChip::RangeChip;
 
@@ -120,34 +117,25 @@ where
         self.fp_chip.limb_bits()
     }
 
-    fn get_assigned_value(&self, x: &Self::FieldPoint<'_>) -> Value<Fp2> {
-        assert_eq!(x.coeffs.len(), 2);
+    fn get_assigned_value(&self, x: &Self::FieldPoint) -> Fp2 {
+        debug_assert_eq!(x.coeffs.len(), 2);
         let c0 = self.fp_chip.get_assigned_value(&x.coeffs[0]);
         let c1 = self.fp_chip.get_assigned_value(&x.coeffs[1]);
-        c0.zip(c1).map(|(c0, c1)| Fp2::new([c0, c1]))
+        Fp2::new([c0, c1])
     }
 
     fn fe_to_constant(x: Fp2) -> Fp2 {
         x
     }
 
-    fn fe_to_witness(x: &Value<Fp2>) -> Vec<Value<BigInt>> {
-        match value_to_option(*x) {
-            None => vec![Value::unknown(), Value::unknown()],
-            Some(x) => {
-                let coeffs = x.coeffs();
-                assert_eq!(coeffs.len(), 2);
-                coeffs.iter().map(|c| Value::known(BigInt::from(fe_to_biguint(c)))).collect()
-            }
-        }
+    fn fe_to_witness(x: &Fp2) -> Vec<BigInt> {
+        let coeffs = x.coeffs();
+        debug_assert_eq!(coeffs.len(), 2);
+        coeffs.iter().map(|c| BigInt::from(fe_to_biguint(c))).collect()
     }
 
-    fn load_private<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        coeffs: Vec<Value<BigInt>>,
-    ) -> Self::FieldPoint<'v> {
-        assert_eq!(coeffs.len(), 2);
+    fn load_private(&self, ctx: &mut Context<F>, coeffs: Vec<BigInt>) -> Self::FieldPoint {
+        debug_assert_eq!(coeffs.len(), 2);
         let mut assigned_coeffs = Vec::with_capacity(2);
         for a in coeffs {
             let assigned_coeff = self.fp_chip.load_private(ctx, a);
@@ -156,7 +144,7 @@ where
         Self::FieldPoint::construct(assigned_coeffs)
     }
 
-    fn load_constant<'v>(&self, ctx: &mut Context<'_, F>, c: Fp2) -> Self::FieldPoint<'v> {
+    fn load_constant(&self, ctx: &mut Context<F>, c: Fp2) -> Self::FieldPoint {
         let mut assigned_coeffs = Vec::with_capacity(2);
         for a in &c.coeffs() {
             let assigned_coeff = self.fp_chip.load_constant(ctx, fe_to_biguint(a));
@@ -166,12 +154,12 @@ where
     }
 
     // signed overflow BigInt functions
-    fn add_no_carry<'v>(
+    fn add_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         assert_eq!(a.coeffs.len(), b.coeffs.len());
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
@@ -181,12 +169,12 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn add_constant_no_carry<'v>(
+    fn add_constant_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
         c: Self::ConstantType,
-    ) -> Self::FieldPoint<'v> {
+    ) -> Self::FieldPoint {
         let c_coeffs = c.coeffs();
         assert_eq!(a.coeffs.len(), c_coeffs.len());
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
@@ -197,12 +185,12 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn sub_no_carry<'v>(
+    fn sub_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         assert_eq!(a.coeffs.len(), b.coeffs.len());
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
@@ -212,11 +200,7 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn negate<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+    fn negate(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for a_coeff in &a.coeffs {
             let out_coeff = self.fp_chip.negate(ctx, a_coeff);
@@ -225,12 +209,12 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn scalar_mul_no_carry<'v>(
+    fn scalar_mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
         c: i64,
-    ) -> Self::FieldPoint<'v> {
+    ) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
             let coeff = self.fp_chip.scalar_mul_no_carry(ctx, &a.coeffs[i], c);
@@ -239,13 +223,13 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn scalar_mul_and_add_no_carry<'v>(
+    fn scalar_mul_and_add_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
         c: i64,
-    ) -> Self::FieldPoint<'v> {
+    ) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
             let coeff =
@@ -255,12 +239,12 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn mul_no_carry<'v>(
+    fn mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         assert_eq!(a.coeffs.len(), b.coeffs.len());
         // (a_0 + a_1 * u) * (b_0 + b_1 * u) = (a_0 b_0 - a_1 b_1) + (a_0 b_1 + a_1 b_0) * u
         let mut ab_coeffs = Vec::with_capacity(a.coeffs.len() * b.coeffs.len());
@@ -282,17 +266,13 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn check_carry_mod_to_zero<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>) {
+    fn check_carry_mod_to_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) {
         for coeff in &a.coeffs {
             self.fp_chip.check_carry_mod_to_zero(ctx, coeff);
         }
     }
 
-    fn carry_mod<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+    fn carry_mod(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.carry_mod(ctx, a_coeff);
@@ -301,28 +281,24 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn range_check<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>, max_bits: usize) {
+    fn range_check(&self, ctx: &mut Context<F>, a: &Self::FieldPoint, max_bits: usize) {
         for a_coeff in &a.coeffs {
             self.fp_chip.range_check(ctx, a_coeff, max_bits);
         }
     }
 
-    fn enforce_less_than<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>) {
+    fn enforce_less_than(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) {
         for a_coeff in &a.coeffs {
             self.fp_chip.enforce_less_than(ctx, a_coeff)
         }
     }
 
-    fn is_soft_zero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+    fn is_soft_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F> {
         let mut prev = None;
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.is_soft_zero(ctx, a_coeff);
             if let Some(p) = prev {
-                let new = self.fp_chip.range().gate().and(ctx, Existing(&coeff), Existing(&p));
+                let new = self.gate().and(ctx, coeff, p);
                 prev = Some(new);
             } else {
                 prev = Some(coeff);
@@ -331,16 +307,12 @@ where
         prev.unwrap()
     }
 
-    fn is_soft_nonzero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+    fn is_soft_nonzero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F> {
         let mut prev = None;
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.is_soft_nonzero(ctx, a_coeff);
             if let Some(p) = prev {
-                let new = self.fp_chip.range().gate().or(ctx, Existing(&coeff), Existing(&p));
+                let new = self.gate().or(ctx, coeff, p);
                 prev = Some(new);
             } else {
                 prev = Some(coeff);
@@ -349,16 +321,12 @@ where
         prev.unwrap()
     }
 
-    fn is_zero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+    fn is_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F> {
         let mut prev = None;
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.is_zero(ctx, a_coeff);
             if let Some(p) = prev {
-                let new = self.fp_chip.range().gate().and(ctx, Existing(&coeff), Existing(&p));
+                let new = self.gate().and(ctx, coeff, p);
                 prev = Some(new);
             } else {
                 prev = Some(coeff);
@@ -367,17 +335,17 @@ where
         prev.unwrap()
     }
 
-    fn is_equal_unenforced<'v>(
+    fn is_equal_unenforced(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> AssignedValue<F> {
         let mut acc = None;
         for (a_coeff, b_coeff) in a.coeffs.iter().zip(b.coeffs.iter()) {
             let coeff = self.fp_chip.is_equal_unenforced(ctx, a_coeff, b_coeff);
             if let Some(c) = acc {
-                acc = Some(self.fp_chip.range().gate().and(ctx, Existing(&coeff), Existing(&c)));
+                acc = Some(self.gate().and(ctx, coeff, c));
             } else {
                 acc = Some(coeff);
             }
@@ -385,12 +353,7 @@ where
         acc.unwrap()
     }
 
-    fn assert_equal<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) {
+    fn assert_equal(&self, ctx: &mut Context<F>, a: &Self::FieldPoint, b: &Self::FieldPoint) {
         for (a_coeff, b_coeff) in a.coeffs.iter().zip(b.coeffs.iter()) {
             self.fp_chip.assert_equal(ctx, a_coeff, b_coeff)
         }
diff --git a/halo2-ecc/src/fields/mod.rs b/halo2-ecc/src/fields/mod.rs
index e5e65f16..cdae8275 100644
--- a/halo2-ecc/src/fields/mod.rs
+++ b/halo2-ecc/src/fields/mod.rs
@@ -1,6 +1,11 @@
-use crate::halo2_proofs::{arithmetic::Field, circuit::Value};
-use halo2_base::{gates::RangeInstructions, utils::PrimeField, AssignedValue, Context};
+use crate::halo2_proofs::arithmetic::Field;
+use halo2_base::{
+    gates::RangeInstructions,
+    utils::{BigPrimeField, ScalarField},
+    AssignedValue, Context,
+};
 use num_bigint::BigUint;
+use serde::{Deserialize, Serialize};
 use std::fmt::Debug;
 
 pub mod fp;
@@ -10,6 +15,8 @@ pub mod fp2;
 #[cfg(test)]
 mod tests;
 
+pub trait PrimeField = BigPrimeField;
+
 #[derive(Clone, Debug)]
 pub struct FieldExtPoint<FieldPoint: Clone + Debug> {
     // `F_q` field extension of `F_p` where `q = p^degree`
@@ -28,12 +35,12 @@ impl<FieldPoint: Clone + Debug> FieldExtPoint<FieldPoint> {
 }
 
 /// Common functionality for finite field chips
-pub trait FieldChip<F: PrimeField> {
+pub trait FieldChip<F: PrimeField>: Clone + Debug + Send + Sync {
     const PRIME_FIELD_NUM_BITS: u32;
 
     type ConstantType: Debug;
     type WitnessType: Debug;
-    type FieldPoint<'v>: Clone + Debug;
+    type FieldPoint: Clone + Debug + Send + Sync;
     // a type implementing `Field` trait to help with witness generation (for example with inverse)
     type FieldType: Field;
     type RangeChip: RangeInstructions<F>;
@@ -45,159 +52,126 @@ pub trait FieldChip<F: PrimeField> {
     fn range(&self) -> &Self::RangeChip;
     fn limb_bits(&self) -> usize;
 
-    fn get_assigned_value(&self, x: &Self::FieldPoint<'_>) -> Value<Self::FieldType>;
+    fn get_assigned_value(&self, x: &Self::FieldPoint) -> Self::FieldType;
 
     fn fe_to_constant(x: Self::FieldType) -> Self::ConstantType;
-    fn fe_to_witness(x: &Value<Self::FieldType>) -> Self::WitnessType;
+    fn fe_to_witness(x: &Self::FieldType) -> Self::WitnessType;
 
-    fn load_private<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        coeffs: Self::WitnessType,
-    ) -> Self::FieldPoint<'v>;
+    fn load_private(&self, ctx: &mut Context<F>, coeffs: Self::WitnessType) -> Self::FieldPoint;
 
-    fn load_constant<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        coeffs: Self::ConstantType,
-    ) -> Self::FieldPoint<'v>;
+    fn load_constant(&self, ctx: &mut Context<F>, coeffs: Self::ConstantType) -> Self::FieldPoint;
 
-    fn add_no_carry<'v>(
+    fn add_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v>;
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint;
 
     /// output: `a + c`
-    fn add_constant_no_carry<'v>(
+    fn add_constant_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
         c: Self::ConstantType,
-    ) -> Self::FieldPoint<'v>;
+    ) -> Self::FieldPoint;
 
-    fn sub_no_carry<'v>(
+    fn sub_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v>;
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint;
 
-    fn negate<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v>;
+    fn negate(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> Self::FieldPoint;
 
     /// a * c
-    fn scalar_mul_no_carry<'v>(
+    fn scalar_mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
         c: i64,
-    ) -> Self::FieldPoint<'v>;
+    ) -> Self::FieldPoint;
 
     /// a * c + b
-    fn scalar_mul_and_add_no_carry<'v>(
+    fn scalar_mul_and_add_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
         c: i64,
-    ) -> Self::FieldPoint<'v>;
+    ) -> Self::FieldPoint;
 
-    fn mul_no_carry<'v>(
+    fn mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v>;
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint;
 
-    fn check_carry_mod_to_zero<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>);
+    fn check_carry_mod_to_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint);
 
-    fn carry_mod<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v>;
+    fn carry_mod(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> Self::FieldPoint;
 
-    fn range_check<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>, max_bits: usize);
+    fn range_check(&self, ctx: &mut Context<F>, a: &Self::FieldPoint, max_bits: usize);
 
-    fn enforce_less_than<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>);
+    fn enforce_less_than(&self, ctx: &mut Context<F>, a: &Self::FieldPoint);
 
-    // Assumes the witness for a is 0
-    // Constrains that the underlying big integer is 0 and < p.
+    // Returns 1 iff the underlying big integer for `a` is 0. Otherwise returns 0.
     // For field extensions, checks coordinate-wise.
-    fn is_soft_zero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F>;
+    fn is_soft_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F>;
 
-    // Constrains that the underlying big integer is in [1, p - 1].
+    // Constrains that the underlying big integer is in [0, p - 1].
+    // Then returns 1 iff the underlying big integer for `a` is nonzero. Otherwise returns 0.
     // For field extensions, checks coordinate-wise.
-    fn is_soft_nonzero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F>;
+    fn is_soft_nonzero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F>;
 
-    fn is_zero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F>;
+    fn is_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F>;
 
     // assuming `a, b` have been range checked to be a proper BigInt
     // constrain the witnesses `a, b` to be `< p`
     // then check `a == b` as BigInts
-    fn is_equal<'v>(
+    fn is_equal(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> AssignedValue<F> {
         self.enforce_less_than(ctx, a);
         self.enforce_less_than(ctx, b);
         // a.native and b.native are derived from `a.truncation, b.truncation`, so no need to check if they're equal
         self.is_equal_unenforced(ctx, a, b)
     }
 
-    fn is_equal_unenforced<'v>(
+    fn is_equal_unenforced(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F>;
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> AssignedValue<F>;
 
-    fn assert_equal<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    );
+    fn assert_equal(&self, ctx: &mut Context<F>, a: &Self::FieldPoint, b: &Self::FieldPoint);
 
-    fn mul<'v>(
+    fn mul(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         let no_carry = self.mul_no_carry(ctx, a, b);
         self.carry_mod(ctx, &no_carry)
     }
 
-    fn divide<'v>(
+    fn divide(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         let a_val = self.get_assigned_value(a);
         let b_val = self.get_assigned_value(b);
-        let b_inv = b_val.map(|bv| bv.invert().unwrap());
-        let quot_val = a_val.zip(b_inv).map(|(a, bi)| a * bi);
+        let b_inv = b_val.invert().unwrap();
+        let quot_val = a_val * b_inv;
 
         let quot = self.load_private(ctx, Self::fe_to_witness(&quot_val));
 
@@ -211,16 +185,16 @@ pub trait FieldChip<F: PrimeField> {
 
     // constrain and output -a / b
     // this is usually cheaper constraint-wise than computing -a and then (-a) / b separately
-    fn neg_divide<'v>(
+    fn neg_divide(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         let a_val = self.get_assigned_value(a);
         let b_val = self.get_assigned_value(b);
-        let b_inv = b_val.map(|bv| bv.invert().unwrap());
-        let quot_val = a_val.zip(b_inv).map(|(a, b)| -a * b);
+        let b_inv = b_val.invert().unwrap();
+        let quot_val = -a_val * b_inv;
 
         let quot = self.load_private(ctx, Self::fe_to_witness(&quot_val));
         self.range_check(ctx, &quot, Self::PRIME_FIELD_NUM_BITS as usize);
@@ -234,23 +208,23 @@ pub trait FieldChip<F: PrimeField> {
     }
 }
 
-pub trait Selectable<F: PrimeField> {
-    type Point<'v>;
+pub trait Selectable<F: ScalarField> {
+    type Point;
 
-    fn select<'v>(
+    fn select(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &Self::Point<'v>,
-        b: &Self::Point<'v>,
-        sel: &AssignedValue<'v, F>,
-    ) -> Self::Point<'v>;
+        ctx: &mut Context<F>,
+        a: &Self::Point,
+        b: &Self::Point,
+        sel: AssignedValue<F>,
+    ) -> Self::Point;
 
-    fn select_by_indicator<'v>(
+    fn select_by_indicator(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &[Self::Point<'v>],
-        coeffs: &[AssignedValue<'v, F>],
-    ) -> Self::Point<'v>;
+        ctx: &mut Context<F>,
+        a: &[Self::Point],
+        coeffs: &[AssignedValue<F>],
+    ) -> Self::Point;
 }
 
 // Common functionality for prime field chips
@@ -265,8 +239,13 @@ where
 
 // helper trait so we can actually construct and read the Fp2 struct
 // needs to be implemented for Fp2 struct for use cases below
-pub trait FieldExtConstructor<Fp: PrimeField, const DEGREE: usize> {
+pub trait FieldExtConstructor<Fp: ff::PrimeField, const DEGREE: usize> {
     fn new(c: [Fp; DEGREE]) -> Self;
 
     fn coeffs(&self) -> Vec<Fp>;
 }
+
+#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
+pub enum FpStrategy {
+    Simple,
+}
diff --git a/halo2-ecc/src/fields/tests.rs b/halo2-ecc/src/fields/tests.rs
index 49cd349e..6e013486 100644
--- a/halo2-ecc/src/fields/tests.rs
+++ b/halo2-ecc/src/fields/tests.rs
@@ -1,120 +1,55 @@
 mod fp {
-    use crate::fields::{
-        fp::{FpConfig, FpStrategy},
-        FieldChip,
-    };
+    use crate::fields::fp::FpChip;
+    use crate::fields::{FieldChip, PrimeField};
     use crate::halo2_proofs::{
-        circuit::*,
         dev::MockProver,
         halo2curves::bn256::{Fq, Fr},
-        plonk::*,
-    };
-    use group::ff::Field;
-    use halo2_base::{
-        utils::{fe_to_biguint, modulus, PrimeField},
-        SKIP_FIRST_PASS,
     };
-    use num_bigint::BigInt;
+    use halo2_base::gates::builder::{GateThreadBuilder, RangeCircuitBuilder};
+    use halo2_base::gates::RangeChip;
+    use halo2_base::utils::biguint_to_fe;
+    use halo2_base::utils::{fe_to_biguint, modulus};
+    use halo2_base::Context;
     use rand::rngs::OsRng;
-    use std::marker::PhantomData;
-
-    #[derive(Default)]
-    struct MyCircuit<F> {
-        a: Value<Fq>,
-        b: Value<Fq>,
-        _marker: PhantomData<F>,
-    }
 
-    const NUM_ADVICE: usize = 1;
-    const NUM_FIXED: usize = 1;
     const K: usize = 10;
 
-    impl<F: PrimeField> Circuit<F> for MyCircuit<F> {
-        type Config = FpConfig<F, Fq>;
-        type FloorPlanner = SimpleFloorPlanner;
-
-        fn without_witnesses(&self) -> Self {
-            Self::default()
-        }
-
-        fn configure(meta: &mut ConstraintSystem<F>) -> Self::Config {
-            FpConfig::<F, _>::configure(
-                meta,
-                FpStrategy::Simple,
-                &[NUM_ADVICE],
-                &[1],
-                NUM_FIXED,
-                9,
-                88,
-                3,
-                modulus::<Fq>(),
-                0,
-                K,
-            )
-        }
-
-        fn synthesize(
-            &self,
-            chip: Self::Config,
-            mut layouter: impl Layouter<F>,
-        ) -> Result<(), Error> {
-            chip.load_lookup_table(&mut layouter)?;
-
-            let mut first_pass = SKIP_FIRST_PASS;
-
-            layouter.assign_region(
-                || "fp",
-                |region| {
-                    if first_pass {
-                        first_pass = false;
-                        return Ok(());
-                    }
-
-                    let mut aux = chip.new_context(region);
-                    let ctx = &mut aux;
-
-                    let a_assigned =
-                        chip.load_private(ctx, self.a.map(|a| BigInt::from(fe_to_biguint(&a))));
-                    let b_assigned =
-                        chip.load_private(ctx, self.b.map(|b| BigInt::from(fe_to_biguint(&b))));
-
-                    // test fp_multiply
-                    {
-                        chip.mul(ctx, &a_assigned, &b_assigned);
-                    }
-
-                    // IMPORTANT: this copies advice cells to enable lookup
-                    // This is not optional.
-                    chip.finalize(ctx);
-
-                    #[cfg(feature = "display")]
-                    {
-                        println!(
-                            "Using {} advice columns and {} fixed columns",
-                            NUM_ADVICE, NUM_FIXED
-                        );
-                        println!("total cells: {}", ctx.total_advice);
-
-                        let (const_rows, _) = ctx.fixed_stats();
-                        println!("maximum rows used by a fixed column: {const_rows}");
-                    }
-                    Ok(())
-                },
-            )
-        }
+    fn fp_mul_test<F: PrimeField>(
+        ctx: &mut Context<F>,
+        lookup_bits: usize,
+        limb_bits: usize,
+        num_limbs: usize,
+        _a: Fq,
+        _b: Fq,
+    ) {
+        std::env::set_var("LOOKUP_BITS", lookup_bits.to_string());
+        let range = RangeChip::<F>::default(lookup_bits);
+        let chip = FpChip::<F, Fq>::new(&range, limb_bits, num_limbs);
+
+        let [a, b] = [_a, _b].map(|x| chip.load_private(ctx, FpChip::<F, Fq>::fe_to_witness(&x)));
+        let c = chip.mul(ctx, &a, &b);
+
+        assert_eq!(c.truncation.to_bigint(limb_bits), c.value);
+        assert_eq!(
+            c.native.value(),
+            &biguint_to_fe(&(&c.value.to_biguint().unwrap() % modulus::<F>()))
+        );
+        assert_eq!(c.value, fe_to_biguint(&(_a * _b)).into())
     }
 
     #[test]
     fn test_fp() {
+        let k = K;
         let a = Fq::random(OsRng);
         let b = Fq::random(OsRng);
 
-        let circuit =
-            MyCircuit::<Fr> { a: Value::known(a), b: Value::known(b), _marker: PhantomData };
+        let mut builder = GateThreadBuilder::<Fr>::mock();
+        fp_mul_test(builder.main(0), k - 1, 88, 3, a, b);
+
+        builder.config(k, Some(10));
+        let circuit = RangeCircuitBuilder::mock(builder);
 
-        let prover = MockProver::run(K as u32, &circuit, vec![]).unwrap();
-        prover.assert_satisfied();
-        //assert_eq!(prover.verify(), Ok(()));
+        MockProver::run(k as u32, &circuit, vec![]).unwrap().assert_satisfied();
     }
 
     #[cfg(feature = "dev-graph")]
@@ -126,144 +61,93 @@ mod fp {
         root.fill(&WHITE).unwrap();
         let root = root.titled("Fp Layout", ("sans-serif", 60)).unwrap();
 
-        let circuit = MyCircuit::<Fr>::default();
-        halo2_proofs::dev::CircuitLayout::default().render(K as u32, &circuit, &root).unwrap();
+        let k = K;
+        let a = Fq::zero();
+        let b = Fq::zero();
+
+        let mut builder = GateThreadBuilder::keygen();
+        fp_mul_test(builder.main(0), k - 1, 88, 3, a, b);
+
+        builder.config(k, Some(10));
+        let circuit = RangeCircuitBuilder::keygen(builder);
+        halo2_proofs::dev::CircuitLayout::default().render(k as u32, &circuit, &root).unwrap();
     }
 }
 
 mod fp12 {
-    use crate::fields::{
-        fp::{FpConfig, FpStrategy},
-        fp12::*,
-        FieldChip,
-    };
+    use crate::fields::fp::FpChip;
+    use crate::fields::fp12::Fp12Chip;
+    use crate::fields::{FieldChip, PrimeField};
     use crate::halo2_proofs::{
-        circuit::*,
         dev::MockProver,
         halo2curves::bn256::{Fq, Fq12, Fr},
-        plonk::*,
     };
-    use halo2_base::utils::modulus;
-    use halo2_base::{utils::PrimeField, SKIP_FIRST_PASS};
-    use std::marker::PhantomData;
+    use halo2_base::gates::builder::{GateThreadBuilder, RangeCircuitBuilder};
+    use halo2_base::gates::RangeChip;
+    use halo2_base::Context;
+    use rand_core::OsRng;
 
-    #[derive(Default)]
-    struct MyCircuit<F> {
-        a: Value<Fq12>,
-        b: Value<Fq12>,
-        _marker: PhantomData<F>,
-    }
-
-    const NUM_ADVICE: usize = 1;
-    const NUM_FIXED: usize = 1;
     const XI_0: i64 = 9;
 
-    impl<F: PrimeField> Circuit<F> for MyCircuit<F> {
-        type Config = FpConfig<F, Fq>;
-        type FloorPlanner = SimpleFloorPlanner;
-
-        fn without_witnesses(&self) -> Self {
-            Self::default()
-        }
-
-        fn configure(meta: &mut ConstraintSystem<F>) -> Self::Config {
-            FpConfig::<F, _>::configure(
-                meta,
-                FpStrategy::Simple,
-                &[NUM_ADVICE],
-                &[1],
-                NUM_FIXED,
-                22,
-                88,
-                3,
-                modulus::<Fq>(),
-                0,
-                23,
-            )
-        }
-
-        fn synthesize(
-            &self,
-            config: Self::Config,
-            mut layouter: impl Layouter<F>,
-        ) -> Result<(), Error> {
-            config.load_lookup_table(&mut layouter)?;
-            let chip = Fp12Chip::<F, FpConfig<F, Fq>, Fq12, XI_0>::construct(&config);
-
-            let mut first_pass = SKIP_FIRST_PASS;
-
-            layouter.assign_region(
-                || "fp12",
-                |region| {
-                    if first_pass {
-                        first_pass = false;
-                        return Ok(());
-                    }
-
-                    let mut aux = config.new_context(region);
-                    let ctx = &mut aux;
-
-                    let a_assigned = chip.load_private(
-                        ctx,
-                        Fp12Chip::<F, FpConfig<F, Fq>, Fq12, XI_0>::fe_to_witness(&self.a),
-                    );
-                    let b_assigned = chip.load_private(
-                        ctx,
-                        Fp12Chip::<F, FpConfig<F, Fq>, Fq12, XI_0>::fe_to_witness(&self.b),
-                    );
-
-                    // test fp_multiply
-                    {
-                        chip.mul(ctx, &a_assigned, &b_assigned);
-                    }
-
-                    // IMPORTANT: this copies advice cells to enable lookup
-                    // This is not optional.
-                    chip.fp_chip.finalize(ctx);
-
-                    #[cfg(feature = "display")]
-                    {
-                        println!(
-                            "Using {} advice columns and {} fixed columns",
-                            NUM_ADVICE, NUM_FIXED
-                        );
-                        println!("total advice cells: {}", ctx.total_advice);
-
-                        let (const_rows, _) = ctx.fixed_stats();
-                        println!("maximum rows used by a fixed column: {const_rows}");
-                    }
-                    Ok(())
-                },
-            )
+    fn fp12_mul_test<F: PrimeField>(
+        ctx: &mut Context<F>,
+        lookup_bits: usize,
+        limb_bits: usize,
+        num_limbs: usize,
+        _a: Fq12,
+        _b: Fq12,
+    ) {
+        std::env::set_var("LOOKUP_BITS", lookup_bits.to_string());
+        let range = RangeChip::<F>::default(lookup_bits);
+        let fp_chip = FpChip::<F, Fq>::new(&range, limb_bits, num_limbs);
+        let chip = Fp12Chip::<F, _, Fq12, XI_0>::new(&fp_chip);
+
+        let [a, b] = [_a, _b].map(|x| {
+            chip.load_private(ctx, Fp12Chip::<F, FpChip<F, Fq>, Fq12, XI_0>::fe_to_witness(&x))
+        });
+        let c = chip.mul(ctx, &a, &b);
+
+        assert_eq!(chip.get_assigned_value(&c), _a * _b);
+        for c in c.coeffs {
+            assert_eq!(c.truncation.to_bigint(limb_bits), c.value);
         }
     }
 
     #[test]
     fn test_fp12() {
-        let k = 23;
-        let mut rng = rand::thread_rng();
-        let a = Fq12::random(&mut rng);
-        let b = Fq12::random(&mut rng);
+        let k = 12;
+        let a = Fq12::random(OsRng);
+        let b = Fq12::random(OsRng);
 
-        let circuit =
-            MyCircuit::<Fr> { a: Value::known(a), b: Value::known(b), _marker: PhantomData };
+        let mut builder = GateThreadBuilder::<Fr>::mock();
+        fp12_mul_test(builder.main(0), k - 1, 88, 3, a, b);
 
-        let prover = MockProver::run(k, &circuit, vec![]).unwrap();
-        prover.assert_satisfied();
-        // assert_eq!(prover.verify(), Ok(()));
+        builder.config(k, Some(20));
+        let circuit = RangeCircuitBuilder::mock(builder);
+
+        MockProver::run(k as u32, &circuit, vec![]).unwrap().assert_satisfied();
     }
 
     #[cfg(feature = "dev-graph")]
     #[test]
     fn plot_fp12() {
-        let k = 9;
+        use ff::Field;
         use plotters::prelude::*;
 
         let root = BitMapBackend::new("layout.png", (1024, 1024)).into_drawing_area();
         root.fill(&WHITE).unwrap();
         let root = root.titled("Fp Layout", ("sans-serif", 60)).unwrap();
 
-        let circuit = MyCircuit::<Fr>::default();
+        let k = 23;
+        let a = Fq12::zero();
+        let b = Fq12::zero();
+
+        let mut builder = GateThreadBuilder::<Fr>::mock();
+        fp12_mul_test(builder.main(0), k - 1, 88, 3, a, b);
+
+        builder.config(k, Some(20));
+        let circuit = RangeCircuitBuilder::mock(builder);
+
-        halo2_proofs::dev::CircuitLayout::default().render(k, &circuit, &root).unwrap();
+        halo2_proofs::dev::CircuitLayout::default().render(k as u32, &circuit, &root).unwrap();
     }
 }
diff --git a/halo2-ecc/src/lib.rs b/halo2-ecc/src/lib.rs
index cfa6e1f5..55df690a 100644
--- a/halo2-ecc/src/lib.rs
+++ b/halo2-ecc/src/lib.rs
@@ -2,12 +2,13 @@
 #![allow(clippy::op_ref)]
 #![allow(clippy::type_complexity)]
 #![feature(int_log)]
+#![feature(trait_alias)]
 
 pub mod bigint;
-//pub mod ecc;
-//pub mod fields;
+pub mod ecc;
+pub mod fields;
 
-//pub mod bn254;
+pub mod bn254;
 //pub mod secp256k1;
 
 pub use halo2_base;
diff --git a/halo2-ecc/src/secp256k1/results/ecdsa_bench_m1.csv b/halo2-ecc/src/secp256k1/results/ecdsa_bench_m1.csv
deleted file mode 100644
index d6fdf049..00000000
--- a/halo2-ecc/src/secp256k1/results/ecdsa_bench_m1.csv
+++ /dev/null
@@ -1,10 +0,0 @@
-degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,vk_size,proof_time,proof_size,verify_time
-19,1,0,1,18,88,3,192,13.385351667s,960,3.0945ms
-18,2,1,1,17,88,3,256,8.359564584s,1344,6.137958ms
-17,4,1,1,16,88,3,384,5.56246375s,1920,5.302292ms
-16,9,2,1,15,90,3,736,5.090631625s,3776,6.617666ms
-15,17,3,1,14,90,3,1280,4.457021917s,6560,7.191958ms
-14,36,6,1,13,91,3,2592,4.635864542s,13280,11.689375ms
-13,71,12,1,12,88,3,5024,4.887099708s,25792,16.996459ms
-12,145,23,2,11,88,3,10176,5.740054292s,51808,51.147917ms
-11,305,53,4,10,88,3,21504,7.244600792s,110624,55.647375ms
\ No newline at end of file
diff --git a/halo2-ecc/src/secp256k1/results/ecdsa_bench_results.txt b/halo2-ecc/src/secp256k1/results/ecdsa_bench_results.txt
deleted file mode 100644
index 2146b1ab..00000000
--- a/halo2-ecc/src/secp256k1/results/ecdsa_bench_results.txt
+++ /dev/null
@@ -1,253 +0,0 @@
----------------------- degree = 19 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 3.405966541s
-ecdsa done
-Time elapsed in generating vkey: 2.123023125s
-ecdsa done
-Time elapsed in generating pkey: 5.21121525s
-Time elapsed in filling circuit: 340.292µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(58470), row_offset: 3, column: Column { index: 0, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 1
-special lookup advice columns: 0
-fixed columns: 1
-lookup bits: 18
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 488463
-minimum rows used by an advice column: 488463
-total cells used: 488463
-cells used in special lookup column: 0
-maximum rows used by a fixed column: 8026
-Suggestions:
-Have you tried using 1 advice columns?
-Have you tried using 0 lookup columns?
-Have you tried using 1 fixed columns?
-ecdsa done
-Proving time: 13.385351667s
-Verify time: 3.0945ms
----------------------- degree = 18 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 1.678767125s
-ecdsa done
-Time elapsed in generating vkey: 2.120563625s
-ecdsa done
-Time elapsed in generating pkey: 3.271299875s
-Time elapsed in filling circuit: 343.416µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(58470), row_offset: 3, column: Column { index: 1, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 2
-special lookup advice columns: 1
-fixed columns: 1
-lookup bits: 17
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 249145
-minimum rows used by an advice column: 249144
-total cells used: 498289
-cells used in special lookup column: 69615
-maximum rows used by a fixed column: 8026
-Suggestions:
-Have you tried using 2 advice columns?
-Have you tried using 1 lookup columns?
-Have you tried using 1 fixed columns?
-ecdsa done
-Proving time: 8.359564584s
-Verify time: 6.137958ms
----------------------- degree = 17 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 845.511958ms
-ecdsa done
-Time elapsed in generating vkey: 1.821084583s
-ecdsa done
-Time elapsed in generating pkey: 2.1293145s
-Time elapsed in filling circuit: 350.042µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(58470), row_offset: 3, column: Column { index: 2, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 4
-special lookup advice columns: 1
-fixed columns: 1
-lookup bits: 16
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 125487
-minimum rows used by an advice column: 125483
-total cells used: 501940
-cells used in special lookup column: 70832
-maximum rows used by a fixed column: 8026
-Suggestions:
-Have you tried using 4 advice columns?
-Have you tried using 1 lookup columns?
-Have you tried using 1 fixed columns?
-ecdsa done
-Proving time: 5.56246375s
-Verify time: 5.302292ms
----------------------- degree = 16 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 476.735291ms
-ecdsa done
-Time elapsed in generating vkey: 1.889539709s
-ecdsa done
-Time elapsed in generating pkey: 1.838861167s
-Time elapsed in filling circuit: 361.875µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(60200), row_offset: 3, column: Column { index: 5, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 9
-special lookup advice columns: 2
-fixed columns: 1
-lookup bits: 15
-limb bits: 90
-num limbs: 3
-maximum rows used by an advice column: 59389
-minimum rows used by an advice column: 59366
-total cells used: 534362
-cells used in special lookup column: 71669
-maximum rows used by a fixed column: 8269
-Suggestions:
-Have you tried using 9 advice columns?
-Have you tried using 2 lookup columns?
-Have you tried using 1 fixed columns?
-ecdsa done
-Proving time: 5.090631625s
-Verify time: 6.617666ms
----------------------- degree = 15 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 234.525125ms
-ecdsa done
-test secp256k1::ecdsa::bench_secp has been running for over 60 seconds
-Time elapsed in generating vkey: 1.897827708s
-ecdsa done
-Time elapsed in generating pkey: 1.611605583s
-Time elapsed in filling circuit: 347.375µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(60200), row_offset: 3, column: Column { index: 3, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 17
-special lookup advice columns: 3
-fixed columns: 1
-lookup bits: 14
-limb bits: 90
-num limbs: 3
-maximum rows used by an advice column: 32707
-minimum rows used by an advice column: 32678
-total cells used: 555735
-cells used in special lookup column: 85486
-maximum rows used by a fixed column: 8268
-Suggestions:
-Have you tried using 17 advice columns?
-Have you tried using 3 lookup columns?
-Have you tried using 1 fixed columns?
-ecdsa done
-Proving time: 4.457021917s
-Verify time: 7.191958ms
----------------------- degree = 14 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 127.057375ms
-ecdsa done
-Time elapsed in generating vkey: 2.217236041s
-ecdsa done
-Time elapsed in generating pkey: 1.632434708s
-Time elapsed in filling circuit: 344.458µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(61065), row_offset: 3, column: Column { index: 20, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 36
-special lookup advice columns: 6
-fixed columns: 1
-lookup bits: 13
-limb bits: 91
-num limbs: 3
-maximum rows used by an advice column: 15931
-minimum rows used by an advice column: 15895
-total cells used: 572648
-cells used in special lookup column: 85441
-maximum rows used by a fixed column: 8390
-Suggestions:
-Have you tried using 35 advice columns?
-Have you tried using 6 lookup columns?
-Have you tried using 1 fixed columns?
-ecdsa done
-Proving time: 4.635864542s
-Verify time: 11.689375ms
----------------------- degree = 13 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 61.105375ms
-ecdsa done
-Time elapsed in generating vkey: 2.53718925s
-ecdsa done
-Time elapsed in generating pkey: 1.570442167s
-Time elapsed in filling circuit: 344.25µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(58470), row_offset: 3, column: Column { index: 16, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 71
-special lookup advice columns: 12
-fixed columns: 1
-lookup bits: 12
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 8096
-minimum rows used by an advice column: 8057
-total cells used: 572742
-cells used in special lookup column: 91593
-maximum rows used by a fixed column: 8026
-Suggestions:
-Have you tried using 70 advice columns?
-Have you tried using 12 lookup columns?
-Have you tried using 1 fixed columns?
-ecdsa done
-Proving time: 4.887099708s
-Verify time: 16.996459ms
----------------------- degree = 12 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 34.131375ms
-ecdsa done
-Time elapsed in generating vkey: 2.9427305s
-ecdsa done
-Time elapsed in generating pkey: 1.724485125s
-Time elapsed in filling circuit: 338.666µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(58470), row_offset: 3, column: Column { index: 45, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 145
-special lookup advice columns: 23
-fixed columns: 2
-lookup bits: 11
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 4064
-minimum rows used by an advice column: 4023
-total cells used: 584948
-cells used in special lookup column: 94029
-maximum rows used by a fixed column: 4014
-Suggestions:
-Have you tried using 143 advice columns?
-Have you tried using 23 lookup columns?
-Have you tried using 2 fixed columns?
-ecdsa done
-Proving time: 5.740054292s
-Verify time: 51.147917ms
----------------------- degree = 11 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 17.605458ms
-ecdsa done
-Time elapsed in generating vkey: 3.72480825s
-ecdsa done
-Time elapsed in generating pkey: 2.107728542s
-Time elapsed in filling circuit: 338.25µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(58470), row_offset: 3, column: Column { index: 302, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 305
-special lookup advice columns: 53
-fixed columns: 4
-lookup bits: 10
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 2038
-minimum rows used by an advice column: 1995
-total cells used: 611812
-cells used in special lookup column: 107456
-maximum rows used by a fixed column: 2007
-Suggestions:
-Have you tried using 299 advice columns?
-Have you tried using 53 lookup columns?
-Have you tried using 4 fixed columns?
-ecdsa done
-Proving time: 7.244600792s
-Verify time: 55.647375ms
\ No newline at end of file

From e1540cf136ec40db3fb08c780c9ab21aa9d5ded1 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Fri, 10 Feb 2023 09:29:53 -0800
Subject: [PATCH 10/26] feat: remove `size_hint` in `inner_product_simple`

* change other uses of `size_hint` to follow with `assert_eq!` instead
  of `debug_assert_eq!`
---
 halo2-base/src/gates/builder.rs   | 58 +++++++++++++++++++------------
 halo2-base/src/gates/flex_gate.rs | 22 ++++++------
 halo2-ecc/benches/msm.rs          |  2 +-
 3 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/halo2-base/src/gates/builder.rs b/halo2-base/src/gates/builder.rs
index c049ba28..9771aa15 100644
--- a/halo2-base/src/gates/builder.rs
+++ b/halo2-base/src/gates/builder.rs
@@ -168,7 +168,7 @@ impl<F: ScalarField> GateThreadBuilder<F> {
                     let column = basic_gate.value;
                     let value = if use_unknown { Value::unknown() } else { Value::known(advice) };
                     #[cfg(feature = "halo2-axiom")]
-                    let cell = *region.assign_advice(column, row_offset, value).unwrap().cell();
+                    let cell = region.assign_advice(column, row_offset, value);
                     #[cfg(not(feature = "halo2-axiom"))]
                     let cell =
                         region.assign_advice(|| "", column, row_offset, || value).unwrap().cell();
@@ -187,8 +187,7 @@ impl<F: ScalarField> GateThreadBuilder<F> {
 
                         #[cfg(feature = "halo2-axiom")]
                         {
-                            let ncell =
-                                *region.assign_advice(column, row_offset, value).unwrap().cell();
+                            let ncell = region.assign_advice(column, row_offset, value);
                             region.constrain_equal(&ncell, &cell);
                         }
                         #[cfg(not(feature = "halo2-axiom"))]
@@ -270,10 +269,7 @@ impl<F: ScalarField> GateThreadBuilder<F> {
 
                     #[cfg(feature = "halo2-axiom")]
                     {
-                        let bcell = *region
-                            .assign_advice(column, lookup_offset, value)
-                            .expect("assign_advice should not fail")
-                            .cell();
+                        let bcell = region.assign_advice(column, lookup_offset, value);
                         region.constrain_equal(&acell, &bcell);
                     }
                     #[cfg(not(feature = "halo2-axiom"))]
@@ -327,7 +323,7 @@ pub fn assign_threads_in<F: ScalarField>(
             let value = advice.value;
             let lookup_column = *lookup_column.unwrap();
             #[cfg(feature = "halo2-axiom")]
-            region.assign_advice(lookup_column, lookup_offset, Value::known(value)).unwrap();
+            region.assign_advice(lookup_column, lookup_offset, Value::known(value));
             #[cfg(not(feature = "halo2-axiom"))]
             region
                 .assign_advice(|| "", lookup_column, lookup_offset, || Value::known(value))
@@ -337,7 +333,7 @@ pub fn assign_threads_in<F: ScalarField>(
         }
         for advice in ctx.advice {
             #[cfg(feature = "halo2-axiom")]
-            region.assign_advice(column, row_offset, Value::known(advice)).unwrap();
+            region.assign_advice(column, row_offset, Value::known(advice));
             #[cfg(not(feature = "halo2-axiom"))]
             region.assign_advice(|| "", column, row_offset, || Value::known(advice)).unwrap();
 
@@ -348,7 +344,7 @@ pub fn assign_threads_in<F: ScalarField>(
                 column = config.basic_gates[phase][gate_index].value;
 
                 #[cfg(feature = "halo2-axiom")]
-                region.assign_advice(column, row_offset, Value::known(advice)).unwrap();
+                region.assign_advice(column, row_offset, Value::known(advice));
                 #[cfg(not(feature = "halo2-axiom"))]
                 region.assign_advice(|| "", column, row_offset, || Value::known(advice)).unwrap();
             }
@@ -423,21 +419,28 @@ impl<F: ScalarField> Circuit<F> for GateCircuitBuilder<F> {
                     first_pass = false;
                     return Ok(());
                 }
+                // only support FirstPhase in this Builder because getting challenge value requires more specialized witness generation during synthesize
                 if !self.builder.borrow().witness_gen_only {
                     // clone the builder so we can re-use the circuit for both vk and pk gen
                     let builder = self.builder.borrow().clone();
+                    for threads in builder.threads.iter().skip(1) {
+                        assert!(
+                            threads.is_empty(),
+                            "GateCircuitBuilder only supports FirstPhase for now"
+                        );
+                    }
                     *self.break_points.borrow_mut() =
                         builder.assign_all(&config, &[], &[], &mut region);
                 } else {
                     let builder = self.builder.take();
                     let break_points = self.break_points.take();
-                    for (phase, (threads, break_points)) in
-                        builder.threads.into_iter().zip(break_points.into_iter()).enumerate()
+                    for (phase, (threads, break_points)) in builder
+                        .threads
+                        .into_iter()
+                        .zip(break_points.into_iter())
+                        .enumerate()
+                        .take(1)
                     {
-                        #[cfg(feature = "halo2-axiom")]
-                        if phase != 0 && !threads.is_empty() {
-                            region.next_phase();
-                        }
                         assign_threads_in(phase, threads, &config, &[], &mut region, break_points);
                     }
                 }
@@ -514,9 +517,16 @@ impl<F: ScalarField> Circuit<F> for RangeCircuitBuilder<F> {
                     first_pass = false;
                     return Ok(());
                 }
+                // only support FirstPhase in this Builder because getting challenge value requires more specialized witness generation during synthesize
                 if !self.0.builder.borrow().witness_gen_only {
                     // clone the builder so we can re-use the circuit for both vk and pk gen
                     let builder = self.0.builder.borrow().clone();
+                    for threads in builder.threads.iter().skip(1) {
+                        assert!(
+                            threads.is_empty(),
+                            "GateCircuitBuilder only supports FirstPhase for now"
+                        );
+                    }
                     *self.0.break_points.borrow_mut() = builder.assign_all(
                         &config.gate,
                         &config.lookup_advice,
@@ -524,15 +534,17 @@ impl<F: ScalarField> Circuit<F> for RangeCircuitBuilder<F> {
                         &mut region,
                     );
                 } else {
+                    #[cfg(feature = "display")]
+                    let start0 = std::time::Instant::now();
                     let builder = self.0.builder.take();
                     let break_points = self.0.break_points.take();
-                    for (phase, (threads, break_points)) in
-                        builder.threads.into_iter().zip(break_points.into_iter()).enumerate()
+                    for (phase, (threads, break_points)) in builder
+                        .threads
+                        .into_iter()
+                        .zip(break_points.into_iter())
+                        .enumerate()
+                        .take(1)
                     {
-                        #[cfg(feature = "halo2-axiom")]
-                        if phase != 0 && !threads.is_empty() {
-                            region.next_phase();
-                        }
                         assign_threads_in(
                             phase,
                             threads,
@@ -542,6 +554,8 @@ impl<F: ScalarField> Circuit<F> for RangeCircuitBuilder<F> {
                             break_points,
                         );
                     }
+                    #[cfg(feature = "display")]
+                    println!("assign threads in {:?}", start0.elapsed());
                 }
                 Ok(())
             },
diff --git a/halo2-base/src/gates/flex_gate.rs b/halo2-base/src/gates/flex_gate.rs
index a70de4b8..d5292d90 100644
--- a/halo2-base/src/gates/flex_gate.rs
+++ b/halo2-base/src/gates/flex_gate.rs
@@ -290,7 +290,7 @@ pub trait GateInstructions<F: ScalarField> {
             return ctx.assign_region_last([start], []);
         }
         let (len, hi) = a.size_hint();
-        debug_assert_eq!(Some(len), hi);
+        assert_eq!(Some(len), hi);
 
         let mut sum = *start.value();
         let cells = iter::once(start).chain(a.flat_map(|a| {
@@ -320,7 +320,7 @@ pub trait GateInstructions<F: ScalarField> {
             return Box::new(iter::once(ctx.assign_region_last([start], [])));
         }
         let (len, hi) = a.size_hint();
-        debug_assert_eq!(Some(len), hi);
+        assert_eq!(Some(len), hi);
 
         let mut sum = *start.value();
         let cells = iter::once(start).chain(a.flat_map(|a| {
@@ -532,7 +532,7 @@ pub trait GateInstructions<F: ScalarField> {
         let mut sum = F::zero();
         let a = a.into_iter();
         let (len, hi) = a.size_hint();
-        debug_assert_eq!(Some(len), hi);
+        assert_eq!(Some(len), hi);
 
         let cells = std::iter::once(Constant(F::zero())).chain(
             a.zip(indicator.into_iter()).flat_map(|(a, ind)| {
@@ -555,7 +555,7 @@ pub trait GateInstructions<F: ScalarField> {
     {
         let cells = cells.into_iter();
         let (len, hi) = cells.size_hint();
-        debug_assert_eq!(Some(len), hi);
+        assert_eq!(Some(len), hi);
 
         let ind = self.idx_to_indicator(ctx, idx, len);
         self.select_by_indicator(ctx, cells, ind)
@@ -706,15 +706,14 @@ impl<F: ScalarField> GateChip<F> {
             [a, b, Witness(sum)]
         }));
 
-        let gate_offsets = if ctx.witness_gen_only() {
-            vec![]
+        if ctx.witness_gen_only() {
+            ctx.assign_region(cells, vec![]);
         } else {
-            let (lo, hi) = cells.size_hint();
-            debug_assert_eq!(Some(lo), hi);
+            let cells = cells.collect::<Vec<_>>();
+            let lo = cells.len();
             let len = lo / 3;
-            (0..len).map(|i| 3 * i as isize).collect()
+            ctx.assign_region(cells, (0..len).map(|i| 3 * i as isize));
         };
-        ctx.assign_region(cells, gate_offsets);
         b_starts_with_one
     }
 }
@@ -899,8 +898,7 @@ impl<F: ScalarField> GateInstructions<F> for GateChip<F> {
             .iter()
             .flat_map(|byte| (0..8).map(|i| (*byte as u64 >> i) & 1))
             .map(|x| Witness(F::from(x)))
-            .take(range_bits)
-            .collect::<Vec<_>>();
+            .take(range_bits);
 
         let mut bit_cells = Vec::with_capacity(range_bits);
         let row_offset = ctx.advice.len();
diff --git a/halo2-ecc/benches/msm.rs b/halo2-ecc/benches/msm.rs
index 76141425..1a8e774d 100644
--- a/halo2-ecc/benches/msm.rs
+++ b/halo2-ecc/benches/msm.rs
@@ -82,6 +82,7 @@ fn msm_circuit(
     scalars: Vec<Fr>,
     break_points: Option<MultiPhaseThreadBreakPoints>,
 ) -> RangeCircuitBuilder<Fr> {
+    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
     let k = params.degree as usize;
     let builder = match stage {
         CircuitBuilderStage::Mock => GateThreadBuilder::mock(),
@@ -90,7 +91,6 @@ fn msm_circuit(
     };
     let builder = Mutex::new(builder);
 
-    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
     msm_bench(&builder, params, bases, scalars);
 
     let builder = builder.into_inner().unwrap();

From f3e814acca5d4b95014f314e1213a92be8086b41 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Fri, 10 Feb 2023 10:32:28 -0800
Subject: [PATCH 11/26] fix: change `debug_assert` in
 `decompose_u64_digits_to_limbs` to restrict `bit_len < 64` and
 `decompose_biguint` to `64 <= bit_len < 128` * add more comprehensive tests
 for above two functions

---
 halo2-base/src/gates/flex_gate.rs |  2 +-
 halo2-base/src/gates/tests.rs     |  2 +-
 halo2-base/src/lib.rs             |  1 +
 halo2-base/src/utils.rs           | 90 ++++++++++++++++++++++++++-----
 4 files changed, 81 insertions(+), 14 deletions(-)

diff --git a/halo2-base/src/gates/flex_gate.rs b/halo2-base/src/gates/flex_gate.rs
index d5292d90..05317338 100644
--- a/halo2-base/src/gates/flex_gate.rs
+++ b/halo2-base/src/gates/flex_gate.rs
@@ -896,7 +896,7 @@ impl<F: ScalarField> GateInstructions<F> for GateChip<F> {
         let bits = a_bytes
             .as_ref()
             .iter()
-            .flat_map(|byte| (0..8).map(|i| (*byte as u64 >> i) & 1))
+            .flat_map(|byte| (0..8u32).map(|i| (*byte as u64 >> i) & 1))
             .map(|x| Witness(F::from(x)))
             .take(range_bits);
 
diff --git a/halo2-base/src/gates/tests.rs b/halo2-base/src/gates/tests.rs
index cf6a3cb6..06406043 100644
--- a/halo2-base/src/gates/tests.rs
+++ b/halo2-base/src/gates/tests.rs
@@ -54,7 +54,7 @@ fn test_multithread_gates() {
     let mut builder = GateThreadBuilder::mock();
     gate_tests(builder.main(0), inputs);
 
-    let thread_ids = (0..4).map(|_| builder.get_new_thread_id()).collect::<Vec<_>>();
+    let thread_ids = (0..4usize).map(|_| builder.get_new_thread_id()).collect::<Vec<_>>();
     let new_threads = thread_ids
         .into_par_iter()
         .map(|id| {
diff --git a/halo2-base/src/lib.rs b/halo2-base/src/lib.rs
index ccf4f973..3b7523ae 100644
--- a/halo2-base/src/lib.rs
+++ b/halo2-base/src/lib.rs
@@ -2,6 +2,7 @@
 #![feature(trait_alias)]
 #![deny(clippy::perf)]
 #![allow(clippy::too_many_arguments)]
+#![warn(clippy::default_numeric_fallback)]
 
 // different memory allocator options:
 // mimalloc is fastest on Mac M2
diff --git a/halo2-base/src/utils.rs b/halo2-base/src/utils.rs
index 6802b71c..152971ac 100644
--- a/halo2-base/src/utils.rs
+++ b/halo2-base/src/utils.rs
@@ -62,7 +62,7 @@ pub(crate) fn decompose_u64_digits_to_limbs(
     number_of_limbs: usize,
     bit_len: usize,
 ) -> Vec<u64> {
-    debug_assert!(bit_len <= 64);
+    debug_assert!(bit_len < 64);
 
     let mut e = e.into_iter();
     let mask: u64 = (1u64 << bit_len) - 1u64;
@@ -196,22 +196,22 @@ pub fn decompose_biguint<F: BigPrimeField>(
     num_limbs: usize,
     bit_len: usize,
 ) -> Vec<F> {
-    debug_assert!(bit_len > 64 && bit_len <= 128);
+    debug_assert!((64..128).contains(&bit_len));
     let mut e = e.iter_u64_digits();
 
     let mut limb0 = e.next().unwrap_or(0) as u128;
     let mut rem = bit_len - 64;
     let mut u64_digit = e.next().unwrap_or(0);
-    limb0 |= ((u64_digit & ((1 << rem) - 1)) as u128) << 64;
+    limb0 |= ((u64_digit & ((1 << rem) - 1u64)) as u128) << 64u32;
     u64_digit >>= rem;
     rem = 64 - rem;
 
     core::iter::once(F::from_u128(limb0))
         .chain((1..num_limbs).map(|_| {
-            let mut limb: u128 = u64_digit.into();
+            let mut limb = u64_digit as u128;
             let mut bits = rem;
             u64_digit = e.next().unwrap_or(0);
-            if bit_len - bits >= 64 {
+            if bit_len >= 64 + bits {
                 limb |= (u64_digit as u128) << bits;
                 u64_digit = e.next().unwrap_or(0);
                 bits += 64;
@@ -258,13 +258,6 @@ pub fn compose(input: Vec<BigUint>, bit_len: usize) -> BigUint {
     input.iter().rev().fold(BigUint::zero(), |acc, val| (acc << bit_len) + val)
 }
 
-#[cfg(test)]
-#[test]
-fn test_signed_roundtrip() {
-    use crate::halo2_proofs::halo2curves::bn256::Fr;
-    assert_eq!(fe_to_bigint(&bigint_to_fe::<Fr>(&-BigInt::one())), -BigInt::one());
-}
-
 #[cfg(feature = "halo2-axiom")]
 pub use halo2_proofs_axiom::halo2curves::CurveAffineExt;
 
@@ -337,3 +330,76 @@ pub mod fs {
         })
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::halo2_proofs::halo2curves::bn256::Fr;
+    use num_bigint::RandomBits;
+    use rand::{rngs::OsRng, Rng};
+    use std::ops::Shl;
+
+    use super::*;
+
+    #[test]
+    fn test_signed_roundtrip() {
+        use crate::halo2_proofs::halo2curves::bn256::Fr;
+        assert_eq!(fe_to_bigint(&bigint_to_fe::<Fr>(&-BigInt::one())), -BigInt::one());
+    }
+
+    #[test]
+    fn test_decompose_biguint() {
+        let mut rng = OsRng;
+        const MAX_LIMBS: u64 = 5;
+        for bit_len in 64..128usize {
+            for num_limbs in 1..=MAX_LIMBS {
+                for _ in 0..10_000usize {
+                    let mut e: BigUint = rng.sample(RandomBits::new(num_limbs * bit_len as u64));
+                    let limbs = decompose_biguint::<Fr>(&e, num_limbs as usize, bit_len);
+
+                    let limbs2 = {
+                        let mut limbs = vec![];
+                        let mask = BigUint::one().shl(bit_len) - 1usize;
+                        for _ in 0..num_limbs {
+                            let limb = &e & &mask;
+                            let mut bytes_le = limb.to_bytes_le();
+                            bytes_le.resize(32, 0u8);
+                            limbs.push(Fr::from_bytes(&bytes_le.try_into().unwrap()).unwrap());
+                            e >>= bit_len;
+                        }
+                        limbs
+                    };
+                    assert_eq!(limbs, limbs2);
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_decompose_u64_digits_to_limbs() {
+        let mut rng = OsRng;
+        const MAX_LIMBS: u64 = 5;
+        for bit_len in 0..64usize {
+            for num_limbs in 1..=MAX_LIMBS {
+                for _ in 0..10_000usize {
+                    let mut e: BigUint = rng.sample(RandomBits::new(num_limbs * bit_len as u64));
+                    let limbs = decompose_u64_digits_to_limbs(
+                        e.to_u64_digits(),
+                        num_limbs as usize,
+                        bit_len,
+                    );
+                    let limbs2 = {
+                        let mut limbs = vec![];
+                        let mask = BigUint::one().shl(bit_len) - 1usize;
+                        for _ in 0..num_limbs {
+                            let limb = &e & &mask;
+                            limbs.push(u64::try_from(limb).unwrap());
+                            e >>= bit_len;
+                        }
+                        limbs
+                    };
+                    assert_eq!(limbs, limbs2);
+                }
+            }
+        }
+    }
+}

From 63946ce0112441e14b9d480ad4c6d712fa8aca2e Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Fri, 10 Feb 2023 11:26:09 -0800
Subject: [PATCH 12/26] feat: re-enable `secp256k1` module with updated tests

---
 halo2-base/src/gates/builder.rs        |  31 +-
 halo2-ecc/src/bigint/big_is_zero.rs    |   3 -
 halo2-ecc/src/lib.rs                   |   2 +-
 halo2-ecc/src/secp256k1/mod.rs         |  12 +-
 halo2-ecc/src/secp256k1/tests/ecdsa.rs | 380 ++++++++-----------------
 5 files changed, 143 insertions(+), 285 deletions(-)

diff --git a/halo2-base/src/gates/builder.rs b/halo2-base/src/gates/builder.rs
index 9771aa15..87749a29 100644
--- a/halo2-base/src/gates/builder.rs
+++ b/halo2-base/src/gates/builder.rs
@@ -315,21 +315,24 @@ pub fn assign_threads_in<F: ScalarField>(
     let mut lookup_advice = lookup_advice.iter();
     let mut lookup_column = lookup_advice.next();
     for ctx in threads {
-        for advice in ctx.cells_to_lookup {
-            if lookup_offset >= config.max_rows {
-                lookup_offset = 0;
-                lookup_column = lookup_advice.next();
-            }
-            let value = advice.value;
-            let lookup_column = *lookup_column.unwrap();
-            #[cfg(feature = "halo2-axiom")]
-            region.assign_advice(lookup_column, lookup_offset, Value::known(value));
-            #[cfg(not(feature = "halo2-axiom"))]
-            region
-                .assign_advice(|| "", lookup_column, lookup_offset, || Value::known(value))
-                .unwrap();
+        // if lookup_column is empty, that means there should be a single advice column and it has lookup enabled, so we don't need to copy to special lookup advice columns
+        if lookup_column.is_some() {
+            for advice in ctx.cells_to_lookup {
+                if lookup_offset >= config.max_rows {
+                    lookup_offset = 0;
+                    lookup_column = lookup_advice.next();
+                }
+                let value = advice.value;
+                let lookup_column = *lookup_column.unwrap();
+                #[cfg(feature = "halo2-axiom")]
+                region.assign_advice(lookup_column, lookup_offset, Value::known(value));
+                #[cfg(not(feature = "halo2-axiom"))]
+                region
+                    .assign_advice(|| "", lookup_column, lookup_offset, || Value::known(value))
+                    .unwrap();
 
-            lookup_offset += 1;
+                lookup_offset += 1;
+            }
         }
         for advice in ctx.advice {
             #[cfg(feature = "halo2-axiom")]
diff --git a/halo2-ecc/src/bigint/big_is_zero.rs b/halo2-ecc/src/bigint/big_is_zero.rs
index 5014d194..d6b03cd5 100644
--- a/halo2-ecc/src/bigint/big_is_zero.rs
+++ b/halo2-ecc/src/bigint/big_is_zero.rs
@@ -1,7 +1,5 @@
 use super::{CRTInteger, OverflowInteger};
 use halo2_base::{gates::GateInstructions, utils::ScalarField, AssignedValue, Context};
-use num_bigint::BigInt;
-use num_traits::Zero;
 
 /// assume you know that the limbs of `a` are all in [0, 2^{a.max_limb_bits})
 pub fn positive<F: ScalarField>(
@@ -40,7 +38,6 @@ pub fn crt<F: ScalarField>(
     ctx: &mut Context<F>,
     a: &CRTInteger<F>,
 ) -> AssignedValue<F> {
-    debug_assert_eq!(a.value, BigInt::zero());
     let out_trunc = assign::<F>(gate, ctx, &a.truncation);
     let out_native = gate.is_zero(ctx, a.native);
     gate.and(ctx, out_trunc, out_native)
diff --git a/halo2-ecc/src/lib.rs b/halo2-ecc/src/lib.rs
index 55df690a..10da56bc 100644
--- a/halo2-ecc/src/lib.rs
+++ b/halo2-ecc/src/lib.rs
@@ -9,7 +9,7 @@ pub mod ecc;
 pub mod fields;
 
 pub mod bn254;
-//pub mod secp256k1;
+pub mod secp256k1;
 
 pub use halo2_base;
 pub(crate) use halo2_base::halo2_proofs;
diff --git a/halo2-ecc/src/secp256k1/mod.rs b/halo2-ecc/src/secp256k1/mod.rs
index c81e136f..ca4528e4 100644
--- a/halo2-ecc/src/secp256k1/mod.rs
+++ b/halo2-ecc/src/secp256k1/mod.rs
@@ -1,14 +1,12 @@
-use crate::halo2_proofs::halo2curves::secp256k1::Fp;
+use crate::halo2_proofs::halo2curves::secp256k1::{Fp, Fq};
 
 use crate::ecc;
 use crate::fields::fp;
 
-#[allow(dead_code)]
-type FpChip<F> = fp::FpConfig<F, Fp>;
-#[allow(dead_code)]
-type Secp256k1Chip<F> = ecc::EccChip<F, FpChip<F>>;
-#[allow(dead_code)]
-const SECP_B: u64 = 7;
+pub type FpChip<'range, F> = fp::FpChip<'range, F, Fp>;
+pub type FqChip<'range, F> = fp::FpChip<'range, F, Fq>;
+pub type Secp256k1Chip<'chip, F> = ecc::EccChip<'chip, F, FpChip<'chip, F>>;
+pub const SECP_B: u64 = 7;
 
 #[cfg(test)]
 mod tests;
diff --git a/halo2-ecc/src/secp256k1/tests/ecdsa.rs b/halo2-ecc/src/secp256k1/tests/ecdsa.rs
index 3a91befb..ed171476 100644
--- a/halo2-ecc/src/secp256k1/tests/ecdsa.rs
+++ b/halo2-ecc/src/secp256k1/tests/ecdsa.rs
@@ -1,32 +1,43 @@
 #![allow(non_snake_case)]
-use ark_std::{end_timer, start_timer};
-use halo2_base::{utils::PrimeField, SKIP_FIRST_PASS};
-use serde::{Deserialize, Serialize};
-use std::fs::File;
-use std::marker::PhantomData;
-use std::{env::var, io::Write};
-
+use crate::fields::FpStrategy;
 use crate::halo2_proofs::{
     arithmetic::CurveAffine,
-    circuit::*,
     dev::MockProver,
     halo2curves::bn256::{Bn256, Fr, G1Affine},
     halo2curves::secp256k1::{Fp, Fq, Secp256k1Affine},
     plonk::*,
-    poly::commitment::{Params, ParamsProver},
+    poly::commitment::ParamsProver,
     transcript::{Blake2bRead, Blake2bWrite, Challenge255},
 };
-use rand_core::OsRng;
-
-use crate::fields::fp::FpConfig;
-use crate::secp256k1::FpChip;
+use crate::halo2_proofs::{
+    poly::kzg::{
+        commitment::KZGCommitmentScheme,
+        multiopen::{ProverSHPLONK, VerifierSHPLONK},
+        strategy::SingleStrategy,
+    },
+    transcript::{TranscriptReadBuffer, TranscriptWriterBuffer},
+};
+use crate::secp256k1::{FpChip, FqChip};
 use crate::{
     ecc::{ecdsa::ecdsa_verify_no_pubkey_check, EccChip},
-    fields::{fp::FpStrategy, FieldChip},
+    fields::{FieldChip, PrimeField},
 };
+use ark_std::{end_timer, start_timer};
+use halo2_base::gates::builder::{
+    CircuitBuilderStage, GateThreadBuilder, MultiPhaseThreadBreakPoints, RangeCircuitBuilder,
+};
+use halo2_base::gates::RangeChip;
+use halo2_base::utils::fs::gen_srs;
 use halo2_base::utils::{biguint_to_fe, fe_to_biguint, modulus};
+use halo2_base::Context;
+use rand_core::OsRng;
+use serde::{Deserialize, Serialize};
+use std::fs::File;
+use std::io::BufReader;
+use std::io::Write;
+use std::{fs, io::BufRead};
 
-#[derive(Serialize, Deserialize)]
+#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
 struct CircuitParams {
     strategy: FpStrategy,
     degree: u32,
@@ -38,272 +49,121 @@ struct CircuitParams {
     num_limbs: usize,
 }
 
-pub struct ECDSACircuit<F> {
-    pub r: Option<Fq>,
-    pub s: Option<Fq>,
-    pub msghash: Option<Fq>,
-    pub pk: Option<Secp256k1Affine>,
-    pub G: Secp256k1Affine,
-    pub _marker: PhantomData<F>,
-}
-impl<F: PrimeField> Default for ECDSACircuit<F> {
-    fn default() -> Self {
-        Self {
-            r: None,
-            s: None,
-            msghash: None,
-            pk: None,
-            G: Secp256k1Affine::generator(),
-            _marker: PhantomData,
-        }
-    }
+fn ecdsa_test<F: PrimeField>(
+    ctx: &mut Context<F>,
+    params: CircuitParams,
+    r: Fq,
+    s: Fq,
+    msghash: Fq,
+    pk: Secp256k1Affine,
+) {
+    std::env::set_var("LOOKUP_BITS", params.lookup_bits.to_string());
+    let range = RangeChip::<F>::default(params.lookup_bits);
+    let fp_chip = FpChip::<F>::new(&range, params.limb_bits, params.num_limbs);
+    let fq_chip = FqChip::<F>::new(&range, params.limb_bits, params.num_limbs);
+
+    let [m, r, s] =
+        [msghash, r, s].map(|x| fq_chip.load_private(ctx, FqChip::<F>::fe_to_witness(&x)));
+
+    let ecc_chip = EccChip::<F, FpChip<F>>::new(&fp_chip);
+    let pk = ecc_chip.load_private(ctx, (pk.x, pk.y));
+    // test ECDSA
+    let res = ecdsa_verify_no_pubkey_check::<F, Fp, Fq, Secp256k1Affine>(
+        &fp_chip, ctx, &pk, &r, &s, &m, 4, 4,
+    );
+    assert_eq!(res.value(), &F::one());
 }
 
-impl<F: PrimeField> Circuit<F> for ECDSACircuit<F> {
-    type Config = FpChip<F>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self::default()
-    }
-
-    fn configure(meta: &mut ConstraintSystem<F>) -> Self::Config {
-        let path = var("ECDSA_CONFIG")
-            .unwrap_or_else(|_| "./src/secp256k1/configs/ecdsa_circuit.config".to_string());
-        let params: CircuitParams = serde_json::from_reader(
-            File::open(&path).unwrap_or_else(|_| panic!("{path:?} file should exist")),
-        )
-        .unwrap();
-
-        FpChip::<F>::configure(
-            meta,
-            params.strategy,
-            &[params.num_advice],
-            &[params.num_lookup_advice],
-            params.num_fixed,
-            params.lookup_bits,
-            params.limb_bits,
-            params.num_limbs,
-            modulus::<Fp>(),
-            0,
-            params.degree as usize,
-        )
-    }
-
-    fn synthesize(
-        &self,
-        fp_chip: Self::Config,
-        mut layouter: impl Layouter<F>,
-    ) -> Result<(), Error> {
-        fp_chip.range.load_lookup_table(&mut layouter)?;
-
-        let limb_bits = fp_chip.limb_bits;
-        let num_limbs = fp_chip.num_limbs;
-        let num_fixed = fp_chip.range.gate.constants.len();
-        let lookup_bits = fp_chip.range.lookup_bits;
-        let num_advice = fp_chip.range.gate.num_advice;
-
-        let mut first_pass = SKIP_FIRST_PASS;
-        // ECDSA verify
-        layouter.assign_region(
-            || "ECDSA",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let mut aux = fp_chip.new_context(region);
-                let ctx = &mut aux;
-
-                let (r_assigned, s_assigned, m_assigned) = {
-                    let fq_chip = FpConfig::<F, Fq>::construct(fp_chip.range.clone(), limb_bits, num_limbs, modulus::<Fq>());
-
-                    let m_assigned = fq_chip.load_private(
-                        ctx,
-                        FpConfig::<F, Fq>::fe_to_witness(&self.msghash.map_or(Value::unknown(), Value::known)),
-                    );
-
-                    let r_assigned = fq_chip
-                        .load_private(ctx, FpConfig::<F,Fq>::fe_to_witness(&self.r.map_or(Value::unknown(), Value::known)));
-                    let s_assigned = fq_chip
-                        .load_private(ctx, FpConfig::<F,Fq>::fe_to_witness(&self.s.map_or(Value::unknown(), Value::known)));
-                    (r_assigned, s_assigned, m_assigned)
-                };
-
-                let ecc_chip = EccChip::<F, FpChip<F>>::construct(fp_chip.clone());
-                let pk_assigned = ecc_chip
-                    .load_private(ctx, (self.pk.map_or(Value::unknown(), |pt| Value::known(pt.x)), self.pk.map_or(Value::unknown(), |pt| Value::known(pt.y))));
-                // test ECDSA
-                let ecdsa = ecdsa_verify_no_pubkey_check::<F, Fp, Fq, Secp256k1Affine>(
-                    &ecc_chip.field_chip,
-                    ctx,
-                    &pk_assigned,
-                    &r_assigned,
-                    &s_assigned,
-                    &m_assigned,
-                    4,
-                    4,
-                );
-
-                // IMPORTANT: this copies cells to the lookup advice column to perform range check lookups
-                // This is not optional.
-                fp_chip.finalize(ctx);
-
-                #[cfg(feature = "display")]
-                if self.r.is_some() {
-                    println!("ECDSA res {ecdsa:?}");
-
-                    ctx.print_stats(&["Range"]);
-                }
-            Ok(())
-        })
-    }
-}
-
-#[cfg(test)]
-#[test]
-fn test_secp256k1_ecdsa() {
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/secp256k1");
-    folder.push("configs/ecdsa_circuit.config");
-    let params_str = std::fs::read_to_string(folder.as_path())
-        .expect("src/secp256k1/configs/ecdsa_circuit.config file should exist");
-    let params: CircuitParams = serde_json::from_str(params_str.as_str()).unwrap();
-    let K = params.degree;
-
-    // generate random pub key and sign random message
-    let G = Secp256k1Affine::generator();
+fn random_ecdsa_circuit(
+    params: CircuitParams,
+    stage: CircuitBuilderStage,
+    break_points: Option<MultiPhaseThreadBreakPoints>,
+) -> RangeCircuitBuilder<Fr> {
+    let mut builder = match stage {
+        CircuitBuilderStage::Mock => GateThreadBuilder::mock(),
+        CircuitBuilderStage::Prover => GateThreadBuilder::prover(),
+        CircuitBuilderStage::Keygen => GateThreadBuilder::keygen(),
+    };
     let sk = <Secp256k1Affine as CurveAffine>::ScalarExt::random(OsRng);
-    let pubkey = Secp256k1Affine::from(G * sk);
+    let pubkey = Secp256k1Affine::from(Secp256k1Affine::generator() * sk);
     let msg_hash = <Secp256k1Affine as CurveAffine>::ScalarExt::random(OsRng);
 
     let k = <Secp256k1Affine as CurveAffine>::ScalarExt::random(OsRng);
     let k_inv = k.invert().unwrap();
 
-    let r_point = Secp256k1Affine::from(G * k).coordinates().unwrap();
+    let r_point = Secp256k1Affine::from(Secp256k1Affine::generator() * k).coordinates().unwrap();
     let x = r_point.x();
     let x_bigint = fe_to_biguint(x);
     let r = biguint_to_fe::<Fq>(&(x_bigint % modulus::<Fq>()));
     let s = k_inv * (msg_hash + (r * sk));
 
-    let circuit = ECDSACircuit::<Fr> {
-        r: Some(r),
-        s: Some(s),
-        msghash: Some(msg_hash),
-        pk: Some(pubkey),
-        G,
-        _marker: PhantomData,
-    };
+    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
+    ecdsa_test(builder.main(0), params, r, s, msg_hash, pubkey);
 
-    let prover = MockProver::run(K, &circuit, vec![]).unwrap();
-    //prover.assert_satisfied();
-    assert_eq!(prover.verify(), Ok(()));
+    let circuit = match stage {
+        CircuitBuilderStage::Mock => {
+            builder.config(params.degree as usize, Some(20));
+            RangeCircuitBuilder::mock(builder)
+        }
+        CircuitBuilderStage::Keygen => {
+            builder.config(params.degree as usize, Some(20));
+            RangeCircuitBuilder::keygen(builder)
+        }
+        CircuitBuilderStage::Prover => RangeCircuitBuilder::prover(builder, break_points.unwrap()),
+    };
+    end_timer!(start0);
+    circuit
 }
 
-#[cfg(test)]
 #[test]
-fn bench_secp256_ecdsa() -> Result<(), Box<dyn std::error::Error>> {
-    /*
-    // Parameters for use with FpStrategy::CustomVerticalCRT
-    const DEGREE: [u32; 9] = [19, 18, 17, 16, 15, 14, 13, 12, 11];
-    const NUM_ADVICE: [usize; 9] = [1, 2, 3, 6, 12, 25, 49, 98, 201];
-    const NUM_LOOKUP: [usize; 9] = [0, 1, 1, 2, 3, 6, 12, 24, 53];
-    const NUM_FIXED: [usize; 9] = [1, 1, 1, 1, 1, 1, 1, 2, 5];
-    const LOOKUP_BITS: [usize; 9] = [18, 17, 16, 15, 14, 13, 12, 11, 10];
-    const LIMB_BITS: [usize; 9] = [88, 88, 88, 88, 88, 88, 88, 88, 88];
-    */
-
-    use halo2_base::utils::fs::gen_srs;
-
-    use crate::halo2_proofs::{
-        poly::kzg::{
-            commitment::{KZGCommitmentScheme, ParamsKZG},
-            multiopen::{ProverSHPLONK, VerifierSHPLONK},
-            strategy::SingleStrategy,
-        },
-        transcript::{TranscriptReadBuffer, TranscriptWriterBuffer},
-    };
-    use std::{env::set_var, fs, io::BufRead};
+fn test_secp256k1_ecdsa() {
+    let path = "configs/secp256k1/ecdsa_circuit.config";
+    let params: CircuitParams = serde_json::from_reader(
+        File::open(path).unwrap_or_else(|e| panic!("{path} does not exist: {e:?}")),
+    )
+    .unwrap();
+
+    let circuit = random_ecdsa_circuit(params, CircuitBuilderStage::Mock, None);
+    MockProver::run(params.degree, &circuit, vec![]).unwrap().assert_satisfied();
+}
 
+#[test]
+fn bench_secp256k1_ecdsa() -> Result<(), Box<dyn std::error::Error>> {
     let mut rng = OsRng;
-
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/secp256k1");
-
-    folder.push("configs/bench_ecdsa.config");
-    let bench_params_file = std::fs::File::open(folder.as_path()).unwrap();
-    folder.pop();
-    folder.pop();
-
-    folder.push("results/ecdsa_bench.csv");
-    let mut fs_results = std::fs::File::create(folder.as_path()).unwrap();
-    folder.pop();
-    folder.pop();
+    let config_path = "configs/secp256k1/bench_ecdsa.config";
+    let bench_params_file =
+        File::open(config_path).unwrap_or_else(|e| panic!("{config_path} does not exist: {e:?}"));
+    fs::create_dir_all("results/secp256k1").unwrap();
+    fs::create_dir_all("data").unwrap();
+    let results_path = "results/secp256k1/ecdsa_bench.csv";
+    let mut fs_results = File::create(results_path).unwrap();
     writeln!(fs_results, "degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,vk_size,proof_time,proof_size,verify_time")?;
-    folder.push("data");
-    if !folder.is_dir() {
-        std::fs::create_dir(folder.as_path())?;
-    }
 
-    let bench_params_reader = std::io::BufReader::new(bench_params_file);
+    let bench_params_reader = BufReader::new(bench_params_file);
     for line in bench_params_reader.lines() {
         let bench_params: CircuitParams = serde_json::from_str(line.unwrap().as_str()).unwrap();
-        println!(
-            "---------------------- degree = {} ------------------------------",
-            bench_params.degree
-        );
+        let k = bench_params.degree;
+        println!("---------------------- degree = {k} ------------------------------",);
 
-        {
-            folder.pop();
-            folder.push("configs/ecdsa_circuit.tmp.config");
-            set_var("ECDSA_CONFIG", &folder);
-            let mut f = std::fs::File::create(folder.as_path())?;
-            write!(f, "{}", serde_json::to_string(&bench_params).unwrap())?;
-            folder.pop();
-            folder.pop();
-            folder.push("data");
-        }
-        let params_time = start_timer!(|| "Time elapsed in circuit & params construction");
-        let params = gen_srs(bench_params.degree);
-        let circuit = ECDSACircuit::<Fr>::default();
-        end_timer!(params_time);
+        let params = gen_srs(k);
+        println!("{bench_params:?}");
+
+        let circuit = random_ecdsa_circuit(bench_params, CircuitBuilderStage::Keygen, None);
 
-        let vk_time = start_timer!(|| "Time elapsed in generating vkey");
+        let vk_time = start_timer!(|| "Generating vkey");
         let vk = keygen_vk(&params, &circuit)?;
         end_timer!(vk_time);
 
-        let pk_time = start_timer!(|| "Time elapsed in generating pkey");
+        let pk_time = start_timer!(|| "Generating pkey");
         let pk = keygen_pk(&params, vk, &circuit)?;
         end_timer!(pk_time);
 
-        // generate random pub key and sign random message
-        let G = Secp256k1Affine::generator();
-        let sk = <Secp256k1Affine as CurveAffine>::ScalarExt::random(OsRng);
-        let pubkey = Secp256k1Affine::from(G * sk);
-        let msg_hash = <Secp256k1Affine as CurveAffine>::ScalarExt::random(OsRng);
-
-        let k = <Secp256k1Affine as CurveAffine>::ScalarExt::random(OsRng);
-        let k_inv = k.invert().unwrap();
-
-        let r_point = Secp256k1Affine::from(G * k).coordinates().unwrap();
-        let x = r_point.x();
-        let x_bigint = fe_to_biguint(x);
-        let r = biguint_to_fe::<Fq>(&x_bigint);
-        let s = k_inv * (msg_hash + (r * sk));
-
-        let proof_circuit = ECDSACircuit::<Fr> {
-            r: Some(r),
-            s: Some(s),
-            msghash: Some(msg_hash),
-            pk: Some(pubkey),
-            G,
-            _marker: PhantomData,
-        };
-        let mut rng = OsRng;
-
+        let break_points = circuit.0.break_points.take();
+        drop(circuit);
         // create a proof
         let proof_time = start_timer!(|| "Proving time");
+        let circuit =
+            random_ecdsa_circuit(bench_params, CircuitBuilderStage::Prover, Some(break_points));
         let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
         create_proof::<
             KZGCommitmentScheme<Bn256>,
@@ -311,14 +171,14 @@ fn bench_secp256_ecdsa() -> Result<(), Box<dyn std::error::Error>> {
             Challenge255<G1Affine>,
             _,
             Blake2bWrite<Vec<u8>, G1Affine, Challenge255<G1Affine>>,
-            ECDSACircuit<Fr>,
-        >(&params, &pk, &[proof_circuit], &[&[]], &mut rng, &mut transcript)?;
+            _,
+        >(&params, &pk, &[circuit], &[&[]], &mut rng, &mut transcript)?;
         let proof = transcript.finalize();
         end_timer!(proof_time);
 
         let proof_size = {
-            folder.push(format!(
-                "ecdsa_circuit_proof_{}_{}_{}_{}_{}_{}_{}.data",
+            let path = format!(
+                "data/ecdsa_circuit_proof_{}_{}_{}_{}_{}_{}_{}.data",
                 bench_params.degree,
                 bench_params.num_advice,
                 bench_params.num_lookup_advice,
@@ -326,27 +186,27 @@ fn bench_secp256_ecdsa() -> Result<(), Box<dyn std::error::Error>> {
                 bench_params.lookup_bits,
                 bench_params.limb_bits,
                 bench_params.num_limbs
-            ));
-            let mut fd = std::fs::File::create(folder.as_path()).unwrap();
-            folder.pop();
-            fd.write_all(&proof).unwrap();
-            fd.metadata().unwrap().len()
+            );
+            let mut fd = File::create(&path)?;
+            fd.write_all(&proof)?;
+            let size = fd.metadata().unwrap().len();
+            fs::remove_file(path)?;
+            size
         };
 
         let verify_time = start_timer!(|| "Verify time");
         let verifier_params = params.verifier_params();
         let strategy = SingleStrategy::new(&params);
         let mut transcript = Blake2bRead::<_, _, Challenge255<_>>::init(&proof[..]);
-        assert!(verify_proof::<
+        verify_proof::<
             KZGCommitmentScheme<Bn256>,
             VerifierSHPLONK<'_, Bn256>,
             Challenge255<G1Affine>,
             Blake2bRead<&[u8], G1Affine, Challenge255<G1Affine>>,
             SingleStrategy<'_, Bn256>,
         >(verifier_params, pk.get_vk(), strategy, &[&[]], &mut transcript)
-        .is_ok());
+        .unwrap();
         end_timer!(verify_time);
-        fs::remove_file(var("ECDSA_CONFIG").unwrap())?;
 
         writeln!(
             fs_results,

From d316a820a1eda236666a6748852ad4784b81a6ae Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Fri, 10 Feb 2023 11:33:32 -0800
Subject: [PATCH 13/26] chore: fix result println

---
 halo2-ecc/src/bn254/tests/pairing.rs   | 2 +-
 halo2-ecc/src/secp256k1/tests/ecdsa.rs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/halo2-ecc/src/bn254/tests/pairing.rs b/halo2-ecc/src/bn254/tests/pairing.rs
index e8194f58..703736b7 100644
--- a/halo2-ecc/src/bn254/tests/pairing.rs
+++ b/halo2-ecc/src/bn254/tests/pairing.rs
@@ -111,7 +111,7 @@ fn bench_pairing() -> Result<(), Box<dyn std::error::Error>> {
 
     let results_path = "results/bn254/pairing_bench.csv";
     let mut fs_results = File::create(results_path).unwrap();
-    writeln!(fs_results, "degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,vk_size,proof_time,proof_size,verify_time")?;
+    writeln!(fs_results, "degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,proof_time,proof_size,verify_time")?;
 
     let bench_params_reader = BufReader::new(bench_params_file);
     for line in bench_params_reader.lines() {
diff --git a/halo2-ecc/src/secp256k1/tests/ecdsa.rs b/halo2-ecc/src/secp256k1/tests/ecdsa.rs
index ed171476..739bffc7 100644
--- a/halo2-ecc/src/secp256k1/tests/ecdsa.rs
+++ b/halo2-ecc/src/secp256k1/tests/ecdsa.rs
@@ -137,7 +137,7 @@ fn bench_secp256k1_ecdsa() -> Result<(), Box<dyn std::error::Error>> {
     fs::create_dir_all("data").unwrap();
     let results_path = "results/secp256k1/ecdsa_bench.csv";
     let mut fs_results = File::create(results_path).unwrap();
-    writeln!(fs_results, "degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,vk_size,proof_time,proof_size,verify_time")?;
+    writeln!(fs_results, "degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,proof_time,proof_size,verify_time")?;
 
     let bench_params_reader = BufReader::new(bench_params_file);
     for line in bench_params_reader.lines() {

From 7f8f0548b1b11bcbfbbeac7cc49de17bf28db870 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Fri, 10 Feb 2023 12:15:38 -0800
Subject: [PATCH 14/26] chore: update Cargo halo2_proofs_axiom to axiom/dev
 branch

* compatibility update with `halo2_proofs_axiom`
---
 halo2-base/Cargo.toml                          | 2 +-
 halo2-base/src/gates/builder.rs                | 6 +++---
 hashes/zkevm-keccak/src/keccak_packed_multi.rs | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/halo2-base/Cargo.toml b/halo2-base/Cargo.toml
index cf9ededf..a12545b6 100644
--- a/halo2-base/Cargo.toml
+++ b/halo2-base/Cargo.toml
@@ -15,7 +15,7 @@ serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 
 # Use Axiom's custom halo2 monorepo for faster proving when feature = "halo2-axiom" is on
-halo2_proofs_axiom = { git = "https://github.com/axiom-crypto/halo2.git", tag = "v2023_01_17", package = "halo2_proofs", optional = true }
+halo2_proofs_axiom = { git = "https://github.com/axiom-crypto/halo2.git", branch = "axiom/dev", package = "halo2_proofs", optional = true }
 # Use PSE halo2 and halo2curves for compatibility when feature = "halo2-pse" is on
 halo2_proofs = { git = "https://github.com/privacy-scaling-explorations/halo2.git", tag = "v2023_01_20", optional = true }
 
diff --git a/halo2-base/src/gates/builder.rs b/halo2-base/src/gates/builder.rs
index 87749a29..d58c0d2f 100644
--- a/halo2-base/src/gates/builder.rs
+++ b/halo2-base/src/gates/builder.rs
@@ -168,7 +168,7 @@ impl<F: ScalarField> GateThreadBuilder<F> {
                     let column = basic_gate.value;
                     let value = if use_unknown { Value::unknown() } else { Value::known(advice) };
                     #[cfg(feature = "halo2-axiom")]
-                    let cell = region.assign_advice(column, row_offset, value);
+                    let cell = *region.assign_advice(column, row_offset, value).cell();
                     #[cfg(not(feature = "halo2-axiom"))]
                     let cell =
                         region.assign_advice(|| "", column, row_offset, || value).unwrap().cell();
@@ -188,7 +188,7 @@ impl<F: ScalarField> GateThreadBuilder<F> {
                         #[cfg(feature = "halo2-axiom")]
                         {
                             let ncell = region.assign_advice(column, row_offset, value);
-                            region.constrain_equal(&ncell, &cell);
+                            region.constrain_equal(ncell.cell(), &cell);
                         }
                         #[cfg(not(feature = "halo2-axiom"))]
                         {
@@ -270,7 +270,7 @@ impl<F: ScalarField> GateThreadBuilder<F> {
                     #[cfg(feature = "halo2-axiom")]
                     {
                         let bcell = region.assign_advice(column, lookup_offset, value);
-                        region.constrain_equal(&acell, &bcell);
+                        region.constrain_equal(&acell, bcell.cell());
                     }
                     #[cfg(not(feature = "halo2-axiom"))]
                     {
diff --git a/hashes/zkevm-keccak/src/keccak_packed_multi.rs b/hashes/zkevm-keccak/src/keccak_packed_multi.rs
index d474f962..3edc2e1a 100644
--- a/hashes/zkevm-keccak/src/keccak_packed_multi.rs
+++ b/hashes/zkevm-keccak/src/keccak_packed_multi.rs
@@ -395,7 +395,7 @@ pub fn assign_advice_custom<'v, F: Field>(
 ) -> KeccakAssignedValue<'v, F> {
     #[cfg(feature = "halo2-axiom")]
     {
-        region.assign_advice(column, offset, value).unwrap()
+        region.assign_advice(column, offset, value)
     }
     #[cfg(feature = "halo2-pse")]
     {

From 57c3f2ae614b96c407e3a2704c51189ddb58b5db Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Sat, 18 Feb 2023 10:46:47 -0800
Subject: [PATCH 15/26] fix: `GateThreadBuilder::assign_all` now returns
 `HashMap`s of assigned cells for external equality constraints (e.g.,
 instance cells, `AssignedCells` from chips not using halo2-lib).

fix: `assign_all` was not assigning constants as desired: it was
assigning a new constant per context. This leads to confusion and
possible undesired consequences down the line.
---
 halo2-base/src/gates/builder.rs   | 122 +++++++++++++++++-------------
 halo2-base/src/gates/flex_gate.rs |   8 +-
 halo2-base/src/lib.rs             |  23 +-----
 3 files changed, 74 insertions(+), 79 deletions(-)

diff --git a/halo2-base/src/gates/builder.rs b/halo2-base/src/gates/builder.rs
index d58c0d2f..53480d01 100644
--- a/halo2-base/src/gates/builder.rs
+++ b/halo2-base/src/gates/builder.rs
@@ -4,18 +4,27 @@ use super::{
 };
 use crate::{
     halo2_proofs::{
-        circuit::{Layouter, Region, SimpleFloorPlanner, Value},
+        circuit::{self, Layouter, Region, SimpleFloorPlanner, Value},
         plonk::{Advice, Circuit, Column, ConstraintSystem, Error, Selector},
     },
     utils::ScalarField,
     Context, SKIP_FIRST_PASS,
 };
 use serde::{Deserialize, Serialize};
-use std::{cell::RefCell, collections::HashMap};
+use std::{
+    cell::RefCell,
+    collections::{HashMap, HashSet},
+};
 
 pub type ThreadBreakPoints = Vec<usize>;
 pub type MultiPhaseThreadBreakPoints = Vec<ThreadBreakPoints>;
 
+pub struct KeygenAssignments<F: ScalarField> {
+    pub assigned_advices: HashMap<(usize, usize), (circuit::Cell, usize)>, // (key = ContextCell, value = (circuit::Cell, row offset))
+    pub assigned_constants: HashMap<F, circuit::Cell>, // (key = constant, value = circuit::Cell)
+    pub break_points: MultiPhaseThreadBreakPoints,
+}
+
 #[derive(Clone, Debug, Default)]
 pub struct GateThreadBuilder<F: ScalarField> {
     /// Threads for each challenge phase
@@ -103,11 +112,10 @@ impl<F: ScalarField> GateThreadBuilder<F> {
             .map(|count| (count + max_rows - 1) / max_rows)
             .collect::<Vec<_>>();
 
-        let total_fixed: usize = self
-            .threads
-            .iter()
-            .map(|threads| threads.iter().map(|ctx| ctx.constants.len()).sum::<usize>())
-            .sum();
+        let total_fixed: usize = HashSet::<F>::from_iter(self.threads.iter().flat_map(|threads| {
+            threads.iter().flat_map(|ctx| ctx.constant_equality_constraints.iter().map(|(c, _)| *c))
+        }))
+        .len();
         let num_fixed = (total_fixed + (1 << k) - 1) >> k;
 
         let params = FlexGateConfigParams {
@@ -137,12 +145,12 @@ impl<F: ScalarField> GateThreadBuilder<F> {
     /// Assigns all advice and fixed cells, turns on selectors, imposes equality constraints.
     /// This should only be called during keygen.
     pub fn assign_all(
-        self,
+        &self,
         config: &FlexGateConfig<F>,
         lookup_advice: &[Vec<Column<Advice>>],
         q_lookup: &[Option<Selector>],
         region: &mut Region<F>,
-    ) -> MultiPhaseThreadBreakPoints {
+    ) -> KeygenAssignments<F> {
         assert!(!self.witness_gen_only);
         let use_unknown = self.use_unknown;
         let max_rows = config.max_rows;
@@ -151,27 +159,26 @@ impl<F: ScalarField> GateThreadBuilder<F> {
         let mut assigned_constants = HashMap::new();
         let mut fixed_col = 0;
         let mut fixed_offset = 0;
-        for (phase, threads) in self.threads.into_iter().enumerate() {
+        for (phase, threads) in self.threads.iter().enumerate() {
             let mut break_point = vec![];
             let mut gate_index = 0;
             let mut row_offset = 0;
-            let mut lookup_offset = 0;
-            let mut lookup_col = 0;
-            for mut ctx in threads {
+            for ctx in threads {
                 let mut basic_gate = config.basic_gates[phase]
                         .get(gate_index)
                         .unwrap_or_else(|| panic!("NOT ENOUGH ADVICE COLUMNS IN PHASE {phase}. Perhaps blinding factors were not taken into account. The max non-poisoned rows is {max_rows}"));
-                ctx.selector.resize(ctx.advice.len(), false);
+                assert_eq!(ctx.selector.len(), ctx.advice.len());
 
-                for (i, (advice, q)) in ctx.advice.iter().zip(ctx.selector.into_iter()).enumerate()
-                {
+                for (i, (advice, &q)) in ctx.advice.iter().zip(ctx.selector.iter()).enumerate() {
                     let column = basic_gate.value;
                     let value = if use_unknown { Value::unknown() } else { Value::known(advice) };
                     #[cfg(feature = "halo2-axiom")]
                     let cell = *region.assign_advice(column, row_offset, value).cell();
                     #[cfg(not(feature = "halo2-axiom"))]
-                    let cell =
-                        region.assign_advice(|| "", column, row_offset, || value).unwrap().cell();
+                    let cell = region
+                        .assign_advice(|| "", column, row_offset, || value.map(|v| *v))
+                        .unwrap()
+                        .cell();
                     assigned_advices.insert((ctx.context_id, i), (cell, row_offset));
 
                     if (q && row_offset + 4 > max_rows) || row_offset >= max_rows - 1 {
@@ -193,7 +200,7 @@ impl<F: ScalarField> GateThreadBuilder<F> {
                         #[cfg(not(feature = "halo2-axiom"))]
                         {
                             let ncell = region
-                                .assign_advice(|| "", column, row_offset, || value)
+                                .assign_advice(|| "", column, row_offset, || value.map(|v| *v))
                                 .unwrap()
                                 .cell();
                             region.constrain_equal(ncell, cell).unwrap();
@@ -209,30 +216,38 @@ impl<F: ScalarField> GateThreadBuilder<F> {
 
                     row_offset += 1;
                 }
-                for (c, i) in ctx.constants.into_iter() {
-                    #[cfg(feature = "halo2-axiom")]
-                    let cell = region.assign_fixed(config.constants[fixed_col], fixed_offset, c);
-                    #[cfg(not(feature = "halo2-axiom"))]
-                    let cell = region
-                        .assign_fixed(
-                            || "",
-                            config.constants[fixed_col],
-                            fixed_offset,
-                            || Value::known(c),
-                        )
-                        .unwrap()
-                        .cell();
-                    assigned_constants.insert((ctx.context_id, i), cell);
-                    fixed_col += 1;
-                    if fixed_col >= config.constants.len() {
-                        fixed_col = 0;
-                        fixed_offset += 1;
+                for (c, _) in ctx.constant_equality_constraints.iter() {
+                    if assigned_constants.get(c).is_none() {
+                        #[cfg(feature = "halo2-axiom")]
+                        let cell =
+                            region.assign_fixed(config.constants[fixed_col], fixed_offset, c);
+                        #[cfg(not(feature = "halo2-axiom"))]
+                        let cell = region
+                            .assign_fixed(
+                                || "",
+                                config.constants[fixed_col],
+                                fixed_offset,
+                                || Value::known(*c),
+                            )
+                            .unwrap()
+                            .cell();
+                        assigned_constants.insert(*c, cell);
+                        fixed_col += 1;
+                        if fixed_col >= config.constants.len() {
+                            fixed_col = 0;
+                            fixed_offset += 1;
+                        }
                     }
                 }
-
-                // warning: currently we assume equality constraints in thread i only involves threads <= i
-                // I guess a fix is to just rerun this several times?
-                for (left, right) in ctx.advice_equality_constraints {
+            }
+            break_points.push(break_point);
+        }
+        // we constrain equality constraints in a separate loop in case context `i` contains references to context `j` for `j > i`
+        for (phase, threads) in self.threads.iter().enumerate() {
+            let mut lookup_offset = 0;
+            let mut lookup_col = 0;
+            for ctx in threads {
+                for (left, right) in &ctx.advice_equality_constraints {
                     let (left, _) = assigned_advices[&(left.context_id, left.offset)];
                     let (right, _) = assigned_advices[&(right.context_id, right.offset)];
                     #[cfg(feature = "halo2-axiom")]
@@ -240,8 +255,8 @@ impl<F: ScalarField> GateThreadBuilder<F> {
                     #[cfg(not(feature = "halo2-axiom"))]
                     region.constrain_equal(left, right).unwrap();
                 }
-                for (left, right) in ctx.constant_equality_constraints {
-                    let left = assigned_constants[&(left.context_id, left.offset)];
+                for (left, right) in &ctx.constant_equality_constraints {
+                    let left = assigned_constants[left];
                     let (right, _) = assigned_advices[&(right.context_id, right.offset)];
                     #[cfg(feature = "halo2-axiom")]
                     region.constrain_equal(&left, &right);
@@ -249,7 +264,7 @@ impl<F: ScalarField> GateThreadBuilder<F> {
                     region.constrain_equal(left, right).unwrap();
                 }
 
-                for advice in ctx.cells_to_lookup {
+                for advice in &ctx.cells_to_lookup {
                     // if q_lookup is Some, that means there should be a single advice column and it has lookup enabled
                     let cell = advice.cell.unwrap();
                     let (acell, row_offset) = assigned_advices[&(cell.context_id, cell.offset)];
@@ -283,9 +298,8 @@ impl<F: ScalarField> GateThreadBuilder<F> {
                     lookup_offset += 1;
                 }
             }
-            break_points.push(break_point);
         }
-        break_points
+        KeygenAssignments { assigned_advices, assigned_constants, break_points }
     }
 }
 
@@ -433,7 +447,7 @@ impl<F: ScalarField> Circuit<F> for GateCircuitBuilder<F> {
                         );
                     }
                     *self.break_points.borrow_mut() =
-                        builder.assign_all(&config, &[], &[], &mut region);
+                        builder.assign_all(&config, &[], &[], &mut region).break_points;
                 } else {
                     let builder = self.builder.take();
                     let break_points = self.break_points.take();
@@ -530,12 +544,14 @@ impl<F: ScalarField> Circuit<F> for RangeCircuitBuilder<F> {
                             "GateCircuitBuilder only supports FirstPhase for now"
                         );
                     }
-                    *self.0.break_points.borrow_mut() = builder.assign_all(
-                        &config.gate,
-                        &config.lookup_advice,
-                        &config.q_lookup,
-                        &mut region,
-                    );
+                    *self.0.break_points.borrow_mut() = builder
+                        .assign_all(
+                            &config.gate,
+                            &config.lookup_advice,
+                            &config.q_lookup,
+                            &mut region,
+                        )
+                        .break_points;
                 } else {
                     #[cfg(feature = "display")]
                     let start0 = std::time::Instant::now();
diff --git a/halo2-base/src/gates/flex_gate.rs b/halo2-base/src/gates/flex_gate.rs
index 05317338..5e762ed5 100644
--- a/halo2-base/src/gates/flex_gate.rs
+++ b/halo2-base/src/gates/flex_gate.rs
@@ -7,7 +7,7 @@ use crate::{
         poly::Rotation,
     },
     utils::ScalarField,
-    AssignedValue, Context, ContextCell,
+    AssignedValue, Context,
     QuantumCell::{self, Constant, Existing, Witness, WitnessFraction},
 };
 use serde::{Deserialize, Serialize};
@@ -238,11 +238,7 @@ pub trait GateInstructions<F: ScalarField> {
 
     fn assert_is_const(&self, ctx: &mut Context<F>, a: &AssignedValue<F>, constant: &F) {
         if !ctx.witness_gen_only {
-            let c_index = ctx.assign_fixed(*constant);
-            ctx.constant_equality_constraints.push((
-                ContextCell { context_id: ctx.context_id, offset: c_index },
-                a.cell.unwrap(),
-            ));
+            ctx.constant_equality_constraints.push((*constant, a.cell.unwrap()));
         }
     }
 
diff --git a/halo2-base/src/lib.rs b/halo2-base/src/lib.rs
index 3b7523ae..0d117988 100644
--- a/halo2-base/src/lib.rs
+++ b/halo2-base/src/lib.rs
@@ -32,7 +32,6 @@ pub use halo2_proofs;
 pub use halo2_proofs_axiom as halo2_proofs;
 
 use halo2_proofs::plonk::Assigned;
-use std::collections::HashMap;
 use utils::ScalarField;
 
 pub mod gates;
@@ -121,15 +120,13 @@ pub struct Context<F: ScalarField> {
     // ========================================
     // General principle: we don't need to optimize anything specific to `witness_gen_only == false` because it is only done during keygen
     // If `witness_gen_only == false`:
-    /// the constants used in this context
-    pub constants: HashMap<F, usize>,
     /// one selector column accompanying each advice column, should have same length as `advice`
     pub selector: Vec<bool>,
     // TODO: gates that use fixed columns as selectors?
     /// A pair of context cells, both assumed to be `advice`, that must be constrained equal
     pub advice_equality_constraints: Vec<(ContextCell, ContextCell)>,
-    /// A pair of context cells, where the first is in `constant` and the second in `advice` that must be constrained equal
-    pub constant_equality_constraints: Vec<(ContextCell, ContextCell)>,
+    /// A pair of (constant, advice_cell) that must be constrained equal
+    pub constant_equality_constraints: Vec<(F, ContextCell)>,
 }
 
 impl<F: ScalarField> Context<F> {
@@ -140,7 +137,6 @@ impl<F: ScalarField> Context<F> {
             advice: Vec::new(),
             cells_to_lookup: Vec::new(),
             zero_cell: None,
-            constants: HashMap::new(),
             selector: Vec::new(),
             advice_equality_constraints: Vec::new(),
             constant_equality_constraints: Vec::new(),
@@ -151,17 +147,6 @@ impl<F: ScalarField> Context<F> {
         self.witness_gen_only
     }
 
-    pub fn assign_fixed(&mut self, c: F) -> usize {
-        let index = self.constants.get(&c);
-        if let Some(index) = index {
-            *index
-        } else {
-            let index = self.constants.len();
-            self.constants.insert(c, index);
-            index
-        }
-    }
-
     /// Push a `QuantumCell` onto the stack of advice cells to be assigned
     pub fn assign_cell(&mut self, input: impl Into<QuantumCell<F>>) {
         match input.into() {
@@ -182,11 +167,9 @@ impl<F: ScalarField> Context<F> {
             QuantumCell::Constant(c) => {
                 self.advice.push(Assigned::Trivial(c));
                 if !self.witness_gen_only {
-                    let c_cell =
-                        ContextCell { context_id: self.context_id, offset: self.assign_fixed(c) };
                     let new_cell =
                         ContextCell { context_id: self.context_id, offset: self.advice.len() - 1 };
-                    self.constant_equality_constraints.push((c_cell, new_cell));
+                    self.constant_equality_constraints.push((c, new_cell));
                 }
             }
         }

From 3763ba740c98b342a86745e8a8b65b654d4f26ce Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Sun, 19 Feb 2023 12:43:45 -0800
Subject: [PATCH 16/26] chore: update halo2-pse tag

---
 halo2-base/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/halo2-base/Cargo.toml b/halo2-base/Cargo.toml
index a12545b6..d151cf14 100644
--- a/halo2-base/Cargo.toml
+++ b/halo2-base/Cargo.toml
@@ -17,7 +17,7 @@ serde_json = "1.0"
 # Use Axiom's custom halo2 monorepo for faster proving when feature = "halo2-axiom" is on
 halo2_proofs_axiom = { git = "https://github.com/axiom-crypto/halo2.git", branch = "axiom/dev", package = "halo2_proofs", optional = true }
 # Use PSE halo2 and halo2curves for compatibility when feature = "halo2-pse" is on
-halo2_proofs = { git = "https://github.com/privacy-scaling-explorations/halo2.git", tag = "v2023_01_20", optional = true }
+halo2_proofs = { git = "https://github.com/privacy-scaling-explorations/halo2.git", tag = "v2023_02_02", optional = true }
 
 # plotting circuit layout
 plotters = { version = "0.3.0", optional = true }

From 7a7b3fb77dfb5e782b3c9193cc1d382dde165bf0 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Mon, 20 Feb 2023 23:10:52 -0800
Subject: [PATCH 17/26] feat: `GateThreadBuilder::assign_all` takes
 assigned_{advices,constants} as input instead of new hashmap, in case we want
 to constrain equalities for cells not belonging to this builder

---
 halo2-base/src/gates/builder.rs | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/halo2-base/src/gates/builder.rs b/halo2-base/src/gates/builder.rs
index 53480d01..9c97515a 100644
--- a/halo2-base/src/gates/builder.rs
+++ b/halo2-base/src/gates/builder.rs
@@ -19,6 +19,7 @@ use std::{
 pub type ThreadBreakPoints = Vec<usize>;
 pub type MultiPhaseThreadBreakPoints = Vec<ThreadBreakPoints>;
 
+#[derive(Clone, Debug, Default)]
 pub struct KeygenAssignments<F: ScalarField> {
     pub assigned_advices: HashMap<(usize, usize), (circuit::Cell, usize)>, // (key = ContextCell, value = (circuit::Cell, row offset))
     pub assigned_constants: HashMap<F, circuit::Cell>, // (key = constant, value = circuit::Cell)
@@ -150,13 +151,15 @@ impl<F: ScalarField> GateThreadBuilder<F> {
         lookup_advice: &[Vec<Column<Advice>>],
         q_lookup: &[Option<Selector>],
         region: &mut Region<F>,
+        KeygenAssignments {
+            mut assigned_advices,
+            mut assigned_constants,
+            mut break_points
+        }: KeygenAssignments<F>,
     ) -> KeygenAssignments<F> {
         assert!(!self.witness_gen_only);
         let use_unknown = self.use_unknown;
         let max_rows = config.max_rows;
-        let mut break_points = vec![];
-        let mut assigned_advices = HashMap::new();
-        let mut assigned_constants = HashMap::new();
         let mut fixed_col = 0;
         let mut fixed_offset = 0;
         for (phase, threads) in self.threads.iter().enumerate() {
@@ -446,8 +449,9 @@ impl<F: ScalarField> Circuit<F> for GateCircuitBuilder<F> {
                             "GateCircuitBuilder only supports FirstPhase for now"
                         );
                     }
-                    *self.break_points.borrow_mut() =
-                        builder.assign_all(&config, &[], &[], &mut region).break_points;
+                    *self.break_points.borrow_mut() = builder
+                        .assign_all(&config, &[], &[], &mut region, Default::default())
+                        .break_points;
                 } else {
                     let builder = self.builder.take();
                     let break_points = self.break_points.take();
@@ -550,6 +554,7 @@ impl<F: ScalarField> Circuit<F> for RangeCircuitBuilder<F> {
                             &config.lookup_advice,
                             &config.q_lookup,
                             &mut region,
+                            Default::default(),
                         )
                         .break_points;
                 } else {

From d8bb38483ca9ce2d4a5c574e3bd1ba5f1e887e3f Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Tue, 21 Feb 2023 00:44:33 -0800
Subject: [PATCH 18/26] chore: expose gate_builder.unknown

---
 halo2-base/src/gates/builder.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/halo2-base/src/gates/builder.rs b/halo2-base/src/gates/builder.rs
index 9c97515a..ba2de5d0 100644
--- a/halo2-base/src/gates/builder.rs
+++ b/halo2-base/src/gates/builder.rs
@@ -71,6 +71,10 @@ impl<F: ScalarField> GateThreadBuilder<F> {
         self.witness_gen_only
     }
 
+    pub fn use_unknown(&self) -> bool {
+        self.use_unknown
+    }
+
     pub fn thread_count(&self) -> usize {
         self.thread_count
     }

From 44b13b7a383c5856d1cc1bff04805f41bda01635 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Sun, 26 Feb 2023 18:17:24 -0800
Subject: [PATCH 19/26] BUG: `GateChip::idx_to_indicator` still had soundness
 bug where at index `idx` the value could be 0 or 1 (instead of only 1)

---
 halo2-base/src/gates/flex_gate.rs | 58 +++++++++++++++----------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/halo2-base/src/gates/flex_gate.rs b/halo2-base/src/gates/flex_gate.rs
index 5e762ed5..1d92e366 100644
--- a/halo2-base/src/gates/flex_gate.rs
+++ b/halo2-base/src/gates/flex_gate.rs
@@ -482,35 +482,35 @@ pub trait GateInstructions<F: ScalarField> {
         len: usize,
     ) -> Vec<AssignedValue<F>> {
         let mut idx = idx.into();
-        let mut ind = Vec::with_capacity(len);
-        let idx_val = idx.value().get_lower_32() as usize;
-        for i in 0..len {
-            // check ind[i] * (i - idx) == 0
-            let ind_val = F::from(idx_val == i);
-            let val = if idx_val == i { *idx.value() } else { F::zero() };
-            ctx.assign_region_smart(
-                [
-                    Constant(F::zero()),
-                    Witness(ind_val),
-                    idx,
-                    Witness(val),
-                    Constant(-F::from(i as u64)),
-                    Witness(ind_val),
-                    Constant(F::zero()),
-                ],
-                [0, 3],
-                [(1, 5)],
-                [],
-            );
-            // need to use assigned idx after i > 0 so equality constraint holds
-            if i == 0 {
-                idx = Existing(ctx.get(-5));
-            }
-            let ind_cell = ctx.get(-2);
-            self.assert_bit(ctx, ind_cell);
-            ind.push(ind_cell);
-        }
-        ind
+        (0..len)
+            .map(|i| {
+                // need to use assigned idx after i > 0 so equality constraint holds
+                if i == 0 {
+                    // unroll `is_zero` to make sure if `idx == Witness(_)` it is replaced by `Existing(_)` in later iterations
+                    let x = idx.value();
+                    let (is_zero, inv) = if x.is_zero_vartime() {
+                        (F::one(), Assigned::Trivial(F::one()))
+                    } else {
+                        (F::zero(), Assigned::Rational(F::one(), *x))
+                    };
+                    let cells = [
+                        Witness(is_zero),
+                        idx,
+                        WitnessFraction(inv),
+                        Constant(F::one()),
+                        Constant(F::zero()),
+                        idx,
+                        Witness(is_zero),
+                        Constant(F::zero()),
+                    ];
+                    ctx.assign_region_smart(cells, [0, 4], [(0, 6), (1, 5)], []); // note the two `idx` need to be constrained equal: (1, 5)
+                    idx = Existing(ctx.get(-3)); // replacing `idx` with Existing cell so future loop iterations constrain equality of all `idx`s
+                    ctx.get(-2)
+                } else {
+                    self.is_equal(ctx, idx, Constant(self.get_field_element(i as u64)))
+                }
+            })
+            .collect()
     }
 
     // performs inner product on a, indicator

From 0bd6ea00b0de3d4f761c4794d3cb829ea1f66a4e Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Wed, 8 Mar 2023 20:37:00 -0800
Subject: [PATCH 20/26] chore: update halo2-ecc version to 0.3.0

---
 halo2-ecc/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/halo2-ecc/Cargo.toml b/halo2-ecc/Cargo.toml
index 0d5041b2..a2b4fe07 100644
--- a/halo2-ecc/Cargo.toml
+++ b/halo2-ecc/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "halo2-ecc"
-version = "0.2.2"
+version = "0.3.0"
 edition = "2021"
 
 [dependencies]

From 38e1c650b61debd64c246f8b06f5238a0af20e55 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Fri, 17 Mar 2023 08:56:58 -0700
Subject: [PATCH 21/26] BUG: `FpChip::assert_equal` had `a` instead of `b` typo

---
 halo2-ecc/src/fields/fp.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/halo2-ecc/src/fields/fp.rs b/halo2-ecc/src/fields/fp.rs
index a97f1d11..6099a147 100644
--- a/halo2-ecc/src/fields/fp.rs
+++ b/halo2-ecc/src/fields/fp.rs
@@ -368,7 +368,7 @@ impl<'range, F: PrimeField, Fp: PrimeField> FieldChip<F> for FpChip<'range, F, F
         self.enforce_less_than_p(ctx, a);
         self.enforce_less_than_p(ctx, b);
         // a.native and b.native are derived from `a.truncation, b.truncation`, so no need to check if they're equal
-        for (limb_a, limb_b) in a.truncation.limbs.iter().zip(a.truncation.limbs.iter()) {
+        for (limb_a, limb_b) in a.truncation.limbs.iter().zip(b.truncation.limbs.iter()) {
             ctx.constrain_equal(limb_a, limb_b);
         }
     }

From 530e744232860641f9533c9b9f8c1fee57f54cab Mon Sep 17 00:00:00 2001
From: zhenfei <zhenfei.zhang@hotmail.com>
Date: Tue, 28 Mar 2023 22:07:29 -0400
Subject: [PATCH 22/26] [chore] cargo fmt; update .gitignore

---
 .gitignore                      | 5 +++++
 halo2-ecc/src/bigint/mod.rs     | 5 +----
 halo2-ecc/src/ecc/fixed_base.rs | 3 +--
 halo2-ecc/src/ecc/mod.rs        | 5 ++---
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/.gitignore b/.gitignore
index 65983083..5d74e42d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,8 @@ Cargo.lock
 
 /halo2_ecc/src/bn254/data/
 /halo2_ecc/src/secp256k1/data/
+
+# test and bench results
+*.data
+*.csv
+*.srs
\ No newline at end of file
diff --git a/halo2-ecc/src/bigint/mod.rs b/halo2-ecc/src/bigint/mod.rs
index f7f2886c..0684e54e 100644
--- a/halo2-ecc/src/bigint/mod.rs
+++ b/halo2-ecc/src/bigint/mod.rs
@@ -24,8 +24,7 @@ pub mod select_by_indicator;
 pub mod sub;
 pub mod sub_no_carry;
 
-#[derive(Clone, Debug, PartialEq)]
-#[derive(Default)]
+#[derive(Clone, Debug, PartialEq, Default)]
 pub enum BigIntStrategy {
     // use existing gates
     #[default]
@@ -35,8 +34,6 @@ pub enum BigIntStrategy {
     // CustomVerticalShort,
 }
 
-
-
 #[derive(Clone, Debug)]
 pub struct OverflowInteger<F: ScalarField> {
     pub limbs: Vec<AssignedValue<F>>,
diff --git a/halo2-ecc/src/ecc/fixed_base.rs b/halo2-ecc/src/ecc/fixed_base.rs
index c69a4f31..9249d878 100644
--- a/halo2-ecc/src/ecc/fixed_base.rs
+++ b/halo2-ecc/src/ecc/fixed_base.rs
@@ -230,8 +230,7 @@ where
         .chunks(cached_points.len() / points.len())
         .zip(bits.chunks(total_bits))
         .map(|(cached_points, bits)| {
-            let cached_point_window_rev =
-                cached_points.chunks(1usize << window_bits).rev();
+            let cached_point_window_rev = cached_points.chunks(1usize << window_bits).rev();
             let bit_window_rev = bits.chunks(window_bits).rev();
             let mut curr_point = None;
             // `is_started` is just a way to deal with if `curr_point` is actually identity
diff --git a/halo2-ecc/src/ecc/mod.rs b/halo2-ecc/src/ecc/mod.rs
index 1f83042d..a343ed65 100644
--- a/halo2-ecc/src/ecc/mod.rs
+++ b/halo2-ecc/src/ecc/mod.rs
@@ -524,9 +524,8 @@ where
         for _ in 0..window_bits {
             curr_point = ec_double(chip, ctx, &curr_point);
         }
-        for (cached_points, rounded_bits) in cached_points
-            .chunks(cache_size)
-            .zip(rounded_bits.chunks(rounded_bitlen))
+        for (cached_points, rounded_bits) in
+            cached_points.chunks(cache_size).zip(rounded_bits.chunks(rounded_bitlen))
         {
             let add_point = ec_select_from_bits::<F, FC>(
                 chip,

From 8a4b9e416cf5bec7ee48b9108db416b939a62a80 Mon Sep 17 00:00:00 2001
From: zhenfei <zhenfei.zhang@hotmail.com>
Date: Wed, 22 Mar 2023 16:40:57 -0400
Subject: [PATCH 23/26] sync scroll-dev-0220

---
 Cargo.toml                                | 5 +++++
 halo2-base/Cargo.toml                     | 2 +-
 halo2-ecc/Cargo.toml                      | 2 +-
 hashes/zkevm-keccak/Cargo.toml            | 2 +-
 hashes/zkevm-keccak/src/util/eth_types.rs | 7 +++----
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 9d8d2d5c..1e7d4ad9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -43,3 +43,8 @@ debug = true
 [patch."https://github.com/axiom-crypto/halo2-lib.git"]
 halo2-base = { path = "./halo2-base" }
 halo2-ecc = { path = "./halo2-ecc" }
+
+[patch."https://github.com/privacy-scaling-explorations/halo2.git"]
+halo2_proofs = { git = "https://github.com/scroll-tech/halo2.git", branch = "scroll-dev-0220"  }
+[patch."https://github.com/privacy-scaling-explorations/poseidon.git"]
+poseidon = { git = "https://github.com/scroll-tech/poseidon.git", branch = "scroll-dev-0220" }
\ No newline at end of file
diff --git a/halo2-base/Cargo.toml b/halo2-base/Cargo.toml
index d151cf14..b25cb43e 100644
--- a/halo2-base/Cargo.toml
+++ b/halo2-base/Cargo.toml
@@ -38,7 +38,7 @@ jemallocator = { version = "0.5", optional = true }
 mimalloc = { version = "0.1", default-features = false, optional = true }
 
 [features]
-default = ["halo2-axiom", "display"]
+default = ["halo2-pse", "display"]
 dev-graph = ["halo2_proofs?/dev-graph", "halo2_proofs_axiom?/dev-graph", "plotters"]
 halo2-pse = ["halo2_proofs"]
 halo2-axiom = ["halo2_proofs_axiom"]
diff --git a/halo2-ecc/Cargo.toml b/halo2-ecc/Cargo.toml
index a2b4fe07..f543f5c7 100644
--- a/halo2-ecc/Cargo.toml
+++ b/halo2-ecc/Cargo.toml
@@ -28,7 +28,7 @@ criterion = "0.4"
 criterion-macro = "0.4"
 
 [features]
-default = ["jemallocator", "halo2-axiom", "display"]
+default = ["jemallocator", "halo2-pse", "display"]
 dev-graph = ["halo2-base/dev-graph"]
 display = ["halo2-base/display"]
 halo2-pse = ["halo2-base/halo2-pse"]
diff --git a/hashes/zkevm-keccak/Cargo.toml b/hashes/zkevm-keccak/Cargo.toml
index 3b35b7a3..b1bb9344 100644
--- a/hashes/zkevm-keccak/Cargo.toml
+++ b/hashes/zkevm-keccak/Cargo.toml
@@ -27,7 +27,7 @@ rand_xorshift = "0.3"
 env_logger = "0.10"
 
 [features]
-default = ["halo2-axiom", "display"]
+default = ["halo2-pse", "display"]
 display = ["halo2-base/display"]
 halo2-pse = ["halo2-base/halo2-pse"]
 halo2-axiom = ["halo2-base/halo2-axiom"]
diff --git a/hashes/zkevm-keccak/src/util/eth_types.rs b/hashes/zkevm-keccak/src/util/eth_types.rs
index 3217f810..5a072987 100644
--- a/hashes/zkevm-keccak/src/util/eth_types.rs
+++ b/hashes/zkevm-keccak/src/util/eth_types.rs
@@ -1,7 +1,6 @@
 //! Ethereum and Evm types used to deserialize responses from web3 / geth.
 
-use crate::halo2_proofs::halo2curves::group::ff::PrimeField;
-use halo2_base::utils::BigPrimeField;
+use halo2_base::utils::PrimeField;
 
 use ethers_core::types;
 pub use ethers_core::types::{
@@ -11,10 +10,10 @@ pub use ethers_core::types::{
 
 /// Trait used to reduce verbosity with the declaration of the [`FieldExt`]
 /// trait and its repr.
-pub trait Field: BigPrimeField + PrimeField<Repr = [u8; 32]> {}
+pub trait Field: PrimeField {}
 
 // Impl custom `Field` trait
-impl<F> Field for F where F: BigPrimeField + PrimeField<Repr = [u8; 32]> {}
+impl<F> Field for F where F: PrimeField {}
 
 /// Trait used to define types that can be converted to a 256 bit scalar value.
 pub trait ToScalar<F> {

From df741bb417b67f79bdce9241106801c95abe4ce5 Mon Sep 17 00:00:00 2001
From: zhenfei <zhenfei.zhang@hotmail.com>
Date: Thu, 23 Mar 2023 16:53:44 -0400
Subject: [PATCH 24/26] [chore] update cargo.toml

---
 Cargo.toml                                           | 10 +++++-----
 halo2-ecc/configs/secp256k1/ecdsa_circuit.tmp.config |  1 +
 2 files changed, 6 insertions(+), 5 deletions(-)
 create mode 100644 halo2-ecc/configs/secp256k1/ecdsa_circuit.tmp.config

diff --git a/Cargo.toml b/Cargo.toml
index 1e7d4ad9..704d3e77 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -39,12 +39,12 @@ incremental = false
 inherits = "release"
 debug = true
 
-# patch so snark-verifier uses this crate's halo2-base 
-[patch."https://github.com/axiom-crypto/halo2-lib.git"]
-halo2-base = { path = "./halo2-base" }
-halo2-ecc = { path = "./halo2-ecc" }
+# # patch so snark-verifier uses this crate's halo2-base 
+# [patch."https://github.com/axiom-crypto/halo2-lib.git"]
+# halo2-base = { path = "./halo2-base" }
+# halo2-ecc = { path = "./halo2-ecc" }
 
 [patch."https://github.com/privacy-scaling-explorations/halo2.git"]
-halo2_proofs = { git = "https://github.com/scroll-tech/halo2.git", branch = "scroll-dev-0220"  }
+halo2_proofs = { git = "https://github.com/scroll-tech/halo2.git", branch = "halo2-ecc-snark-verifier-0323"  }
 [patch."https://github.com/privacy-scaling-explorations/poseidon.git"]
 poseidon = { git = "https://github.com/scroll-tech/poseidon.git", branch = "scroll-dev-0220" }
\ No newline at end of file
diff --git a/halo2-ecc/configs/secp256k1/ecdsa_circuit.tmp.config b/halo2-ecc/configs/secp256k1/ecdsa_circuit.tmp.config
new file mode 100644
index 00000000..32aab180
--- /dev/null
+++ b/halo2-ecc/configs/secp256k1/ecdsa_circuit.tmp.config
@@ -0,0 +1 @@
+{"strategy":"Simple","degree":19,"num_advice":1,"num_lookup_advice":1,"num_fixed":1,"lookup_bits":18,"limb_bits":88,"num_limbs":3}
\ No newline at end of file

From b1f53b28bb5c8391c2f0143f42a5596ca86602da Mon Sep 17 00:00:00 2001
From: zhenfei <zhenfei.zhang@hotmail.com>
Date: Tue, 28 Mar 2023 22:23:17 -0400
Subject: [PATCH 25/26] [chore] update cargo

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 704d3e77..a37bc30e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,7 +2,7 @@
 members = [
     "halo2-base",
     "halo2-ecc",
-    "hashes/zkevm-keccak",
+    # "hashes/zkevm-keccak",
 ]
 
 [profile.dev]

From 123b53b9d0ace91ae8d2fd240f2e607457dcd1a2 Mon Sep 17 00:00:00 2001
From: zhenfei <zhenfei.zhang@hotmail.com>
Date: Wed, 12 Apr 2023 17:49:31 -0400
Subject: [PATCH 26/26] [chore] rename halo2-pse to halo2-scroll

---
 Cargo.toml                                     |  5 -----
 README.md                                      |  4 ++--
 halo2-base/Cargo.toml                          |  6 +++---
 halo2-base/src/lib.rs                          |  8 ++++----
 halo2-base/src/utils.rs                        | 16 ++++++++--------
 halo2-ecc/Cargo.toml                           |  4 ++--
 hashes/zkevm-keccak/Cargo.toml                 |  2 +-
 hashes/zkevm-keccak/src/keccak_packed_multi.rs |  4 ++--
 8 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index a37bc30e..6a9193a5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -39,11 +39,6 @@ incremental = false
 inherits = "release"
 debug = true
 
-# # patch so snark-verifier uses this crate's halo2-base 
-# [patch."https://github.com/axiom-crypto/halo2-lib.git"]
-# halo2-base = { path = "./halo2-base" }
-# halo2-ecc = { path = "./halo2-ecc" }
-
 [patch."https://github.com/privacy-scaling-explorations/halo2.git"]
 halo2_proofs = { git = "https://github.com/scroll-tech/halo2.git", branch = "halo2-ecc-snark-verifier-0323"  }
 [patch."https://github.com/privacy-scaling-explorations/poseidon.git"]
diff --git a/README.md b/README.md
index 34a27e8b..d0b799f9 100644
--- a/README.md
+++ b/README.md
@@ -58,10 +58,10 @@ The default features are: "jemallocator", "halo2-axiom", "display".
 You can turn off "display" for a very small performance increase, where certain statistics about the circuit are not
 computed and printed.
 
-**Exactly one** of "halo2-axiom" or "halo2-pse" feature should be turned on at all times.
+**Exactly one** of "halo2-axiom" or "halo2-scroll" feature should be turned on at all times.
 
 - The "halo2-axiom" feature uses our [`halo2_proofs`](https://github.com/axiom-crypto/halo2) which is a fork of the [PSE one](https://github.com/privacy-scaling-explorations/halo2) which we have slightly optimized for proving speed.
-- The "halo2-pse" feature uses the Privacy Scaling Explorations [`halo2_proofs`](https://github.com/privacy-scaling-explorations/halo2) which is the most stable and has the most reviewers.
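+- The "halo2-scroll" feature uses the Privacy Scaling Explorations [`halo2_proofs`](https://github.com/privacy-scaling-explorations/halo2) which is the most stable and has the most reviewers; in this workspace it is patched to Scroll's fork, [`scroll-tech/halo2`](https://github.com/scroll-tech/halo2), via `[patch]` in the root `Cargo.toml`.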
 
 We guarantee that the proofs generated by the two forks are identical.
 
diff --git a/halo2-base/Cargo.toml b/halo2-base/Cargo.toml
index b25cb43e..d7882107 100644
--- a/halo2-base/Cargo.toml
+++ b/halo2-base/Cargo.toml
@@ -16,7 +16,7 @@ serde_json = "1.0"
 
 # Use Axiom's custom halo2 monorepo for faster proving when feature = "halo2-axiom" is on
 halo2_proofs_axiom = { git = "https://github.com/axiom-crypto/halo2.git", branch = "axiom/dev", package = "halo2_proofs", optional = true }
-# Use PSE halo2 and halo2curves for compatibility when feature = "halo2-pse" is on
+# Use PSE halo2 and halo2curves for compatibility when feature = "halo2-scroll" is on
 halo2_proofs = { git = "https://github.com/privacy-scaling-explorations/halo2.git", tag = "v2023_02_02", optional = true }
 
 # plotting circuit layout
@@ -38,9 +38,9 @@ jemallocator = { version = "0.5", optional = true }
 mimalloc = { version = "0.1", default-features = false, optional = true }
 
 [features]
-default = ["halo2-pse", "display"]
+default = ["halo2-scroll", "display"]
 dev-graph = ["halo2_proofs?/dev-graph", "halo2_proofs_axiom?/dev-graph", "plotters"]
-halo2-pse = ["halo2_proofs"]
+halo2-scroll = ["halo2_proofs"]
 halo2-axiom = ["halo2_proofs_axiom"]
 display = []
 profile = ["halo2_proofs_axiom?/profile"]
diff --git a/halo2-base/src/lib.rs b/halo2-base/src/lib.rs
index 0d117988..b468446a 100644
--- a/halo2-base/src/lib.rs
+++ b/halo2-base/src/lib.rs
@@ -18,15 +18,15 @@ use mimalloc::MiMalloc;
 #[global_allocator]
 static GLOBAL: MiMalloc = MiMalloc;
 
-#[cfg(all(feature = "halo2-pse", feature = "halo2-axiom"))]
+#[cfg(all(feature = "halo2-scroll", feature = "halo2-axiom"))]
 compile_error!(
-    "Cannot have both \"halo2-pse\" and \"halo2-axiom\" features enabled at the same time!"
+    "Cannot have both \"halo2-scroll\" and \"halo2-axiom\" features enabled at the same time!"
 );
-#[cfg(not(any(feature = "halo2-pse", feature = "halo2-axiom")))]
+#[cfg(not(any(feature = "halo2-scroll", feature = "halo2-axiom")))]
-compile_error!("Must enable exactly one of \"halo2-pse\" or \"halo2-axiom\" features to choose which halo2_proofs crate to use.");
+compile_error!("Must enable exactly one of \"halo2-scroll\" or \"halo2-axiom\" features to choose which halo2_proofs crate to use.");
 
 // use gates::flex_gate::MAX_PHASE;
-#[cfg(feature = "halo2-pse")]
+#[cfg(feature = "halo2-scroll")]
 pub use halo2_proofs;
 #[cfg(feature = "halo2-axiom")]
 pub use halo2_proofs_axiom as halo2_proofs;
@@ -39,7 +39,7 @@ pub mod utils;
 
 #[cfg(feature = "halo2-axiom")]
 pub const SKIP_FIRST_PASS: bool = false;
-#[cfg(feature = "halo2-pse")]
+#[cfg(feature = "halo2-scroll")]
 pub const SKIP_FIRST_PASS: bool = true;
 
 #[derive(Clone, Copy, Debug)]
diff --git a/halo2-base/src/utils.rs b/halo2-base/src/utils.rs
index 152971ac..149ef544 100644
--- a/halo2-base/src/utils.rs
+++ b/halo2-base/src/utils.rs
@@ -1,4 +1,4 @@
-#[cfg(feature = "halo2-pse")]
+#[cfg(feature = "halo2-scroll")]
 use crate::halo2_proofs::arithmetic::CurveAffine;
 use crate::halo2_proofs::{arithmetic::FieldExt, circuit::Value};
 use core::hash::Hash;
@@ -50,10 +50,10 @@ where
 
 // Later: will need to separate BigPrimeField from ScalarField when Goldilocks is introduced
 
-#[cfg(feature = "halo2-pse")]
+#[cfg(feature = "halo2-scroll")]
 pub trait BigPrimeField = FieldExt<Repr = [u8; 32]> + Hash;
 
-#[cfg(feature = "halo2-pse")]
+#[cfg(feature = "halo2-scroll")]
 pub trait ScalarField = FieldExt + Hash;
 
 #[inline(always)]
@@ -117,7 +117,7 @@ pub fn biguint_to_fe<F: BigPrimeField>(e: &BigUint) -> F {
         F::from_u64_digits(&e.to_u64_digits())
     }
 
-    #[cfg(feature = "halo2-pse")]
+    #[cfg(feature = "halo2-scroll")]
     {
         let mut repr = F::Repr::default();
         let bytes = e.to_bytes_le();
@@ -137,7 +137,7 @@ pub fn bigint_to_fe<F: BigPrimeField>(e: &BigInt) -> F {
             F::from_u64_digits(&digits)
         }
     }
-    #[cfg(feature = "halo2-pse")]
+    #[cfg(feature = "halo2-scroll")]
     {
         let (sign, bytes) = e.to_bytes_le();
         let mut repr = F::Repr::default();
@@ -185,7 +185,7 @@ pub fn decompose_fe_to_u64_limbs<F: ScalarField>(
         e.to_u64_limbs(number_of_limbs, bit_len)
     }
 
-    #[cfg(feature = "halo2-pse")]
+    #[cfg(feature = "halo2-scroll")]
     {
         decompose_u64_digits_to_limbs(fe_to_biguint(e).iter_u64_digits(), number_of_limbs, bit_len)
     }
@@ -261,7 +261,7 @@ pub fn compose(input: Vec<BigUint>, bit_len: usize) -> BigUint {
 #[cfg(feature = "halo2-axiom")]
 pub use halo2_proofs_axiom::halo2curves::CurveAffineExt;
 
-#[cfg(feature = "halo2-pse")]
+#[cfg(feature = "halo2-scroll")]
 pub trait CurveAffineExt: CurveAffine {
     /// Unlike the `Coordinates` trait, this just returns the raw affine coordinantes without checking `is_on_curve`
     fn into_coordinates(self) -> (Self::Base, Self::Base) {
@@ -269,7 +269,7 @@ pub trait CurveAffineExt: CurveAffine {
         (*coordinates.x(), *coordinates.y())
     }
 }
-#[cfg(feature = "halo2-pse")]
+#[cfg(feature = "halo2-scroll")]
 impl<C: CurveAffine> CurveAffineExt for C {}
 
 pub mod fs {
diff --git a/halo2-ecc/Cargo.toml b/halo2-ecc/Cargo.toml
index f543f5c7..15a05f0f 100644
--- a/halo2-ecc/Cargo.toml
+++ b/halo2-ecc/Cargo.toml
@@ -28,10 +28,10 @@ criterion = "0.4"
 criterion-macro = "0.4"
 
 [features]
-default = ["jemallocator", "halo2-pse", "display"]
+default = ["jemallocator", "halo2-scroll", "display"]
 dev-graph = ["halo2-base/dev-graph"]
 display = ["halo2-base/display"]
-halo2-pse = ["halo2-base/halo2-pse"]
+halo2-scroll = ["halo2-base/halo2-scroll"]
 halo2-axiom = ["halo2-base/halo2-axiom"]
 jemallocator = ["halo2-base/jemallocator"]
 mimalloc = ["halo2-base/mimalloc"]
diff --git a/hashes/zkevm-keccak/Cargo.toml b/hashes/zkevm-keccak/Cargo.toml
index b1bb9344..413a2b9c 100644
--- a/hashes/zkevm-keccak/Cargo.toml
+++ b/hashes/zkevm-keccak/Cargo.toml
@@ -27,7 +27,7 @@ rand_xorshift = "0.3"
 env_logger = "0.10"
 
 [features]
-default = ["halo2-pse", "display"]
+default = ["halo2-scroll", "display"]
 display = ["halo2-base/display"]
-halo2-pse = ["halo2-base/halo2-pse"]
+halo2-scroll = ["halo2-base/halo2-scroll"]
 halo2-axiom = ["halo2-base/halo2-axiom"]
diff --git a/hashes/zkevm-keccak/src/keccak_packed_multi.rs b/hashes/zkevm-keccak/src/keccak_packed_multi.rs
index 3edc2e1a..33b975e1 100644
--- a/hashes/zkevm-keccak/src/keccak_packed_multi.rs
+++ b/hashes/zkevm-keccak/src/keccak_packed_multi.rs
@@ -397,7 +397,7 @@ pub fn assign_advice_custom<'v, F: Field>(
     {
         region.assign_advice(column, offset, value)
     }
-    #[cfg(feature = "halo2-pse")]
+    #[cfg(feature = "halo2-scroll")]
     {
         region
             .assign_advice(|| format!("assign advice {}", offset), column, offset, || value)
@@ -415,7 +415,7 @@ pub fn assign_fixed_custom<F: Field>(
     {
         region.assign_fixed(column, offset, value);
     }
-    #[cfg(feature = "halo2-pse")]
+    #[cfg(feature = "halo2-scroll")]
     {
         region
             .assign_fixed(