Specs for opcode DIV and MOD (#149)

* add mul * address comments * rename and update doc
privacy-scaling-explorations · Apr 1, 2022 · a6b8a20 · a6b8a20
1 parent 81028ae
commit a6b8a20
Show file tree

Hide file tree

Showing 32 changed files with 467 additions and 303 deletions.
diff --git a/specs/opcode/01ADD_03SUB.md b/specs/opcode/01ADD_03SUB.md
@@ -40,4 +40,4 @@ It always computes `y = (a + x) % 2**256`,
 
 ## Code
 
-See `src/zkevm_specs/opcode/add.py`
+See `src/zkevm_specs/opcode/add_sub.py`
diff --git a/specs/opcode/02MUL.md b/specs/opcode/02MUL.md
diff --git a/specs/opcode/02MUL_04DIV_06MOD.md b/specs/opcode/02MUL_04DIV_06MOD.md
@@ -0,0 +1,107 @@
+# MUL, DIV, and MOD opcodes
+
+## Procedure
+
+### EVM behavior
+
+Pop two EVM words `a` and `b` from the stack, and push `c` to the stack, where `c` is computed as
+
+- for opcode `MUL`, compute `c = (a * b) % 2^256`
+- for opcode `DIV`, compute `c = a // b` when `b != 0` otherwise `c = 0`
+- for opcode `MOD`, compute `c = a mod b` when `b != 0` otherwise `c = 0`
+
+### Circuit behavior
+
+To prove the `MUL/DIV/MOD` opcode, we first construct a `MulAddWordsGadget` that proves `a * b + c = d (mod 2^256)` where `a, b, c, d` are all 256-bit words.
+As usual, we use 32 cells to represent each word shown as the table below, where
+each cell holds a 8-bit value.
+
+| 0  |  1 |  2 | 3  |  $\dots$  | 8  | $\dots$ |  31 |
+|:---:|:---:|:---:|:---:|:--:|:--:|:--:|:--:|
+|$a_0$|$a_1$|$a_2$|$a_3$| $\dots$ |$a_8$| $\dots$ |$a_{31}$|
+|$b_0$|$b_1$|$b_2$|$b_3$| $\dots$ |$b_8$| $\dots$ |$b_{31}$|
+|$c_0$|$c_1$|$c_2$|$c_3$| $\dots$ |$c_8$| $\dots$ |$c_{31}$|
+|$d_0$|$d_1$|$d_2$|$d_3$| $\dots$ |$d_8$| $\dots$ |$d_{31}$|
+
+We then combine $a$ and $b$ into four 64-bit limbs, denoted by $A_i, B_i$ ($i \in \{0, 1, 2, 3\}$), and split $c$ and $d$ into two 128-bit limbs, denoted by $C_{lo}, C_{hi}$ and $D_{lo}, D_{hi}$.
+
+|      A limbs       |    B limbs          |    C limbs         |  D limbs   |
+|--------------------|---------------------|--------------------|------------|
+|$A_0 = \sum_0^7 {a_i \cdot 256^i}$ | $B_0 = \sum_0^7 {b_i \cdot 256^i}$ | $C_{lo} = \sum_0^{15} {c_i \cdot 256^i}$ | $D_{lo} = \sum_0^{15} {d_i \cdot 256^i}$ |
+|$A_1 = \sum_0^7 {a_{i+8} \cdot 256^i}$ | $B_1 = \sum_0^7 {b_{i+8} \cdot 256^i}$ | $C_{hi} = \sum_0^{15} {c_{i+16} \cdot 256^i}$ | $D_{hi} = \sum_0^{15} {d_{i+16} \cdot 256^i}$ |
+|$A_2 = \sum_0^7 {a_{i+16} \cdot 256^i}$ | $B_2 = \sum_0^7 {b_{i+16} \cdot 256^i}$ | | |
+|$A_3 = \sum_0^7 {a_{i+24} \cdot 256^i}$ | $B_3 = \sum_0^7 {b_{i+24} \cdot 256^i}$ | | |
+
+The gadget computes four intermediate values as follows:
+
+$$
+\begin{align*}
+t_0 &= A_0B_0 \\
+t_1 &= A_0B_1 + A_1B_0 \\
+t_2 &= A_0B_2 + A_1B_1 + A_2B_0\\
+t_3 &= A_0B_3 + A_1B_2 + A_2B_1 + A_3B_0
+\end{align*}
+$$
+
+, and the constraints are:
+
+- $t_0 + t_1 \cdot 2^{64} + C_{lo} == D_{lo} + carry_{lo} \cdot 2^{128}$
+- $t_2 + t_3 \cdot 2^{64} + C_{hi} + carry_{lo} == D_{hi} + carry_{hi} \cdot 2^{128}$
+- $carry_{lo} \in [0, 2^{66})$
+- $carry_{hi} \in [0, 2^{66})$
+
+Note that the $carry_{lo}, carry_{hi}$ should be in the range of $[0, 2^{66})$.
+To make it easy to check the range, we relax the range to $[0, 2^{72})$ and use
+9 byte cells to check the range of $carry_{lo}$ and $carry_{hi}$.
+In addition, the `MulAddWordsGadget` returns an `overflow` expression that sums
+up all parts that are over 256-bit value in `a * b + c`.
+
+$$
+overflow = carry_{hi} + A_1B_3 + A_2B_2 + A_3B_1 + A_2B_3 + A_3B_2 + A_3B_3
+$$
+
+Now back to the opcode circuit for `MUL`, `DIV`, and `MOD`, we first construct
+the `MulAddWordsGadget` with four EVM words `a, b, c, d`.
+Based on different opcode cases, we constrain the stack pops and pushes as follows
+
+- for `MUL`, two stack pops are `a` and `b`, and the stack push is `d`
+- for `DIV`, two stack pops are `d` and `b`, and the stack push is `a` if `b != 0`; otherwise 0.
+- for `MOD`, two stack pops are `d` and `b`, and the stack push is `c` if `b != 0`; otherwise 0.
+
+The opcode circuit also adds extra constraints for different opcodes:
+
+- if the opcode is `MUL`, constrain `c == 0`.
+- if the opcode is not `MUL`,
+  - use a `LtWordGadget` to constrain `c < b` when `b != 0`
+  - constrain `overflow == 0`
+
+## Constraints
+
+1. opcodeId checks
+   1. opId === OpcodeId(0x02) for `MUL`
+   2. opId === OpcodeId(0x04) for `DIV`
+   3. opId === OpcodeId(0x06) for `MOD`
+2. state transition:
+   - gc + 3
+   - stack_pointer + 1
+   - pc + 1
+   - gas + 5
+3. Lookups: 3 busmapping lookups
+   - top of the stack :
+      - when it's `MUL`, `a` is at the top of the stack
+      - when it's `DIV`, `d` is at the top of the stack.
+      - when it's `MOD`, `d` is at the top of the stack.
+   - `b` is at the second position of the stack
+   - new top of the stack
+      - when it's `MUL`, `d` is at the new top of the stack
+      - when it's `DIV`, `a` is at the new top of the stack when `b != 0`, otherwise 0
+      - when it's `MOD`, `c` is at the new top of the stack when `b != 0`, otherwise 0
+
+## Exceptions
+
+1. stack underflow: `1023 <= stack_pointer <= 1024`
+2. out of gas: remaining gas is not enough
+
+## Code
+
+See `src/zkevm_specs/evm/execution/mul_div_mod.py`
diff --git a/src/zkevm_specs/evm/execution/__init__.py b/src/zkevm_specs/evm/execution/__init__.py
@@ -10,7 +10,7 @@
 from .copy_to_log import *
 
 # Opcode's successful cases
-from .add import *
+from .add_sub import *
 from .block_coinbase import *
 from .block_timestamp import *
 from .block_number import *
@@ -25,6 +25,7 @@
 from .iszero import *
 from .jump import *
 from .jumpi import *
+from .mul_div_mod import *
 from .origin import *
 from .push import *
 from .slt_sgt import *
@@ -40,9 +41,10 @@
     ExecutionState.BeginTx: begin_tx,
     ExecutionState.EndTx: end_tx,
     ExecutionState.EndBlock: end_block,
-    ExecutionState.CopyToMemory: copy_to_memory,
     ExecutionState.CopyCodeToMemory: copy_code_to_memory,
-    ExecutionState.ADD: add,
+    ExecutionState.CopyToMemory: copy_to_memory,
+    ExecutionState.ADD: add_sub,
+    ExecutionState.MUL: mul_div_mod,
     ExecutionState.ORIGIN: origin,
     ExecutionState.CALLER: caller,
     ExecutionState.CALLVALUE: callvalue,

diff --git a/src/zkevm_specs/evm/execution/add.py → src/zkevm_specs/evm/execution/add_sub.py b/src/zkevm_specs/evm/execution/add.py → src/zkevm_specs/evm/execution/add_sub.py
@@ -2,7 +2,7 @@
 from ..opcode import Opcode
 
 
-def add(instruction: Instruction):
+def add_sub(instruction: Instruction):
     opcode = instruction.opcode_lookup(True)
 
     is_sub, _ = instruction.pair_select(opcode, Opcode.SUB, Opcode.ADD)

diff --git a/src/zkevm_specs/evm/execution/block_coinbase.py b/src/zkevm_specs/evm/execution/block_coinbase.py
@@ -14,7 +14,7 @@ def coinbase(instruction: Instruction):
         instruction.block_context_lookup(BlockContextFieldTag.Coinbase),
         # NOTE: We can replace this with N_BYTES_WORD if we reuse the 32 byte RLC constraint in
         # all places. See: https://github.com/appliedzkp/zkevm-specs/issues/101
-        instruction.rlc_to_fq_exact(instruction.stack_push(), N_BYTES_ACCOUNT_ADDRESS),
+        instruction.rlc_to_fq(instruction.stack_push(), N_BYTES_ACCOUNT_ADDRESS),
     )
 
     instruction.step_state_transition_in_same_context(

diff --git a/src/zkevm_specs/evm/execution/block_number.py b/src/zkevm_specs/evm/execution/block_number.py
@@ -11,7 +11,7 @@ def number(instruction: Instruction):
     # check block table for number
     instruction.constrain_equal(
         instruction.block_context_lookup(BlockContextFieldTag.Number),
-        instruction.rlc_to_fq_exact(instruction.stack_push(), N_BYTES_U64),
+        instruction.rlc_to_fq(instruction.stack_push(), N_BYTES_U64),
     )
 
     instruction.step_state_transition_in_same_context(

diff --git a/src/zkevm_specs/evm/execution/block_timestamp.py b/src/zkevm_specs/evm/execution/block_timestamp.py
@@ -11,7 +11,7 @@ def timestamp(instruction: Instruction):
     # check block table for timestamp
     instruction.constrain_equal(
         instruction.block_context_lookup(BlockContextFieldTag.Timestamp),
-        instruction.rlc_to_fq_exact(instruction.stack_push(), N_BYTES_U64),
+        instruction.rlc_to_fq(instruction.stack_push(), N_BYTES_U64),
     )
 
     instruction.step_state_transition_in_same_context(

diff --git a/src/zkevm_specs/evm/execution/call.py b/src/zkevm_specs/evm/execution/call.py
@@ -42,8 +42,8 @@ def call(instruction: Instruction):
     instruction.constrain_bool(is_success)
 
     # Recomposition of random linear combination to integer
-    callee_address = instruction.rlc_to_fq_unchecked(callee_address_rlc, N_BYTES_ACCOUNT_ADDRESS)
-    gas = instruction.rlc_to_fq_unchecked(gas_rlc, N_BYTES_GAS)
+    callee_address = instruction.rlc_to_fq(callee_address_rlc, N_BYTES_ACCOUNT_ADDRESS)
+    gas = instruction.rlc_to_fq(gas_rlc, N_BYTES_GAS)
     gas_is_u64 = instruction.is_zero(instruction.sum(gas_rlc.le_bytes[N_BYTES_GAS:]))
     cd_offset, cd_length = instruction.memory_offset_and_length(cd_offset_rlc, cd_length_rlc)
     rd_offset, rd_length = instruction.memory_offset_and_length(rd_offset_rlc, rd_length_rlc)
@@ -87,7 +87,7 @@ def call(instruction: Instruction):
     callee_nonce = instruction.account_read(callee_address, AccountFieldTag.Nonce)
     callee_code_hash = instruction.account_read(callee_address, AccountFieldTag.CodeHash)
     is_empty_code_hash = instruction.is_equal(
-        callee_code_hash, instruction.rlc_encode(EMPTY_CODE_HASH)
+        callee_code_hash, instruction.rlc_encode(EMPTY_CODE_HASH, 32)
     )
     is_account_empty = (
         instruction.is_zero(callee_nonce)

diff --git a/src/zkevm_specs/evm/execution/calldatacopy.py b/src/zkevm_specs/evm/execution/calldatacopy.py
@@ -13,7 +13,7 @@ def calldatacopy(instruction: Instruction):
 
     # convert rlc to FQ
     memory_offset, length = instruction.memory_offset_and_length(memory_offset_word, length_word)
-    data_offset = instruction.rlc_to_fq_exact(data_offset_word, N_BYTES_MEMORY_ADDRESS)
+    data_offset = instruction.rlc_to_fq(data_offset_word, N_BYTES_MEMORY_ADDRESS)
 
     tx_id = instruction.call_context_lookup(CallContextFieldTag.TxId, RW.Read)
     if instruction.curr.is_root:

diff --git a/src/zkevm_specs/evm/execution/calldataload.py b/src/zkevm_specs/evm/execution/calldataload.py
@@ -10,7 +10,7 @@ def calldataload(instruction: Instruction):
     instruction.constrain_equal(opcode, Opcode.CALLDATALOAD)
 
     # offset is the 64-bit offset to start reading 32-bytes from start of calldata.
-    offset = instruction.rlc_to_fq_exact(instruction.stack_pop(), n_bytes=8)
+    offset = instruction.rlc_to_fq(instruction.stack_pop(), n_bytes=8)
 
     tx_id = instruction.call_context_lookup(CallContextFieldTag.TxId, RW.Read)
 

diff --git a/src/zkevm_specs/evm/execution/calldatasize.py b/src/zkevm_specs/evm/execution/calldatasize.py
@@ -14,7 +14,7 @@ def calldatasize(instruction: Instruction):
         instruction.call_context_lookup(CallContextFieldTag.CallDataLength),
         # NOTE: We can replace this with N_BYTES_WORD if we reuse the 32 byte RLC constraint in
         # all places. See: https://github.com/appliedzkp/zkevm-specs/issues/101
-        instruction.rlc_to_fq_exact(instruction.stack_push(), N_BYTES_MEMORY_ADDRESS),
+        instruction.rlc_to_fq(instruction.stack_push(), N_BYTES_MEMORY_ADDRESS),
     )
 
     instruction.step_state_transition_in_same_context(

diff --git a/src/zkevm_specs/evm/execution/caller.py b/src/zkevm_specs/evm/execution/caller.py
@@ -14,7 +14,7 @@ def caller(instruction: Instruction):
         instruction.call_context_lookup(CallContextFieldTag.CallerAddress),
         # NOTE: We can replace this with N_BYTES_WORD if we reuse the 32 byte RLC constraint in
         # all places. See: https://github.com/appliedzkp/zkevm-specs/issues/101
-        instruction.rlc_to_fq_exact(instruction.stack_push(), N_BYTES_ACCOUNT_ADDRESS),
+        instruction.rlc_to_fq(instruction.stack_push(), N_BYTES_ACCOUNT_ADDRESS),
     )
 
     instruction.step_state_transition_in_same_context(

diff --git a/src/zkevm_specs/evm/execution/codecopy.py b/src/zkevm_specs/evm/execution/codecopy.py
@@ -15,7 +15,7 @@ def codecopy(instruction: Instruction):
     )
 
     memory_offset, size = instruction.memory_offset_and_length(memory_offset_word, size_word)
-    code_offset = instruction.rlc_to_fq_exact(code_offset_word, N_BYTES_MEMORY_ADDRESS)
+    code_offset = instruction.rlc_to_fq(code_offset_word, N_BYTES_MEMORY_ADDRESS)
 
     code_size = instruction.bytecode_length(instruction.curr.code_source)
 

diff --git a/src/zkevm_specs/evm/execution/extcodehash.py b/src/zkevm_specs/evm/execution/extcodehash.py
@@ -9,7 +9,7 @@
 def extcodehash(instruction: Instruction):
     opcode = instruction.opcode_lookup(True)
 
-    address = instruction.rlc_to_fq_exact(instruction.stack_pop(), 20)
+    address = instruction.rlc_to_fq(instruction.stack_pop(), 20)
 
     tx_id = instruction.call_context_lookup(CallContextFieldTag.TxId)
     is_warm = instruction.add_account_to_access_list(tx_id, address, instruction.reversion_info())
@@ -21,11 +21,11 @@ def extcodehash(instruction: Instruction):
     is_empty = (
         instruction.is_zero(nonce)
         * instruction.is_zero(balance)
-        * instruction.is_equal(code_hash, instruction.rlc_encode(EMPTY_CODE_HASH))
+        * instruction.is_equal(code_hash, instruction.rlc_encode(EMPTY_CODE_HASH, 32))
     )
 
     instruction.constrain_equal(
-        instruction.select(is_empty, FQ(0), code_hash.value),
+        instruction.select(is_empty, FQ(0), code_hash.expr()),
         instruction.stack_push(),
     )
 

diff --git a/src/zkevm_specs/evm/execution/gas.py b/src/zkevm_specs/evm/execution/gas.py
@@ -8,7 +8,7 @@ def gas(instruction: Instruction):
     instruction.constrain_equal(opcode, Opcode.GAS)
 
     # fetch gas from rw table and consider only the lower 8 bytes (uint64)
-    gas = instruction.rlc_to_fq_exact(instruction.stack_push(), N_BYTES_GAS)
+    gas = instruction.rlc_to_fq(instruction.stack_push(), N_BYTES_GAS)
 
     instruction.constrain_equal(
         gas,

diff --git a/src/zkevm_specs/evm/execution/jump.py b/src/zkevm_specs/evm/execution/jump.py
@@ -12,7 +12,7 @@ def jump(instruction: Instruction):
     dest = instruction.stack_pop()
 
     # Get `dest` raw value in max 8 bytes
-    dest_value = instruction.rlc_to_fq_exact(dest, N_BYTES_PROGRAM_COUNTER)
+    dest_value = instruction.rlc_to_fq(dest, N_BYTES_PROGRAM_COUNTER)
 
     # Verify `dest` is code within byte code table
     instruction.constrain_equal(Opcode.JUMPDEST, instruction.opcode_lookup_at(dest_value, True))

diff --git a/src/zkevm_specs/evm/execution/jumpi.py b/src/zkevm_specs/evm/execution/jumpi.py
@@ -17,7 +17,7 @@ def jumpi(instruction: Instruction):
         pc_diff = FQ(1)
     else:
         # Get `dest` raw value in max 8 bytes
-        dest_value = instruction.rlc_to_fq_exact(dest, N_BYTES_PROGRAM_COUNTER)
+        dest_value = instruction.rlc_to_fq(dest, N_BYTES_PROGRAM_COUNTER)
         pc_diff = dest_value - instruction.curr.program_counter
         # assert Opcode.JUMPDEST == instruction.opcode_lookup_at(dest_value, True)
         instruction.constrain_equal(Opcode.JUMPDEST, instruction.opcode_lookup_at(dest_value, True))

diff --git a/src/zkevm_specs/evm/execution/log.py b/src/zkevm_specs/evm/execution/log.py
@@ -12,8 +12,8 @@ def log(instruction: Instruction):
     instruction.range_lookup(opcode - Opcode.LOG0, 5)
 
     # pop `mstart`, `msize` from stack
-    mstart = instruction.rlc_to_fq_exact(instruction.stack_pop(), 8)
-    msize = instruction.rlc_to_fq_exact(instruction.stack_pop(), 8)
+    mstart = instruction.rlc_to_fq(instruction.stack_pop(), 8)
+    msize = instruction.rlc_to_fq(instruction.stack_pop(), 8)
 
     # check not static call
     instruction.constrain_equal(
Original file line number	Diff line number	Diff line change
Expand Up		@@ -40,4 +40,4 @@ It always computes `y = (a + x) % 2**256`,

		## Code

		See `src/zkevm_specs/opcode/add.py`
		See `src/zkevm_specs/opcode/add_sub.py`