feat(invariant): fuzz with values from events and return values #7666
Conversation
With the latest PR changes, foundry can catch the DSChief bug in about 30 seconds (500 runs / depth 500, missing it roughly 2 out of 10 times) and never missed it in ~110 seconds (2000 runs with a depth of 500).
The proposed solution is to:
A further improvement to consider is making samples more efficient by collecting and applying them per type. However, this introduces code complexity, as we would have to decode results / events with the proper target ABI and also update fuzz strategies to take the fuzzed types into account. @mds1 @klkvr would love to hear your thoughts on this approach, thanks
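A minimal sketch of the per-type sample idea, with hypothetical names and `u64` stand-ins for decoded ABI values (the real foundry code works with fully decoded ABI values): collected return/event values are bucketed per type tag with a cap, and later fuzz runs can draw from those buckets.

```rust
use std::collections::HashMap;

/// Toy model of a per-type sample dictionary. All names here are
/// illustrative, not foundry's actual implementation.
pub struct SampleDict {
    /// Samples keyed by a type tag, e.g. "uint256" or "address".
    samples: HashMap<String, Vec<u64>>,
    /// Upper bound on samples kept per type (the PR uses the run depth).
    limit: usize,
}

impl SampleDict {
    pub fn new(limit: usize) -> Self {
        Self { samples: HashMap::new(), limit }
    }

    /// Collect a decoded return value or event value for later reuse,
    /// dropping it if the per-type bucket is already full.
    pub fn insert_sample_value(&mut self, ty: &str, value: u64) {
        let bucket = self.samples.entry(ty.to_string()).or_default();
        if bucket.len() < self.limit {
            bucket.push(value);
        }
    }

    /// Pick a previously collected sample of the requested type, if any.
    pub fn sample(&self, ty: &str, idx: usize) -> Option<u64> {
        self.samples
            .get(ty)
            .and_then(|b| b.get(idx % b.len().max(1)).copied())
    }
}
```

With `limit` set to the run depth, each call in a run can contribute at most one value per type before the bucket saturates, which is the worst-case reasoning discussed later in this thread.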
…ing. Decode results and persist per types. Use typed samples when fuzzing from state.
Force-pushed from 76d57d0 to 316b1a5
I went ahead and committed a change to collect and apply samples per result type, and with this approach I was able to reproduce the DSChief bug consistently in preliminary testing, with the same 500 runs / 500 depth.
this makes a lot of sense, the next step should probably be to improve the way we handle logs and storage
only have a couple comments
```rust
if rand::thread_rng().gen_range(0..100) < 50 {
    typed_samples
```
I believe we should use prop_perturb here as the randomness source
I reorganized this part in 58fc5a9; not sure we still need perturb, please let me know what you think
I see we are still using rand::thread_rng(), which I believe causes proptest runs with the same seed to generate different outputs, so perhaps we still need prop_perturb to ensure deterministic randomness?
Makes sense. I added a slightly different way using (0..100).prop_flat_map(Just) in d00c192, please check.
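To illustrate why the reviewer pushes for seed-derived randomness: if the 50% coin flip comes from `rand::thread_rng()`, two runs with the same proptest seed can diverge, whereas any randomness derived from the test seed (which is what routing the decision through the strategy via `prop_perturb` or `prop_flat_map` achieves) replays identically. A toy seeded generator, not proptest's actual RNG, makes the point:

```rust
/// Minimal linear congruential generator standing in for a seeded RNG.
/// Same seed in, same decision sequence out, which is the reproducibility
/// property `rand::thread_rng()` does not provide.
struct Lcg(u64);

impl Lcg {
    fn new(seed: u64) -> Self {
        Lcg(seed)
    }

    fn next(&mut self) -> u64 {
        // Knuth's MMIX constants.
        self.0 = self
            .0
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        self.0
    }

    /// The "use a collected sample 50% of the time" coin flip from the
    /// PR, but driven by the seeded generator instead of thread_rng.
    fn use_sample(&mut self) -> bool {
        self.next() % 100 < 50
    }
}
```

Two generators built from the same seed produce identical flip sequences, so a failing run can be replayed from its seed alone.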
```rust
// Decode result and collect samples to be used in subsequent fuzz runs.
if !result.is_empty() {
    if let Ok(decoded_result) = func.abi_decode_output(result, true) {
        dict.insert_sample_value(decoded_result, run_depth);
```
What's the motivation behind using run_depth as a limit for values of a certain type?
We need a limit for samples to remain relevant: if it is too low, we're going to use the same values / repeat the same test too many times; at the same time, if it grows too big, samples won't be exercised enough to reveal failures due to potential dependencies.
The test depth is a limit I came up with to make sure that, in the extreme case where we run with a test depth of n for a fuzz target containing n functions with one return value each, we collect one sample from each function.
We could also introduce a new config option for this to replace the default behavior.
> The test depth is a limit I came up with to make sure that, in the extreme case where we run with a test depth of n for a fuzz target containing n functions with one return value each, we collect one sample from each function.
Is this assumption true given how fuzz targets and selectors are chosen? i.e. I don't think there's a guarantee you cover all n functions with a depth of n, especially if there's more than one contract. Also related to #2986.
But regardless, I'll echo my thoughts in #7666 (comment) — performance-wise (catching DSChief) this PR looks really good, and I think it's better to ship as-is and revisit this limit once we have benchmarks, than to block this PR on deciding a good value. Since before benchmarks we're just taking our best guess at what sensible values are anyway :)
> Is this assumption true given how fuzz targets and selectors are chosen? i.e. I don't think there's a guarantee you cover all n functions with a depth of n, especially if there's >1 contract. Also related to #2986
Indeed, that is not a guarantee, just the ideal case; as you say, it can be improved as we go. The only concern is that if this limit is too low, other relevant data collected won't be exercised enough.
I added logs decoding in d9d8619 and will track storage handling in a follow-up PR (to keep the scope limited for this one), if you're OK with that.
```
@@ -247,7 +249,17 @@ impl<'a> InvariantExecutor<'a> {
let mut state_changeset =
    call_result.state_changeset.to_owned().expect("no changesets");

collect_data(&mut state_changeset, sender, &call_result, &fuzz_state);
if !&call_result.reverted {
```
I don't think there's any valuable data we could collect from a reverted call, hence adding this; please let me know if I'm missing something. (A further improvement when fail-on-revert is set to false would be to remove reverted calls from the final sequence, which should improve shrinking performance.)
Agreed that we don't need to collect data from a reverted call
👍, will follow up with a PR to exclude reverted calls from the final sequence when running with fail-on-revert = false; it should improve the shrinking phase a lot.
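That follow-up idea can be sketched as a simple filter over the candidate counterexample sequence (the `CallResult` shape here is hypothetical, not foundry's actual type): reverted calls cannot have contributed state changes, so dropping them gives the shrinker a shorter sequence to start from.

```rust
/// Toy stand-in for one call's outcome in an invariant run.
/// This shape is illustrative only.
#[derive(Clone, Debug, PartialEq)]
struct CallResult {
    selector: &'static str,
    reverted: bool,
}

/// Drop reverted calls from a candidate counterexample sequence.
/// Only meaningful with fail-on-revert = false, where reverted calls
/// are tolerated during the run but add nothing to the failure.
fn prune_reverted(sequence: Vec<CallResult>) -> Vec<CallResult> {
    sequence.into_iter().filter(|c| !c.reverted).collect()
}
```

Shrinking cost grows with sequence length, so pruning before the shrinking phase starts is where the speedup would come from.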
In general the approach described here makes a lot of sense to me, definitely supportive. Regarding:
I think your defaults seem pretty reasonable, so we should avoid increasing UX complexity or spending too much time debating parameter defaults until we have confidence that the changes matter or that the added complexity is worth it. We can gain that confidence once we have good benchmarks to compare against. So my suggestion would be to stick with what we have here, but document all the defaults/assumptions somewhere (I'm indifferent as to where); that way we can easily revisit these decisions and adjust based on data later.
…to call details and use it to generate counterexample
Force-pushed from 942f2b7 to 55fd876
```rust
        (sender, contract)
    })
})
.prop_map(|(sender, call_details)| BasicTxDetails::new(sender, call_details))
```
Please validate this addition; I don't see any issue with it, but confirmation would be great.
Sorry, could you give a TLDR on this change? Are we just transforming the type here?
Yeah, it's just a code-cleanup-related change: it creates a BasicTxDetails, which is now a struct (it used to be a type alias), introduced in 55fd876.
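For context, the cleanup replaces a tuple alias with a named struct along these lines (field and type names are illustrative, not necessarily foundry's exact definitions):

```rust
// Before: a bare tuple alias, roughly
//   type BasicTxDetails = (Address, CallDetails);
// After: a named struct with a constructor, so call sites read
//   BasicTxDetails::new(sender, call_details)
// instead of building an anonymous tuple.

type Address = [u8; 20];

#[derive(Clone, Debug, PartialEq)]
struct CallDetails {
    target: Address,
    calldata: Vec<u8>,
}

#[derive(Clone, Debug, PartialEq)]
struct BasicTxDetails {
    sender: Address,
    call_details: CallDetails,
}

impl BasicTxDetails {
    fn new(sender: Address, call_details: CallDetails) -> Self {
        Self { sender, call_details }
    }
}
```

Named fields make downstream code (e.g. counterexample printing) self-documenting compared to `.0` / `.1` tuple accesses, at the cost of the small `prop_map` shown above.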
got it, seems reasonable to me but deferring to @klkvr since it's an implementation question :)
Force-pushed from 7968802 to 481971a
lgtm
lgtm
Motivation
Ref #51
https://forum.openzeppelin.com/t/using-automatic-analysis-tools-with-makerdao-contracts/1021/2
Solution
Tests
```shell
forge test --mt invariant_check_dschief_with_return_value
forge test --mt invariant_check_dschief_with_event
```