Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core/state: introduce stateupdate structure #29530

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions core/genesis_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ func TestVerkleGenesisCommit(t *testing.T) {
},
}

expected := common.Hex2Bytes("14398d42be3394ff8d50681816a4b7bf8d8283306f577faba2d5bc57498de23b")
expected := common.FromHex("14398d42be3394ff8d50681816a4b7bf8d8283306f577faba2d5bc57498de23b")
got := genesis.ToBlock().Root().Bytes()
if !bytes.Equal(got, expected) {
t.Fatalf("invalid genesis state root, expected %x, got %x", expected, got)
Expand All @@ -314,7 +314,7 @@ func TestVerkleGenesisCommit(t *testing.T) {
triedb := triedb.NewDatabase(db, &triedb.Config{IsVerkle: true, PathDB: pathdb.Defaults})
block := genesis.MustCommit(db, triedb)
if !bytes.Equal(block.Root().Bytes(), expected) {
t.Fatalf("invalid genesis state root, expected %x, got %x", expected, got)
t.Fatalf("invalid genesis state root, expected %x, got %x", expected, block.Root())
}

// Test that the trie is verkle
Expand Down
232 changes: 123 additions & 109 deletions core/state/state_object.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ package state
import (
"bytes"
"fmt"
"io"
"maps"
"sync"
"time"

"github.com/ethereum/go-ethereum/common"
Expand All @@ -34,14 +32,6 @@ import (
"github.com/holiman/uint256"
)

// hasherPool holds a pool of hashers used by state objects during concurrent
// trie updates.
var hasherPool = sync.Pool{
New: func() interface{} {
return crypto.NewKeccakState()
},
}

type Storage map[common.Hash]common.Hash

func (s Storage) Copy() Storage {
Expand All @@ -65,9 +55,20 @@ type stateObject struct {
trie Trie // storage trie, which becomes non-nil on first access
code []byte // contract bytecode, which gets set when code is loaded

originStorage Storage // Storage cache of original entries to dedup rewrites
pendingStorage Storage // Storage entries that need to be flushed to disk, at the end of an entire block
dirtyStorage Storage // Storage entries that have been modified in the current transaction execution, reset for every transaction
originStorage Storage // Storage entries that have been accessed within the current block
dirtyStorage Storage // Storage entries that have been modified within the current transaction
pendingStorage Storage // Storage entries that have been modified within the current block

// needCommit tracks a set of storage entries that have been modified but
// not yet committed since the "last commit operation", along with their
// original values before mutation.
//
// Specifically, the commit will be performed after each transaction before
// the byzantium fork, therefore the map is already reset at the transaction
// boundary; however post the byzantium fork, the commit will only be performed
// at the end of block, this set essentially tracks all the modifications
// made within the block.
needCommit Storage
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that the previous 3 items are all XXXStorage, I'd recommend using uncommittedStorage for this one.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Though, it's a bit weird, because it contains the "origin" values too, which also somewhat clash with originStorage. Does it make sense to have the origin in 2 different variables? Can't we just use this field as a map[hash]struct{} to track which slots need commit, but otherwise rely on originStorage for the actual value content?


// Cache flags.
dirtyCode bool // true if the code was updated
Expand Down Expand Up @@ -102,16 +103,12 @@ func newObject(db *StateDB, address common.Address, acct *types.StateAccount) *s
origin: origin,
data: *acct,
originStorage: make(Storage),
pendingStorage: make(Storage),
dirtyStorage: make(Storage),
pendingStorage: make(Storage),
needCommit: make(Storage),
}
}

// EncodeRLP implements rlp.Encoder.
func (s *stateObject) EncodeRLP(w io.Writer) error {
return rlp.Encode(w, &s.data)
}

func (s *stateObject) markSelfdestructed() {
s.selfDestructed = true
}
Expand Down Expand Up @@ -160,7 +157,7 @@ func (s *stateObject) getPrefetchedTrie() Trie {
return s.db.prefetcher.trie(s.addrHash, s.data.Root)
}

// GetState retrieves a value from the account storage trie.
// GetState retrieves a value associated with the given storage key.
func (s *stateObject) GetState(key common.Hash) common.Hash {
value, _ := s.getState(key)
return value
Expand All @@ -177,7 +174,8 @@ func (s *stateObject) getState(key common.Hash) (common.Hash, common.Hash) {
return origin, origin
}

// GetCommittedState retrieves a value from the committed account storage trie.
// GetCommittedState retrieves the value associated with the specific key
// without any mutations caused in the current execution.
func (s *stateObject) GetCommittedState(key common.Hash) common.Hash {
// If we have a pending write or clean cached, return that
if value, pending := s.pendingStorage[key]; pending {
Expand All @@ -193,6 +191,7 @@ func (s *stateObject) GetCommittedState(key common.Hash) common.Hash {
// have been handles via pendingStorage above.
// 2) we don't have new values, and can deliver empty response back
if _, destructed := s.db.stateObjectsDestruct[s.address]; destructed {
s.originStorage[key] = common.Hash{} // track the empty slot as origin value
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any particular reason for starting to track this here?

return common.Hash{}
}
// If no live objects are available, attempt to use snapshots
Expand Down Expand Up @@ -272,17 +271,26 @@ func (s *stateObject) setState(key common.Hash, value common.Hash, origin common
func (s *stateObject) finalise() {
slotsToPrefetch := make([][]byte, 0, len(s.dirtyStorage))
for key, value := range s.dirtyStorage {
// If the slot is different from its original value, move it into the
// pending area to be committed at the end of the block (and prefetch
// the pathways).
if value != s.originStorage[key] {
s.pendingStorage[key] = value
slotsToPrefetch = append(slotsToPrefetch, common.CopyBytes(key[:])) // Copy needed for closure
if origin, exist := s.needCommit[key]; exist && origin == value {
// The slot is reverted to its original value, delete the entry
// to avoid thrashing the data structures.
delete(s.needCommit, key)
} else if exist {
// The slot is modified to another value and the slot has been
// tracked for commit, do nothing here.
} else {
// Otherwise, the slot was reverted to its original value, remove it
// from the pending area to avoid thrashing the data structure.
delete(s.pendingStorage, key)
// The slot is different from its original value and hasn't been
// tracked for commit yet.
s.needCommit[key] = s.GetCommittedState(key)
slotsToPrefetch = append(slotsToPrefetch, common.CopyBytes(key[:])) // Copy needed for closure
}
// Aggregate the dirty storage slots into the pending area. It might
// be possible that the value of tracked slot here is same with the
// one in originStorage (e.g. the slot was modified in tx_a and then
// modified back in tx_b). We can't blindly remove it from pending
// map as the dirty slot might have been committed already (before the
// byzantium fork) and entry is necessary to modify the value back.
s.pendingStorage[key] = value
}
if s.db.prefetcher != nil && len(slotsToPrefetch) > 0 && s.data.Root != types.EmptyRootHash {
if err := s.db.prefetcher.prefetch(s.addrHash, s.data.Root, s.address, slotsToPrefetch); err != nil {
Expand All @@ -308,7 +316,7 @@ func (s *stateObject) finalise() {
// It assumes all the dirty storage slots have been finalized before.
func (s *stateObject) updateTrie() (Trie, error) {
// Short circuit if nothing changed, don't bother with hashing anything
if len(s.pendingStorage) == 0 {
if len(s.needCommit) == 0 {
return s.trie, nil
}
// Retrieve a pretecher populated trie, or fall back to the database
Expand All @@ -325,20 +333,8 @@ func (s *stateObject) updateTrie() (Trie, error) {
return nil, err
}
}

// The snapshot storage map for the object
var (
storage map[common.Hash][]byte
origin map[common.Hash][]byte
)
// Insert all the pending storage updates into the trie
usedStorage := make([][]byte, 0, len(s.pendingStorage))

hasher := hasherPool.Get().(crypto.KeccakState)
defer hasherPool.Put(hasher)

// Perform trie updates before deletions. This prevents resolution of unnecessary trie nodes
// in circumstances similar to the following:
// Perform trie updates before deletions. This prevents resolution of unnecessary trie nodes
// in circumstances similar to the following:
//
// Consider nodes `A` and `B` who share the same full node parent `P` and have no other siblings.
// During the execution of a block:
Expand All @@ -347,61 +343,32 @@ func (s *stateObject) updateTrie() (Trie, error) {
// If the deletion is handled first, then `P` would be left with only one child, thus collapsed
// into a shortnode. This requires `B` to be resolved from disk.
// Whereas if the created node is handled first, then the collapse is avoided, and `B` is not resolved.
var deletions []common.Hash
for key, value := range s.pendingStorage {
var (
deletions []common.Hash
used = make([][]byte, 0, len(s.needCommit))
)
for key, origin := range s.needCommit {
// Skip noop changes, persist actual changes
if value == s.originStorage[key] {
value, exist := s.pendingStorage[key]
if value == origin {
log.Error("Storage update was noop", "address", s.address, "slot", key)
continue
}
if !exist {
log.Error("Storage slot is not found in pending area", s.address, "slot", key)
continue
}
prev := s.originStorage[key]
s.originStorage[key] = value

var encoded []byte // rlp-encoded value to be used by the snapshot
if (value != common.Hash{}) {
// Encoding []byte cannot fail, ok to ignore the error.
trimmed := common.TrimLeftZeroes(value[:])
encoded, _ = rlp.EncodeToBytes(trimmed)
if err := tr.UpdateStorage(s.address, key[:], trimmed); err != nil {
if err := tr.UpdateStorage(s.address, key[:], common.TrimLeftZeroes(value[:])); err != nil {
s.db.setError(err)
return nil, err
}
s.db.StorageUpdated.Add(1)
} else {
deletions = append(deletions, key)
}
// Cache the mutated storage slots until commit
if storage == nil {
s.db.storagesLock.Lock()
if storage = s.db.storages[s.addrHash]; storage == nil {
storage = make(map[common.Hash][]byte)
s.db.storages[s.addrHash] = storage
}
s.db.storagesLock.Unlock()
}
khash := crypto.HashData(hasher, key[:])
storage[khash] = encoded // encoded will be nil if it's deleted

// Cache the original value of mutated storage slots
if origin == nil {
s.db.storagesLock.Lock()
if origin = s.db.storagesOrigin[s.address]; origin == nil {
origin = make(map[common.Hash][]byte)
s.db.storagesOrigin[s.address] = origin
}
s.db.storagesLock.Unlock()
}
// Track the original value of slot only if it's mutated first time
if _, ok := origin[khash]; !ok {
if prev == (common.Hash{}) {
origin[khash] = nil // nil if it was not present previously
} else {
// Encoding []byte cannot fail, ok to ignore the error.
b, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(prev[:]))
origin[khash] = b
}
}
// Cache the items for preloading
usedStorage = append(usedStorage, common.CopyBytes(key[:])) // Copy needed for closure
used = append(used, common.CopyBytes(key[:])) // Copy needed for closure
}
for _, key := range deletions {
if err := tr.DeleteStorage(s.address, key[:]); err != nil {
Expand All @@ -410,15 +377,10 @@ func (s *stateObject) updateTrie() (Trie, error) {
}
s.db.StorageDeleted.Add(1)
}
// If no slots were touched, issue a warning as we shouldn't have done all
// the above work in the first place
if len(usedStorage) == 0 {
log.Error("State object update was noop", "addr", s.address, "slots", len(s.pendingStorage))
}
if s.db.prefetcher != nil {
s.db.prefetcher.used(s.addrHash, s.data.Root, usedStorage)
s.db.prefetcher.used(s.addrHash, s.data.Root, used)
}
s.pendingStorage = make(Storage) // reset pending map
s.needCommit = make(Storage) // empties the commit markers
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might want to use some map clearing method here to retain the memory and not reallocate for the next tx (pre byzantium).

return tr, nil
}

Expand All @@ -434,30 +396,81 @@ func (s *stateObject) updateRoot() {
s.data.Root = tr.Hash()
}

// commit obtains a set of dirty storage trie nodes and updates the account data.
// The returned set can be nil if nothing to commit. This function assumes all
// storage mutations have already been flushed into trie by updateRoot.
// commitStorage overwrites the clean storage with the storage changes and
// fulfills the storage diffs into the given accountUpdate struct.
func (s *stateObject) commitStorage(op *accountUpdate) {
var (
buf = crypto.NewKeccakState()
encode = func(slot common.Hash) []byte {
if slot == (common.Hash{}) {
return nil
}
blob, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(slot[:]))
return blob
}
)
for key, slot := range s.pendingStorage {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pls rename slot to value. Slot is really the key itself.

// Skip the noop storage changes, it might be possible the value
// of tracked slot is same in originStorage and pendingStorage
// map, e.g. the storage slot is modified in tx_a and then reset
// back in tx_b.
if slot == s.originStorage[key] {
continue
}
hash := crypto.HashData(buf, key[:])
if op.storages == nil {
op.storages = make(map[common.Hash][]byte)
}
op.storages[hash] = encode(slot)
if op.storagesOrigin == nil {
op.storagesOrigin = make(map[common.Hash][]byte)
}
op.storagesOrigin[hash] = encode(s.originStorage[key])

// Overwrite the clean value of storage slots
s.originStorage[key] = slot
}
s.pendingStorage = make(Storage)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use a map clearing method here

}

// commit obtains the account changes (metadata, storage slots, code) caused by
// state execution along with the dirty storage trie nodes.
//
// Note, commit may run concurrently across all the state objects. Do not assume
// thread-safe access to the statedb.
func (s *stateObject) commit() (*trienode.NodeSet, error) {
// Short circuit if trie is not even loaded, don't bother with committing anything
if s.trie == nil {
func (s *stateObject) commit() (*accountUpdate, *trienode.NodeSet, error) {
// commit the account metadata changes
op := &accountUpdate{
address: s.address,
data: types.SlimAccountRLP(s.data),
}
if s.origin == nil {
op.origin = nil // the account was not present
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a noop, right? Can't we just have a single if branch to set origin if s.origin != nil?

} else {
op.origin = types.SlimAccountRLP(*s.origin)
}
// commit the contract code if it's modified
if s.dirtyCode {
op.code = &contractCode{
hash: common.BytesToHash(s.CodeHash()),
blob: s.code,
}
s.dirtyCode = false // reset the dirty flag
}
// Commit storage changes and the associated storage trie
s.commitStorage(op)
if len(op.storages) == 0 {
// nothing changed, don't bother to commit the trie
s.origin = s.data.Copy()
return nil, nil
return op, nil, nil
}
// The trie is currently in an open state and could potentially contain
// cached mutations. Call commit to acquire a set of nodes that have been
// modified, the set can be nil if nothing to commit.
root, nodes, err := s.trie.Commit(false)
if err != nil {
return nil, err
return nil, nil, err
}
s.data.Root = root

// Update original account data after commit
s.origin = s.data.Copy()
return nodes, nil
return op, nodes, nil
}

// AddBalance adds amount to s's balance.
Expand Down Expand Up @@ -509,6 +522,7 @@ func (s *stateObject) deepCopy(db *StateDB) *stateObject {
originStorage: s.originStorage.Copy(),
pendingStorage: s.pendingStorage.Copy(),
dirtyStorage: s.dirtyStorage.Copy(),
needCommit: s.needCommit.Copy(),
dirtyCode: s.dirtyCode,
selfDestructed: s.selfDestructed,
newContract: s.newContract,
Expand Down