Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Missing actions in some transitions for MOESI_CMP_directory-dir.sm #1129

Open
sidit77 opened this issue May 14, 2024 · 0 comments
Open

Missing actions in some transitions for MOESI_CMP_directory-dir.sm #1129

sidit77 opened this issue May 14, 2024 · 0 comments
Labels

Comments

@sidit77
Copy link

sidit77 commented May 14, 2024

Describe the bug
When the directory tries to make a MM =Exclusive_Unblock=> M transition the protocol fails the assertion assert(is_valid(tbe)) in w_deallocateTBE.

The transitions from M or O to MM don't seem to allocate a TBE, so a valid TBE would have to exist already for the deallocate to succeed. This does not seem to be the case, judging by these transitions, for example:

transition(OS, Unblock, O) {
    //In OS state there is no TBE for some reason
    // w_deallocateTBE;
    j_popIncomingUnblockQueue;
  }

transition(MO, Unblock, O) {
    w_deallocateTBE;
    m_addUnlockerToSharers;
    j_popIncomingUnblockQueue;
  }

transition(IS, Exclusive_Unblock, M) {
    w_deallocateTBE;
    cc_clearSharers;
    e_ownerIsUnblocker;
    j_popIncomingUnblockQueue;
  }

Adding a v_allocateTBE action to the M/O -> MM transitions seems to fix the issue for me, but I'm not sure whether that is theoretically sound.

Affects version
branch: stable
commit: e8bc4fc

gem5 Modifications
I changed how the L2cache is selected in the L1cache

// out_msg.Destination.add(mapAddressToMachine(address, MachineType:L2Cache));
out_msg.Destination.add(createMachineID(MachineType:L2Cache, intToID(l2_id)));

I have a custom setup script which creates a NUMA system that roughly looks like this:
Untitled presentation

import m5
from m5.objects import *
import argparse
import math

m5.util.addToPath("../gem5/configs")
from common.FileSystemConfig import config_filesystem

# Command-line interface for this config script.
parser = argparse.ArgumentParser()

# NOTE(review): --cores is parsed but the topology below is driven by
# number_of_nodes/number_of_cores_per_node — confirm whether it is used.
parser.add_argument("-c", "--cores", help="Number of cores", type=int, default=2)
# parser.add_argument('-m', '--memory', help="Memory subsystem", choices=["Custom", "Classic", "MI", "MESI"], default="Custom", action="store") 
parser.add_argument('-ro', "--redirect-output", help="redirect the output", action='store_true') 
# Everything after the options is the simulated command line (argv-style).
parser.add_argument("cmd", nargs=argparse.REMAINDER)

args = parser.parse_args()


class L1Cache(L1Cache_Controller):
    """Private split L1 I/D cache controller for one CPU.

    Each instance receives a unique ``version`` and records the id of
    the L2 its requests target (``l2_id`` — presumably consumed by the
    modified L1 .sm protocol; confirm against the protocol sources).
    """

    _version = 0

    @classmethod
    def versionCount(cls):
        """Hand out the next zero-based version number for this class."""
        current = cls._version
        cls._version = current + 1
        return current

    def __init__(self, system, ruby_system, cpu, l2_id):
        """``cpu`` supplies the clock domain and eviction policy input;
        ``system`` supplies the cache block size.
        """
        super().__init__()

        self.version = self.versionCount()
        index_bits = self.getBlockSizeBits(system)
        # Backing stores (tags + data) for the instruction and data caches.
        self.L1Icache = RubyCache(
            size="16kB", assoc=8, start_index_bit=index_bits, is_icache=True
        )
        self.L1Dcache = RubyCache(
            size="16kB", assoc=8, start_index_bit=index_bits, is_icache=False
        )
        self.l2_id = l2_id
        self.clk_domain = cpu.clk_domain
        self.send_evictions = self.sendEvicts(cpu)
        self.ruby_system = ruby_system
        # self.enable_prefetch = False
        # self.prefetcher = RubyPrefetcher()
        self.connectQueues(ruby_system)

    def getBlockSizeBits(self, system):
        """Return log2 of the system cache line size for RubyCache indexing."""
        n_bits = int(math.log(system.cache_line_size, 2))
        if system.cache_line_size.value != 2**n_bits:
            panic("Cache line size not a power of 2!")
        return n_bits

    def sendEvicts(self, cpu):
        """Whether cache evictions must be forwarded to the CPU model.

        Forwarding is needed when (1) an O3 LSQ must stay coherent with
        the caches, (2) the x86 mwait instruction is built on coherence,
        or (3) an ARM local exclusive monitor is in play.  This is an
        x86 simulation, so the answer is always True.
        """
        return True

    def connectQueues(self, ruby_system):
        """Wire every message buffer of this controller to the network."""
        # Controller-internal queues (not routed through the network).
        self.mandatoryQueue = MessageBuffer()
        self.triggerQueue = MessageBuffer(ordered=True)

        # L1 -> network.
        self.requestFromL1Cache = MessageBuffer()
        self.responseFromL1Cache = MessageBuffer()
        self.requestFromL1Cache.out_port = ruby_system.network.in_port
        self.responseFromL1Cache.out_port = ruby_system.network.in_port

        # network -> L1.
        self.requestToL1Cache = MessageBuffer()
        self.responseToL1Cache = MessageBuffer()
        self.requestToL1Cache.in_port = ruby_system.network.out_port
        self.responseToL1Cache.in_port = ruby_system.network.out_port


class L2Cache(L2Cache_Controller):
    """Shared, unified L2 cache controller (one per node in this setup)."""

    _version = 0

    @classmethod
    def versionCount(cls):
        """Hand out the next zero-based version number for this class."""
        current = cls._version
        cls._version = current + 1
        return current

    def __init__(self, system, ruby_system):
        """``system`` supplies the cache block size."""
        super().__init__()

        self.version = self.versionCount()
        # Backing store (tags + data) for the L2.
        self.L2cache = RubyCache(
            size="256kB", assoc=8, start_index_bit=self.getBlockSizeBits(system)
        )
        self.ruby_system = ruby_system
        self.connectQueues(ruby_system)

    def getBlockSizeBits(self, system):
        """Return log2 of the system cache line size for RubyCache indexing."""
        n_bits = int(math.log(system.cache_line_size, 2))
        if system.cache_line_size.value != 2**n_bits:
            panic("Cache line size not a power of 2!")
        return n_bits

    def sendEvicts(self, cpu):
        """Whether evictions must be forwarded to the CPU model.

        Kept for parity with L1Cache; always True on this x86 setup.
        (Not referenced by this script's L2 construction path.)
        """
        return True

    def connectQueues(self, ruby_system):
        """Wire every message buffer of this controller to the network."""
        # Controller-internal queue.
        self.triggerQueue = MessageBuffer(ordered=True)

        # L2 -> network.
        self.GlobalRequestFromL2Cache = MessageBuffer()
        self.GlobalRequestFromL2Cache.out_port = ruby_system.network.in_port
        self.L1RequestFromL2Cache = MessageBuffer()
        self.L1RequestFromL2Cache.out_port = ruby_system.network.in_port
        self.responseFromL2Cache = MessageBuffer()
        self.responseFromL2Cache.out_port = ruby_system.network.in_port

        # network -> L2.
        self.GlobalRequestToL2Cache = MessageBuffer()
        self.GlobalRequestToL2Cache.in_port = ruby_system.network.out_port
        self.L1RequestToL2Cache = MessageBuffer()
        self.L1RequestToL2Cache.in_port = ruby_system.network.out_port
        self.responseToL2Cache = MessageBuffer()
        self.responseToL2Cache.in_port = ruby_system.network.out_port


class DirController(Directory_Controller):
    """Directory controller backed by exactly one memory controller."""

    _version = 0

    @classmethod
    def versionCount(cls):
        """Hand out the next zero-based version number for this class."""
        cls._version += 1  # Use count for this particular type
        return cls._version - 1

    def __init__(self, ruby_system, ranges, mem_ctrl):
        """ranges are the memory ranges assigned to this controller;
        mem_ctrl is the single memory controller backing them.
        """
        # BUG FIX: the old guard was ``if len(mem_ctrls) > 1: panic(...)``,
        # which read the *module-level* ``mem_ctrls`` list (this method
        # receives a single ``mem_ctrl``).  That list grows by one per
        # construction-loop iteration, so from the third node onward the
        # guard would panic spuriously.  The signature already restricts
        # this directory to one memory controller, so the guard is removed.
        super().__init__()
        self.version = self.versionCount()
        self.addr_ranges = ranges
        self.ruby_system = ruby_system
        self.directory = RubyDirectoryMemory()
        # Connect this directory to the memory side.
        self.memory_out_port = mem_ctrl.port
        self.connectQueues(ruby_system)

    def connectQueues(self, ruby_system):
        """Wire every message buffer of this controller to the network."""
        # Memory-side and controller-internal queues.
        self.requestToMemory = MessageBuffer()
        self.responseFromMemory = MessageBuffer()
        self.triggerQueue = MessageBuffer(ordered=True)

        # Directory -> network.
        self.responseFromDir = MessageBuffer()
        self.responseFromDir.out_port = ruby_system.network.in_port
        self.forwardFromDir = MessageBuffer()
        self.forwardFromDir.out_port = ruby_system.network.in_port

        # network -> directory.
        self.requestToDir = MessageBuffer()
        self.requestToDir.in_port = ruby_system.network.out_port
        self.responseToDir = MessageBuffer()
        self.responseToDir.in_port = ruby_system.network.out_port


class MyNetwork(SimpleNetwork):
    """A simple point-to-point network. This does not use Garnet."""

    def __init__(self, ruby_system):
        super().__init__()
        self.ruby_system = ruby_system
        self.netifs = []
        self.routers = []
        self.ext_links = []
        self.int_links = []

    def create_router(self, controller):
        """Create a switch for ``controller``, link them, return the switch."""
        new_id = len(self.routers)
        switch = Switch(router_id=new_id)
        self.routers.append(switch)
        self.ext_links.append(
            SimpleExtLink(link_id=new_id, ext_node=controller, int_node=switch)
        )
        return switch

    def connect_routers(self, routers):
        """Add a directed internal link between every ordered pair in
        ``routers`` (no self-links), continuing the global link-id count.
        """
        next_id = len(self.int_links)
        for src in routers:
            for dst in routers:
                if src == dst:
                    continue  # Don't connect a router to itself!
                next_id += 1
                self.int_links.append(
                    SimpleIntLink(link_id=next_id, src_node=src, dst_node=dst)
                )

    def connectControllers(self, controllers):
        """Connect all of the controllers to routers and connect the
        routers together in a point-to-point network.

        (Not called by this script, which instead builds the topology
        incrementally with create_router/connect_routers.)
        """
        # One router/switch per controller, plus an external link each.
        self.routers = [Switch(router_id=i) for i in range(len(controllers))]
        self.ext_links = [
            SimpleExtLink(link_id=i, ext_node=ctrl, int_node=self.routers[i])
            for i, ctrl in enumerate(controllers)
        ]

        # Fully connect the routers with directed internal links.
        links = []
        next_id = 0
        for src in self.routers:
            for dst in self.routers:
                if src == dst:
                    continue  # Don't connect a router to itself!
                next_id += 1
                links.append(
                    SimpleIntLink(link_id=next_id, src_node=src, dst_node=dst)
                )
        self.int_links = links


system = System()

# Set the clock frequency of the system (and all of its children)
system.clk_domain = SrcClockDomain()
system.clk_domain.clock = "2GHz"
system.clk_domain.voltage_domain = VoltageDomain()

# Ruby memory system; all cache/directory controllers hang off of it.
ruby_system = RubySystem()
system.caches = ruby_system

# Ruby's global network.
network = MyNetwork(ruby_system)
ruby_system.network = network

# This configuration uses 3 virtual networks (the MI example uses 5);
# the count must be set on both the RubySystem and the network.
ruby_system.number_of_virtual_networks = 3
ruby_system.network.number_of_virtual_networks = 3

# Topology/geometry knobs for the NUMA-style system built below.
number_of_nodes = 2
number_of_cores_per_node = 2
mem_range = AddrRange("1GB")
# Bytes; assumed to match system.cache_line_size — TODO confirm.
cache_line_size = 64


# Containers filled in by the construction loop below.
global_routers = []
cpus = []
sequencers = []
mem_ctrls = []


# Address interleaving: one slice per node, switching at line granularity.
intlv_bits = int(math.log(number_of_nodes, 2))
intlv_low_bit = int(math.log(cache_line_size, 2))
# Build one L2 + directory + memory controller per node, plus that node's
# CPUs/L1s, and wire each node's routers into a local point-to-point mesh.
for i in range(number_of_nodes):
    local_routers = []

    # One shared L2 per node.
    l2cache = L2Cache(system, ruby_system)
    # Attach as a named child of the RubySystem.  setattr replaces the
    # original exec() on a format string — same effect, no dynamic code.
    setattr(ruby_system, "l2_cntrl%d" % i, l2cache)
    l2router = network.create_router(l2cache)
    global_routers.append(l2router)
    local_routers.append(l2router)

    # Interleaved slice of the address space owned by this node.
    local_mem_range = AddrRange(
        start=mem_range.start,
        size=mem_range.size(),
        intlvHighBit=intlv_low_bit + intlv_bits - 1,
        intlvBits=intlv_bits,
        intlvMatch=i,
    )
    mem_ctrl = MemCtrl()
    mem_ctrl.dram = DDR3_1600_8x8()
    mem_ctrl.dram.range = local_mem_range
    # Renamed from ``dir`` so the builtin dir() is not shadowed.
    dir_cntrl = DirController(ruby_system, [local_mem_range], mem_ctrl)
    setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
    local_routers.append(network.create_router(dir_cntrl))
    mem_ctrls.append(mem_ctrl)

    for j in range(number_of_cores_per_node):
        cpu_index = i * number_of_cores_per_node + j
        cpu = X86TimingSimpleCPU(cpu_id=cpu_index)
        cpu.createInterruptController()
        cpus.append(cpu)

        # Each L1 is told which L2 to target via the L2's version id.
        l1cache = L1Cache(system, ruby_system, cpu, l2cache.version)
        setattr(ruby_system, "l1_cntrl%d" % cpu_index, l1cache)
        l1router = network.create_router(l1cache)
        local_routers.append(l1router)

        sequencer = RubySequencer(
            version=cpu_index,
            # I/D cache is combined and grab from ctrl
            dcache=l1cache.L1Dcache,
            clk_domain=l1cache.clk_domain,
        )
        l1cache.sequencer = sequencer
        sequencer.connectCpuPorts(cpu)
        sequencers.append(sequencer)

    # Full point-to-point mesh among this node's routers.
    network.connect_routers(local_routers)

# Inter-node mesh: connect the per-node L2 routers to each other.
network.connect_routers(global_routers)

system.cpu = cpus
system.mem_mode = "timing"  # Use timing accesses
system.mem_ranges = [mem_range]  # Create an address range
system.mem_ctrl = mem_ctrls

# ruby_system.sequencers = sequencers
ruby_system.num_of_sequencers = len(sequencers)

# Create the network and connect the controllers.
# NOTE: This is quite different if using Garnet!
# ruby_system.network.connectControllers(ruby_system.controllers)
network.setup_buffers()

# Set up a proxy port for the system_port. Used for load binaries and
# other functional-only things.
ruby_system.sys_port_proxy = RubyPortProxy()
system.system_port = ruby_system.sys_port_proxy.in_ports




# SE-mode workload: the first remainder argument is the target binary.
system.workload = SEWorkload.init_compatible(args.cmd[0])

process = Process()

if args.redirect_output:
    process.output = "output.txt"

# Set the command
# cmd is a list which begins with the executable (like argv)

process.cmd = args.cmd

# Set the cpu to use the process as its workload and create thread contexts
for cpu in system.cpu:
    cpu.workload = process
    cpu.createThreads()


# Set up the pseudo file system for the threads function above
config_filesystem(system)

# set up the root SimObject and start the simulation
root = Root(full_system=False, system=system)
# instantiate all of the objects we've created above
m5.instantiate()

print(f"Beginning simulation!")
exit_event = m5.simulate()
print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}")
@sidit77 sidit77 added the bug label May 14, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

1 participant