Empathy List Archives

gem5-users@gem5.org

The gem5 Users mailing list

Core Communication Latency

Kazi Asifuzzaman

Thu, Sep 21, 2023 7:10 PM

Hi,

I was wondering if there is a way to quantify the core to core
communication latency with gem5? If so, can anyone please provide some
guidelines to extract that information from simulation results?

Thanks,

Kazi

Hi, I was wondering if there is a way to quantify the core to core communication latency with gem5? If so, can anyone please provide some guidelines to extract that information from simulation results? Thanks, Kazi

Mahyar Samani

Thu, Sep 21, 2023 8:26 PM

Hello Kazi,

If by core-to-core communication latency you are referring to the latency
imposed by read-sharing a cache block, you can use TrafficGenerator from the
gem5 stdlib. The example below is most probably not the best way to do
this, but I could successfully measure the latency of moving the cache
block with address "addr" from core "src" to core "dst". After the simulation,
you should search for "generator.avgReadLatency". Note: This script
includes specific details about my use case (e.g. you should replace
CoherentMeshNetwork with your cache hierarchy).

Best,

import argparse

import m5

from m5.debug import flags
from m5.objects import Root, DDR4_2400_8x8

from gem5.components.boards.test_board import TestBoard
from gem5.components.memory.memory import ChanneledMemory
from gem5.components.processors.traffic_generator import TrafficGenerator

from components.cmn import CoherentMeshNetwork
def get_inputs():
    """Parse and validate the command-line arguments of this script.

    Four positional integer arguments are expected, in this order:
    ``num_cores``, ``addr``, ``src_id`` and ``dst_id``.

    Returns:
        tuple: ``(num_cores, addr, src_id, dst_id)``.

    Raises:
        SystemExit: Raised by argparse, after printing a usage message, when
            an argument is missing, is not an integer, or fails one of the
            consistency checks below.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "num_cores", type=int, help="Number of generator cores to simulate."
    )
    parser.add_argument(
        "addr",
        type=int,
        help="Address to move",
    )
    parser.add_argument("src_id", type=int, help="Number of source core.")
    parser.add_argument("dst_id", type=int, help="Number of destination core.")
    args = parser.parse_args()

    # Explicit checks rather than ``assert``: assertions are removed when
    # Python runs with -O, and the previous ``<`` comparisons let negative
    # ids through, in which case no core ever matched the source or
    # destination role.
    if args.num_cores < 2:
        parser.error(
            "num_cores must be at least 2 (distinct source and destination)."
        )
    if args.addr < 0:
        parser.error(f"addr must be non-negative, got {args.addr}.")
    for name in ("src_id", "dst_id"):
        core_id = getattr(args, name)
        if not 0 <= core_id < args.num_cores:
            parser.error(
                f"{name} must be between 0 and {args.num_cores - 1}, "
                f"got {core_id}."
            )
    if args.src_id == args.dst_id:
        parser.error("src_id and dst_id must refer to different cores.")

    return args.num_cores, args.addr, args.src_id, args.dst_id


def _transition_chain(last_state):
    """Return TRANSITION lines walking states 0..last_state in order.

    Every state hands over to its successor and the final state points back
    at itself. The trailing ``1`` on each line is kept from the original
    hand-written tables (presumably the transition probability of the
    traffic-generator config format -- confirm against the gem5 docs).
    """
    chain = [f"TRANSITION {s} {s + 1} 1\n" for s in range(last_state)]
    chain.append(f"TRANSITION {last_state} {last_state} 1\n")
    return chain


def generate_config_files(num_cores, addr, src_id, dst_id, out_dir="/tmp"):
    """Write one traffic-generator config file per core and return the paths.

    Every core starts in state 0, a LINEAR state over its own 2048-byte
    window ``[i * 2048, (i + 1) * 2048]``. What follows depends on the role:

    * source core (``src_id``): a LINEAR state on ``addr``, EXIT, a long
      IDLE, EXIT;
    * destination core (``dst_id``): IDLE, EXIT, a LINEAR state on ``addr``,
      IDLE, EXIT, a long IDLE;
    * any other core: a long IDLE followed by EXIT.

    If ``src_id`` equals ``dst_id`` the source role wins, as before.

    Args:
        num_cores: Number of config files to generate (cores 0..num_cores-1).
        addr: Address used as both bounds of the LINEAR state that touches
            the shared block.
        src_id: Index of the core that gets the source state machine.
        dst_id: Index of the core that gets the destination state machine.
        out_dir: Directory the files are written to. Defaults to ``/tmp``,
            the location that used to be hard-coded.

    Returns:
        list[str]: ``f"{out_dir}/core_{i}.cfg"`` for each core, in core order.

    Raises:
        OSError: If a config file cannot be opened for writing.
    """
    # Shared tail of the LINEAR state that accesses the block being moved.
    move_block = f"LINEAR 100 {addr} {addr} 64 500 500 64"

    paths = []
    for i in range(num_cores):
        states = [
            f"STATE 0 0 LINEAR 100 {i * 2048} {(i + 1) * 2048} "
            "64 500 500 2048\n",
        ]
        if i == src_id:
            states += [
                f"STATE 1 0 {move_block}\n",
                "STATE 2 0 EXIT\n",
                "STATE 3 10000000 IDLE\n",
                "STATE 4 0 EXIT\n",
            ]
        elif i == dst_id:
            states += [
                "STATE 1 500000 IDLE\n",
                "STATE 2 0 EXIT\n",
                f"STATE 3 0 {move_block}\n",
                "STATE 4 500000 IDLE\n",
                "STATE 5 0 EXIT\n",
                "STATE 6 10000000 IDLE\n",
            ]
        else:
            states += ["STATE 1 10000000 IDLE\n", "STATE 2 0 EXIT\n"]

        # States are numbered 0..len(states)-1, so the transition table is
        # fully determined by how many states this role has.
        transitions = ["INIT 0\n"] + _transition_chain(len(states) - 1)

        path = f"{out_dir}/core_{i}.cfg"
        with open(path, "w") as config:
            config.writelines(states + transitions)
        paths.append(path)
    return paths


def MultiChannelDDR4(num_channels):
    """Build a 16GiB ``ChanneledMemory`` of ``DDR4_2400_8x8`` interfaces.

    Args:
        num_channels: Channel count, forwarded as the second positional
            argument of ``ChanneledMemory``.

    Returns:
        The newly constructed ``ChanneledMemory`` object.
    """
    # Third positional argument of ChanneledMemory; presumably the
    # interleaving size -- confirm against the gem5 stdlib signature.
    third_positional = 128
    memory = ChanneledMemory(
        DDR4_2400_8x8, num_channels, third_positional, size="16GiB"
    )
    return memory


if __name__ == "__m5_main__":
    # Script body: build the system from the command-line arguments, then run
    # the simulation until three exit events have been observed.
    num_cores, addr, src_id, dst_id = get_inputs()

    # One traffic-generator config file per core drives the generator.
    config_paths = generate_config_files(num_cores, addr, src_id, dst_id)
    generator = TrafficGenerator(config_paths)
    cache = CoherentMeshNetwork()
    memory = MultiChannelDDR4(8)
    board = TestBoard(
        clk_freq="4GHz",
        generator=generator,
        cache_hierarchy=cache,
        memory=memory,
    )

    root = Root(full_system=False, system=board)

    # NOTE(review): _pre_instantiate() is a private board hook; verify it is
    # still required (and takes no arguments) in the gem5 version in use.
    board._pre_instantiate()
    m5.instantiate()

    generator.start_traffic()
    print("Beginning simulation!")

    exit_events_countered = 0
    # Each m5.simulate() call returns at the next exit event; stop after the
    # third one.
    while exit_events_countered < 3:
        exit_event = m5.simulate()
        exit_events_countered += 1
        cause = exit_event.getCause()
        print(f"Exiting @ tick {m5.curTick()} because {cause}.")
        print(f"Received {exit_events_countered} exit events.")
        if exit_events_countered == 1:
            print("Source core done reading.")
        elif exit_events_countered == 2:
            # Stats are cleared here so that what is reported afterwards only
            # covers the interval up to the third exit event (presumably the
            # destination core's read of the shared block -- confirm).
            print("Resetting stats.")
            m5.stats.reset()
        else:
            print("Exiting while loop.")
    print("Simulation over.")

Hello Kazi, If by core to core communication latency, you are referring to the latency imposed by read sharing a cache block, you can use TrafficGenerator from gem5 stdlib. The example below is most probably not the best way to do this, but I could successfully measure the latency of moving the cache block with address "addr' from core "src" to core "dst". After simulation you should search for "generator.avgReadLatency". Note: This script includes specific details about my use case (e.g. you should replace CohrenetMeshNetwork with your cache hierarchy). Best, ``` import argparse import m5 from m5.debug import flags from m5.objects import Root, DDR4_2400_8x8 from gem5.components.boards.test_board import TestBoard from gem5.components.memory.memory import ChanneledMemory from gem5.components.processors.traffic_generator import TrafficGenerator from components.cmn import CoherentMeshNetwork def get_inputs(): parser = argparse.ArgumentParser() parser.add_argument( "num_cores", type=int, help="Number of generator cores to simulate." 
) parser.add_argument( "addr", type=int, help="Address to move", ) parser.add_argument("src_id", type=int, help="Number of source core.") parser.add_argument("dst_id", type=int, help="Number of destination core.") args = parser.parse_args() assert args.src_id < args.num_cores assert args.dst_id < args.num_cores assert args.src_id != args.dst_id return args.num_cores, args.addr, args.src_id, args.dst_id def generate_config_files(num_cores, addr, src_id, dst_id): ret = [] for i in range(num_cores): with open(f"/tmp/core_{i}.cfg", "w") as config: lines = [ f"STATE 0 0 LINEAR 100 {i*2048} {(i+1)*2048} 64 500 500 2048\n", ] transition_lines = ["INIT 0\n"] if not i in [src_id, dst_id]: lines += [f"STATE 1 10000000 IDLE\n", f"STATE 2 0 EXIT\n"] transition_lines += [ "TRANSITION 0 1 1\n", "TRANSITION 1 2 1\n", "TRANSITION 2 2 1\n", ] elif i == src_id: lines += [ f"STATE 1 0 LINEAR 100 {addr} {addr} 64 500 500 64\n", f"STATE 2 0 EXIT\n", f"STATE 3 10000000 IDLE\n", f"STATE 4 0 EXIT\n", ] transition_lines += [ "TRANSITION 0 1 1\n", "TRANSITION 1 2 1\n", "TRANSITION 2 3 1\n", "TRANSITION 3 4 1\n", "TRANSITION 4 4 1\n", ] elif i == dst_id: lines += [ f"STATE 1 500000 IDLE\n", f"STATE 2 0 EXIT\n", f"STATE 3 0 LINEAR 100 {addr} {addr} 64 500 500 64\n", f"STATE 4 500000 IDLE\n", f"STATE 5 0 EXIT\n", f"STATE 6 10000000 IDLE\n", ] transition_lines += [ "TRANSITION 0 1 1\n", "TRANSITION 1 2 1\n", "TRANSITION 2 3 1\n", "TRANSITION 3 4 1\n", "TRANSITION 4 5 1\n", "TRANSITION 5 6 1\n", "TRANSITION 6 6 1\n", ] else: raise ValueError config.writelines(lines + transition_lines) ret.append(f"/tmp/core_{i}.cfg") return ret def MultiChannelDDR4(num_channels): return ChanneledMemory(DDR4_2400_8x8, num_channels, 128, size="16GiB") if __name__ == "__m5_main__": num_cores, addr, src_id, dst_id = get_inputs() generator = TrafficGenerator( generate_config_files(num_cores, addr, src_id, dst_id) ) cache = CoherentMeshNetwork() memory = MultiChannelDDR4(8) board = TestBoard( clk_freq="4GHz", 
generator=generator, cache_hierarchy=cache, memory=memory, ) root = Root(full_system=False, system=board) board._pre_instantiate() m5.instantiate() generator.start_traffic() print("Beginning simulation!") exit_events_countered = 0 while True: exit_event = m5.simulate() exit_events_countered += 1 print( f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}." ) print(f"Received {exit_events_countered} exit events.") if exit_events_countered == 1: print("Source core done reading.") if exit_events_countered == 2: print("Resetting stats.") m5.stats.reset() if exit_events_countered == 3: print("Exiting while loop.") break print("Simulation over.") ```