///////////////////////////////////////////////////////////////////////// // File: tsarv4_vgmn_generic_32_top.cpp // Author: Alain Greiner // Copyright: UPMC/LIP6 // Date : november 5 2010 // This program is released under the GNU public license ///////////////////////////////////////////////////////////////////////// // This file defines a generic TSAR architecture without virtual memory. // - It uses the vci_vgmn as global interconnect // - It uses the vci_local_crossbar as local interconnect // - It uses the vci_cc_xcache (No MMU) // The physical address space is 32 bits. // The number of clusters cannot be larger than 256. // The three parameters are // - xmax : number of clusters in a row // - ymax : number of clusters in a column // - nprocs : number of processors per cluster // // Each cluster contains nprocs processors, one Memory Cache, // and one XICU component. // The peripherals BDEV, CDMA, FBUF, MTTY and the boot BROM // are in the cluster containing address 0xBFC00000. // - The bdev_irq is connected to IRQ_IN[0] // - The cdma_irq is connected to IRQ_IN[1] // - The tty_irq[i] is connected to IRQ_IN[i+2] // For all clusters, the XICU component contains nprocs timers. // // As we target up to 256 clusters, each cluster can contain // at most 16 Mbytes (in a 4Gbytes address space). // There is one MEMC segment and one XICU segment per cluster. // - Each memory cache contains 8 Mbytes. // - The Frame buffer contains 4 Mbytes. // - The Boot ROM contains 1 Mbyte. // // General policy for 32-bit address decoding: // To simplify, all segment base addresses are aligned // on 64 Kbyte boundaries. Therefore the 16 MSB address bits // define the target in the direct address space. 
// In these 16 bits, the (x_width + y_width) MSB bits define // the cluster index, and the 8 LSB bits define the local index: // // | X_ID | Y_ID |---| L_ID | OFFSET | // |x_width|y_width|---| 8 | 16 | ///////////////////////////////////////////////////////////////////////// #include #include #include #include #include #include #include #include "mapping_table.h" #include "tsarv4_cluster_xbar.h" #include "mips32.h" #include "vci_simple_ram.h" #include "alloc_elems.h" #include "config.h" #if USE_OPENMP #include #endif #if USE_VCI_PROFILER #include "vci_profiler.h" #endif #if USE_GDBSERVER #include "gdbserver.h" #endif #if USE_ALMOS #define BOOT_INFO_BLOCK 0xbfc08000 #define KERNEL_BIN_IMG 0xbfc10000 #endif // cluster index (computed from x,y coordinates) #define cluster(x,y) (y + ymax*x) #define _TO_STR(_str) #_str #define TO_STR(_str) _TO_STR(_str) // flit widths for the DSPIN network #define cmd_width 40 #define rsp_width 33 // VCI format #define cell_width 4 #define address_width 32 #define plen_width 8 #define error_width 1 #define clen_width 1 #define rflag_width 1 #define srcid_width 14 #define pktid_width 4 #define trdid_width 4 #define wrplen_width 1 ///////////////////////////////// int _main(int argc, char *argv[]) { using namespace sc_core; using namespace soclib::caba; using namespace soclib::common; uint64_t ms1, ms2; struct timeval t1, t2; char soft_name[BDEV_NAME_LEN] = "to_be_defined"; // pathname to binary code char disk_name[BDEV_NAME_LEN] = BDEV_IMAGE_NAME; // pathname to the disk image size_t ncycles = 1000000000; // simulated cycles size_t xmax = 2; // number of clusters in a row size_t ymax = 2; // number of clusters in a column size_t nprocs = 1; // number of processors per cluster size_t xfb = 512; // frameBuffer column number size_t yfb = 512; // frameBuffer lines number size_t fb_mode = 420; #define DEBUG_OK no size_t from_cycle = 0; // debug start cycle size_t memc_size = MEMC_SIZE; size_t blk_size = SECTOR_SIZE; size_t l1_i_ways = 
L1_IWAYS; size_t l1_d_ways = L1_DWAYS; size_t l1_i_sets = L1_ISETS; size_t l1_d_sets = L1_DSETS; size_t memc_sets = MEMC_SETS; size_t memc_ways = MEMC_WAYS; size_t itlb_ways = TLB_IWAYS; size_t itlb_sets = TLB_ISETS; size_t dtlb_ways = TLB_DWAYS; size_t dtlb_sets = TLB_DSETS; size_t xram_latency = CONFIG_XRAM_LATENCY; size_t omp_threads = 1; ////////////// command line arguments ////////////////////// if (argc > 1) { for( int n=1 ; n vci_param; size_t cluster_io_index; size_t x_width; size_t y_width; if (xmax == 2) x_width = 1; else if (xmax <= 4) x_width = 2; else if (xmax <= 8) x_width = 3; else x_width = 4; if (ymax == 2) y_width = 1; else if (ymax <= 4) y_width = 2; else if (ymax <= 8) y_width = 3; else y_width = 4; cluster_io_index = 0xBF >> (8 - x_width - y_width); ///////////////////// // Mapping Tables ///////////////////// // direct network MappingTable maptabd(address_width, IntTab(x_width + y_width, 16 - x_width - y_width), IntTab(x_width + y_width, srcid_width - x_width - y_width), 0x00F00000); for ( size_t x = 0 ; x < xmax ; x++) { for ( size_t y = 0 ; y < ymax ; y++) { sc_uint offset = cluster(x,y) << (address_width-x_width-y_width); std::ostringstream sm; sm << "d_seg_memc_" << x << "_" << y; maptabd.add(Segment(sm.str(), MEMC_BASE+offset, memc_size, IntTab(cluster(x,y),MEMC_TGTID), true)); std::ostringstream si; si << "d_seg_xicu_" << x << "_" << y; maptabd.add(Segment(si.str(), XICU_BASE+offset, XICU_SIZE, IntTab(cluster(x,y),XICU_TGTID), false)); if ( cluster(x,y) == cluster_io_index ) { maptabd.add(Segment("d_seg_fbuf", FBUF_BASE, FBUF_SIZE, IntTab(cluster(x,y),FBUF_TGTID), false)); maptabd.add(Segment("d_seg_bdev", BDEV_BASE, BDEV_SIZE, IntTab(cluster(x,y),BDEV_TGTID), false)); maptabd.add(Segment("d_seg_mtty", MTTY_BASE, MTTY_SIZE, IntTab(cluster(x,y),MTTY_TGTID), false)); maptabd.add(Segment("d_seg_brom", BROM_BASE, BROM_SIZE, IntTab(cluster(x,y),BROM_TGTID), true)); maptabd.add(Segment("d_seg_cdma", CDMA_BASE, CDMA_SIZE, 
IntTab(cluster(x,y),CDMA_TGTID), false)); } } } std::cout << maptabd << std::endl; // coherence network MappingTable maptabc(address_width, IntTab(x_width + y_width, 12 - x_width - y_width), IntTab(x_width + y_width, srcid_width - x_width - y_width), 0xF0000000); for ( size_t x = 0 ; x < xmax ; x++) { for ( size_t y = 0 ; y < ymax ; y++) { sc_uint offset = cluster(x,y) << (address_width-x_width-y_width); std::ostringstream sm; sm << "c_seg_memc_" << x << "_" << y; maptabc.add(Segment(sm.str(), MEMC_BASE+offset, memc_size, IntTab(cluster(x,y), nprocs), false)); // the segment base and size will be modified // when the segmentation of the coherence space will be simplified if ( cluster(x,y) == cluster_io_index ) { std::ostringstream sr; sr << "c_seg_brom_" << x << "_" << y; maptabc.add(Segment(sr.str(), BROM_BASE, BROM_SIZE, IntTab(cluster(x,y), nprocs), false)); } sc_uint avoid_collision = 0; for ( size_t p = 0 ; p < nprocs ; p++) { sc_uint base = memc_size + (p*0x100000) + offset; // the following test is to avoid a collision between the c_seg_brom segment // and a c_seg_proc segment (all segments base addresses being multiple of 1Mbytes) if ( base == BROM_BASE ) avoid_collision = 0x100000; std::ostringstream sp; sp << "c_seg_proc_" << x << "_" << y << "_" << p; maptabc.add(Segment(sp.str(), base + avoid_collision, 0x20, IntTab(cluster(x,y), p), false, true, IntTab(cluster(x,y), p))); // the two last arguments will be removed // when the segmentation of the coherence space will be simplified } } } std::cout << maptabc << std::endl; // external network MappingTable maptabx(address_width, IntTab(1), IntTab(10), 0xF0000000); for ( size_t x = 0 ; x < xmax ; x++) { for ( size_t y = 0 ; y < ymax ; y++) { sc_uint offset = cluster(x,y) << (address_width-x_width-y_width); std::ostringstream sx; sx << "seg_xram_" << x << "_" << y; maptabx.add(Segment(sx.str(), MEMC_BASE + offset, memc_size, IntTab(cluster(x,y)), false)); } } std::cout << maptabx << std::endl; 
//////////////////// // Signals /////////////////// sc_clock signal_clk("clk"); sc_signal signal_resetn("resetn"); sc_signal signal_false; // Horizontal inter-clusters DSPIN signals DspinSignals*** signal_dspin_h_cmd_inc = alloc_elems >("signal_dspin_h_cmd_inc", xmax-1, ymax, 2); DspinSignals*** signal_dspin_h_cmd_dec = alloc_elems >("signal_dspin_h_cmd_dec", xmax-1, ymax, 2); DspinSignals*** signal_dspin_h_rsp_inc = alloc_elems >("signal_dspin_h_rsp_inc", xmax-1, ymax, 2); DspinSignals*** signal_dspin_h_rsp_dec = alloc_elems >("signal_dspin_h_rsp_dec", xmax-1, ymax, 2); // Vertical inter-clusters DSPIN signals DspinSignals*** signal_dspin_v_cmd_inc = alloc_elems >("signal_dspin_v_cmd_inc", xmax, ymax-1, 2); DspinSignals*** signal_dspin_v_cmd_dec = alloc_elems >("signal_dspin_v_cmd_dec", xmax, ymax-1, 2); DspinSignals*** signal_dspin_v_rsp_inc = alloc_elems >("signal_dspin_v_rsp_inc", xmax, ymax-1, 2); DspinSignals*** signal_dspin_v_rsp_dec = alloc_elems >("signal_dspin_v_rsp_dec", xmax, ymax-1, 2); // Mesh boundaries DSPIN signals DspinSignals**** signal_dspin_false_cmd_in = alloc_elems >("signal_dspin_false_cmd_in", xmax, ymax, 2, 4); DspinSignals**** signal_dspin_false_cmd_out = alloc_elems >("signal_dspin_false_cmd_out", xmax, ymax, 2, 4); DspinSignals**** signal_dspin_false_rsp_in = alloc_elems >("signal_dspin_false_rsp_in", xmax, ymax, 2, 4); DspinSignals**** signal_dspin_false_rsp_out = alloc_elems >("signal_dspin_false_rsp_out", xmax, ymax, 2, 4); // Xternal network VCI signals VciSignals signal_vci_tgt_x_xram("signal_vci_tgt_x_xram"); //////////////////////////// // Components //////////////////////////// #if USE_ALMOS soclib::common::Loader loader("bootloader.bin", "arch-info.bin@"TO_STR(BOOT_INFO_BLOCK)":D", "kernel-soclib.bin@"TO_STR(KERNEL_BIN_IMG)":D"); #else soclib::common::Loader loader(soft_name); #endif #if USE_GDBSERVER typedef soclib::common::GdbServer proc_iss; proc_iss::set_loader(loader); #else typedef soclib::common::Mips32ElIss proc_iss; 
#endif TsarV4ClusterXbar* clusters[xmax][ymax]; #if USE_OPENMP #pragma omp parallel { #pragma omp for for( int i = 0 ; i < (xmax * ymax); i++) { size_t x = i / ymax; size_t y = i % ymax; #pragma omp critical { std::ostringstream sc; sc << "cluster_" << x << "_" << y; clusters[x][y] = new TsarV4ClusterXbar (sc.str().c_str(), nprocs, x, y, cluster(x,y), maptabd, maptabc, maptabx, x_width, y_width, MEMC_TGTID, XICU_TGTID, FBUF_TGTID, MTTY_TGTID, BROM_TGTID, BDEV_TGTID, CDMA_TGTID, memc_ways, memc_sets, l1_i_ways, l1_i_sets, l1_d_ways, l1_d_sets, xram_latency, (cluster(x,y) == cluster_io_index), xfb, yfb, fb_mode, disk_name, blk_size, loader); } } } #else // USE_OPENMP for( size_t x = 0 ; x < xmax ; x++) { for( size_t y = 0 ; y < ymax ; y++ ) { std::ostringstream sc; sc << "cluster_" << x << "_" << y; clusters[x][y] = new TsarV4ClusterXbar (sc.str().c_str(), nprocs, x, y, cluster(x,y), maptabd, maptabc, maptabx, x_width, y_width, MEMC_TGTID, XICU_TGTID, FBUF_TGTID, MTTY_TGTID, BROM_TGTID, BDEV_TGTID, CDMA_TGTID, memc_ways, memc_sets, l1_i_ways, l1_i_sets, l1_d_ways, l1_d_sets, xram_latency, (cluster(x,y) == cluster_io_index), xfb, yfb, fb_mode, disk_name, blk_size, loader); } } #endif // USE_OPENMP /////////////////////////////////////////////////////////////// // Net-list /////////////////////////////////////////////////////////////// // Clock & RESET for ( size_t x = 0 ; x < (xmax) ; x++ ) { for ( size_t y = 0 ; y < ymax ; y++ ) { clusters[x][y]->p_clk (signal_clk); clusters[x][y]->p_resetn (signal_resetn); } } // Inter Clusters horizontal connections if ( xmax > 1 ) { for ( size_t x = 0 ; x < (xmax-1) ; x++ ) { for ( size_t y = 0 ; y < ymax ; y++ ) { for ( size_t k = 0 ; k < 2 ; k++ ) { clusters[x][y]->p_cmd_out[k][EAST] (signal_dspin_h_cmd_inc[x][y][k]); clusters[x+1][y]->p_cmd_in[k][WEST] (signal_dspin_h_cmd_inc[x][y][k]); clusters[x][y]->p_cmd_in[k][EAST] (signal_dspin_h_cmd_dec[x][y][k]); clusters[x+1][y]->p_cmd_out[k][WEST] (signal_dspin_h_cmd_dec[x][y][k]); 
clusters[x][y]->p_rsp_out[k][EAST] (signal_dspin_h_rsp_inc[x][y][k]); clusters[x+1][y]->p_rsp_in[k][WEST] (signal_dspin_h_rsp_inc[x][y][k]); clusters[x][y]->p_rsp_in[k][EAST] (signal_dspin_h_rsp_dec[x][y][k]); clusters[x+1][y]->p_rsp_out[k][WEST] (signal_dspin_h_rsp_dec[x][y][k]); } } } } std::cout << "Horizontal connections established" << std::endl; // Inter Clusters vertical connections if ( ymax > 1 ) { for ( size_t y = 0 ; y < (ymax-1) ; y++ ) { for ( size_t x = 0 ; x < xmax ; x++ ) { for ( size_t k = 0 ; k < 2 ; k++ ) { clusters[x][y]->p_cmd_out[k][NORTH] (signal_dspin_v_cmd_inc[x][y][k]); clusters[x][y+1]->p_cmd_in[k][SOUTH] (signal_dspin_v_cmd_inc[x][y][k]); clusters[x][y]->p_cmd_in[k][NORTH] (signal_dspin_v_cmd_dec[x][y][k]); clusters[x][y+1]->p_cmd_out[k][SOUTH] (signal_dspin_v_cmd_dec[x][y][k]); clusters[x][y]->p_rsp_out[k][NORTH] (signal_dspin_v_rsp_inc[x][y][k]); clusters[x][y+1]->p_rsp_in[k][SOUTH] (signal_dspin_v_rsp_inc[x][y][k]); clusters[x][y]->p_rsp_in[k][NORTH] (signal_dspin_v_rsp_dec[x][y][k]); clusters[x][y+1]->p_rsp_out[k][SOUTH] (signal_dspin_v_rsp_dec[x][y][k]); } } } } std::cout << "Vertical connections established" << std::endl; // East & West boundary cluster connections for ( size_t y = 0 ; y < ymax ; y++ ) { for ( size_t k = 0 ; k < 2 ; k++ ) { clusters[0][y]->p_cmd_in[k][WEST] (signal_dspin_false_cmd_in[0][y][k][WEST]); clusters[0][y]->p_cmd_out[k][WEST] (signal_dspin_false_cmd_out[0][y][k][WEST]); clusters[0][y]->p_rsp_in[k][WEST] (signal_dspin_false_rsp_in[0][y][k][WEST]); clusters[0][y]->p_rsp_out[k][WEST] (signal_dspin_false_rsp_out[0][y][k][WEST]); clusters[xmax-1][y]->p_cmd_in[k][EAST] (signal_dspin_false_cmd_in[xmax-1][y][k][EAST]); clusters[xmax-1][y]->p_cmd_out[k][EAST] (signal_dspin_false_cmd_out[xmax-1][y][k][EAST]); clusters[xmax-1][y]->p_rsp_in[k][EAST] (signal_dspin_false_rsp_in[xmax-1][y][k][EAST]); clusters[xmax-1][y]->p_rsp_out[k][EAST] (signal_dspin_false_rsp_out[xmax-1][y][k][EAST]); } } // North & South boundary 
clusters connections for ( size_t x = 0 ; x < xmax ; x++ ) { for ( size_t k = 0 ; k < 2 ; k++ ) { clusters[x][0]->p_cmd_in[k][SOUTH] (signal_dspin_false_cmd_in[x][0][k][SOUTH]); clusters[x][0]->p_cmd_out[k][SOUTH] (signal_dspin_false_cmd_out[x][0][k][SOUTH]); clusters[x][0]->p_rsp_in[k][SOUTH] (signal_dspin_false_rsp_in[x][0][k][SOUTH]); clusters[x][0]->p_rsp_out[k][SOUTH] (signal_dspin_false_rsp_out[x][0][k][SOUTH]); clusters[x][ymax-1]->p_cmd_in[k][NORTH] (signal_dspin_false_cmd_in[x][ymax-1][k][NORTH]); clusters[x][ymax-1]->p_cmd_out[k][NORTH] (signal_dspin_false_cmd_out[x][ymax-1][k][NORTH]); clusters[x][ymax-1]->p_rsp_in[k][NORTH] (signal_dspin_false_rsp_in[x][ymax-1][k][NORTH]); clusters[x][ymax-1]->p_rsp_out[k][NORTH] (signal_dspin_false_rsp_out[x][ymax-1][k][NORTH]); } } //////////////////////////////////////////////////////// // Simulation /////////////////////////////////////////////////////// sc_start(sc_core::sc_time(0, SC_NS)); signal_resetn = false; // network boundaries signals for(size_t x=0; x