/////////////////////////////////////////////////////////////////////////
// File: tsarv4_vgmn_generic_32_top.cpp
// Author: Alain Greiner
// Copyright: UPMC/LIP6
// Date : november 5 2010
// This program is released under the GNU public license
/////////////////////////////////////////////////////////////////////////
// This file defines a generic TSAR architecture without virtual memory.
// - It uses the vci_vgmn as global interconnect
// - It uses the vci_local_crossbar as local interconnect
// - It uses the vci_cc_xcache (No MMU)
// The physical address space is 32 bits.
// The number of clusters cannot be larger than 256.
// The three parameters are
// - xmax : number of clusters in a row
// - ymax : number of clusters in a column
// - nprocs : number of processors per cluster
//
// Each cluster contains nprocs processors, one Memory Cache,
// and one XICU component.
// The peripherals BDEV, CDMA, FBUF, MTTY and the boot BROM
// are in the cluster containing address 0xBFC00000.
// - The bdev_irq is connected to IRQ_IN[0]
// - The cdma_irq is connected to IRQ_IN[1]
// - The tty_irq[i] is connected to IRQ_IN[i+2]
// For all clusters, the XICU component contains nprocs timers.
//
// As we target up to 256 clusters, each cluster can contain
// at most 16 Mbytes (in a 4Gbytes address space).
// - Each memory cache contains 9 Mbytes.
// - The Frame buffer contains 2 Mbytes.
// - The Boot ROM contains 1 Mbyte.
//
// General policy for 32 bits address decoding:
// To simplify, all segments base addresses are aligned
// on 1 Mbyte addresses. Therefore the 12 address MSB bits
// define the target in the direct address space.
// In these 12 bits, the (x_width + y_width) MSB bits define
// the cluster index, and the 4 LSB bits define the local index:
//
// | X_ID | Y_ID |---| L_ID | OFFSET |
// |x_width|y_width|---| 4 | 20 |
/////////////////////////////////////////////////////////////////////////

// NOTE(review): the header names of the seven bare #include directives below
// are missing from this copy of the file (text between '<' and '>' appears to
// have been lost everywhere). Restore them from the upstream source.
#include
#include
#include
#include
#include
#include
#include

#include "mapping_table.h"
#include "mips32.h"
#include "vci_simple_ram.h"
#include "vci_multi_tty.h"
#include "vci_mem_cache_v4.h"
#include "vci_cc_vcache_wrapper_v4.h"
//#include "vci_xicu.h"
#include "vci_multi_icu.h"
#include "vci_vgmn.h"
#include "vci_framebuffer.h"
#include "vci_dma_tsar_v2.h"
#include "vci_block_device_tsar_v4.h"
//#include "vci_block_device.h"
//#include "vci_io_bridge.h"

#include "gdbserver.h"

// Block device sector size, frame buffer dimensions and number of TTY
// terminals, as passed to the component constructors in _main().
//#define SECTOR_SIZE 2048
#define SECTOR_SIZE 512

#define FBUF_XSIZE 128
#define FBUF_YSIZE 128

#define NB_TTYS 9

//////////////////////////////////////////////
// segments definition in direct space.
// There is 16 Mbytes address space per cluster.
// The 8 MSB bits define the cluster index (x,y),
// even if the number of clusters is less than 256.
// Each memory cache contains up to 9 Mbytes.
// There is one MEMC segment and one XICU segment per cluster
// The peripherals BDEV, FBUF, MTTY, CDMA and the boot BROM
// are mapped in cluster containing address 0xBFC00000
//
// NOTE(review): the active definitions below use separate USER and KERNEL
// segments (both mapped on the memory cache in _main) and an ICU segment;
// the MEMC/XICU/IOB definitions are commented out.

//#define MEMC_BASE 0x00000000
//#define MEMC_SIZE 0x00900000

#define BROM_BASE 0xBFC00000
#define BROM_SIZE 0x00010000

#define USER_BASE 0x00000000
#define USER_SIZE 0x01000000

#define KERNEL_BASE 0x80000000
#define KERNEL_SIZE 0x00100000

//#define XICU_BASE 0x00900000
//#define XICU_SIZE 0x00001000

#define MTTY_BASE 0x90000000
#define MTTY_SIZE 0x00000200

// TIM_BASE / TIM_SIZE are only referenced by a commented-out segment in _main.
#define TIM_BASE 0x91000000
#define TIM_SIZE 0x00000080

#define BDEV_BASE 0x92000000
#define BDEV_SIZE 0x00000020

#define CDMA_BASE 0x93000000
#define CDMA_SIZE 0x00000100

#define FBUF_BASE 0x96000000
#define FBUF_SIZE 0x00004000

//#define IOB_BASE 0x9E000000
//#define IOB_SIZE 0x00000100

#define ICU_BASE 0x9F000000
#define ICU_SIZE 0x00000100

/* For ALMOS

#define BOOT_INFO_BLOCK 0xbfc08000
#define KERNEL_BIN_IMG 0xbfc10000

*/

////////////////////////////////////////////////////////////////////
// TGTID & SRCID definition in direct space
// For all components: global TGTID = global SRCID = cluster_index
// For processors, the local SRCID is between 0 & nprocs-1
//
// NOTE(review): BDEV_SRCID (1) and CDMA_SRCID (2) fall inside the processor
// range 0..nprocs-1 as soon as nprocs > 1 -- see the net-list in _main.

#define PROC_SRCID 0
#define BDEV_SRCID 1
#define CDMA_SRCID 2

#define MEMC_TGTID 4
#define BROM_TGTID 1
//#define XICU_TGTID 2
#define ICU_TGTID 2
#define MTTY_TGTID 3
//#define IOB_TGTID 0
#define BDEV_TGTID 0
#define CDMA_TGTID 5
#define FBUF_TGTID 6

////////////////////////////////////////////////////////////////////
// TGTID & SRCID definition in IO space

//#define IOB_TGTID_IO 0
//#define BDEV_TGTID_IO 1
//#define CDMA_TGTID_IO 2
//#define FBUF_TGTID_IO 3

//#define IOB_SRCID_IO 0
//#define BDEV_SRCID_IO 1
//#define CDMA_SRCID_IO 2

////////////////////////////////////////////////////////
// TGTID & SRCID definition in coherence space
// For all components: global TGTID = global SRCID = cluster_index
// For MEMC : local SRCID = local TGTID = nprocs
// For processors : local SRCID = local TGTID = PROC_ID

///////////////
// VCI format
#define cell_width 4
#define address_width 32 // 40 eventually
#define address_width_io 32
#define plen_width 8
#define error_width 1
#define clen_width 1
#define rflag_width 1
#define srcid_width 14
#define pktid_width 4
#define trdid_width 4
#define wrplen_width 1

// cluster index (computed from x,y coordinates)
// NOTE(review): expands to an expression using a variable named `ymax`, which
// is not declared anywhere in the visible text; the macro is not used in the
// visible code either. The arguments are not parenthesised in the expansion.
#define cluster(x,y) (y + ymax*x)

/////////////////////////////////
// _main: builds the simulated platform and runs the simulation.
//
// Visible steps, in order:
//   1. default values for the software/disk image paths, cycle count,
//      processor count and debug window, then command-line parsing;
//   2. three mapping tables: direct (maptabd), coherence (maptabc) and
//      external (maptabx), each printed on std::cout;
//   3. clock, reset, IRQ and VCI signal declarations;
//   4. component instantiation: xram, the xnoc/dnoc/cnoc VGMN networks,
//      brom, mtty, fbuf, bdev, cdma, nprocs processors, memc and icu;
//   5. net-list (port-to-signal bindings);
//   6. per-cycle trace printing, then a wait for user input.
//
// argc/argv : command-line arguments forwarded by sc_main.
// Returns EXIT_SUCCESS.
//
// NOTE(review): several spans of this function are missing from this copy of
// the file (template arguments, the body of the argument-parsing loop, part
// of the net-list and the start of the simulation loop). Each damaged span is
// flagged below and left exactly as found; it must be restored from the
// upstream source before this file can compile.
/////////////////////////////////
int _main(int argc, char *argv[])
{
    using namespace sc_core;
    using namespace soclib::caba;
    using namespace soclib::common;

    char soft_name[128] = "giet_vm171/soft.elf"; // pathname to binary code
    char disk_name[128] = "giet_vm171/apps/display/images.raw"; // pathname to the disk image
    size_t ncycles = 1000000000; // simulated cycles
    size_t nprocs = 1; // number of processors per cluster
    bool debug_ok = false; // debug activated
    size_t from_cycle = 0; // debug start cycle
    size_t to_cycle = 1000000000; // debug end cycle

    ////////////// command line arguments //////////////////////
    if (argc > 1)
    {
        // NOTE(review): the text is truncated from the loop condition up to
        // the end of the `vci_param` typedef: the whole argument-parsing body
        // and the closing braces are missing. The typedef on the next line
        // has also lost its template argument list.
        for( int n=1 ; n vci_param;
    typedef soclib::caba::VciParams vci_param_io;

    /////////////////////
    // Mapping Tables
    /////////////////////

    // direct network
    // Targets are selected with the mask 0xFFF00000 (12 address MSB bits).
    // The last Segment argument is true for the memory-cache and boot-ROM
    // segments and false for the peripherals
    // (presumably the "cacheable" flag -- TODO confirm against Segment).
    MappingTable maptabd(address_width, IntTab(12), IntTab(srcid_width), 0xFFF00000);

    // maptabd.add(Segment("d_seg_memc", MEMC_BASE, MEMC_SIZE, IntTab(MEMC_TGTID), true));
    maptabd.add(Segment("d_seg_memc_user", USER_BASE, USER_SIZE, IntTab(MEMC_TGTID), true));
    maptabd.add(Segment("d_seg_memc_kernel", KERNEL_BASE, KERNEL_SIZE, IntTab(MEMC_TGTID), true));
    maptabd.add(Segment("d_seg_brom", BROM_BASE, BROM_SIZE, IntTab(BROM_TGTID), true));
    // maptabd.add(Segment("d_seg_xicu", XICU_BASE, XICU_SIZE, IntTab(XICU_TGTID), false));
    maptabd.add(Segment("d_seg_icu", ICU_BASE, ICU_SIZE, IntTab(ICU_TGTID), false));
    maptabd.add(Segment("d_seg_mtty", MTTY_BASE, MTTY_SIZE, IntTab(MTTY_TGTID), false));
    //maptabd.add(Segment("d_seg_tim" , TIM_BASE , TIM_SIZE , IntTab(MEMC_TGTID ), false));
    maptabd.add(Segment("d_seg_bdev", BDEV_BASE, BDEV_SIZE, IntTab(BDEV_TGTID ), false));
    maptabd.add(Segment("d_seg_cdma", CDMA_BASE, CDMA_SIZE, IntTab(CDMA_TGTID ), false));
    maptabd.add(Segment("d_seg_fbuf", FBUF_BASE, FBUF_SIZE, IntTab(FBUF_TGTID ), false));

    std::cout << maptabd << std::endl;

    // coherence network
    // The memory cache is target `nprocs`; processor p is target p.
    MappingTable maptabc(address_width, IntTab(12), IntTab(srcid_width), 0xF0000000);

    // `sm` is reused further down as the instance name of the memory cache.
    std::ostringstream sm;
    sm << "c_seg_memc_0";
    // maptabc.add(Segment(sm.str(), MEMC_BASE, MEMC_SIZE, IntTab(nprocs), false));
    maptabc.add(Segment(sm.str(), USER_BASE, USER_SIZE, IntTab(nprocs), false));
    maptabc.add(Segment("c_seb_memc_kernel", KERNEL_BASE, KERNEL_SIZE, IntTab(nprocs), false));
    // the segment base and size will be modified
    // when the segmentation of the coherence space will be simplified

    std::ostringstream sr;
    sr << "c_seg_brom_0";
    maptabc.add(Segment(sr.str(), BROM_BASE, BROM_SIZE, IntTab(nprocs), false));

    // NOTE(review): both sc_uint declarations below have lost their width
    // template argument.
    sc_uint avoid_collision = 0;
    for ( size_t p = 0 ; p < nprocs ; p++)
    {
        // One 0x20-byte coherence segment per processor, placed on successive
        // 1 Mbyte boundaries starting at USER_SIZE + KERNEL_SIZE.
        sc_uint base = USER_SIZE + KERNEL_SIZE + (p*0x100000);
        // the following test is to avoid a collision between the c_seg_brom segment
        // and a c_seg_proc segment (all segments base addresses being multiple of 1Mbytes)
        // Once set, the offset stays applied to all following processors.
        if ( base == BROM_BASE ) avoid_collision = 0x100000;
        std::ostringstream sp;
        sp << "c_seg_proc_" << p;
        maptabc.add(Segment(sp.str(), base + avoid_collision, 0x20, IntTab(p), false, true, IntTab(p)));
        // the two last arguments will be removed
        // when the segmentation of the coherence space will be simplified
    }

    std::cout << maptabc << std::endl;

    // external network
    // Single target (index 0): the external RAM.
    MappingTable maptabx(address_width, IntTab(1), IntTab(srcid_width), 0xF0000000);
    // maptabx.add(Segment("seg_memc_x", MEMC_BASE, MEMC_SIZE, IntTab(0), false));
    maptabx.add(Segment("seg_memc_x_user", USER_BASE, USER_SIZE, IntTab(0), false));
    maptabx.add(Segment("seg_memc_x_kernel", KERNEL_BASE, KERNEL_SIZE, IntTab(0), false));

    std::cout << maptabx << std::endl;

    ////////////////////
    // Signals
    ///////////////////
    // NOTE(review): every sc_signal / VciSignals / alloc_elems declaration in
    // this section has lost its template argument list.

    sc_clock signal_clk("clk");
    sc_signal signal_resetn("resetn");
    // Bound to every interrupt input that has no source in the net-list.
    sc_signal signal_false;

    // IRQ signals (one signal per proc)
    sc_signal* signal_proc_it = alloc_elems >("signal_proc_it", nprocs);
    sc_signal* signal_irq_mtty = alloc_elems >("signal_irq_mtty", NB_TTYS);

    sc_signal signal_irq_bdev;
    sc_signal signal_irq_cdma;

    // NOTE(review): `empty` is not referenced in the visible text.
    sc_signal empty;

    // Direct VCI signals
    VciSignals* signal_vci_ini_d_proc = alloc_elems >("signal_vci_ini_d_proc", nprocs);

    VciSignals signal_vci_ini_d_bdev("signal_vci_ini_d_bdev");
    VciSignals signal_vci_ini_d_cdma("signal_vci_ini_d_cdma");

    VciSignals signal_vci_tgt_d_memc("signal_vci_tgt_d_memc");
    VciSignals signal_vci_tgt_d_brom("signal_vci_tgt_d_brom");
    // VciSignals signal_vci_tgt_d_xicu("signal_vci_tgt_d_xicu");
    VciSignals signal_vci_tgt_d_icu("signal_vci_tgt_d_icu");
    VciSignals signal_vci_tgt_d_mtty("signal_vci_tgt_d_mtty");
    VciSignals signal_vci_tgt_d_bdev("signal_vci_tgt_d_bdev");
    // The name is spelled "cmda" here and where it is bound in the net-list.
    VciSignals signal_vci_tgt_d_cmda("signal_vci_tgt_d_cmda");
    VciSignals signal_vci_tgt_d_fbuf("signal_vci_tgt_d_fbuf");

    // Coherence VCI signals
    VciSignals* signal_vci_ini_c_proc = alloc_elems >("signal_vci_ini_c_proc", nprocs);
    VciSignals* signal_vci_tgt_c_proc = alloc_elems >("signal_vci_tgt_c_proc", nprocs);

    VciSignals signal_vci_ini_c_memc("signal_vci_ini_c_memc");
    VciSignals signal_vci_tgt_c_memc("signal_vci_tgt_c_memc");

    // External network VCI signals
    VciSignals signal_vci_tgt_x_xram("signal_vci_tgt_x_xram");
    VciSignals signal_vci_ini_x_memc("signal_vci_ini_x_memc");

    ////////////////////////////
    // Components
    ////////////////////////////
    // NOTE(review): component template argument lists are missing in this
    // section as well (GdbServer, VciSimpleRam, VciVgmn, ...).

    typedef soclib::common::GdbServer proc_iss;

    // The same loader (built from soft_name) is given to the processor ISS,
    // the external RAM and the boot ROM.
    soclib::common::Loader loader(soft_name);
    proc_iss::set_loader(loader);

    // External RAM
    VciSimpleRam xram(
        "xram",
        IntTab(0),
        maptabx,
        loader);

    // External network
    VciVgmn xnoc(
        "xnoc",
        maptabx,
        1, // initiators
        1, // targets
        2, 2);

    // Direct network
    // The 7 targets are the TGTID values 0..6 defined at the top of the file;
    // the nprocs+2 initiators are the processors plus bdev and cdma.
    VciVgmn dnoc(
        "dnoc",
        maptabd,
        nprocs+2, // nb of initiators
        7, // nb of targets
        2, 2); // latency, FIFO depth

    // Coherence network
    // nprocs processors plus the memory cache, each both initiator and target.
    VciVgmn cnoc(
        "cnoc",
        maptabc,
        nprocs+1,
        nprocs+1,
        2, 2);

    // Peripherals : TTY, Frame Buffer, Block Device, Boot ROM, & DMA
    VciSimpleRam brom(
        "brom",
        IntTab(BROM_TGTID),
        maptabd,
        loader);

    // Nine terminal names are listed, matching NB_TTYS.
    VciMultiTty mtty(
        "mtty",
        IntTab(MTTY_TGTID),
        maptabd,
        "tty0","tty1","tty2","tty3",
        "tty4","tty5","tty6","tty7",
        "tty8", NULL);

    VciFrameBuffer fbuf(
        "fbuf",
        IntTab(FBUF_TGTID),
        maptabd,
        FBUF_XSIZE, FBUF_YSIZE);

    VciBlockDeviceTsarV4 bdev(
    // VciBlockDevice bdev(
        "bdev",
        maptabd,
        IntTab(BDEV_SRCID), // SRCID_D
        IntTab(BDEV_TGTID), // TGTID_D
        disk_name,
        SECTOR_SIZE,
        32); // burst size

    VciDmaTsarV2 cdma(
        "cdma",
        maptabd,
        IntTab(CDMA_SRCID), // SRCID_D
        IntTab(CDMA_TGTID), // TGTID_D
        64);

    // processors (nprocs per cluster)
    // NOTE(review): `nprocs` is a non-const size_t, so this is a
    // variable-length array -- a compiler extension, not standard C++.
    // The objects created with `new` below are not deleted in the visible text.
    VciCcVCacheWrapperV4 *proc[nprocs];

    for ( size_t p = 0 ; p < nprocs ; p++ )
    {
        // Instance names come out as "proc__<p>" (two underscores).
        std::ostringstream sp;
        sp << "proc_" << "_" << p;

        proc[p] = new VciCcVCacheWrapperV4(
            sp.str().c_str(),
            p,
            maptabd, maptabc,
            IntTab(PROC_SRCID+p), // SRCID_D
            IntTab(PROC_SRCID+p), // SRCID_C
            IntTab(PROC_SRCID+p), // TGTID_C
            4,4, // itlb ways, sets
            4,4, // dtlb ways, sets
            4,64,16,4,64,16, // Icache and Dcache sizes (way, set, words)
            4,8,
            20000000,
            from_cycle,
            false );
    }

    // memory caches (one per cluster)
    // The instance name is the string held in `sm` ("c_seg_memc_0").
    VciMemCacheV4 memc(
        sm.str().c_str(),
        maptabd, maptabc, maptabx,
        IntTab(0), // SRCID_X
        IntTab(nprocs), // SRCID_C
        IntTab(MEMC_TGTID), // TGTID_D
        IntTab(nprocs), // TGTID_C
        16,256,16, // CACHE SIZE
        4096, // HEAP SIZE
        4,4, // TRANSACTION and UPDATE TAB lines
        from_cycle,
        debug_ok
        );

    /*
    // XICU (one per cluster)
    VciXicu xicu(
        "vci_xicu",
        maptabd,
        IntTab(XICU_TGTID), // TGTID_D
        nprocs, // number of TIMERS
        NB_TTYS, // number of hard IRQs
        nprocs+1, // number of soft IRQs
        nprocs); // number of output IRQ lines
    */

    // ICU
    /*
    VciIcu icu(
        "vci_icu",
        IntTab(ICU_TGTID), // TGTID_D
        maptabd,
        NB_TTYS + 2 // number of hard IRQs
        );
    */
    // The interrupt controller is created with 32 inputs and a single output.
    VciMultiIcu *icu;
    icu = new VciMultiIcu("icu",
        IntTab(ICU_TGTID),
        maptabd,
        32, // number of irq in
        1); //NB_PROCS number of irq out

    std::cout << "all components created" << std::endl;

    ///////////////////////////////////////////////////////////////
    // Net-list
    ///////////////////////////////////////////////////////////////

    // External Ram (one instance)
    xram.p_clk (signal_clk);
    xram.p_resetn (signal_resetn);
    xram.p_vci (signal_vci_tgt_x_xram);

    // External Network (one instance)
    xnoc.p_clk (signal_clk);
    xnoc.p_resetn (signal_resetn);
    xnoc.p_to_target[0] (signal_vci_tgt_x_xram);
    xnoc.p_to_initiator[0] (signal_vci_ini_x_memc);

    // Direct Network (one instance)
    dnoc.p_clk (signal_clk);
    dnoc.p_resetn (signal_resetn);
    dnoc.p_to_target[MEMC_TGTID] (signal_vci_tgt_d_memc);
    dnoc.p_to_target[BROM_TGTID] (signal_vci_tgt_d_brom);
    // dnoc.p_to_target[XICU_TGTID] (signal_vci_tgt_d_xicu);
    dnoc.p_to_target[ICU_TGTID] (signal_vci_tgt_d_icu);
    dnoc.p_to_target[MTTY_TGTID] (signal_vci_tgt_d_mtty);
    dnoc.p_to_target[BDEV_TGTID] (signal_vci_tgt_d_bdev);
    dnoc.p_to_target[CDMA_TGTID] (signal_vci_tgt_d_cmda);
    dnoc.p_to_target[FBUF_TGTID] (signal_vci_tgt_d_fbuf);
    dnoc.p_to_initiator[BDEV_SRCID] (signal_vci_ini_d_bdev);
    dnoc.p_to_initiator[CDMA_SRCID] (signal_vci_ini_d_cdma);

    // Coherence Network (one instance)
    cnoc.p_clk (signal_clk);
    cnoc.p_resetn (signal_resetn);
    cnoc.p_to_initiator[nprocs] (signal_vci_ini_c_memc);
    cnoc.p_to_target[nprocs] (signal_vci_tgt_c_memc);

    // Processors
    for ( size_t p = 0 ; p < nprocs ; p++ )
    {
        // NOTE(review): for nprocs > 1 the index p reaches 1 (BDEV_SRCID) and
        // 2 (CDMA_SRCID), which are already bound above, while initiator ports
        // nprocs and nprocs+1 stay unbound. As written the initiator numbering
        // is only consistent for nprocs == 1.
        dnoc.p_to_initiator[p] (signal_vci_ini_d_proc[p]);
        cnoc.p_to_initiator[p] (signal_vci_ini_c_proc[p]);
        cnoc.p_to_target[p] (signal_vci_tgt_c_proc[p]);

        proc[p]->p_clk (signal_clk);
        proc[p]->p_resetn (signal_resetn);
        proc[p]->p_vci_ini_d (signal_vci_ini_d_proc[p]);
        proc[p]->p_vci_ini_c (signal_vci_ini_c_proc[p]);
        proc[p]->p_vci_tgt_c (signal_vci_tgt_c_proc[p]);
        // Only interrupt line 0 has a source; lines 1 to 5 are tied to
        // signal_false.
        proc[p]->p_irq[0] (signal_proc_it[p]);
        for ( size_t j = 1 ; j < 6 ; j++ )
        {
            proc[p]->p_irq[j] (signal_false);
        }
    }

    // NOTE(review): the block below was a commented-out XICU net-list opened
    // with "/*". Its end (the closing "*/") and the beginning of the ICU
    // net-list are missing from this copy of the file, so it is shown here as
    // line comments to keep the following statements out of the comment.
    // /*
    // // XICU
    // xicu.p_clk (signal_clk);
    // xicu.p_resetn (signal_resetn);
    // xicu.p_vci (signal_vci_tgt_d_xicu);
    // for ( size_t p = 0 ; p < nprocs ; p++ )
    // {
    //     xicu.p_irq[p] (signal_proc_it[p]);
    // }

    // NOTE(review): damaged line, left as found. "for(size_t i=0 ; i" looks
    // like the tail of the commented-out XICU block and "p_clk (signal_clk);"
    // like the remainder of an `icu->p_clk` binding (the next two statements
    // bind icu->p_resetn and icu->p_vci) -- confirm against the upstream
    // source.
    for(size_t i=0 ; ip_clk (signal_clk);
    icu->p_resetn (signal_resetn);
    icu->p_vci (signal_vci_tgt_d_icu);
    // The single ICU output drives processor 0 only; signal_proc_it[p] for
    // p > 0 has no driver in the visible text.
    icu->p_irq_out[0] (signal_proc_it[0]);
    // ICU input mapping as wired below:
    //    0..7   : signal_false
    //    8      : cdma
    //    9..15  : signal_false
    //   16..24  : tty 0..8 (NB_TTYS == 9)
    //   25..30  : signal_false
    //   31      : bdev
    for (size_t i = 0 ; i < 32 ; i++ )
    {
        // if ( i < NB_TIMERS ) icu->p_irq_in[i] (signal_irq_tim[i]);
        if ( i < 8 ) icu->p_irq_in[i] (signal_false);
        else if ( i == 8) icu->p_irq_in[i] (signal_irq_cdma);
        else if ( i < 16 ) icu->p_irq_in[i] (signal_false);
        else if ( i < (16 + NB_TTYS) ) icu->p_irq_in[i] (signal_irq_mtty[i-16]);
        else if ( i < 31 ) icu->p_irq_in[i] (signal_false);
        else icu->p_irq_in[i] (signal_irq_bdev);
    }

    // MEMC
    memc.p_clk (signal_clk);
    memc.p_resetn (signal_resetn);
    memc.p_vci_tgt (signal_vci_tgt_d_memc);
    memc.p_vci_ini (signal_vci_ini_c_memc);
    memc.p_vci_tgt_cleanup (signal_vci_tgt_c_memc);
    memc.p_vci_ixr (signal_vci_ini_x_memc);

    brom.p_clk (signal_clk);
    brom.p_resetn (signal_resetn);
    brom.p_vci (signal_vci_tgt_d_brom);

    mtty.p_clk (signal_clk);
    mtty.p_resetn (signal_resetn);
    mtty.p_vci (signal_vci_tgt_d_mtty);
    // NOTE(review): the text is truncated between "i" and "from_cycle" on the
    // next line. No bindings for the tty IRQ outputs, bdev, cdma or fbuf, no
    // simulation start and no declaration of `buf` are visible; the header of
    // the loop that owns the second closing brace below is missing too.
    // What remains is the tail of a condition on (i > from_cycle) and
    // (i < to_cycle) guarding the per-cycle trace output.
    for(size_t i=0 ; i from_cycle) && (i < to_cycle) )
        {
            std::cout << std::dec << "*************** cycle " << i
                      << " ***********************" << std::endl;
            proc[0]->print_trace();
            std::cout << std::endl;
            memc.print_trace();
            std::cout << std::endl;
            icu->print_trace();
            std::cout << std::endl;
            // bdev.print_trace();
            // std::cout << std::endl;
            xram.print_trace();
            std::cout << std::endl;
        }
    }

    // Wait for user input before returning.
    std::cout << "Hit ENTER to end simulation" << std::endl;
    std::cin.getline(buf,1);

    // for ( size_t p = 0 ; p < nprocs ; p++ )
    //     proc[p]->print_stats();

    return EXIT_SUCCESS;
}

// sc_main: wrapper around _main (presumably the SystemC program entry point --
// the name and `using namespace sc_core` suggest so).
//
// Returns whatever _main returns.
// - If _main throws a std::exception, its what() text is printed on std::cout
//   and the function returns 1.
// - Any other exception is reported on std::cout and then rethrown to the
//   caller.
int sc_main (int argc, char *argv[])
{
    try {
        return _main(argc, argv);
    } catch (std::exception &e) {
        std::cout << e.what() << std::endl;
    } catch (...) {
        std::cout << "Unknown exception occured" << std::endl;
        throw;
    }
    // Reached only after a std::exception was caught above.
    return 1;
}