/* -*- c++ -*-
 *
 * File : dspin_router_tsar.cpp
 * Copyright (c) UPMC, Lip6
 * Authors : Alain Greiner, Abbas Sheibanyrad, Ivan Miro, Zhen Zhang
 *
 * SOCLIB_LGPL_HEADER_BEGIN
 *
 * This file is part of SoCLib, GNU LGPLv2.1.
 *
 * SoCLib is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; version 2.1 of the License.
 *
 * SoCLib is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with SoCLib; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA
 *
 * SOCLIB_LGPL_HEADER_END
 *
 */

////////////////////////////////////////////////////////////////////////////////
// This component implements a variant of the standard (SocLib) DSPIN router:
// The routing function has been modified to handle the special case of
// cluster_iob0 (containing component IOB0) and cluster_iob1 (containing
// component IOB1). In those two clusters, the response router must decode
// both the SRCID global bits AND the SRCID local bits to distinguish
// between the IOB and MEMC initiators.
// This component contains the following modifications:
// - 4 extra constructor arguments,
// - 6 new member variables
// - a modified routing function
////////////////////////////////////////////////////////////////////////////////

#include "../include/dspin_router_tsar.h"

namespace soclib { namespace caba {

using namespace soclib::common;
using namespace soclib::caba;

// NOTE(review): this copy of the file appears to have lost every template
// argument list (text between '<' and '>'): the macro below names no template
// parameter although the code uses `flit_width`, `sc_uint` has no width, and
// several `alloc_elems` calls carry a dangling '>'. The tokens are kept exactly
// as found; restore them from the original sources before building.
#define tmpl(x) template x DspinRouterTsar

////////////////////////////////////////////////
//              constructor
////////////////////////////////////////////////
// Builds a router with five input and five output ports.
// Visible work done here:
// - names the p_clk / p_resetn ports and allocates the 5-element port and
//   register arrays through alloc_elems(),
// - stores the local (x,y) coordinates,
// - derives, for each of the x, y and local fields of the first flit, a right
//   shift (counted from flit_width) and a mask of `width` low-order ones,
// - placement-constructs five input fifos "r_in_fifo_<i>" of depth
//   in_fifo_depth and five output fifos "r_out_fifo_<i>" of depth
//   out_fifo_depth in raw storage.
////////////////////////////////////////////////
tmpl(/**/)::DspinRouterTsar( sc_module_name name,
                             const size_t   x,               // x coordinate
                             const size_t   y,               // y coordinate
                             const size_t   x_width,         // x field width in first flit
                             const size_t   y_width,         // y field width in first flit
                             const size_t   in_fifo_depth,   // input fifo depth
                             const size_t   out_fifo_depth,  // output fifo depth
                             const size_t   cluster_iob0,    // cluster containing IOB0
                             const size_t   cluster_iob1,    // cluster containing IOB1
                             const size_t   l_width,         // local field width in first flit
                             const size_t   iob_local_id )   // IOB local index
    : soclib::caba::BaseModule(name),

      p_clk( "p_clk" ),
      p_resetn( "p_resetn" ),
      p_in( alloc_elems >("p_in", 5) ),
      p_out( alloc_elems >("p_out", 5) ),

      r_alloc_out( alloc_elems >("r_alloc_out", 5)),
      r_index_out( soclib::common::alloc_elems >("r_index_out", 5)),
      r_fsm_in( alloc_elems >("r_fsm_in", 5)),
      r_index_in( alloc_elems >("r_index_in", 5)),

      m_local_x( x ),
      m_local_y( y ),
      // x field: the x_width most significant bits of the flit
      m_x_width( x_width ),
      m_x_shift( flit_width - x_width ),
      m_x_mask( (0x1 << x_width) - 1 ),
      // y field: the y_width bits just below the x field
      m_y_width( y_width ),
      m_y_shift( flit_width - x_width - y_width ),
      m_y_mask( (0x1 << y_width) - 1 ),
      // local field: the l_width bits just below the y field
      m_l_width( l_width ),
      m_l_shift( flit_width - x_width - y_width - l_width ),
      m_l_mask( (0x1 << l_width) - 1 ),
      // NOTE(review): the text between "((x<" and "*) malloc" on the next line
      // is missing in this copy. The end of the initializer list, the opening
      // of the constructor body and (presumably) the start of the r_fifo_in
      // allocation are not visible. The line is reproduced as found and must
      // be restored from the original sources.
      m_is_iob0( cluster_iob0 == ((x<*) malloc(sizeof(GenericFifo)*5);

    // raw storage for the five output fifos; the objects themselves are
    // constructed in place in the loop below
    r_fifo_out = (GenericFifo*) malloc(sizeof(GenericFifo)*5);

    for( size_t i = 0 ; i < 5 ; i++ )
    {
        // input fifo i, named "r_in_fifo_<i>"
        std::ostringstream stri;
        stri << "r_in_fifo_" << i;
        new(&r_fifo_in[i]) GenericFifo(stri.str(), in_fifo_depth);

        // output fifo i, named "r_out_fifo_<i>"
        std::ostringstream stro;
        stro << "r_out_fifo_" << i;
        new(&r_fifo_out[i]) GenericFifo(stro.str(), out_fifo_depth);
    }
} // end constructor

//////////////////////////////////////////////////
// Returns the output direction requested by a first flit.
// The x, y and local destination fields are extracted from `data` with the
// shift/mask pairs computed in the constructor. The x coordinate is compared
// first, then the y coordinate. When both match the local coordinates, the
// local field is examined only if this router is flagged m_is_iob0 or
// m_is_iob1; in every other case the flit goes to the LOCAL port.
// NOTE(review): the threshold 0xA is hard-coded here although the constructor
// receives an `iob_local_id` argument; whether the two are meant to be related
// cannot be determined from this file.
//////////////////////////////////////////////////
tmpl(size_t)::route( sc_uint data )
{
    size_t xdest = (size_t)(data >> m_x_shift) & m_x_mask;
    size_t ydest = (size_t)(data >> m_y_shift) & m_y_mask;
    size_t ldest = (size_t)(data >> m_l_shift) & m_l_mask;

    if      (xdest < m_local_x ) return DSPIN_WEST;
    else if (xdest > m_local_x ) return DSPIN_EAST;
    else if (ydest < m_local_y ) return DSPIN_SOUTH;
    else if (ydest > m_local_y ) return DSPIN_NORTH;
    else  // handling IOB0 & IOB1 special cases
    {
        if      ((m_is_iob0) and (ldest > 0xA)) return DSPIN_WEST;
        else if ((m_is_iob1) and (ldest > 0xA)) return DSPIN_EAST;
        else                                    return DSPIN_LOCAL;
    }
} // end route()

/////////////////////////
// Prints on std::cout one line listing, for every output port that is
// currently allocated, the "input -> output" pair.
// NOTE(review): port_name[] is indexed by port number; presumably its order
// matches the DSPIN_* direction constants - confirm against the header.
// NOTE(review): std::hex is inserted in std::cout and not reset in this
// function, so the stream stays in hexadecimal mode after the call.
/////////////////////////
tmpl(void)::print_trace()
{
    const char* port_name[] = {"NORTH","SOUTH","EAST ","WEST ","LOCAL"};

    std::cout << "DSPIN_ROUTER " << name() << std::hex;
    for ( size_t out=0 ; out<5 ; out++)  // loop on output ports
    {
        if ( r_alloc_out[out].read() )
        {
            int in = r_index_out[out];
            std::cout << " / " << port_name[in] << " -> " << port_name[out] ;
        }
    }
    std::cout << std::endl;
}

////////////////////////
// Computes the next value of all registers and fifos.
// When p_resetn is false, the allocation, index and FSM registers are cleared,
// init() is called on the ten fifos, and the function returns.
// Otherwise the steps below run in this order (later steps read the local
// arrays filled by earlier ones, so the order must be kept):
//   1. default fifo commands taken from the ports,
//   2. get_out[]  : which input each output port is ready to serve,
//   3. input FSMs : req_in[], put_in[], flit_in[] and next r_fsm_in state,
//   4. output allocation / deallocation (r_alloc_out, r_index_out),
//   5. fifo_in_read[],
//   6. output mux (fifo_out_write[], fifo_out_wdata[]),
//   7. update() of the ten fifos.
// In req_in[] and get_out[], the value 0xFFFFFFFF stands for "none": it never
// equals a port index in [0,4].
////////////////////////
tmpl(void)::transition()
{
    // Long wires connecting input and output ports
    size_t              req_in[5];         // input ports  -> output ports
    size_t              get_out[5];        // output ports -> input ports
    bool                put_in[5];         // input ports  -> output ports
    internal_flit_t     flit_in[5];        // input ports  -> output ports

    // control signals for the input fifos
    bool                fifo_in_write[5];
    bool                fifo_in_read[5];
    internal_flit_t     fifo_in_wdata[5];

    // control signals for the output fifos
    bool                fifo_out_write[5];
    bool                fifo_out_read[5];
    internal_flit_t     fifo_out_wdata[5];

    // Reset
    if ( p_resetn == false )
    {
        for(size_t i = 0 ; i < 5 ; i++)
        {
            r_alloc_out[i] = false;
            r_index_out[i] = 0;
            r_index_in[i]  = 0;
            r_fsm_in[i]    = INFSM_IDLE;
            r_fifo_in[i].init();
            r_fifo_out[i].init();
        }
        return;
    }

    // fifos signals default values:
    // input fifos are written from the input ports, output fifos are read
    // by the output ports; the other two commands are decided further down
    for(size_t i = 0 ; i < 5 ; i++)
    {
        fifo_in_read[i]        = false;
        fifo_in_write[i]       = p_in[i].write.read();
        fifo_in_wdata[i].data  = p_in[i].data.read();
        fifo_in_wdata[i].eop   = p_in[i].eop.read();

        fifo_out_read[i]       = p_out[i].read.read();
        fifo_out_write[i]      = false;
    }

    // loop on the output ports:
    // compute get_out[j] depending on the output port state
    // and combining fifo_out[j].wok and r_alloc_out[j]
    for ( size_t j = 0 ; j < 5 ; j++ )
    {
        if( r_alloc_out[j].read() and (r_fifo_out[j].wok()) )
        {
            get_out[j] = r_index_out[j].read();
        }
        else
        {
            get_out[j] = 0xFFFFFFFF;    // no input served
        }
    }

    // loop on the input ports :
    // The port state is defined by r_fsm_in[i], r_index_in[i]
    // The req_in[i] computation implements the X-FIRST algorithm.
    // Both put_in[i] and req_in[i] depend on the input port state.
    for ( size_t i = 0 ; i < 5 ; i++ )
    {
        switch ( r_fsm_in[i].read() )
        {
            case INFSM_IDLE:    // no output port allocated
            {
                put_in[i] = false;

                if ( r_fifo_in[i].rok() ) // packet available in input fifo
                {
                    // the request is issued in this cycle and the routed
                    // direction is saved for the following states
                    req_in[i]     = route( r_fifo_in[i].read().data );
                    r_index_in[i] = req_in[i];
                    r_fsm_in[i]   = INFSM_REQ;
                }
                else
                {
                    req_in[i]     = 0xFFFFFFFF;  // no request
                }
                break;
            }
            case INFSM_REQ:   // waiting output port allocation
            {
                flit_in[i] = r_fifo_in[i].read();
                put_in[i]  = r_fifo_in[i].rok();
                req_in[i]  = r_index_in[i];

                if ( get_out[r_index_in[i].read()] == i ) // first flit transferred
                {
                    // a single-flit packet (eop on the first flit) releases
                    // the input port immediately
                    if ( r_fifo_in[i].read().eop ) r_fsm_in[i] = INFSM_IDLE;
                    else                           r_fsm_in[i] = INFSM_ALLOC;
                }
                break;
            }
            case INFSM_ALLOC:   // output port allocated
            {
                flit_in[i] = r_fifo_in[i].read();
                put_in[i]  = r_fifo_in[i].rok();
                req_in[i]  = 0xFFFFFFFF;                // no request

                if ( r_fifo_in[i].read().eop and
                     r_fifo_in[i].rok() and
                     (get_out[r_index_in[i].read()] == i) ) // last flit transferred
                {
                    r_fsm_in[i] = INFSM_IDLE;
                }
                break;
            }
        } // end switch
    } // end for input ports

    // loop on the output ports :
    // The r_alloc_out[j] and r_index_out[j] computation
    // implements the round-robin allocation policy.
    // These two registers implement a 10-state FSM.
    for( size_t j = 0 ; j < 5 ; j++ )
    {
        if( not r_alloc_out[j].read() )  // not allocated: possible new allocation
        {
            // scan the five inputs starting just after the one recorded in
            // r_index_out[j]; that recorded input is therefore examined last
            for( size_t k = r_index_out[j].read() + 1 ;
                 k < (r_index_out[j] + 6) ; k++)
            {
                size_t i = k % 5;

                if( req_in[i] == j )
                {
                    r_alloc_out[j] = true;
                    r_index_out[j] = i;
                    break;
                }
            } // end loop on input ports
        }
        else                             // allocated: possible deallocation
        {
            // released when the flit offered by the owning input carries eop,
            // is valid (put_in) and the output fifo reports wok()
            if ( flit_in[r_index_out[j]].eop and
                 r_fifo_out[j].wok() and
                 put_in[r_index_out[j]] )
            {
                r_alloc_out[j] = false;
            }
        }
    } // end loop on output ports

    // loop on input ports :
    // fifo_in_read[i] computation (get data from fifo_in[i])
    // (computed here because it depends on get_out[])
    for( size_t i = 0 ; i < 5 ; i++ )
    {
        if ( r_fsm_in[i].read() != INFSM_IDLE )
        {
            fifo_in_read[i] = (get_out[r_index_in[i].read()] == i);
        }
        else
        {
            fifo_in_read[i] = false;
        }
    }  // end loop on input ports

    // loop on the output ports :
    // The fifo_out_write[j] and fifo_out_wdata[j] computation
    // implements the output port mux.
    // NOTE(review): for an output that is not allocated, fifo_out_wdata[j] is
    // never assigned and is still passed to update() below, together with
    // fifo_out_write[j] == false.
    for( size_t j = 0 ; j < 5 ; j++ )
    {
        if( r_alloc_out[j] )  // output port allocated
        {
            fifo_out_write[j] = put_in[r_index_out[j]];
            fifo_out_wdata[j] = flit_in[r_index_out[j]];
        }
    }  // end loop on the output ports

    //  FIFOS update
    for(size_t i = 0 ; i < 5 ; i++)
    {
        r_fifo_in[i].update(fifo_in_read[i],
                            fifo_in_write[i],
                            fifo_in_wdata[i]);
        r_fifo_out[i].update(fifo_out_read[i],
                             fifo_out_write[i],
                             fifo_out_wdata[i]);
    }
} // end transition

////////////////////////////////
//      genMoore
////////////////////////////////
// Drives the port outputs from the fifo state only:
// - each input port's `read` follows wok() of its input fifo,
// - each output port's `data` / `eop` come from read() on its output fifo
//   and its `write` follows rok() of that fifo.
////////////////////////////////
tmpl(void)::genMoore()
{
    for(size_t i = 0 ; i < 5 ; i++)
    {
        // input ports : READ signals
        p_in[i].read = r_fifo_in[i].wok();

        // output ports : DATA & WRITE signals
        p_out[i].data  = r_fifo_out[i].read().data;
        p_out[i].eop   = r_fifo_out[i].read().eop;
        p_out[i].write = r_fifo_out[i].rok();
    }
} // end genMoore

}} // end namespace

// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:

// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4