/* -*- c++ -*-
 * File : vci_cc_vcache_wrapper_v4.cpp
 * Copyright (c) UPMC, Lip6, SoC
 * Authors : Alain GREINER, Yang GAO
 *
 * SOCLIB_LGPL_HEADER_BEGIN
 *
 * This file is part of SoCLib, GNU LGPLv2.1.
 *
 * SoCLib is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; version 2.1 of the License.
 *
 * SoCLib is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with SoCLib; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA
 *
 * SOCLIB_LGPL_HEADER_END
 */

#include <cassert>

#include "arithmetics.h"
#include "../include/vci_cc_vcache_wrapper_v4.h"

#define DEBUG_DCACHE  1
#define DEBUG_ICACHE  1
#define DEBUG_CLEANUP 0

namespace soclib {
namespace caba {

namespace {

const char *icache_fsm_state_str[] = {
    "ICACHE_IDLE",
    "ICACHE_XTN_TLB_FLUSH",
    "ICACHE_XTN_CACHE_FLUSH",
    "ICACHE_XTN_TLB_INVAL",
    "ICACHE_XTN_CACHE_INVAL_VA",
    "ICACHE_XTN_CACHE_INVAL_PA",
    "ICACHE_XTN_CACHE_INVAL_GO",
    "ICACHE_TLB_WAIT",
    "ICACHE_MISS_VICTIM",
    "ICACHE_MISS_INVAL",
    "ICACHE_MISS_WAIT",
    "ICACHE_MISS_UPDT",
    "ICACHE_UNC_WAIT",
    "ICACHE_CC_CHECK",
    "ICACHE_CC_INVAL",
    "ICACHE_CC_UPDT",
};

const char *dcache_fsm_state_str[] = {
    "DCACHE_IDLE",
    "DCACHE_TLB_MISS",
    "DCACHE_TLB_PTE1_GET",
    "DCACHE_TLB_PTE1_SELECT",
    "DCACHE_TLB_PTE1_UPDT",
    "DCACHE_TLB_PTE2_GET",
    "DCACHE_TLB_PTE2_SELECT",
    "DCACHE_TLB_PTE2_UPDT",
    "DCACHE_TLB_LR_UPDT",
    "DCACHE_TLB_LR_WAIT",
    "DCACHE_TLB_RETURN",
    "DCACHE_XTN_SWITCH",
    "DCACHE_XTN_SYNC",
    "DCACHE_XTN_IC_INVAL_VA",
    "DCACHE_XTN_IC_FLUSH",
    "DCACHE_XTN_IC_INVAL_PA",
    "DCACHE_XTN_IT_INVAL",
    "DCACHE_XTN_DC_FLUSH",
    "DCACHE_XTN_DC_INVAL_VA",
    "DCACHE_XTN_DC_INVAL_PA",
    "DCACHE_XTN_DC_INVAL_END",
    "DCACHE_XTN_DC_INVAL_GO",
    "DCACHE_XTN_DT_INVAL",
    "DCACHE_DIRTY_PTE_GET",
    "DCACHE_DIRTY_WAIT",
    "DCACHE_MISS_VICTIM",
    "DCACHE_MISS_INVAL",
    "DCACHE_MISS_WAIT",
    "DCACHE_MISS_UPDT",
    "DCACHE_UNC_WAIT",
    "DCACHE_LL_WAIT",
    "DCACHE_SC_WAIT",
    "DCACHE_CC_CHECK",
    "DCACHE_CC_INVAL",
    "DCACHE_CC_UPDT",
    "DCACHE_INVAL_TLB_SCAN",
};

const char *cmd_fsm_state_str[] = {
    "CMD_IDLE",
    "CMD_INS_MISS",
    "CMD_INS_UNC",
    "CMD_DATA_MISS",
    "CMD_DATA_UNC",
    "CMD_DATA_WRITE",
    "CMD_DATA_LL",
    "CMD_DATA_SC",
    "CMD_DATA_CAS",
};

const char *rsp_fsm_state_str[] = {
    "RSP_IDLE",
    "RSP_INS_MISS",
    "RSP_INS_UNC",
    "RSP_DATA_MISS",
    "RSP_DATA_UNC",
    "RSP_DATA_LL",
    "RSP_DATA_WRITE",
};

const char *cleanup_fsm_state_str[] = {
    "CLEANUP_DATA_IDLE",
    "CLEANUP_DATA_GO",
    "CLEANUP_INS_IDLE",
    "CLEANUP_INS_GO",
};

const char *tgt_fsm_state_str[] = {
    "TGT_IDLE",
    "TGT_UPDT_WORD",
    "TGT_UPDT_DATA",
    "TGT_REQ_BROADCAST",
    "TGT_REQ_ICACHE",
    "TGT_REQ_DCACHE",
    "TGT_RSP_BROADCAST",
    "TGT_RSP_ICACHE",
    "TGT_RSP_DCACHE",
};

}
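////////////////////////////////////////////////////////////////////////////
// The tmpl() macro below factors the template boilerplate out of every
// member function definition of this file. For instance
//     tmpl(void)::print_cpi()
// expands to
//     template<typename vci_param, typename iss_t>
//     void VciCcVCacheWrapperV4<vci_param, iss_t>::print_cpi()
////////////////////////////////////////////////////////////////////////////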
#define tmpl(...) template<typename vci_param, typename iss_t> __VA_ARGS__ VciCcVCacheWrapperV4<vci_param, iss_t>

using soclib::common::uint32_log2;

/////////////////////////////////
tmpl(/**/)::VciCcVCacheWrapperV4(
    sc_module_name                     name,
    int                                proc_id,
    const soclib::common::MappingTable &mtd,
    const soclib::common::MappingTable &mtc,
    const soclib::common::IntTab       &initiator_index_d,
    const soclib::common::IntTab       &initiator_index_c,
    const soclib::common::IntTab       &target_index_c,
    size_t                             itlb_ways,
    size_t                             itlb_sets,
    size_t                             dtlb_ways,
    size_t                             dtlb_sets,
    size_t                             icache_ways,
    size_t                             icache_sets,
    size_t                             icache_words,
    size_t                             dcache_ways,
    size_t                             dcache_sets,
    size_t                             dcache_words,
    size_t                             wbuf_nlines,
    size_t                             wbuf_nwords,
    size_t                             x_width,
    size_t                             y_width,
    uint32_t                           memory_cache_local_id,
    uint32_t                           max_frozen_cycles,
    uint32_t                           debug_start_cycle,
    bool                               debug_ok)
    : soclib::caba::BaseModule(name),
      p_clk("clk"),
      p_resetn("resetn"),
      p_vci_ini_d("vci_ini_d"),
      p_vci_ini_c("vci_ini_c"),
      p_vci_tgt_c("vci_tgt_c"),
      m_cacheability_table(mtd.getCacheabilityTable()),
      m_segment(mtc.getSegment(target_index_c)),
      m_srcid_d(mtd.indexForId(initiator_index_d)),
      m_srcid_c(mtc.indexForId(initiator_index_c)),
      m_itlb_ways(itlb_ways),
      m_itlb_sets(itlb_sets),
      m_dtlb_ways(dtlb_ways),
      m_dtlb_sets(dtlb_sets),
      m_icache_ways(icache_ways),
      m_icache_sets(icache_sets),
      m_icache_yzmask((~0)<<(uint32_log2(icache_words) + 2)),
      m_icache_words(icache_words),
      m_dcache_ways(dcache_ways),
      m_dcache_sets(dcache_sets),
      m_dcache_yzmask((~0)<<(uint32_log2(dcache_words) + 2)),
      m_dcache_words(dcache_words),
      m_x_width(x_width),
      m_y_width(y_width),
      m_memory_cache_local_id(memory_cache_local_id),
      m_proc_id(proc_id),
      m_max_frozen_cycles(max_frozen_cycles),
      m_paddr_nbits(vci_param::N),
      m_debug_start_cycle(debug_start_cycle),
      m_debug_ok(debug_ok),
      r_mmu_ptpr("r_mmu_ptpr"),
      r_mmu_mode("r_mmu_mode"),
      r_mmu_word_lo("r_mmu_word_lo"),
      r_mmu_word_hi("r_mmu_word_hi"),
      r_mmu_ibvar("r_mmu_ibvar"),
      r_mmu_dbvar("r_mmu_dbvar"),
      r_mmu_ietr("r_mmu_ietr"),
      r_mmu_detr("r_mmu_detr"),
      r_icache_fsm("r_icache_fsm"),
      r_icache_fsm_save("r_icache_fsm_save"),
      r_icache_vci_paddr("r_icache_vci_paddr"),
      r_icache_vaddr_save("r_icache_vaddr_save"),
      r_icache_miss_way("r_icache_miss_way"),
      r_icache_miss_set("r_icache_miss_set"),
      r_icache_miss_word("r_icache_miss_word"),
      r_icache_miss_inval("r_icache_miss_inval"),
      r_icache_cc_way("r_icache_cc_way"),
      r_icache_cc_set("r_icache_cc_set"),
      r_icache_cc_word("r_icache_cc_word"),
      r_icache_flush_count("r_icache_flush_count"),
      r_icache_miss_req("r_icache_miss_req"),
      r_icache_unc_req("r_icache_unc_req"),
      r_icache_tlb_miss_req("r_icache_tlb_miss_req"),
      r_icache_tlb_rsp_error("r_icache_tlb_rsp_error"),
      r_icache_cleanup_req("r_icache_cleanup_req"),
      r_icache_cleanup_line("r_icache_cleanup_line"),
      r_dcache_fsm("r_dcache_fsm"),
      r_dcache_fsm_cc_save("r_dcache_fsm_cc_save"),
      r_dcache_fsm_scan_save("r_dcache_fsm_scan_save"),
      r_dcache_p0_valid("r_dcache_p0_valid"),
      r_dcache_p0_vaddr("r_dcache_p0_vaddr"),
      r_dcache_p0_wdata("r_dcache_p0_wdata"),
      r_dcache_p0_be("r_dcache_p0_be"),
      r_dcache_p0_paddr("r_dcache_p0_paddr"),
      r_dcache_p0_cacheable("r_dcache_p0_cacheable"),
      r_dcache_p1_valid("r_dcache_p1_valid"),
      r_dcache_p1_wdata("r_dcache_p1_wdata"),
      r_dcache_p1_be("r_dcache_p1_be"),
      r_dcache_p1_paddr("r_dcache_p1_paddr"),
      r_dcache_p1_cache_way("r_dcache_p1_cache_way"),
      r_dcache_p1_cache_set("r_dcache_p1_cache_set"),
      r_dcache_p1_cache_word("r_dcache_p1_cache_word"),
      r_dcache_dirty_paddr("r_dcache_dirty_paddr"),
      r_dcache_dirty_way("r_dcache_dirty_way"),
      r_dcache_dirty_set("r_dcache_dirty_set"),
      r_dcache_vci_paddr("r_dcache_vci_paddr"),
r_dcache_vci_miss_req("r_dcache_vci_miss_req"), r_dcache_vci_unc_req("r_dcache_vci_unc_req"), r_dcache_vci_unc_be("r_dcache_vci_unc_be"), r_dcache_vci_cas_req("r_dcache_vci_cas_req"), r_dcache_vci_cas_old("r_dcache_vci_cas_old"), r_dcache_vci_cas_new("r_dcache_vci_cas_new"), r_dcache_vci_ll_req("r_dcache_vci_ll_req"), r_dcache_vci_sc_req("r_dcache_vci_sc_req"), r_dcache_vci_sc_data("r_dcache_vci_sc_data"), r_dcache_xtn_way("r_dcache_xtn_way"), r_dcache_xtn_set("r_dcache_xtn_set"), r_dcache_pending_unc_write("r_dcache_pending_unc_write"), r_dcache_miss_type("r_dcache_miss_type"), r_dcache_miss_word("r_dcache_miss_word"), r_dcache_miss_way("r_dcache_miss_way"), r_dcache_miss_set("r_dcache_miss_set"), r_dcache_miss_inval("r_dcache_miss_inval"), r_dcache_cc_way("r_dcache_cc_way"), r_dcache_cc_set("r_dcache_cc_set"), r_dcache_cc_word("r_dcache_cc_word"), r_dcache_flush_count("r_dcache_flush_count"), r_dcache_tlb_vaddr("r_dcache_tlb_vaddr"), r_dcache_tlb_ins("r_dcache_tlb_ins"), r_dcache_tlb_pte_flags("r_dcache_tlb_pte_flags"), r_dcache_tlb_pte_ppn("r_dcache_tlb_pte_ppn"), r_dcache_tlb_cache_way("r_dcache_tlb_cache_way"), r_dcache_tlb_cache_set("r_dcache_tlb_cache_set"), r_dcache_tlb_cache_word("r_dcache_tlb_cache_word"), r_dcache_tlb_way("r_dcache_tlb_way"), r_dcache_tlb_set("r_dcache_tlb_set"), r_dcache_tlb_inval_line("r_dcache_tlb_inval_line"), r_dcache_tlb_inval_count("r_dcache_tlb_inval_count"), r_dcache_xtn_req("r_dcache_xtn_req"), r_dcache_xtn_opcode("r_dcache_xtn_opcode"), r_dcache_cleanup_req("r_dcache_cleanup_req"), r_dcache_cleanup_line("r_dcache_cleanup_line"), r_vci_cmd_fsm("r_vci_cmd_fsm"), r_vci_cmd_min("r_vci_cmd_min"), r_vci_cmd_max("r_vci_cmd_max"), r_vci_cmd_cpt("r_vci_cmd_cpt"), r_vci_cmd_imiss_prio("r_vci_cmd_imiss_prio"), r_vci_rsp_fsm("r_vci_rsp_fsm"), r_vci_rsp_cpt("r_vci_rsp_cpt"), r_vci_rsp_ins_error("r_vci_rsp_ins_error"), r_vci_rsp_data_error("r_vci_rsp_data_error"), r_vci_rsp_fifo_icache("r_vci_rsp_fifo_icache", 2), // 2 words depth r_vci_rsp_fifo_dcache("r_vci_rsp_fifo_dcache", 2), // 2 words depth r_cleanup_fsm("r_cleanup_fsm"), r_cleanup_trdid("r_cleanup_trdid"), r_cleanup_buffer(4), // up to 4 simultaneous cleanups r_tgt_fsm("r_tgt_fsm"), r_tgt_paddr("r_tgt_paddr"), r_tgt_word_count("r_tgt_word_count"), r_tgt_word_min("r_tgt_word_min"), r_tgt_word_max("r_tgt_word_max"), r_tgt_update("r_tgt_update"), r_tgt_update_data("r_tgt_update_data"), r_tgt_srcid("r_tgt_srcid"), r_tgt_pktid("r_tgt_pktid"), r_tgt_trdid("r_tgt_trdid"), r_tgt_icache_req("r_tgt_icache_req"), r_tgt_dcache_req("r_tgt_dcache_req"), r_tgt_icache_rsp("r_tgt_icache_rsp"), r_tgt_dcache_rsp("r_tgt_dcache_rsp"), r_iss(this->name(), proc_id), r_wbuf("wbuf", wbuf_nwords, wbuf_nlines, dcache_words ), r_icache("icache", icache_ways, icache_sets, icache_words), r_dcache("dcache", dcache_ways, dcache_sets, dcache_words), r_itlb("itlb", proc_id, itlb_ways,itlb_sets,vci_param::N), r_dtlb("dtlb", proc_id, dtlb_ways,dtlb_sets,vci_param::N) { assert( ((icache_words*vci_param::B) < (1< 2) and ((1<<(vci_param::T-1)) >= (wbuf_nlines)) and "Need more TRDID bits."); assert( (icache_words == dcache_words) and "icache_words and dcache_words parameters must be equal"); assert( (itlb_sets == dtlb_sets) and "itlb_sets and dtlb_sets parameters must be etqual"); assert( (itlb_ways == dtlb_ways) and "itlb_ways and dtlb_ways parameters must be etqual"); r_mmu_params = (uint32_log2(m_dtlb_ways) << 29) | (uint32_log2(m_dtlb_sets) << 25) | (uint32_log2(m_dcache_ways) << 22) | (uint32_log2(m_dcache_sets) << 18) | 
    r_mmu_params = (uint32_log2(m_dtlb_ways)   << 29) | (uint32_log2(m_dtlb_sets)   << 25) |
                   (uint32_log2(m_dcache_ways) << 22) | (uint32_log2(m_dcache_sets) << 18) |
                   (uint32_log2(m_itlb_ways)   << 15) | (uint32_log2(m_itlb_sets)   << 11) |
                   (uint32_log2(m_icache_ways) << 8)  | (uint32_log2(m_icache_sets) << 4)  |
                   (uint32_log2(m_icache_words<<2));

    r_mmu_release = (uint32_t)(1 << 16) | 0x1;

    r_tgt_buf             = new uint32_t[dcache_words];
    r_tgt_be              = new vci_be_t[dcache_words];
    r_dcache_in_tlb       = new bool[dcache_ways*dcache_sets];
    r_dcache_contains_ptd = new bool[dcache_ways*dcache_sets];

    SC_METHOD(transition);
    dont_initialize();
    sensitive << p_clk.pos();

    SC_METHOD(genMoore);
    dont_initialize();
    sensitive << p_clk.neg();

    typename iss_t::CacheInfo cache_info;
    cache_info.has_mmu          = true;
    cache_info.icache_line_size = icache_words*sizeof(uint32_t);
    cache_info.icache_assoc     = icache_ways;
    cache_info.icache_n_lines   = icache_sets;
    cache_info.dcache_line_size = dcache_words*sizeof(uint32_t);
    cache_info.dcache_assoc     = dcache_ways;
    cache_info.dcache_n_lines   = dcache_sets;
    r_iss.setCacheInfo(cache_info);
}

/////////////////////////////////////
tmpl(/**/)::~VciCcVCacheWrapperV4()
/////////////////////////////////////
{
    delete [] r_tgt_be;
    delete [] r_tgt_buf;
    delete [] r_dcache_in_tlb;
    delete [] r_dcache_contains_ptd;
}

////////////////////////
tmpl(void)::print_cpi()
////////////////////////
{
    std::cout << name() << " CPI = "
              << (float)m_cpt_total_cycles/(m_cpt_total_cycles - m_cpt_frz_cycles)
              << std::endl;
}

////////////////////////////////////
tmpl(void)::print_trace(size_t mode)
////////////////////////////////////
{
    // b0 : write buffer trace
    // b1 : write buffer verbose
    // b2 : dcache trace
    // b3 : icache trace
    // b4 : dtlb trace
    // b5 : itlb trace

    std::cout << std::dec << "PROC " << name() << std::endl;
    std::cout << "  " << m_ireq << std::endl;
    std::cout << "  " << m_irsp << std::endl;
    std::cout << "  " << m_dreq << std::endl;
    std::cout << "  " << m_drsp << std::endl;
    std::cout << "  " << icache_fsm_state_str[r_icache_fsm.read()]
              << " | " << dcache_fsm_state_str[r_dcache_fsm.read()]
              << " | " << cmd_fsm_state_str[r_vci_cmd_fsm.read()]
              << " | " << rsp_fsm_state_str[r_vci_rsp_fsm.read()]
              << " | " << tgt_fsm_state_str[r_tgt_fsm.read()]
              << " | " << cleanup_fsm_state_str[r_cleanup_fsm.read()];
    if (r_dcache_p0_valid.read()) std::cout << " | P1_WRITE";
    if (r_dcache_p1_valid.read()) std::cout << " | P2_WRITE";
    std::cout << std::endl;

    if (mode & 0x01) { r_wbuf.printTrace((mode>>1)&1); }
    if (mode & 0x04) { std::cout << "  Data Cache" << std::endl; r_dcache.printTrace(); }
    if (mode & 0x08) { std::cout << "  Instruction Cache" << std::endl; r_icache.printTrace(); }
    if (mode & 0x10) { std::cout << "  Data TLB" << std::endl; r_dtlb.printTrace(); }
    if (mode & 0x20) { std::cout << "  Instruction TLB" << std::endl; r_itlb.printTrace(); }
}

//////////////////////////////////////////
tmpl(void)::cache_monitor( paddr_t addr )
//////////////////////////////////////////
{
    size_t   cache_way;
    size_t   cache_set;
    size_t   cache_word;
    uint32_t cache_rdata;

    bool cache_hit  = r_dcache.read_neutral( addr, &cache_rdata, &cache_way, &cache_set, &cache_word );
    bool icache_hit = r_icache.read_neutral( addr, &cache_rdata, &cache_way, &cache_set, &cache_word );

    if ( cache_hit != m_debug_previous_hit )
    {
        std::cout << "PROC " << name()
                  << " dcache change at cycle " << std::dec << m_cpt_total_cycles
                  << " for address " << std::hex << addr
                  << " / HIT = " << std::dec << cache_hit << std::endl;
        m_debug_previous_hit = cache_hit;
    }
    if ( icache_hit != m_idebug_previous_hit )
    {
        std::cout << "PROC " << name()
                  << " icache change at cycle " << std::dec << m_cpt_total_cycles
                  << " for address " << std::hex << addr
                  << " / HIT = " << icache_hit << std::endl;
        m_idebug_previous_hit = icache_hit;
    }
}

/*
////////////////////////
tmpl(void)::print_stats()
////////////////////////
{
    float run_cycles = (float)(m_cpt_total_cycles - m_cpt_frz_cycles);
    std::cout << name() << std::endl
        << "- CPI                    = " << (float)m_cpt_total_cycles/run_cycles << std::endl
        << "- READ RATE              = " << (float)m_cpt_read/run_cycles << std::endl
        << "- WRITE RATE             = " << (float)m_cpt_write/run_cycles << std::endl
        << "- IMISS_RATE             = " << (float)m_cpt_ins_miss/m_cpt_ins_read << std::endl
        << "- DMISS RATE             = " << (float)m_cpt_data_miss/(m_cpt_read-m_cpt_unc_read) << std::endl
        << "- INS MISS COST          = " << (float)m_cost_ins_miss_frz/m_cpt_ins_miss << std::endl
        << "- DATA MISS COST         = " << (float)m_cost_data_miss_frz/m_cpt_data_miss << std::endl
        << "- WRITE COST             = " << (float)m_cost_write_frz/m_cpt_write << std::endl
        << "- UNC COST               = " << (float)m_cost_unc_read_frz/m_cpt_unc_read << std::endl
        << "- UNCACHED READ RATE     = " << (float)m_cpt_unc_read/m_cpt_read << std::endl
        << "- CACHED WRITE RATE      = " << (float)m_cpt_write_cached/m_cpt_write << std::endl
        << "- INS TLB MISS RATE      = " << (float)m_cpt_ins_tlb_miss/m_cpt_ins_tlb_read << std::endl
        << "- DATA TLB MISS RATE     = " << (float)m_cpt_data_tlb_miss/m_cpt_data_tlb_read << std::endl
        << "- ITLB MISS COST         = " << (float)m_cost_ins_tlb_miss_frz/m_cpt_ins_tlb_miss << std::endl
        << "- DTLB MISS COST         = " << (float)m_cost_data_tlb_miss_frz/m_cpt_data_tlb_miss << std::endl
        << "- ITLB UPDATE ACC COST   = " << (float)m_cost_ins_tlb_update_acc_frz/m_cpt_ins_tlb_update_acc << std::endl
        << "- DTLB UPDATE ACC COST   = " << (float)m_cost_data_tlb_update_acc_frz/m_cpt_data_tlb_update_acc << std::endl
        << "- DTLB UPDATE DIRTY COST = " << (float)m_cost_data_tlb_update_dirty_frz/m_cpt_data_tlb_update_dirty << std::endl
        << "- ITLB HIT IN DCACHE RATE= " << (float)m_cpt_ins_tlb_hit_dcache/m_cpt_ins_tlb_miss << std::endl
        << "- DTLB HIT IN DCACHE RATE= " << (float)m_cpt_data_tlb_hit_dcache/m_cpt_data_tlb_miss << std::endl
        << "- DCACHE FROZEN BY ITLB  = " << (float)m_cost_ins_tlb_occup_cache_frz/m_cpt_dcache_frz_cycles << std::endl
        << "- DCACHE FOR TLB %       = " << (float)m_cpt_tlb_occup_dcache/(m_dcache_ways*m_dcache_sets) << std::endl
        << "- NB CC BROADCAST        = " << m_cpt_cc_broadcast << std::endl
        << "- NB CC UPDATE DATA      = " << m_cpt_cc_update_data << std::endl
        << "- NB CC INVAL DATA       = " << m_cpt_cc_inval_data << std::endl
        << "- NB CC INVAL INS        = " << m_cpt_cc_inval_ins << std::endl
        << "- CC BROADCAST COST      = " << (float)m_cost_broadcast_frz/m_cpt_cc_broadcast << std::endl
        << "- CC UPDATE DATA COST    = " << (float)m_cost_updt_data_frz/m_cpt_cc_update_data << std::endl
        << "- CC INVAL DATA COST     = " << (float)m_cost_inval_data_frz/m_cpt_cc_inval_data << std::endl
        << "- CC INVAL INS COST      = " << (float)m_cost_inval_ins_frz/m_cpt_cc_inval_ins << std::endl
        << "- NB CC CLEANUP DATA     = " << m_cpt_cc_cleanup_data << std::endl
        << "- NB CC CLEANUP INS      = " << m_cpt_cc_cleanup_ins << std::endl
        << "- IMISS TRANSACTION      = " << (float)m_cost_imiss_transaction/m_cpt_imiss_transaction << std::endl
        << "- DMISS TRANSACTION      = " << (float)m_cost_dmiss_transaction/m_cpt_dmiss_transaction << std::endl
        << "- UNC TRANSACTION        = " << (float)m_cost_unc_transaction/m_cpt_unc_transaction << std::endl
        << "- WRITE TRANSACTION      = " << (float)m_cost_write_transaction/m_cpt_write_transaction << std::endl
        << "- WRITE LENGTH           = " << (float)m_length_write_transaction/m_cpt_write_transaction << std::endl
        << "- ITLB MISS TRANSACTION  = " << (float)m_cost_itlbmiss_transaction/m_cpt_itlbmiss_transaction << std::endl
MISS TRANSACTION = " << (float)m_cost_dtlbmiss_transaction/m_cpt_dtlbmiss_transaction << std::endl; } //////////////////////// tmpl(void)::clear_stats() //////////////////////// { m_cpt_dcache_data_read = 0; m_cpt_dcache_data_write = 0; m_cpt_dcache_dir_read = 0; m_cpt_dcache_dir_write = 0; m_cpt_icache_data_read = 0; m_cpt_icache_data_write = 0; m_cpt_icache_dir_read = 0; m_cpt_icache_dir_write = 0; m_cpt_frz_cycles = 0; m_cpt_dcache_frz_cycles = 0; m_cpt_total_cycles = 0; m_cpt_read = 0; m_cpt_write = 0; m_cpt_data_miss = 0; m_cpt_ins_miss = 0; m_cpt_unc_read = 0; m_cpt_write_cached = 0; m_cpt_ins_read = 0; m_cost_write_frz = 0; m_cost_data_miss_frz = 0; m_cost_unc_read_frz = 0; m_cost_ins_miss_frz = 0; m_cpt_imiss_transaction = 0; m_cpt_dmiss_transaction = 0; m_cpt_unc_transaction = 0; m_cpt_write_transaction = 0; m_cpt_icache_unc_transaction = 0; m_cost_imiss_transaction = 0; m_cost_dmiss_transaction = 0; m_cost_unc_transaction = 0; m_cost_write_transaction = 0; m_cost_icache_unc_transaction = 0; m_length_write_transaction = 0; m_cpt_ins_tlb_read = 0; m_cpt_ins_tlb_miss = 0; m_cpt_ins_tlb_update_acc = 0; m_cpt_data_tlb_read = 0; m_cpt_data_tlb_miss = 0; m_cpt_data_tlb_update_acc = 0; m_cpt_data_tlb_update_dirty = 0; m_cpt_ins_tlb_hit_dcache = 0; m_cpt_data_tlb_hit_dcache = 0; m_cpt_ins_tlb_occup_cache = 0; m_cpt_data_tlb_occup_cache = 0; m_cost_ins_tlb_miss_frz = 0; m_cost_data_tlb_miss_frz = 0; m_cost_ins_tlb_update_acc_frz = 0; m_cost_data_tlb_update_acc_frz = 0; m_cost_data_tlb_update_dirty_frz = 0; m_cost_ins_tlb_occup_cache_frz = 0; m_cost_data_tlb_occup_cache_frz = 0; m_cpt_itlbmiss_transaction = 0; m_cpt_itlb_ll_transaction = 0; m_cpt_itlb_sc_transaction = 0; m_cpt_dtlbmiss_transaction = 0; m_cpt_dtlb_ll_transaction = 0; m_cpt_dtlb_sc_transaction = 0; m_cpt_dtlb_ll_dirty_transaction = 0; m_cpt_dtlb_sc_dirty_transaction = 0; m_cost_itlbmiss_transaction = 0; m_cost_itlb_ll_transaction = 0; m_cost_itlb_sc_transaction = 0; m_cost_dtlbmiss_transaction = 0; m_cost_dtlb_ll_transaction = 0; m_cost_dtlb_sc_transaction = 0; m_cost_dtlb_ll_dirty_transaction = 0; m_cost_dtlb_sc_dirty_transaction = 0; m_cpt_cc_update_data = 0; m_cpt_cc_inval_ins = 0; m_cpt_cc_inval_data = 0; m_cpt_cc_broadcast = 0; m_cost_updt_data_frz = 0; m_cost_inval_ins_frz = 0; m_cost_inval_data_frz = 0; m_cost_broadcast_frz = 0; m_cpt_cc_cleanup_data = 0; m_cpt_cc_cleanup_ins = 0; } */ ///////////////////////// tmpl(void)::transition() ///////////////////////// { #define LLSCLocalTable GenericLLSCLocalTable<8000, 1, paddr_t, vci_trdid_t, vci_data_t> if ( not p_resetn.read() ) { r_iss.reset(); r_wbuf.reset(); r_icache.reset(); r_dcache.reset(); r_itlb.reset(); r_dtlb.reset(); r_dcache_fsm = DCACHE_IDLE; r_icache_fsm = ICACHE_IDLE; r_vci_cmd_fsm = CMD_IDLE; r_vci_rsp_fsm = RSP_IDLE; r_tgt_fsm = TGT_IDLE; r_cleanup_fsm = CLEANUP_DATA_IDLE; // reset dcache directory extension for (size_t i=0 ; i< m_dcache_ways*m_dcache_sets ; i++) { r_dcache_in_tlb[i] = false; r_dcache_contains_ptd[i] = false; } // Response FIFOs and cleanup buffer r_vci_rsp_fifo_icache.init(); r_vci_rsp_fifo_dcache.init(); r_cleanup_buffer.reset(); // ICACHE & DCACHE activated r_mmu_mode = 0x3; // No request from ICACHE FSM to CMD FSM r_icache_miss_req = false; r_icache_unc_req = false; // No request from ICACHE_FSM to DCACHE FSM r_icache_tlb_miss_req = false; // No request from ICACHE_FSM to CLEANUP FSMs r_icache_cleanup_req = false; // No pending write in pipeline r_dcache_p0_valid = false; r_dcache_p1_valid = false; // No request from DCACHE_FSM to 
        // No request from DCACHE_FSM to CMD_FSM
        r_dcache_vci_miss_req = false;
        r_dcache_vci_unc_req  = false;
        r_dcache_vci_cas_req  = false;
        r_dcache_vci_ll_req   = false;
        r_dcache_vci_sc_req   = false;

        // No uncacheable write pending
        r_dcache_pending_unc_write = false;

        // No processor XTN request pending
        r_dcache_xtn_req = false;

        // No request from DCACHE FSM to CLEANUP FSMs
        r_dcache_cleanup_req = false;

        // No request from TGT FSM to ICACHE/DCACHE FSMs
        r_tgt_icache_req = false;
        r_tgt_dcache_req = false;

        // No signalisation of a coherence request matching a pending miss
        r_icache_miss_inval = false;
        r_dcache_miss_inval = false;

        // No signalisation of errors
        r_vci_rsp_ins_error  = false;
        r_vci_rsp_data_error = false;

        // Debug variables
        m_debug_previous_hit  = false;
        m_idebug_previous_hit = false;
        m_debug_dcache_fsm    = false;
        m_debug_icache_fsm    = false;
        m_debug_cleanup_fsm   = false;

        // activity counters
        m_cpt_dcache_data_read  = 0;   m_cpt_dcache_data_write = 0;
        m_cpt_dcache_dir_read   = 0;   m_cpt_dcache_dir_write  = 0;
        m_cpt_icache_data_read  = 0;   m_cpt_icache_data_write = 0;
        m_cpt_icache_dir_read   = 0;   m_cpt_icache_dir_write  = 0;

        m_cpt_frz_cycles      = 0;     m_cpt_total_cycles      = 0;
        m_cpt_stop_simulation = 0;

        m_cpt_data_miss    = 0;        m_cpt_ins_miss          = 0;
        m_cpt_unc_read     = 0;        m_cpt_write_cached      = 0;
        m_cpt_ins_read     = 0;

        m_cost_write_frz    = 0;       m_cost_data_miss_frz    = 0;
        m_cost_unc_read_frz = 0;       m_cost_ins_miss_frz     = 0;

        m_cpt_imiss_transaction = 0;   m_cpt_dmiss_transaction = 0;
        m_cpt_unc_transaction   = 0;   m_cpt_write_transaction = 0;
        m_cpt_icache_unc_transaction = 0;

        m_cost_imiss_transaction = 0;  m_cost_dmiss_transaction = 0;
        m_cost_unc_transaction   = 0;  m_cost_write_transaction = 0;
        m_cost_icache_unc_transaction = 0;
        m_length_write_transaction    = 0;

        m_cpt_ins_tlb_read        = 0; m_cpt_ins_tlb_miss         = 0;
        m_cpt_ins_tlb_update_acc  = 0;
        m_cpt_data_tlb_read       = 0; m_cpt_data_tlb_miss        = 0;
        m_cpt_data_tlb_update_acc = 0; m_cpt_data_tlb_update_dirty = 0;
        m_cpt_ins_tlb_hit_dcache  = 0; m_cpt_data_tlb_hit_dcache  = 0;
        m_cpt_ins_tlb_occup_cache = 0; m_cpt_data_tlb_occup_cache = 0;

        m_cost_ins_tlb_miss_frz          = 0; m_cost_data_tlb_miss_frz       = 0;
        m_cost_ins_tlb_update_acc_frz    = 0; m_cost_data_tlb_update_acc_frz = 0;
        m_cost_data_tlb_update_dirty_frz = 0; m_cost_ins_tlb_occup_cache_frz = 0;
        m_cost_data_tlb_occup_cache_frz  = 0;

        m_cpt_ins_tlb_inval       = 0; m_cpt_data_tlb_inval       = 0;
        m_cost_ins_tlb_inval_frz  = 0; m_cost_data_tlb_inval_frz  = 0;

        m_cpt_cc_broadcast   = 0;
        m_cost_updt_data_frz  = 0;     m_cost_inval_ins_frz      = 0;
        m_cost_inval_data_frz = 0;     m_cost_broadcast_frz      = 0;
        m_cpt_cc_cleanup_data = 0;     m_cpt_cc_cleanup_ins      = 0;

        m_cpt_itlbmiss_transaction      = 0; m_cpt_itlb_ll_transaction       = 0;
        m_cpt_itlb_sc_transaction       = 0; m_cpt_dtlbmiss_transaction      = 0;
        m_cpt_dtlb_ll_transaction       = 0; m_cpt_dtlb_sc_transaction       = 0;
        m_cpt_dtlb_ll_dirty_transaction = 0; m_cpt_dtlb_sc_dirty_transaction = 0;

        m_cost_itlbmiss_transaction      = 0; m_cost_itlb_ll_transaction       = 0;
        m_cost_itlb_sc_transaction       = 0; m_cost_dtlbmiss_transaction      = 0;
        m_cost_dtlb_ll_transaction       = 0; m_cost_dtlb_sc_transaction       = 0;
        m_cost_dtlb_ll_dirty_transaction = 0; m_cost_dtlb_sc_dirty_transaction = 0;

        /*
        m_cpt_dcache_frz_cycles = 0;
        m_cpt_read = 0;
        m_cpt_write = 0;
        m_cpt_cc_update_data = 0;
        m_cpt_cc_inval_ins = 0;
        m_cpt_cc_inval_data = 0;
        */

        for (uint32_t i=0; i<32 ; ++i) m_cpt_fsm_icache [i] = 0;
        for (uint32_t i=0; i<32 ; ++i) m_cpt_fsm_dcache [i] = 0;
        for (uint32_t i=0; i<32 ; ++i) m_cpt_fsm_cmd    [i] = 0;
        for (uint32_t i=0; i<32 ; ++i) m_cpt_fsm_rsp    [i] = 0;
        for (uint32_t i=0; i<32 ; ++i) m_cpt_fsm_tgt    [i] = 0;
        for (uint32_t i=0; i<32 ; ++i) m_cpt_fsm_cmd_cleanup [i] = 0;
        for (uint32_t i=0; i<32 ; ++i) m_cpt_fsm_rsp_cleanup [i] = 0;

        // init the llsc local registration table
        r_llsc_table.init();

        return;
    }

    // Response FIFOs default values
    bool     vci_rsp_fifo_icache_get  = false;
    bool     vci_rsp_fifo_icache_put  = false;
    uint32_t vci_rsp_fifo_icache_data = 0;

    bool     vci_rsp_fifo_dcache_get  = false;
    bool     vci_rsp_fifo_dcache_put  = false;
    uint32_t vci_rsp_fifo_dcache_data = 0;

#ifdef INSTRUMENTATION
    m_cpt_fsm_dcache  [r_dcache_fsm.read() ] ++;
    m_cpt_fsm_icache  [r_icache_fsm.read() ] ++;
    m_cpt_fsm_cmd     [r_vci_cmd_fsm.read()] ++;
    m_cpt_fsm_rsp     [r_vci_rsp_fsm.read()] ++;
    m_cpt_fsm_tgt     [r_tgt_fsm.read()    ] ++;
    m_cpt_fsm_cleanup [r_cleanup_fsm.read()] ++;
#endif

    m_cpt_total_cycles++;

    m_debug_cleanup_fsm = m_debug_cleanup_fsm ||
        ((m_cpt_total_cycles > m_debug_start_cycle) and m_debug_ok);
    m_debug_icache_fsm  = m_debug_icache_fsm ||
        ((m_cpt_total_cycles > m_debug_start_cycle) and m_debug_ok);
    m_debug_dcache_fsm  = m_debug_dcache_fsm ||
        ((m_cpt_total_cycles > m_debug_start_cycle) and m_debug_ok);

    /////////////////////////////////////////////////////////////////////
    // The TGT_FSM controls the following resources:
    // - r_tgt_fsm
    // - r_tgt_buf[nwords]
    // - r_tgt_be[nwords]
    // - r_tgt_update
    // - r_tgt_word_min
    // - r_tgt_word_max
    // - r_tgt_word_count
    // - r_tgt_paddr
    // - r_tgt_srcid
    // - r_tgt_trdid
    // - r_tgt_pktid
    // - r_tgt_icache_req (set)
    // - r_tgt_dcache_req (set)
    //
    // All VCI commands must be CMD_WRITE.
    // - If the 2 LSB bits of the VCI address are 11, it is a broadcast request.
    //   It is a multicast request otherwise.
    // - For multicast requests, the ADDRESS[2] bit distinguishes DATA/INS
    //   (0 for data / 1 for instruction), and the ADDRESS[3] bit distinguishes
    //   INVAL/UPDATE (0 for invalidate / 1 for update).
    //
    // For all types of coherence request, the line index (i.e. the Z & Y fields)
    // is coded on 34 bits, and is contained in the WDATA and BE fields
    // of the first VCI flit.
    // - For a multicast invalidate or a broadcast invalidate request,
    //   the VCI packet length is 1 word.
    // - For an update request, the VCI packet length is (n+2) words.
    //   The WDATA field of the second VCI word contains the word index.
    //   The WDATA fields of the n following words contain the values.
    // - For all transaction types, the VCI response is one single word.
    // In case of errors in the VCI command packet, the simulation
    // is stopped with an error message.
    //
    // This FSM is NOT pipelined : it consumes a new coherence request
    // on the VCI port only when the previous request is completed.
    //
    // The VCI_TGT FSM stores the external request arguments in the
    // IDLE, UPDT_WORD & UPDT_DATA states. It sets the r_tgt_icache_req
    // and/or the r_tgt_dcache_req flip-flops to signal the coherence request
    // to the ICACHE & DCACHE FSMs in the REQ_ICACHE, REQ_DCACHE & REQ_BROADCAST
    // states. It waits for the completion of the coherence request by polling the
    // r_tgt_*cache_req flip-flops in the RSP_ICACHE, RSP_DCACHE & RSP_BROADCAST
    // states. These flip-flops are reset by the ICACHE and DCACHE FSMs.
    // These two FSMs signal if a VCI answer must be sent by setting
    // the r_tgt_icache_rsp and/or the r_tgt_dcache_rsp flip-flops.
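    //
    // Summary of the coherence command decoding performed in TGT_IDLE below:
    //   ADDRESS[1:0] == 0b11 : broadcast invalidate            (1 flit)
    //   otherwise, with CELL = ADDRESS - segment base address :
    //     CELL == 0          : multicast data invalidate       (1 flit)
    //     CELL == 4          : multicast instruction invalidate(1 flit)
    //     CELL == 8          : multicast data update           (N+2 flits)
    //     any other value    : multicast instruction update    (N+2 flits)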
    ///////////////////////////////////////////////////////////////////////////////

    switch( r_tgt_fsm.read() ) {
    //////////////
    case TGT_IDLE:
    {
        if ( p_vci_tgt_c.cmdval.read() )
        {
            paddr_t address = p_vci_tgt_c.address.read();

            // command checking
            if ( p_vci_tgt_c.cmd.read() != vci_param::CMD_WRITE)
            {
                std::cout << "error in component VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                std::cout << "the received VCI coherence command is not a write" << std::endl;
                exit(0);
            }

            // address checking
            if ( ( (address & 0x3) != 0x3 ) && ( not m_segment.contains(address)) )
            {
                std::cout << "error in component VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                std::cout << "out of segment VCI coherence command received" << std::endl;
                exit(0);
            }

            r_tgt_srcid = p_vci_tgt_c.srcid.read();
            r_tgt_trdid = p_vci_tgt_c.trdid.read();
            r_tgt_pktid = p_vci_tgt_c.pktid.read();

            if (sizeof(paddr_t) <= 32)
            {
                assert(p_vci_tgt_c.be.read() == 0 &&
                       "byte enable should be 0 for 32bits paddr");
                r_tgt_paddr = (paddr_t)p_vci_tgt_c.wdata.read() * m_dcache_words * 4;
            }
            else
            {
                r_tgt_paddr = (paddr_t)(p_vci_tgt_c.be.read() & 0x3) << 32 |
                              (paddr_t)p_vci_tgt_c.wdata.read() * m_dcache_words * 4;
            }

            if ( (address&0x3) == 0x3 )   // broadcast invalidate for data or instruction type
            {
                if ( not p_vci_tgt_c.eop.read() )
                {
                    std::cout << "error in component VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                    std::cout << "the BROADCAST INVALIDATE command must be one flit" << std::endl;
                    exit(0);
                }
                r_tgt_update = false;
                r_tgt_fsm    = TGT_REQ_BROADCAST;
#ifdef INSTRUMENTATION
                m_cpt_cc_broadcast++;
#endif
            }
            else   // multi-update or multi-invalidate for data type
            {
                paddr_t cell = address - m_segment.baseAddress();

                if (cell == 0)        // invalidate data
                {
                    if ( not p_vci_tgt_c.eop.read() )
                    {
                        std::cout << "error in VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                        std::cout << "the MULTI-INVALIDATE command must be one flit" << std::endl;
                        exit(0);
                    }
                    r_tgt_update = false;
                    r_tgt_fsm    = TGT_REQ_DCACHE;
#ifdef INSTRUMENTATION
                    m_cpt_cc_inval_dcache++;
#endif
                }
                else if (cell == 4)   // invalidate instruction
                {
                    if ( not p_vci_tgt_c.eop.read() )
                    {
                        std::cout << "error in VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                        std::cout << "the MULTI-INVALIDATE command must be one flit" << std::endl;
                        exit(0);
                    }
                    r_tgt_update = false;
                    r_tgt_fsm    = TGT_REQ_ICACHE;
#ifdef INSTRUMENTATION
                    m_cpt_cc_inval_icache++;
#endif
                }
                else if (cell == 8)   // update data
                {
                    if ( p_vci_tgt_c.eop.read() )
                    {
                        std::cout << "error in VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                        std::cout << "the MULTI-UPDATE command must be N+2 flits" << std::endl;
                        exit(0);
                    }
                    r_tgt_update      = true;
                    r_tgt_update_data = true;
                    r_tgt_fsm         = TGT_UPDT_WORD;
#ifdef INSTRUMENTATION
                    m_cpt_cc_update_dcache++;
#endif
                }
                else                  // update instruction
                {
                    if ( p_vci_tgt_c.eop.read() )
                    {
                        std::cout << "error in VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                        std::cout << "the MULTI-UPDATE command must be N+2 flits" << std::endl;
                        exit(0);
                    }
                    r_tgt_update      = true;
                    r_tgt_update_data = false;
                    r_tgt_fsm         = TGT_UPDT_WORD;
#ifdef INSTRUMENTATION
                    m_cpt_cc_update_icache++;
#endif
                }
            } // end if multi
        } // end if cmdval
        break;
    }
    ///////////////////
    case TGT_UPDT_WORD:   // first word index acquisition
    {
        if (p_vci_tgt_c.cmdval.read())
        {
            if ( p_vci_tgt_c.eop.read() )
            {
                std::cout << "error in component VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                std::cout << "the MULTI-UPDATE command must be N+2 flits" << std::endl;
                exit(0);
            }
            for ( size_t i=0 ; i<m_dcache_words ; i++ ) r_tgt_be[i] = 0;

            r_tgt_word_min   = p_vci_tgt_c.wdata.read();
            r_tgt_word_count = p_vci_tgt_c.wdata.read();
            r_tgt_fsm        = TGT_UPDT_DATA;
        }
        break;
    }
    ///////////////////
    case TGT_UPDT_DATA:   // data flit acquisition (one word per cycle)
    {
        if (p_vci_tgt_c.cmdval.read())
        {
            size_t word = r_tgt_word_count.read();
            if (word >= m_dcache_words)
            {
                std::cout << "error in component VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                std::cout << "the received MULTI-UPDATE command is wrong" << std::endl;
                exit(0);
            }
            r_tgt_buf[word]  = p_vci_tgt_c.wdata.read();
            r_tgt_be[word]   = p_vci_tgt_c.be.read();
            r_tgt_word_count = word + 1;

            if (p_vci_tgt_c.eop.read())   // last word
            {
                r_tgt_word_max = word;
                if ( r_tgt_update_data.read() ) r_tgt_fsm = TGT_REQ_DCACHE;
                else                            r_tgt_fsm = TGT_REQ_ICACHE;
            }
        }
        break;
    }
    ///////////////////////
    case TGT_REQ_BROADCAST:   // set requests to DCACHE & ICACHE FSMs
    {
        if ( not r_tgt_icache_req.read() and not r_tgt_dcache_req.read() )
        {
            r_tgt_fsm        = TGT_RSP_BROADCAST;
            r_tgt_icache_req = true;
            r_tgt_dcache_req = true;
        }
        break;
    }
    /////////////////////
    case TGT_REQ_ICACHE:   // set request to ICACHE FSM (if no previous request pending)
    {
        if ( not r_tgt_icache_req.read() )
        {
            r_tgt_fsm        = TGT_RSP_ICACHE;
            r_tgt_icache_req = true;
        }
        break;
    }
    ////////////////////
    case TGT_REQ_DCACHE:   // set request to DCACHE FSM (if no previous request pending)
    {
        if ( not r_tgt_dcache_req.read() )
        {
            r_tgt_fsm        = TGT_RSP_DCACHE;
            r_tgt_dcache_req = true;
        }
        break;
    }
    ///////////////////////
    case TGT_RSP_BROADCAST:   // waiting acknowledge from both DCACHE & ICACHE FSMs
                              // no response when r_tgt_*cache_rsp is false
    {
        if ( not r_tgt_icache_req.read() and not r_tgt_dcache_req.read() )   // both completed
        {
            if ( r_tgt_icache_rsp.read() or r_tgt_dcache_rsp.read() )   // at least one response
            {
                if ( p_vci_tgt_c.rspack.read() )
                {
                    // reset dcache first if activated
                    if (r_tgt_dcache_rsp) r_tgt_dcache_rsp = false;
                    else                  r_tgt_icache_rsp = false;
                }
            }
            else
            {
                r_tgt_fsm = TGT_IDLE;
            }
        }
        break;
    }
    ////////////////////
    case TGT_RSP_ICACHE:   // waiting acknowledge from ICACHE FSM
    {
        // no response when r_tgt_icache_rsp is false
        if ( not r_tgt_icache_req.read() and
             (p_vci_tgt_c.rspack.read() or not r_tgt_icache_rsp.read()) )
        {
            r_tgt_fsm        = TGT_IDLE;
            r_tgt_icache_rsp = false;
        }
        break;
    }
    ////////////////////
    case TGT_RSP_DCACHE:   // waiting acknowledge from DCACHE FSM
    {
        // no response when r_tgt_dcache_rsp is false
        if ( not r_tgt_dcache_req.read() and
             (p_vci_tgt_c.rspack.read() or not r_tgt_dcache_rsp.read()) )
        {
            r_tgt_fsm        = TGT_IDLE;
            r_tgt_dcache_rsp = false;
        }
        break;
    }
    } // end switch TGT_FSM

    /////////////////////////////////////////////////////////////////////
    // Get data and instruction requests from processor
    /////////////////////////////////////////////////////////////////////

    r_iss.getRequests(m_ireq, m_dreq);
    ////////////////////////////////////////////////////////////////////////////////////
    // ICACHE_FSM
    //
    // There are nine conditions to exit the IDLE state.
    // One condition is a coherence request from the TGT FSM :
    // - Coherence operation                         => ICACHE_CC_CHECK
    // Five configurations correspond to XTN processor requests sent by the DCACHE FSM :
    // - Flush TLB                                   => ICACHE_XTN_TLB_FLUSH
    // - Flush cache                                 => ICACHE_XTN_CACHE_FLUSH
    // - Invalidate a TLB entry                      => ICACHE_XTN_TLB_INVAL
    // - Invalidate a cache line (virtual address)   => ICACHE_XTN_CACHE_INVAL_VA
    // - Invalidate a cache line (physical address)  => ICACHE_XTN_CACHE_INVAL_PA
    // Three configurations correspond to instruction processor requests :
    // - tlb miss                                    => ICACHE_TLB_WAIT
    // - cacheable read miss                         => ICACHE_MISS_VICTIM
    // - uncacheable read                            => ICACHE_UNC_WAIT
    //
    // In case of cache miss, the ICACHE FSM requests a VCI transaction to the CMD FSM
    // using the r_icache_miss_req flip-flop; the CMD FSM resets this flip-flop when
    // the transaction starts. The ICACHE FSM then goes to the ICACHE_MISS_VICTIM
    // state to select a slot and request a cleanup transaction to the CLEANUP FSM.
    // It goes next to the ICACHE_MISS_WAIT state, waiting for a response from the RSP FSM.
    // The availability of the missing cache line is signaled by the response fifo,
    // and the cache update is done (one word per cycle) in the ICACHE_MISS_UPDT state.
    //
    // In case of uncacheable address, the ICACHE FSM requests an uncacheable VCI
    // transaction to the CMD FSM using the r_icache_unc_req flip-flop; the CMD FSM
    // resets this flip-flop when the transaction starts. The ICACHE FSM goes to
    // ICACHE_UNC_WAIT to wait for the response from the RSP FSM, through the response
    // fifo. The missing instruction is directly returned to the processor in this state.
    //
    // In case of tlb miss, the ICACHE FSM requests a tlb update from the DCACHE FSM,
    // using the r_icache_tlb_miss_req flip-flop and the r_icache_vaddr_save register,
    // and goes to the ICACHE_TLB_WAIT state.
    // The tlb update is entirely done by the DCACHE FSM (which becomes the owner of
    // the dtlb until the update is completed), and which resets r_icache_tlb_miss_req
    // to signal the completion.
    //
    // The DCACHE FSM signals XTN processor requests to the ICACHE FSM
    // using the r_dcache_xtn_req flip-flop.
    // The request opcode and the address to be invalidated are transmitted
    // in the r_dcache_xtn_opcode and r_dcache_p0_wdata registers respectively.
    // The r_dcache_xtn_req flip-flop is reset by the ICACHE FSM when the operation
    // is completed.
    //
    // The r_vci_rsp_ins_error flip-flop is set by the RSP FSM in case of bus error
    // in a cache miss or uncacheable read VCI transaction. Nothing is written
    // in the response fifo. This flip-flop is reset by the ICACHE FSM.
    ////////////////////////////////////////////////////////////////////////////////////////

    // default value for m_irsp
    m_irsp.valid       = false;
    m_irsp.error       = false;
    m_irsp.instruction = 0;

    switch( r_icache_fsm.read() ) {
    /////////////////
    case ICACHE_IDLE:   // In this state, we handle processor requests, XTN requests sent
                        // by the DCACHE FSM, and coherence requests, with a fixed priority:
                        // coherence > XTN > instruction
                        // We access the itlb and the icache in parallel, with the virtual
                        // address for the itlb, and with a speculative physical address
                        // (computed during the previous cycle) for the icache.
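    // Note on the speculative access performed in this IDLE state:
    // the speculative physical address keeps the page-aligned bits of the
    // previous translation and appends the page offset of the new vaddr:
    //     spc_paddr = (r_icache_vci_paddr & ~PAGE_K_MASK)
    //               | (m_ireq.addr        &  PAGE_K_MASK);
    // While successive fetches stay in the same page, instruction, tag and
    // translation are all obtained in one cycle; a wrong speculation only
    // costs one extra cycle (see the "speculative access KO" test below).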
    {
        // coherence request from the target FSM
        if ( r_tgt_icache_req.read() )
        {
            r_icache_fsm      = ICACHE_CC_CHECK;
            r_icache_fsm_save = r_icache_fsm.read();
            break;
        }

        // Decoding processor XTN requests sent by the DCACHE FSM.
        // These requests are not executed in this IDLE state, because
        // they require access to the icache or the itlb, which are
        // already being accessed.
        if ( r_dcache_xtn_req.read() )
        {
            if ( (int)r_dcache_xtn_opcode.read() == (int)iss_t::XTN_PTPR )
            {
                r_icache_fsm = ICACHE_XTN_TLB_FLUSH;
                break;
            }
            if ( (int)r_dcache_xtn_opcode.read() == (int)iss_t::XTN_ICACHE_FLUSH)
            {
                r_icache_flush_count = 0;
                r_icache_fsm         = ICACHE_XTN_CACHE_FLUSH;
                break;
            }
            if ( (int)r_dcache_xtn_opcode.read() == (int)iss_t::XTN_ITLB_INVAL)
            {
                r_icache_fsm = ICACHE_XTN_TLB_INVAL;
                break;
            }
            if ( (int)r_dcache_xtn_opcode.read() == (int)iss_t::XTN_ICACHE_INVAL)
            {
                r_icache_fsm = ICACHE_XTN_CACHE_INVAL_VA;
                break;
            }
            if ( (int)r_dcache_xtn_opcode.read() == (int)iss_t::XTN_MMU_ICACHE_PA_INV)
            {
                if (sizeof(paddr_t) <= 32)
                {
                    assert(r_mmu_word_hi.read() == 0 &&
                           "high bits should be 0 for 32bit paddr");
                    r_icache_vci_paddr = (paddr_t)r_mmu_word_lo.read();
                }
                else
                {
                    r_icache_vci_paddr = (paddr_t)r_mmu_word_hi.read() << 32 |
                                         (paddr_t)r_mmu_word_lo.read();
                }
                r_icache_fsm = ICACHE_XTN_CACHE_INVAL_PA;
                break;
            }
        } // end if xtn_req

        // processor request
        if ( m_ireq.valid )
        {
            bool    cacheable;
            paddr_t paddr;

            // We register the processor request
            r_icache_vaddr_save = m_ireq.addr;

            // speculative icache access (if cache activated)
            // we use the speculative PPN computed during the previous cycle
            uint32_t cache_inst = 0;
            bool     cache_hit  = false;
            if ( r_mmu_mode.read() & INS_CACHE_MASK )
            {
                paddr_t spc_paddr = (r_icache_vci_paddr.read() & ~PAGE_K_MASK) |
                                    ((paddr_t)m_ireq.addr & PAGE_K_MASK);
#ifdef INSTRUMENTATION
                m_cpt_icache_data_read++;
                m_cpt_icache_dir_read++;
#endif
                cache_hit = r_icache.read( spc_paddr, &cache_inst );
            }

            // systematic itlb access (if tlb activated)
            // we use the virtual address
            paddr_t    tlb_paddr;
            pte_info_t tlb_flags;
            size_t     tlb_way;
            size_t     tlb_set;
            paddr_t    tlb_nline;
            bool       tlb_hit = false;
            if ( r_mmu_mode.read() & INS_TLB_MASK )
            {
#ifdef INSTRUMENTATION
                m_cpt_itlb_read++;
#endif
                tlb_hit = r_itlb.translate( m_ireq.addr,
                                            &tlb_paddr,
                                            &tlb_flags,
                                            &tlb_nline,   // unused
                                            &tlb_way,     // unused
                                            &tlb_set );   // unused
            }
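            // Decision table implemented by the code below
            // (cacheability and physical address source):
            //   itlb off / cache off : uncacheable        / paddr = vaddr
            //   itlb off / cache on  : cacheability table / paddr = vaddr
            //   itlb on  / cache off : uncacheable        / paddr from itlb
            //   itlb on  / cache on  : C bit of the PTE   / paddr from itlb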
            // We compute cacheability and physical address, and check access rights:
            // - If the MMU is activated : cacheability is defined by the C bit in the
            //   PTE, the physical address is obtained from the TLB, and the access
            //   rights are defined by the U and X bits in the PTE.
            // - If the MMU is not activated : cacheability is defined by the segment
            //   table, the physical address is equal to the virtual address (identity
            //   mapping), and there is no access rights checking.
            if ( not (r_mmu_mode.read() & INS_TLB_MASK) )   // tlb not activated
            {
                // cacheability
                if ( not (r_mmu_mode.read() & INS_CACHE_MASK) ) cacheable = false;
                else cacheable = m_cacheability_table[m_ireq.addr];

                // physical address
                paddr = (paddr_t)m_ireq.addr;
            }
            else   // itlb activated
            {
                if ( tlb_hit )   // tlb hit
                {
                    // cacheability
                    if ( not (r_mmu_mode.read() & INS_CACHE_MASK) ) cacheable = false;
                    else cacheable = tlb_flags.c;

                    // physical address
                    paddr = tlb_paddr;

                    // access rights checking
                    if ( not tlb_flags.u && (m_ireq.mode == iss_t::MODE_USER) )
                    {
                        r_mmu_ietr         = MMU_READ_PRIVILEGE_VIOLATION;
                        r_mmu_ibvar        = m_ireq.addr;
                        m_irsp.valid       = true;
                        m_irsp.error       = true;
                        m_irsp.instruction = 0;
                        break;
                    }
                    else if ( not tlb_flags.x )
                    {
                        r_mmu_ietr         = MMU_READ_EXEC_VIOLATION;
                        r_mmu_ibvar        = m_ireq.addr;
                        m_irsp.valid       = true;
                        m_irsp.error       = true;
                        m_irsp.instruction = 0;
                        break;
                    }
                }
                else   // in case of TLB miss we send an itlb miss request to the DCACHE FSM
                {
#ifdef INSTRUMENTATION
                    m_cpt_itlb_miss++;
#endif
                    r_icache_fsm          = ICACHE_TLB_WAIT;
                    r_icache_tlb_miss_req = true;
                    break;
                }
            } // end if itlb activated

            // physical address registration (for next cycle)
            r_icache_vci_paddr = paddr;

            // We reach this point only in case of TLB hit (or MMU deactivated):
            // we get the instruction depending on cacheability,
            // we send the response to the processor, and compute the next state.
            if ( cacheable )   // cacheable read
            {
                if ( (r_icache_vci_paddr.read() & ~PAGE_K_MASK)
                     != (paddr & ~PAGE_K_MASK) )   // speculative access KO
                {
#ifdef INSTRUMENTATION
                    m_cpt_icache_spc_miss++;
#endif
                    // we return an invalid response and stay in IDLE state
                    // the cache access will cost one extra cycle.
                    break;
                }

                if ( not cache_hit )   // cache miss
                {
#ifdef INSTRUMENTATION
                    m_cpt_icache_miss++;
#endif
                    r_icache_fsm      = ICACHE_MISS_VICTIM;
                    r_icache_miss_req = true;
                }
                else   // cache hit
                {
#ifdef INSTRUMENTATION
                    m_cpt_ins_read++;
#endif
                    m_irsp.valid       = true;
                    m_irsp.instruction = cache_inst;
                }
            }
            else   // non cacheable read
            {
                r_icache_unc_req = true;
                r_icache_fsm     = ICACHE_UNC_WAIT;
            }
        } // end if m_ireq.valid
        break;
    }
    /////////////////////
    case ICACHE_TLB_WAIT:   // Waiting for the itlb update by the DCACHE FSM after a tlb miss.
                            // The itlb is updated by the DCACHE FSM, as well as the
                            // r_mmu_ietr and r_mmu_ibvar registers in case of error.
                            // The itlb is not accessed by the ICACHE FSM until the DCACHE
                            // FSM resets the r_icache_tlb_miss_req flip-flop.
                            // External coherence requests are accepted in this state.
    {
        // external coherence request
        if ( r_tgt_icache_req.read() )
        {
            r_icache_fsm      = ICACHE_CC_CHECK;
            r_icache_fsm_save = r_icache_fsm.read();
            break;
        }

        if ( m_ireq.valid ) m_cost_ins_tlb_miss_frz++;

        // the DCACHE FSM signals the response by resetting the request flip-flop
        if ( not r_icache_tlb_miss_req.read() )
        {
            if ( r_icache_tlb_rsp_error.read() )   // error reported : tlb not updated
            {
                r_icache_tlb_rsp_error = false;
                m_irsp.error           = true;
                m_irsp.valid           = true;
                r_icache_fsm           = ICACHE_IDLE;
            }
            else   // tlb updated : return to IDLE state
            {
                r_icache_fsm = ICACHE_IDLE;
            }
        }
        break;
    }
    //////////////////////////
    case ICACHE_XTN_TLB_FLUSH:   // invalidate in one cycle all non global TLB entries
    {
        r_itlb.flush();
        r_dcache_xtn_req = false;
        r_icache_fsm     = ICACHE_IDLE;
        break;
    }
    ////////////////////////////
    case ICACHE_XTN_CACHE_FLUSH:   // Invalidate sequentially all cache lines, using
                                   // the r_icache_flush_count register as a slot counter.
                                   // We loop in this state until all slots have been visited.
                                   // A cleanup request is generated for each valid line,
                                   // and we are blocked until the previous cleanup is completed.
    {
        if ( not r_icache_cleanup_req.read() )
        {
            size_t  way = r_icache_flush_count.read()/m_icache_sets;
            size_t  set = r_icache_flush_count.read()%m_icache_sets;
            paddr_t nline;

            bool cleanup_req = r_icache.inval( way, set, &nline );
            if ( cleanup_req )
            {
                r_icache_cleanup_req  = true;
                r_icache_cleanup_line = nline;
            }

            r_icache_flush_count = r_icache_flush_count.read() + 1;
            if ( r_icache_flush_count.read() == (m_icache_sets*m_icache_ways - 1) )
            {
                r_dcache_xtn_req = false;
                r_icache_fsm     = ICACHE_IDLE;
            }
        }
        break;
    }
    //////////////////////////
    case ICACHE_XTN_TLB_INVAL:   // invalidate one TLB entry selected by the virtual
                                 // address stored in the r_dcache_p0_wdata register
    {
        r_itlb.inval(r_dcache_p0_wdata.read());
        r_dcache_xtn_req = false;
        r_icache_fsm     = ICACHE_IDLE;
        break;
    }
    ///////////////////////////////
    case ICACHE_XTN_CACHE_INVAL_VA:   // Selective cache line invalidate with virtual address.
                                      // It requires 3 cycles (in case of hit on itlb and icache).
                                      // In this state, we access the itlb to translate the
                                      // virtual address stored in the r_dcache_p0_wdata register.
    {
        paddr_t paddr;
        bool    hit;

        // read physical address in TLB when MMU activated
        if ( r_mmu_mode.read() & INS_TLB_MASK )   // itlb activated
        {
#ifdef INSTRUMENTATION
            m_cpt_itlb_read++;
#endif
            hit = r_itlb.translate(r_dcache_p0_wdata.read(), &paddr);
        }
        else   // itlb not activated
        {
            paddr = (paddr_t)r_dcache_p0_wdata.read();
            hit   = true;
        }

        if ( hit )   // continue the selective inval process
        {
            r_icache_vci_paddr = paddr;
            r_icache_fsm       = ICACHE_XTN_CACHE_INVAL_PA;
        }
        else   // miss : send a request to the DCACHE FSM
        {
#ifdef INSTRUMENTATION
            m_cpt_itlb_miss++;
#endif
            r_icache_tlb_miss_req = true;
            r_icache_vaddr_save   = r_dcache_p0_wdata.read();
            r_icache_fsm          = ICACHE_TLB_WAIT;
        }
        break;
    }
    ///////////////////////////////
    case ICACHE_XTN_CACHE_INVAL_PA:   // Selective cache line invalidate with physical address.
                                      // It requires 2 cycles. In this state, we access the
                                      // icache, with the address stored in the
                                      // r_icache_vci_paddr register.
    {
        uint32_t data;
        size_t   way;
        size_t   set;
        size_t   word;

        bool hit = r_icache.read(r_icache_vci_paddr.read(), &data, &way, &set, &word);
        if ( hit )   // inval to be done
        {
            r_icache_miss_way = way;
            r_icache_miss_set = set;
            r_icache_fsm      = ICACHE_XTN_CACHE_INVAL_GO;
        }
        else   // miss : acknowledge the XTN request and return
        {
            r_dcache_xtn_req = false;
            r_icache_fsm     = ICACHE_IDLE;
        }
        break;
    }
    ///////////////////////////////
    case ICACHE_XTN_CACHE_INVAL_GO:   // In this state, we invalidate the cache line and
                                      // request the cleanup. We are blocked if the
                                      // previous cleanup is not completed.
    {
        paddr_t nline;

        if ( not r_icache_cleanup_req.read() )
        {
            bool hit;
            hit = r_icache.inval( r_icache_miss_way.read(),
                                  r_icache_miss_set.read(),
                                  &nline );
            assert(hit && "XTN_ICACHE_INVAL way/set should still be in icache");

            // request cleanup
            r_icache_cleanup_req  = true;
            r_icache_cleanup_line = nline;

            // acknowledge the XTN request and return
            r_dcache_xtn_req = false;
            r_icache_fsm     = ICACHE_IDLE;
        }
        break;
    }
    ////////////////////////
    case ICACHE_MISS_VICTIM:   // Selects a victim line.
                               // Sets the r_icache_cleanup_req flip-flop
                               // when the selected slot is not empty.
    {
        m_cost_ins_miss_frz++;

        size_t index;   // unused
        bool   hit = r_cleanup_buffer.hit(
                         r_icache_vci_paddr.read()>>(uint32_log2(m_icache_words)+2),
                         &index );

        if ( not hit and not r_icache_cleanup_req.read() )
        {
            bool    valid;
            size_t  way;
            size_t  set;
            paddr_t victim;

            valid = r_icache.victim_select(r_icache_vci_paddr.read(), &victim, &way, &set);
            r_icache_miss_way = way;
            r_icache_miss_set = set;

            if ( valid )
            {
                r_icache_cleanup_req  = true;
                r_icache_cleanup_line = victim;
                r_icache_fsm          = ICACHE_MISS_INVAL;
            }
            else
            {
                r_icache_fsm = ICACHE_MISS_WAIT;
            }
        }
        break;
    }
    ///////////////////////
    case ICACHE_MISS_INVAL:   // invalidate the victim line
    {
        paddr_t nline;
        bool    hit;

        hit = r_icache.inval( r_icache_miss_way.read(),
                              r_icache_miss_set.read(),
                              &nline );   // unused
        assert(hit && "selected way/set line should be in icache");

        r_icache_fsm = ICACHE_MISS_WAIT;
        break;
    }
    //////////////////////
    case ICACHE_MISS_WAIT:   // waiting for a response to a miss request from the VCI_RSP FSM
    {
        if ( m_ireq.valid ) m_cost_ins_miss_frz++;

        // external coherence request
        if ( r_tgt_icache_req.read() )
        {
            r_icache_fsm      = ICACHE_CC_CHECK;
            r_icache_fsm_save = r_icache_fsm.read();
            break;
        }

        if ( r_vci_rsp_ins_error.read() )   // bus error
        {
            r_mmu_ietr          = MMU_READ_DATA_ILLEGAL_ACCESS;
            r_mmu_ibvar         = r_icache_vaddr_save.read();
            m_irsp.valid        = true;
            m_irsp.error        = true;
            r_vci_rsp_ins_error = false;
            r_icache_fsm        = ICACHE_IDLE;
        }
        else if ( r_vci_rsp_fifo_icache.rok() )   // response available
        {
            r_icache_miss_word = 0;
            r_icache_fsm       = ICACHE_MISS_UPDT;
        }
        break;
    }
    //////////////////////
    case ICACHE_MISS_UPDT:   // update the cache (one word per cycle)
    {
        if ( m_ireq.valid ) m_cost_ins_miss_frz++;

        if ( r_vci_rsp_fifo_icache.rok() )   // response available
        {
            if ( r_icache_miss_inval )   // Matching coherence request:
                                         // we pop the response FIFO, without updating the cache.
                                         // We send a cleanup for the missing line at the last
                                         // word, and are blocked if the previous cleanup is
                                         // not completed.
            {
                if ( r_icache_miss_word.read() < m_icache_words-1 )   // not the last word
                {
                    vci_rsp_fifo_icache_get = true;
                    r_icache_miss_word      = r_icache_miss_word.read() + 1;
                }
                else   // last word
                {
                    if ( not r_icache_cleanup_req.read() )   // no pending cleanup
                    {
                        vci_rsp_fifo_icache_get = true;
                        r_icache_cleanup_req    = true;
                        r_icache_cleanup_line   = r_icache_vci_paddr.read() >>
                                                  (uint32_log2(m_icache_words<<2));
                        r_icache_miss_inval     = false;
                        r_icache_fsm            = ICACHE_IDLE;
                    }
                }
            }
            else   // No matching coherence request:
                   // we pop the FIFO and update the cache.
                   // We update the directory at the last word.
            {
#ifdef INSTRUMENTATION
                m_cpt_icache_data_write++;
#endif
                r_icache.write( r_icache_miss_way.read(),
                                r_icache_miss_set.read(),
                                r_icache_miss_word.read(),
                                r_vci_rsp_fifo_icache.read() );
                vci_rsp_fifo_icache_get = true;
                r_icache_miss_word      = r_icache_miss_word.read() + 1;

                if ( r_icache_miss_word.read() == m_icache_words-1 )   // last word
                {
#ifdef INSTRUMENTATION
                    m_cpt_icache_dir_write++;
#endif
                    r_icache.victim_update_tag( r_icache_vci_paddr.read(),
                                                r_icache_miss_way.read(),
                                                r_icache_miss_set.read() );
                    r_icache_fsm = ICACHE_IDLE;
                }
            }
        }
        break;
    }
    ////////////////////
    case ICACHE_UNC_WAIT:   // waiting for a response to an uncacheable read
                            // from the VCI_RSP FSM
    {
        // external coherence request
        if ( r_tgt_icache_req.read() )
        {
            r_icache_fsm      = ICACHE_CC_CHECK;
            r_icache_fsm_save = r_icache_fsm.read();
            break;
        }

        if ( r_vci_rsp_ins_error.read() )   // bus error
        {
            r_mmu_ietr          = MMU_READ_DATA_ILLEGAL_ACCESS;
            r_mmu_ibvar         = m_ireq.addr;
            r_vci_rsp_ins_error = false;
            m_irsp.valid        = true;
            m_irsp.error        = true;
            r_icache_fsm        = ICACHE_IDLE;
        }
        else if ( r_vci_rsp_fifo_icache.rok() )   // instruction available
        {
            vci_rsp_fifo_icache_get = true;
            r_icache_fsm            = ICACHE_IDLE;
            if ( m_ireq.valid and
                 (m_ireq.addr == r_icache_vaddr_save.read()) )   // request not modified
            {
                m_irsp.valid       = true;
                m_irsp.instruction = r_vci_rsp_fifo_icache.read();
            }
        }
        break;
    }
    /////////////////////
    case ICACHE_CC_CHECK:   // This state is the entry point of a sub-fsm
                            // handling coherence requests.
                            // The return state is defined in r_icache_fsm_save.
    {
        paddr_t paddr = r_tgt_paddr.read();
        paddr_t mask  = ~((m_icache_words<<2)-1);

        if( (r_icache_fsm_save.read() == ICACHE_MISS_WAIT) and
            ((r_icache_vci_paddr.read() & mask) == (paddr & mask)) )   // matching a pending miss
        {
            r_icache_miss_inval = true;                  // signaling the match
            r_tgt_icache_req    = false;                 // coherence request completed
            r_tgt_icache_rsp    = r_tgt_update.read();   // response required if update
            r_icache_fsm        = r_icache_fsm_save.read();
        }
        else   // no match
        {
#ifdef INSTRUMENTATION
            m_cpt_icache_dir_read++;
#endif
            uint32_t inst;
            size_t   way;
            size_t   set;
            size_t   word;

            bool hit = r_icache.read(paddr, &inst, &way, &set, &word);
            r_icache_cc_way = way;
            r_icache_cc_set = set;

            if ( hit and r_tgt_update.read() )   // hit update
            {
                r_icache_fsm     = ICACHE_CC_UPDT;
                r_icache_cc_word = r_tgt_word_min.read();
            }
            else if ( hit and not r_tgt_update.read() )   // hit inval
            {
                r_icache_fsm = ICACHE_CC_INVAL;
            }
            else   // miss can happen
            {
                r_tgt_icache_req = false;
                r_tgt_icache_rsp = r_tgt_update.read();
                r_icache_fsm     = r_icache_fsm_save.read();
            }
        }
        break;
    }
    /////////////////////
    case ICACHE_CC_INVAL:   // invalidate a cache line
    {
        paddr_t nline;
        bool    hit;

        hit = r_icache.inval( r_icache_cc_way.read(),
                              r_icache_cc_set.read(),
                              &nline );
        assert (hit && "ICACHE_CC_INVAL way/set should still be in icache");

        r_tgt_icache_req = false;
        r_tgt_icache_rsp = true;
        r_icache_fsm     = r_icache_fsm_save.read();
        break;
    }
    ////////////////////
    case ICACHE_CC_UPDT:   // write one word per cycle (from word_min to word_max)
    {
        size_t word = r_icache_cc_word.read();
        size_t way  = r_icache_cc_way.read();
        size_t set  = r_icache_cc_set.read();

        r_icache.write( way, set, word, r_tgt_buf[word], r_tgt_be[word] );
        r_icache_cc_word = word+1;

        if ( word == r_tgt_word_max.read() )   // last word
        {
            r_tgt_icache_req = false;
            r_tgt_icache_rsp = true;
            r_icache_fsm     = r_icache_fsm_save.read();
        }
        break;
    }
    } // end switch r_icache_fsm
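    ///////////////////////////////////////////////////////////////////////////////
    // Note on the coherence sub-fsm above (the DCACHE FSM below uses the same
    // scheme): the CC_CHECK entry state first compares the requested line with
    // a possibly pending miss, using a line-aligned mask:
    //     mask  = ~((m_icache_words<<2)-1);
    //     match = (r_icache_vci_paddr & mask) == (r_tgt_paddr & mask);
    // On a match, the r_icache_miss_inval flip-flop is set, so that the miss
    // response is popped without updating the cache, and a cleanup is sent
    // instead. Otherwise the line is searched in the cache, then invalidated
    // (CC_INVAL) or patched word by word (CC_UPDT).
    ///////////////////////////////////////////////////////////////////////////////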
    ////////////////////////////////////////////////////////////////////////////////////
    // DCACHE FSM
    //
    // Both the Cacheability Table and the MMU cacheable bit are used to define
    // the cacheability, depending on the MMU mode.
    //
    // 1/ Coherence requests :
    //    There is a coherence request when the r_tgt_dcache_req flip-flop is set,
    //    requesting a line invalidation or a line update.
    //    Coherence requests are taken into account in the IDLE, UNC_WAIT, MISS_WAIT states.
    //    The actions associated to the pre-empted state are not executed; the DCACHE FSM
    //    goes to the CC_CHECK state to execute the requested action, and returns to the
    //    pre-empted state.
    //
    // 2/ TLB miss :
    //    The page tables can be cacheable.
    //    In case of miss in the itlb or dtlb, the tlb miss is handled by a dedicated
    //    sub-fsm (DCACHE_TLB_MISS state) that handles possible misses in the DCACHE;
    //    this sub-fsm implements the table-walk.
    //
    // 3/ Processor requests :
    //    Processor READ, WRITE, LL or SC requests are taken in the IDLE state only.
    //    The IDLE state implements a three-stage pipe-line to handle write bursts:
    //    - The physical address is computed by the dtlb in stage P0.
    //    - The registration in the wbuf and the dcache hit are computed in stage P1.
    //    - The dcache update is done in stage P2.
    //    WRITE or SC requests can require a PTE Dirty bit update (in memory),
    //    which is done (before handling the processor request) by a dedicated sub-fsm
    //    (DCACHE_DIRTY_PTE_GET state).
    //    If a PTE is modified, both the itlb and dtlb are selectively, but sequentially,
    //    cleared by a dedicated sub-fsm (DCACHE_INVAL_TLB_SCAN state).
    //    If there is no write in the pipe, dcache and dtlb are accessed in parallel
    //    (virtual address for the dtlb, and speculative physical address computed during
    //    the previous cycle for the dcache), in order to return the data in one cycle
    //    for a READ request. We just pay an extra cycle when the speculative access fails.
    //
    // 4/ Atomic instructions LL/SC :
    //    The LL/SC address can be cacheable or non cacheable.
    //    The llsc_local_table holds a registration for an active LL/SC
    //    operation (with an address, a registration key, an aging counter and a
    //    valid bit).
    //    - LL requests from the processor are transmitted as a one-flit VCI
    //      CMD_LOCKED_READ transaction with TYPE_LL as PKTID value. PLEN must
    //      be 8, as the response is 2 flits long (data and registration key).
    //    - SC requests from the processor are systematically transmitted to the
    //      memory cache as 2-flit VCI CMD_NOP (or CMD_STORE_COND)
    //      transactions, with TYPE_SC as PKTID value (the first flit contains
    //      the registration key, the second flit contains the data to write in
    //      case of success).
    //      The cache is not updated, as this is done in case of success by the
    //      coherence transaction.
    //
    // 5/ Non cacheable access :
    //    This component implements a strong ordering between non cacheable accesses
    //    (read or write) : a new non cacheable VCI transaction starts only when
    //    the previous non cacheable transaction is completed. Both cacheable and
    //    non cacheable transactions use the write buffer, but the DCACHE FSM registers
    //    a non cacheable write transaction posted in the write buffer by setting the
    //    r_dcache_pending_unc_write flip-flop. All other non cacheable requests
    //    are stalled until this flip-flop is reset by the VCI_RSP FSM (when the
    //    pending non cacheable write transaction completes).
    //
    // 6/ Error handling :
    //    When the MMU is not activated, Read Bus Errors are synchronous events,
    //    but Write Bus Errors are asynchronous events (the processor is not frozen).
    //    - If a Read Bus Error is detected, the VCI_RSP FSM sets the
    //      r_vci_rsp_data_error flip-flop, without writing any data in the
    //      r_vci_rsp_fifo_dcache FIFO, and the synchronous error is signaled
    //      by the DCACHE FSM.
    //    - If a Write Bus Error is detected, the VCI_RSP FSM signals
    //      the asynchronous error using the setWriteBerr() method.
    //    When the MMU is activated, bus errors are rare events, as the MMU
    //    checks the physical address before the VCI transaction starts.
    ////////////////////////////////////////////////////////////////////////////////////////
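    ///////////////////////////////////////////////////////////////////////////////
    // Recap of the LL/SC packet formats described in 4/ above:
    //   LL : one CMD_LOCKED_READ command flit, PKTID = TYPE_LL, PLEN = 8,
    //        answered by two response flits (data, then registration key);
    //   SC : two command flits, PKTID = TYPE_SC (first flit = registration key,
    //        second flit = data to be written on success).
    // The dcache itself is not updated on a successful SC : the new value comes
    // back later through the coherence (update) transaction.
    ///////////////////////////////////////////////////////////////////////////////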
    // default value for m_drsp
    m_drsp.valid = false;
    m_drsp.error = false;
    m_drsp.rdata = 0;

    // keep track of the local llsc table access and perform a NOP access if
    // necessary at the end of the DCACHE transition function
    bool llsc_local_table_access_done = false;

    switch ( r_dcache_fsm.read() ) {
    /////////////////
    case DCACHE_IDLE:   // There are 9 conditions to exit the IDLE state :
                        // 1) Dirty bit update (processor)    => DCACHE_DIRTY_PTE_GET
                        // 2) Coherence request (TGT FSM)     => DCACHE_CC_CHECK
                        // 3) ITLB miss request (ICACHE FSM)  => DCACHE_TLB_MISS
                        // 4) XTN request (processor)         => DCACHE_XTN_*
                        // 5) DTLB miss (processor)           => DCACHE_TLB_MISS
                        // 6) Cacheable read miss (processor) => DCACHE_MISS_VICTIM
                        // 7) Uncacheable read (processor)    => DCACHE_UNC_WAIT
                        // 8) LL access (processor)           => DCACHE_LL_WAIT
                        // 9) SC access (processor)           => DCACHE_SC_WAIT
                        //
                        // The dtlb is unconditionally accessed to translate the
                        // virtual address from the processor.
                        //
                        // There are 4 configurations to access the cache,
                        // depending on the pipe-line state, defined by
                        // the r_dcache_p0_valid (V0) flip-flop : P1 stage activated,
                        // and the r_dcache_p1_valid (V1) flip-flop : P2 stage activated
                        //  V0 / V1 / Data      / Directory / comment
                        //  0  / 0  / read(A0)  / read(A0)  / read speculative access
                        //  0  / 1  / write(A2) / nop       / read request delayed
                        //  1  / 0  / nop       / read(A1)  / read request delayed
                        //  1  / 1  / write(A2) / read(A1)  / read request delayed
    {
        bool tlb_inval_required = false;
        bool write_pipe_frozen  = false;

        ////////////////////////////////////////////////////////////////////////////////
        // Handling P2 pipe-line stage
        // Inputs are the r_dcache_p1_* registers.
        // If r_dcache_p1_valid is true, we update the local copy in the dcache.
        // If the modified cache line has copies in the TLBs, we launch a TLB invalidate
        // operation, going to the DCACHE_INVAL_TLB_SCAN state.

        if ( r_dcache_p1_valid.read() )   // P2 stage activated
        {
            size_t   way   = r_dcache_p1_cache_way.read();
            size_t   set   = r_dcache_p1_cache_set.read();
            size_t   word  = r_dcache_p1_cache_word.read();
            uint32_t wdata = r_dcache_p1_wdata.read();
            vci_be_t be    = r_dcache_p1_be.read();

            r_dcache.write( way, set, word, wdata, be );
#ifdef INSTRUMENTATION
            m_cpt_dcache_data_write++;
#endif
            // cache update after a WRITE hit can require itlb & dtlb inval or flush
            if ( r_dcache_in_tlb[way*m_dcache_sets+set] )
            {
                tlb_inval_required       = true;
                r_dcache_tlb_inval_count = 0;
                r_dcache_tlb_inval_line  = r_dcache_p1_paddr.read()>>
                                           (uint32_log2(m_dcache_words<<2));
                r_dcache_in_tlb[way*m_dcache_sets+set] = false;
            }
            else if ( r_dcache_contains_ptd[way*m_dcache_sets+set] )
            {
                r_itlb.reset();
                r_dtlb.reset();
                r_dcache_contains_ptd[way*m_dcache_sets+set] = false;
            }

#if DEBUG_DCACHE
            if ( m_debug_dcache_fsm )
            {
                std::cout << "  Cache update in P2 stage" << std::dec
                          << " / WAY = " << way
                          << " / SET = " << set
                          << " / WORD = " << word << std::hex
                          << " / DATA = " << wdata
                          << " / BE = " << be << std::endl;
            }
#endif
        } // end P2 stage
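        ///////////////////////////////////////////////////////////////////////////
        // Write pipe-line summary (one WRITE retired per cycle in the best case):
        //   cycle i   : P0 : dtlb translation of the virtual address
        //   cycle i+1 : P1 : write buffer registration + dcache hit test
        //   cycle i+2 : P2 : dcache data update when there is a local copy
        // P2 is never frozen; P0 and P1 freeze together when the write buffer
        // cannot accept the request (buffer full, or pending uncacheable write).
        ///////////////////////////////////////////////////////////////////////////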
    // The r_dcache_p1_valid bit must be computed at all cycles, and
    // the P2 stage must be activated if there is a local copy in dcache.

    if ( r_dcache_p0_valid.read() )   // P1 stage activated
    {
        // frozen if write not cacheable, and previous non cacheable write registered
        if ( not r_dcache_p0_cacheable.read() and r_dcache_pending_unc_write.read() )
        {
            r_dcache_p1_valid = false;
            write_pipe_frozen = true;
        }
        else   // try a registration into write buffer
        {
            bool wok = r_wbuf.write( r_dcache_p0_paddr.read(),
                                     r_dcache_p0_be.read(),
                                     r_dcache_p0_wdata.read(),
                                     r_dcache_p0_cacheable.read() );
#ifdef INSTRUMENTATION
m_cpt_wbuf_write++;
#endif
            if ( not wok )   // frozen if write buffer full
            {
                r_dcache_p1_valid = false;
                write_pipe_frozen = true;
            }
            else   // update the write_buffer state extension
            {
                if(not r_dcache_pending_unc_write.read())
                    r_dcache_pending_unc_write = not r_dcache_p0_cacheable.read();

                // read directory to check local copy
                size_t cache_way;
                size_t cache_set;
                size_t cache_word;
                bool   local_copy;
                if ( r_mmu_mode.read() & DATA_CACHE_MASK)   // cache activated
                {
                    local_copy = r_dcache.hit( r_dcache_p0_paddr.read(),
                                               &cache_way,
                                               &cache_set,
                                               &cache_word );
#ifdef INSTRUMENTATION
m_cpt_dcache_dir_read++;
#endif
                }
                else
                {
                    local_copy = false;
                }

                // store values for P2 pipe stage
                if ( local_copy )
                {
                    r_dcache_p1_valid      = true;
                    r_dcache_p1_wdata      = r_dcache_p0_wdata.read();
                    r_dcache_p1_be         = r_dcache_p0_be.read();
                    r_dcache_p1_paddr      = r_dcache_p0_paddr.read();
                    r_dcache_p1_cache_way  = cache_way;
                    r_dcache_p1_cache_set  = cache_set;
                    r_dcache_p1_cache_word = cache_word;
                }
                else
                {
                    r_dcache_p1_valid = false;
                }
            }
        }
    }
    else   // P1 stage not activated
    {
        r_dcache_p1_valid = false;
    } // end P1 stage

    /////////////////////////////////////////////////////////////////////////////////
    // handling P0 pipe-line stage
    // This stage controls r_dcache_fsm and the r_dcache_p0_* registers.
    // The r_dcache_p0_valid flip-flop is only set in case of a WRITE request.
    // - the TLB invalidate requests have the highest priority,
    // - then the external coherence requests,
    // - then the itlb miss requests,
    // - and finally the processor requests.
    // If the dtlb is activated, there is an unconditional access to the dtlb,
    // for address translation.
    // 1) A processor WRITE request is blocked if the Dirty bit must be set, or
    //    in case of dtlb miss. If the dtlb is OK, it enters the three-stage
    //    pipe-line (fully handled by the IDLE state), and the processor request
    //    is acknowledged.
    // 2) A processor READ request generates a simultaneous access to
    //    both the dcache data and the dcache directory, using the speculative
    //    PPN, but is delayed if the write pipe-line is not empty.
    //    In case of miss, we wait for the VCI response in DCACHE_UNC_WAIT or
    //    DCACHE_MISS_WAIT states.
    // 3) A processor LL request generates a VCI LL transaction. We wait for
    //    the response in DCACHE_LL_WAIT state.
    // 4) A processor SC request is delayed until the write pipe-line is empty.
    //    A VCI SC transaction is launched, and we wait for the VCI response in
    //    DCACHE_SC_WAIT state. It can be completed by a "long write" if the
    //    PTE dirty bit must be updated in dtlb, dcache, and RAM.
    //    The data is not modified in dcache, as it will be done by the
    //    coherence transaction.
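    // Illustrative timing sketch (comments only): a burst of three cacheable
    // writes W0, W1, W2 flows through the pipe-line as follows, assuming no
    // freeze condition:
    //
    //      cycle :   n     n+1    n+2    n+3    n+4
    //      P0    :   W0    W1     W2     -      -     (dtlb translation)
    //      P1    :   -     W0     W1     W2     -     (wbuf write + directory read)
    //      P2    :   -     -      W0     W1     W2    (dcache data write if hit)
    //
    // When the wbuf is full (or an uncacheable write is pending), P0 and P1
    // freeze but P2 keeps draining, which guarantees forward progress.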
// TLB inval required after a write hit if ( tlb_inval_required ) { r_dcache_fsm_scan_save = r_dcache_fsm.read(); r_dcache_fsm = DCACHE_INVAL_TLB_SCAN; r_dcache_p0_valid = r_dcache_p0_valid.read() and write_pipe_frozen; } // external coherence request else if ( r_tgt_dcache_req.read() ) { r_dcache_fsm_cc_save = r_dcache_fsm.read(); r_dcache_fsm = DCACHE_CC_CHECK; r_dcache_p0_valid = r_dcache_p0_valid.read() and write_pipe_frozen; } // processor request else if ( m_dreq.valid and not write_pipe_frozen ) { // dcache access using speculative PPN only if pipe-line empty paddr_t cache_paddr; size_t cache_way; size_t cache_set; size_t cache_word; uint32_t cache_rdata; bool cache_hit; if ( (r_mmu_mode.read() & DATA_CACHE_MASK) and // cache activated not r_dcache_p0_valid.read() and not r_dcache_p1_valid.read() ) // pipe-line empty { cache_paddr = (r_dcache_p0_paddr.read() & ~PAGE_K_MASK) | ((paddr_t)m_dreq.addr & PAGE_K_MASK); cache_hit = r_dcache.read( cache_paddr, &cache_rdata, &cache_way, &cache_set, &cache_word ); #ifdef INSTRUMENTATION m_cpt_dcache_dir_read++; m_cpt_dcache_data_read++; #endif } else { cache_hit = false; } // end dcache access // systematic dtlb access using virtual address paddr_t tlb_paddr; pte_info_t tlb_flags; size_t tlb_way; size_t tlb_set; paddr_t tlb_nline; bool tlb_hit; if ( r_mmu_mode.read() & DATA_TLB_MASK ) // DTLB activated { tlb_hit = r_dtlb.translate( m_dreq.addr, &tlb_paddr, &tlb_flags, &tlb_nline, &tlb_way, &tlb_set ); #ifdef INSTRUMENTATION m_cpt_dtlb_read++; #endif } else { tlb_hit = false; } // end dtlb access // register the processor request r_dcache_p0_vaddr = m_dreq.addr; r_dcache_p0_be = m_dreq.be; r_dcache_p0_wdata = m_dreq.wdata; // Handling READ XTN requests from processor // They are executed in this DCACHE_IDLE state. // The processor must not be in user mode if (m_dreq.type == iss_t::XTN_READ) { int xtn_opcode = (int)m_dreq.addr/4; // checking processor mode: if (m_dreq.mode == iss_t::MODE_USER) { r_mmu_detr = MMU_READ_PRIVILEGE_VIOLATION; r_mmu_dbvar = m_dreq.addr; m_drsp.valid = true; m_drsp.error = true; r_dcache_fsm = DCACHE_IDLE; } else { switch( xtn_opcode ) { case iss_t::XTN_INS_ERROR_TYPE: m_drsp.rdata = r_mmu_ietr.read(); m_drsp.valid = true; break; case iss_t::XTN_DATA_ERROR_TYPE: m_drsp.rdata = r_mmu_detr.read(); m_drsp.valid = true; break; case iss_t::XTN_INS_BAD_VADDR: m_drsp.rdata = r_mmu_ibvar.read(); m_drsp.valid = true; break; case iss_t::XTN_DATA_BAD_VADDR: m_drsp.rdata = r_mmu_dbvar.read(); m_drsp.valid = true; break; case iss_t::XTN_PTPR: m_drsp.rdata = r_mmu_ptpr.read(); m_drsp.valid = true; break; case iss_t::XTN_TLB_MODE: m_drsp.rdata = r_mmu_mode.read(); m_drsp.valid = true; break; case iss_t::XTN_MMU_PARAMS: m_drsp.rdata = r_mmu_params; m_drsp.valid = true; break; case iss_t::XTN_MMU_RELEASE: m_drsp.rdata = r_mmu_release; m_drsp.valid = true; break; case iss_t::XTN_MMU_WORD_LO: m_drsp.rdata = r_mmu_word_lo.read(); m_drsp.valid = true; break; case iss_t::XTN_MMU_WORD_HI: m_drsp.rdata = r_mmu_word_hi.read(); m_drsp.valid = true; break; default: r_mmu_detr = MMU_READ_UNDEFINED_XTN; r_mmu_dbvar = m_dreq.addr; m_drsp.valid = true; m_drsp.error = true; break; } // end switch xtn_opcode } // end else r_dcache_p0_valid = false; } // end if XTN_READ // Handling WRITE XTN requests from processor. // They are not executed in this DCACHE_IDLE state, // if they require access to the caches or the TLBs // that are already accessed for speculative read. 
// Caches can be invalidated or flushed in user mode, // and the sync instruction can be executed in user mode else if (m_dreq.type == iss_t::XTN_WRITE) { int xtn_opcode = (int)m_dreq.addr/4; r_dcache_xtn_opcode = xtn_opcode; // checking processor mode: if ( (m_dreq.mode == iss_t::MODE_USER) && (xtn_opcode != iss_t:: XTN_SYNC) && (xtn_opcode != iss_t::XTN_DCACHE_INVAL) && (xtn_opcode != iss_t::XTN_DCACHE_FLUSH) && (xtn_opcode != iss_t::XTN_ICACHE_INVAL) && (xtn_opcode != iss_t::XTN_ICACHE_FLUSH) ) { r_mmu_detr = MMU_WRITE_PRIVILEGE_VIOLATION; r_mmu_dbvar = m_dreq.addr; m_drsp.valid = true; m_drsp.error = true; r_dcache_fsm = DCACHE_IDLE; } else { switch( xtn_opcode ) { case iss_t::XTN_PTPR: // itlb & dtlb must be flushed r_dcache_xtn_req = true; r_dcache_fsm = DCACHE_XTN_SWITCH; break; case iss_t::XTN_TLB_MODE: // no cache or tlb access r_mmu_mode = m_dreq.wdata; m_drsp.valid = true; r_dcache_fsm = DCACHE_IDLE; break; case iss_t::XTN_DTLB_INVAL: // dtlb access r_dcache_fsm = DCACHE_XTN_DT_INVAL; break; case iss_t::XTN_ITLB_INVAL: // itlb access r_dcache_xtn_req = true; r_dcache_fsm = DCACHE_XTN_IT_INVAL; break; case iss_t::XTN_DCACHE_INVAL: // dcache, dtlb & itlb access r_dcache_fsm = DCACHE_XTN_DC_INVAL_VA; break; case iss_t::XTN_MMU_DCACHE_PA_INV: // dcache, dtlb & itlb access r_dcache_fsm = DCACHE_XTN_DC_INVAL_PA; if (sizeof(paddr_t) <= 32) { assert(r_mmu_word_hi.read() == 0 && "high bits should be 0 for 32bit paddr"); r_dcache_p0_paddr = (paddr_t)r_mmu_word_lo.read(); } else { r_dcache_p0_paddr = (paddr_t)r_mmu_word_hi.read() << 32 | (paddr_t)r_mmu_word_lo.read(); } break; case iss_t::XTN_DCACHE_FLUSH: // itlb and dtlb must be reset r_dcache_flush_count = 0; r_dcache_fsm = DCACHE_XTN_DC_FLUSH; break; case iss_t::XTN_ICACHE_INVAL: // icache and itlb access r_dcache_xtn_req = true; r_dcache_fsm = DCACHE_XTN_IC_INVAL_VA; break; case iss_t::XTN_MMU_ICACHE_PA_INV: // icache access r_dcache_xtn_req = true; r_dcache_fsm = DCACHE_XTN_IC_INVAL_PA; break; case iss_t::XTN_ICACHE_FLUSH: // icache access r_dcache_xtn_req = true; r_dcache_fsm = DCACHE_XTN_IC_FLUSH; break; case iss_t::XTN_SYNC: // wait until write buffer empty r_dcache_fsm = DCACHE_XTN_SYNC; break; case iss_t::XTN_MMU_WORD_LO: // no cache or tlb access r_mmu_word_lo = m_dreq.wdata; m_drsp.valid = true; r_dcache_fsm = DCACHE_IDLE; break; case iss_t::XTN_MMU_WORD_HI: // no cache or tlb access r_mmu_word_hi = m_dreq.wdata; m_drsp.valid = true; r_dcache_fsm = DCACHE_IDLE; break; case iss_t::XTN_ICACHE_PREFETCH: // not implemented : no action case iss_t::XTN_DCACHE_PREFETCH: // not implemented : no action m_drsp.valid = true; r_dcache_fsm = DCACHE_IDLE; break; case iss_t::XTN_DEBUG_MASK: // debug mask m_debug_dcache_fsm = ((m_dreq.wdata & 0x1) != 0); m_debug_icache_fsm = ((m_dreq.wdata & 0x2) != 0); m_debug_cleanup_fsm = ((m_dreq.wdata & 0x4) != 0); m_drsp.valid = true; r_dcache_fsm = DCACHE_IDLE; break; default: r_mmu_detr = MMU_WRITE_UNDEFINED_XTN; r_mmu_dbvar = m_dreq.addr; m_drsp.valid = true; m_drsp.error = true; r_dcache_fsm = DCACHE_IDLE; break; } // end switch xtn_opcode } // end else r_dcache_p0_valid = false; } // end if XTN_WRITE // Handling read/write/ll/sc processor requests. // The dtlb and dcache can be activated or not. // We compute the physical address, the cacheability, and check processor request. 
    // - If DTLB not activated : cacheability is defined by the segment table,
    //   and the physical address is equal to the virtual address (identity mapping).
    // - If DTLB activated : cacheability is defined by the C bit in the PTE,
    //   the physical address is obtained from the TLB, and the U & W bits
    //   of the PTE are checked.
    // The processor request is decoded only if the TLB is not activated, or if
    // the virtual address hits in the TLB and the access rights are OK.
    // We call the TLB_MISS sub-fsm in case of dtlb miss.

            else
            {
                bool    valid_req = false;
                bool    cacheable = false;
                paddr_t paddr     = 0;

                if ( not (r_mmu_mode.read() & DATA_TLB_MASK) )   // dtlb not activated
                {
                    valid_req = true;

                    // cacheability
                    if ( not (r_mmu_mode.read() & DATA_CACHE_MASK) ) cacheable = false;
                    else cacheable = m_cacheability_table[m_dreq.addr];

                    // physical address
                    paddr = (paddr_t)m_dreq.addr;
                }
                else   // dtlb activated
                {
                    if ( tlb_hit )   // tlb hit
                    {
                        // cacheability
                        if ( not (r_mmu_mode.read() & DATA_CACHE_MASK) ) cacheable = false;
                        else cacheable = tlb_flags.c;

                        // access rights checking
                        if ( not tlb_flags.u and (m_dreq.mode == iss_t::MODE_USER))
                        {
                            if ( (m_dreq.type == iss_t::DATA_READ) or
                                 (m_dreq.type == iss_t::DATA_LL) )
                                r_mmu_detr = MMU_READ_PRIVILEGE_VIOLATION;
                            else
                                r_mmu_detr = MMU_WRITE_PRIVILEGE_VIOLATION;

                            r_mmu_dbvar  = m_dreq.addr;
                            m_drsp.valid = true;
                            m_drsp.error = true;
                            m_drsp.rdata = 0;
#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " HIT in dtlb, but privilege violation" << std::endl;
}
#endif
                        }
                        else if ( not tlb_flags.w and
                                  ((m_dreq.type == iss_t::DATA_WRITE) or
                                   (m_dreq.type == iss_t::DATA_SC)) )
                        {
                            r_mmu_detr   = MMU_WRITE_ACCES_VIOLATION;
                            r_mmu_dbvar  = m_dreq.addr;
                            m_drsp.valid = true;
                            m_drsp.error = true;
                            m_drsp.rdata = 0;
#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " HIT in dtlb, but write access violation" << std::endl;
}
#endif
                        }
                        else
                        {
                            valid_req = true;
                        }

                        // physical address
                        paddr = tlb_paddr;
                    }
                    else   // tlb miss
                    {
                        r_dcache_tlb_vaddr = m_dreq.addr;
                        r_dcache_tlb_ins   = false;
                        r_dcache_fsm       = DCACHE_TLB_MISS;
                    }
                } // end DTLB activated

                if ( valid_req )   // processor request is valid after TLB check
                {
                    // physical address and cacheability registration
                    r_dcache_p0_paddr     = paddr;
                    r_dcache_p0_cacheable = cacheable;

    // READ request
    // The read requests are taken only if the write pipe-line is empty.
    // If dcache hit, dtlb hit, and speculative PPN OK, data in one cycle.
    // If the speculative access is KO we just pay one extra cycle.
    // If dcache miss, we go to DCACHE_MISS_VICTIM state.
    // If uncacheable, we go to DCACHE_UNC_WAIT state.
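    // Illustrative worked example (comments only) of the speculative access
    // checked just below: the dcache was accessed at the top of this state with
    //
    //      cache_paddr = (previous P0 paddr & ~PAGE_K_MASK)
    //                  | (m_dreq.addr & PAGE_K_MASK)
    //
    // i.e. the PPN registered during the previous cycle, concatenated with the
    // page offset of the current request. If the dtlb translation computed in
    // this cycle yields a different PPN, the speculative read result is
    // discarded and one extra cycle is paid.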
if ( ((m_dreq.type == iss_t::DATA_READ)) and not r_dcache_p0_valid.read() and not r_dcache_p1_valid.read() ) { if ( cacheable ) // cacheable read { // if the speculative access is illegal, we pay an extra cycle if ( (r_dcache_p0_paddr.read() & ~PAGE_K_MASK) != (paddr & ~PAGE_K_MASK)) { #ifdef INSTRUMENTATION m_cpt_dcache_spec_miss++; #endif #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " Speculative access miss" << std::endl; } #endif } // if cache miss, try to get the missing line else if ( not cache_hit ) { #ifdef INSTRUMENTATION m_cpt_dcache_miss++; #endif r_dcache_vci_paddr = paddr; r_dcache_vci_miss_req = true; r_dcache_miss_type = PROC_MISS; r_dcache_fsm = DCACHE_MISS_VICTIM; } // if cache hit return the data else { #ifdef INSTRUMENTATION m_cpt_data_read++; #endif m_drsp.valid = true; m_drsp.rdata = cache_rdata; #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " HIT read in dcache" << " / PADDR=" << std::hex << paddr << std::endl; } #endif } } else // uncacheable read { r_dcache_vci_paddr = paddr; r_dcache_vci_unc_be = m_dreq.be; r_dcache_vci_unc_req = true; r_dcache_fsm = DCACHE_UNC_WAIT; } r_dcache_p0_valid = false; } // end READ // LL request // The LL requests are taken only if the write pipe-line is empty. // We request an LL transaction to CMD FSM and go to // DCACHE_LL_WAIT state, that will return the response to // the processor. else if ( ((m_dreq.type == iss_t::DATA_LL)) and not r_dcache_p0_valid.read() and not r_dcache_p1_valid.read() ) { // prepare llsc local table access table_in.cmd = LLSCLocalTable::LL_CMD ; table_in.address = paddr; // access the table r_llsc_table.exec(table_in, table_out); llsc_local_table_access_done = true; // test if the table is done if(!table_out.done) { r_dcache_p0_valid = false; break; } // request an LL CMD and go to DCACHE_LL_WAIT state r_dcache_vci_ll_req = true; r_dcache_ll_rsp_count = 0; r_dcache_p0_valid = false; r_dcache_vci_paddr = paddr; r_dcache_fsm = DCACHE_LL_WAIT; }// end LL // WRITE request: // If the TLB is activated and the PTE Dirty bit is not set, we stall // the processor and set the Dirty bit before handling the write request. // If we don't need to set the Dirty bit, we can acknowledge // the processor request, as the write arguments (including the // physical address) are registered in r_dcache_p0 registers: // We simply activate the P1 pipeline stage. else if ( m_dreq.type == iss_t::DATA_WRITE ) { if ( (r_mmu_mode.read() & DATA_TLB_MASK ) and not tlb_flags.d ) // Dirty bit must be set { // The PTE physical address is obtained from the nline value (dtlb), // and the word index (proper bits of the virtual address) if ( tlb_flags.b ) // PTE1 { r_dcache_dirty_paddr = (paddr_t)(tlb_nline*(m_dcache_words<<2)) | (paddr_t)((m_dreq.addr>>19) & 0x3c); } else // PTE2 { r_dcache_dirty_paddr = (paddr_t)(tlb_nline*(m_dcache_words<<2)) | (paddr_t)((m_dreq.addr>>9) & 0x38); } r_dcache_fsm = DCACHE_DIRTY_GET_PTE; r_dcache_p0_valid = false; } else // Write request accepted { #ifdef INSTRUMENTATION m_cpt_data_write++; #endif table_in.cmd = LLSCLocalTable::SW_CMD ; table_in.address = paddr; r_llsc_table.exec(table_in, table_out) ; llsc_local_table_access_done = true; if(!table_out.done) { r_dcache_p0_valid = false; break; } m_drsp.valid = true; m_drsp.rdata = 0; r_dcache_p0_valid = true; } } // end WRITE // SC request: // The SC requests are taken only if the write pipe-line is empty. // We test if a DIRTY bit update is required. 
    // If the TLB is activated and the PTE Dirty bit is not set, we stall
    // the processor and set the Dirty bit before handling the write request.
    // If we don't need to set the Dirty bit, we request a SC transaction
    // to the CMD FSM and go to DCACHE_SC_WAIT state, which will return
    // the response to the processor.
    // We don't check a possible write hit in dcache, as the cache update
    // is done by the coherence transaction induced by the SC...

                    else if ( ( m_dreq.type == iss_t::DATA_SC ) and
                              not r_dcache_p0_valid.read() and
                              not r_dcache_p1_valid.read() )
                    {
                        if ( (r_mmu_mode.read() & DATA_TLB_MASK ) and
                             not tlb_flags.d )              // Dirty bit must be set
                        {
                            // The PTE physical address is obtained from the nline
                            // value (dtlb), and the word index (virtual address)
                            if ( tlb_flags.b )   // PTE1
                            {
                                r_dcache_dirty_paddr =
                                    (paddr_t)(tlb_nline*(m_dcache_words<<2)) |
                                    (paddr_t)((m_dreq.addr>>19) & 0x3c);
                            }
                            else                 // PTE2
                            {
                                r_dcache_dirty_paddr =
                                    (paddr_t)(tlb_nline*(m_dcache_words<<2)) |
                                    (paddr_t)((m_dreq.addr>>9) & 0x38);
                            }
                            r_dcache_fsm = DCACHE_DIRTY_GET_PTE;
                        }
                        else                     // SC request accepted
                        {
#ifdef INSTRUMENTATION
m_cpt_data_sc++;
#endif
                            // prepare llsc local table access
                            table_in.cmd     = LLSCLocalTable::SC_CMD ;
                            table_in.address = paddr;

                            // access the table
                            r_llsc_table.exec(table_in, table_out) ;
                            llsc_local_table_access_done = true;

                            // test if the table is done
                            if(!table_out.done)
                            {
                                r_dcache_p0_valid = false;
                                break;
                            }

                            // test for a local fail
                            if(table_out.hit)
                            {
                                // request an SC CMD and go to DCACHE_SC_WAIT state
                                r_sc_key             = table_out.key;
                                r_dcache_vci_paddr   = paddr;
                                r_dcache_vci_sc_req  = true;
                                r_dcache_vci_sc_data = m_dreq.wdata;
                                r_dcache_fsm         = DCACHE_SC_WAIT;
                            }
                            else   // local fail
                            {
                                m_drsp.valid = true;
                                m_drsp.rdata = 0x1;
                            }
                        }
                        r_dcache_p0_valid = false;
                    } // end SC
                    else
                    {
                        r_dcache_p0_valid = false;
                    }
                } // end valid_req
                else
                {
                    r_dcache_p0_valid = false;
                }
            } // end if read/write/ll/sc request
        } // end dreq.valid
        // itlb miss request
        else if ( r_icache_tlb_miss_req.read() )
        {
            r_dcache_tlb_ins   = true;
            r_dcache_tlb_vaddr = r_icache_vaddr_save.read();
            r_dcache_fsm       = DCACHE_TLB_MISS;
            r_dcache_p0_valid  = r_dcache_p0_valid.read() and write_pipe_frozen;
        }
        else
        {
            r_dcache_p0_valid = r_dcache_p0_valid.read() and write_pipe_frozen;
        } // end P0 pipe stage
        break;
    }
    /////////////////////
    case DCACHE_TLB_MISS:   // This is the entry point for the sub-fsm handling all
                            // TLB misses. Input arguments are:
                            // - r_dcache_tlb_vaddr
                            // - r_dcache_tlb_ins (true when itlb miss)
                            // The sub-fsm accesses the dcache to find the missing
                            // TLB entry, and activates the cache miss procedure
                            // in case of miss.
                            // It bypasses the first level page table access if possible.
                            // It uses atomic accesses to update the R/L access bits
                            // in the page table if required.
                            // It directly updates the itlb or dtlb, and writes into the
                            // r_mmu_ins_* or r_mmu_data* error reporting registers.
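                            //
                            // Worked example (comments only) of the table-walk
                            // addresses computed by this sub-fsm, for the two-level
                            // page table assumed by this model (2 Mbytes "big"
                            // pages at level 1, 4 Kbytes pages at level 2):
                            //
                            //   PTE1 address = PTPR << (INDEX1_NBITS + 2) | IX1 << 2
                            //       with IX1 = vaddr >> PAGE_M_NBITS
                            //   PTE2 address = PTBA << PAGE_K_NBITS | IX2 << 3
                            //       with IX2 = (vaddr & PTD_ID2_MASK) >> PAGE_K_NBITS
                            //
                            // PTE1 entries are 4 bytes wide (hence << 2) while PTE2
                            // entries are 8 bytes wide (hence << 3), which is why
                            // the two indexes are scaled differently.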
{
    uint32_t ptba = 0;
    bool     bypass;
    paddr_t  pte_paddr;

    // evaluate bypass in order to skip first level page table access
    if ( r_dcache_tlb_ins.read() )   // itlb miss
    {
        bypass = r_itlb.get_bypass(r_dcache_tlb_vaddr.read(), &ptba);
    }
    else                             // dtlb miss
    {
        bypass = r_dtlb.get_bypass(r_dcache_tlb_vaddr.read(), &ptba);
    }

    if ( not bypass )   // Try to read PTE1/PTD1 in dcache
    {
        pte_paddr = (paddr_t)r_mmu_ptpr.read() << (INDEX1_NBITS+2) |
                    (paddr_t)((r_dcache_tlb_vaddr.read() >> PAGE_M_NBITS) << 2);
        r_dcache_tlb_paddr = pte_paddr;
        r_dcache_fsm       = DCACHE_TLB_PTE1_GET;
    }
    else                // Try to read PTE2 in dcache
    {
        pte_paddr = (paddr_t)ptba << PAGE_K_NBITS |
                    (paddr_t)(r_dcache_tlb_vaddr.read()&PTD_ID2_MASK)>>(PAGE_K_NBITS-3);
        r_dcache_tlb_paddr = pte_paddr;
        r_dcache_fsm       = DCACHE_TLB_PTE2_GET;
    }

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    if ( r_dcache_tlb_ins.read() ) { std::cout << " ITLB miss"; }
    else                           { std::cout << " DTLB miss"; }
    std::cout << " / VADDR = " << std::hex << r_dcache_tlb_vaddr.read()
              << " / BYPASS = " << bypass
              << " / PTE_ADR = " << pte_paddr << std::endl;
}
#endif
    break;
}
/////////////////////////
case DCACHE_TLB_PTE1_GET:   // try to read a PT1 entry in dcache
{
    uint32_t entry;
    size_t   way;
    size_t   set;
    size_t   word;

    bool hit = r_dcache.read( r_dcache_tlb_paddr.read(),
                              &entry,
                              &way,
                              &set,
                              &word );
#ifdef INSTRUMENTATION
m_cpt_dcache_data_read++;
m_cpt_dcache_dir_read++;
#endif
    if ( hit )   // hit in dcache
    {
        if ( not (entry & PTE_V_MASK) )   // unmapped
        {
            if ( r_dcache_tlb_ins.read() )
            {
                r_mmu_ietr             = MMU_READ_PT1_UNMAPPED;
                r_mmu_ibvar            = r_dcache_tlb_vaddr.read();
                r_icache_tlb_miss_req  = false;
                r_icache_tlb_rsp_error = true;
            }
            else
            {
                r_mmu_detr   = MMU_READ_PT1_UNMAPPED;
                r_mmu_dbvar  = r_dcache_tlb_vaddr.read();
                m_drsp.valid = true;
                m_drsp.error = true;
            }
            r_dcache_fsm = DCACHE_IDLE;

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " HIT in dcache, but unmapped"
              << std::hex << " / paddr = " << r_dcache_tlb_paddr.read()
              << std::dec << " / way = " << way
              << std::dec << " / set = " << set
              << std::dec << " / word = " << word
              << std::hex << " / PTE1 = " << entry << std::endl;
}
#endif
        }
        else if( entry & PTE_T_MASK )   // PTD : we must access PT2
        {
            // mark the cache line as containing a PTD
            r_dcache_contains_ptd[m_dcache_sets*way+set] = true;

            // register bypass
            if ( r_dcache_tlb_ins.read() )   // itlb
            {
                r_itlb.set_bypass(r_dcache_tlb_vaddr.read(),
                                  entry & ((1 << (m_paddr_nbits-PAGE_K_NBITS)) - 1),
                                  r_dcache_tlb_paddr.read() >>
                                      (uint32_log2(m_dcache_words<<2)));
            }
            else                             // dtlb
            {
                r_dtlb.set_bypass(r_dcache_tlb_vaddr.read(),
                                  entry & ((1 << (m_paddr_nbits-PAGE_K_NBITS)) - 1),
                                  r_dcache_tlb_paddr.read() >>
                                      (uint32_log2(m_dcache_words)+2));
            }
            r_dcache_tlb_paddr =
                (paddr_t)(entry & ((1<<(m_paddr_nbits-PAGE_K_NBITS))-1)) << PAGE_K_NBITS |
                (paddr_t)(((r_dcache_tlb_vaddr.read() & PTD_ID2_MASK) >> PAGE_K_NBITS) << 3);
            r_dcache_fsm = DCACHE_TLB_PTE2_GET;

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " HIT in dcache"
              << std::hex << " / paddr = " << r_dcache_tlb_paddr.read()
              << std::dec << " / way = " << way
              << std::dec << " / set = " << set
              << std::dec << " / word = " << word
              << std::hex << " / PTD = " << entry << std::endl;
}
#endif
        }
        else   // PTE1 : we must update the TLB
        {
            r_dcache_in_tlb[m_dcache_sets*way+set] = true;
            r_dcache_tlb_pte_flags  = entry;
            r_dcache_tlb_cache_way  = way;
            r_dcache_tlb_cache_set  = set;
            r_dcache_tlb_cache_word = word;
            r_dcache_fsm            = DCACHE_TLB_PTE1_SELECT;

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " HIT in dcache"
              << std::hex << " / paddr = " << r_dcache_tlb_paddr.read()
<< std::dec << " / way = " << way << std::dec << " / set = " << set << std::dec << " / word = " << word << std::hex << " / PTE1 = " << entry << std::endl; } #endif } } else // we must load the missing cache line in dcache { r_dcache_vci_miss_req = true; r_dcache_vci_paddr = r_dcache_tlb_paddr.read(); r_dcache_miss_type = PTE1_MISS; r_dcache_fsm = DCACHE_MISS_VICTIM; #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " MISS in dcache:" << " PTE1 address = " << std::hex << r_dcache_tlb_paddr.read() << std::endl; } #endif } break; } //////////////////////////// case DCACHE_TLB_PTE1_SELECT: // select a slot for PTE1 { size_t way; size_t set; if ( r_dcache_tlb_ins.read() ) { r_itlb.select( r_dcache_tlb_vaddr.read(), true, // PTE1 &way, &set ); #ifdef INSTRUMENTATION m_cpt_itlb_read++; #endif } else { r_dtlb.select( r_dcache_tlb_vaddr.read(), true, // PTE1 &way, &set ); #ifdef INSTRUMENTATION m_cpt_dtlb_read++; #endif } r_dcache_tlb_way = way; r_dcache_tlb_set = set; r_dcache_fsm = DCACHE_TLB_PTE1_UPDT; #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { if ( r_dcache_tlb_ins.read() ) std::cout << " Select a slot in ITLB:"; else std::cout << " Select a slot in DTLB:"; std::cout << " way = " << std::dec << way << " / set = " << set << std::endl; } #endif break; } ////////////////////////// case DCACHE_TLB_PTE1_UPDT: // write a new PTE1 in tlb after testing the L/R bit // - if L/R bit already set, exit the sub-fsm. // - if not, we update the page table but we dont write // neither in DCACHE, nor in TLB, as this will be done by // the coherence mechanism. { paddr_t nline = r_dcache_tlb_paddr.read() >> (uint32_log2(m_dcache_words)+2); uint32_t pte = r_dcache_tlb_pte_flags.read(); bool pt_updt = false; bool local = true; // We should compute the access locality: // The PPN MSB bits define the destination cluster index. // The m_srcid_d MSB bits define the source cluster index. // The number of bits to compare depends on the number of clusters, // and can be obtained in the mapping table. // As long as this computation is not done, all access are local. 
if ( local ) // local access { if ( not ((pte & PTE_L_MASK) == PTE_L_MASK) ) // we must set the L bit { pt_updt = true; r_dcache_vci_cas_old = pte; r_dcache_vci_cas_new = pte | PTE_L_MASK; pte = pte | PTE_L_MASK; r_dcache_tlb_pte_flags = pte; } } else // remote access { if ( not ((pte & PTE_R_MASK) == PTE_R_MASK) ) // we must set the R bit { pt_updt = true; r_dcache_vci_cas_old = pte; r_dcache_vci_cas_new = pte | PTE_R_MASK; pte = pte | PTE_R_MASK; r_dcache_tlb_pte_flags = pte; } } if ( not pt_updt ) // update TLB and return { if ( r_dcache_tlb_ins.read() ) { r_itlb.write( true, // 2M page pte, 0, // argument unused for a PTE1 r_dcache_tlb_vaddr.read(), r_dcache_tlb_way.read(), r_dcache_tlb_set.read(), nline ); #ifdef INSTRUMENTATION m_cpt_itlb_write++; #endif #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " write PTE1 in ITLB"; std::cout << " / set = " << std::dec << r_dcache_tlb_set.read() << " / way = " << r_dcache_tlb_way.read() << std::endl; r_itlb.printTrace(); } #endif } else { r_dtlb.write( true, // 2M page pte, 0, // argument unused for a PTE1 r_dcache_tlb_vaddr.read(), r_dcache_tlb_way.read(), r_dcache_tlb_set.read(), nline ); #ifdef INSTRUMENTATION m_cpt_dtlb_write++; #endif #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " write PTE1 in DTLB"; std::cout << " / set = " << std::dec << r_dcache_tlb_set.read() << " / way = " << r_dcache_tlb_way.read() << std::endl; r_dtlb.printTrace(); } #endif } r_dcache_fsm = DCACHE_TLB_RETURN; } else // update page table but not TLB { r_dcache_fsm = DCACHE_TLB_LR_UPDT; #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " L/R bit update required" << std::endl; } #endif } break; } ///////////////////////// case DCACHE_TLB_PTE2_GET: // Try to get a PTE2 (64 bits) in the dcache { uint32_t pte_flags; uint32_t pte_ppn; size_t way; size_t set; size_t word; bool hit = r_dcache.read( r_dcache_tlb_paddr.read(), &pte_flags, &pte_ppn, &way, &set, &word ); #ifdef INSTRUMENTATION m_cpt_dcache_data_read++; m_cpt_dcache_dir_read++; #endif if ( hit ) // request hits in dcache { if ( not (pte_flags & PTE_V_MASK) ) // unmapped { if ( r_dcache_tlb_ins.read() ) { r_mmu_ietr = MMU_READ_PT2_UNMAPPED; r_mmu_ibvar = r_dcache_tlb_vaddr.read(); r_icache_tlb_miss_req = false; r_icache_tlb_rsp_error = true; } else { r_mmu_detr = MMU_READ_PT2_UNMAPPED; r_mmu_dbvar = r_dcache_tlb_vaddr.read(); m_drsp.valid = true; m_drsp.error = true; } r_dcache_fsm = DCACHE_IDLE; #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " HIT in dcache, but PTE is unmapped" << " PTE_FLAGS = " << std::hex << pte_flags << " PTE_PPN = " << std::hex << pte_ppn << std::endl; } #endif } else // mapped : we must update the TLB { r_dcache_in_tlb[m_dcache_sets*way+set] = true; r_dcache_tlb_pte_flags = pte_flags; r_dcache_tlb_pte_ppn = pte_ppn; r_dcache_tlb_cache_way = way; r_dcache_tlb_cache_set = set; r_dcache_tlb_cache_word = word; r_dcache_fsm = DCACHE_TLB_PTE2_SELECT; #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " HIT in dcache:" << " PTE_FLAGS = " << std::hex << pte_flags << " PTE_PPN = " << std::hex << pte_ppn << std::endl; } #endif } } else // we must load the missing cache line in dcache { r_dcache_fsm = DCACHE_MISS_VICTIM; r_dcache_vci_miss_req = true; r_dcache_vci_paddr = r_dcache_tlb_paddr.read(); r_dcache_miss_type = PTE2_MISS; #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " MISS in dcache:" << " PTE address = " << std::hex << r_dcache_tlb_paddr.read() << std::endl; } #endif } break; } //////////////////////////// case 
DCACHE_TLB_PTE2_SELECT:     // select a slot for PTE2
{
    size_t way;
    size_t set;

    if ( r_dcache_tlb_ins.read() )
    {
        r_itlb.select( r_dcache_tlb_vaddr.read(),
                       false,   // PTE2
                       &way,
                       &set );
#ifdef INSTRUMENTATION
m_cpt_itlb_read++;
#endif
    }
    else
    {
        r_dtlb.select( r_dcache_tlb_vaddr.read(),
                       false,   // PTE2
                       &way,
                       &set );
#ifdef INSTRUMENTATION
m_cpt_dtlb_read++;
#endif
    }

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    if ( r_dcache_tlb_ins.read() ) std::cout << " Select a slot in ITLB:";
    else                           std::cout << " Select a slot in DTLB:";
    std::cout << " way = " << std::dec << way
              << " / set = " << set << std::endl;
}
#endif
    r_dcache_tlb_way = way;
    r_dcache_tlb_set = set;
    r_dcache_fsm     = DCACHE_TLB_PTE2_UPDT;
    break;
}
//////////////////////////
case DCACHE_TLB_PTE2_UPDT:  // write a new PTE2 in tlb after testing the L/R bit
                            // - if the L/R bit is already set, we exit the sub-fsm.
                            // - if not, we update the page table, but we write
                            //   neither in DCACHE nor in TLB, as this will be done
                            //   by the coherence mechanism.
{
    paddr_t  nline     = r_dcache_tlb_paddr.read() >> (uint32_log2(m_dcache_words)+2);
    uint32_t pte_flags = r_dcache_tlb_pte_flags.read();
    uint32_t pte_ppn   = r_dcache_tlb_pte_ppn.read();
    bool     pt_updt   = false;
    bool     local     = true;

    // We should compute the access locality:
    // The PPN MSB bits define the destination cluster index.
    // The m_srcid_d MSB bits define the source cluster index.
    // The number of bits to compare depends on the number of clusters,
    // and can be obtained in the mapping table.
    // As long as this computation is not done, all accesses are local.

    if ( local )   // local access
    {
        if ( not ((pte_flags & PTE_L_MASK) == PTE_L_MASK) )   // we must set the L bit
        {
            pt_updt                = true;
            r_dcache_vci_cas_old   = pte_flags;
            r_dcache_vci_cas_new   = pte_flags | PTE_L_MASK;
            pte_flags              = pte_flags | PTE_L_MASK;
            r_dcache_tlb_pte_flags = pte_flags;
        }
    }
    else           // remote access
    {
        if ( not ((pte_flags & PTE_R_MASK) == PTE_R_MASK) )   // we must set the R bit
        {
            pt_updt                = true;
            r_dcache_vci_cas_old   = pte_flags;
            r_dcache_vci_cas_new   = pte_flags | PTE_R_MASK;
            pte_flags              = pte_flags | PTE_R_MASK;
            r_dcache_tlb_pte_flags = pte_flags;
        }
    }

    if ( not pt_updt )   // update TLB
    {
        if ( r_dcache_tlb_ins.read() )
        {
            r_itlb.write( false,   // 4K page
                          pte_flags,
                          pte_ppn,
                          r_dcache_tlb_vaddr.read(),
                          r_dcache_tlb_way.read(),
                          r_dcache_tlb_set.read(),
                          nline );
#ifdef INSTRUMENTATION
m_cpt_itlb_write++;
#endif
#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " write PTE2 in ITLB";
    std::cout << " / set = " << std::dec << r_dcache_tlb_set.read()
              << " / way = " << r_dcache_tlb_way.read() << std::endl;
    r_itlb.printTrace();
}
#endif
        }
        else
        {
            r_dtlb.write( false,   // 4K page
                          pte_flags,
                          pte_ppn,
                          r_dcache_tlb_vaddr.read(),
                          r_dcache_tlb_way.read(),
                          r_dcache_tlb_set.read(),
                          nline );
#ifdef INSTRUMENTATION
m_cpt_dtlb_write++;
#endif
#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " write PTE2 in DTLB";
    std::cout << " / set = " << std::dec << r_dcache_tlb_set.read()
              << " / way = " << r_dcache_tlb_way.read() << std::endl;
    r_dtlb.printTrace();
}
#endif
        }
        r_dcache_fsm = DCACHE_TLB_RETURN;
    }
    else   // update page table but not TLB
    {
        r_dcache_fsm = DCACHE_TLB_LR_UPDT;   // dcache and page table update

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " L/R bit update required" << std::endl;
}
#endif
    }
    break;
}
////////////////////////
case DCACHE_TLB_LR_UPDT:    // request a CAS transaction to update the L/R bit
{
#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " Update dcache: (L/R) bit" << std::endl;
}
#endif
    // the r_dcache_vci_cas_old & r_dcache_vci_cas_new
    // registers are already set
    r_dcache_vci_paddr = r_dcache_tlb_paddr.read();

    // prepare llsc local table access
    table_in.cmd     = LLSCLocalTable::SW_CMD;
    table_in.address = r_dcache_tlb_paddr.read();

    // access the table
    r_llsc_table.exec(table_in, table_out);
    llsc_local_table_access_done = true;

    // test if the table is done
    if(!table_out.done) break;

    // request a CAS CMD and go to DCACHE_TLB_LR_WAIT state
    r_dcache_vci_cas_req = true;
    r_dcache_fsm         = DCACHE_TLB_LR_WAIT;
    break;
}
////////////////////////
case DCACHE_TLB_LR_WAIT:    // Waiting for the response to the CAS transaction
                            // updating the L/R bit.
                            // We consume the response in the rsp FIFO,
                            // and exit the sub-fsm, but we don't
                            // analyse the response, because we don't
                            // care if the L/R bit update is not done.
                            // We must take the coherence requests because
                            // there is a risk of dead-lock
{
    // external coherence request
    if ( r_tgt_dcache_req )
    {
        r_dcache_fsm_cc_save = r_dcache_fsm.read();
        r_dcache_fsm         = DCACHE_CC_CHECK;
        break;
    }

    if ( r_vci_rsp_data_error.read() )   // bus error
    {
        std::cout << "BUS ERROR in DCACHE_TLB_LR_WAIT state" << std::endl;
        std::cout << "This should not happen in this state" << std::endl;
        exit(0);
    }
    else if ( r_vci_rsp_fifo_dcache.rok() )   // response available
    {
#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " SC response received" << std::endl;
}
#endif
        vci_rsp_fifo_dcache_get = true;
        r_dcache_fsm            = DCACHE_TLB_RETURN;
    }
    break;
}
///////////////////////
case DCACHE_TLB_RETURN:     // return to the caller depending on the tlb miss type
{
#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " TLB MISS completed" << std::endl;
}
#endif
    if ( r_dcache_tlb_ins.read() ) r_icache_tlb_miss_req = false;
    r_dcache_fsm = DCACHE_IDLE;
    break;
}
///////////////////////
case DCACHE_XTN_SWITCH:     // The r_mmu_ptpr register must be written,
                            // and both the itlb and dtlb must be flushed.
                            // Caution : the itlb miss requests must be taken
                            // to avoid a dead-lock in case of simultaneous ITLB miss
{
    // itlb miss request
    if ( r_icache_tlb_miss_req.read() )
    {
        r_dcache_tlb_ins   = true;
        r_dcache_tlb_vaddr = r_icache_vaddr_save.read();
        r_dcache_fsm       = DCACHE_TLB_MISS;
        break;
    }

    if ( not r_dcache_xtn_req.read() )
    {
        r_dtlb.flush();
        r_mmu_ptpr   = m_dreq.wdata;
        r_dcache_fsm = DCACHE_IDLE;
        m_drsp.valid = true;

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << ": PADDR=" << std::hex
              << (m_dreq.wdata << (INDEX1_NBITS+2)) << std::endl;
}
#endif
    }
    break;
}
/////////////////////
case DCACHE_XTN_SYNC:       // waiting until the write buffer is empty
                            // The coherence request must be taken
                            // as there is a risk of dead-lock
{
    // external coherence request
    if ( r_tgt_dcache_req.read() )
    {
        r_dcache_fsm_cc_save = r_dcache_fsm.read();
        r_dcache_fsm         = DCACHE_CC_CHECK;
    }
    if ( r_wbuf.empty() )
    {
        m_drsp.valid = true;
        r_dcache_fsm = DCACHE_IDLE;
    }
    break;
}
////////////////////////
case DCACHE_XTN_IC_FLUSH:       // Waiting completion of an XTN request to the ICACHE FSM
case DCACHE_XTN_IC_INVAL_VA:    // Caution : the itlb miss requests must be taken,
case DCACHE_XTN_IC_INVAL_PA:    // because the XTN_ICACHE_INVAL request to the icache
case DCACHE_XTN_IT_INVAL:       // can generate an itlb miss,
                                // and because a simultaneous ITLB miss can occur
{
    // external coherence request
    if ( r_tgt_dcache_req )
    {
        r_dcache_fsm_cc_save = r_dcache_fsm.read();
        r_dcache_fsm         = DCACHE_CC_CHECK;
        break;
    }
    // itlb miss request
    if ( r_icache_tlb_miss_req.read() )
    {
        r_dcache_tlb_ins   = true;
        r_dcache_tlb_vaddr = r_icache_vaddr_save.read();
        r_dcache_fsm       = DCACHE_TLB_MISS;
        break;
    }
    // test if the XTN request to the icache is completed
    if ( not r_dcache_xtn_req.read() )
    {
        r_dcache_fsm = DCACHE_IDLE;
        m_drsp.valid = true;
    }
    break;
}
/////////////////////////
case DCACHE_XTN_DC_FLUSH:   // Sequentially invalidate all cache lines, using
                            // the r_dcache_flush_count register as a slot counter.
                            // We loop in this state until all slots have been visited.
                            // A cleanup request is generated for each valid line,
                            // and we are blocked until the previous cleanup is completed.
                            // Finally, both the itlb and dtlb are flushed
                            // (including global entries)
{
    if ( not r_dcache_cleanup_req )
    {
        paddr_t nline;
        size_t  way = r_dcache_flush_count.read()/m_dcache_sets;
        size_t  set = r_dcache_flush_count.read()%m_dcache_sets;
        bool    cleanup_req = r_dcache.inval( way,
                                              set,
                                              &nline );
        if ( cleanup_req )
        {
            r_dcache_cleanup_req  = true;
            r_dcache_cleanup_line = nline;
        }
        r_dcache_in_tlb[m_dcache_sets*way+set]       = false;
        r_dcache_contains_ptd[m_dcache_sets*way+set] = false;

        r_dcache_flush_count = r_dcache_flush_count.read() + 1;

        if ( r_dcache_flush_count.read() == (m_dcache_sets*m_dcache_ways - 1) )   // last
        {
            r_dtlb.reset();
            r_itlb.reset();
            r_dcache_fsm = DCACHE_IDLE;
            m_drsp.valid = true;
        }
    }
    break;
}
/////////////////////////
case DCACHE_XTN_DT_INVAL:   // handling the processor XTN_DTLB_INVAL request
{
    r_dtlb.inval(r_dcache_p0_wdata.read());
    r_dcache_fsm = DCACHE_IDLE;
    m_drsp.valid = true;
    break;
}
////////////////////////////
case DCACHE_XTN_DC_INVAL_VA:    // selective cache line invalidate with virtual address
                                // requires 3 cycles: access tlb, read cache, inval cache
                                // we compute the physical address in this state
{
    paddr_t paddr;
    bool    hit;

    if ( r_mmu_mode.read() & DATA_TLB_MASK )   // dtlb activated
    {
#ifdef INSTRUMENTATION
m_cpt_dtlb_read++;
#endif
        hit = r_dtlb.translate( r_dcache_p0_wdata.read(),
                                &paddr );
    }
    else                                       // dtlb not activated
    {
        paddr = (paddr_t)r_dcache_p0_wdata.read();
        hit   = true;
    }

    if ( hit )   // tlb hit
    {
        r_dcache_p0_paddr = paddr;
        r_dcache_fsm      = DCACHE_XTN_DC_INVAL_PA;
    }
    else         // tlb miss
    {
#ifdef INSTRUMENTATION
m_cpt_dtlb_miss++;
#endif
        r_dcache_tlb_ins   = false;   // dtlb
        r_dcache_tlb_vaddr = r_dcache_p0_wdata.read();
        r_dcache_fsm       = DCACHE_TLB_MISS;
    }

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " Compute physical address" << std::hex
              << " / VADDR = " << r_dcache_p0_wdata.read()
              << " / PADDR = " << paddr << std::endl;
}
#endif
    break;
}
////////////////////////////
case DCACHE_XTN_DC_INVAL_PA:    // selective cache line invalidate with physical address
                                // requires 2 cycles: read cache / inval cache
                                // In this state we read dcache.
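                                //
                                // Illustrative summary (comments only) of the full
                                // selective-invalidate sequence implemented by the
                                // three states described above:
                                //
                                //   DC_INVAL_VA : dtlb.translate(vaddr)  -> paddr
                                //   DC_INVAL_PA : dcache.read(paddr)     -> way/set
                                //   DC_INVAL_GO : dcache.inval(way, set) -> cleanup
                                //                                           + TLB checks
                                //
                                // A miss at the second step simply acknowledges the
                                // processor request, as the line is not present and
                                // there is nothing to invalidate.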
{ uint32_t data; size_t way; size_t set; size_t word; bool hit = r_dcache.read( r_dcache_p0_paddr.read(), &data, &way, &set, &word ); #ifdef INSTRUMENTATION m_cpt_dcache_data_read++; m_cpt_dcache_dir_read++; #endif if ( hit ) // inval to be done { r_dcache_xtn_way = way; r_dcache_xtn_set = set; r_dcache_fsm = DCACHE_XTN_DC_INVAL_GO; } else // miss : nothing to do { r_dcache_fsm = DCACHE_IDLE; m_drsp.valid = true; } #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " Test hit in dcache" << std::hex << " / PADDR = " << r_dcache_p0_paddr.read() << std::dec << " / HIT = " << hit << " / SET = " << set << " / WAY = " << way << std::endl; } #endif break; } //////////////////////////// case DCACHE_XTN_DC_INVAL_GO: // In this state, we invalidate the cache line // Blocked if previous cleanup not completed // Test if itlb or dtlb inval is required { if ( not r_dcache_cleanup_req.read() ) { paddr_t nline; size_t way = r_dcache_xtn_way.read(); size_t set = r_dcache_xtn_set.read(); bool hit; hit = r_dcache.inval( way, set, &nline ); assert(hit && "XTN_DC_INVAL way/set should still be in cache"); // request cleanup r_dcache_cleanup_req = true; r_dcache_cleanup_line = nline; // possible itlb & dtlb invalidate if ( r_dcache_in_tlb[way*m_dcache_sets+set] ) { r_dcache_tlb_inval_line = nline; r_dcache_tlb_inval_count = 0; r_dcache_fsm_scan_save = DCACHE_XTN_DC_INVAL_END; r_dcache_fsm = DCACHE_INVAL_TLB_SCAN; r_dcache_in_tlb[way*m_dcache_sets+set] = false; } else if ( r_dcache_contains_ptd[way*m_dcache_sets+set] ) { r_itlb.reset(); r_dtlb.reset(); r_dcache_contains_ptd[way*m_dcache_sets+set] = false; r_dcache_fsm = DCACHE_IDLE; m_drsp.valid = true; } else { r_dcache_fsm = DCACHE_IDLE; m_drsp.valid = true; } #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " Actual dcache inval" << std::hex << " / NLINE = " << nline << std::endl; } #endif } break; } ////////////////////////////// case DCACHE_XTN_DC_INVAL_END: // send response to processor XTN request { r_dcache_fsm = DCACHE_IDLE; m_drsp.valid = true; break; } //////////////////////// case DCACHE_MISS_VICTIM: // Selects a victim line if there is no pending cleanup // on the missing line, and if a new cleanup can be posted. 
                            // Set the r_dcache_cleanup_req flip-flop if required
{
    size_t index;   // unused

    bool hit = r_cleanup_buffer.hit( r_dcache_vci_paddr.read()>>
                                         (uint32_log2(m_dcache_words)+2),
                                     &index );

    if ( not hit and not r_dcache_cleanup_req.read() )
    {
        bool    valid;
        size_t  way;
        size_t  set;
        paddr_t victim;

        valid = r_dcache.victim_select( r_dcache_vci_paddr.read(),
                                        &victim,
                                        &way,
                                        &set );
        r_dcache_miss_way = way;
        r_dcache_miss_set = set;

        if ( valid )
        {
            r_dcache_cleanup_req  = true;
            r_dcache_cleanup_line = victim;
            r_dcache_fsm          = DCACHE_MISS_INVAL;
        }
        else
        {
            r_dcache_fsm = DCACHE_MISS_WAIT;
        }

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " Select a slot:" << std::dec
              << " / WAY = " << way
              << " / SET = " << set
              << " / VALID = " << valid
              << " / LINE = " << std::hex << victim << std::endl;
}
#endif
    }
    break;
}
///////////////////////
case DCACHE_MISS_INVAL:     // invalidate the victim line
                            // and possibly request an itlb or dtlb invalidate
{
    paddr_t nline;
    size_t  way = r_dcache_miss_way.read();
    size_t  set = r_dcache_miss_set.read();
    bool    hit;

    hit = r_dcache.inval( way, set, &nline );
    assert(hit && "selected way/set line should be in dcache");

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " inval line:" << std::dec
              << " / way = " << way
              << " / set = " << set
              << " / nline = " << std::hex << nline << std::endl;
}
#endif
    // if selective itlb & dtlb invalidates are required,
    // the miss response is not handled before the invalidate is completed
    if ( r_dcache_in_tlb[way*m_dcache_sets+set] )
    {
        r_dcache_in_tlb[way*m_dcache_sets+set] = false;
        r_dcache_tlb_inval_line  = nline;
        r_dcache_tlb_inval_count = 0;
        r_dcache_fsm_scan_save   = DCACHE_MISS_WAIT;
        r_dcache_fsm             = DCACHE_INVAL_TLB_SCAN;
    }
    else if ( r_dcache_contains_ptd[way*m_dcache_sets+set] )
    {
        r_itlb.reset();
        r_dtlb.reset();
        r_dcache_contains_ptd[way*m_dcache_sets+set] = false;
        r_dcache_fsm = DCACHE_MISS_WAIT;
    }
    else
    {
        r_dcache_fsm = DCACHE_MISS_WAIT;
    }
    break;
}
//////////////////////
case DCACHE_MISS_WAIT:      // waiting for the response to a miss request from the
                            // VCI_RSP FSM. This state is in charge of error signaling:
                            // there are 5 types of errors depending on the requester
{
    // external coherence request
    if ( r_tgt_dcache_req )
    {
        r_dcache_fsm_cc_save = r_dcache_fsm;
        r_dcache_fsm         = DCACHE_CC_CHECK;
        break;
    }

    if ( r_vci_rsp_data_error.read() )   // bus error
    {
        switch ( r_dcache_miss_type.read() )
        {
            case PROC_MISS:
            {
                r_mmu_detr   = MMU_READ_DATA_ILLEGAL_ACCESS;
                r_mmu_dbvar  = r_dcache_p0_vaddr.read();
                m_drsp.valid = true;
                m_drsp.error = true;
                r_dcache_fsm = DCACHE_IDLE;
                break;
            }
            case PTE1_MISS:
            {
                if ( r_dcache_tlb_ins.read() )
                {
                    r_mmu_ietr             = MMU_READ_PT1_ILLEGAL_ACCESS;
                    r_mmu_ibvar            = r_dcache_tlb_vaddr.read();
                    r_icache_tlb_miss_req  = false;
                    r_icache_tlb_rsp_error = true;
                }
                else
                {
                    r_mmu_detr   = MMU_READ_PT1_ILLEGAL_ACCESS;
                    r_mmu_dbvar  = r_dcache_tlb_vaddr.read();
                    m_drsp.valid = true;
                    m_drsp.error = true;
                }
                r_dcache_fsm = DCACHE_IDLE;
                break;
            }
            case PTE2_MISS:
            {
                if ( r_dcache_tlb_ins.read() )
                {
                    r_mmu_ietr             = MMU_READ_PT2_ILLEGAL_ACCESS;
                    r_mmu_ibvar            = r_dcache_tlb_vaddr.read();
                    r_icache_tlb_miss_req  = false;
                    r_icache_tlb_rsp_error = true;
                }
                else
                {
                    r_mmu_detr   = MMU_READ_PT2_ILLEGAL_ACCESS;
                    r_mmu_dbvar  = r_dcache_tlb_vaddr.read();
                    m_drsp.valid = true;
                    m_drsp.error = true;
                }
                r_dcache_fsm = DCACHE_IDLE;
                break;
            }
        } // end switch type
        r_vci_rsp_data_error = false;
    }
    else if ( r_vci_rsp_fifo_dcache.rok() )   // valid response available
    {
        r_dcache_miss_word = 0;
        r_dcache_fsm       = DCACHE_MISS_UPDT;
    }
    break;
}
//////////////////////
case DCACHE_MISS_UPDT:      // update the dcache (one word per cycle) //
returns the response depending on the miss type { if ( r_vci_rsp_fifo_dcache.rok() ) // one word available { if ( r_dcache_miss_inval.read() ) // Matching coherence request // pop the FIFO, without cache update // send a cleanup for the missing line // if the previous cleanup is completed { if ( r_dcache_miss_word.read() < (m_dcache_words - 1) ) // not the last { vci_rsp_fifo_dcache_get = true; r_dcache_miss_word = r_dcache_miss_word.read() + 1; } else // last word { if ( not r_dcache_cleanup_req.read() ) // no pending cleanup { vci_rsp_fifo_dcache_get = true; r_dcache_cleanup_req = true; r_dcache_cleanup_line = r_dcache_vci_paddr.read() >> (uint32_log2(m_dcache_words)+2); r_dcache_miss_inval = false; r_dcache_fsm = DCACHE_IDLE; } } } else // No matching coherence request // pop the FIFO and update the cache // update the directory at the last word { size_t way = r_dcache_miss_way.read(); size_t set = r_dcache_miss_set.read(); size_t word = r_dcache_miss_word.read(); #ifdef INSTRUMENTATION m_cpt_dcache_data_write++; #endif r_dcache.write( way, set, word, r_vci_rsp_fifo_dcache.read()); vci_rsp_fifo_dcache_get = true; r_dcache_miss_word = r_dcache_miss_word.read() + 1; // if last word, update directory, set in_tlb & contains_ptd bits if ( r_dcache_miss_word.read() == (m_dcache_words - 1) ) { #ifdef INSTRUMENTATION m_cpt_dcache_dir_write++; #endif r_dcache.victim_update_tag( r_dcache_vci_paddr.read(), r_dcache_miss_way.read(), r_dcache_miss_set.read() ); r_dcache_in_tlb[way*m_dcache_sets+set] = false; r_dcache_contains_ptd[way*m_dcache_sets+set] = false; if (r_dcache_miss_type.read()==PTE1_MISS) r_dcache_fsm = DCACHE_TLB_PTE1_GET; else if (r_dcache_miss_type.read()==PTE2_MISS) r_dcache_fsm = DCACHE_TLB_PTE2_GET; else r_dcache_fsm = DCACHE_IDLE; } } #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { if ( r_dcache_miss_inval.read() ) { if ( r_dcache_miss_word.read() < m_dcache_words-1 ) { std::cout << " Matching coherence request:" << " pop the FIFO, don't update the cache" << std::endl; } else { std::cout << " Matching coherence request:" << " last word : send a cleanup request " << std::endl; } } else { std::cout << " Write one word:" << " address = " << std::hex << r_dcache_vci_paddr.read() << " / data = " << r_vci_rsp_fifo_dcache.read() << " / way = " << std::dec << r_dcache_miss_way.read() << " / set = " << r_dcache_miss_set.read() << " / word = " << r_dcache_miss_word.read() << std::endl; } } #endif } // end if rok break; } ///////////////////// case DCACHE_UNC_WAIT: { // external coherence request if ( r_tgt_dcache_req.read() ) { r_dcache_fsm_cc_save = r_dcache_fsm; r_dcache_fsm = DCACHE_CC_CHECK; break; } if ( r_vci_rsp_data_error.read() ) // bus error { r_mmu_detr = MMU_READ_DATA_ILLEGAL_ACCESS; r_mmu_dbvar = m_dreq.addr; r_vci_rsp_data_error = false; m_drsp.error = true; m_drsp.valid = true; r_dcache_fsm = DCACHE_IDLE; break; } else if ( r_vci_rsp_fifo_dcache.rok() ) // data available { // consume data vci_rsp_fifo_dcache_get = true; r_dcache_fsm = DCACHE_IDLE; // acknowledge the processor request if it has not been modified if ( m_dreq.valid and (m_dreq.addr == r_dcache_p0_vaddr.read()) ) { m_drsp.valid = true; m_drsp.rdata = r_vci_rsp_fifo_dcache.read(); } } break; } ///////////////////// case DCACHE_LL_WAIT: { // external coherence request if ( r_tgt_dcache_req.read() ) { r_dcache_fsm_cc_save = r_dcache_fsm; r_dcache_fsm = DCACHE_CC_CHECK; break; } if ( r_vci_rsp_data_error.read() ) // bus error { r_mmu_detr = MMU_READ_DATA_ILLEGAL_ACCESS; r_mmu_dbvar = m_dreq.addr; r_vci_rsp_data_error 
= false;
        m_drsp.error = true;
        m_drsp.valid = true;
        r_dcache_fsm = DCACHE_IDLE;
        break;
    }
    else if ( r_vci_rsp_fifo_dcache.rok() )   // data available
    {
        // consume data
        vci_rsp_fifo_dcache_get = true;

        if(r_dcache_ll_rsp_count.read() == 0)   // first flit
        {
            // access table
            table_in.cmd   = LLSCLocalTable::LL_RSP ;
            table_in.index = 0;//p_vci_ini_d.rtrdid.read() ; // TODO use this ?
            table_in.key   = r_vci_rsp_fifo_dcache.read();

            r_llsc_table.exec(table_in, table_out);
            llsc_local_table_access_done = true;

            r_dcache_ll_rsp_count = r_dcache_ll_rsp_count.read() + 1 ;
        }
        else                                     // last flit
        {
            // acknowledge the processor request if it has not been modified
            if ( m_dreq.valid and (m_dreq.addr == r_dcache_p0_vaddr.read()) )
            {
                m_drsp.valid = true;
                m_drsp.rdata = r_vci_rsp_fifo_dcache.read();
            }
            r_dcache_fsm = DCACHE_IDLE;
        }
    }
    break;
}
////////////////////
case DCACHE_SC_WAIT:    // waiting for the VCI response after a processor SC request
{
    // external coherence request
    if ( r_tgt_dcache_req.read() )
    {
        r_dcache_fsm_cc_save = r_dcache_fsm;
        r_dcache_fsm         = DCACHE_CC_CHECK;
        break;
    }

    if ( r_vci_rsp_data_error.read() )   // bus error
    {
        r_mmu_detr           = MMU_READ_DATA_ILLEGAL_ACCESS;
        r_mmu_dbvar          = m_dreq.addr;
        r_vci_rsp_data_error = false;
        m_drsp.error         = true;
        m_drsp.valid         = true;
        r_dcache_fsm         = DCACHE_IDLE;
        break;
    }
    else if ( r_vci_rsp_fifo_dcache.rok() )   // response available
    {
        // consume response
        vci_rsp_fifo_dcache_get = true;
        m_drsp.valid            = true;
        m_drsp.rdata            = r_vci_rsp_fifo_dcache.read();
        r_dcache_fsm            = DCACHE_IDLE;
    }
    break;
}
//////////////////////////
case DCACHE_DIRTY_GET_PTE:      // This sub-fsm sets the PTE Dirty bit in memory
                                // before handling a processor WRITE or SC request.
                                // Input argument is r_dcache_dirty_paddr.
                                // In this first state, we get the PTE value in dcache
                                // and post a CAS request to the CMD FSM.
{
    // get PTE in dcache
    uint32_t pte;
    size_t   way;
    size_t   set;
    size_t   word;   // unused

    bool hit = r_dcache.read( r_dcache_dirty_paddr.read(),
                              &pte,
                              &way,
                              &set,
                              &word );
#ifdef INSTRUMENTATION
m_cpt_dcache_data_read++;
m_cpt_dcache_dir_read++;
#endif
    assert( hit and "error in DCACHE_DIRTY_GET_PTE: the PTE should be in dcache" );

    // request CAS transaction to CMD_FSM
    r_dcache_dirty_way = way;
    r_dcache_dirty_set = set;

    // prepare llsc local table access
    table_in.cmd     = LLSCLocalTable::SW_CMD;
    table_in.address = r_dcache_dirty_paddr.read();

    // access the table
    r_llsc_table.exec(table_in, table_out);
    llsc_local_table_access_done = true;

    // test if the table is done
    if(!table_out.done) break;

    // request a CAS CMD and go to DCACHE_DIRTY_WAIT state
    r_dcache_vci_cas_req = true;
    r_dcache_vci_paddr   = r_dcache_dirty_paddr.read();
    r_dcache_vci_cas_old = pte;
    r_dcache_vci_cas_new = pte | PTE_D_MASK;
    r_dcache_fsm         = DCACHE_DIRTY_WAIT;

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " Get PTE in dcache" << std::hex
              << " / PTE_PADDR = " << r_dcache_dirty_paddr.read()
              << " / PTE_VALUE = " << pte << std::dec
              << " / CACHE_SET = " << set
              << " / CACHE_WAY = " << way << std::endl;
}
#endif
    break;
}
//////////////////////////
case DCACHE_DIRTY_WAIT:     // wait completion of the CAS for the PTE Dirty bit,
                            // and return to IDLE state when the response is received.
                            // We don't care if the CAS is a failure:
                            // - if the CAS is a success, the coherence mechanism
                            //   updates the local copy.
                            // - if the CAS is a failure, we just retry the write.
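                            //
                            // Illustrative summary (comments only) of the retry
                            // loop implied above:
                            //
                            //   DIRTY_GET_PTE : old = PTE / new = PTE | PTE_D_MASK
                            //                   and post the CAS command
                            //   DIRTY_WAIT    : consume the CAS response, go to IDLE
                            //   IDLE          : the write is decoded again ; if the
                            //                   Dirty bit is still not set in the
                            //                   dtlb, the sequence is replayed until
                            //                   the coherence mechanism has
                            //                   propagated the updated PTE.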
{
    // external coherence request
    if ( r_tgt_dcache_req )
    {
        r_dcache_fsm_cc_save = r_dcache_fsm;
        r_dcache_fsm         = DCACHE_CC_CHECK;
        break;
    }

    if ( r_vci_rsp_data_error.read() )   // bus error
    {
        std::cout << "BUS ERROR in DCACHE_DIRTY_WAIT state" << std::endl;
        std::cout << "This should not happen in this state" << std::endl;
        exit(0);
    }
    else if ( r_vci_rsp_fifo_dcache.rok() )   // response available
    {
        vci_rsp_fifo_dcache_get = true;
        r_dcache_fsm            = DCACHE_IDLE;

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " SC completed" << std::endl;
}
#endif
    }
    break;
}
/////////////////////
case DCACHE_CC_CHECK:       // This state is the entry point for the sub-FSM
                            // handling coherence requests.
                            // If there is a matching pending miss on the modified
                            // cache line, this is signaled in the r_dcache_miss_inval
                            // flip-flop.
                            // If the updated (or invalidated) cache line has copies
                            // in TLBs, these TLB copies are invalidated.
                            // The return state is defined in r_dcache_fsm_cc_save
{
    paddr_t paddr = r_tgt_paddr.read();
    paddr_t mask  = ~((m_dcache_words<<2)-1);

    if( (r_dcache_fsm_cc_save == DCACHE_MISS_WAIT) and
        ((r_dcache_vci_paddr.read() & mask) == (paddr & mask)) )   // matching pending miss
    {
        r_dcache_miss_inval = true;                  // signaling the match
        r_tgt_dcache_req    = false;                 // coherence request completed
        r_tgt_dcache_rsp    = r_tgt_update.read();   // response required if update
        r_dcache_fsm        = r_dcache_fsm_cc_save.read();

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " Coherence request matching a pending miss:"
              << " address = " << std::hex << paddr << std::endl;
}
#endif
    }
    else   // no match for a pending miss
    {
        uint32_t rdata;
        size_t   way;
        size_t   set;
        size_t   word;

        bool hit = r_dcache.read( paddr,
                                  &rdata,   // unused
                                  &way,
                                  &set,
                                  &word);   // unused
#ifdef INSTRUMENTATION
m_cpt_dcache_data_read++;
m_cpt_dcache_dir_read++;
#endif
        r_dcache_cc_way = way;
        r_dcache_cc_set = set;

        if ( hit and r_tgt_update.read() )            // hit update
        {
            r_dcache_fsm     = DCACHE_CC_UPDT;
            r_dcache_cc_word = r_tgt_word_min.read();
        }
        else if ( hit and not r_tgt_update.read() )   // hit inval
        {
            r_dcache_fsm = DCACHE_CC_INVAL;
        }
        else                                          // miss can happen
        {
            r_tgt_dcache_req = false;
            r_tgt_dcache_rsp = r_tgt_update.read();
            r_dcache_fsm     = r_dcache_fsm_cc_save.read();
        }

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " Coherence request received :"
              << " address = " << std::hex << paddr << std::dec;
    if ( hit )
    {
        std::cout << " / HIT"
                  << " / way = " << way
                  << " / set = " << set << std::endl;
    }
    else
    {
        std::cout << " / MISS" << std::endl;
    }
}
#endif
    }
    break;
}
/////////////////////
case DCACHE_CC_INVAL:       // invalidate one cache line after
                            // the invalidation of copies in TLBs
{
    paddr_t nline;
    size_t  way = r_dcache_cc_way.read();
    size_t  set = r_dcache_cc_set.read();
    bool    hit;

    if ( r_dcache_in_tlb[way*m_dcache_sets+set] )   // selective TLB inval
    {
        r_dcache_in_tlb[way*m_dcache_sets+set] = false;
        r_dcache_tlb_inval_line  = r_tgt_paddr.read() >>
                                   (uint32_log2(m_dcache_words)+2);
        r_dcache_tlb_inval_count = 0;
        r_dcache_fsm_scan_save   = r_dcache_fsm.read();
        r_dcache_fsm             = DCACHE_INVAL_TLB_SCAN;
    }
    else                                            // actual cache line inval
    {
        if ( r_dcache_contains_ptd[way*m_dcache_sets+set] )   // TLB flush
        {
            r_itlb.reset();
            r_dtlb.reset();
            r_dcache_contains_ptd[way*m_dcache_sets+set] = false;
        }
        r_tgt_dcache_rsp = true;
        r_tgt_dcache_req = false;
        r_dcache_fsm     = r_dcache_fsm_cc_save.read();

        hit = r_dcache.inval( way, set, &nline );

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " Invalidate cache line" << std::dec
              << " / WAY = " << way
              << " / SET = " << set << std::endl;
}
#endif
assert(hit && "CC_INVAL way/set should be in dcache"); } break; } /////////////////// case DCACHE_CC_UPDT: // write one word per cycle (from word_min to word_max) // and test possible copies in TLBs { size_t word = r_dcache_cc_word.read(); size_t way = r_dcache_cc_way.read(); size_t set = r_dcache_cc_set.read(); paddr_t nline = r_tgt_paddr.read() >> (uint32_log2(m_dcache_words)+2); if ( r_dcache_in_tlb[way*m_dcache_sets+set] ) // selective TLB inval { r_dcache_in_tlb[way*m_dcache_sets+set] = false; r_dcache_tlb_inval_line = nline; r_dcache_tlb_inval_count = 0; r_dcache_fsm_scan_save = r_dcache_fsm.read(); r_dcache_fsm = DCACHE_INVAL_TLB_SCAN; } else // cache update { if ( r_dcache_contains_ptd[way*m_dcache_sets+set] ) // TLB flush { r_itlb.reset(); r_dtlb.reset(); r_dcache_contains_ptd[way*m_dcache_sets+set] = false; } r_dcache.write( way, set, word, r_tgt_buf[word], r_tgt_be[word] ); #ifdef INSTRUMENTATION m_cpt_dcache_data_write++; #endif r_dcache_cc_word = word + 1; #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " Update one word" << std::dec << " / WAY = " << way << " / SET = " << set << " / WORD = " << word << " / VALUE = " << std::hex << r_tgt_buf[word] << std::endl; } #endif if ( word == r_tgt_word_max.read() ) // last word { r_tgt_dcache_rsp = true; r_tgt_dcache_req = false; r_dcache_fsm = r_dcache_fsm_cc_save.read(); } } break; } /////////////////////////// case DCACHE_INVAL_TLB_SCAN: // Scan sequencially all TLB entries for both ITLB & DTLB // It makes the assumption that (m_itlb_sets == m_dtlb_sets) // and (m_itlb_ways == m_dtlb_ways) // We enter this state when a DCACHE line is modified, // and there is a copy in itlb or dtlb. // It can be caused by: // - a coherence inval or updt transaction, // - a line inval caused by a cache miss // - a processor XTN inval request, // - a WRITE hit, // - a Dirty bit update failure // Input arguments are: // - r_dcache_tlb_inval_line // - r_dcache_tlb_inval_count // - r_dcache_fsm_cc_save { paddr_t line = r_dcache_tlb_inval_line.read(); // nline size_t way = r_dcache_tlb_inval_count.read()/m_itlb_sets; // way size_t set = r_dcache_tlb_inval_count.read()%m_itlb_sets; // set bool ok; ok = r_itlb.inval( line, way, set ); #if DEBUG_DCACHE if ( m_debug_dcache_fsm and ok ) { std::cout << " Invalidate ITLB entry:" << std::hex << " line = " << line << std::dec << " / set = " << set << " / way = " << way << std::endl; r_itlb.printTrace(); } #endif ok = r_dtlb.inval( line, way, set ); #if DEBUG_DCACHE if ( m_debug_dcache_fsm and ok ) { std::cout << " Invalidate DTLB entry:" << std::hex << " line = " << line << std::dec << " / set = " << set << " / way = " << way << std::endl; r_dtlb.printTrace(); } #endif // return to the calling state when TLB inval completed if ( r_dcache_tlb_inval_count.read() == ((m_dtlb_sets*m_dtlb_ways)-1) ) { r_dcache_fsm = r_dcache_fsm_scan_save.read(); } r_dcache_tlb_inval_count = r_dcache_tlb_inval_count.read() + 1; break; } } // end switch r_dcache_fsm // perform a NOP access the the local llsc table if necessary if(llsc_local_table_access_done == false) { table_in.cmd = LLSCLocalTable::NOP; r_llsc_table.exec(table_in, table_out); } ///////////////// wbuf update ////////////////////////////////////////////////////// r_wbuf.update(); //////////////// test processor frozen ///////////////////////////////////////////// // The simulation exit if the number of consecutive frozen cycles // is larger than the m_max_frozen_cycles (constructor parameter) if ( (m_ireq.valid and not m_irsp.valid) or (m_dreq.valid and not 
    if ( (m_ireq.valid and not m_irsp.valid) or (m_dreq.valid and not m_drsp.valid) )
    {
        m_cpt_frz_cycles++;         // used for instrumentation
        m_cpt_stop_simulation++;    // used for debug
        if ( m_cpt_stop_simulation > m_max_frozen_cycles )
        {
            std::cout << std::dec
                      << "ERROR in CC_VCACHE_WRAPPER " << name() << std::endl
                      << " stop at cycle " << m_cpt_total_cycles << std::endl
                      << " frozen since cycle "
                      << m_cpt_total_cycles - m_max_frozen_cycles << std::endl;
            r_iss.dump();
            exit(1);
        }
    }
    else
    {
        m_cpt_stop_simulation = 0;
    }

    /////////// execute one iss cycle /////////////////////////////////
    {
        uint32_t it = 0;
        for (size_t i=0; i<(size_t)iss_t::n_irq; i++)
            if (p_irq[i].read()) it |= (1<<i);
        r_iss.executeNCycles(1, m_irsp, m_drsp, it);
    }

    ////////////////////////// CLEANUP FSM /////////////////////////////
    // This FSM sends the cleanup commands on the coherence network,
    // serving the dcache and icache requests with an alternating priority.
    switch ( r_cleanup_fsm.read() )
    {
        ///////////////////////
        case CLEANUP_DATA_IDLE:     // dcache cleanup request has highest priority
        {
            if      ( r_dcache_cleanup_req ) r_cleanup_fsm = CLEANUP_DATA_GO;
            else if ( r_icache_cleanup_req ) r_cleanup_fsm = CLEANUP_INS_GO;
            break;
        }
        //////////////////////
        case CLEANUP_INS_IDLE:      // icache cleanup request has highest priority
        {
            if      ( r_icache_cleanup_req ) r_cleanup_fsm = CLEANUP_INS_GO;
            else if ( r_dcache_cleanup_req ) r_cleanup_fsm = CLEANUP_DATA_GO;
            break;
        }
        ////////////////////
        case CLEANUP_DATA_GO:
        {
            if ( p_vci_ini_c.cmdack.read() )
            {
                r_cleanup_fsm        = CLEANUP_INS_IDLE;
                r_dcache_cleanup_req = false;

#if DEBUG_CLEANUP
                if ( m_debug_cleanup_fsm )
                {
                    std::cout << "  <CLEANUP_DATA_GO> Cleanup request for dcache:" << std::hex
                              << " address = " << (r_dcache_cleanup_line.read()*m_dcache_words*4)
                              << " / trdid = " << std::dec << r_cleanup_trdid.read() << std::endl;
                }
#endif
            }
            break;
        }
        ////////////////////
        case CLEANUP_INS_GO:
        {
            if ( p_vci_ini_c.cmdack.read() )
            {
                r_cleanup_fsm        = CLEANUP_DATA_IDLE;
                r_icache_cleanup_req = false;

#if DEBUG_CLEANUP
                if ( m_debug_cleanup_fsm )
                {
                    std::cout << "  <CLEANUP_INS_GO> Cleanup request for icache:" << std::hex
                              << " address = " << (r_icache_cleanup_line.read()*m_icache_words*4)
                              << " / trdid = " << std::dec << r_cleanup_trdid.read() << std::endl;
                }
#endif
            }
            break;
        }
    } // end switch CLEANUP FSM

    //////////////// Handling cleanup responses //////////////////
    // rtrdid>>1 recovers the index of the cleanup buffer entry to release
    if ( p_vci_ini_c.rspval.read() )
    {
        r_cleanup_buffer.inval( p_vci_ini_c.rtrdid.read() >> 1 );
    }

    ///////////////// Response FIFOs update //////////////////////
    r_vci_rsp_fifo_icache.update(vci_rsp_fifo_icache_get,
                                 vci_rsp_fifo_icache_put,
                                 vci_rsp_fifo_icache_data);

    r_vci_rsp_fifo_dcache.update(vci_rsp_fifo_dcache_get,
                                 vci_rsp_fifo_dcache_put,
                                 vci_rsp_fifo_dcache_data);

#undef LLSCLocalTable
} // end transition()

///////////////////////
tmpl(void)::genMoore()
///////////////////////
{
    ////////////////////////////////////////////////////////////////
    // VCI initiator command on the coherence network (cleanup)
    // it depends on the CLEANUP FSM state

    paddr_t cleanup_nline;
    paddr_t address;

    if ( r_cleanup_fsm.read() == CLEANUP_DATA_GO )
    {
        cleanup_nline = r_dcache_cleanup_line.read();
        address       = (m_x_width + m_y_width) ?
                        (cleanup_nline * m_dcache_words * 4) >>
                            (vci_param::N - m_x_width - m_y_width) :
                        0;
    }
    else if ( r_cleanup_fsm.read() == CLEANUP_INS_GO )
    {
        cleanup_nline = r_icache_cleanup_line.read();
        address       = (m_x_width + m_y_width) ?
                        (cleanup_nline * m_icache_words * 4) >>
                            (vci_param::N - m_x_width - m_y_width) :
                        0;
    }
    else
    {
        cleanup_nline = 0;
        address       = 0;
    }

    // concatenate the (x,y) coordinates extracted above with the local id
    // of the target memory cache, and place the result in the address MSBs
    address <<= vci_param::S - m_x_width - m_y_width;
    address |=  m_memory_cache_local_id;
    address <<= vci_param::N - vci_param::S;

    p_vci_ini_c.cmdval  = ((r_cleanup_fsm.read() == CLEANUP_DATA_GO) or
                           (r_cleanup_fsm.read() == CLEANUP_INS_GO));
    p_vci_ini_c.address = address;
    p_vci_ini_c.wdata   = (uint32_t) cleanup_nline;     // nline bits 31:0
    p_vci_ini_c.be      = (cleanup_nline >> 32) & 0x3;  // nline bits 33:32
    p_vci_ini_c.plen    = 4;
    p_vci_ini_c.cmd     = vci_param::CMD_WRITE;
    p_vci_ini_c.trdid   = r_cleanup_trdid.read();
    p_vci_ini_c.pktid   = 0;
    p_vci_ini_c.srcid   = m_srcid_c;
    p_vci_ini_c.cons    = false;
    p_vci_ini_c.wrap    = false;
    p_vci_ini_c.contig  = false;
    p_vci_ini_c.clen    = 0;
    p_vci_ini_c.cfixed  = false;
    p_vci_ini_c.eop     = true;

    /////////////////////////////////////////////////////////////////
    // VCI initiator response on the coherence network (cleanup)
    // We always consume the response, and we don't use it.
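    // The acknowledge is unconditional: a cleanup response carries no data,
    // and the rtrdid it carries has already been used in transition() to
    // release the corresponding r_cleanup_buffer entry.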
    p_vci_ini_c.rspack = true;

    /////////////////////////////////////////////////////////////////
    // VCI initiator command on the direct network
    // it depends on the CMD FSM state

    bool is_sc_or_cas = (r_vci_cmd_fsm.read() == CMD_DATA_CAS) or
                        (r_vci_cmd_fsm.read() == CMD_DATA_SC);

    p_vci_ini_d.pktid  = 0;
    p_vci_ini_d.srcid  = m_srcid_d;
    p_vci_ini_d.cons   = is_sc_or_cas;
    p_vci_ini_d.contig = not is_sc_or_cas;
    p_vci_ini_d.wrap   = false;
    p_vci_ini_d.clen   = 0;
    p_vci_ini_d.cfixed = false;

    switch ( r_vci_cmd_fsm.read() )
    {
        case CMD_IDLE:
            p_vci_ini_d.cmdval  = false;
            p_vci_ini_d.address = 0;
            p_vci_ini_d.wdata   = 0;
            p_vci_ini_d.be      = 0;
            p_vci_ini_d.trdid   = 0;
            p_vci_ini_d.pktid   = 0;
            p_vci_ini_d.plen    = 0;
            p_vci_ini_d.cmd     = vci_param::CMD_NOP;
            p_vci_ini_d.eop     = false;
            break;

        case CMD_INS_MISS:
            p_vci_ini_d.cmdval  = true;
            p_vci_ini_d.address = r_icache_vci_paddr.read() & m_icache_yzmask;
            p_vci_ini_d.wdata   = 0;
            p_vci_ini_d.be      = 0xF;
            p_vci_ini_d.trdid   = 0;
            p_vci_ini_d.pktid   = TYPE_READ_INS_MISS;
            p_vci_ini_d.plen    = m_icache_words << 2;
            p_vci_ini_d.cmd     = vci_param::CMD_READ;
            p_vci_ini_d.eop     = true;
            break;

        case CMD_INS_UNC:
            p_vci_ini_d.cmdval  = true;
            p_vci_ini_d.address = r_icache_vci_paddr.read() & ~0x3;
            p_vci_ini_d.wdata   = 0;
            p_vci_ini_d.be      = 0xF;
            p_vci_ini_d.trdid   = 0;
            p_vci_ini_d.pktid   = TYPE_READ_INS_UNC;
            p_vci_ini_d.plen    = 4;
            p_vci_ini_d.cmd     = vci_param::CMD_READ;
            p_vci_ini_d.eop     = true;
            break;

        case CMD_DATA_MISS:
            p_vci_ini_d.cmdval  = true;
            p_vci_ini_d.address = r_dcache_vci_paddr.read() & m_dcache_yzmask;
            p_vci_ini_d.wdata   = 0;
            p_vci_ini_d.be      = 0xF;
            p_vci_ini_d.trdid   = 0;
            p_vci_ini_d.pktid   = TYPE_READ_DATA_MISS;
            p_vci_ini_d.plen    = m_dcache_words << 2;
            p_vci_ini_d.cmd     = vci_param::CMD_READ;
            p_vci_ini_d.eop     = true;
            break;

        case CMD_DATA_UNC:
            p_vci_ini_d.cmdval  = true;
            p_vci_ini_d.address = r_dcache_vci_paddr.read() & ~0x3;
            p_vci_ini_d.wdata   = 0;
            p_vci_ini_d.be      = r_dcache_vci_unc_be.read();
            p_vci_ini_d.trdid   = 0;
            p_vci_ini_d.pktid   = TYPE_READ_DATA_UNC;
            p_vci_ini_d.plen    = 4;
            p_vci_ini_d.cmd     = vci_param::CMD_READ;
            p_vci_ini_d.eop     = true;
            break;

        case CMD_DATA_WRITE:
            p_vci_ini_d.cmdval  = true;
            p_vci_ini_d.address = r_wbuf.getAddress(r_vci_cmd_cpt.read()) & ~0x3;
            p_vci_ini_d.wdata   = r_wbuf.getData(r_vci_cmd_cpt.read());
            p_vci_ini_d.be      = r_wbuf.getBe(r_vci_cmd_cpt.read());
            p_vci_ini_d.trdid   = r_wbuf.getIndex();
            p_vci_ini_d.pktid   = TYPE_WRITE;
            p_vci_ini_d.plen    = (r_vci_cmd_max.read() - r_vci_cmd_min.read() + 1) << 2;
            p_vci_ini_d.cmd     = vci_param::CMD_WRITE;
            p_vci_ini_d.eop     = (r_vci_cmd_cpt.read() == r_vci_cmd_max.read());
            break;

        case CMD_DATA_LL:
            p_vci_ini_d.cmdval  = true;
            p_vci_ini_d.address = r_dcache_vci_paddr.read() & ~0x3;
            p_vci_ini_d.wdata   = 0;
            p_vci_ini_d.be      = 0xF;
            p_vci_ini_d.trdid   = 0;    // TODO local table index
            p_vci_ini_d.pktid   = TYPE_LL;
            p_vci_ini_d.plen    = 8;
            p_vci_ini_d.cmd     = vci_param::CMD_LOCKED_READ;
            p_vci_ini_d.eop     = true;
            break;

        case CMD_DATA_SC:
            p_vci_ini_d.cmdval  = true;
            p_vci_ini_d.address = r_dcache_vci_paddr.read() & ~0x3;
            if ( r_vci_cmd_cpt.read() == 0 ) p_vci_ini_d.wdata = r_sc_key.read();
            else                             p_vci_ini_d.wdata = r_dcache_vci_sc_data.read();
            p_vci_ini_d.be      = 0xF;
            p_vci_ini_d.trdid   = 0;
            p_vci_ini_d.pktid   = TYPE_SC;
            p_vci_ini_d.plen    = 8;
            p_vci_ini_d.cmd     = vci_param::CMD_NOP;
            p_vci_ini_d.eop     = (r_vci_cmd_cpt.read() == 1);
            break;
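        // CAS is sent as a two-flit command (plen = 8 bytes): the first flit
        // carries the expected old value, the second one the new value.
        // Like SC, the transaction type is carried by the pktid field
        // (TYPE_CAS), the VCI cmd field being CMD_NOP.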
        case CMD_DATA_CAS:
            p_vci_ini_d.cmdval  = true;
            p_vci_ini_d.address = r_dcache_vci_paddr.read() & ~0x3;
            if ( r_vci_cmd_cpt.read() == 0 ) p_vci_ini_d.wdata = r_dcache_vci_cas_old.read();
            else                             p_vci_ini_d.wdata = r_dcache_vci_cas_new.read();
            p_vci_ini_d.be      = 0xF;
            p_vci_ini_d.trdid   = 0;
            p_vci_ini_d.pktid   = TYPE_CAS;
            p_vci_ini_d.plen    = 8;
            p_vci_ini_d.cmd     = vci_param::CMD_NOP;
            p_vci_ini_d.eop     = (r_vci_cmd_cpt.read() == 1);
            break;
    } // end switch r_vci_cmd_fsm

    //////////////////////////////////////////////////////////
    // VCI initiator response on the direct network
    // it depends on the VCI RSP state

    switch ( r_vci_rsp_fsm.read() )
    {
        case RSP_DATA_WRITE : p_vci_ini_d.rspack = true;                        break;
        case RSP_INS_MISS   : p_vci_ini_d.rspack = r_vci_rsp_fifo_icache.wok(); break;
        case RSP_INS_UNC    : p_vci_ini_d.rspack = r_vci_rsp_fifo_icache.wok(); break;
        case RSP_DATA_MISS  : p_vci_ini_d.rspack = r_vci_rsp_fifo_dcache.wok(); break;
        case RSP_DATA_UNC   : p_vci_ini_d.rspack = r_vci_rsp_fifo_dcache.wok(); break;
        case RSP_DATA_LL    : p_vci_ini_d.rspack = r_vci_rsp_fifo_dcache.wok(); break;
        case RSP_IDLE       : p_vci_ini_d.rspack = false;                       break;
    } // end switch r_vci_rsp_fsm

    ////////////////////////////////////////////////////////////////
    // VCI target command and response on the coherence network

    switch ( r_tgt_fsm.read() )
    {
        case TGT_IDLE:
        case TGT_UPDT_WORD:
        case TGT_UPDT_DATA:
            p_vci_tgt_c.cmdack = true;
            p_vci_tgt_c.rspval = false;
            break;

        case TGT_RSP_BROADCAST:
            p_vci_tgt_c.cmdack = false;
            p_vci_tgt_c.rspval = not r_tgt_icache_req.read() and
                                 not r_tgt_dcache_req.read() and
                                 ( r_tgt_icache_rsp.read() or r_tgt_dcache_rsp.read() );
            p_vci_tgt_c.rsrcid = r_tgt_srcid.read();
            p_vci_tgt_c.rpktid = r_tgt_pktid.read();
            p_vci_tgt_c.rtrdid = r_tgt_trdid.read();
            p_vci_tgt_c.rdata  = 0;
            p_vci_tgt_c.rerror = 0;
            p_vci_tgt_c.reop   = true;
            break;

        case TGT_RSP_ICACHE:
            p_vci_tgt_c.cmdack = false;
            p_vci_tgt_c.rspval = not r_tgt_icache_req.read() and r_tgt_icache_rsp.read();
            p_vci_tgt_c.rsrcid = r_tgt_srcid.read();
            p_vci_tgt_c.rpktid = r_tgt_pktid.read();
            p_vci_tgt_c.rtrdid = r_tgt_trdid.read();
            p_vci_tgt_c.rdata  = 0;
            p_vci_tgt_c.rerror = 0;
            p_vci_tgt_c.reop   = true;
            break;

        case TGT_RSP_DCACHE:
            p_vci_tgt_c.cmdack = false;
            p_vci_tgt_c.rspval = not r_tgt_dcache_req.read() and r_tgt_dcache_rsp.read();
            p_vci_tgt_c.rsrcid = r_tgt_srcid.read();
            p_vci_tgt_c.rpktid = r_tgt_pktid.read();
            p_vci_tgt_c.rtrdid = r_tgt_trdid.read();
            p_vci_tgt_c.rdata  = 0;
            p_vci_tgt_c.rerror = 0;
            p_vci_tgt_c.reop   = true;
            break;

        case TGT_REQ_BROADCAST:
        case TGT_REQ_ICACHE:
        case TGT_REQ_DCACHE:
            p_vci_tgt_c.cmdack = false;
            p_vci_tgt_c.rspval = false;
            break;
    } // end switch TGT_FSM
} // end genMoore()

}} // namespace caba, namespace soclib

// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:

// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4