/* -*- c++ -*-
 * File : vci_cc_vcache_wrapper_v4.cpp
 * Copyright (c) UPMC, Lip6, SoC
 * Authors : Alain GREINER, Yang GAO
 *
 * SOCLIB_LGPL_HEADER_BEGIN
 *
 * This file is part of SoCLib, GNU LGPLv2.1.
 *
 * SoCLib is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; version 2.1 of the License.
 *
 * SoCLib is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with SoCLib; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA
 *
 * SOCLIB_LGPL_HEADER_END
 */

#include <cassert>
#include <cstring>
#include "arithmetics.h"
#include "../include/vci_cc_vcache_wrapper_v4.h"

#define DEBUG_DCACHE     1
#define DEBUG_ICACHE     1
#define DEBUG_CLEANUP    0
#define DEBUG_INVAL_ITLB 1
#define DEBUG_INVAL_DTLB 1

namespace soclib {
namespace caba {

namespace {
const char *icache_fsm_state_str[] = {
    "ICACHE_IDLE", "ICACHE_XTN_TLB_FLUSH", "ICACHE_XTN_CACHE_FLUSH", "ICACHE_XTN_TLB_INVAL",
    "ICACHE_XTN_CACHE_INVAL_VA", "ICACHE_XTN_CACHE_INVAL_PA", "ICACHE_XTN_CACHE_INVAL_GO", "ICACHE_TLB_WAIT",
    "ICACHE_MISS_VICTIM", "ICACHE_MISS_INVAL", "ICACHE_MISS_WAIT", "ICACHE_MISS_UPDT",
    "ICACHE_UNC_WAIT", "ICACHE_CC_CHECK", "ICACHE_CC_INVAL", "ICACHE_CC_UPDT",
};
const char *dcache_fsm_state_str[] = {
    "DCACHE_IDLE", "DCACHE_TLB_MISS", "DCACHE_TLB_PTE1_GET", "DCACHE_TLB_PTE1_SELECT",
    "DCACHE_TLB_PTE1_UPDT", "DCACHE_TLB_PTE2_GET", "DCACHE_TLB_PTE2_SELECT", "DCACHE_TLB_PTE2_UPDT",
    "DCACHE_TLB_SC_UPDT", "DCACHE_TLB_SC_WAIT", "DCACHE_TLB_RETURN", "DCACHE_XTN_SWITCH",
    "DCACHE_XTN_SYNC", "DCACHE_XTN_IC_INVAL_VA", "DCACHE_XTN_IC_FLUSH", "DCACHE_XTN_IC_INVAL_PA",
    "DCACHE_XTN_IT_INVAL", "DCACHE_XTN_DC_FLUSH", "DCACHE_XTN_DC_INVAL_VA", "DCACHE_XTN_DC_INVAL_PA",
    "DCACHE_XTN_DC_INVAL_WAIT", "DCACHE_XTN_DC_INVAL_GO", "DCACHE_XTN_DT_INVAL", "DCACHE_WRITE_TLB_DIRTY",
    "DCACHE_WRITE_CACHE_DIRTY", "DCACHE_WRITE_SC_WAIT", "DCACHE_WRITE_UNC_WAIT", "DCACHE_MISS_VICTIM",
    "DCACHE_MISS_INVAL", "DCACHE_MISS_INVAL_WAIT", "DCACHE_MISS_WAIT", "DCACHE_MISS_UPDT",
    "DCACHE_UNC_WAIT", "DCACHE_CC_CHECK", "DCACHE_CC_INVAL", "DCACHE_CC_UPDT",
    "DCACHE_CC_WAIT",
};
const char *cmd_fsm_state_str[] = {
    "CMD_IDLE", "CMD_INS_MISS", "CMD_INS_UNC", "CMD_DATA_MISS",
    "CMD_DATA_UNC", "CMD_DATA_WRITE", "CMD_DATA_SC",
};
const char *rsp_fsm_state_str[] = {
    "RSP_IDLE", "RSP_INS_MISS", "RSP_INS_UNC", "RSP_DATA_MISS",
    "RSP_DATA_UNC", "RSP_DATA_WRITE",
};
const char *cleanup_fsm_state_str[] = {
    "CLEANUP_DATA_IDLE", "CLEANUP_DATA_GO", "CLEANUP_INS_IDLE", "CLEANUP_INS_GO",
};
const char *tgt_fsm_state_str[] = {
    "TGT_IDLE", "TGT_UPDT_WORD", "TGT_UPDT_DATA", "TGT_REQ_BROADCAST",
    "TGT_REQ_ICACHE", "TGT_REQ_DCACHE", "TGT_RSP_BROADCAST", "TGT_RSP_ICACHE",
    "TGT_RSP_DCACHE",
};
const char *inval_itlb_fsm_state_str[] = {
    "INVAL_ITLB_IDLE", "INVAL_ITLB_SCAN",
};
const char *inval_dtlb_fsm_state_str[] = {
    "INVAL_DTLB_IDLE", "INVAL_DTLB_SCAN",
};
} // end anonymous namespace

#define tmpl(...) template<typename vci_param, typename iss_t> __VA_ARGS__ VciCcVCacheWrapperV4<vci_param, iss_t>
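// The tmpl() macro above factors out the template boilerplate of every
// method definition in this file. For example, tmpl(void)::print_cpi()
// expands to:
//     template<typename vci_param, typename iss_t>
//     void VciCcVCacheWrapperV4<vci_param, iss_t>::print_cpi()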
using soclib::common::uint32_log2;

/////////////////////////////////
tmpl(/**/)::VciCcVCacheWrapperV4(
    sc_module_name                      name,
    int                                 proc_id,
    const soclib::common::MappingTable  &mtp,
    const soclib::common::MappingTable  &mtc,
    const soclib::common::IntTab        &initiator_index_d,
    const soclib::common::IntTab        &initiator_index_c,
    const soclib::common::IntTab        &target_index_d,
    size_t                              itlb_ways,
    size_t                              itlb_sets,
    size_t                              dtlb_ways,
    size_t                              dtlb_sets,
    size_t                              icache_ways,
    size_t                              icache_sets,
    size_t                              icache_words,
    size_t                              dcache_ways,
    size_t                              dcache_sets,
    size_t                              dcache_words,
    size_t                              wbuf_nlines,
    size_t                              wbuf_nwords,
    uint32_t                            max_frozen_cycles,
    uint32_t                            debug_start_cycle,
    bool                                debug_ok)
    : soclib::caba::BaseModule(name),
      p_clk("clk"),
      p_resetn("resetn"),
      p_vci_ini_d("vci_ini_d"),
      p_vci_ini_c("vci_ini_c"),
      p_vci_tgt_c("vci_tgt_c"),
      m_cacheability_table(mtp.getCacheabilityTable()),
      m_segment(mtc.getSegment(target_index_d)),
      m_srcid_d(mtp.indexForId(initiator_index_d)),
      m_srcid_c(mtp.indexForId(initiator_index_c)),
      m_itlb_ways(itlb_ways), m_itlb_sets(itlb_sets),
      m_dtlb_ways(dtlb_ways), m_dtlb_sets(dtlb_sets),
      m_icache_ways(icache_ways), m_icache_sets(icache_sets),
      m_icache_yzmask((~0)<<(uint32_log2(icache_words) + 2)),
      m_icache_words(icache_words),
      m_dcache_ways(dcache_ways), m_dcache_sets(dcache_sets),
      m_dcache_yzmask((~0)<<(uint32_log2(dcache_words) + 2)),
      m_dcache_words(dcache_words),
      m_max_frozen_cycles(max_frozen_cycles),
      m_paddr_nbits(vci_param::N),
      m_debug_start_cycle(debug_start_cycle),
      m_debug_ok(debug_ok),

      r_mmu_ptpr("r_mmu_ptpr"), r_mmu_mode("r_mmu_mode"),
      r_mmu_word_lo("r_mmu_word_lo"), r_mmu_word_hi("r_mmu_word_hi"),
      r_mmu_ibvar("r_mmu_ibvar"), r_mmu_dbvar("r_mmu_dbvar"),
      r_mmu_ietr("r_mmu_ietr"), r_mmu_detr("r_mmu_detr"),

      r_icache_fsm("r_icache_fsm"), r_icache_fsm_save("r_icache_fsm_save"),
      r_icache_vci_paddr("r_icache_vci_paddr"), r_icache_vaddr_save("r_icache_vaddr_save"),
      r_icache_miss_way("r_icache_miss_way"), r_icache_miss_set("r_icache_miss_set"),
      r_icache_miss_word("r_icache_miss_word"), r_icache_miss_inval("r_icache_miss_inval"),
      r_icache_cc_way("r_icache_cc_way"), r_icache_cc_set("r_icache_cc_set"),
      r_icache_cc_word("r_icache_cc_word"), r_icache_flush_count("r_icache_flush_count"),
      r_icache_miss_req("r_icache_miss_req"), r_icache_unc_req("r_icache_unc_req"),
      r_icache_tlb_miss_req("r_icache_tlb_miss_req"),
      r_icache_tlb_rsp_error("r_icache_tlb_rsp_error"),
      r_icache_cleanup_req("r_icache_cleanup_req"), r_icache_cleanup_line("r_icache_cleanup_line"),

      r_dcache_fsm("r_dcache_fsm"), r_dcache_fsm_save("r_dcache_fsm_save"),
      r_dcache_p0_valid("r_dcache_p0_valid"), r_dcache_p0_vaddr("r_dcache_p0_vaddr"),
      r_dcache_p0_wdata("r_dcache_p0_wdata"), r_dcache_p0_be("r_dcache_p0_be"),
      r_dcache_p0_paddr("r_dcache_p0_paddr"), r_dcache_p0_cacheable("r_dcache_p0_cacheable"),
      r_dcache_p0_tlb_way("r_dcache_p0_tlb_way"), r_dcache_p0_tlb_set("r_dcache_p0_tlb_set"),
      r_dcache_p0_tlb_nline("r_dcache_p0_tlb_nline"), r_dcache_p0_tlb_dirty("r_dcache_p0_tlb_dirty"),
      r_dcache_p0_tlb_big("r_dcache_p0_tlb_big"),
      r_dcache_p1_valid("r_dcache_p1_valid"), r_dcache_p1_updt_cache("r_dcache_p1_updt_cache"),
      r_dcache_p1_set_dirty("r_dcache_p1_set_dirty"), r_dcache_p1_vaddr("r_dcache_p1_vaddr"),
      r_dcache_p1_wdata("r_dcache_p1_wdata"), r_dcache_p1_be("r_dcache_p1_be"),
      r_dcache_p1_paddr("r_dcache_p1_paddr"), r_dcache_p1_cache_way("r_dcache_p1_cache_way"),
      r_dcache_p1_cache_set("r_dcache_p1_cache_set"), r_dcache_p1_cache_word("r_dcache_p1_cache_word"),
      r_dcache_p1_tlb_way("r_dcache_p1_tlb_way"),
      r_dcache_p1_tlb_set("r_dcache_p1_tlb_set"), r_dcache_p1_tlb_nline("r_dcache_p1_tlb_nline"),
      r_dcache_p2_vaddr("r_dcache_p2_vaddr"), r_dcache_p2_tlb_way("r_dcache_p2_tlb_way"),
      r_dcache_p2_tlb_set("r_dcache_p2_tlb_set"), r_dcache_p2_set_dirty("r_dcache_p2_set_dirty"),
      r_dcache_p2_pte_paddr("r_dcache_p2_pte_paddr"), r_dcache_p2_pte_way("r_dcache_p2_pte_way"),
      r_dcache_p2_pte_set("r_dcache_p2_pte_set"), r_dcache_p2_pte_word("r_dcache_p2_pte_word"),
      r_dcache_p2_pte_flags("r_dcache_p2_pte_flags"),

      r_dcache_vci_paddr("r_dcache_vci_paddr"), r_dcache_vci_miss_req("r_dcache_vci_miss_req"),
      r_dcache_vci_unc_req("r_dcache_vci_unc_req"), r_dcache_vci_unc_be("r_dcache_vci_unc_be"),
      r_dcache_vci_sc_req("r_dcache_vci_sc_req"), r_dcache_vci_sc_old("r_dcache_vci_sc_old"),
      r_dcache_vci_sc_new("r_dcache_vci_sc_new"),
      r_dcache_xtn_way("r_dcache_xtn_way"), r_dcache_xtn_set("r_dcache_xtn_set"),
      r_dcache_pending_unc_write("r_dcache_pending_unc_write"),
      r_dcache_miss_type("r_dcache_miss_type"), r_dcache_miss_word("r_dcache_miss_word"),
      r_dcache_miss_way("r_dcache_miss_way"), r_dcache_miss_set("r_dcache_miss_set"),
      r_dcache_miss_inval("r_dcache_miss_inval"),
      r_dcache_cc_way("r_dcache_cc_way"), r_dcache_cc_set("r_dcache_cc_set"),
      r_dcache_cc_word("r_dcache_cc_word"), r_dcache_flush_count("r_dcache_flush_count"),
      r_dcache_tlb_vaddr("r_dcache_tlb_vaddr"), r_dcache_tlb_ins("r_dcache_tlb_ins"),
      r_dcache_tlb_pte_flags("r_dcache_tlb_pte_flags"), r_dcache_tlb_pte_ppn("r_dcache_tlb_pte_ppn"),
      r_dcache_tlb_cache_way("r_dcache_tlb_cache_way"), r_dcache_tlb_cache_set("r_dcache_tlb_cache_set"),
      r_dcache_tlb_cache_word("r_dcache_tlb_cache_word"),
      r_dcache_tlb_way("r_dcache_tlb_way"), r_dcache_tlb_set("r_dcache_tlb_set"),
      r_dcache_ll_valid("r_dcache_ll_valid"), r_dcache_ll_data("r_dcache_ll_data"),
      r_dcache_ll_vaddr("r_dcache_ll_vaddr"),
      r_dcache_itlb_inval_req("r_dcache_itlb_inval_req"),
      r_dcache_dtlb_inval_req("r_dcache_dtlb_inval_req"),
      r_dcache_tlb_inval_line("r_dcache_tlb_inval_line"),
      r_dcache_xtn_req("r_dcache_xtn_req"), r_dcache_xtn_opcode("r_dcache_xtn_opcode"),
      r_dcache_cleanup_req("r_dcache_cleanup_req"), r_dcache_cleanup_line("r_dcache_cleanup_line"),

      r_vci_cmd_fsm("r_vci_cmd_fsm"), r_vci_cmd_min("r_vci_cmd_min"),
      r_vci_cmd_max("r_vci_cmd_max"), r_vci_cmd_cpt("r_vci_cmd_cpt"),
      r_vci_cmd_imiss_prio("r_vci_cmd_imiss_prio"),

      r_vci_rsp_fsm("r_vci_rsp_fsm"), r_vci_rsp_cpt("r_vci_rsp_cpt"),
      r_vci_rsp_ins_error("r_vci_rsp_ins_error"), r_vci_rsp_data_error("r_vci_rsp_data_error"),
      r_vci_rsp_fifo_icache("r_vci_rsp_fifo_icache", 2),   // 2 words depth
      r_vci_rsp_fifo_dcache("r_vci_rsp_fifo_dcache", 2),   // 2 words depth

      r_cleanup_fsm("r_cleanup_fsm"), r_cleanup_trdid("r_cleanup_trdid"),
      r_cleanup_buffer(4),                                 // up to 4 simultaneous cleanups

      r_tgt_fsm("r_tgt_fsm"), r_tgt_paddr("r_tgt_paddr"),
      r_tgt_word_count("r_tgt_word_count"), r_tgt_word_min("r_tgt_word_min"),
      r_tgt_word_max("r_tgt_word_max"), r_tgt_update("r_tgt_update"),
      r_tgt_update_data("r_tgt_update_data"), r_tgt_srcid("r_tgt_srcid"),
      r_tgt_pktid("r_tgt_pktid"), r_tgt_trdid("r_tgt_trdid"),
      r_tgt_icache_req("r_tgt_icache_req"), r_tgt_dcache_req("r_tgt_dcache_req"),
      r_tgt_icache_rsp("r_tgt_icache_rsp"), r_tgt_dcache_rsp("r_tgt_dcache_rsp"),

      r_inval_itlb_fsm("r_inval_itlb_fsm"), r_inval_itlb_count("r_inval_itlb_count"),
      r_inval_dtlb_fsm("r_inval_dtlb_fsm"), r_inval_dtlb_count("r_inval_dtlb_count"),

      r_iss(this->name(), proc_id),
      r_wbuf("wbuf", wbuf_nwords, wbuf_nlines, dcache_words),
      r_icache("icache", icache_ways, icache_sets, icache_words),
      r_dcache("dcache", dcache_ways, dcache_sets, dcache_words),
      r_itlb(itlb_ways, itlb_sets, vci_param::N),
      r_dtlb(dtlb_ways, dtlb_sets, vci_param::N)
{
    assert( ((icache_words*vci_param::B) < (1<<vci_param::K)) and
            "Need more PLEN bits.");

    assert( (vci_param::T > 2) and ((1<<(vci_param::T-1)) >= (wbuf_nlines)) and
            "Need more TRDID bits.");

    assert( (icache_words == dcache_words) and
            "icache_words and dcache_words parameters must be equal");

    r_mmu_params = (uint32_log2(m_dtlb_ways)   << 29) | (uint32_log2(m_dtlb_sets)   << 25) |
                   (uint32_log2(m_dcache_ways) << 22) | (uint32_log2(m_dcache_sets) << 18) |
                   (uint32_log2(m_itlb_ways)   << 15) | (uint32_log2(m_itlb_sets)   << 11) |
                   (uint32_log2(m_icache_ways) << 8)  | (uint32_log2(m_icache_sets) << 4)  |
                   (uint32_log2(m_icache_words<<2));

    r_mmu_release = (uint32_t)(1 << 16) | 0x1;

    r_tgt_buf        = new uint32_t[dcache_words];
    r_tgt_be         = new vci_be_t[dcache_words];
    r_dcache_in_itlb = new bool[dcache_ways*dcache_sets];
    r_dcache_in_dtlb = new bool[dcache_ways*dcache_sets];

    SC_METHOD(transition);
    dont_initialize();
    sensitive << p_clk.pos();

    SC_METHOD(genMoore);
    dont_initialize();
    sensitive << p_clk.neg();

    typename iss_t::CacheInfo cache_info;
    cache_info.has_mmu          = true;
    cache_info.icache_line_size = icache_words*sizeof(uint32_t);
    cache_info.icache_assoc     = icache_ways;
    cache_info.icache_n_lines   = icache_sets;
    cache_info.dcache_line_size = dcache_words*sizeof(uint32_t);
    cache_info.dcache_assoc     = dcache_ways;
    cache_info.dcache_n_lines   = dcache_sets;
    r_iss.setCacheInfo(cache_info);
}
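// Field map of the r_mmu_params configuration word, as implied by the
// shift amounts used in the constructor above:
//   [31:29] log2(dtlb ways)    [28:25] log2(dtlb sets)
//   [24:22] log2(dcache ways)  [21:18] log2(dcache sets)
//   [17:15] log2(itlb ways)    [14:11] log2(itlb sets)
//   [10:8]  log2(icache ways)  [7:4]   log2(icache sets)
//   [3:0]   log2(icache line size in bytes)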
/////////////////////////////////////
tmpl(/**/)::~VciCcVCacheWrapperV4()
/////////////////////////////////////
{
    delete [] r_tgt_be;
    delete [] r_tgt_buf;
    delete [] r_dcache_in_itlb;
    delete [] r_dcache_in_dtlb;
}

////////////////////////
tmpl(void)::print_cpi()
////////////////////////
{
    std::cout << name() << " CPI = "
              << (float)m_cpt_total_cycles/(m_cpt_total_cycles - m_cpt_frz_cycles)
              << std::endl;
}

////////////////////////////////////
tmpl(void)::print_trace(size_t mode)
////////////////////////////////////
{
    // b0 : write buffer print trace
    // b1 : write buffer verbose
    // b2 : dcache print trace
    // b3 : icache print trace

    typename iss_t::InstructionRequest  ireq;
    typename iss_t::InstructionResponse irsp;
    typename iss_t::DataRequest         dreq;
    typename iss_t::DataResponse        drsp;

    ireq.valid       = m_ireq_valid;
    ireq.addr        = m_ireq_addr;
    ireq.mode        = m_ireq_mode;

    irsp.valid       = m_irsp_valid;
    irsp.instruction = m_irsp_instruction;
    irsp.error       = m_irsp_error;

    dreq.valid       = m_dreq_valid;
    dreq.addr        = m_dreq_addr;
    dreq.mode        = m_dreq_mode;
    dreq.type        = m_dreq_type;
    dreq.wdata       = m_dreq_wdata;
    dreq.be          = m_dreq_be;

    drsp.valid       = m_drsp_valid;
    drsp.rdata       = m_drsp_rdata;
    drsp.error       = m_drsp_error;

    std::cout << std::dec << "PROC " << name() << std::endl;
    std::cout << "  " << ireq << std::endl;
    std::cout << "  " << irsp << std::endl;
    std::cout << "  " << dreq << std::endl;
    std::cout << "  " << drsp << std::endl;

    std::cout << "  " << icache_fsm_state_str[r_icache_fsm.read()]
              << " | " << dcache_fsm_state_str[r_dcache_fsm.read()]
              << " | " << cmd_fsm_state_str[r_vci_cmd_fsm.read()]
              << " | " << rsp_fsm_state_str[r_vci_rsp_fsm.read()]
              << " | " << tgt_fsm_state_str[r_tgt_fsm.read()] << std::endl;
    std::cout << "  " << cleanup_fsm_state_str[r_cleanup_fsm.read()]
              << " | " << inval_itlb_fsm_state_str[r_inval_itlb_fsm]
              << " | " << inval_dtlb_fsm_state_str[r_inval_dtlb_fsm];
    if (r_dcache_p0_valid.read()) std::cout << " | P1_WRITE";
    if (r_dcache_p1_valid.read()) std::cout << " | P2_WRITE";
    std::cout << std::endl;

    if (mode & 0x1) { r_wbuf.printTrace((mode>>1)&1); }
    if (mode & 0x4) { std::cout << "  Data cache" << std::endl; r_dcache.printTrace(); }
    if (mode & 0x8) { std::cout << "  Instruction cache" << std::endl; r_icache.printTrace(); }
}

//////////////////////////////////////////
tmpl(void)::cache_monitor( paddr_t addr )
//////////////////////////////////////////
{
    size_t   cache_way;
    size_t   cache_set;
    size_t   cache_word;
    uint32_t cache_rdata;
    bool     cache_hit = r_dcache.read( addr, &cache_rdata, &cache_way, &cache_set, &cache_word );
    if ( cache_hit != m_debug_previous_hit )
    {
        std::cout << "PROC " << name()
                  << " cache change at cycle " << std::dec << m_cpt_total_cycles
                  << " for address " << std::hex << addr
                  << " / HIT = " << cache_hit << std::endl;
    }
    m_debug_previous_hit = cache_hit;
}

/*
////////////////////////
tmpl(void)::print_stats()
////////////////////////
{
    float run_cycles = (float)(m_cpt_total_cycles - m_cpt_frz_cycles);
    std::cout << name() << std::endl
        << "- CPI                    = " << (float)m_cpt_total_cycles/run_cycles << std::endl
        << "- READ RATE              = " << (float)m_cpt_read/run_cycles << std::endl
        << "- WRITE RATE             = " << (float)m_cpt_write/run_cycles << std::endl
        << "- IMISS_RATE             = " << (float)m_cpt_ins_miss/m_cpt_ins_read << std::endl
        << "- DMISS RATE             = " << (float)m_cpt_data_miss/(m_cpt_read-m_cpt_unc_read) << std::endl
        << "- INS MISS COST          = " << (float)m_cost_ins_miss_frz/m_cpt_ins_miss << std::endl
        << "- DATA MISS COST         = " << (float)m_cost_data_miss_frz/m_cpt_data_miss << std::endl
        << "- WRITE COST             = " << (float)m_cost_write_frz/m_cpt_write << std::endl
        << "- UNC COST               = " << (float)m_cost_unc_read_frz/m_cpt_unc_read << std::endl
        << "- UNCACHED READ RATE     = " << (float)m_cpt_unc_read/m_cpt_read << std::endl
        << "- CACHED WRITE RATE      = " << (float)m_cpt_write_cached/m_cpt_write << std::endl
        << "- INS TLB MISS RATE      = " << (float)m_cpt_ins_tlb_miss/m_cpt_ins_tlb_read << std::endl
        << "- DATA TLB MISS RATE     = " << (float)m_cpt_data_tlb_miss/m_cpt_data_tlb_read << std::endl
        << "- ITLB MISS COST         = " << (float)m_cost_ins_tlb_miss_frz/m_cpt_ins_tlb_miss << std::endl
        << "- DTLB MISS COST         = " << (float)m_cost_data_tlb_miss_frz/m_cpt_data_tlb_miss << std::endl
        << "- ITLB UPDATE ACC COST   = " << (float)m_cost_ins_tlb_update_acc_frz/m_cpt_ins_tlb_update_acc << std::endl
        << "- DTLB UPDATE ACC COST   = " << (float)m_cost_data_tlb_update_acc_frz/m_cpt_data_tlb_update_acc << std::endl
        << "- DTLB UPDATE DIRTY COST = " << (float)m_cost_data_tlb_update_dirty_frz/m_cpt_data_tlb_update_dirty << std::endl
        << "- ITLB HIT IN DCACHE RATE= " << (float)m_cpt_ins_tlb_hit_dcache/m_cpt_ins_tlb_miss << std::endl
        << "- DTLB HIT IN DCACHE RATE= " << (float)m_cpt_data_tlb_hit_dcache/m_cpt_data_tlb_miss << std::endl
        << "- DCACHE FROZEN BY ITLB  = " << (float)m_cost_ins_tlb_occup_cache_frz/m_cpt_dcache_frz_cycles << std::endl
        << "- DCACHE FOR TLB %       = " << (float)m_cpt_tlb_occup_dcache/(m_dcache_ways*m_dcache_sets) << std::endl
        << "- NB CC BROADCAST        = " << m_cpt_cc_broadcast << std::endl
        << "- NB CC UPDATE DATA      = " << m_cpt_cc_update_data << std::endl
        << "- NB CC INVAL DATA       = " << m_cpt_cc_inval_data << std::endl
        << "- NB CC INVAL INS        = " << m_cpt_cc_inval_ins << std::endl
        << "- CC BROADCAST COST      = " << (float)m_cost_broadcast_frz/m_cpt_cc_broadcast << std::endl
        << "- CC UPDATE DATA COST    = " << (float)m_cost_updt_data_frz/m_cpt_cc_update_data << std::endl
        << "- CC INVAL DATA COST     = " << (float)m_cost_inval_data_frz/m_cpt_cc_inval_data << std::endl
        << "- CC INVAL INS COST      = " << (float)m_cost_inval_ins_frz/m_cpt_cc_inval_ins << std::endl
        << "- NB CC CLEANUP DATA     = " << m_cpt_cc_cleanup_data << std::endl
        << "- NB CC CLEANUP INS      = " << m_cpt_cc_cleanup_ins << std::endl
        << "- IMISS TRANSACTION      = " << (float)m_cost_imiss_transaction/m_cpt_imiss_transaction << std::endl
        << "- DMISS TRANSACTION      = " << (float)m_cost_dmiss_transaction/m_cpt_dmiss_transaction << std::endl
        << "- UNC TRANSACTION        = " << (float)m_cost_unc_transaction/m_cpt_unc_transaction << std::endl
        << "- WRITE TRANSACTION      = " << (float)m_cost_write_transaction/m_cpt_write_transaction << std::endl
        << "- WRITE LENGTH           = " << (float)m_length_write_transaction/m_cpt_write_transaction << std::endl
        << "- ITLB MISS TRANSACTION  = " << (float)m_cost_itlbmiss_transaction/m_cpt_itlbmiss_transaction << std::endl
        << "- DTLB MISS TRANSACTION  = " << (float)m_cost_dtlbmiss_transaction/m_cpt_dtlbmiss_transaction << std::endl;
}

////////////////////////
tmpl(void)::clear_stats()
////////////////////////
{
    m_cpt_dcache_data_read = 0;  m_cpt_dcache_data_write = 0;
    m_cpt_dcache_dir_read  = 0;  m_cpt_dcache_dir_write  = 0;
    m_cpt_icache_data_read = 0;  m_cpt_icache_data_write = 0;
    m_cpt_icache_dir_read  = 0;  m_cpt_icache_dir_write  = 0;

    m_cpt_frz_cycles = 0;  m_cpt_dcache_frz_cycles = 0;  m_cpt_total_cycles = 0;

    m_cpt_read = 0;  m_cpt_write = 0;  m_cpt_data_miss = 0;  m_cpt_ins_miss = 0;
    m_cpt_unc_read = 0;  m_cpt_write_cached = 0;  m_cpt_ins_read = 0;

    m_cost_write_frz = 0;  m_cost_data_miss_frz = 0;
    m_cost_unc_read_frz = 0;  m_cost_ins_miss_frz = 0;

    m_cpt_imiss_transaction = 0;  m_cpt_dmiss_transaction = 0;  m_cpt_unc_transaction = 0;
    m_cpt_write_transaction = 0;  m_cpt_icache_unc_transaction = 0;

    m_cost_imiss_transaction = 0;  m_cost_dmiss_transaction = 0;  m_cost_unc_transaction = 0;
    m_cost_write_transaction = 0;  m_cost_icache_unc_transaction = 0;
    m_length_write_transaction = 0;

    m_cpt_ins_tlb_read = 0;  m_cpt_ins_tlb_miss = 0;  m_cpt_ins_tlb_update_acc = 0;
    m_cpt_data_tlb_read = 0;  m_cpt_data_tlb_miss = 0;  m_cpt_data_tlb_update_acc = 0;
    m_cpt_data_tlb_update_dirty = 0;  m_cpt_ins_tlb_hit_dcache = 0;
    m_cpt_data_tlb_hit_dcache = 0;  m_cpt_ins_tlb_occup_cache = 0;
    m_cpt_data_tlb_occup_cache = 0;

    m_cost_ins_tlb_miss_frz = 0;  m_cost_data_tlb_miss_frz = 0;
    m_cost_ins_tlb_update_acc_frz = 0;  m_cost_data_tlb_update_acc_frz = 0;
    m_cost_data_tlb_update_dirty_frz = 0;
    m_cost_ins_tlb_occup_cache_frz = 0;  m_cost_data_tlb_occup_cache_frz = 0;

    m_cpt_itlbmiss_transaction = 0;  m_cpt_itlb_ll_transaction = 0;  m_cpt_itlb_sc_transaction = 0;
    m_cpt_dtlbmiss_transaction = 0;  m_cpt_dtlb_ll_transaction = 0;  m_cpt_dtlb_sc_transaction = 0;
    m_cpt_dtlb_ll_dirty_transaction = 0;  m_cpt_dtlb_sc_dirty_transaction = 0;

    m_cost_itlbmiss_transaction = 0;  m_cost_itlb_ll_transaction = 0;  m_cost_itlb_sc_transaction = 0;
    m_cost_dtlbmiss_transaction = 0;  m_cost_dtlb_ll_transaction = 0;  m_cost_dtlb_sc_transaction = 0;
    m_cost_dtlb_ll_dirty_transaction = 0;  m_cost_dtlb_sc_dirty_transaction = 0;

    m_cpt_cc_update_data = 0;  m_cpt_cc_inval_ins = 0;
    m_cpt_cc_inval_data = 0;  m_cpt_cc_broadcast = 0;
    m_cost_updt_data_frz = 0;  m_cost_inval_ins_frz = 0;
    m_cost_inval_data_frz = 0;  m_cost_broadcast_frz = 0;
    m_cpt_cc_cleanup_data = 0;  m_cpt_cc_cleanup_ins = 0;
}
*/

/////////////////////////
tmpl(void)::transition()
/////////////////////////
{
    if ( not p_resetn.read() )
    {
        r_iss.reset();
        r_wbuf.reset();
        r_icache.reset();
        r_dcache.reset();
        r_itlb.reset();
        r_dtlb.reset();

        r_dcache_fsm     = DCACHE_IDLE;
        r_icache_fsm     = ICACHE_IDLE;
        r_vci_cmd_fsm    = CMD_IDLE;
        r_vci_rsp_fsm    = RSP_IDLE;
        r_tgt_fsm        = TGT_IDLE;
        r_inval_itlb_fsm = INVAL_ITLB_IDLE;
        r_inval_dtlb_fsm = INVAL_DTLB_IDLE;
        r_cleanup_fsm    = CLEANUP_DATA_IDLE;

        std::memset(r_dcache_in_itlb, 0, sizeof(*r_dcache_in_itlb)*m_dcache_ways*m_dcache_sets);
        std::memset(r_dcache_in_dtlb, 0, sizeof(*r_dcache_in_dtlb)*m_dcache_ways*m_dcache_sets);

        // Response FIFOs and cleanup buffer
        r_vci_rsp_fifo_icache.init();
        r_vci_rsp_fifo_dcache.init();
        r_cleanup_buffer.reset();

        // ICACHE & DCACHE activated
        r_mmu_mode = 0x3;

        // No request from ICACHE FSM to CMD FSM
        r_icache_miss_req = false;
        r_icache_unc_req  = false;

        // No request from ICACHE FSM to DCACHE FSM
        r_icache_tlb_miss_req = false;

        // No request from ICACHE FSM to CLEANUP FSMs
        r_icache_cleanup_req = false;

        // No pending write in pipeline
        r_dcache_p0_valid = false;
        r_dcache_p1_valid = false;

        // No request from DCACHE FSM to CMD FSM
        r_dcache_vci_miss_req = false;
        r_dcache_vci_unc_req  = false;
        r_dcache_vci_sc_req   = false;

        // No uncacheable write pending
        r_dcache_pending_unc_write = false;

        // No LL reservation
        r_dcache_ll_valid = false;

        // No request from DCACHE FSM to INVAL TLB FSMs
        r_dcache_itlb_inval_req = false;
        r_dcache_dtlb_inval_req = false;

        // No processor XTN request pending
        r_dcache_xtn_req = false;

        // No request from DCACHE FSM to CLEANUP FSMs
        r_dcache_cleanup_req = false;

        // No request from TGT FSM to ICACHE/DCACHE FSMs
        r_tgt_icache_req = false;
        r_tgt_dcache_req = false;

        // No coherence request matching a pending miss signaled
        r_icache_miss_inval = false;
        r_dcache_miss_inval = false;

        // No error signaled
        r_vci_rsp_ins_error  = false;
        r_vci_rsp_data_error = false;

        // Debug variables
        m_debug_previous_hit   = false;
        m_debug_dcache_fsm     = false;
        m_debug_icache_fsm     = false;
        m_debug_cleanup_fsm    = false;
        m_debug_inval_itlb_fsm = false;
        m_debug_inval_dtlb_fsm = false;

        /*
        // activity counters
        m_cpt_dcache_data_read = 0;  m_cpt_dcache_data_write = 0;
        m_cpt_dcache_dir_read  = 0;  m_cpt_dcache_dir_write  = 0;
        m_cpt_icache_data_read = 0;  m_cpt_icache_data_write = 0;
        m_cpt_icache_dir_read  = 0;  m_cpt_icache_dir_write  = 0;

        m_cpt_frz_cycles = 0;  m_cpt_dcache_frz_cycles = 0;  m_cpt_total_cycles = 0;

        m_cpt_read = 0;  m_cpt_write = 0;  m_cpt_data_miss = 0;  m_cpt_ins_miss = 0;
        m_cpt_unc_read = 0;  m_cpt_write_cached = 0;  m_cpt_ins_read = 0;

        m_cost_write_frz = 0;  m_cost_data_miss_frz = 0;
        m_cost_unc_read_frz = 0;  m_cost_ins_miss_frz = 0;

        m_cpt_imiss_transaction = 0;  m_cpt_dmiss_transaction = 0;  m_cpt_unc_transaction = 0;
        m_cpt_write_transaction = 0;  m_cpt_icache_unc_transaction = 0;

        m_cost_imiss_transaction = 0;  m_cost_dmiss_transaction = 0;  m_cost_unc_transaction = 0;
        m_cost_write_transaction = 0;  m_cost_icache_unc_transaction = 0;
        m_length_write_transaction = 0;

        m_cpt_ins_tlb_read = 0;  m_cpt_ins_tlb_miss = 0;  m_cpt_ins_tlb_update_acc = 0;
        m_cpt_data_tlb_read = 0;  m_cpt_data_tlb_miss = 0;  m_cpt_data_tlb_update_acc = 0;
        m_cpt_data_tlb_update_dirty = 0;  m_cpt_ins_tlb_hit_dcache = 0;
        m_cpt_data_tlb_hit_dcache = 0;  m_cpt_ins_tlb_occup_cache = 0;
        m_cpt_data_tlb_occup_cache = 0;

        m_cost_ins_tlb_miss_frz = 0;  m_cost_data_tlb_miss_frz = 0;
        m_cost_ins_tlb_update_acc_frz = 0;  m_cost_data_tlb_update_acc_frz = 0;
        m_cost_data_tlb_update_dirty_frz = 0;
        m_cost_ins_tlb_occup_cache_frz = 0;  m_cost_data_tlb_occup_cache_frz = 0;

        m_cpt_ins_tlb_inval = 0;  m_cpt_data_tlb_inval = 0;
        m_cost_ins_tlb_inval_frz = 0;  m_cost_data_tlb_inval_frz = 0;

        m_cpt_cc_update_data = 0;  m_cpt_cc_inval_ins = 0;
        m_cpt_cc_inval_data = 0;  m_cpt_cc_broadcast = 0;
        m_cost_updt_data_frz = 0;  m_cost_inval_ins_frz = 0;
        m_cost_inval_data_frz = 0;  m_cost_broadcast_frz = 0;
        m_cpt_cc_cleanup_data = 0;  m_cpt_cc_cleanup_ins = 0;

        m_cpt_itlbmiss_transaction = 0;  m_cpt_itlb_ll_transaction = 0;
        m_cpt_itlb_sc_transaction = 0;  m_cpt_dtlbmiss_transaction = 0;
        m_cpt_dtlb_ll_transaction = 0;  m_cpt_dtlb_sc_transaction = 0;
        m_cpt_dtlb_ll_dirty_transaction = 0;  m_cpt_dtlb_sc_dirty_transaction = 0;

        m_cost_itlbmiss_transaction = 0;  m_cost_itlb_ll_transaction = 0;
        m_cost_itlb_sc_transaction = 0;  m_cost_dtlbmiss_transaction = 0;
        m_cost_dtlb_ll_transaction = 0;  m_cost_dtlb_sc_transaction = 0;
        m_cost_dtlb_ll_dirty_transaction = 0;  m_cost_dtlb_sc_dirty_transaction = 0;

        for (uint32_t i=0; i<32 ; ++i) m_cpt_fsm_icache      [i] = 0;
        for (uint32_t i=0; i<32 ; ++i) m_cpt_fsm_dcache      [i] = 0;
        for (uint32_t i=0; i<32 ; ++i) m_cpt_fsm_cmd         [i] = 0;
        for (uint32_t i=0; i<32 ; ++i) m_cpt_fsm_rsp         [i] = 0;
        for (uint32_t i=0; i<32 ; ++i) m_cpt_fsm_tgt         [i] = 0;
        for (uint32_t i=0; i<32 ; ++i) m_cpt_fsm_cmd_cleanup [i] = 0;
        for (uint32_t i=0; i<32 ; ++i) m_cpt_fsm_rsp_cleanup [i] = 0;
        */

        return;
    }

    // Response FIFOs default values
    bool     vci_rsp_fifo_icache_get  = false;
    bool     vci_rsp_fifo_icache_put  = false;
    uint32_t vci_rsp_fifo_icache_data = 0;

    bool     vci_rsp_fifo_dcache_get  = false;
    bool     vci_rsp_fifo_dcache_put  = false;
    uint32_t vci_rsp_fifo_dcache_data = 0;

#ifdef INSTRUMENTATION
    m_cpt_fsm_dcache [r_dcache_fsm.read() ]++;
    m_cpt_fsm_icache [r_icache_fsm.read() ]++;
    m_cpt_fsm_cmd    [r_vci_cmd_fsm.read()]++;
    m_cpt_fsm_rsp    [r_vci_rsp_fsm.read()]++;
    m_cpt_fsm_tgt    [r_tgt_fsm.read()    ]++;
    m_cpt_fsm_cleanup[r_cleanup_fsm.read()]++;
#endif

    m_cpt_total_cycles++;

    m_debug_cleanup_fsm    = (m_cpt_total_cycles > m_debug_start_cycle) and m_debug_ok;
    m_debug_icache_fsm     = (m_cpt_total_cycles > m_debug_start_cycle) and m_debug_ok;
    m_debug_dcache_fsm     = (m_cpt_total_cycles > m_debug_start_cycle) and m_debug_ok;
    m_debug_inval_itlb_fsm = (m_cpt_total_cycles > m_debug_start_cycle) and m_debug_ok;
    m_debug_inval_dtlb_fsm = (m_cpt_total_cycles > m_debug_start_cycle) and m_debug_ok;

    /////////////////////////////////////////////////////////////////////
    // The TGT_FSM controls the following resources:
    // - r_tgt_fsm
    // - r_tgt_buf[nwords]
    // - r_tgt_be[nwords]
    // - r_tgt_update
    // - r_tgt_word_min
    // - r_tgt_word_max
    // - r_tgt_word_count
    // - r_tgt_paddr
    // - r_tgt_srcid
    // - r_tgt_trdid
    // - r_tgt_pktid
    // - r_tgt_icache_req (set)
    // - r_tgt_dcache_req (set)
    //
    // All VCI commands must be CMD_WRITE.
    // - If the 2 LSB bits of the VCI address are 11, it is a broadcast request.
    //   It is a multicast request otherwise.
    // - For multicast requests, the ADDRESS[2] bit distinguishes DATA/INS
    //   (0 for data / 1 for instruction), and the ADDRESS[3] bit distinguishes
    //   INVAL/UPDATE (0 for invalidate / 1 for update).
    //
    // For all types of coherence requests, the line index (i.e. the Z & Y fields)
    // is coded on 34 bits, and is contained in the WDATA and BE fields
    // of the first VCI flit.
    // - For a multicast invalidate or a broadcast invalidate request,
    //   the VCI packet length is 1 word.
    // - For an update request, the VCI packet length is (n+2) words.
    //   The WDATA field of the second VCI word contains the word index.
    //   The WDATA fields of the n following words contain the values.
    // - For all transaction types, the VCI response is one single word.
    // In case of errors in the VCI command packet, the simulation
    // is stopped with an error message.
    //
    // This FSM is NOT pipelined : it consumes a new coherence request
    // on the VCI port only when the previous request is completed.
    //
    // The VCI_TGT FSM stores the external request arguments in the
    // IDLE, UPDT_WORD & UPDT_DATA states. It sets the r_tgt_icache_req
    // and/or the r_tgt_dcache_req flip-flops to signal the coherence request
    // to the ICACHE & DCACHE FSMs in the REQ_ICACHE, REQ_DCACHE & REQ_BROADCAST
    // states. It waits for the completion of the coherence request by polling the
    // r_tgt_*cache_req flip-flops in the RSP_ICACHE, RSP_DCACHE & RSP_BROADCAST
    // states. These flip-flops are reset by the ICACHE and DCACHE FSMs.
    // These two FSMs signal if a VCI answer must be sent by setting
    // the r_tgt_icache_rsp and/or the r_tgt_dcache_rsp flip-flops.
    ///////////////////////////////////////////////////////////////////////////////
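    // For example, assuming a data multi-update of 2 words starting at word
    // index 3 in line number 0x12345, the states below decode a VCI packet of:
    //   flit 1 : address = segment_base + 8, wdata = 0x12345, be = nline[33:32]
    //   flit 2 : wdata = 3 (first word index)
    //   flit 3 : wdata = new value of word 3, be = byte enables
    //   flit 4 : wdata = new value of word 4, be = byte enables, eop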
    switch( r_tgt_fsm.read() )
    {
    //////////////
    case TGT_IDLE:
    {
        if ( p_vci_tgt_c.cmdval.read() )
        {
            paddr_t address = p_vci_tgt_c.address.read();

            // command checking
            if ( p_vci_tgt_c.cmd.read() != vci_param::CMD_WRITE)
            {
                std::cout << "error in component VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                std::cout << "the received VCI coherence command is not a write" << std::endl;
                exit(0);
            }

            // address checking
            if ( ( (address & 0x3) != 0x3 ) && ( not m_segment.contains(address)) )
            {
                std::cout << "error in component VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                std::cout << "out of segment VCI coherence command received" << std::endl;
                exit(0);
            }

            r_tgt_srcid = p_vci_tgt_c.srcid.read();
            r_tgt_trdid = p_vci_tgt_c.trdid.read();
            r_tgt_pktid = p_vci_tgt_c.pktid.read();
            r_tgt_paddr = (paddr_t)(p_vci_tgt_c.be.read() & 0x3) << 32 |
                          (paddr_t)p_vci_tgt_c.wdata.read() * m_dcache_words * 4;

            if ( (address&0x3) == 0x3 )     // broadcast invalidate for data or instruction type
            {
                if ( not p_vci_tgt_c.eop.read() )
                {
                    std::cout << "error in component VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                    std::cout << "the BROADCAST INVALIDATE command must be one flit" << std::endl;
                    exit(0);
                }
                r_tgt_update = false;
                r_tgt_fsm    = TGT_REQ_BROADCAST;
#ifdef INSTRUMENTATION
                m_cpt_cc_broadcast++;
#endif
            }
            else                            // multi-update or multi-invalidate request
            {
                paddr_t cell = address - m_segment.baseAddress();

                if (cell == 0)                  // invalidate data
                {
                    if ( not p_vci_tgt_c.eop.read() )
                    {
                        std::cout << "error in VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                        std::cout << "the MULTI-INVALIDATE command must be one flit" << std::endl;
                        exit(0);
                    }
                    r_tgt_update = false;
                    r_tgt_fsm    = TGT_REQ_DCACHE;
#ifdef INSTRUMENTATION
                    m_cpt_cc_inval_dcache++;
#endif
                }
                else if (cell == 4)             // invalidate instruction
                {
                    if ( not p_vci_tgt_c.eop.read() )
                    {
                        std::cout << "error in VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                        std::cout << "the MULTI-INVALIDATE command must be one flit" << std::endl;
                        exit(0);
                    }
                    r_tgt_update = false;
                    r_tgt_fsm    = TGT_REQ_ICACHE;
#ifdef INSTRUMENTATION
                    m_cpt_cc_inval_icache++;
#endif
                }
                else if (cell == 8)             // update data
                {
                    if ( p_vci_tgt_c.eop.read() )
                    {
                        std::cout << "error in VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                        std::cout << "the MULTI-UPDATE command must be N+2 flits" << std::endl;
                        exit(0);
                    }
                    r_tgt_update      = true;
                    r_tgt_update_data = true;
                    r_tgt_fsm         = TGT_UPDT_WORD;
#ifdef INSTRUMENTATION
                    m_cpt_cc_update_dcache++;
#endif
                }
                else                            // update instruction
                {
                    if ( p_vci_tgt_c.eop.read() )
                    {
                        std::cout << "error in VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                        std::cout << "the MULTI-UPDATE command must be N+2 flits" << std::endl;
                        exit(0);
                    }
                    r_tgt_update      = true;
                    r_tgt_update_data = false;
                    r_tgt_fsm         = TGT_UPDT_WORD;
#ifdef INSTRUMENTATION
                    m_cpt_cc_update_icache++;
#endif
                }
            } // end if multi
        } // end if cmdval
        break;
    }
    ///////////////////
    case TGT_UPDT_WORD:     // first word index acquisition
    {
        if (p_vci_tgt_c.cmdval.read())
        {
            if ( p_vci_tgt_c.eop.read() )
            {
                std::cout << "error in component VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                std::cout << "the MULTI-UPDATE command must be N+2 flits" << std::endl;
                exit(0);
            }
            for ( size_t i=0 ; i<m_dcache_words ; i++ ) r_tgt_be[i] = 0;

            r_tgt_word_min   = p_vci_tgt_c.wdata.read();
            r_tgt_word_count = p_vci_tgt_c.wdata.read();
            r_tgt_fsm        = TGT_UPDT_DATA;
        }
        break;
    }
    ///////////////////
    case TGT_UPDT_DATA:     // data acquisition
    {
        if (p_vci_tgt_c.cmdval.read())
        {
            size_t word = r_tgt_word_count.read();
            if (word >= m_dcache_words)
            {
                std::cout << "error in component VCI_CC_VCACHE_WRAPPER " << name() << std::endl;
                std::cout << "the received MULTI-UPDATE command is wrong" << std::endl;
                exit(0);
            }
            r_tgt_buf[word]  = p_vci_tgt_c.wdata.read();
            r_tgt_be[word]   = p_vci_tgt_c.be.read();
            r_tgt_word_count = word + 1;

            if (p_vci_tgt_c.eop.read())     // last word
            {
                r_tgt_word_max = word;
                if ( r_tgt_update_data.read() ) r_tgt_fsm = TGT_REQ_DCACHE;
                else                            r_tgt_fsm = TGT_REQ_ICACHE;
            }
        }
        break;
    }
    ///////////////////////
    case TGT_REQ_BROADCAST:     // set requests to DCACHE & ICACHE FSMs
    {
        if ( not r_tgt_icache_req.read() and not r_tgt_dcache_req.read() )
        {
            r_tgt_fsm        = TGT_RSP_BROADCAST;
            r_tgt_icache_req = true;
            r_tgt_dcache_req = true;
        }
        break;
    }
    /////////////////////
    case TGT_REQ_ICACHE:        // set request to ICACHE FSM (if no previous request pending)
    {
        if ( not r_tgt_icache_req.read() )
        {
            r_tgt_fsm        = TGT_RSP_ICACHE;
            r_tgt_icache_req = true;
        }
        break;
    }
    ////////////////////
    case TGT_REQ_DCACHE:        // set request to DCACHE FSM (if no previous request pending)
    {
        if ( not r_tgt_dcache_req.read() )
        {
            r_tgt_fsm        = TGT_RSP_DCACHE;
            r_tgt_dcache_req = true;
        }
        break;
    }
    ///////////////////////
    case TGT_RSP_BROADCAST:     // waiting acknowledge from both DCACHE & ICACHE FSMs
                                // no response when r_tgt_*cache_rsp is false
    {
        if ( not r_tgt_icache_req.read() and not r_tgt_dcache_req.read() )  // both completed
        {
            if ( r_tgt_icache_rsp.read() or r_tgt_dcache_rsp.read() )   // at least one response
            {
                if ( p_vci_tgt_c.rspack.read() )
                {
                    // reset dcache first if activated
                    if (r_tgt_dcache_rsp) r_tgt_dcache_rsp = false;
                    else                  r_tgt_icache_rsp = false;
                }
            }
            else
            {
                r_tgt_fsm = TGT_IDLE;
            }
        }
        break;
    }
    ////////////////////
    case TGT_RSP_ICACHE:        // waiting acknowledge from ICACHE FSM
    {
        // no response when r_tgt_icache_rsp is false
        if ( not r_tgt_icache_req.read() and p_vci_tgt_c.rspack.read() )
        {
            r_tgt_fsm        = TGT_IDLE;
            r_tgt_icache_rsp = false;
        }
        break;
    }
    ////////////////////
    case TGT_RSP_DCACHE:        // waiting acknowledge from DCACHE FSM
    {
        // no response when r_tgt_dcache_rsp is false
        if ( not r_tgt_dcache_req.read() and p_vci_tgt_c.rspack.read() )
        {
            r_tgt_fsm        = TGT_IDLE;
            r_tgt_dcache_rsp = false;
        }
        break;
    }
    } // end switch TGT_FSM

    /////////////////////////////////////////////////////////////////////
    // Get data and instruction requests from processor
    /////////////////////////////////////////////////////////////////////

    typename iss_t::InstructionRequest ireq = ISS_IREQ_INITIALIZER;
    typename iss_t::DataRequest        dreq = ISS_DREQ_INITIALIZER;
    r_iss.getRequests(ireq, dreq);

    ////////////////////////////////////////////////////////////////////////////////////
    // ICACHE_FSM
    //
    // There are 9 conditions to exit the IDLE state:
    // One condition is a coherence request from the TGT FSM :
    // - Coherence operation                               => ICACHE_CC_CHECK
    // Five configurations corresponding to XTN processor requests sent by the DCACHE FSM :
    // - Flush TLB                                         => ICACHE_XTN_TLB_FLUSH
    // - Flush cache                                       => ICACHE_XTN_CACHE_FLUSH
    // - Invalidate a TLB entry                            => ICACHE_XTN_TLB_INVAL
    // - Invalidate a cache line                           => ICACHE_XTN_CACHE_INVAL_VA
    // - Invalidate a cache line using physical address    => ICACHE_XTN_CACHE_INVAL_PA
    // Three configurations corresponding to instruction processor requests :
    // - tlb miss                                          => ICACHE_TLB_WAIT
    // - cacheable read miss                               => ICACHE_MISS_VICTIM
    // - uncacheable read miss                             => ICACHE_UNC_WAIT
    //
    // In case of cache miss, the ICACHE FSM requests a VCI transaction to the CMD FSM,
    // using the r_icache_miss_req flip-flop, which is reset by the CMD FSM when the
    // transaction starts. Then the ICACHE FSM goes to the ICACHE_MISS_VICTIM
    // state to select a victim slot, and possibly sends a cleanup request to the CLEANUP FSM.
    // It goes next to the ICACHE_MISS_WAIT state, waiting for a response from the RSP FSM.
    // The availability of the missing cache line is signaled by the response fifo,
    // and the cache update is done (one word per cycle) in the ICACHE_MISS_UPDT state.
    //
    // In case of uncacheable address, the ICACHE FSM requests an uncached VCI transaction
    // to the CMD FSM using the r_icache_unc_req flip-flop, which is reset by the CMD FSM
    // when the transaction starts. The ICACHE FSM goes to ICACHE_UNC_WAIT to wait for
    // the response from the RSP FSM, through the response fifo. The missing instruction
    // is directly returned to the processor in this state.
    //
    // In case of tlb miss, the ICACHE FSM requests the DCACHE FSM to update the tlb,
    // using the r_icache_tlb_miss_req flip-flop and the r_icache_vaddr_save register,
    // and goes to the ICACHE_TLB_WAIT state.
    // The tlb update is entirely done by the DCACHE FSM (which becomes the owner
    // of the itlb until the update is completed), and resets r_icache_tlb_miss_req
    // to signal the completion.
    //
    // The DCACHE FSM signals XTN processor requests to the ICACHE FSM
    // using the r_dcache_xtn_req flip-flop.
    // The request opcode and the address to be invalidated are transmitted
    // in the r_dcache_xtn_opcode and r_dcache_p0_wdata registers respectively.
    // The r_dcache_xtn_req flip-flop is reset by the ICACHE FSM when the operation
    // is completed.
    //
    // The r_vci_rsp_ins_error flip-flop is set by the RSP FSM in case of bus error
    // in a cache miss or uncacheable read VCI transaction. Nothing is written
    // in the response fifo. This flip-flop is reset by the ICACHE FSM.
    ////////////////////////////////////////////////////////////////////////////////////////

    // The default value for irsp.valid is false
    typename iss_t::InstructionResponse irsp = ISS_IRSP_INITIALIZER;

    switch( r_icache_fsm.read() )
    {
    /////////////////
    case ICACHE_IDLE:   // In this state, we handle processor requests, XTN requests sent
                        // by the DCACHE FSM, and coherence requests, with a fixed priority:
                        //      coherence > XTN > instruction
                        // We access the itlb and icache in parallel, with the virtual address
                        // for the itlb, and with a speculative physical address for the icache,
                        // computed during the previous cycle.
    {
        // coherence request from the target FSM
        if ( r_tgt_icache_req.read() )
        {
            r_icache_fsm      = ICACHE_CC_CHECK;
            r_icache_fsm_save = r_icache_fsm.read();
            break;
        }

        // Decoding processor XTN requests sent by the DCACHE FSM.
        // These requests are not executed in this IDLE state, because
        // they require access to the icache or itlb, which are already accessed
        if ( r_dcache_xtn_req.read() )
        {
            if ( (int)r_dcache_xtn_opcode.read() == (int)iss_t::XTN_PTPR )
            {
                r_icache_fsm = ICACHE_XTN_TLB_FLUSH;
                break;
            }
            if ( (int)r_dcache_xtn_opcode.read() == (int)iss_t::XTN_ICACHE_FLUSH)
            {
                r_icache_flush_count = 0;
                r_icache_fsm         = ICACHE_XTN_CACHE_FLUSH;
                break;
            }
            if ( (int)r_dcache_xtn_opcode.read() == (int)iss_t::XTN_ITLB_INVAL)
            {
                r_icache_fsm = ICACHE_XTN_TLB_INVAL;
                break;
            }
            if ( (int)r_dcache_xtn_opcode.read() == (int)iss_t::XTN_ICACHE_INVAL)
            {
                r_icache_fsm = ICACHE_XTN_CACHE_INVAL_VA;
                break;
            }
            if ( (int)r_dcache_xtn_opcode.read() == (int)iss_t::XTN_MMU_ICACHE_PA_INV)
            {
                r_icache_vci_paddr = (paddr_t)r_mmu_word_hi.read() << 32 |
                                     (paddr_t)r_mmu_word_lo.read();
                r_icache_fsm       = ICACHE_XTN_CACHE_INVAL_PA;
                break;
            }
        } // end if xtn_req

        // processor request
        if ( ireq.valid )
        {
            bool    cacheable;
            paddr_t paddr;

            // We register the processor request
            r_icache_vaddr_save = ireq.addr;

            // speculative icache access (if cache activated)
            // we use the speculative PPN computed during the previous cycle
            uint32_t cache_inst = 0;
            bool     cache_hit  = false;
            if ( r_mmu_mode.read() & INS_CACHE_MASK )
            {
                paddr_t spc_paddr = (r_icache_vci_paddr.read() & ~PAGE_K_MASK) |
                                    ((paddr_t)ireq.addr & PAGE_K_MASK);
#ifdef INSTRUMENTATION
                m_cpt_icache_data_read++;
                m_cpt_icache_dir_read++;
#endif
                cache_hit = r_icache.read( spc_paddr, &cache_inst );
            }

            // systematic itlb access (if tlb activated)
            // we use the virtual address
            paddr_t    tlb_paddr;
            pte_info_t tlb_flags;
            size_t     tlb_way;
            size_t     tlb_set;
            paddr_t    tlb_nline;
            bool       tlb_hit = false;
            if ( r_mmu_mode.read() & INS_TLB_MASK )
            {
#ifdef INSTRUMENTATION
                m_cpt_itlb_read++;
#endif
                tlb_hit = r_itlb.translate( ireq.addr,
                                            &tlb_paddr,
                                            &tlb_flags,
                                            &tlb_nline,     // unused
                                            &tlb_way,       // unused
                                            &tlb_set );     // unused
            }

            // We compute cacheability, physical address and check access rights:
            // - If the MMU is activated : cacheability is defined by the C bit in the PTE,
            //   the physical address is obtained from the TLB, and the access rights are
            //   defined by the U and X bits in the PTE.
            // - If the MMU is not activated : cacheability is defined by the cacheability
            //   table, the physical address is equal to the virtual address (identity
            //   mapping), and there is no access rights checking

            if ( not (r_mmu_mode.read() & INS_TLB_MASK) )   // tlb not activated:
            {
                // cacheability
                if ( not (r_mmu_mode.read() & INS_CACHE_MASK) ) cacheable = false;
                else cacheable = m_cacheability_table[ireq.addr];

                // physical address
                paddr = (paddr_t)ireq.addr;
            }
            else                                            // itlb activated
            {
                if ( tlb_hit )  // tlb hit
                {
                    // cacheability
                    if ( not (r_mmu_mode.read() & INS_CACHE_MASK) ) cacheable = false;
                    else cacheable = tlb_flags.c;

                    // physical address
                    paddr = tlb_paddr;

                    // access rights checking
                    if ( not tlb_flags.u && (ireq.mode == iss_t::MODE_USER) )
                    {
                        r_mmu_ietr       = MMU_READ_PRIVILEGE_VIOLATION;
                        r_mmu_ibvar      = ireq.addr;
                        irsp.valid       = true;
                        irsp.error       = true;
                        irsp.instruction = 0;
                        break;
                    }
                    else if ( not tlb_flags.x )
                    {
                        r_mmu_ietr       = MMU_READ_EXEC_VIOLATION;
                        r_mmu_ibvar      = ireq.addr;
                        irsp.valid       = true;
                        irsp.error       = true;
                        irsp.instruction = 0;
                        break;
                    }
                }
                else            // in case of TLB miss we send an itlb miss request
                                // to the DCACHE FSM and break
                {
#ifdef INSTRUMENTATION
                    m_cpt_itlb_miss++;
#endif
                    r_icache_fsm          = ICACHE_TLB_WAIT;
                    r_icache_tlb_miss_req = true;
                    break;
                }
            } // end if itlb activated

            // physical address registration (for next cycle)
            r_icache_vci_paddr = paddr;

            // Finally, we get the instruction depending on cacheability
            if ( cacheable )            // cacheable read
            {
                if ( not cache_hit )        // cache miss
                {
                    // in case of icache miss we send a request to the CMD FSM, but we are
                    // blocked in IDLE state if the previous cleanup is not completed
                    if ( not r_icache_cleanup_req.read() )
                    {
#ifdef INSTRUMENTATION
                        m_cpt_icache_miss++;
#endif
                        r_icache_fsm      = ICACHE_MISS_VICTIM;
                        r_icache_miss_req = true;
                    }
                }
                else                        // cache hit
                {
                    if ( (r_icache_vci_paddr.read() & ~PAGE_K_MASK) !=
                         (paddr & ~PAGE_K_MASK) )       // speculative access KO
                    {
#ifdef INSTRUMENTATION
                        m_cpt_icache_spc_miss++;
#endif
                        // we return an invalid response
                        // and the cache access will cost one extra cycle.
                        break;
                    }
                    else                                // speculative access OK
                    {
#ifdef INSTRUMENTATION
                        m_cpt_ins_read++;
#endif
                        irsp.valid       = true;
                        irsp.instruction = cache_inst;
                    }
                }
            }
            else                        // non cacheable read
            {
                r_icache_unc_req = true;
                r_icache_fsm     = ICACHE_UNC_WAIT;
            }
        } // end if ireq.valid
        break;
    }
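    // Worked example of the speculative icache access performed in ICACHE_IDLE
    // above (assuming PAGE_K_MASK selects a 4-Kbyte page offset): if the
    // previous access used physical page 0x00203000, a fetch at virtual address
    // 0xBFC00004 reads the cache at 0x00203004 while the itlb is probed in
    // parallel. If the itlb returns a different physical page, the cache answer
    // is discarded (irsp.valid stays false) and the access is retried with the
    // registered paddr, costing one extra cycle.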
    /////////////////////
    case ICACHE_TLB_WAIT:   // Waiting for the itlb update by the DCACHE FSM after a tlb miss.
                            // The itlb is updated by the DCACHE FSM, as well as the
                            // r_mmu_ietr and r_mmu_dbvar registers in case of error.
                            // The itlb is not accessed by the ICACHE FSM until the DCACHE FSM
                            // resets the r_icache_tlb_miss_req flip-flop.
                            // External coherence requests are accepted in this state.
    {
        // external coherence request
        if ( r_tgt_icache_req.read() )
        {
            r_icache_fsm      = ICACHE_CC_CHECK;
            r_icache_fsm_save = r_icache_fsm.read();
            break;
        }

        if ( ireq.valid ) m_cost_ins_tlb_miss_frz++;

        // the DCACHE FSM signals the response by resetting the request flip-flop
        if ( not r_icache_tlb_miss_req.read() )
        {
            if ( r_icache_tlb_rsp_error.read() )    // error reported : tlb not updated
            {
                r_icache_tlb_rsp_error = false;
                irsp.error             = true;
                irsp.valid             = true;
                r_icache_fsm           = ICACHE_IDLE;
            }
            else                                    // tlb updated : return to IDLE state
            {
                r_icache_fsm = ICACHE_IDLE;
            }
        }
        break;
    }
    //////////////////////////
    case ICACHE_XTN_TLB_FLUSH:      // invalidate in one cycle all non global TLB entries
    {
        r_itlb.flush();
        r_dcache_xtn_req = false;
        r_icache_fsm     = ICACHE_IDLE;
        break;
    }
    ////////////////////////////
    case ICACHE_XTN_CACHE_FLUSH:    // Invalidate sequentially all cache lines, using
                                    // the r_icache_flush_count register as a slot counter.
                                    // We loop in this state until all slots have been visited.
                                    // A cleanup request is generated for each valid line,
                                    // and we are blocked until the previous cleanup is completed.
    {
        if ( not r_icache_cleanup_req.read() )
        {
            size_t  way = r_icache_flush_count.read()/m_icache_sets;
            size_t  set = r_icache_flush_count.read()%m_icache_sets;
            paddr_t nline;
            bool    cleanup_req = r_icache.inval( way, set, &nline );
            if ( cleanup_req )
            {
                r_icache_cleanup_req  = true;
                r_icache_cleanup_line = nline;
            }
            r_icache_flush_count = r_icache_flush_count.read() + 1;
        }
        if ( r_icache_flush_count.read() == (m_icache_sets*m_icache_ways - 1) )
        {
            r_dcache_xtn_req = false;
            r_icache_fsm     = ICACHE_IDLE;
        }
        break;
    }
    //////////////////////////
    case ICACHE_XTN_TLB_INVAL:      // invalidate one TLB entry selected by the virtual address
                                    // stored in the r_dcache_p0_wdata register
    {
        r_itlb.inval(r_dcache_p0_wdata.read());
        r_dcache_xtn_req = false;
        r_icache_fsm     = ICACHE_IDLE;
        break;
    }
    ///////////////////////////////
    case ICACHE_XTN_CACHE_INVAL_VA: // Selective cache line invalidate with virtual address.
                                    // It requires 3 cycles (in case of hit on itlb and icache).
                                    // In this state, we access the TLB to translate the virtual
                                    // address stored in the r_dcache_p0_wdata register.
    {
        paddr_t paddr;
        bool    hit;

        // read physical address in TLB when MMU activated
        if ( r_mmu_mode.read() & INS_TLB_MASK )     // itlb activated
        {
#ifdef INSTRUMENTATION
            m_cpt_itlb_read++;
#endif
            hit = r_itlb.translate(r_dcache_p0_wdata.read(), &paddr);
        }
        else                                        // itlb not activated
        {
            paddr = (paddr_t)r_dcache_p0_wdata.read();
            hit   = true;
        }

        if ( hit )      // continue the selective inval process
        {
            r_icache_vci_paddr = paddr;
            r_icache_fsm       = ICACHE_XTN_CACHE_INVAL_PA;
        }
        else            // miss : send a request to the DCACHE FSM
        {
#ifdef INSTRUMENTATION
            m_cpt_itlb_miss++;
#endif
            r_icache_tlb_miss_req = true;
            r_icache_fsm          = ICACHE_TLB_WAIT;
        }
        break;
    }
    ///////////////////////////////
    case ICACHE_XTN_CACHE_INVAL_PA: // Selective cache line invalidate with physical address.
                                    // It requires 2 cycles. In this state, we read the icache,
                                    // with the address stored in the r_icache_vci_paddr register.
    {
        uint32_t data;
        size_t   way;
        size_t   set;
        size_t   word;
        bool     hit = r_icache.read(r_icache_vci_paddr.read(), &data, &way, &set, &word);
        if ( hit )      // inval to be done
        {
            r_icache_miss_way = way;
            r_icache_miss_set = set;
            r_icache_fsm      = ICACHE_XTN_CACHE_INVAL_GO;
        }
        else            // miss : acknowledge the XTN request and return
        {
            r_dcache_xtn_req = false;
            r_icache_fsm     = ICACHE_IDLE;
        }
        break;
    }
    ///////////////////////////////
    case ICACHE_XTN_CACHE_INVAL_GO: // In this state, we invalidate the cache line & cleanup.
                                    // We are blocked if the previous cleanup is not completed.
    {
        paddr_t nline;
        if ( not r_icache_cleanup_req.read() )
        {
            r_icache.inval( r_icache_miss_way.read(),
                            r_icache_miss_set.read(),
                            &nline );
            // request cleanup
            r_icache_cleanup_req  = true;
            r_icache_cleanup_line = nline;
            // acknowledge the XTN request and return
            r_dcache_xtn_req = false;
            r_icache_fsm     = ICACHE_IDLE;
        }
        break;
    }
    ////////////////////////
    case ICACHE_MISS_VICTIM:    // Selects a victim line.
                                // Sets the r_icache_cleanup_req flip-flop
                                // when the selected slot is not empty.
    {
        if ( ireq.valid ) m_cost_ins_miss_frz++;

        bool    valid;
        size_t  way;
        size_t  set;
        paddr_t victim;
        valid = r_icache.victim_select(r_icache_vci_paddr.read(), &victim, &way, &set);
        r_icache_miss_way = way;
        r_icache_miss_set = set;
        if ( valid )
        {
            r_icache_cleanup_req  = true;
            r_icache_cleanup_line = victim;
            r_icache_fsm          = ICACHE_MISS_INVAL;
        }
        else
        {
            r_icache_fsm = ICACHE_MISS_WAIT;
        }
        break;
    }
    ///////////////////////
    case ICACHE_MISS_INVAL:     // invalidate the victim line
    {
        paddr_t nline;
        r_icache.inval( r_icache_miss_way.read(),
                        r_icache_miss_set.read(),
                        &nline );       // unused
        r_icache_fsm = ICACHE_MISS_WAIT;
        break;
    }
    //////////////////////
    case ICACHE_MISS_WAIT:      // waiting for a response to a miss request from the VCI_RSP FSM
    {
        if ( ireq.valid ) m_cost_ins_miss_frz++;

        // external coherence request
        if ( r_tgt_icache_req.read() )
        {
            r_icache_fsm      = ICACHE_CC_CHECK;
            r_icache_fsm_save = r_icache_fsm.read();
            break;
        }

        if ( r_vci_rsp_ins_error.read() )           // bus error
        {
            r_mmu_ietr          = MMU_READ_DATA_ILLEGAL_ACCESS;
            r_mmu_ibvar         = r_icache_vaddr_save.read();
            irsp.valid          = true;
            irsp.error          = true;
            r_vci_rsp_ins_error = false;
            r_icache_fsm        = ICACHE_IDLE;
        }
        else if ( r_vci_rsp_fifo_icache.rok() )     // response available
        {
            r_icache_miss_word = 0;
            r_icache_fsm       = ICACHE_MISS_UPDT;
        }
        break;
    }
    //////////////////////
    case ICACHE_MISS_UPDT:      // update the cache (one word per cycle)
    {
        if ( ireq.valid ) m_cost_ins_miss_frz++;

        if ( r_vci_rsp_fifo_icache.rok() )      // response available
        {
            if ( r_icache_miss_inval )  // Matching coherence request:
                                        // We pop the response FIFO, without updating the cache.
                                        // We send a cleanup for the missing line at the last word.
                                        // Blocked if the previous cleanup is not completed.
            {
                if ( r_icache_miss_word.read() < m_icache_words-1 )     // not the last word
                {
                    vci_rsp_fifo_icache_get = true;
                    r_icache_miss_word      = r_icache_miss_word.read() + 1;
                }
                else                                                    // last word
                {
                    if ( not r_icache_cleanup_req.read() )  // no pending cleanup
                    {
                        vci_rsp_fifo_icache_get = true;
                        r_icache_cleanup_req    = true;
                        r_icache_cleanup_line   = r_icache_vci_paddr.read() >>
                                                  (uint32_log2(m_icache_words<<2));
                        r_icache_miss_inval     = false;
                        r_icache_fsm            = ICACHE_IDLE;
                    }
                }
            }
            else                        // No matching coherence request:
                                        // We pop the FIFO and update the cache.
                                        // We update the directory at the last word.
            {
#ifdef INSTRUMENTATION
                m_cpt_icache_data_write++;
#endif
                r_icache.write( r_icache_miss_way.read(),
                                r_icache_miss_set.read(),
                                r_icache_miss_word.read(),
                                r_vci_rsp_fifo_icache.read() );
                vci_rsp_fifo_icache_get = true;
                r_icache_miss_word      = r_icache_miss_word.read() + 1;
                if ( r_icache_miss_word.read() == m_icache_words-1 )    // last word
                {
#ifdef INSTRUMENTATION
                    m_cpt_icache_dir_write++;
#endif
                    r_icache.victim_update_tag( r_icache_vci_paddr.read(),
                                                r_icache_miss_way.read(),
                                                r_icache_miss_set.read() );
                    r_icache_fsm = ICACHE_IDLE;
                }
            }
        }
        break;
    }
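    // Note on the cleanup identifier computed in ICACHE_MISS_UPDT above:
    // it is a cache-line index, not a byte address. With 16-word (64-byte)
    // lines, r_icache_vci_paddr >> uint32_log2(m_icache_words<<2) drops the
    // 6 offset bits, so paddr 0x00203040 yields nline 0x80C1.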
    ////////////////////
    case ICACHE_UNC_WAIT:   // waiting for a response to an uncacheable read from the VCI_RSP FSM
    {
        // external coherence request
        if ( r_tgt_icache_req.read() )
        {
            r_icache_fsm      = ICACHE_CC_CHECK;
            r_icache_fsm_save = r_icache_fsm.read();
            break;
        }

        if ( r_vci_rsp_ins_error.read() )       // bus error
        {
            r_mmu_ietr          = MMU_READ_DATA_ILLEGAL_ACCESS;
            r_mmu_ibvar         = ireq.addr;
            r_vci_rsp_ins_error = false;
            irsp.valid          = true;
            irsp.error          = true;
            r_icache_fsm        = ICACHE_IDLE;
        }
        else if (r_vci_rsp_fifo_icache.rok() )  // instruction available
        {
            vci_rsp_fifo_icache_get = true;
            r_icache_fsm            = ICACHE_IDLE;
            if ( ireq.valid and (ireq.addr == r_icache_vaddr_save.read()) )   // request not modified
            {
                irsp.valid       = true;
                irsp.instruction = r_vci_rsp_fifo_icache.read();
            }
        }
        break;
    }
    /////////////////////
    case ICACHE_CC_CHECK:   // This state is the entry point of a sub-fsm
                            // handling coherence requests.
                            // The return state is defined in r_icache_fsm_save.
    {
        paddr_t paddr = r_tgt_paddr.read();
        paddr_t mask  = ~((m_icache_words<<2)-1);

        if( (r_icache_fsm_save.read() == ICACHE_MISS_WAIT) and
            ((r_icache_vci_paddr.read() & mask) == (paddr & mask)))     // matching a pending miss
        {
            r_icache_miss_inval = true;                     // signaling the matching
            r_tgt_icache_req    = false;                    // coherence request completed
            r_tgt_icache_rsp    = r_tgt_update.read();      // response required if update
            r_icache_fsm        = r_icache_fsm_save.read();
        }
        else                                                            // no match
        {
#ifdef INSTRUMENTATION
            m_cpt_icache_dir_read++;
#endif
            uint32_t inst;
            size_t   way;
            size_t   set;
            size_t   word;
            bool     hit = r_icache.read(paddr, &inst, &way, &set, &word);
            r_icache_cc_way = way;
            r_icache_cc_set = set;

            if ( hit and r_tgt_update.read() )              // hit update
            {
                r_icache_fsm     = ICACHE_CC_UPDT;
                r_icache_cc_word = r_tgt_word_min.read();
            }
            else if ( hit and not r_tgt_update.read() )     // hit inval
            {
                r_icache_fsm = ICACHE_CC_INVAL;
            }
            else                                            // miss can happen
            {
                r_tgt_icache_req = false;
                r_tgt_icache_rsp = r_tgt_update.read();
                r_icache_fsm     = r_icache_fsm_save.read();
            }
        }
        break;
    }
    /////////////////////
    case ICACHE_CC_INVAL:   // invalidate a cache line
    {
        paddr_t nline;
        r_icache.inval( r_icache_cc_way.read(),
                        r_icache_cc_set.read(),
                        &nline );
        r_tgt_icache_req = false;
        r_tgt_icache_rsp = true;
        r_icache_fsm     = r_icache_fsm_save.read();
        break;
    }
    ////////////////////
    case ICACHE_CC_UPDT:    // write one word per cycle (from word_min to word_max)
    {
        size_t word = r_icache_cc_word.read();
        size_t way  = r_icache_cc_way.read();
        size_t set  = r_icache_cc_set.read();

        r_icache.write( way, set, word, r_tgt_buf[word], r_tgt_be[word] );
        r_icache_cc_word = word+1;

        if ( word == r_tgt_word_max.read() )    // last word
        {
            r_tgt_icache_req = false;
            r_tgt_icache_rsp = true;
            r_icache_fsm     = r_icache_fsm_save.read();
        }
        break;
    }
    } // end switch r_icache_fsm

    // save the IREQ and IRSP fields for the print_trace() function
    m_ireq_valid       = ireq.valid;
    m_ireq_addr        = ireq.addr;
    m_ireq_mode        = ireq.mode;

    m_irsp_valid       = irsp.valid;
    m_irsp_instruction = irsp.instruction;
    m_irsp_error       = irsp.error;

    ////////////////////////////////////////////////////////////////////////////////////
    // INVAL ITLB FSM
    // This FSM works in parallel with the ICACHE FSM.
    // When the r_dcache_itlb_inval_req flip-flop is activated by the DCACHE FSM,
    // it scans sequentially all entries in the ITLB, and invalidates the
    // entries matching the evicted line.
    // It signals the completion of the invalidation by resetting r_dcache_itlb_inval_req.
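    // The SCAN state below visits one (way,set) slot per cycle, decoding the
    // counter as way = count/m_itlb_sets and set = count%m_itlb_sets.
    // For example, with 2 ways and 8 sets, counts 0..7 scan way 0 and
    // counts 8..15 scan way 1, for a 16-cycle worst-case invalidation.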
    ////////////////////////////////////////////////////////////////////////////////////
    switch(r_inval_itlb_fsm) {
    /////////////////////
    case INVAL_ITLB_IDLE:
    {
        if ( r_dcache_itlb_inval_req.read() )
        {
            r_itlb.reset_bypass(r_dcache_tlb_inval_line.read());
            r_inval_itlb_count = 0;
            r_inval_itlb_fsm   = INVAL_ITLB_SCAN;
#if DEBUG_INVAL_ITLB
            if ( m_debug_inval_itlb_fsm )
            {
                std::cout << "  Invalidate request for line "
                          << std::hex << r_dcache_tlb_inval_line.read() << std::endl;
                r_itlb.print();
            }
#endif
        }
        break;
    }
    /////////////////////
    case INVAL_ITLB_SCAN:
    {
        paddr_t line = r_dcache_tlb_inval_line.read();          // nline
        size_t  way  = r_inval_itlb_count.read()/m_itlb_sets;   // way
        size_t  set  = r_inval_itlb_count.read()%m_itlb_sets;   // set
        bool    ok   = r_itlb.inval( line, way, set );

#if DEBUG_INVAL_ITLB
        if ( m_debug_inval_itlb_fsm )
        {
            std::cout << "  " << std::hex << " line = " << line
                      << std::dec << " / set = " << set << " / way = " << way;
            if ( ok ) std::cout << " / HIT" << std::endl;
            else      std::cout << " / MISS" << std::endl;
        }
#endif

        r_inval_itlb_count = r_inval_itlb_count.read() + 1;
        if ( r_inval_itlb_count.read() == (m_itlb_sets*m_itlb_ways - 1) )
        {
            r_inval_itlb_fsm        = INVAL_ITLB_IDLE;
            r_dcache_itlb_inval_req = false;
        }
        break;
    }
    } // end switch r_inval_itlb_fsm

    ////////////////////////////////////////////////////////////////////////////////////
    // DCACHE FSM
    //
    // Both the Cacheability Table and the MMU cacheable bit are used to define
    // the cacheability, depending on the MMU mode.
    //
    // 1/ Coherence requests :
    //    There is a coherence request when the r_tgt_dcache_req flip-flop is set,
    //    requesting a line invalidation or a line update.
    //    Coherence requests are taken into account in the IDLE, UNC_WAIT, MISS_WAIT states.
    //    The actions associated with the pre-empted state are not executed: the DCACHE FSM
    //    goes to the CC_CHECK state to execute the requested action, and returns to the
    //    pre-empted state.
    //
    // 2/ Processor requests :
    //    Processor READ, WRITE, LL or SC requests are taken in the IDLE state only.
    //    The IDLE state implements a three-stage pipe-line to handle write bursts:
    //    - The physical address is computed by the dtlb in stage P0.
    //    - The registration in the wbuf and the dcache hit are computed in stage P1.
    //    - The dcache update is done in stage P2.
    //    A write operation can require a fourth stage if the dirty bit must be updated,
    //    or if the TLBs must be cleared, but these "long write" operations require
    //    exiting the IDLE state.
    //    If there is no write in the pipe, dcache and dtlb are accessed in parallel
    //    (virtual address for the dtlb, and speculative physical address computed during
    //    the previous cycle for the dcache), in order to return the data in one cycle
    //    for a read. We just pay one extra cycle when the speculative access fails.
    //
    // 3/ Atomic instructions LL/SC
    //    The LL/SC address can be cacheable or non cacheable.
    //    The reservation registers (r_dcache_ll_valid, r_dcache_ll_vaddr and
    //    r_dcache_ll_data) are stored in the L1 cache controller, and not in the
    //    memory controller.
    //    - LL requests from the processor are transmitted as standard VCI
    //      READ transactions (one word / one line, depending on the cacheability).
    //    - SC requests from the processor are systematically transmitted to the
    //      memory cache as COMPARE&SWAP requests (containing both the data value
    //      stored in the r_dcache_ll_data register and the new value).
    //
    // 4/ Non cacheable access:
    //    This component implements a strong ordering between non cacheable accesses
    //    (read or write) : a new non cacheable VCI transaction starts only when
    //    the previous non cacheable transaction is completed. Both cacheable and
    //    non cacheable write transactions use the write buffer, but the DCACHE FSM
    //    registers a non cacheable write transaction posted in the write buffer by
    //    setting the r_dcache_pending_unc_write flip-flop. All other non cacheable
    //    requests are stalled until this flip-flop is reset by the VCI_RSP FSM
    //    (when the pending non cacheable write transaction completes).
    //
    // 5/ Error handling:
    //    When the MMU is not activated, Read Bus Errors are synchronous events,
    //    but Write Bus Errors are asynchronous events (the processor is not frozen).
    //    - If a Read Bus Error is detected, the VCI_RSP FSM sets the
    //      r_vci_rsp_data_error flip-flop, without writing any data in the
    //      r_vci_rsp_fifo_dcache FIFO, and the synchronous error is signaled
    //      by the DCACHE FSM.
    //    - If a Write Bus Error is detected, the VCI_RSP FSM signals
    //      the asynchronous error using the setWriteBerr() method.
    //    When the MMU is activated, bus errors are rare events, as the MMU
    //    checks the physical address before the VCI transaction starts.
    ////////////////////////////////////////////////////////////////////////////////////////

    // The default value for drsp.valid is false
    typename iss_t::DataResponse drsp = ISS_DRSP_INITIALIZER;

    switch ( r_dcache_fsm.read() )
    {
    case DCACHE_IDLE:   // There are 8 conditions to exit the IDLE state :
                        // 1) Long write request (DCACHE FSM)  => DCACHE_WRITE_***
                        // 2) Coherence request (TGT FSM)      => DCACHE_CC_CHECK
                        // 3) ITLB miss request (ICACHE FSM)   => DCACHE_TLB_MISS
                        // 4) XTN request (processor)          => DCACHE_XTN_*
                        // 5) DTLB miss (processor)            => DCACHE_TLB_MISS
                        // 6) Cacheable read miss (processor)  => DCACHE_MISS_VICTIM
                        // 7) Uncacheable read (processor)     => DCACHE_UNC_WAIT
                        // 8) SC access (processor)            => DCACHE_WRITE_SC_WAIT
                        //
                        // There are 4 configurations to access the cache,
                        // depending on the pipe-line state, defined by
                        // the r_dcache_p0_valid flip-flop (P1 stage activated),
                        // and the r_dcache_p1_valid flip-flop (P2 stage activated) :
                        //  V0 / V1 / Data      / Directory / comment
                        //  0  / 0  / read(A0)  / read(A0)  / read speculative access
                        //  0  / 1  / write(A2) / nop       / read request delayed
                        //  1  / 0  / nop       / read(A1)  / read request delayed
                        //  1  / 1  / write(A2) / read(A1)  / read request delayed
    {
        ////////////////////////////////////////////////////////////////////////////////
        // Handling P2 pipe-line stage
        // Inputs are the r_dcache_p1_* registers.
        // Three actions are executed in this P2 stage:
        // - If r_dcache_p1_updt_cache is true, we update the local copy in the dcache.
        // - If the modified cache line has copies in the itlb (resp. dtlb), and if the
        //   INVAL_ITLB (resp. INVAL_DTLB) FSM is idle, we launch the TLB invalidate
        //   operation. This operation is NOT blocking for the processor: the DCACHE FSM
        //   doesn't wait for the TLB invalidate completion to handle processor requests.
        //   If the INVAL_ITLB (resp. INVAL_DTLB) FSM is already processing a previous
        //   invalidation request, the DCACHE FSM is frozen until completion of the
        //   previous TLB invalidate operation.
        // - If the PTE dirty bit must be updated, we start a "long write", that is
        //   blocking for the processor, because we switch to the DCACHE_WRITE_TLB_DIRTY
        //   state, and the number of cycles can be large...
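        // Worked example of the PTE address reconstruction performed below:
        // with 16-word (64-byte) cache lines, a dtlb nline of 0x80C1 and a
        // virtual address ending in offset 0x24 give
        // pte_paddr = (0x80C1 * 64) | 0x24 = 0x203064.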
bool long_write_set_dirty = false; bool tlb_inval_frozen = false; if ( r_dcache_p1_valid.read() ) // P2 stage activated { bool cache_updt = r_dcache_p1_updt_cache.read(); size_t cache_way = r_dcache_p1_cache_way.read(); size_t cache_set = r_dcache_p1_cache_set.read(); size_t cache_word = r_dcache_p1_cache_word.read(); uint32_t wdata = r_dcache_p1_wdata.read(); vci_be_t be = r_dcache_p1_be.read(); // The PTE address is used when the PTE dirty bit must be set. // It is the concatenation of the nline value (from dtlb) // and the word index (from virtual address) paddr_t pte_paddr = (paddr_t)(r_dcache_p1_tlb_nline.read()*(m_dcache_words<<2)) | (paddr_t)(r_dcache_p1_vaddr.read()%(m_dcache_words<<2)); // The line index is used when a TLB inval is required paddr_t inval_line = r_dcache_p1_paddr.read()>>(uint32_log2(m_dcache_words<<2)); // checking dcache update if ( cache_updt ) { r_dcache.write( cache_way, cache_set, cache_word, wdata, be ); #ifdef INSTRUMENTATION m_cpt_dcache_data_write++; #endif // Checking copies in TLBs bool itlb_inval = ( (r_mmu_mode.read() & INS_TLB_MASK) and r_dcache_in_itlb[cache_way*m_dcache_sets+cache_set] ); bool dtlb_inval = ( (r_mmu_mode.read() & DATA_TLB_MASK) and r_dcache_in_dtlb[cache_way*m_dcache_sets+cache_set] ); if ( (dtlb_inval and r_dcache_dtlb_inval_req.read() ) or (itlb_inval and r_dcache_itlb_inval_req.read() ) ) // at least one FSM not idle { tlb_inval_frozen = true; } else // requested FSM idle { r_dcache_tlb_inval_line = inval_line; r_dcache_itlb_inval_req = itlb_inval; r_dcache_dtlb_inval_req = dtlb_inval; r_dcache_in_itlb[cache_way*m_dcache_sets+cache_set] = false; r_dcache_in_dtlb[cache_way*m_dcache_sets+cache_set] = false; } } // end dcache update // checking dirty bit update if ( r_dcache_p1_set_dirty.read() ) { long_write_set_dirty = true; r_dcache_p2_vaddr = r_dcache_p1_vaddr.read(); r_dcache_p2_set_dirty = r_dcache_p1_set_dirty.read(); r_dcache_p2_tlb_way = r_dcache_p1_tlb_way.read(); r_dcache_p2_tlb_set = r_dcache_p1_tlb_set.read(); r_dcache_p2_pte_paddr = pte_paddr; } #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { if ( cache_updt ) std::cout << " P2 stage: cache update" << std::dec << " / way = " << cache_way << " / set = " << cache_set << " / word = " << cache_word << std::hex << " / wdata = " << wdata << " / be = " << be << std::endl; if ( long_write_set_dirty ) std::cout << " P2 stage: dirty bit update required" << " / pte_paddr = " << std::hex << pte_paddr << std::endl; } #endif } // end P2 stage /////////////////////////////////////////////////////////////////////////// // Handling P1 pipe-line stage // Inputs are r_dcache_p0_* registers. // - We must write into wbuf and test the hit in dcache. // If the write request is not cacheable, and there is a pending // non cacheable write, or if the write buffer is full, we break: // The P0 and P1 pipe-line stages are frozen until the write // request registration is possible, but the P2 stage is not frozen. // - The r_dcache_p1_valid bit activating the P2 pipe-line stage // must be computed at all cycles. The P2 stage must be activated // if there is local copy in dcache, or if the PTE dirty bit must be set. 
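        // Illustrative case (not exhaustive): a cacheable write whose PTE dirty
        // bit is already set and that misses in dcache is fully retired in this
        // P1 stage (wbuf registration only): r_dcache_p1_valid stays false and
        // the P2 stage is not activated.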
        if ( r_dcache_p0_valid.read() and not tlb_inval_frozen )   // P1 stage activated
        {
            // write not cacheable, and previous non cacheable write registered
            if ( not r_dcache_p0_cacheable.read() and r_dcache_pending_unc_write.read() )
            {
                r_dcache_p1_valid = false;
                break;
            }
            // try a registration into write buffer
            bool wok = r_wbuf.write( r_dcache_p0_paddr.read(),
                                     r_dcache_p0_be.read(),
                                     r_dcache_p0_wdata.read(),
                                     r_dcache_p0_cacheable.read() );
#ifdef INSTRUMENTATION
m_cpt_wbuf_write++;
#endif
            // write buffer full
            if ( not wok )
            {
                r_dcache_p1_valid = false;
                break;
            }
            // update the write_buffer state extension
            r_dcache_pending_unc_write = not r_dcache_p0_cacheable.read();

            // read directory to detect local copy
            size_t  cache_way;
            size_t  cache_set;
            size_t  cache_word;
            bool    local_copy;
            if ( r_mmu_mode.read() & DATA_CACHE_MASK)   // cache activated
            {
                local_copy = r_dcache.hit( r_dcache_p0_paddr.read(),
                                           &cache_way,
                                           &cache_set,
                                           &cache_word );
#ifdef INSTRUMENTATION
m_cpt_dcache_dir_read++;
#endif
            }
            else
            {
                local_copy = false;
            }

            // dirty bit update requested
            bool dirty_req = (r_mmu_mode.read() & DATA_TLB_MASK) and
                             not r_dcache_p0_tlb_dirty.read();

            // if there is a local copy or a dirty bit update requested
            if ( local_copy or dirty_req )
            {
                r_dcache_p1_valid      = true;
                r_dcache_p1_set_dirty  = dirty_req;
                r_dcache_p1_updt_cache = local_copy;
                r_dcache_p1_vaddr      = r_dcache_p0_vaddr.read();
                r_dcache_p1_wdata      = r_dcache_p0_wdata.read();
                r_dcache_p1_be         = r_dcache_p0_be.read();
                r_dcache_p1_paddr      = r_dcache_p0_paddr.read();
                r_dcache_p1_tlb_way    = r_dcache_p0_tlb_way.read();
                r_dcache_p1_tlb_set    = r_dcache_p0_tlb_set.read();
                r_dcache_p1_tlb_nline  = r_dcache_p0_tlb_nline.read();
                r_dcache_p1_cache_way  = cache_way;
                r_dcache_p1_cache_set  = cache_set;
                r_dcache_p1_cache_word = cache_word;
            }
            else
            {
                r_dcache_p1_valid = false;
            }
        }
        else   // P1 stage not activated
        {
            r_dcache_p1_valid = false;
        }   // end P1 stage

        /////////////////////////////////////////////////////////////////////////////
        // handling P0 write pipe-line stage
        // This stage controls the DCACHE FSM state register:
        // - the FSM is frozen if a TLB invalidate operation must be delayed,
        // - the long write requests have the highest priority,
        // - then the external coherence requests,
        // - then the itlb miss requests,
        // - and finally the processor requests.
        // A processor read request generates a dcache access using the speculative PPN
        // only if the write pipe-line is empty. There is an unconditional access
        // to the dtlb, using the virtual address from the processor.
        // The r_dcache_p0_valid value must be computed at all cycles.
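        // Worked example (illustrative, assuming 4 Kbyte pages, i.e. PAGE_K_MASK = 0xFFF):
        // if the previous cycle registered r_dcache_p0_paddr = 0x5812345678, a read at
        // dreq.addr = 0x00401456 speculatively accesses dcache at
        // (0x5812345678 & ~0xFFF) | (0x00401456 & 0xFFF) = 0x5812345456,
        // i.e. the previously translated physical page number concatenated with
        // the new page offset.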
bool p0_valid = false; // default value // TLB inval delayed if ( tlb_inval_frozen ) { break; } // long write request else if ( long_write_set_dirty ) { r_dcache_fsm = DCACHE_WRITE_TLB_DIRTY; } // external coherence request else if ( r_tgt_dcache_req.read() ) { r_dcache_fsm = DCACHE_CC_CHECK; r_dcache_fsm_save = DCACHE_IDLE; } // itlb miss request else if ( r_icache_tlb_miss_req.read() ) { r_dcache_tlb_ins = true; r_dcache_tlb_vaddr = r_icache_vaddr_save.read(); r_dcache_fsm = DCACHE_TLB_MISS; } // processor request else if (dreq.valid ) { // dcache access using speculative PPN only if pipe-line empty paddr_t cache_paddr; size_t cache_way; size_t cache_set; size_t cache_word; uint32_t cache_rdata; bool cache_hit; if ( (r_mmu_mode.read() & DATA_CACHE_MASK) and // cache activated not r_dcache_p0_valid.read() and not r_dcache_p1_valid.read() ) // pipe-line empty { cache_paddr = (r_dcache_p0_paddr.read() & ~PAGE_K_MASK) | ((paddr_t)dreq.addr & PAGE_K_MASK); cache_hit = r_dcache.read( cache_paddr, &cache_rdata, &cache_way, &cache_set, &cache_word ); #ifdef INSTRUMENTATION m_cpt_dcache_dir_read++; m_cpt_dcache_data_read++; #endif } else { cache_hit = false; } // end dcache access // systematic dtlb access using virtual address paddr_t tlb_paddr; pte_info_t tlb_flags; size_t tlb_way; size_t tlb_set; paddr_t tlb_nline; bool tlb_hit; if ( r_mmu_mode.read() & DATA_TLB_MASK ) // TLB activated { tlb_hit = r_dtlb.translate( dreq.addr, &tlb_paddr, &tlb_flags, &tlb_nline, &tlb_way, &tlb_set ); #ifdef INSTRUMENTATION m_cpt_dtlb_read++; #endif // register dtlb outputs r_dcache_p0_tlb_nline = tlb_nline; r_dcache_p0_tlb_way = tlb_way; r_dcache_p0_tlb_set = tlb_set; r_dcache_p0_tlb_dirty = tlb_flags.d; r_dcache_p0_tlb_big = tlb_flags.b; } else { tlb_hit = false; } // end dtlb access // register the processor request r_dcache_p0_vaddr = dreq.addr; r_dcache_p0_be = dreq.be; r_dcache_p0_wdata = dreq.wdata; // Handling READ XTN requests from processor // They are executed in this DCACHE_IDLE state. // The processor must not be in user mode if (dreq.type == iss_t::XTN_READ) { int xtn_opcode = (int)dreq.addr/4; // checking processor mode: if (dreq.mode == iss_t::MODE_USER) { r_mmu_detr = MMU_READ_PRIVILEGE_VIOLATION; r_mmu_dbvar = dreq.addr; drsp.valid = true; drsp.error = true; r_dcache_fsm = DCACHE_IDLE; } else { switch( xtn_opcode ) { case iss_t::XTN_INS_ERROR_TYPE: drsp.rdata = r_mmu_ietr.read(); drsp.valid = true; break; case iss_t::XTN_DATA_ERROR_TYPE: drsp.rdata = r_mmu_detr.read(); drsp.valid = true; break; case iss_t::XTN_INS_BAD_VADDR: drsp.rdata = r_mmu_ibvar.read(); drsp.valid = true; break; case iss_t::XTN_DATA_BAD_VADDR: drsp.rdata = r_mmu_dbvar.read(); drsp.valid = true; break; case iss_t::XTN_PTPR: drsp.rdata = r_mmu_ptpr.read(); drsp.valid = true; break; case iss_t::XTN_TLB_MODE: drsp.rdata = r_mmu_mode.read(); drsp.valid = true; break; case iss_t::XTN_MMU_PARAMS: drsp.rdata = r_mmu_params.read(); drsp.valid = true; break; case iss_t::XTN_MMU_RELEASE: drsp.rdata = r_mmu_release.read(); drsp.valid = true; break; case iss_t::XTN_MMU_WORD_LO: drsp.rdata = r_mmu_word_lo.read(); drsp.valid = true; break; case iss_t::XTN_MMU_WORD_HI: drsp.rdata = r_mmu_word_hi.read(); drsp.valid = true; break; default: r_mmu_detr = MMU_READ_UNDEFINED_XTN; r_mmu_dbvar = dreq.addr; drsp.valid = true; drsp.error = true; break; } // end switch xtn_opcode } // end else } // end if XTN_READ // Handling WRITE XTN requests from processor. 
        // They are not executed in this DCACHE_IDLE state
        // if they require access to the caches or the TLBs
        // that are already accessed for the speculative read.
        // Caches can be invalidated or flushed in user mode,
        // and the sync instruction can be executed in user mode.
        else if (dreq.type == iss_t::XTN_WRITE)
        {
            int xtn_opcode      = (int)dreq.addr/4;
            r_dcache_xtn_opcode = xtn_opcode;

            // checking processor mode:
            if ( (dreq.mode == iss_t::MODE_USER) &&
                 (xtn_opcode != iss_t::XTN_SYNC) &&
                 (xtn_opcode != iss_t::XTN_DCACHE_INVAL) &&
                 (xtn_opcode != iss_t::XTN_DCACHE_FLUSH) &&
                 (xtn_opcode != iss_t::XTN_ICACHE_INVAL) &&
                 (xtn_opcode != iss_t::XTN_ICACHE_FLUSH) )
            {
                r_mmu_detr   = MMU_WRITE_PRIVILEGE_VIOLATION;
                r_mmu_dbvar  = dreq.addr;
                drsp.valid   = true;
                drsp.error   = true;
                r_dcache_fsm = DCACHE_IDLE;
            }
            else
            {
                switch( xtn_opcode )
                {
                case iss_t::XTN_PTPR:                 // itlb & dtlb must be flushed
                    r_mmu_ptpr       = dreq.wdata;
                    r_dcache_xtn_req = true;
                    r_dcache_fsm     = DCACHE_XTN_SWITCH;
                    break;

                case iss_t::XTN_TLB_MODE:             // no cache or tlb access
                    r_mmu_mode   = dreq.wdata;
                    drsp.valid   = true;
                    r_dcache_fsm = DCACHE_IDLE;
                    break;

                case iss_t::XTN_DTLB_INVAL:           // dtlb access
                    r_dcache_fsm = DCACHE_XTN_DT_INVAL;
                    break;

                case iss_t::XTN_ITLB_INVAL:           // itlb access
                    r_dcache_xtn_req = true;
                    r_dcache_fsm     = DCACHE_XTN_IT_INVAL;
                    break;

                case iss_t::XTN_DCACHE_INVAL:         // dcache, dtlb & itlb access
                    r_dcache_fsm = DCACHE_XTN_DC_INVAL_VA;
                    break;

                case iss_t::XTN_MMU_DCACHE_PA_INV:    // dcache, dtlb & itlb access
                    r_dcache_fsm = DCACHE_XTN_DC_INVAL_PA;
                    r_dcache_p0_paddr = (paddr_t)r_mmu_word_hi.read() << 32 |
                                        (paddr_t)r_mmu_word_lo.read();
                    break;

                case iss_t::XTN_DCACHE_FLUSH:         // itlb and dtlb must be reset
                    r_dcache_flush_count = 0;
                    r_dcache_fsm         = DCACHE_XTN_DC_FLUSH;
                    break;

                case iss_t::XTN_ICACHE_INVAL:         // icache and itlb access
                    r_dcache_xtn_req = true;
                    r_dcache_fsm     = DCACHE_XTN_IC_INVAL_VA;
                    break;

                case iss_t::XTN_MMU_ICACHE_PA_INV:    // icache access
                    r_dcache_xtn_req = true;
                    r_dcache_fsm     = DCACHE_XTN_IC_INVAL_PA;
                    break;

                case iss_t::XTN_ICACHE_FLUSH:         // icache access
                    r_dcache_xtn_req = true;
                    r_dcache_fsm     = DCACHE_XTN_IC_FLUSH;
                    break;

                case iss_t::XTN_SYNC:                 // wait until write buffer empty
                    r_dcache_fsm = DCACHE_XTN_SYNC;
                    break;

                case iss_t::XTN_MMU_WORD_LO:          // no cache or tlb access
                    r_mmu_word_lo = dreq.wdata;
                    drsp.valid    = true;
                    r_dcache_fsm  = DCACHE_IDLE;
                    break;

                case iss_t::XTN_MMU_WORD_HI:          // no cache or tlb access
                    r_mmu_word_hi = dreq.wdata;
                    drsp.valid    = true;
                    r_dcache_fsm  = DCACHE_IDLE;
                    break;

                case iss_t::XTN_ICACHE_PREFETCH:      // not implemented : no action
                case iss_t::XTN_DCACHE_PREFETCH:      // not implemented : no action
                    drsp.valid   = true;
                    r_dcache_fsm = DCACHE_IDLE;
                    break;

                default:
                    r_mmu_detr   = MMU_WRITE_UNDEFINED_XTN;
                    r_mmu_dbvar  = dreq.addr;
                    drsp.valid   = true;
                    drsp.error   = true;
                    r_dcache_fsm = DCACHE_IDLE;
                    break;
                }   // end switch xtn_opcode
            }   // end else
        }   // end if XTN_WRITE

        // Handling read/write processor requests.
        // The dtlb and dcache can be activated or not.
        // We compute the physical address, the cacheability, and check the processor request.
        // - If DTLB not activated : cacheability is defined by the segment table,
        //   and the physical address is equal to the virtual address (identity mapping).
        // - If DTLB activated : cacheability is defined by the C bit in the PTE,
        //   the physical address is obtained from the TLB, and the U & W bits
        //   of the PTE are checked.
        // The processor request is decoded only if the TLB is not activated or if
        // the virtual address hits in the TLB and the access rights are OK.
        // We call the TLB_MISS sub-fsm in case of dtlb miss.
else { bool valid_req = false; bool cacheable = false; paddr_t paddr = 0; if ( not (r_mmu_mode.read() & DATA_TLB_MASK) ) // dtlb not activated { valid_req = true; // cacheability if ( not (r_mmu_mode.read() & DATA_CACHE_MASK) ) cacheable = false; else cacheable = m_cacheability_table[dreq.addr]; // physical address paddr = (paddr_t)dreq.addr; } else // dtlb activated { if ( tlb_hit ) // tlb hit { // cacheability if ( not (r_mmu_mode.read() & DATA_CACHE_MASK) ) cacheable = false; else cacheable = tlb_flags.c; // access rights checking if ( not tlb_flags.u and (dreq.mode == iss_t::MODE_USER)) { if ( (dreq.type == iss_t::DATA_READ) or (dreq.type == iss_t::DATA_LL) ) r_mmu_detr = MMU_READ_PRIVILEGE_VIOLATION; else r_mmu_detr = MMU_WRITE_PRIVILEGE_VIOLATION; r_mmu_dbvar = dreq.addr; drsp.valid = true; drsp.error = true; drsp.rdata = 0; } else if ( not tlb_flags.w and ((dreq.type == iss_t::DATA_WRITE) or (dreq.type == iss_t::DATA_SC)) ) { r_mmu_detr = MMU_WRITE_ACCES_VIOLATION; r_mmu_dbvar = dreq.addr; drsp.valid = true; drsp.error = true; drsp.rdata = 0; } else { valid_req = true; } // physical address paddr = tlb_paddr; } else // tlb miss { r_dcache_tlb_vaddr = dreq.addr; r_dcache_tlb_ins = false; r_dcache_fsm = DCACHE_TLB_MISS; } } // end DTLB activated if ( valid_req ) // processor request is valid { // physical address and cacheability registration r_dcache_p0_paddr = paddr; r_dcache_p0_cacheable = cacheable; // READ or LL request // The read requests are taken only if the write pipe-line is empty. // If dcache hit, dtlb hit, and speculative PPN OK, data in one cycle. // If speculative access is KO we just pay one extra cycle. // If dcache miss, we go to DCACHE_MISS_VICTIM state. // If uncacheable, we go to DCACHE_UNC_WAIT state. if ( ((dreq.type == iss_t::DATA_READ) or (dreq.type == iss_t::DATA_LL)) and not r_dcache_p0_valid.read() and not r_dcache_p1_valid.read() ) { if ( cacheable ) // cacheable read { // if the speculative access is illegal, we pay an extra cycle if ( (r_dcache_p0_paddr.read() & ~PAGE_K_MASK) != (paddr & ~PAGE_K_MASK)) { #ifdef INSTRUMENTATION m_cpt_dcache_spec_miss++; #endif } // if cache miss, try to get the missing line else if ( not cache_hit ) { #ifdef INSTRUMENTATION m_cpt_dcache_miss++; #endif // blocked in IDLE state if previous cleanup not completed if ( not r_dcache_cleanup_req.read() ) { r_dcache_vci_paddr = paddr; r_dcache_vci_miss_req = true; r_dcache_miss_type = PROC_MISS; r_dcache_fsm = DCACHE_MISS_VICTIM; } } // if cache hit return the data else { #ifdef INSTRUMENTATION m_cpt_data_read++; #endif drsp.valid = true; drsp.rdata = cache_rdata; } } else // uncacheable read { r_dcache_vci_paddr = paddr; r_dcache_vci_unc_be = dreq.be; r_dcache_vci_unc_req = true; r_dcache_fsm = DCACHE_UNC_WAIT; } // makes reservation in case of LL if ( dreq.type == iss_t::DATA_LL ) { r_dcache_ll_valid = true; r_dcache_ll_data = cache_rdata; r_dcache_ll_vaddr = dreq.addr; } } // end READ or LL // WRITE request: // The write request arguments have been registered. // The physical address has been computed and registered. // We acknowledge the processor request and activate the P1 pipeline stage. 
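        // Illustrative trace (hypothetical): for a burst of three writes W0/W1/W2,
        // W0 is in P0 at cycle n, in P1 (wbuf registration + dcache hit test) at
        // cycle n+1 while W1 is in P0, and in P2 (dcache update) at cycle n+2
        // while W1 is in P1 and W2 is in P0: one write is retired per cycle as
        // long as the write buffer accepts the requests.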
        else if ( dreq.type == iss_t::DATA_WRITE )
        {
#ifdef INSTRUMENTATION
m_cpt_data_write++;
#endif
            p0_valid   = true;
            drsp.valid = true;
            drsp.rdata = 0;
        }   // end WRITE

        // SC request:
        // - if a valid LL reservation (with the same address) is registered,
        //   we request a SC transaction to the CMD FSM and go to the DCACHE_UNC_WAIT
        //   state, that will directly return the response to the processor, and
        //   invalidate the LL reservation. We don't check a possible hit in dcache,
        //   as this is done by the coherence transaction...
        // - if there is no registered LL, we just stay in IDLE state, invalidate
        //   the LL reservation, and return 1 (atomic access failed)
        else if ( dreq.type == iss_t::DATA_SC )
        {
#ifdef INSTRUMENTATION
m_cpt_data_sc++;
#endif
            // test if valid registered LL
            if ( r_dcache_ll_valid.read() and (r_dcache_ll_vaddr.read() == dreq.addr))
            {
                r_dcache_vci_paddr  = paddr;
                r_dcache_vci_sc_req = true;
                r_dcache_vci_sc_old = r_dcache_ll_data.read();
                r_dcache_vci_sc_new = dreq.wdata;
                r_dcache_fsm        = DCACHE_UNC_WAIT;
            }
            else   // no registered LL
            {
                drsp.valid        = true;
                drsp.rdata        = 1;
                r_dcache_ll_valid = false;
            }
        }   // end SC
        }   // end valid_req
        }   // end if read/write request
        }   // end P0 pipe stage

        r_dcache_p0_valid = p0_valid;
        break;
    }
    /////////////////////
    case DCACHE_TLB_MISS:  // This is the entry point for the sub-fsm handling tlb miss.
                           // - Input arguments are r_dcache_tlb_vaddr & r_dcache_tlb_ins.
                           // - It tries to find the missing TLB entry in dcache,
                           //   and activates the cache miss procedure in case of miss.
                           // - It bypasses the first level page table access if possible.
                           // - It uses atomic access to update the R/L access bits
                           //   in the page table if required.
                           // - It directly updates the itlb or dtlb, and writes into the
                           //   r_mmu_ins_* or r_mmu_data_* error reporting registers.
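    // Worked example (illustrative, assuming the usual organization of this
    // generic MMU: INDEX1_NBITS = 11, PAGE_M_NBITS = 21, PAGE_K_NBITS = 12,
    // i.e. 4 Kbyte small pages and 2 Mbyte big pages):
    // for vaddr = 0x00402468, IX1 = vaddr >> 21 = 0x002 and IX2 = 0x002, so the
    // PTE1 address is (ptpr << 13) | (IX1 << 2); if the PTE1 is a PTD pointing
    // to ptba, the PTE2 address is (ptba << 12) | (IX2 << 3), since a PTE2
    // occupies 8 bytes (flags + PPN).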
{ uint32_t ptba = 0; bool bypass; paddr_t paddr; // evaluate bypass in order to skip first level page table access if ( r_dcache_tlb_ins.read() ) // itlb miss { bypass = r_itlb.get_bypass(r_dcache_tlb_vaddr.read(), &ptba); } else // dtlb miss { bypass = r_dtlb.get_bypass(r_dcache_tlb_vaddr.read(), &ptba); } if ( not bypass ) // Try to read the PTE1/PTD1 in dcache { paddr = (paddr_t)r_mmu_ptpr.read() << (INDEX1_NBITS+2) | (paddr_t)((r_dcache_tlb_vaddr.read() >> PAGE_M_NBITS) << 2); r_dcache_tlb_paddr = paddr; r_dcache_fsm = DCACHE_TLB_PTE1_GET; } else // Try to read directly the PTE2 in dcache { paddr = (paddr_t)ptba << PAGE_K_NBITS | (paddr_t)(r_dcache_tlb_vaddr.read()&PTD_ID2_MASK)>>(PAGE_K_NBITS-3); r_dcache_tlb_paddr = paddr; r_dcache_fsm = DCACHE_TLB_PTE2_GET; } #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { if ( r_dcache_tlb_ins.read() ) std::cout << " ITLB miss request:"; else std::cout << " DTLB miss request:"; std::cout << " vaddr = " << std::hex << r_dcache_tlb_vaddr.read() << " / bypass = " << bypass << " / PTE address = " << paddr << std::endl; } #endif break; } ///////////////////////// case DCACHE_TLB_PTE1_GET: // try to read a PT1 entry in dcache { uint32_t entry; size_t way; size_t set; size_t word; bool hit = r_dcache.read( r_dcache_tlb_paddr.read(), &entry, &way, &set, &word ); #ifdef INSTRUMENTATION m_cpt_dcache_data_read++; m_cpt_dcache_dir_read++; #endif if ( hit ) // request hit in dcache { if ( not (entry & PTE_V_MASK) ) // unmapped { if ( r_dcache_tlb_ins.read() ) { r_mmu_ietr = MMU_READ_PT1_UNMAPPED; r_mmu_ibvar = r_dcache_tlb_vaddr.read(); r_icache_tlb_miss_req = false; r_icache_tlb_rsp_error = true; } else { r_mmu_detr = MMU_READ_PT1_UNMAPPED; r_mmu_dbvar = r_dcache_tlb_vaddr.read(); drsp.valid = true; drsp.error = true; } r_dcache_fsm = DCACHE_IDLE; #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " HIT in dcache, but unmapped:" << std::dec << " way = " << way << std::dec << " / set = " << set << std::dec << " / word = " << word << std::hex << " / PTE1 = " << entry << std::endl; } #endif } else if( entry & PTE_T_MASK ) // PTD : access PT2 { // register bypass if ( r_dcache_tlb_ins.read() ) // itlb { r_itlb.set_bypass(r_dcache_tlb_vaddr.read(), entry & ((1 << (m_paddr_nbits-PAGE_K_NBITS)) - 1), r_dcache_tlb_paddr.read() >> (uint32_log2(m_icache_words<<2))); } else // dtlb { r_dtlb.set_bypass(r_dcache_tlb_vaddr.read(), entry & ((1 << (m_paddr_nbits-PAGE_K_NBITS)) - 1), r_dcache_tlb_paddr.read() >> (uint32_log2(m_dcache_words)+2)); } r_dcache_tlb_paddr = (paddr_t)(entry & ((1<<(m_paddr_nbits-PAGE_K_NBITS))-1)) << PAGE_K_NBITS | (paddr_t)(((r_dcache_tlb_vaddr.read() & PTD_ID2_MASK) >> PAGE_K_NBITS) << 3); r_dcache_fsm = DCACHE_TLB_PTE2_GET; #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " HIT in dcache " << std::dec << " way = " << way << std::dec << " / set = " << set << std::dec << " / word = " << word << std::hex << " / PTD = " << entry << std::endl; } #endif } else // PTE1 : update the TLB { if ( r_dcache_tlb_ins.read() ) r_dcache_in_itlb[m_icache_sets*way+set] = true; else r_dcache_in_dtlb[m_dcache_sets*way+set] = true; r_dcache_tlb_pte_flags = entry; r_dcache_tlb_cache_way = way; r_dcache_tlb_cache_set = set; r_dcache_tlb_cache_word = word; r_dcache_fsm = DCACHE_TLB_PTE1_SELECT; #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " HIT in dcache:" << std::dec << " way = " << way << std::dec << " / set = " << set << std::dec << " / word = " << word << std::hex << " / PTE1 = " << entry << std::endl; } #endif } } else // we must load the 
missing cache line in dcache { r_dcache_vci_paddr = r_dcache_tlb_paddr.read(); r_dcache_miss_type = PTE1_MISS; r_dcache_fsm = DCACHE_MISS_VICTIM; r_dcache_vci_miss_req = true; #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { std::cout << " MISS in dcache:" << " PTE1 address = " << std::hex << r_dcache_tlb_paddr.read() << std::endl; } #endif } break; } //////////////////////////// case DCACHE_TLB_PTE1_SELECT: // select a slot for PTE1 { size_t way; size_t set; if ( r_dcache_tlb_ins.read() ) { r_itlb.select( r_dcache_tlb_vaddr.read(), true, // PTE1 &way, &set ); #ifdef INSTRUMENTATION m_cpt_itlb_read++; #endif } else { r_dtlb.select( r_dcache_tlb_vaddr.read(), true, // PTE1 &way, &set ); #ifdef INSTRUMENTATION m_cpt_dtlb_read++; #endif } r_dcache_tlb_way = way; r_dcache_tlb_set = set; r_dcache_fsm = DCACHE_TLB_PTE1_UPDT; #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { if ( r_dcache_tlb_ins.read() ) std::cout << " Select a slot in ITLB:"; else std::cout << " Select a slot in DTLB:"; std::cout << " way = " << std::dec << way << " / set = " << set << std::endl; } #endif break; } ////////////////////////// case DCACHE_TLB_PTE1_UPDT: // write a new PTE1 in tlb after testing the L/R bit // if L/R bit already set, exit the sub-fsm // if not, the page table must be updated { paddr_t nline = r_dcache_tlb_paddr.read() >> (uint32_log2(m_dcache_words)+2); uint32_t pte = r_dcache_tlb_pte_flags.read(); bool updt = false; // test the access bits L/R, depending on the physical address locality // we must use the 10 MSB bits of the 19 bits PPN1 to obtain the target index // we must use the 10 MSB bits of the SRCID to obtain the local index // set the r_dcache_vci_sc_old and r_dcache_vci_sc_new registers if SC required uint32_t target = (pte >> 9) & 0x3FF; uint32_t local = m_srcid_d >> 4; if ( local == target ) // local_address { if ( not ((pte & PTE_L_MASK) == PTE_L_MASK) ) // we must set the L bit { updt = true; r_dcache_vci_sc_old = r_dcache_tlb_pte_flags.read(); r_dcache_vci_sc_new = r_dcache_tlb_pte_flags.read() | PTE_L_MASK; } } else // remote address { if ( not ((pte & PTE_R_MASK) == PTE_R_MASK) ) // we must set the R bit { updt = true; r_dcache_vci_sc_old = r_dcache_tlb_pte_flags.read(); r_dcache_vci_sc_new = r_dcache_tlb_pte_flags.read() | PTE_R_MASK; } } // update TLB if ( r_dcache_tlb_ins.read() ) { r_itlb.write( pte, r_dcache_tlb_vaddr.read(), r_dcache_tlb_way.read(), r_dcache_tlb_set.read(), nline ); #ifdef INSTRUMENTATION m_cpt_itlb_write++; #endif } else { r_dtlb.write( pte, r_dcache_tlb_vaddr.read(), r_dcache_tlb_way.read(), r_dcache_tlb_set.read(), nline ); #ifdef INSTRUMENTATION m_cpt_dtlb_write++; #endif } // next state if ( updt ) r_dcache_fsm = DCACHE_TLB_SC_UPDT; // dcache and page table update else r_dcache_fsm = DCACHE_TLB_RETURN; // exit sub-fsm #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { if ( r_dcache_tlb_ins.read() ) { std::cout << " write PTE1 in ITLB:"; std::cout << " way = " << std::dec << r_dcache_tlb_way.read() << " / set = " << r_dcache_tlb_set.read() << std::endl; r_itlb.print(); } else { std::cout << " write PTE1 in DTLB:"; std::cout << " way = " << std::dec << r_dcache_tlb_way.read() << " / set = " << r_dcache_tlb_set.read() << std::endl; r_dtlb.print(); } } #endif break; } ///////////////////////// case DCACHE_TLB_PTE2_GET: // Try to get a PTE2 (64 bits) in the dcache { uint32_t pte_flags; uint32_t pte_ppn; size_t way; size_t set; size_t word; bool hit = r_dcache.read( r_dcache_tlb_paddr.read(), &pte_flags, &pte_ppn, &way, &set, &word ); #ifdef INSTRUMENTATION 
m_cpt_dcache_data_read++;
m_cpt_dcache_dir_read++;
#endif
        if ( hit )   // request hits in dcache
        {
            if ( not (pte_flags & PTE_V_MASK) )   // unmapped
            {
                if ( r_dcache_tlb_ins.read() )
                {
                    r_mmu_ietr             = MMU_READ_PT2_UNMAPPED;
                    r_mmu_ibvar            = r_dcache_tlb_vaddr.read();
                    r_icache_tlb_miss_req  = false;
                    r_icache_tlb_rsp_error = true;
                }
                else
                {
                    r_mmu_detr  = MMU_READ_PT2_UNMAPPED;
                    r_mmu_dbvar = r_dcache_tlb_vaddr.read();
                    drsp.valid  = true;
                    drsp.error  = true;
                }
                r_dcache_fsm = DCACHE_IDLE;

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " HIT in dcache, but PTE is unmapped"
              << " PTE_FLAGS = " << std::hex << pte_flags
              << " PTE_PPN = " << std::hex << pte_ppn << std::endl;
}
#endif
            }
            else   // mapped : update the TLB
            {
                if ( r_dcache_tlb_ins.read() ) r_dcache_in_itlb[m_icache_sets*way+set] = true;
                else                           r_dcache_in_dtlb[m_dcache_sets*way+set] = true;
                r_dcache_tlb_pte_flags  = pte_flags;
                r_dcache_tlb_pte_ppn    = pte_ppn;
                r_dcache_tlb_cache_way  = way;
                r_dcache_tlb_cache_set  = set;
                r_dcache_tlb_cache_word = word;
                r_dcache_fsm            = DCACHE_TLB_PTE2_SELECT;

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " HIT in dcache:"
              << " PTE_FLAGS = " << std::hex << pte_flags
              << " PTE_PPN = " << std::hex << pte_ppn << std::endl;
}
#endif
            }
        }
        else   // we must load the missing cache line in dcache
        {
            r_dcache_vci_paddr    = r_dcache_tlb_paddr.read();
            r_dcache_miss_type    = PTE2_MISS;
            r_dcache_fsm          = DCACHE_MISS_VICTIM;
            r_dcache_vci_miss_req = true;

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " MISS in dcache:"
              << " PTE address = " << std::hex << r_dcache_tlb_paddr.read() << std::endl;
}
#endif
        }
        break;
    }
    ////////////////////////////
    case DCACHE_TLB_PTE2_SELECT:  // select a slot for PTE2
    {
        size_t way;
        size_t set;
        if ( r_dcache_tlb_ins.read() )
        {
            r_itlb.select( r_dcache_tlb_vaddr.read(),
                           false,   // PTE2
                           &way,
                           &set );
#ifdef INSTRUMENTATION
m_cpt_itlb_read++;
#endif
        }
        else
        {
            r_dtlb.select( r_dcache_tlb_vaddr.read(),
                           false,   // PTE2
                           &way,
                           &set );
#ifdef INSTRUMENTATION
m_cpt_dtlb_read++;
#endif
        }
        r_dcache_tlb_way = way;
        r_dcache_tlb_set = set;
        r_dcache_fsm     = DCACHE_TLB_PTE2_UPDT;
        break;
    }
    //////////////////////////
    case DCACHE_TLB_PTE2_UPDT:  // write a new PTE2 in tlb after testing the L/R bit:
                                // if the L/R bit is already set, exit the sub-fsm;
                                // if not, the page table must be updated by an atomic access
    {
        paddr_t  nline     = r_dcache_tlb_paddr.read() >> (uint32_log2(m_dcache_words)+2);
        uint32_t pte_flags = r_dcache_tlb_pte_flags.read();
        uint32_t pte_ppn   = r_dcache_tlb_pte_ppn.read();
        bool     updt      = false;   // page table update required

        // test the access bit L/R, depending on the physical address locality:
        // we must use the 10 MSB bits of the 28-bit PPN2 to obtain the target cluster index,
        // and the 10 MSB bits of the SRCID to obtain the local cluster index;
        // set the r_dcache_vci_sc_old and r_dcache_vci_sc_new registers if a SC is required.
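        // Worked example (illustrative, hypothetical values): with pte_ppn = 0x0AC0123,
        // the target cluster index is (0x0AC0123 >> 18) & 0x3FF = 0x2B; if this
        // processor has m_srcid_d = 0x2B0, the local index is 0x2B0 >> 4 = 0x2B,
        // the address is local, and only the "L" bit needs to be set (when not
        // already set) through the compare-and-swap prepared in the
        // r_dcache_vci_sc_old / r_dcache_vci_sc_new registers.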
uint32_t target = (pte_ppn >> 18) & 0x3FF; uint32_t local = m_srcid_d >> 4; if ( local == target ) // local address { if ( not ((pte_flags & PTE_L_MASK) == PTE_L_MASK) ) // we must set the L bit { updt = true; r_dcache_vci_sc_old = r_dcache_tlb_pte_flags.read(); r_dcache_vci_sc_new = r_dcache_tlb_pte_flags.read() | PTE_L_MASK; } } else // remote address { if ( not ((pte_flags & PTE_R_MASK) == PTE_R_MASK) ) // we must set the R bit { updt = true; r_dcache_vci_sc_old = r_dcache_tlb_pte_flags.read(); r_dcache_vci_sc_new = r_dcache_tlb_pte_flags.read() | PTE_R_MASK; } } // update TLB for a PTE2 if ( r_dcache_tlb_ins.read() ) { r_itlb.write( pte_flags, pte_ppn, r_dcache_tlb_vaddr.read(), r_dcache_tlb_way.read(), r_dcache_tlb_set.read(), nline ); #ifdef INSTRUMENTATION m_cpt_itlb_write++; #endif } else { r_dtlb.write( pte_flags, pte_ppn, r_dcache_tlb_vaddr.read(), r_dcache_tlb_way.read(), r_dcache_tlb_set.read(), nline ); #ifdef INSTRUMENTATION m_cpt_dtlb_write++; #endif } #if DEBUG_DCACHE if ( m_debug_dcache_fsm ) { if ( r_dcache_tlb_ins.read() ) { std::cout << " write PTE2 in ITLB:"; std::cout << " way = " << std::dec << r_dcache_tlb_way.read() << " / set = " << r_dcache_tlb_set.read() << std::endl; r_itlb.print(); } else { std::cout << " write PTE2 in DTLB:"; std::cout << " way = " << std::dec << r_dcache_tlb_way.read() << " / set = " << r_dcache_tlb_set.read() << std::endl; r_dtlb.print(); } } #endif // next state if ( updt ) r_dcache_fsm = DCACHE_TLB_SC_UPDT; // dcache and page table update else r_dcache_fsm = DCACHE_TLB_RETURN; // exit sub-fsm break; } //////////////////////// case DCACHE_TLB_SC_UPDT: // update the dcache after a tlb miss (L/R bit), // request a SC transaction to CMD FSM { r_dcache.write(r_dcache_tlb_cache_way.read(), r_dcache_tlb_cache_set.read(), r_dcache_tlb_cache_word.read(), r_dcache_tlb_pte_flags.read()); #ifdef INSTRUMENTATION m_cpt_dcache_data_write++; #endif // r_dcache_vci_sc_old & r_dcache_vci_sc_new registers are already set r_dcache_vci_sc_req = true; r_dcache_fsm = DCACHE_TLB_SC_WAIT; break; } //////////////////////// case DCACHE_TLB_SC_WAIT: // wait response to SC transaction from RSP FSM // we consume the response, and exit the sub-fsm. 
                              // we don't analyse the response, because
                              // we don't care if the L/R bit update is not done
    {
        if ( not r_dcache_vci_sc_req.read() )   // response available
        {
            if      ( r_vci_rsp_data_error.read() )  r_vci_rsp_data_error = false;
            else if ( r_vci_rsp_fifo_dcache.rok() )  vci_rsp_fifo_dcache_get = true;
            else
            {
                assert( false and
                "rsp_fifo should not be empty in DCACHE_TLB_SC_WAIT state" );
            }
            r_dcache_fsm = DCACHE_TLB_RETURN;
        }
        break;
    }
    ///////////////////////
    case DCACHE_TLB_RETURN:  // return to the caller state, depending on the tlb miss type
    {
        if ( r_dcache_tlb_ins.read() ) r_icache_tlb_miss_req = false;
        r_dcache_fsm = DCACHE_IDLE;
        break;
    }
    ///////////////////////
    case DCACHE_XTN_SWITCH:  // Both itlb and dtlb must be flushed
    {
        if ( not r_dcache_xtn_req.read() )
        {
            r_dtlb.flush();
            r_dcache_fsm = DCACHE_IDLE;
            drsp.valid   = true;
        }
        break;
    }
    /////////////////////
    case DCACHE_XTN_SYNC:  // waiting until the write buffer is empty.
                           // The coherence request must be taken,
                           // as there is a risk of deadlock
    {
        // external coherence request
        if ( r_tgt_dcache_req.read() )
        {
            r_dcache_fsm      = DCACHE_CC_CHECK;
            r_dcache_fsm_save = DCACHE_XTN_SYNC;
        }
        if ( r_wbuf.empty() )
        {
            drsp.valid   = true;
            r_dcache_fsm = DCACHE_IDLE;
        }
        break;
    }
    ////////////////////////
    case DCACHE_XTN_IC_FLUSH:     // Waiting completion of an XTN request to the ICACHE FSM.
    case DCACHE_XTN_IC_INVAL_VA:  // Caution : the itlb miss requests must be taken,
    case DCACHE_XTN_IC_INVAL_PA:  // because the XTN_ICACHE_INVAL request to icache
    case DCACHE_XTN_IT_INVAL:     // can generate an itlb miss...
    {
        // external coherence request
        if ( r_tgt_dcache_req )
        {
            r_dcache_fsm      = DCACHE_CC_CHECK;
            r_dcache_fsm_save = r_dcache_fsm;
            break;
        }
        // itlb miss request
        if ( r_icache_tlb_miss_req.read() )
        {
            r_dcache_tlb_ins   = true;
            r_dcache_tlb_vaddr = r_icache_vaddr_save.read();
            r_dcache_fsm       = DCACHE_TLB_MISS;
            break;
        }
        // test if the XTN request to icache is completed
        if ( not r_dcache_xtn_req.read() )
        {
            r_dcache_fsm = DCACHE_IDLE;
            drsp.valid   = true;
        }
        break;
    }
    /////////////////////////
    case DCACHE_XTN_DC_FLUSH:  // Invalidate sequentially all cache lines, using
                               // the r_dcache_flush_count register as a slot counter.
                               // We loop in this state until all slots have been visited.
                               // A cleanup request is generated for each valid line,
                               // and we are blocked until the previous cleanup is completed.
                               // Finally, both the itlb and dtlb are reset, because
                               // all TLB entries (including global entries) must be invalidated.
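    // Worked example (illustrative): with m_dcache_ways = 4 and m_dcache_sets = 64,
    // r_dcache_flush_count scans slots 0 to 255, visiting way = count/64 and
    // set = count%64, so a full flush takes at least 256 cycles (more when a
    // pending cleanup stalls the loop).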
    {
        if ( not r_dcache_cleanup_req.read() )
        {
            paddr_t nline;
            size_t  way         = r_dcache_flush_count.read()/m_dcache_sets;
            size_t  set         = r_dcache_flush_count.read()%m_dcache_sets;
            bool    cleanup_req = r_dcache.inval( way,
                                                  set,
                                                  &nline );
            if ( cleanup_req )
            {
                r_dcache_cleanup_req  = true;
                r_dcache_cleanup_line = nline;
            }
            r_dcache_flush_count = r_dcache_flush_count.read() + 1;
            if ( r_dcache_flush_count.read() == (m_dcache_sets*m_dcache_ways - 1) )   // last slot
            {
                r_dtlb.reset();   // global entries are invalidated
                r_itlb.reset();   // global entries are invalidated
                for (size_t line = 0; line < m_dcache_ways*m_dcache_sets; line++)
                {
                    r_dcache_in_itlb[line] = false;
                    r_dcache_in_dtlb[line] = false;
                }
                r_dcache_fsm = DCACHE_IDLE;
                drsp.valid   = true;
            }
        }
        break;
    }
    /////////////////////////
    case DCACHE_XTN_DT_INVAL:  // handling processor XTN_DTLB_INVAL request
    {
        r_dtlb.inval(r_dcache_p0_wdata.read());
        r_dcache_fsm = DCACHE_IDLE;
        drsp.valid   = true;
        break;
    }
    ////////////////////////////
    case DCACHE_XTN_DC_INVAL_VA:  // selective cache line invalidate with virtual address:
                                  // requires 3 cycles (access tlb, read cache, inval cache);
                                  // we compute the physical address in this state
    {
        paddr_t paddr;
        bool    hit;
        if ( r_mmu_mode.read() & DATA_TLB_MASK )   // dtlb activated
        {
#ifdef INSTRUMENTATION
m_cpt_dtlb_read++;
#endif
            hit = r_dtlb.translate( r_dcache_p0_wdata.read(),
                                    &paddr );
        }
        else   // dtlb not activated
        {
            paddr = (paddr_t)r_dcache_p0_wdata.read();
            hit   = true;
        }
        if ( hit )   // tlb hit
        {
            r_dcache_p0_paddr = paddr;
            r_dcache_fsm      = DCACHE_XTN_DC_INVAL_PA;
        }
        else   // tlb miss
        {
#ifdef INSTRUMENTATION
m_cpt_dtlb_miss++;
#endif
            r_dcache_tlb_ins   = false;   // dtlb
            r_dcache_tlb_vaddr = r_dcache_p0_wdata.read();
            r_dcache_fsm       = DCACHE_TLB_MISS;
        }
        break;
    }
    ////////////////////////////
    case DCACHE_XTN_DC_INVAL_PA:  // selective cache line invalidate with physical address:
                                  // requires 2 cycles (read cache / inval cache);
                                  // in this state we read dcache.
    {
        uint32_t data;
        size_t   way;
        size_t   set;
        size_t   word;
        bool     hit = r_dcache.read( r_dcache_p0_paddr.read(),
                                      &data,
                                      &way,
                                      &set,
                                      &word );
#ifdef INSTRUMENTATION
m_cpt_dcache_data_read++;
m_cpt_dcache_dir_read++;
#endif
        if ( hit )   // inval to be done
        {
            r_dcache_xtn_way = way;
            r_dcache_xtn_set = set;
            r_dcache_fsm     = DCACHE_XTN_DC_INVAL_GO;
        }
        else   // miss : nothing to do
        {
            r_dcache_fsm = DCACHE_IDLE;
            drsp.valid   = true;
        }
        break;
    }
    ////////////////////////////
    case DCACHE_XTN_DC_INVAL_GO:  // In this state, we invalidate the cache line & cleanup.
                                  // Blocked if the previous cleanup is not completed
    {
        if ( not r_dcache_cleanup_req.read() )
        {
            paddr_t nline;
            size_t  way       = r_dcache_xtn_way.read();
            size_t  set       = r_dcache_xtn_set.read();
            bool    inval_tlb = false;

            r_dcache.inval( way,
                            set,
                            &nline );

            // request cleanup
            r_dcache_cleanup_req  = true;
            r_dcache_cleanup_line = nline;

            // possible itlb & dtlb invalidate requests
            r_dcache_tlb_inval_line = nline;
            if ( (r_mmu_mode.read() & DATA_TLB_MASK) and
                 r_dcache_in_dtlb[way*m_dcache_sets+set] )
            {
                r_dcache_dtlb_inval_req = true;
                r_dcache_in_dtlb[way*m_dcache_sets+set] = false;
                inval_tlb = true;
            }
            if ( (r_mmu_mode.read() & INS_TLB_MASK) and
                 r_dcache_in_itlb[way*m_dcache_sets+set] )
            {
                r_dcache_itlb_inval_req = true;
                r_dcache_in_itlb[way*m_dcache_sets+set] = false;
                inval_tlb = true;
            }
            // no valid response until itlb & dtlb invalidated
            if ( inval_tlb )
            {
                r_dcache_fsm = DCACHE_XTN_DC_INVAL_WAIT;
            }
            else
            {
                r_dcache_fsm = DCACHE_IDLE;
                drsp.valid   = true;
            }
        }
        break;
    }
    //////////////////////////////
    case DCACHE_XTN_DC_INVAL_WAIT:  // waiting completion of the itlb and dtlb invalidate
    {
        if ( not (r_dcache_itlb_inval_req.read() or r_dcache_dtlb_inval_req.read()) )
        {
            r_dcache_fsm = DCACHE_IDLE;
            drsp.valid   = true;
        }
        break;
    }
    ////////////////////////
    case DCACHE_MISS_VICTIM:  // Selects a victim line.
                              // Sets the r_dcache_cleanup_req flip-flop
                              // when the selected slot is not empty
    {
        bool    valid;
        size_t  way;
        size_t  set;
        paddr_t victim;
        valid = r_dcache.victim_select( r_dcache_vci_paddr.read(),
                                        &victim,
                                        &way,
                                        &set );
        r_dcache_miss_way = way;
        r_dcache_miss_set = set;
        if ( valid )
        {
            r_dcache_cleanup_req  = true;
            r_dcache_cleanup_line = victim;
            r_dcache_fsm          = DCACHE_MISS_INVAL;
        }
        else
        {
            r_dcache_fsm = DCACHE_MISS_WAIT;
        }

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " Select a slot:"
              << " / way = " << way
              << " / set = " << set
              << " / valid = " << valid
              << " / line = " << std::hex << victim << std::endl;
}
#endif
        break;
    }
    ///////////////////////
    case DCACHE_MISS_INVAL:  // invalidate the victim line
                             // and possibly request an itlb or dtlb invalidate
    {
        paddr_t nline;
        size_t  way = r_dcache_miss_way.read();
        size_t  set = r_dcache_miss_set.read();
        r_dcache.inval( way,
                        set,
                        &nline );
        // if an itlb or dtlb invalidate is required,
        // the miss response is not handled before the invalidate completes
        if ( (r_mmu_mode.read() & DATA_TLB_MASK) and
             ( r_dcache_in_itlb[way*m_dcache_sets+set] or
               r_dcache_in_dtlb[way*m_dcache_sets+set] ) )
        {
            r_dcache_tlb_inval_line = r_dcache_vci_paddr.read() >>
                                      (uint32_log2(m_dcache_words)+2);
            r_dcache_itlb_inval_req = r_dcache_in_itlb[way*m_dcache_sets+set];
            r_dcache_in_itlb[way*m_dcache_sets+set] = false;
            r_dcache_dtlb_inval_req = r_dcache_in_dtlb[way*m_dcache_sets+set];
            r_dcache_in_dtlb[way*m_dcache_sets+set] = false;
            r_dcache_fsm = DCACHE_MISS_INVAL_WAIT;
        }
        else
        {
            r_dcache_fsm = DCACHE_MISS_WAIT;
        }
        break;
    }
    ////////////////////////////
    case DCACHE_MISS_INVAL_WAIT:  // waiting completion of the itlb / dtlb invalidate
    {
        if ( (not r_dcache_itlb_inval_req.read()) and
             (not r_dcache_dtlb_inval_req.read()) )
        {
            r_dcache_fsm = DCACHE_MISS_WAIT;
        }
        break;
    }
    //////////////////////
    case DCACHE_MISS_WAIT:  // waiting for the response to a miss request from the VCI_RSP FSM.
                            // This state is in charge of error signaling:
                            // there are 5 error types, depending on the requester
    {
        // external coherence request
        if ( r_tgt_dcache_req )
        {
            r_dcache_fsm      = DCACHE_CC_CHECK;
            r_dcache_fsm_save = r_dcache_fsm;
            break;
        }

        if ( r_vci_rsp_data_error.read() )   // bus error
        {
            switch ( r_dcache_miss_type.read() )
            {
            case PROC_MISS:
            {
                r_mmu_detr   = MMU_READ_DATA_ILLEGAL_ACCESS;
                r_mmu_dbvar  = r_dcache_p0_vaddr.read();
                drsp.valid   = true;
                drsp.error   = true;
                r_dcache_fsm = DCACHE_IDLE;
                break;
            }
            case PTE1_MISS:
            {
                if ( r_dcache_tlb_ins.read() )
                {
                    r_mmu_ietr             = MMU_READ_PT1_ILLEGAL_ACCESS;
                    r_mmu_ibvar            = r_dcache_tlb_vaddr.read();
                    r_icache_tlb_miss_req  = false;
                    r_icache_tlb_rsp_error = true;
                }
                else
                {
                    r_mmu_detr  = MMU_READ_PT1_ILLEGAL_ACCESS;
                    r_mmu_dbvar = r_dcache_tlb_vaddr.read();
                    drsp.valid  = true;
                    drsp.error  = true;
                }
                r_dcache_fsm = DCACHE_IDLE;
                break;
            }
            case PTE2_MISS:
            {
                if ( r_dcache_tlb_ins.read() )
                {
                    r_mmu_ietr             = MMU_READ_PT2_ILLEGAL_ACCESS;
                    r_mmu_ibvar            = r_dcache_tlb_vaddr.read();
                    r_icache_tlb_miss_req  = false;
                    r_icache_tlb_rsp_error = true;
                }
                else
                {
                    r_mmu_detr  = MMU_READ_PT2_ILLEGAL_ACCESS;
                    r_mmu_dbvar = r_dcache_tlb_vaddr.read();
                    drsp.valid  = true;
                    drsp.error  = true;
                }
                r_dcache_fsm = DCACHE_IDLE;
                break;
            }
            }   // end switch type
            r_vci_rsp_data_error = false;
        }
        else if ( r_vci_rsp_fifo_dcache.rok() )   // valid response available
        {
            r_dcache_miss_word = 0;
            r_dcache_fsm       = DCACHE_MISS_UPDT;
        }
        break;
    }
    //////////////////////
    case DCACHE_MISS_UPDT:  // update the dcache (one word per cycle);
                            // returns the response depending on the miss type
    {
        if ( r_vci_rsp_fifo_dcache.rok() )   // one word available
        {
            if ( r_dcache_miss_inval.read() )   // Matching coherence request:
                                                // pop the FIFO, without cache update;
                                                // send a cleanup for the missing line
                                                // if the previous cleanup is completed
            {
                if ( r_dcache_miss_word.read() < (m_dcache_words - 1) )   // not the last word
                {
                    vci_rsp_fifo_dcache_get = true;
                    r_dcache_miss_word      = r_dcache_miss_word.read() + 1;
                }
                else   // last word
                {
                    if ( not r_dcache_cleanup_req.read() )   // no pending cleanup
                    {
                        vci_rsp_fifo_dcache_get = true;
                        r_dcache_cleanup_req    = true;
                        r_dcache_cleanup_line   = r_dcache_vci_paddr.read() >>
                                                  (uint32_log2(m_dcache_words)+2);
                        r_dcache_miss_inval     = false;
                        r_dcache_fsm            = DCACHE_IDLE;
                    }
                }
            }
            else   // No matching coherence request:
                   // pop the FIFO and update the cache;
                   // update the directory at the last word;
                   // send a response to the ICACHE FSM in case of itlb miss
            {
#ifdef INSTRUMENTATION
m_cpt_dcache_data_write++;
#endif
                r_dcache.write( r_dcache_miss_way.read(),
                                r_dcache_miss_set.read(),
                                r_dcache_miss_word.read(),
                                r_vci_rsp_fifo_dcache.read());
                vci_rsp_fifo_dcache_get = true;
                r_dcache_miss_word      = r_dcache_miss_word.read() + 1;

                // if last word, update directory, set in_itlb & in_dtlb bits
                if ( r_dcache_miss_word.read() == (m_dcache_words - 1) )
                {
#ifdef INSTRUMENTATION
m_cpt_dcache_dir_write++;
#endif
                    r_dcache.victim_update_tag( r_dcache_vci_paddr.read(),
                                                r_dcache_miss_way.read(),
                                                r_dcache_miss_set.read() );
                    if      (r_dcache_miss_type.read()==PTE1_MISS) r_dcache_fsm = DCACHE_TLB_PTE1_GET;
                    else if (r_dcache_miss_type.read()==PTE2_MISS) r_dcache_fsm = DCACHE_TLB_PTE2_GET;
                    else                                           r_dcache_fsm = DCACHE_IDLE;
                }
            }

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    if ( r_dcache_miss_inval.read() )
    {
        if ( r_dcache_miss_word.read() < m_dcache_words-1 )
        {
            std::cout << " Matching coherence request:"
                      << " pop the FIFO, don't update the cache" << std::endl;
        }
        else
        {
            std::cout << " Matching coherence request:"
                      << " last word : send a cleanup request " << std::endl;
        }
    }
    else
    {
        std::cout << " Write one word:"
                  << " address = " << r_dcache_vci_paddr.read()
                  << " / data = "  << r_vci_rsp_fifo_dcache.read()
                  << " / way = "   << r_dcache_miss_way.read()
                  << " / set = "   << r_dcache_miss_set.read()
                  << " / word = "  << r_dcache_miss_word.read() << std::endl;
    }
}
#endif
        }   // end if rok
        break;
    }
    /////////////////////
    case DCACHE_UNC_WAIT:
    {
        // external coherence request
        if ( r_tgt_dcache_req.read() )
        {
            r_dcache_fsm      = DCACHE_CC_CHECK;
            r_dcache_fsm_save = r_dcache_fsm;
            break;
        }

        if ( r_vci_rsp_data_error.read() )   // bus error
        {
            r_mmu_detr           = MMU_READ_DATA_ILLEGAL_ACCESS;
            r_mmu_dbvar          = dreq.addr;
            r_vci_rsp_data_error = false;
            drsp.error           = true;
            drsp.valid           = true;
            r_dcache_fsm         = DCACHE_IDLE;
            break;
        }
        else if ( r_vci_rsp_fifo_dcache.rok() )   // data available
        {
            vci_rsp_fifo_dcache_get = true;
            r_dcache_fsm            = DCACHE_IDLE;
            // we acknowledge the processor request if it has not been modified
            if ( dreq.valid and (dreq.addr == r_dcache_p0_vaddr.read()) )
            {
                drsp.valid = true;
                drsp.rdata = r_vci_rsp_fifo_dcache.read();
            }
        }
        break;
    }
    ////////////////////////////
    case DCACHE_WRITE_TLB_DIRTY:  // set the PTE dirty bit in dtlb
    {
        // set dirty bit in dtlb
        r_dtlb.set_dirty( r_dcache_p2_tlb_way.read(),
                          r_dcache_p2_tlb_set.read() );

        // get the PTE in dcache
        uint32_t pte_flags = 0;
        size_t   way;
        size_t   set;
        size_t   word;
        bool     hit = r_dcache.read( r_dcache_p2_pte_paddr.read(),
                                      &pte_flags,
                                      &way,
                                      &set,
                                      &word );
#ifdef INSTRUMENTATION
m_cpt_dcache_data_read++;
m_cpt_dcache_dir_read++;
#endif

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " Set PTE dirty bit in dtlb:"
              << " paddr = "     << r_dcache_p2_pte_paddr.read()
              << " / tlb_way = " << r_dcache_p2_tlb_way.read()
              << " / tlb_set = " << r_dcache_p2_tlb_set.read() << std::endl;
}
#endif
        assert( hit and
        "error in DCACHE_WRITE_TLB_DIRTY: the PTE should be in dcache" );

        r_dcache_p2_pte_way   = way;         // register pte way in dcache
        r_dcache_p2_pte_set   = set;         // register pte set in dcache
        r_dcache_p2_pte_word  = word;        // register pte word in dcache
        r_dcache_p2_pte_flags = pte_flags;   // register pte value
        r_dcache_fsm          = DCACHE_WRITE_CACHE_DIRTY;
        break;
    }
    //////////////////////////////
    case DCACHE_WRITE_CACHE_DIRTY:  // set the PTE dirty bit in dcache,
                                    // and request a SC transaction to the CMD FSM
    {
        // set PTE dirty bit in dcache
        r_dcache.write( r_dcache_p2_pte_way.read(),
                        r_dcache_p2_pte_set.read(),
                        r_dcache_p2_pte_word.read(),
                        r_dcache_p2_pte_flags.read() | PTE_D_MASK,
                        0xF );
#ifdef INSTRUMENTATION
m_cpt_dcache_data_write++;
#endif

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " Set PTE dirty bit in dcache:"
              << " / way = "  << r_dcache_p2_pte_way.read()
              << " / set = "  << r_dcache_p2_pte_set.read()
              << " / word = " << r_dcache_p2_pte_word.read() << std::endl;
}
#endif
        // request sc transaction to CMD_FSM
        r_dcache_vci_sc_req = true;
        r_dcache_vci_sc_old = r_dcache_p2_pte_flags.read();
        r_dcache_vci_sc_new = r_dcache_p2_pte_flags.read() | PTE_D_MASK;
        r_dcache_fsm        = DCACHE_WRITE_SC_WAIT;
        break;
    }
    //////////////////////////
    case DCACHE_WRITE_SC_WAIT:  // wait completion of the SC:
                                // if atomic, the write is completed : return to IDLE state;
                                // else, make an uncacheable read to retry the SC
    {
        // external coherence request
        if ( r_tgt_dcache_req )
        {
            r_dcache_fsm      = DCACHE_CC_CHECK;
            r_dcache_fsm_save = r_dcache_fsm;
            break;
        }

        if ( r_vci_rsp_data_error.read() )   // bus error
        {
            r_mmu_detr  = MMU_WRITE_PT2_ILLEGAL_ACCESS;
            r_mmu_dbvar = r_dcache_p2_vaddr.read();
            drsp.valid   = true;
            drsp.error   = true;
            r_dcache_fsm = DCACHE_IDLE;
            break;
        }
        else if ( r_vci_rsp_fifo_dcache.rok() )   // response available
        {
            if ( r_vci_rsp_fifo_dcache.read() == 0 )   // atomic
            {
                drsp.valid   = true;   // acknowledge the initial write
                r_dcache_fsm = DCACHE_IDLE;
            }
            else
            {
                r_dcache_vci_paddr   = r_dcache_p2_pte_paddr.read();
                r_dcache_vci_unc_req = true;
                r_dcache_vci_unc_be  = 0xF;
                r_dcache_fsm         = DCACHE_WRITE_UNC_WAIT;
            }
        }
        break;
    }
    ///////////////////////////
    case DCACHE_WRITE_UNC_WAIT:  // wait completion of the uncacheable read;
                                 // in case of success we retry a SC request to
                                 // set the dirty bit in the PTE
    {
        // external coherence request
        if ( r_tgt_dcache_req )
        {
            r_dcache_fsm      = DCACHE_CC_CHECK;
            r_dcache_fsm_save = r_dcache_fsm;
            break;
        }

        if ( r_vci_rsp_data_error.read() )   // bus error
        {
            r_mmu_detr   = MMU_READ_PT2_ILLEGAL_ACCESS;
            r_mmu_dbvar  = r_dcache_p2_vaddr.read();
            drsp.valid   = true;
            drsp.error   = true;
            r_dcache_fsm = DCACHE_IDLE;
            break;
        }
        if ( r_vci_rsp_fifo_dcache.rok() )   // PTE available
        {
            r_dcache_vci_sc_req = true;
            r_dcache_vci_sc_old = r_vci_rsp_fifo_dcache.read();
            r_dcache_vci_sc_new = r_vci_rsp_fifo_dcache.read() | PTE_D_MASK;
            r_dcache_fsm        = DCACHE_WRITE_SC_WAIT;
        }
        break;
    }
    /////////////////////
    case DCACHE_CC_CHECK:  // This state is the entry point for the sub-FSM
                           // handling coherence requests.
                           // If there is a matching pending miss on the modified cache
                           // line, this is signaled in the r_dcache_miss_inval flip-flop.
                           // If the updated (or invalidated) cache line has copies in TLBs,
                           // these TLB copies are invalidated.
                           // The return state is defined in r_dcache_fsm_save
    {
        paddr_t paddr = r_tgt_paddr.read();
        paddr_t mask  = ~((m_dcache_words<<2)-1);

        if( (r_dcache_fsm_save == DCACHE_MISS_WAIT) and
            ((r_dcache_vci_paddr.read() & mask) == (paddr & mask)) )   // matching pending miss
        {
            r_dcache_miss_inval = true;                  // signaling the match
            r_tgt_dcache_req    = false;                 // coherence request completed
            r_tgt_dcache_rsp    = r_tgt_update.read();   // response required if update
            r_dcache_fsm        = r_dcache_fsm_save;

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " Coherence request matching a pending miss:"
              << " address = " << std::hex << paddr << std::endl;
}
#endif
        }
        else   // no match
        {
            uint32_t rdata;
            size_t   way;
            size_t   set;
            size_t   word;
            bool     hit = r_dcache.read(paddr,
                                         &rdata,   // unused
                                         &way,
                                         &set,
                                         &word);   // unused
#ifdef INSTRUMENTATION
m_cpt_dcache_data_read++;
m_cpt_dcache_dir_read++;
#endif
            r_dcache_cc_way = way;
            r_dcache_cc_set = set;

            if ( hit and r_tgt_update.read() )            // hit update
            {
                r_dcache_fsm     = DCACHE_CC_UPDT;
                r_dcache_cc_word = r_tgt_word_min.read();
            }
            else if ( hit and not r_tgt_update.read() )   // hit inval
            {
                r_dcache_fsm = DCACHE_CC_INVAL;
            }
            else                                          // miss can happen
            {
                r_tgt_dcache_req = false;
                r_tgt_dcache_rsp = r_tgt_update.read();
                r_dcache_fsm     = r_dcache_fsm_save.read();
            }

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " Coherence request received :"
              << " address = " << std::hex << paddr << std::dec;
    if ( hit )
    {
        std::cout << " / HIT"
                  << " / way = " << way
                  << " / set = " << set << std::endl;
    }
    else
    {
        std::cout << " / MISS" << std::endl;
    }
}
#endif
        }
        break;
    }
    /////////////////////
    case DCACHE_CC_INVAL:  // invalidate one cache line
                           // and test possible copies in TLBs
    {
        paddr_t nline;
        size_t  way       = r_dcache_cc_way.read();
        size_t  set       = r_dcache_cc_set.read();
        bool    inval_tlb = false;

        r_dcache.inval( way,
                        set,
                        &nline );

        // possible itlb & dtlb invalidate requests
        r_dcache_tlb_inval_line = nline;
        if ( (r_mmu_mode.read() & DATA_TLB_MASK) and
             r_dcache_in_dtlb[way*m_dcache_sets+set] )
        {
            r_dcache_dtlb_inval_req = true;
            r_dcache_in_dtlb[way*m_dcache_sets+set] = false;
            inval_tlb = true;
        }
        if ( (r_mmu_mode.read() & INS_TLB_MASK) and
             r_dcache_in_itlb[way*m_dcache_sets+set] )
        {
            r_dcache_itlb_inval_req = true;
            r_dcache_in_itlb[way*m_dcache_sets+set] = false;
            inval_tlb = true;
        }
        // no valid response until itlb & dtlb invalidated
        if ( inval_tlb )
        {
            r_dcache_fsm = DCACHE_CC_WAIT;
        }
        else
        {
            r_tgt_dcache_rsp = true;
            r_tgt_dcache_req = false;
            r_dcache_fsm     = r_dcache_fsm_save.read();
        }

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " Invalidate cache line :" << std::dec
              << " way = " << way
              << " / set = " << set << std::endl;
}
#endif
        break;
    }
    ///////////////////
    case DCACHE_CC_UPDT:  // write one word per cycle (from word_min to word_max)
                          // and test possible copies in TLBs
    {
        size_t word = r_dcache_cc_word.read();
        size_t way  = r_dcache_cc_way.read();
        size_t set  = r_dcache_cc_set.read();

        r_dcache.write( way,
                        set,
                        word,
                        r_tgt_buf[word],
                        r_tgt_be[word] );
#ifdef INSTRUMENTATION
m_cpt_dcache_data_write++;
#endif
        r_dcache_cc_word = word + 1;

        if ( word == r_tgt_word_max.read() )   // last word
        {
            // invalidate copies in TLBs
            if ( (r_mmu_mode.read() & DATA_TLB_MASK) and
                 ( r_dcache_in_itlb[way*m_dcache_sets+set] or
                   r_dcache_in_dtlb[way*m_dcache_sets+set] ) )
            {
                r_dcache_tlb_inval_line = r_tgt_paddr.read() >>
                                          (uint32_log2(m_dcache_words)+2);
                r_dcache_itlb_inval_req = r_dcache_in_itlb[way*m_dcache_sets+set];
                r_dcache_in_itlb[way*m_dcache_sets+set] = false;
                r_dcache_dtlb_inval_req = r_dcache_in_dtlb[way*m_dcache_sets+set];
                r_dcache_in_dtlb[way*m_dcache_sets+set] = false;
                r_dcache_fsm = DCACHE_CC_WAIT;
            }
            else
            {
                r_tgt_dcache_rsp = true;
                r_tgt_dcache_req = false;
                r_dcache_fsm     = r_dcache_fsm_save.read();
            }
        }

#if DEBUG_DCACHE
if ( m_debug_dcache_fsm )
{
    std::cout << " Update one word :" << std::dec
              << " way = " << way
              << " / set = " << set
              << " / word = " << word
              << " / value = " << std::hex << r_tgt_buf[word] << std::endl;
}
#endif
        break;
    }
    ////////////////////
    case DCACHE_CC_WAIT:  // wait completion of the TLB invalidate
    {
        if ( not r_dcache_itlb_inval_req.read() and not r_dcache_dtlb_inval_req.read() )
        {
            r_tgt_dcache_rsp = true;
            r_tgt_dcache_req = false;
            r_dcache_fsm     = r_dcache_fsm_save.read();
        }
        break;
    }
    } // end switch r_dcache_fsm

    //////////////////// save DREQ and DRSP fields for print_trace() ////////////////
    m_dreq_valid = dreq.valid;
    m_dreq_addr  = dreq.addr;
    m_dreq_mode  = dreq.mode;
    m_dreq_type  = dreq.type;
    m_dreq_wdata = dreq.wdata;
    m_dreq_be    = dreq.be;
    m_drsp_valid = drsp.valid;
    m_drsp_rdata = drsp.rdata;
    m_drsp_error = drsp.error;

    ///////////////// wbuf update //////////////////////////////////////////////////////
    r_wbuf.update();

    ////////////////////////////////////////////////////////////////////////////////////
    //  INVAL DTLB FSM
    //  This FSM works in parallel with the DCACHE FSM.
    //  When the r_dcache_dtlb_inval_req flip-flop is activated by the DCACHE FSM,
    //  it scans sequentially all entries in the DTLB, and invalidates the
    //  entries matching the evicted line.
    //  It signals the completion of the invalidation by resetting r_dcache_dtlb_inval_req.
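    //  Worked example (illustrative): with m_dtlb_ways = 2 and m_dtlb_sets = 8,
    //  the SCAN state lasts 16 cycles; cycle k probes way = k/8 and set = k%8,
    //  and the entry is invalidated only if its nline matches
    //  r_dcache_tlb_inval_line (reported as HIT in the debug trace).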
    ////////////////////////////////////////////////////////////////////////////////////
    switch(r_inval_dtlb_fsm) {
    /////////////////////
    case INVAL_DTLB_IDLE:
    {
        if ( r_dcache_dtlb_inval_req.read() )
        {
            r_dtlb.reset_bypass(r_dcache_tlb_inval_line.read());
            r_inval_dtlb_count = 0;
            r_inval_dtlb_fsm   = INVAL_DTLB_SCAN;

#if DEBUG_INVAL_DTLB
if ( m_debug_inval_dtlb_fsm )
{
    std::cout << " Invalidate request for line "
              << std::hex << r_dcache_tlb_inval_line.read() << std::endl;
    r_dtlb.print();
}
#endif
        }
        break;
    }
    /////////////////////
    case INVAL_DTLB_SCAN:
    {
        paddr_t line = r_dcache_tlb_inval_line.read();           // nline
        size_t  way  = r_inval_dtlb_count.read()/m_dtlb_sets;    // way
        size_t  set  = r_inval_dtlb_count.read()%m_dtlb_sets;    // set
        bool    ok   = r_dtlb.inval( line, way, set );

#if DEBUG_INVAL_DTLB
if ( m_debug_inval_dtlb_fsm )
{
    std::cout << " " << std::hex << " line = " << line
              << std::dec << " / set = " << set << " / way = " << way;
    if ( ok ) std::cout << " / HIT" << std::endl;
    else      std::cout << " / MISS" << std::endl;
}
#endif
        r_inval_dtlb_count = r_inval_dtlb_count.read() + 1;
        if ( r_inval_dtlb_count.read() == (m_dtlb_sets*m_dtlb_ways - 1) )
        {
            r_inval_dtlb_fsm        = INVAL_DTLB_IDLE;
            r_dcache_dtlb_inval_req = false;
        }
        break;
    }
    } // end switch r_inval_dtlb_fsm

    /////////// test processor frozen /////////////////////////////////////////////
    // The simulation exits if the number of consecutive frozen cycles
    // is larger than m_max_frozen_cycles (constructor parameter)
    if ( (ireq.valid and not irsp.valid) or (dreq.valid and not drsp.valid) )
    {
        m_cpt_frz_cycles++;        // used for instrumentation
        m_cpt_stop_simulation++;   // used for debug
        if ( m_cpt_stop_simulation > m_max_frozen_cycles )
        {
            std::cout << std::dec << "ERROR in CC_VCACHE_WRAPPER " << name() << std::endl
                      << " stop at cycle " << m_cpt_total_cycles << std::endl
                      << " frozen since cycle " << m_cpt_total_cycles - m_max_frozen_cycles
                      << std::endl;
            exit(1);
        }
    }
    else
    {
        m_cpt_stop_simulation = 0;
    }

    /////////// execute one iss cycle /////////////////////////////////
    {
        uint32_t it = 0;
        for (size_t i=0; i<(size_t)iss_t::n_irq; i++) if(p_irq[i].read()) it |= (1<<i);
        r_iss.executeNCycles(1, irsp, drsp, it);
    }

    ////////////////////////////////////////////////////////////////////////////////////
    //  VCI_RSP FSM
    ////////////////////////////////////////////////////////////////////////////////////
    switch (r_vci_rsp_fsm.read())
    {
    //////////////
    case RSP_IDLE:
    {
        if ( p_vci_ini_d.rspval.read() )
        {
            r_vci_rsp_cpt = 0;
            if ( (p_vci_ini_d.rtrdid.read() >> (vci_param::T-1)) != 0 )   // Write transaction
            {
                r_vci_rsp_fsm = RSP_DATA_WRITE;
            }
            else if ( p_vci_ini_d.rtrdid.read() == TYPE_INS_MISS )
            {
                r_vci_rsp_fsm = RSP_INS_MISS;
            }
            else if ( p_vci_ini_d.rtrdid.read() == TYPE_INS_UNC )
            {
                r_vci_rsp_fsm = RSP_INS_UNC;
            }
            else if ( p_vci_ini_d.rtrdid.read() == TYPE_DATA_MISS )
            {
                r_vci_rsp_fsm = RSP_DATA_MISS;
            }
            else if ( p_vci_ini_d.rtrdid.read() == TYPE_DATA_UNC )
            {
                r_vci_rsp_fsm = RSP_DATA_UNC;
            }
            else
            {
                assert(false and "Unexpected VCI response");
            }
        }
        break;
    }
    //////////////////
    case RSP_INS_MISS:
    {
        if ( p_vci_ini_d.rspval.read() )
        {
            if ( (p_vci_ini_d.rerror.read()&0x1) != 0 )   // error reported
            {
                r_vci_rsp_ins_error = true;
                if ( p_vci_ini_d.reop.read() ) r_vci_rsp_fsm = RSP_IDLE;
            }
            else   // no error reported
            {
                if ( r_vci_rsp_fifo_icache.wok() )
                {
                    assert( (r_vci_rsp_cpt.read() < m_icache_words) and
                    "The VCI response packet for instruction miss is too long" );
                    r_vci_rsp_cpt            = r_vci_rsp_cpt.read() + 1;
                    vci_rsp_fifo_icache_put  = true;
                    vci_rsp_fifo_icache_data = p_vci_ini_d.rdata.read();
                    if ( p_vci_ini_d.reop.read() )
                    {
                        assert( (r_vci_rsp_cpt.read() == m_icache_words - 1) and
                        "The VCI response packet for instruction miss is too short");
                        r_vci_rsp_fsm = RSP_IDLE;
                    }
                }
            }
        }
        break;
    }
    /////////////////
    case RSP_INS_UNC:
    {
        if (p_vci_ini_d.rspval.read() )
        {
            assert( p_vci_ini_d.reop.read() and
            "illegal VCI response packet for uncacheable instruction");

            if ( (p_vci_ini_d.rerror.read()&0x1) != 0 )   // error reported
            {
                r_vci_rsp_ins_error = true;
                r_vci_rsp_fsm       = RSP_IDLE;
            }
            else   // no error reported
            {
                if ( r_vci_rsp_fifo_icache.wok())
                {
                    vci_rsp_fifo_icache_put  = true;
                    vci_rsp_fifo_icache_data = p_vci_ini_d.rdata.read();
                    r_vci_rsp_fsm            = RSP_IDLE;
                }
            }
        }
        break;
    }
    ///////////////////
    case RSP_DATA_MISS:
    {
        if ( p_vci_ini_d.rspval.read() )
        {
            if ( (p_vci_ini_d.rerror.read()&0x1) != 0 )   // error reported
            {
                r_vci_rsp_data_error = true;
                if ( p_vci_ini_d.reop.read() ) r_vci_rsp_fsm = RSP_IDLE;
            }
            else   // no error reported
            {
                if ( r_vci_rsp_fifo_dcache.wok() )
                {
                    assert( (r_vci_rsp_cpt.read() < m_dcache_words) and
                    "The VCI response packet for data miss is too long");
                    r_vci_rsp_cpt            = r_vci_rsp_cpt.read() + 1;
                    vci_rsp_fifo_dcache_put  = true;
                    vci_rsp_fifo_dcache_data = p_vci_ini_d.rdata.read();
                    if ( p_vci_ini_d.reop.read() )
                    {
                        assert( (r_vci_rsp_cpt.read() == m_dcache_words - 1) and
                        "The VCI response packet for data miss is too short");
                        r_vci_rsp_fsm = RSP_IDLE;
                    }
                }
            }
        }
        break;
    }
    //////////////////
    case RSP_DATA_UNC:
    {
        if (p_vci_ini_d.rspval.read() )
        {
            assert( p_vci_ini_d.reop.read() and
            "illegal VCI response packet for uncacheable read data");

            if ( (p_vci_ini_d.rerror.read()&0x1) != 0 )   // error reported
            {
                r_vci_rsp_data_error = true;
                r_vci_rsp_fsm        = RSP_IDLE;
            }
            else   // no error reported
            {
                if ( r_vci_rsp_fifo_dcache.wok())
                {
                    vci_rsp_fifo_dcache_put  = true;
                    vci_rsp_fifo_dcache_data = p_vci_ini_d.rdata.read();
                    r_vci_rsp_fsm            = RSP_IDLE;
                }
            }
        }
        break;
    }
    ////////////////////
    case RSP_DATA_WRITE:
    {
        if (p_vci_ini_d.rspval.read())
        {
            assert( p_vci_ini_d.reop.read() and
            "a VCI response packet must contain one flit for a write transaction");
            r_vci_rsp_fsm = RSP_IDLE;
            uint32_t wbuf_index = p_vci_ini_d.rtrdid.read() - (1<<(vci_param::T-1));
            bool cacheable = r_wbuf.completed(wbuf_index);
            if ( not cacheable ) r_dcache_pending_unc_write = false;
            if ( (p_vci_ini_d.rerror.read()&0x1) != 0 ) r_iss.setWriteBerr();
        }
        break;
    }
    } // end switch r_vci_rsp_fsm

    ////////////////////////////////////////////////////////////////////////////////
    // The CLEANUP FSM sends the cleanup commands on the coherence network,
    // and supports simultaneous cleanup transactions, but two simultaneous
    // transactions must address different cache lines.
    // Therefore, the line number is registered in an associative
    // registration buffer (Content Addressable Memory) by the CLEANUP FSM,
    // and the corresponding slot (identified by the VCI TRDID field) is cleared
    // when the cleanup transaction response is received.
    // It handles cleanup requests from both the DCACHE FSM & ICACHE FSM
    // with a round-robin priority, and can support up to 4 simultaneous
    // cleanup transactions (4 slots in the registration buffer).
    // The r_dcache_cleanup_req (or r_icache_cleanup_req) flip-flops are reset
    // when the command has been sent.
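    // Illustrative scenario (hypothetical values, using the TRDID encoding
    // detailed below): a data cleanup for line 0x12340 that obtains slot 2 in
    // the registration buffer is sent with TRDID = 2*2 + 0 = 4; when the
    // response with RTRDID = 4 comes back, cancel_index(4 >> 1) frees slot 2
    // for a new cleanup targeting a different line.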
    ////////////////////////////////////////////////////////////////////////////////
    // The CLEANUP FSM sends the cleanup commands on the coherence network,
    // and supports simultaneous cleanup transactions, but two simultaneous
    // transactions must address different cache lines.
    // Therefore, the line number is registered in an associative
    // registration buffer (Content Addressable Memory) by the CLEANUP FSM,
    // and the corresponding slot (identified by the VCI TRDID field) is cleared
    // when the cleanup transaction response is received.
    // It handles cleanup requests from both the DCACHE FSM & ICACHE FSM
    // with a round-robin priority, and can support up to 4 simultaneous
    // cleanup transactions (4 slots in the registration buffer).
    // The r_dcache_cleanup_req (or r_icache_cleanup_req) flip-flops are reset
    // when the command has been sent.
    // The VCI TRDID field is used to distinguish data/instruction cleanups:
    // - data cleanup        : TRDID = 2*index + 0
    // - instruction cleanup : TRDID = 2*index + 1
    ////////////////////////////////////////////////////////////////////////////
    switch ( r_cleanup_fsm.read() )
    {
    ///////////////////////
    case CLEANUP_DATA_IDLE:     // dcache has highest priority
    {
        size_t index = 0;
        bool   ok;
        if ( r_dcache_cleanup_req.read() )      // dcache request
        {
            ok = r_cleanup_buffer.register_value( r_dcache_cleanup_line.read(),
                                                  &index );
            if ( ok )   // successful registration
            {
                r_cleanup_fsm   = CLEANUP_DATA_GO;
                r_cleanup_trdid = index<<1;
            }
        }
        else if ( r_icache_cleanup_req.read() ) // icache request
        {
            ok = r_cleanup_buffer.register_value( r_icache_cleanup_line.read(),
                                                  &index );
            if ( ok )   // successful registration
            {
                r_cleanup_fsm   = CLEANUP_INS_GO;
                r_cleanup_trdid = (index<<1) + 1;
            }
        }
        break;
    }
    //////////////////////
    case CLEANUP_INS_IDLE:      // icache has highest priority
    {
        size_t index = 0;
        bool   ok;
        if ( r_icache_cleanup_req.read() )      // icache request
        {
            ok = r_cleanup_buffer.register_value( r_icache_cleanup_line.read(),
                                                  &index );
            if ( ok )   // successful registration
            {
                r_cleanup_fsm   = CLEANUP_INS_GO;
                r_cleanup_trdid = (index<<1) + 1;
            }
        }
        else if ( r_dcache_cleanup_req.read() ) // dcache request
        {
            ok = r_cleanup_buffer.register_value( r_dcache_cleanup_line.read(),
                                                  &index );
            if ( ok )   // successful registration
            {
                r_cleanup_fsm   = CLEANUP_DATA_GO;
                r_cleanup_trdid = index<<1;
            }
        }
        break;
    }
    /////////////////////
    case CLEANUP_DATA_GO:
    {
        if ( p_vci_ini_c.cmdack.read() )
        {
            r_dcache_cleanup_req = false;
            r_cleanup_fsm        = CLEANUP_INS_IDLE;

#if DEBUG_CLEANUP
if ( m_debug_cleanup_fsm )
{
    std::cout << "  Cleanup request for dcache:" << std::hex
              << " address = " << (r_dcache_cleanup_line.read()*m_dcache_words*4)
              << " / trdid = " << r_cleanup_trdid.read() << std::endl;
}
#endif
        }
        break;
    }
    ////////////////////
    case CLEANUP_INS_GO:
    {
        if ( p_vci_ini_c.cmdack.read() )
        {
            r_icache_cleanup_req = false;
            r_cleanup_fsm        = CLEANUP_DATA_IDLE;

#if DEBUG_CLEANUP
if ( m_debug_cleanup_fsm )
{
    std::cout << "  Cleanup request for icache:" << std::hex
              << " address = " << (r_icache_cleanup_line.read()*m_icache_words*4)
              << " / trdid = " << r_cleanup_trdid.read() << std::endl;
}
#endif
        }
        break;
    }
    } // end switch CLEANUP FSM

    //////////////// Handling cleanup responses //////////////////
    if ( p_vci_ini_c.rspval.read() )    // valid response
    {
        r_cleanup_buffer.cancel_index( p_vci_ini_c.rtrdid.read() >> 1 );
    }

    ///////////////// Response FIFOs update //////////////////////
    r_vci_rsp_fifo_icache.update(vci_rsp_fifo_icache_get,
                                 vci_rsp_fifo_icache_put,
                                 vci_rsp_fifo_icache_data);

    r_vci_rsp_fifo_dcache.update(vci_rsp_fifo_dcache_get,
                                 vci_rsp_fifo_dcache_put,
                                 vci_rsp_fifo_dcache_data);

} // end transition()
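///////////////////////////////////////////////////////////////////////////////
// Note (illustrative): genMoore() below drives the module outputs from the
// FSM registers only (Moore outputs). As an example of the cleanup address
// reconstruction it performs, a registered line number is converted back to
// a byte address as nline * m_dcache_words * 4; assuming m_dcache_words = 16,
// line 0x1234 yields address 0x1234 * 64 = 0x48D00.
///////////////////////////////////////////////////////////////////////////////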
///////////////////////
tmpl(void)::genMoore()
///////////////////////
{
    ////////////////////////////////////////////////////////////////
    // VCI initiator command on the coherence network (cleanup)
    // It depends on the CLEANUP FSM state.

    paddr_t address;

    if      ( r_cleanup_fsm.read() == CLEANUP_DATA_GO )
        address = r_dcache_cleanup_line.read()*m_dcache_words*4;
    else if ( r_cleanup_fsm.read() == CLEANUP_INS_GO )
        address = r_icache_cleanup_line.read()*m_icache_words*4;
    else
        address = 0;

    p_vci_ini_c.cmdval  = ((r_cleanup_fsm.read() == CLEANUP_DATA_GO) or
                           (r_cleanup_fsm.read() == CLEANUP_INS_GO) );
    p_vci_ini_c.address = address;
    p_vci_ini_c.wdata   = 0;
    p_vci_ini_c.be      = 0xF;
    p_vci_ini_c.plen    = 4;
    p_vci_ini_c.cmd     = vci_param::CMD_WRITE;
    p_vci_ini_c.trdid   = r_cleanup_trdid.read();
    p_vci_ini_c.pktid   = 0;
    p_vci_ini_c.srcid   = m_srcid_c;
    p_vci_ini_c.cons    = false;
    p_vci_ini_c.wrap    = false;
    p_vci_ini_c.contig  = false;
    p_vci_ini_c.clen    = 0;
    p_vci_ini_c.cfixed  = false;
    p_vci_ini_c.eop     = true;

    /////////////////////////////////////////////////////////////////
    // VCI initiator response on the coherence network (cleanup)
    // We always consume the response, and we don't use it.

    p_vci_ini_c.rspack  = true;

    /////////////////////////////////////////////////////////////////
    // VCI initiator command on the direct network
    // It depends on the CMD FSM state.

    p_vci_ini_d.pktid  = 0;
    p_vci_ini_d.srcid  = m_srcid_d;
    p_vci_ini_d.cons   = (r_vci_cmd_fsm.read() == CMD_DATA_SC);
    p_vci_ini_d.contig = not (r_vci_cmd_fsm.read() == CMD_DATA_SC);
    p_vci_ini_d.wrap   = false;
    p_vci_ini_d.clen   = 0;
    p_vci_ini_d.cfixed = false;

    switch ( r_vci_cmd_fsm.read() )
    {
    case CMD_IDLE:
        p_vci_ini_d.cmdval  = false;
        p_vci_ini_d.address = 0;
        p_vci_ini_d.wdata   = 0;
        p_vci_ini_d.be      = 0;
        p_vci_ini_d.trdid   = 0;
        p_vci_ini_d.plen    = 0;
        p_vci_ini_d.cmd     = vci_param::CMD_NOP;
        p_vci_ini_d.eop     = false;
        break;

    case CMD_INS_MISS:
        p_vci_ini_d.cmdval  = true;
        p_vci_ini_d.address = r_icache_vci_paddr.read() & m_icache_yzmask;
        p_vci_ini_d.wdata   = 0;
        p_vci_ini_d.be      = 0xF;
        p_vci_ini_d.trdid   = TYPE_INS_MISS;
        p_vci_ini_d.plen    = m_icache_words<<2;
        p_vci_ini_d.cmd     = vci_param::CMD_READ;
        p_vci_ini_d.eop     = true;
        break;

    case CMD_INS_UNC:
        p_vci_ini_d.cmdval  = true;
        p_vci_ini_d.address = r_icache_vci_paddr.read() & ~0x3;
        p_vci_ini_d.wdata   = 0;
        p_vci_ini_d.be      = 0xF;
        p_vci_ini_d.trdid   = TYPE_INS_UNC;
        p_vci_ini_d.plen    = 4;
        p_vci_ini_d.cmd     = vci_param::CMD_READ;
        p_vci_ini_d.eop     = true;
        break;

    case CMD_DATA_MISS:
        p_vci_ini_d.cmdval  = true;
        p_vci_ini_d.address = r_dcache_vci_paddr.read() & m_dcache_yzmask;
        p_vci_ini_d.wdata   = 0;
        p_vci_ini_d.be      = 0xF;
        p_vci_ini_d.trdid   = TYPE_DATA_MISS;
        p_vci_ini_d.plen    = m_dcache_words << 2;
        p_vci_ini_d.cmd     = vci_param::CMD_READ;
        p_vci_ini_d.eop     = true;
        break;

    case CMD_DATA_UNC:
        p_vci_ini_d.cmdval  = true;
        p_vci_ini_d.address = r_dcache_vci_paddr.read() & ~0x3;
        p_vci_ini_d.wdata   = 0;
        p_vci_ini_d.be      = r_dcache_vci_unc_be.read();
        p_vci_ini_d.trdid   = TYPE_DATA_UNC;
        p_vci_ini_d.plen    = 4;
        p_vci_ini_d.cmd     = vci_param::CMD_READ;
        p_vci_ini_d.eop     = true;
        break;

    case CMD_DATA_WRITE:
        p_vci_ini_d.cmdval  = true;
        p_vci_ini_d.address = r_wbuf.getAddress(r_vci_cmd_cpt.read()) & ~0x3;
        p_vci_ini_d.wdata   = r_wbuf.getData(r_vci_cmd_cpt.read());
        p_vci_ini_d.be      = r_wbuf.getBe(r_vci_cmd_cpt.read());
        p_vci_ini_d.trdid   = r_wbuf.getIndex() + (1<<(vci_param::T-1));
        p_vci_ini_d.plen    = (r_vci_cmd_max.read() - r_vci_cmd_min.read() + 1) << 2;
        p_vci_ini_d.cmd     = vci_param::CMD_WRITE;
        p_vci_ini_d.eop     = (r_vci_cmd_cpt.read() == r_vci_cmd_max.read());
        break;

    case CMD_DATA_SC:
        p_vci_ini_d.cmdval  = true;
        p_vci_ini_d.address = r_dcache_vci_paddr.read() & ~0x3;
        if ( r_vci_cmd_cpt.read() == 0 ) p_vci_ini_d.wdata = r_dcache_vci_sc_old.read();
        else                             p_vci_ini_d.wdata = r_dcache_vci_sc_new.read();
        p_vci_ini_d.be      = 0xF;
        p_vci_ini_d.trdid   = TYPE_DATA_UNC;
        p_vci_ini_d.plen    = 8;
        p_vci_ini_d.cmd     = vci_param::CMD_STORE_COND;
        p_vci_ini_d.eop     = (r_vci_cmd_cpt.read() == 1);
        break;
    } // end switch r_vci_cmd_fsm
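    //////////////////////////////////////////////////////////////////////////
    // Note (illustrative): the CMD_DATA_SC case above builds a two-flit
    // store-conditional packet (plen = 8 bytes): flit 0 carries the old value
    // (r_dcache_vci_sc_old) and flit 1 the new value (r_dcache_vci_sc_new),
    // with eop raised on the second flit. Because it is tagged TYPE_DATA_UNC,
    // its response comes back through the RSP_DATA_UNC path like an ordinary
    // uncacheable data access.
    //////////////////////////////////////////////////////////////////////////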
    //////////////////////////////////////////////////////////
    // VCI initiator response on the direct network
    // It depends on the RSP FSM state.

    switch ( r_vci_rsp_fsm.read() )
    {
    case RSP_DATA_WRITE : p_vci_ini_d.rspack = true;                        break;
    case RSP_INS_MISS   : p_vci_ini_d.rspack = r_vci_rsp_fifo_icache.wok(); break;
    case RSP_INS_UNC    : p_vci_ini_d.rspack = r_vci_rsp_fifo_icache.wok(); break;
    case RSP_DATA_MISS  : p_vci_ini_d.rspack = r_vci_rsp_fifo_dcache.wok(); break;
    case RSP_DATA_UNC   : p_vci_ini_d.rspack = r_vci_rsp_fifo_dcache.wok(); break;
    case RSP_IDLE       : p_vci_ini_d.rspack = false;                       break;
    } // end switch r_vci_rsp_fsm

    ////////////////////////////////////////////////////////////////
    // VCI target command and response on the coherence network

    switch ( r_tgt_fsm.read() )
    {
    case TGT_IDLE:
    case TGT_UPDT_WORD:
    case TGT_UPDT_DATA:
        p_vci_tgt_c.cmdack = true;
        p_vci_tgt_c.rspval = false;
        break;

    case TGT_RSP_BROADCAST:
        p_vci_tgt_c.cmdack = false;
        p_vci_tgt_c.rspval = not r_tgt_icache_req.read() and
                             not r_tgt_dcache_req.read() and
                             ( r_tgt_icache_rsp.read() or r_tgt_dcache_rsp.read() );
        p_vci_tgt_c.rsrcid = r_tgt_srcid.read();
        p_vci_tgt_c.rpktid = r_tgt_pktid.read();
        p_vci_tgt_c.rtrdid = r_tgt_trdid.read();
        p_vci_tgt_c.rdata  = 0;
        p_vci_tgt_c.rerror = 0;
        p_vci_tgt_c.reop   = true;
        break;

    case TGT_RSP_ICACHE:
        p_vci_tgt_c.cmdack = false;
        p_vci_tgt_c.rspval = not r_tgt_icache_req.read() and r_tgt_icache_rsp.read();
        p_vci_tgt_c.rsrcid = r_tgt_srcid.read();
        p_vci_tgt_c.rpktid = r_tgt_pktid.read();
        p_vci_tgt_c.rtrdid = r_tgt_trdid.read();
        p_vci_tgt_c.rdata  = 0;
        p_vci_tgt_c.rerror = 0;
        p_vci_tgt_c.reop   = true;
        break;

    case TGT_RSP_DCACHE:
        p_vci_tgt_c.cmdack = false;
        p_vci_tgt_c.rspval = not r_tgt_dcache_req.read() and r_tgt_dcache_rsp.read();
        p_vci_tgt_c.rsrcid = r_tgt_srcid.read();
        p_vci_tgt_c.rpktid = r_tgt_pktid.read();
        p_vci_tgt_c.rtrdid = r_tgt_trdid.read();
        p_vci_tgt_c.rdata  = 0;
        p_vci_tgt_c.rerror = 0;
        p_vci_tgt_c.reop   = true;
        break;

    case TGT_REQ_BROADCAST:
    case TGT_REQ_ICACHE:
    case TGT_REQ_DCACHE:
        p_vci_tgt_c.cmdack = false;
        p_vci_tgt_c.rspval = false;
        break;
    } // end switch TGT_FSM

} // end genMoore()

}} // end namespaces caba & soclib

// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:

// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4