/* * hal_gpt.c - implementation of the Generic Page Table API for TSAR-MIPS32 * * Author Alain Greiner (2016,2017,2018,2019) * * Copyright (c) UPMC Sorbonne Universites * * This file is part of ALMOS-MKH. * * ALMOS-MKH.is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2.0 of the License. * * ALMOS-MKH.is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ALMOS-MKH.; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include //////////////////////////////////////////////////////////////////////////////////////// // This define the masks for the TSAR MMU PTE attributes (from TSAR MMU specification) //////////////////////////////////////////////////////////////////////////////////////// #define TSAR_PTE_MAPPED 0x80000000 #define TSAR_PTE_SMALL 0x40000000 #define TSAR_PTE_LOCAL 0x20000000 #define TSAR_PTE_REMOTE 0x10000000 #define TSAR_PTE_CACHABLE 0x08000000 #define TSAR_PTE_WRITABLE 0x04000000 #define TSAR_PTE_EXECUTABLE 0x02000000 #define TSAR_PTE_USER 0x01000000 #define TSAR_PTE_GLOBAL 0x00800000 #define TSAR_PTE_DIRTY 0x00400000 #define TSAR_PTE_COW 0x00000001 // only for small pages #define TSAR_PTE_SWAP 0x00000004 // only for small pages #define TSAR_PTE_LOCKED 0x00000008 // only for small pages //////////////////////////////////////////////////////////////////////////////////////// // TSAR MMU related macros (from the TSAR MMU specification) // - IX1 on 11 bits // - IX2 on 9 bits // - PPN on 28 bits //////////////////////////////////////////////////////////////////////////////////////// #define TSAR_MMU_IX1_WIDTH 11 #define TSAR_MMU_IX2_WIDTH 9 #define TSAR_MMU_PPN_WIDTH 28 #define TSAR_MMU_PTE1_ATTR_MASK 0xFFC00000 #define TSAR_MMU_PTE1_PPN_MASK 0x0007FFFF #define TSAR_MMU_IX1_FROM_VPN( vpn ) ((vpn >> 9) & 0x7FF) #define TSAR_MMU_IX2_FROM_VPN( vpn ) (vpn & 0x1FF) #define TSAR_MMU_PPN2_FROM_PTE1( pte1 ) (pte1 & 0x0FFFFFFF) #define TSAR_MMU_PPN1_FROM_PTE1( pte1 ) ((pte1 & 0x0007FFFF)<<9) #define TSAR_MMU_ATTR_FROM_PTE1( pte1 ) (pte1 & 0xFFC00000) #define TSAR_MMU_PPN_FROM_PTE2( pte2 ) (pte2 & 0x0FFFFFFF) #define TSAR_MMU_ATTR_FROM_PTE2( pte2 ) (pte2 & 0xFFC000FF) /////////////////////////////////////////////////////////////////////////////////////// // This static function translates the GPT attributes to the TSAR attributes /////////////////////////////////////////////////////////////////////////////////////// static inline uint32_t gpt2tsar( uint32_t gpt_attr ) { uint32_t tsar_attr = 0; if( gpt_attr & GPT_MAPPED ) tsar_attr |= TSAR_PTE_MAPPED; if( gpt_attr & GPT_SMALL ) tsar_attr |= TSAR_PTE_SMALL; if( gpt_attr & GPT_WRITABLE ) tsar_attr |= TSAR_PTE_WRITABLE; if( gpt_attr & GPT_EXECUTABLE ) tsar_attr |= TSAR_PTE_EXECUTABLE; if( gpt_attr & GPT_CACHABLE ) tsar_attr |= TSAR_PTE_CACHABLE; if( gpt_attr & GPT_USER ) tsar_attr |= TSAR_PTE_USER; if( gpt_attr & GPT_DIRTY ) tsar_attr |= TSAR_PTE_DIRTY; if( gpt_attr & GPT_ACCESSED ) tsar_attr |= TSAR_PTE_LOCAL; if( gpt_attr & GPT_GLOBAL ) tsar_attr |= TSAR_PTE_GLOBAL; if( gpt_attr & GPT_COW ) tsar_attr |= 
TSAR_PTE_COW; if( gpt_attr & GPT_SWAP ) tsar_attr |= TSAR_PTE_SWAP; if( gpt_attr & GPT_LOCKED ) tsar_attr |= TSAR_PTE_LOCKED; return tsar_attr; } /////////////////////////////////////////////////////////////////////////////////////// // This static function translates the TSAR attributes to the GPT attributes /////////////////////////////////////////////////////////////////////////////////////// static inline uint32_t tsar2gpt( uint32_t tsar_attr ) { uint32_t gpt_attr = 0; if( tsar_attr & TSAR_PTE_MAPPED ) gpt_attr |= GPT_MAPPED; if( tsar_attr & TSAR_PTE_MAPPED ) gpt_attr |= GPT_READABLE; if( tsar_attr & TSAR_PTE_SMALL ) gpt_attr |= GPT_SMALL; if( tsar_attr & TSAR_PTE_WRITABLE ) gpt_attr |= GPT_WRITABLE; if( tsar_attr & TSAR_PTE_EXECUTABLE ) gpt_attr |= GPT_EXECUTABLE; if( tsar_attr & TSAR_PTE_CACHABLE ) gpt_attr |= GPT_CACHABLE; if( tsar_attr & TSAR_PTE_USER ) gpt_attr |= GPT_USER; if( tsar_attr & TSAR_PTE_DIRTY ) gpt_attr |= GPT_DIRTY; if( tsar_attr & TSAR_PTE_LOCAL ) gpt_attr |= GPT_ACCESSED; if( tsar_attr & TSAR_PTE_REMOTE ) gpt_attr |= GPT_ACCESSED; if( tsar_attr & TSAR_PTE_GLOBAL ) gpt_attr |= GPT_GLOBAL; if( tsar_attr & TSAR_PTE_COW ) gpt_attr |= GPT_COW; if( tsar_attr & TSAR_PTE_SWAP ) gpt_attr |= GPT_SWAP; if( tsar_attr & TSAR_PTE_LOCKED ) gpt_attr |= GPT_LOCKED; return gpt_attr; } /////////////////////////////////////////////////////////////////////////////////////// // The blocking hal_gpt_lock_pte() function implements a busy-waiting policy to get // exclusive access to a specific GPT entry. // - when non zero, the following variable defines the max number of iterations // in the busy waiting loop. // - when zero, the watchdog mechanism is deactivated. /////////////////////////////////////////////////////////////////////////////////////// #define GPT_LOCK_WATCHDOG 100000 ///////////////////////////////////// error_t hal_gpt_create( gpt_t * gpt ) { void * base; thread_t * this = CURRENT_THREAD; #if DEBUG_HAL_GPT_CREATE uint32_t cycle = (uint32_t)hal_get_cycles(); if( DEBUG_HAL_GPT_CREATE < cycle ) printk("\n[%s] thread[%x,%x] enter / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif // check page size assert( (CONFIG_PPM_PAGE_SIZE == 4096) , "the TSAR page size must be 4 Kbytes\n" ); // allocates 2 physical pages for PT1 kmem_req_t req; req.type = KMEM_PPM; req.order = 1; // 2 small pages req.flags = AF_KERNEL | AF_ZERO; base = kmem_alloc( &req ); if( base == NULL ) { printk("\n[PANIC] in %s : no memory for PT1 / process %x / cluster %x\n", __FUNCTION__, this->process->pid, local_cxy ); return ENOMEM; } // initialze the GPT descriptor gpt->ptr = base; gpt->pte1_wait_events = 0; gpt->pte1_wait_iters = 0; gpt->pte2_wait_events = 0; gpt->pte2_wait_iters = 0; #if DEBUG_HAL_GPT_CREATE cycle = (uint32_t)hal_get_cycles(); if( DEBUG_HAL_GPT_CREATE < cycle ) printk("\n[%s] thread[%x,%x] exit / pt1_base %x / pt1_ppn %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, base, ppm_base2ppn( XPTR( local_cxy , base ) ), cycle ); #endif return 0; } // end hal_gpt_create() /////////////////////////////////// void hal_gpt_destroy( gpt_t * gpt ) { uint32_t ix1; uint32_t ix2; uint32_t * pt1; uint32_t pte1; ppn_t pt2_ppn; uint32_t * pt2; uint32_t attr; kmem_req_t req; thread_t * this = CURRENT_THREAD; #if DEBUG_HAL_GPT_DESTROY uint32_t cycle = (uint32_t)hal_get_cycles(); if( DEBUG_HAL_GPT_DESTROY < cycle ) printk("\n[%s] thread[%x,%x] enter / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif // get pointer on PT1 pt1 = (uint32_t 
*)gpt->ptr;

    // scan the PT1
    for( ix1 = 0 ; ix1 < 2048 ; ix1++ )
    {
        pte1 = pt1[ix1];

        if( (pte1 & TSAR_PTE_MAPPED) != 0 )  // PTE1 mapped
        {
            if( (pte1 & TSAR_PTE_SMALL) == 0 )  // BIG page
            {
                printk("\n[WARNING] %s : valid PTE1 / thread[%x,%x] / ix1 %x\n",
                __FUNCTION__, this->process->pid, this->trdid, ix1 );
            }
            else                                // PT2 exists
            {
                // get local pointer on PT2
                pt2_ppn = TSAR_MMU_PPN2_FROM_PTE1( pte1 );
                pt2     = GET_PTR( ppm_ppn2base( pt2_ppn ) );

                // scan the PT2
                for( ix2 = 0 ; ix2 < 512 ; ix2++ )
                {
                    attr = TSAR_MMU_ATTR_FROM_PTE2( pt2[2 * ix2] );

                    if( (attr & TSAR_PTE_MAPPED) != 0 )  // PTE2 mapped
                    {
                        printk("\n[WARNING] %s : valid PTE2 / thread[%x,%x] / ix1 %x / ix2 %x\n",
                        __FUNCTION__, this->process->pid, this->trdid, ix1, ix2 );
                    }
                }

                // release the page allocated for the PT2
                req.type = KMEM_PPM;
                req.ptr  = pt2;
                kmem_free( &req );
            }
        }
    }

    // release the PT1
    req.type = KMEM_PPM;
    req.ptr  = pt1;
    kmem_free( &req );

#if DEBUG_HAL_GPT_DESTROY
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_HAL_GPT_DESTROY < cycle )
printk("\n[%s] thread[%x,%x] exit / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, cycle );
#endif

}  // end hal_gpt_destroy()

////////////////////////////////////////////
error_t hal_gpt_lock_pte( xptr_t     gpt_xp,
                          vpn_t      vpn,
                          uint32_t * attr,
                          ppn_t    * ppn )
{
    uint32_t          * pt1;         // local pointer on PT1 base
    xptr_t              pte1_xp;     // extended pointer on PT1[ix1] entry
    uint32_t            pte1;        // value of PT1[ix1] entry
    kmem_req_t          req;         // kmem request for PT2 allocation
    uint32_t          * pt2;         // local pointer on PT2 base
    ppn_t               pt2_ppn;     // PPN of page containing PT2
    xptr_t              pte2_xp;     // extended pointer on PT2[ix2].attr
    uint32_t            pte2_attr;   // PT2[ix2].attr current value
    uint32_t            pte2_ppn;    // PT2[ix2].ppn current value
    bool_t              success;     // used for both PTE1 and PTE2 mapping
    uint32_t            count;       // watchdog
    uint32_t            sr_save;     // for critical section

    // get cluster and local pointer on GPT
    cxy_t   gpt_cxy = GET_CXY( gpt_xp );
    gpt_t * gpt_ptr = GET_PTR( gpt_xp );

#if DEBUG_HAL_GPT_LOCK_PTE
thread_t * this  = CURRENT_THREAD;
uint32_t   cycle = (uint32_t)hal_get_cycles();
// if( DEBUG_HAL_GPT_LOCK_PTE < cycle )
if( (vpn == 0xc1fff) && (gpt_cxy == 0x1) )
printk("\n[%s] thread[%x,%x] enters / vpn %x in cluster %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, vpn, gpt_cxy, cycle );
#endif

    // get indexes in PT1 & PT2 from vpn
    uint32_t  ix1 = TSAR_MMU_IX1_FROM_VPN( vpn );
    uint32_t  ix2 = TSAR_MMU_IX2_FROM_VPN( vpn );

    // get local pointer on PT1
    pt1 = hal_remote_lpt( XPTR( gpt_cxy , &gpt_ptr->ptr ) );

    // build extended pointer on PTE1 == PT1[ix1]
    pte1_xp = XPTR( gpt_cxy , &pt1[ix1] );

    // get current PT1 entry value
    pte1 = hal_remote_l32( pte1_xp );

    // If PTE1 is unmapped, the calling thread tries to map this PTE1.
    // To prevent multiple concurrent PT2 allocations, only the thread that
    // successfully locked the PTE1 allocates a new PT2 and updates the PTE1.
    // All other threads simply wait until the missing PTE1 is mapped.
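    // Informal illustration of the two possible paths below, assuming two
    // threads T0 and T1 take a fault on the same unmapped PTE1 concurrently:
    // - T0 wins the CAS on PT1[ix1] : it allocates a zeroed PT2 page, then
    //   writes PTE1 = MAPPED | SMALL | pt2_ppn, which both publishes the new
    //   PT2 and releases the PTE1 lock.
    // - T1 loses the CAS : it polls PT1[ix1] until the MAPPED bit appears,
    //   the number of iterations being bounded by GPT_LOCK_WATCHDOG.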
    if( (pte1 & TSAR_PTE_MAPPED) == 0 )
    {
        if( (pte1 & TSAR_PTE_LOCKED) == 0 )
        {
            // try to atomically lock the PTE1
            success = hal_remote_atomic_cas( pte1_xp,
                                             pte1,
                                             TSAR_PTE_LOCKED );
        }
        else
        {
            success = false;
        }

        if( success )       // winner thread allocates one 4 Kbytes page for PT2
        {
            // enter critical section
            hal_disable_irq( &sr_save );

            req.type  = KMEM_PPM;
            req.order = 0;
            req.flags = AF_ZERO | AF_KERNEL;
            pt2       = kmem_remote_alloc( gpt_cxy , &req );

            if( pt2 == NULL )
            {
                printk("\n[ERROR] in %s : cannot allocate memory for PT2 in cluster %d\n",
                __FUNCTION__, gpt_cxy );

                // exit critical section before returning the error
                hal_restore_irq( sr_save );

                return -1;
            }

            // get the PT2 PPN
            pt2_ppn = ppm_base2ppn( XPTR( gpt_cxy , pt2 ) );

            // build PTE1
            pte1 = TSAR_PTE_MAPPED | TSAR_PTE_SMALL | pt2_ppn;

            // set the PTE1 value in PT1 / this unlocks the PTE1
            hal_remote_s32( pte1_xp , pte1 );
            hal_fence();

            // exit critical section
            hal_restore_irq( sr_save );

#if DEBUG_HAL_GPT_LOCK_PTE
// if( DEBUG_HAL_GPT_LOCK_PTE < cycle )
if( (vpn == 0xc1fff) && (gpt_cxy == 0x1) )
printk("\n[%s] PTE1 unmapped : winner thread[%x,%x] allocates a PT2 for vpn %x in cluster %x\n",
__FUNCTION__, this->process->pid, this->trdid, vpn, gpt_cxy );
#endif

        }
        else                // other threads wait until PTE1 mapped by the winner
        {

#if DEBUG_HAL_GPT_LOCK_PTE
// if( DEBUG_HAL_GPT_LOCK_PTE < cycle )
if( (vpn == 0xc1fff) && (gpt_cxy == 0x1) )
printk("\n[%s] PTE1 unmapped : loser thread[%x,%x] wait PTE1 for vpn %x in cluster %x\n",
__FUNCTION__, this->process->pid, this->trdid, vpn, gpt_cxy );
#endif

            count = 0;
            do
            {
                // get current pte1 value
                pte1 = hal_remote_l32( pte1_xp );

                // check iterations number
                if( count > GPT_LOCK_WATCHDOG )
                {
                    thread_t * this  = CURRENT_THREAD;
                    uint32_t   cycle = (uint32_t)hal_get_cycles();
                    printk("\n[PANIC] in %s for PTE1 after %d iterations\n"
                    "  thread[%x,%x] / vpn %x / cluster %x / pte1 %x / cycle %d\n",
                    __FUNCTION__, count, this->process->pid, this->trdid,
                    vpn, gpt_cxy, pte1, cycle );

                    xptr_t process_xp = cluster_get_process_from_pid_in_cxy( gpt_cxy,
                                                                             this->process->pid );
                    hal_vmm_display( process_xp , true );

                    hal_core_sleep();
                }

                // increment watchdog
                count++;
            }
            while( (pte1 & TSAR_PTE_MAPPED) == 0 );

#if CONFIG_INSTRUMENTATION_GPT
hal_remote_atomic_add( XPTR( gpt_cxy , &gpt_ptr->pte1_wait_events ) , 1 );
hal_remote_atomic_add( XPTR( gpt_cxy , &gpt_ptr->pte1_wait_iters  ) , count );
#endif

#if DEBUG_HAL_GPT_LOCK_PTE
// if( DEBUG_HAL_GPT_LOCK_PTE < cycle )
if( (vpn == 0xc1fff) && (gpt_cxy == 0x1) )
printk("\n[%s] PTE1 unmapped : loser thread[%x,%x] get PTE1 for vpn %x in cluster %x\n",
__FUNCTION__, this->process->pid, this->trdid, vpn, gpt_cxy );
#endif

        }
    }  // end if pte1 unmapped

    // This code is executed by all calling threads

    // check PTE1 : only small and mapped pages can be locked
    assert( ((pte1 & TSAR_PTE_MAPPED) && (pte1 & TSAR_PTE_SMALL)) ,
    "cannot lock a big or unmapped page\n");

#if DEBUG_HAL_GPT_LOCK_PTE
// if( DEBUG_HAL_GPT_LOCK_PTE < cycle )
if( (vpn == 0xc1fff) && (gpt_cxy == 0x1) )
printk("\n[%s] thread[%x,%x] get pte1 %x for vpn %x in cluster %x\n",
__FUNCTION__, this->process->pid, this->trdid, pte1, vpn, gpt_cxy );
#endif

    // get pointer on PT2 base
    pt2_ppn = TSAR_MMU_PPN2_FROM_PTE1( pte1 );
    pt2     = GET_PTR( ppm_ppn2base( pt2_ppn ) );

    // build extended pointer on PT2[ix2].attr
    pte2_xp = XPTR( gpt_cxy , &pt2[2 * ix2] );

    // initialize external loop watchdog
    count = 0;

    // in this busy waiting loop, each thread tries to atomically
    // lock the PTE2, after checking that the PTE2 is not locked
    do
    {
        // get current value of pte2_attr
        pte2_attr = hal_remote_l32( pte2_xp );

        // check loop watchdog
        if( count > GPT_LOCK_WATCHDOG )
        {
            thread_t * this  = CURRENT_THREAD;
            uint32_t   cycle = (uint32_t)hal_get_cycles();
            printk("\n[PANIC] in %s for PTE2 after %d iterations\n"
            "  thread[%x,%x] / vpn %x / cluster %x / pte2_attr %x / cycle %d\n",
            __FUNCTION__, count, this->process->pid, this->trdid,
            vpn, gpt_cxy, pte2_attr, cycle );

            xptr_t process_xp = cluster_get_process_from_pid_in_cxy( gpt_cxy,
                                                                     this->process->pid );
            hal_vmm_display( process_xp , true );

            hal_core_sleep();
        }

        // increment loop watchdog
        count++;

        if( (pte2_attr & TSAR_PTE_LOCKED) == 0 )
        {
            // try to atomically set the TSAR_PTE_LOCKED attribute
            success = hal_remote_atomic_cas( pte2_xp,
                                             pte2_attr,
                                             (pte2_attr | TSAR_PTE_LOCKED) );
        }
        else
        {
            success = false;
        }
    }
    while( success == false );

#if CONFIG_INSTRUMENTATION_GPT
hal_remote_atomic_add( XPTR( gpt_cxy , &gpt_ptr->pte2_wait_events ) , 1 );
hal_remote_atomic_add( XPTR( gpt_cxy , &gpt_ptr->pte2_wait_iters  ) , count );
#endif

    // get PTE2.ppn
    pte2_ppn = hal_remote_l32( pte2_xp + 4 );

#if DEBUG_HAL_GPT_LOCK_PTE
cycle = (uint32_t)hal_get_cycles();
// if( DEBUG_HAL_GPT_LOCK_PTE < cycle )
if( (vpn == 0xc1fff) && (gpt_cxy == 0x1) )
printk("\n[%s] thread[%x,%x] success / vpn %x in cluster %x / attr %x / ppn %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, vpn, gpt_cxy, pte2_attr, pte2_ppn, cycle );
#endif

    // return PPN and GPT attributes
    *ppn  = pte2_ppn & ((1<<TSAR_MMU_PPN_WIDTH)-1);
    *attr = tsar2gpt( pte2_attr );

    return 0;

}  // end hal_gpt_lock_pte()

////////////////////////////////////////
void hal_gpt_unlock_pte( xptr_t  gpt_xp,
                         vpn_t   vpn )
{
    uint32_t * pt1;         // local pointer on PT1 base
    xptr_t     pte1_xp;     // extended pointer on PT1[ix1] entry
    uint32_t   pte1;        // value of PT1[ix1] entry
    uint32_t * pt2;         // local pointer on PT2 base
    ppn_t      pt2_ppn;     // PPN of page containing PT2
    xptr_t     pte2_xp;     // extended pointer on PT2[ix2].attr
    uint32_t   pte2_attr;   // PT2[ix2].attr current value

    // get cluster and local pointer on GPT
    cxy_t   gpt_cxy = GET_CXY( gpt_xp );
    gpt_t * gpt_ptr = GET_PTR( gpt_xp );

    // compute indexes in PT1 and PT2
    uint32_t  ix1 = TSAR_MMU_IX1_FROM_VPN( vpn );
    uint32_t  ix2 = TSAR_MMU_IX2_FROM_VPN( vpn );

    // get local pointer on PT1
    pt1 = hal_remote_lpt( XPTR( gpt_cxy , &gpt_ptr->ptr ) );

    // build extended pointer on PTE1 == PT1[ix1]
    pte1_xp = XPTR( gpt_cxy , &pt1[ix1] );

    // get current pte1 value
    pte1 = hal_remote_l32( pte1_xp );

    assert( ((pte1 & TSAR_PTE_MAPPED) != 0),
    "PTE1 for vpn %x in cluster %x is unmapped / pte1 = %x\n", vpn, gpt_cxy, pte1 );

    assert( ((pte1 & TSAR_PTE_SMALL ) != 0),
    "PTE1 for vpn %x in cluster %x is not small / pte1 = %x\n", vpn, gpt_cxy, pte1 );

    // get pointer on PT2 base
    pt2_ppn = TSAR_MMU_PPN2_FROM_PTE1( pte1 );
    pt2     = GET_PTR( ppm_ppn2base( pt2_ppn ) );

    // build extended pointer on PT2[ix2].attr
    pte2_xp = XPTR( gpt_cxy , &pt2[2 * ix2] );

    // get PT2[ix2].attr
    pte2_attr = hal_remote_l32( pte2_xp );

    assert( ((pte2_attr & TSAR_PTE_LOCKED) != 0),
    "PTE2 for vpn %x in cluster %x is unlocked / pte2_attr = %x\n", vpn, gpt_cxy, pte2_attr );

    // reset TSAR_PTE_LOCKED attribute
    hal_remote_s32( pte2_xp , pte2_attr & ~TSAR_PTE_LOCKED );

#if DEBUG_HAL_GPT_LOCK_PTE
thread_t * this  = CURRENT_THREAD;
uint32_t   cycle = (uint32_t)hal_get_cycles();
// if( DEBUG_HAL_GPT_LOCK_PTE < cycle )
if( (vpn == 0xc5fff) && (gpt_cxy == 0x1) )
printk("\n[%s] thread[%x,%x] unlocks vpn %x in cluster %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, vpn, gpt_cxy, cycle );
#endif

}  // end hal_gpt_unlock_pte()

///////////////////////////////////////
void hal_gpt_set_pte( xptr_t    gpt_xp,
                      vpn_t     vpn,
                      uint32_t  attr,
                      ppn_t     ppn )
{
    cxy_t               gpt_cxy;        // target GPT cluster
    gpt_t             * gpt_ptr;        // target GPT local pointer
    uint32_t          * pt1;            // local pointer on PT1 base
    xptr_t              pte1_xp;        // extended pointer on PT1 entry
    uint32_t            pte1;           // PT1 entry value if PTE1
    uint32_t          * pt2;            // local pointer on PT2 base
    ppn_t               pt2_ppn;        // PPN of PT2
    xptr_t              pte2_attr_xp;   // extended pointer on PT2[ix2].attr
    xptr_t              pte2_ppn_xp;    // extended pointer on PT2[ix2].ppn
    uint32_t            pte2_attr;      // current value of PT2[ix2].attr
    uint32_t            ix1;            // index in PT1
    uint32_t            ix2;            // index in PT2
    uint32_t            tsar_attr;      // PTE attributes for TSAR MMU
    uint32_t            small;          // requested PTE is for a small page

    // get cluster and local pointer on GPT
    gpt_cxy = GET_CXY( gpt_xp );
    gpt_ptr = GET_PTR( gpt_xp );

    // compute indexes in PT1 and PT2
    ix1 = TSAR_MMU_IX1_FROM_VPN( vpn );
    ix2 = TSAR_MMU_IX2_FROM_VPN(
vpn ); #if DEBUG_HAL_GPT_SET_PTE thread_t * this = CURRENT_THREAD; uint32_t cycle = (uint32_t)hal_get_cycles(); if( DEBUG_HAL_GPT_SET_PTE < cycle ) printk("\n[%s] thread[%x,%x] enter gpt (%x,%x) / vpn %x / attr %x / ppn %x\n", __FUNCTION__, this->process->pid, this->trdid, gpt_cxy, &gpt_ptr->ptr, vpn, attr, ppn ); #endif small = attr & GPT_SMALL; // get local pointer on PT1 pt1 = hal_remote_lpt( XPTR( gpt_cxy , &gpt_ptr->ptr ) ); // compute tsar attributes from generic attributes tsar_attr = gpt2tsar( attr ); // build extended pointer on PTE1 = PT1[ix1] pte1_xp = XPTR( gpt_cxy , &pt1[ix1] ); // get current pte1 value pte1 = hal_remote_l32( pte1_xp ); if( small == 0 ) ///////////////// map a big page in PT1 { // check PT1 entry not mapped assert( (pte1 == 0) , "try to set a big page in an already mapped PTE1\n" ); // check VPN aligned assert( (ix2 == 0) , "illegal vpn for a big page\n" ); // check PPN aligned assert( ((ppn & 0x1FF) == 0) , "illegal ppn for a big page\n" ); // set the PTE1 value in PT1 pte1 = (tsar_attr & TSAR_MMU_PTE1_ATTR_MASK) | ((ppn >> 9) & TSAR_MMU_PTE1_PPN_MASK); hal_remote_s32( pte1_xp , pte1 ); hal_fence(); #if DEBUG_HAL_GPT_SET_PTE if( DEBUG_HAL_GPT_SET_PTE < cycle ) printk("\n[%s] thread[%x,%x] map PTE1 / cxy %x / ix1 %x / pt1 %x / pte1 %x\n", __FUNCTION__, this->process->pid, this->trdid, gpt_cxy, ix1, pt1, pte1 ); #endif } else ///////////////// map a small page in PT2 { // PTE1 must be mapped because PTE2 must be locked assert( (pte1 & TSAR_PTE_MAPPED), "PTE1 for vpn %x in cluster %x must be mapped / pte1 = %x\n", vpn, gpt_cxy, pte1 ); // get PT2 base pt2_ppn = TSAR_MMU_PPN2_FROM_PTE1( pte1 ); pt2 = GET_PTR( ppm_ppn2base( pt2_ppn ) ); // build extended pointers on PT2[ix2].attr and PT2[ix2].ppn pte2_attr_xp = XPTR( gpt_cxy , &pt2[2 * ix2] ); pte2_ppn_xp = XPTR( gpt_cxy , &pt2[2 * ix2 + 1] ); // get current value of PTE2.attr pte2_attr = hal_remote_l32( pte2_attr_xp ); // PTE2 must be locked assert( (pte2_attr & TSAR_PTE_LOCKED), "PTE2 for vpn %x in cluster %x must be locked / pte2_attr = %x\n", vpn, gpt_cxy, pte2_attr ); // set PTE2 in PT2 (in this order) hal_remote_s32( pte2_ppn_xp , ppn ); hal_fence(); hal_remote_s32( pte2_attr_xp , tsar_attr ); hal_fence(); #if DEBUG_HAL_GPT_SET_PTE thread_t * this = CURRENT_THREAD; uint32_t cycle = (uint32_t)hal_get_cycles(); if( DEBUG_HAL_GPT_SET_PTE < cycle ) printk("\n[%s] thread[%x,%x] map PTE2 / cxy %x / ix2 %x / pt2 %x / attr %x / ppn %x\n", __FUNCTION__, this->process->pid, this->trdid, gpt_cxy, ix2, pt2, tsar_attr, ppn ); #endif } } // end of hal_gpt_set_pte() /////////////////////////////////////// void hal_gpt_reset_pte( xptr_t gpt_xp, vpn_t vpn ) { cxy_t gpt_cxy; // target GPT cluster gpt_t * gpt_ptr; // target GPT local pointer uint32_t ix1; // index in PT1 uint32_t ix2; // index in PT2 uint32_t * pt1; // PT1 base address xptr_t pte1_xp; // extended pointer on PT1[ix1] uint32_t pte1; // PT1 entry value uint32_t * pt2; // PT2 base address ppn_t pt2_ppn; // PPN of PT2 xptr_t pte2_attr_xp; // extended pointer on PT2[ix2].attr xptr_t pte2_ppn_xp; // extended pointer on PT2[ix2].ppn // get cluster and local pointer on GPT gpt_cxy = GET_CXY( gpt_xp ); gpt_ptr = GET_PTR( gpt_xp ); // get ix1 & ix2 indexes ix1 = TSAR_MMU_IX1_FROM_VPN( vpn ); ix2 = TSAR_MMU_IX2_FROM_VPN( vpn ); // get local pointer on PT1 base pt1 = hal_remote_lpt( XPTR( gpt_cxy , &gpt_ptr->ptr ) ); // build extended pointer on PTE1 = PT1[ix1] pte1_xp = XPTR( gpt_cxy , &pt1[ix1] ); // get current PTE1 value pte1 = hal_remote_l32( pte1_xp ); if( (pte1 & 
TSAR_PTE_MAPPED) == 0 )   // PTE1 unmapped => do nothing
    {
        return;
    }

    if( (pte1 & TSAR_PTE_SMALL) == 0 )      // it's a PTE1 => unmap it from PT1
    {
        hal_remote_s32( pte1_xp , 0 );
        hal_fence();

#if DEBUG_HAL_GPT_RESET_PTE
thread_t * this  = CURRENT_THREAD;
uint32_t   cycle = (uint32_t)hal_get_cycles();
if( DEBUG_HAL_GPT_RESET_PTE < cycle )
printk("\n[%s] thread[%x,%x] unmap PTE1 / cxy %x / vpn %x / ix1 %x\n",
__FUNCTION__, this->process->pid, this->trdid, gpt_cxy, vpn, ix1 );
#endif

        return;
    }
    else                                    // it's a PTE2 => unmap it from PT2
    {
        // get PT2 base
        pt2_ppn = TSAR_MMU_PPN2_FROM_PTE1( pte1 );
        pt2     = GET_PTR( ppm_ppn2base( pt2_ppn ) );

        // build extended pointer on PT2[ix2].attr and PT2[ix2].ppn
        pte2_attr_xp = XPTR( gpt_cxy , &pt2[2 * ix2] );
        pte2_ppn_xp  = XPTR( gpt_cxy , &pt2[2 * ix2 + 1] );

        // unmap the PTE2
        hal_remote_s32( pte2_attr_xp , 0 );
        hal_fence();
        hal_remote_s32( pte2_ppn_xp , 0 );
        hal_fence();

#if DEBUG_HAL_GPT_RESET_PTE
thread_t * this  = CURRENT_THREAD;
uint32_t   cycle = (uint32_t)hal_get_cycles();
if( DEBUG_HAL_GPT_RESET_PTE < cycle )
printk("\n[%s] thread[%x,%x] unmap PTE2 / cxy %x / vpn %x / ix2 %x\n",
__FUNCTION__, this->process->pid, this->trdid, gpt_cxy, vpn, ix2 );
#endif

        return;
    }
}  // end hal_gpt_reset_pte()

////////////////////////////////////////
void hal_gpt_get_pte( xptr_t     gpt_xp,
                      vpn_t      vpn,
                      uint32_t * attr,
                      ppn_t    * ppn )
{
    uint32_t * pt1;            // local pointer on PT1 base
    uint32_t   pte1;           // PTE1 value
    uint32_t * pt2;            // local pointer on PT2 base
    ppn_t      pt2_ppn;        // PPN of page containing the PT2
    xptr_t     pte2_attr_xp;   // extended pointer on PT2[ix2].attr
    xptr_t     pte2_ppn_xp;    // extended pointer on PT2[ix2].ppn
    uint32_t   pte2_attr;      // current value of PT2[ix2].attr
    ppn_t      pte2_ppn;       // current value of PT2[ix2].ppn

    // get cluster and local pointer on GPT
    cxy_t   gpt_cxy = GET_CXY( gpt_xp );
    gpt_t * gpt_ptr = GET_PTR( gpt_xp );

    // compute indexes in PT1 and PT2
    uint32_t  ix1 = TSAR_MMU_IX1_FROM_VPN( vpn );
    uint32_t  ix2 = TSAR_MMU_IX2_FROM_VPN( vpn );

    // get PT1 base
    pt1 = hal_remote_lpt( XPTR( gpt_cxy , &gpt_ptr->ptr ) );

    // get pte1
    pte1 = hal_remote_l32( XPTR( gpt_cxy , &pt1[ix1] ) );

    // check PTE1 mapped
    if( (pte1 & TSAR_PTE_MAPPED) == 0 )   // PTE1 unmapped
    {
        *attr = 0;
        *ppn  = 0;
        return;
    }

    // access GPT
    if( (pte1 & TSAR_PTE_SMALL) == 0 )     // it's a PTE1
    {
        // get PPN & ATTR
        *attr = tsar2gpt( TSAR_MMU_ATTR_FROM_PTE1( pte1 ) );
        *ppn  = TSAR_MMU_PPN1_FROM_PTE1( pte1 ) | (vpn & ((1<<TSAR_MMU_IX2_WIDTH)-1));
    }
    else                                   // it's a PTE2
    {
        // get PT2 base
        pt2_ppn = TSAR_MMU_PPN2_FROM_PTE1( pte1 );
        pt2     = GET_PTR( ppm_ppn2base( pt2_ppn ) );

        // build extended pointers on PT2[ix2].attr and PT2[ix2].ppn
        pte2_attr_xp = XPTR( gpt_cxy , &pt2[2 * ix2] );
        pte2_ppn_xp  = XPTR( gpt_cxy , &pt2[2 * ix2 + 1] );

        // get PPN & ATTR
        pte2_attr = hal_remote_l32( pte2_attr_xp );
        pte2_ppn  = hal_remote_l32( pte2_ppn_xp );

        *attr = tsar2gpt( pte2_attr );
        *ppn  = pte2_ppn & ((1<<TSAR_MMU_PPN_WIDTH)-1);
    }
}  // end hal_gpt_get_pte()
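/////////////////////////////////////////////////////////////////////////////////////
// Informative example (not part of the API) of the two-level translation used by
// the functions above, based on the TSAR_MMU_* macros defined at the top of this
// file. The vpn value is arbitrary:
//     vpn = 0x2F3A5  =>  ix1 = (vpn >> 9) & 0x7FF = 0x179
//                        ix2 =  vpn       & 0x1FF = 0x1A5
// - if PT1[ix1] is a big page PTE1 : ppn = ((pte1 & 0x7FFFF) << 9) | ix2
// - if PT1[ix1] is a SMALL PTE1    : the PT2 base is ppn2 = pte1 & 0x0FFFFFFF,
//   and the final ppn is read from the PT2[2*ix2 + 1] slot of that page table.
/////////////////////////////////////////////////////////////////////////////////////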
/////////////////////////////////////////
error_t hal_gpt_pte_copy( gpt_t    * dst_gpt,
                          vpn_t      dst_vpn,
                          xptr_t     src_gpt_xp,
                          vpn_t      src_vpn,
                          bool_t     cow,
                          ppn_t    * ppn,
                          bool_t   * mapped )
{
    uint32_t     src_ix1;        // index in SRC PT1
    uint32_t     src_ix2;        // index in SRC PT2
    uint32_t     dst_ix1;        // index in DST PT1
    uint32_t     dst_ix2;        // index in DST PT2
    uint32_t   * src_pt1;        // local pointer on SRC PT1 base
    uint32_t   * dst_pt1;        // local pointer on DST PT1 base
    uint32_t     src_pte1;       // SRC PT1 entry value
    uint32_t     dst_pte1;       // DST PT1 entry value
    uint32_t   * src_pt2;        // local pointer on SRC PT2 base
    uint32_t   * dst_pt2;        // local pointer on DST PT2 base
    ppn_t        src_pt2_ppn;    // PPN of SRC PT2
    ppn_t        dst_pt2_ppn;    // PPN of DST PT2
    uint32_t     src_pte2_attr;  // SRC PTE2 attributes
    uint32_t     src_pte2_ppn;   // SRC PTE2 ppn
    kmem_req_t   req;            // kmem request for PT2 allocation

    // get remote SRC GPT cluster and local pointer
    cxy_t   src_cxy = GET_CXY( src_gpt_xp );
    gpt_t * src_gpt = GET_PTR( src_gpt_xp );

#if DEBUG_HAL_GPT_COPY
thread_t * this  = CURRENT_THREAD;
uint32_t   cycle = (uint32_t)hal_get_cycles();
if( DEBUG_HAL_GPT_COPY < cycle )
printk("\n[%s] thread[%x,%x] enter / src_cxy %x / dst_cxy %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, src_cxy, local_cxy, cycle );
#endif

    // get remote src_pt1 and local dst_pt1
    src_pt1 = (uint32_t *)hal_remote_lpt( XPTR( src_cxy , &src_gpt->ptr ) );
    dst_pt1 = (uint32_t *)dst_gpt->ptr;

    // check src_pt1 and dst_pt1 existence
    assert( (src_pt1 != NULL) , "src_pt1 does not exist\n");
    assert( (dst_pt1 != NULL) , "dst_pt1 does not exist\n");

    // compute SRC indexes
    src_ix1 = TSAR_MMU_IX1_FROM_VPN( src_vpn );
    src_ix2 = TSAR_MMU_IX2_FROM_VPN( src_vpn );

    // compute DST indexes
    dst_ix1 = TSAR_MMU_IX1_FROM_VPN( dst_vpn );
    dst_ix2 = TSAR_MMU_IX2_FROM_VPN( dst_vpn );

    // get src_pte1
    src_pte1 = hal_remote_l32( XPTR( src_cxy , &src_pt1[src_ix1] ) );

    // do nothing if src_pte1 not MAPPED or not SMALL
    if( (src_pte1 & TSAR_PTE_MAPPED) && (src_pte1 & TSAR_PTE_SMALL) )
    {
        // get dst_pt1 entry
        dst_pte1 = dst_pt1[dst_ix1];

        // map dst_pte1 when this entry is not mapped
        if( (dst_pte1 & TSAR_PTE_MAPPED) == 0 )
        {
            // allocate one physical page for a new PT2
            req.type  = KMEM_PPM;
            req.order = 0;                     // 1 small page
            req.flags = AF_KERNEL | AF_ZERO;
            dst_pt2   = kmem_alloc( &req );

            if( dst_pt2 == NULL )
            {
                printk("\n[ERROR] in %s : cannot allocate PT2\n", __FUNCTION__ );
                return -1;
            }

            // get PPN for this new PT2
            dst_pt2_ppn = ppm_base2ppn( XPTR( local_cxy , dst_pt2 ) );

            // build new dst_pte1
            dst_pte1 = TSAR_PTE_MAPPED | TSAR_PTE_SMALL | dst_pt2_ppn;

            // register it in DST_GPT
            dst_pt1[dst_ix1] = dst_pte1;
        }

        // get pointer on src_pt2
        src_pt2_ppn = TSAR_MMU_PPN2_FROM_PTE1( src_pte1 );
        src_pt2     = GET_PTR( ppm_ppn2base( src_pt2_ppn ) );

        // get pointer on dst_pt2
        dst_pt2_ppn = TSAR_MMU_PPN2_FROM_PTE1( dst_pte1 );
        dst_pt2     = GET_PTR( ppm_ppn2base( dst_pt2_ppn ) );

        // get attr and ppn from SRC_PT2
        src_pte2_attr = hal_remote_l32( XPTR( src_cxy , &src_pt2[2 * src_ix2]     ) );
        src_pte2_ppn  = hal_remote_l32( XPTR( src_cxy , &src_pt2[2 * src_ix2 + 1] ) );

        // do nothing if src_pte2 not MAPPED
        if( (src_pte2_attr & TSAR_PTE_MAPPED) != 0 )
        {
            // set PPN in DST PTE2
            dst_pt2[2 * dst_ix2 + 1] = src_pte2_ppn;

            // set attributes in DST PTE2
            if( cow && (src_pte2_attr & TSAR_PTE_WRITABLE) )
            {
                dst_pt2[2 * dst_ix2] = (src_pte2_attr | TSAR_PTE_COW) & (~TSAR_PTE_WRITABLE);
            }
            else
            {
                dst_pt2[2 * dst_ix2] = src_pte2_attr;
            }

            // return "successfully copied"
            *mapped = true;
            *ppn    = src_pte2_ppn;

#if DEBUG_HAL_GPT_COPY
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_HAL_GPT_COPY < cycle )
printk("\n[%s] thread[%x,%x] exit / copy done for src_vpn %x / dst_vpn %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, src_vpn, dst_vpn, cycle );
#endif

            hal_fence();

            return 0;
        }   // end if PTE2 mapped
    }   // end if PTE1 mapped

    // return "nothing done"
    *mapped = false;
    *ppn    = 0;

#if DEBUG_HAL_GPT_COPY
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_HAL_GPT_COPY < cycle )
printk("\n[%s] thread[%x,%x] exit / nothing done / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, cycle );
#endif

    hal_fence();

    return 0;

}  // end hal_gpt_pte_copy()

/////////////////////////////////////////
void hal_gpt_set_cow( xptr_t  gpt_xp,
                      vpn_t   vpn_base,
                      vpn_t   vpn_size )
{
    cxy_t      gpt_cxy;
    gpt_t    * gpt_ptr;

    uint32_t   ix1;       // current
    uint32_t   ix2;       // current

    vpn_t      vpn_min;
    vpn_t      vpn_max;   // included

    uint32_t   ix1_min;
    uint32_t   ix1_max;   // included

    uint32_t   ix2_min;
    uint32_t   ix2_max;   // included

    uint32_t * pt1;
    uint32_t   pte1;

    uint32_t * pt2;
    ppn_t      pt2_ppn;

    uint32_t   attr;

    // get GPT cluster and local pointer
    gpt_cxy = GET_CXY( gpt_xp );
    gpt_ptr = GET_PTR( gpt_xp );

#if DEBUG_HAL_GPT_SET_COW
uint32_t   cycle = (uint32_t)hal_get_cycles();
thread_t * this  = CURRENT_THREAD;
if(DEBUG_HAL_GPT_SET_COW < cycle )
printk("\n[%s] thread[%x,%x] enter / gpt[%x,%x] / vpn_base %x / vpn_size %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, gpt_cxy, gpt_ptr, vpn_base, vpn_size, cycle );
#endif

    // get PT1 pointer
    pt1 = (uint32_t *)hal_remote_lpt( XPTR( gpt_cxy , &gpt_ptr->ptr ) );

#if (DEBUG_HAL_GPT_SET_COW & 1)
if(DEBUG_HAL_GPT_SET_COW < cycle )
printk("\n[%s] thread[%x,%x] get pt1 = %x\n",
__FUNCTION__, this->process->pid, this->trdid, pt1 );
#endif

    vpn_min = vpn_base;
    vpn_max = vpn_base + vpn_size - 1;

    ix1_min = TSAR_MMU_IX1_FROM_VPN( vpn_base );
    ix1_max = TSAR_MMU_IX1_FROM_VPN( vpn_max );

    for( ix1 = ix1_min ; ix1 <= ix1_max ; ix1++ )
    {

#if (DEBUG_HAL_GPT_SET_COW & 1)
if(DEBUG_HAL_GPT_SET_COW < cycle )
printk("\n[%s] thread[%x,%x] : &pt1[%x] = %x\n",
__FUNCTION__, this->process->pid, this->trdid, ix1, &pt1[ix1] );
#endif

        // get PTE1 value
        pte1 = hal_remote_l32( XPTR( gpt_cxy , &pt1[ix1] ) );

#if (DEBUG_HAL_GPT_SET_COW & 1)
if(DEBUG_HAL_GPT_SET_COW < cycle )
printk("\n[%s] thread[%x,%x] : pt1[%x] = %x\n", __FUNCTION__,
this->process->pid, this->trdid, ix1, pte1 ); #endif // only MAPPED & SMALL PTEs are modified if( (pte1 & TSAR_PTE_MAPPED) && (pte1 & TSAR_PTE_SMALL) ) { // get PT2 pointer pt2_ppn = TSAR_MMU_PPN2_FROM_PTE1( pte1 ); pt2 = GET_PTR( ppm_ppn2base( pt2_ppn ) ); #if (DEBUG_HAL_GPT_SET_COW & 1) if(DEBUG_HAL_GPT_SET_COW < cycle ) printk("\n[%s] thread[%x,%x] : get pt2 = %x\n", __FUNCTION__, this->process->pid, this->trdid, pt2 ); #endif ix2_min = (ix1 == ix1_min) ? TSAR_MMU_IX2_FROM_VPN(vpn_min) : 0; ix2_max = (ix1 == ix1_max) ? TSAR_MMU_IX2_FROM_VPN(vpn_max) : 511; for( ix2 = ix2_min ; ix2 <= ix2_max ; ix2++ ) { #if (DEBUG_HAL_GPT_SET_COW & 1) if(DEBUG_HAL_GPT_SET_COW < cycle ) printk("\n[%s] thread[%x,%x] : &pte2[%x] = %x\n", __FUNCTION__, this->process->pid, this->trdid, 2*ix2, &pt2[2*ix2] ); #endif // get current PTE2 attributes attr = hal_remote_l32( XPTR( gpt_cxy , &pt2[2*ix2] ) ); #if (DEBUG_HAL_GPT_SET_COW & 1) if(DEBUG_HAL_GPT_SET_COW < cycle ) printk("\n[%s] thread[%x,%x] : pte2[%x] (attr) = %x\n", __FUNCTION__, this->process->pid, this->trdid, 2*ix2, attr ); #endif // only MAPPED PTEs are modified if( attr & TSAR_PTE_MAPPED ) { attr = (attr | TSAR_PTE_COW) & (~TSAR_PTE_WRITABLE); hal_remote_s32( XPTR( gpt_cxy , &pt2[2*ix2] ) , attr ); } } // end loop on ix2 } } // end loop on ix1 #if DEBUG_HAL_GPT_SET_COW cycle = (uint32_t)hal_get_cycles(); if(DEBUG_HAL_GPT_SET_COW < cycle ) printk("\n[%s] thread[%x,%x] exit / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif } // end hal_gpt_set_cow() ////////////////////////////////////////// void hal_gpt_update_pte( xptr_t gpt_xp, vpn_t vpn, uint32_t attr, // generic GPT attributes ppn_t ppn ) { uint32_t * pt1; // PT1 base addres uint32_t pte1; // PT1 entry value ppn_t pt2_ppn; // PPN of PT2 uint32_t * pt2; // PT2 base address xptr_t pte2_attr_xp; // exended pointer on pte2.attr xptr_t pte2_ppn_xp; // exended pointer on pte2.ppn uint32_t ix1; // index in PT1 uint32_t ix2; // index in PT2 // check MAPPED, SMALL, and not LOCKED in attr argument assert( ((attr & GPT_MAPPED) != 0), "attribute MAPPED must be set in new attributes\n" ); assert( ((attr & GPT_SMALL ) != 0), "attribute SMALL must be set in new attributes\n" ); assert( ((attr & GPT_LOCKED) == 0), "attribute LOCKED must not be set in new attributes\n" ); // get cluster and local pointer on remote GPT cxy_t gpt_cxy = GET_CXY( gpt_xp ); gpt_t * gpt_ptr = GET_PTR( gpt_xp ); // compute indexes in PT1 and PT2 ix1 = TSAR_MMU_IX1_FROM_VPN( vpn ); ix2 = TSAR_MMU_IX2_FROM_VPN( vpn ); // get PT1 base pt1 = (uint32_t *)hal_remote_lpt( XPTR( gpt_cxy , &gpt_ptr->ptr ) ); // get PTE1 value pte1 = hal_remote_l32( XPTR( gpt_cxy , &pt1[ix1] ) ); // check MAPPED and SMALL in target PTE1 assert( ((pte1 & TSAR_PTE_MAPPED) != 0), "attribute MAPPED must be set in target PTE1\n" ); assert( ((pte1 & TSAR_PTE_SMALL ) != 0), "attribute SMALL must be set in target PTE1\n" ); // get PT2 base pt2_ppn = TSAR_MMU_PPN2_FROM_PTE1( pte1 ); pt2 = GET_PTR( ppm_ppn2base( pt2_ppn ) ); // build extended pointers on PT2[ix2].attr and PT2[ix2].ppn pte2_attr_xp = XPTR( gpt_cxy , &pt2[2 * ix2] ); pte2_ppn_xp = XPTR( gpt_cxy , &pt2[2 * ix2 + 1] ); // check MAPPED in target PTE2 assert( ((hal_remote_l32(pte2_attr_xp) & TSAR_PTE_MAPPED) != 0), "attribute MAPPED must be set in target PTE2\n" ); // set PTE2 in this order hal_remote_s32( pte2_ppn_xp , ppn ); hal_fence(); hal_remote_s32( pte2_attr_xp , gpt2tsar( attr ) ); hal_fence(); } // end hal_gpt_update_pte()
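/////////////////////////////////////////////////////////////////////////////////////
// Illustrative sketch (not compiled) of the calling sequence expected by the
// PTE2 locking protocol implemented in this file. The caller, the "my_gpt" and
// "new_ppn" names, and the chosen attributes are hypothetical; only the
// hal_gpt_*() functions and GPT_* flags used below are defined by this HAL.
// Note that hal_gpt_set_pte() writes attributes that do not contain GPT_LOCKED,
// so it also releases the lock taken by hal_gpt_lock_pte(); hal_gpt_unlock_pte()
// is only required when the caller decides not to modify the locked entry.
/////////////////////////////////////////////////////////////////////////////////////
#if 0
static error_t example_map_small_page( gpt_t * my_gpt,    // hypothetical local GPT
                                       vpn_t   vpn,       // virtual page to map
                                       ppn_t   new_ppn )  // physical page to map
{
    uint32_t attr;      // current GPT attributes returned by lock_pte
    ppn_t    cur_ppn;   // current PPN returned by lock_pte
    xptr_t   gpt_xp = XPTR( local_cxy , my_gpt );

    // get exclusive access to the PTE2 (allocates the PT2 if required)
    if( hal_gpt_lock_pte( gpt_xp , vpn , &attr , &cur_ppn ) ) return -1;

    if( attr & GPT_MAPPED )          // already mapped by a concurrent thread
    {
        // just release the lock
        hal_gpt_unlock_pte( gpt_xp , vpn );
    }
    else                             // write the new mapping / releases the lock
    {
        hal_gpt_set_pte( gpt_xp , vpn ,
                         GPT_MAPPED | GPT_SMALL | GPT_WRITABLE |
                         GPT_CACHABLE | GPT_USER , new_ppn );
    }
    return 0;
}
#endif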
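/////////////////////////////////////////////////////////////////////////////////////
// Illustrative sketch (not compiled) of resolving a copy-on-write fault with
// hal_gpt_update_pte(). The "my_gpt" and "new_ppn" names are hypothetical, and
// the way the new physical page is obtained (copy of the COW page) is out of
// scope; only the attribute handling reflects the functions defined above.
// As for hal_gpt_set_pte(), the attributes written here do not contain
// GPT_LOCKED, so hal_gpt_update_pte() also releases the lock.
/////////////////////////////////////////////////////////////////////////////////////
#if 0
static void example_resolve_cow( gpt_t * my_gpt,    // hypothetical local GPT
                                 vpn_t   vpn,       // faulting virtual page
                                 ppn_t   new_ppn )  // private copy of the page
{
    uint32_t attr;      // current GPT attributes
    ppn_t    old_ppn;   // current PPN
    xptr_t   gpt_xp = XPTR( local_cxy , my_gpt );

    // lock the target PTE2 and get its current attributes
    if( hal_gpt_lock_pte( gpt_xp , vpn , &attr , &old_ppn ) ) return;

    // make the page privately writable : set WRITABLE, clear COW
    attr = (attr | GPT_WRITABLE) & ~GPT_COW;

    // write the new PPN and attributes / this also releases the lock
    hal_gpt_update_pte( gpt_xp , vpn , attr , new_ppn );
}
#endif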