/* * vmm.c - virtual memory manager related operations definition. * * Authors Ghassan Almaless (2008,2009,2010,2011, 2012) * Mohamed Lamine Karaoui (2015) * Alain Greiner (2016,2017,2018,2019) * * Copyright (c) UPMC Sorbonne Universites * * This file is part of ALMOS-MKH. * * ALMOS-MKH is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2.0 of the License. * * ALMOS-MKH is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ALMOS-MKH; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include ////////////////////////////////////////////////////////////////////////////////// // Extern global variables ////////////////////////////////////////////////////////////////////////////////// extern process_t process_zero; // allocated in cluster.c //////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the vmm_create_vseg() function, and implements // the VMM STACK specific allocator. //////////////////////////////////////////////////////////////////////////////////////////// // @ vmm : [in] pointer on VMM. // @ ltid : [in] requested slot == local user thread identifier. // @ vpn_base : [out] first allocated page // @ vpn_size : [out] number of allocated pages //////////////////////////////////////////////////////////////////////////////////////////// static void vmm_stack_alloc( vmm_t * vmm, ltid_t ltid, vpn_t * vpn_base, vpn_t * vpn_size ) { // check ltid argument assert( (ltid <= ((CONFIG_VMM_VSPACE_SIZE - CONFIG_VMM_STACK_BASE) / CONFIG_VMM_STACK_SIZE)), "slot index %d too large for an user stack vseg", ltid ); // get stack allocator pointer stack_mgr_t * mgr = &vmm->stack_mgr; // get lock on stack allocator busylock_acquire( &mgr->lock ); // check requested slot is available assert( (bitmap_state( &mgr->bitmap , ltid ) == false), "slot index %d already allocated", ltid ); // update bitmap bitmap_set( &mgr->bitmap , ltid ); // release lock on stack allocator busylock_release( &mgr->lock ); // returns vpn_base, vpn_size (first page non allocated) *vpn_base = mgr->vpn_base + ltid * CONFIG_VMM_STACK_SIZE + 1; *vpn_size = CONFIG_VMM_STACK_SIZE - 1; } // end vmm_stack_alloc() //////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the vmm_remove_vseg() function, and implements // the VMM STACK specific desallocator. //////////////////////////////////////////////////////////////////////////////////////////// // @ vmm : [in] pointer on VMM. // @ vseg : [in] pointer on released vseg. //////////////////////////////////////////////////////////////////////////////////////////// static void vmm_stack_free( vmm_t * vmm, vseg_t * vseg ) { // get stack allocator pointer stack_mgr_t * mgr = &vmm->stack_mgr; // compute slot index uint32_t index = (vseg->vpn_base - 1 - mgr->vpn_base) / CONFIG_VMM_STACK_SIZE; // check index assert( (index <= ((CONFIG_VMM_VSPACE_SIZE - CONFIG_VMM_STACK_BASE) / CONFIG_VMM_STACK_SIZE)), "slot index %d too large for an user stack vseg", index ); // check released slot is allocated assert( (bitmap_state( &mgr->bitmap , index ) == true), "released slot index %d non allocated", index ); // get lock on stack allocator busylock_acquire( &mgr->lock ); // update stacks_bitmap bitmap_clear( &mgr->bitmap , index ); // release lock on stack allocator busylock_release( &mgr->lock ); } // end vmm_stack_free() //////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the vmm_create_vseg() function, and implements // the VMM MMAP specific allocator. //////////////////////////////////////////////////////////////////////////////////////////// // @ vmm : [in] pointer on VMM. // @ npages : [in] requested number of pages. // @ vpn_base : [out] first allocated page. // @ vpn_size : [out] actual number of allocated pages. //////////////////////////////////////////////////////////////////////////////////////////// static error_t vmm_mmap_alloc( vmm_t * vmm, vpn_t npages, vpn_t * vpn_base, vpn_t * vpn_size ) { uint32_t order; xptr_t vseg_xp; vseg_t * vseg; vpn_t base; vpn_t size; vpn_t free; #if DEBUG_VMM_MMAP_ALLOC thread_t * this = CURRENT_THREAD; uint32_t cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_MMAP_ALLOC < cycle ) printk("\n[%s] thread[%x,%x] enter / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif // number of allocated pages must be power of 2 // compute actual size and order size = POW2_ROUNDUP( npages ); order = bits_log2( size ); // get mmap allocator pointer mmap_mgr_t * mgr = &vmm->mmap_mgr; // build extended pointer on root of zombi_list[order] xptr_t root_xp = XPTR( local_cxy , &mgr->zombi_list[order] ); // take lock protecting zombi_lists busylock_acquire( &mgr->lock ); // get vseg from zombi_list or from mmap zone if( xlist_is_empty( root_xp ) ) // from mmap zone { // check overflow free = mgr->first_free_vpn; if( (free + size) > mgr->vpn_size ) return -1; // update MMAP allocator mgr->first_free_vpn += size; // compute base base = free; } else // from zombi_list { // get pointer on zombi vseg from zombi_list vseg_xp = XLIST_FIRST( root_xp , vseg_t , xlist ); vseg = GET_PTR( vseg_xp ); // remove vseg from free-list xlist_unlink( XPTR( local_cxy , &vseg->xlist ) ); // compute base base = vseg->vpn_base; } // release lock busylock_release( &mgr->lock ); #if DEBUG_VMM_MMAP_ALLOC cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_DESTROY < cycle ) printk("\n[%s] thread[%x,%x] exit / vpn_base %x / vpn_size %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, base, size, cycle ); #endif // returns vpn_base, vpn_size *vpn_base = base; *vpn_size = size; return 0; } // end vmm_mmap_alloc() //////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the vmm_remove_vseg() function, and implements // the VMM MMAP specific desallocator. //////////////////////////////////////////////////////////////////////////////////////////// // @ vmm : [in] pointer on VMM. // @ vseg : [in] pointer on released vseg. //////////////////////////////////////////////////////////////////////////////////////////// static void vmm_mmap_free( vmm_t * vmm, vseg_t * vseg ) { // get pointer on mmap allocator mmap_mgr_t * mgr = &vmm->mmap_mgr; // compute zombi_list order uint32_t order = bits_log2( vseg->vpn_size ); // take lock protecting zombi lists busylock_acquire( &mgr->lock ); // update relevant zombi_list xlist_add_first( XPTR( local_cxy , &mgr->zombi_list[order] ), XPTR( local_cxy , &vseg->xlist ) ); // release lock busylock_release( &mgr->lock ); } // end of vmm_mmap_free() //////////////////////////////////////////////////////////////////////////////////////////// // This static function registers one vseg in the VSL of a local process descriptor. //////////////////////////////////////////////////////////////////////////////////////////// // vmm : [in] pointer on VMM. // vseg : [in] pointer on vseg. //////////////////////////////////////////////////////////////////////////////////////////// void vmm_attach_vseg_to_vsl( vmm_t * vmm, vseg_t * vseg ) { // update vseg descriptor vseg->vmm = vmm; // increment vsegs number vmm->vsegs_nr++; // add vseg in vmm list xlist_add_last( XPTR( local_cxy , &vmm->vsegs_root ), XPTR( local_cxy , &vseg->xlist ) ); } // end vmm_attach_vseg_from_vsl() //////////////////////////////////////////////////////////////////////////////////////////// // This static function removes one vseg from the VSL of a local process descriptor. //////////////////////////////////////////////////////////////////////////////////////////// // vmm : [in] pointer on VMM. // vseg : [in] pointer on vseg. //////////////////////////////////////////////////////////////////////////////////////////// void vmm_detach_vseg_from_vsl( vmm_t * vmm, vseg_t * vseg ) { // update vseg descriptor vseg->vmm = NULL; // decrement vsegs number vmm->vsegs_nr--; // remove vseg from VSL xlist_unlink( XPTR( local_cxy , &vseg->xlist ) ); } // end vmm_detach_from_vsl() //////////////////////////////////////////// error_t vmm_user_init( process_t * process ) { vseg_t * vseg_args; vseg_t * vseg_envs; intptr_t base; intptr_t size; uint32_t i; #if DEBUG_VMM_USER_INIT thread_t * this = CURRENT_THREAD; uint32_t cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_USER_INIT ) printk("\n[%s] thread[%x,%x] enter for process %x in cluster %x / cycle %d\n", __FUNCTION__ , this->process->pid, this->trdid, process->pid, local_cxy, cycle ); #endif // get pointer on VMM vmm_t * vmm = &process->vmm; // check UTILS zone assert( ((CONFIG_VMM_ARGS_SIZE + CONFIG_VMM_ENVS_SIZE) <= (CONFIG_VMM_ELF_BASE - CONFIG_VMM_UTILS_BASE)) , "UTILS zone too small\n" ); // check STACK zone assert( ((CONFIG_VMM_STACK_SIZE * CONFIG_THREADS_MAX_PER_CLUSTER) <= (CONFIG_VMM_VSPACE_SIZE - CONFIG_VMM_STACK_BASE)) , "STACK zone too small\n"); // register "args" vseg in VSL base = CONFIG_VMM_UTILS_BASE << CONFIG_PPM_PAGE_SHIFT; size = CONFIG_VMM_ARGS_SIZE << CONFIG_PPM_PAGE_SHIFT; vseg_args = vmm_create_vseg( process, VSEG_TYPE_DATA, base, size, 0, // file_offset unused 0, // file_size unused XPTR_NULL, // mapper_xp unused local_cxy ); if( vseg_args == NULL ) { printk("\n[ERROR] in %s : cannot register args vseg\n", __FUNCTION__ ); return -1; } vmm->args_vpn_base = base; // register "envs" vseg in VSL base = (CONFIG_VMM_UTILS_BASE + CONFIG_VMM_ARGS_SIZE) << CONFIG_PPM_PAGE_SHIFT; size = CONFIG_VMM_ENVS_SIZE << CONFIG_PPM_PAGE_SHIFT; vseg_envs = vmm_create_vseg( process, VSEG_TYPE_DATA, base, size, 0, // file_offset unused 0, // file_size unused XPTR_NULL, // mapper_xp unused local_cxy ); if( vseg_envs == NULL ) { printk("\n[ERROR] in %s : cannot register envs vseg\n", __FUNCTION__ ); return -1; } vmm->envs_vpn_base = base; // initialize STACK allocator vmm->stack_mgr.bitmap = 0; vmm->stack_mgr.vpn_base = CONFIG_VMM_STACK_BASE; busylock_init( &vmm->stack_mgr.lock , LOCK_VMM_STACK ); // initialize MMAP allocator vmm->mmap_mgr.vpn_base = CONFIG_VMM_HEAP_BASE; vmm->mmap_mgr.vpn_size = CONFIG_VMM_STACK_BASE - CONFIG_VMM_HEAP_BASE; vmm->mmap_mgr.first_free_vpn = CONFIG_VMM_HEAP_BASE; busylock_init( &vmm->mmap_mgr.lock , LOCK_VMM_MMAP ); for( i = 0 ; i < 32 ; i++ ) { xlist_root_init( XPTR( local_cxy , &vmm->mmap_mgr.zombi_list[i] ) ); } // initialize instrumentation counters vmm->pgfault_nr = 0; hal_fence(); #if DEBUG_VMM_USER_INIT cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_USER_INIT ) printk("\n[%s] thread[%x,%x] exit for process %x in cluster %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, process->pid, local_cxy, cycle ); #endif return 0; } // end vmm_user_init() ////////////////////////////////////////// void vmm_user_reset( process_t * process ) { xptr_t vseg_xp; vseg_t * vseg; vseg_type_t vseg_type; #if DEBUG_VMM_USER_RESET uint32_t cycle = (uint32_t)hal_get_cycles(); thread_t * this = CURRENT_THREAD; if( DEBUG_VMM_USER_RESET < cycle ) printk("\n[%s] thread[%x,%x] enter for process %x in cluster %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, process->pid, local_cxy, cycle ); #endif #if (DEBUG_VMM_USER_RESET & 1 ) if( DEBUG_VMM_USER_RESET < cycle ) hal_vmm_display( process , true ); #endif // get pointer on local VMM vmm_t * vmm = &process->vmm; // build extended pointer on VSL root and VSL lock xptr_t root_xp = XPTR( local_cxy , &vmm->vsegs_root ); xptr_t lock_xp = XPTR( local_cxy , &vmm->vsl_lock ); // take the VSL lock remote_rwlock_wr_acquire( lock_xp ); // scan the VSL to delete all non kernel vsegs // (we don't use a FOREACH in case of item deletion) xptr_t iter_xp; xptr_t next_xp; for( iter_xp = hal_remote_l64( root_xp ) ; iter_xp != root_xp ; iter_xp = next_xp ) { // save extended pointer on next item in xlist next_xp = hal_remote_l64( iter_xp ); // get pointers on current vseg in VSL vseg_xp = XLIST_ELEMENT( iter_xp , vseg_t , xlist ); vseg = GET_PTR( vseg_xp ); vseg_type = vseg->type; #if( DEBUG_VMM_USER_RESET & 1 ) if( DEBUG_VMM_USER_RESET < cycle ) printk("\n[%s] found %s vseg / vpn_base %x / vpn_size %d\n", __FUNCTION__ , vseg_type_str( vseg->type ), vseg->vpn_base, vseg->vpn_size ); #endif // delete non kernel vseg if( (vseg_type != VSEG_TYPE_KCODE) && (vseg_type != VSEG_TYPE_KDATA) && (vseg_type != VSEG_TYPE_KDEV ) ) { // remove vseg from VSL vmm_remove_vseg( process , vseg ); #if( DEBUG_VMM_USER_RESET & 1 ) if( DEBUG_VMM_USER_RESET < cycle ) printk("\n[%s] %s vseg deleted / vpn_base %x / vpn_size %d\n", __FUNCTION__ , vseg_type_str( vseg->type ), vseg->vpn_base, vseg->vpn_size ); #endif } else { #if( DEBUG_VMM_USER_RESET & 1 ) if( DEBUG_VMM_USER_RESET < cycle ) printk("\n[%s] keep %s vseg / vpn_base %x / vpn_size %d\n", __FUNCTION__ , vseg_type_str( vseg->type ), vseg->vpn_base, vseg->vpn_size ); #endif } } // end loop on vsegs in VSL // release the VSL lock remote_rwlock_wr_release( lock_xp ); // FIXME il faut gérer les process copies... #if DEBUG_VMM_USER_RESET cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_USER_RESET < cycle ) printk("\n[%s] thread[%x,%x] exit for process %x in cluster %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, process->pid, local_cxy , cycle ); #endif } // end vmm_user_reset() //////////////////////////////////////////////// void vmm_global_update_pte( process_t * process, vpn_t vpn, uint32_t attr, ppn_t ppn ) { xlist_entry_t * process_root_ptr; xptr_t process_root_xp; xptr_t process_iter_xp; xptr_t remote_process_xp; cxy_t remote_process_cxy; process_t * remote_process_ptr; xptr_t remote_gpt_xp; pid_t pid; cxy_t owner_cxy; lpid_t owner_lpid; #if DEBUG_VMM_UPDATE_PTE uint32_t cycle = (uint32_t)hal_get_cycles(); thread_t * this = CURRENT_THREAD; if( DEBUG_VMM_UPDATE_PTE < cycle ) printk("\n[%s] thread[%x,%x] enter for process %x / vpn %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, process->pid , vpn , cycle ); #endif // check cluster is reference assert( (GET_CXY( process->ref_xp ) == local_cxy) , "not called in reference cluster\n"); // get extended pointer on root of process copies xlist in owner cluster pid = process->pid; owner_cxy = CXY_FROM_PID( pid ); owner_lpid = LPID_FROM_PID( pid ); process_root_ptr = &LOCAL_CLUSTER->pmgr.copies_root[owner_lpid]; process_root_xp = XPTR( owner_cxy , process_root_ptr ); // loop on destination process copies XLIST_FOREACH( process_root_xp , process_iter_xp ) { // get cluster and local pointer on remote process remote_process_xp = XLIST_ELEMENT( process_iter_xp , process_t , copies_list ); remote_process_ptr = GET_PTR( remote_process_xp ); remote_process_cxy = GET_CXY( remote_process_xp ); #if (DEBUG_VMM_UPDATE_PTE & 0x1) if( DEBUG_VMM_UPDATE_PTE < cycle ) printk("\n[%s] threadr[%x,%x] handling vpn %x for process %x in cluster %x\n", __FUNCTION__, this->process->pid, this->trdid, vpn, process->pid, remote_process_cxy ); #endif // get extended pointer on remote gpt remote_gpt_xp = XPTR( remote_process_cxy , &remote_process_ptr->vmm.gpt ); // update remote GPT hal_gpt_update_pte( remote_gpt_xp, vpn, attr, ppn ); } #if DEBUG_VMM_UPDATE_PTE cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_UPDATE_PTE < cycle ) printk("\n[%s] thread[%x,%x] exit for process %x / vpn %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, process->pid , vpn , cycle ); #endif } // end vmm_global_update_pte() /////////////////////////////////////// void vmm_set_cow( process_t * process ) { vmm_t * vmm; xlist_entry_t * process_root_ptr; xptr_t process_root_xp; xptr_t process_iter_xp; xptr_t remote_process_xp; cxy_t remote_process_cxy; process_t * remote_process_ptr; xptr_t remote_gpt_xp; xptr_t vseg_root_xp; xptr_t vseg_iter_xp; xptr_t vseg_xp; vseg_t * vseg; pid_t pid; cxy_t owner_cxy; lpid_t owner_lpid; #if DEBUG_VMM_SET_COW uint32_t cycle = (uint32_t)hal_get_cycles(); thread_t * this = CURRENT_THREAD; if( DEBUG_VMM_SET_COW < cycle ) printk("\n[%s] thread[%x,%x] enter for process %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, process->pid , cycle ); #endif // check cluster is reference assert( (GET_CXY( process->ref_xp ) == local_cxy) , "local cluster is not process reference cluster\n"); // get pointer on reference VMM vmm = &process->vmm; // get extended pointer on root of process copies xlist in owner cluster pid = process->pid; owner_cxy = CXY_FROM_PID( pid ); owner_lpid = LPID_FROM_PID( pid ); process_root_ptr = &LOCAL_CLUSTER->pmgr.copies_root[owner_lpid]; process_root_xp = XPTR( owner_cxy , process_root_ptr ); // get extended pointer on root of vsegs xlist from reference VMM vseg_root_xp = XPTR( local_cxy , &vmm->vsegs_root ); // loop on destination process copies XLIST_FOREACH( process_root_xp , process_iter_xp ) { // get cluster and local pointer on remote process remote_process_xp = XLIST_ELEMENT( process_iter_xp , process_t , copies_list ); remote_process_ptr = GET_PTR( remote_process_xp ); remote_process_cxy = GET_CXY( remote_process_xp ); #if (DEBUG_VMM_SET_COW & 1) if( DEBUG_VMM_SET_COW < cycle ) printk("\n[%s] thread[%x,%x] handling process %x in cluster %x\n", __FUNCTION__, this->process->pid, this->trdid, process->pid , remote_process_cxy ); #endif // get extended pointer on remote gpt remote_gpt_xp = XPTR( remote_process_cxy , &remote_process_ptr->vmm.gpt ); // loop on vsegs in (local) reference process VSL XLIST_FOREACH( vseg_root_xp , vseg_iter_xp ) { // get pointer on vseg vseg_xp = XLIST_ELEMENT( vseg_iter_xp , vseg_t , xlist ); vseg = GET_PTR( vseg_xp ); assert( (GET_CXY( vseg_xp ) == local_cxy) , "all vsegs in reference VSL must be local\n" ); // get vseg type, base and size uint32_t type = vseg->type; vpn_t vpn_base = vseg->vpn_base; vpn_t vpn_size = vseg->vpn_size; #if (DEBUG_VMM_SET_COW & 1) if( DEBUG_VMM_SET_COW < cycle ) printk("\n[%s] thread[%x,%x] handling vseg %s / vpn_base = %x / vpn_size = %x\n", __FUNCTION__, this->process->pid, this->trdid, vseg_type_str(type), vpn_base, vpn_size ); #endif // only DATA, ANON and REMOTE vsegs if( (type == VSEG_TYPE_DATA) || (type == VSEG_TYPE_ANON) || (type == VSEG_TYPE_REMOTE) ) { vpn_t vpn; uint32_t attr; ppn_t ppn; xptr_t page_xp; cxy_t page_cxy; page_t * page_ptr; xptr_t forks_xp; xptr_t lock_xp; // update flags in remote GPT hal_gpt_set_cow( remote_gpt_xp, vpn_base, vpn_size ); // atomically increment pending forks counter in physical pages, // for all vseg pages that are mapped in reference cluster if( remote_process_cxy == local_cxy ) { // scan all pages in vseg for( vpn = vpn_base ; vpn < (vpn_base + vpn_size) ; vpn++ ) { // get page attributes and PPN from reference GPT hal_gpt_get_pte( remote_gpt_xp , vpn , &attr , &ppn ); // atomically update pending forks counter if page is mapped if( attr & GPT_MAPPED ) { // get pointers and cluster on page descriptor page_xp = ppm_ppn2page( ppn ); page_cxy = GET_CXY( page_xp ); page_ptr = GET_PTR( page_xp ); // get extended pointers on "forks" and "lock" forks_xp = XPTR( page_cxy , &page_ptr->forks ); lock_xp = XPTR( page_cxy , &page_ptr->lock ); // take lock protecting "forks" counter remote_busylock_acquire( lock_xp ); // increment "forks" hal_remote_atomic_add( forks_xp , 1 ); // release lock protecting "forks" counter remote_busylock_release( lock_xp ); } } // end loop on vpn } // end if local } // end if vseg type } // end loop on vsegs } // end loop on process copies #if DEBUG_VMM_SET_COW cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_SET_COW < cycle ) printk("\n[%s] thread[%x,%x] exit for process %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, process->pid , cycle ); #endif } // end vmm_set-cow() ///////////////////////////////////////////////// error_t vmm_fork_copy( process_t * child_process, xptr_t parent_process_xp ) { error_t error; cxy_t parent_cxy; process_t * parent_process; vmm_t * parent_vmm; xptr_t parent_lock_xp; vmm_t * child_vmm; xptr_t iter_xp; xptr_t parent_vseg_xp; vseg_t * parent_vseg; vseg_t * child_vseg; uint32_t type; bool_t cow; vpn_t vpn; vpn_t vpn_base; vpn_t vpn_size; xptr_t page_xp; // extended pointer on page descriptor page_t * page_ptr; cxy_t page_cxy; xptr_t forks_xp; // extended pointer on forks counter in page descriptor xptr_t parent_root_xp; bool_t mapped; ppn_t ppn; #if DEBUG_VMM_FORK_COPY uint32_t cycle = (uint32_t)hal_get_cycles(); thread_t * this = CURRENT_THREAD; if( DEBUG_VMM_FORK_COPY < cycle ) printk("\n[%s] thread %x enter / cycle %d\n", __FUNCTION__ , this->process->pid, this->trdid, cycle ); #endif // get parent process cluster and local pointer parent_cxy = GET_CXY( parent_process_xp ); parent_process = GET_PTR( parent_process_xp ); // get local pointers on parent and child VMM parent_vmm = &parent_process->vmm; child_vmm = &child_process->vmm; // initialize the lock protecting the child VSL remote_rwlock_init( XPTR( local_cxy , &child_vmm->vsl_lock ) , LOCK_VMM_VSL ); // initialize the child VSL as empty xlist_root_init( XPTR( local_cxy, &child_vmm->vsegs_root ) ); child_vmm->vsegs_nr = 0; // create an empty child GPT error = hal_gpt_create( &child_vmm->gpt ); if( error ) { printk("\n[ERROR] in %s : cannot create GPT\n", __FUNCTION__ ); return -1; } // build extended pointer on parent VSL root and lock parent_root_xp = XPTR( parent_cxy , &parent_vmm->vsegs_root ); parent_lock_xp = XPTR( parent_cxy , &parent_vmm->vsl_lock ); // take the lock protecting the parent VSL in read mode remote_rwlock_rd_acquire( parent_lock_xp ); // loop on parent VSL xlist XLIST_FOREACH( parent_root_xp , iter_xp ) { // get pointers on current parent vseg parent_vseg_xp = XLIST_ELEMENT( iter_xp , vseg_t , xlist ); parent_vseg = GET_PTR( parent_vseg_xp ); // get vseg type type = hal_remote_l32( XPTR( parent_cxy , &parent_vseg->type ) ); #if DEBUG_VMM_FORK_COPY cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_FORK_COPY < cycle ) printk("\n[%s] thread[%x,%x] found parent vseg %s / vpn_base = %x / cycle %d\n", __FUNCTION__ , this->process->pid, this->trdid, vseg_type_str(type), hal_remote_l32( XPTR( parent_cxy , &parent_vseg->vpn_base ) ) , cycle ); #endif // all parent vsegs - but STACK and kernel vsegs - must be copied in child VSL if( (type != VSEG_TYPE_STACK) && (type != VSEG_TYPE_KCODE) && (type != VSEG_TYPE_KDATA) && (type != VSEG_TYPE_KDEV) ) { // allocate memory for a new child vseg child_vseg = vseg_alloc(); if( child_vseg == NULL ) // release all allocated vsegs { vmm_destroy( child_process ); printk("\n[ERROR] in %s : cannot create vseg for child\n", __FUNCTION__ ); return -1; } // copy parent vseg to child vseg vseg_init_from_ref( child_vseg , parent_vseg_xp ); // build extended pointer on VSL lock xptr_t lock_xp = XPTR( local_cxy , &child_vmm->vsl_lock ); // take the VSL lock in write mode remote_rwlock_wr_acquire( lock_xp ); // register child vseg in child VSL vmm_attach_vseg_to_vsl( child_vmm , child_vseg ); // release the VSL lock remote_rwlock_wr_release( lock_xp ); #if DEBUG_VMM_FORK_COPY cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_FORK_COPY < cycle ) printk("\n[%s] thread[%x,%x] copied vseg %s / vpn_base = %x to child VSL / cycle %d\n", __FUNCTION__ , this->process->pid, this->trdid, vseg_type_str(type), hal_remote_l32( XPTR( parent_cxy , &parent_vseg->vpn_base ) ) , cycle ); #endif // copy DATA, ANON, REMOTE, FILE parent GPT entries to child GPT if( type != VSEG_TYPE_CODE ) { // activate the COW for DATA, ANON, REMOTE vsegs only cow = ( type != VSEG_TYPE_FILE ); vpn_base = child_vseg->vpn_base; vpn_size = child_vseg->vpn_size; // scan pages in parent vseg for( vpn = vpn_base ; vpn < (vpn_base + vpn_size) ; vpn++ ) { error = hal_gpt_pte_copy( &child_vmm->gpt, vpn, XPTR( parent_cxy , &parent_vmm->gpt ), vpn, cow, &ppn, &mapped ); if( error ) { vmm_destroy( child_process ); printk("\n[ERROR] in %s : cannot copy GPT\n", __FUNCTION__ ); return -1; } // increment pending forks counter in page if mapped if( mapped ) { // get pointers and cluster on page descriptor page_xp = ppm_ppn2page( ppn ); page_cxy = GET_CXY( page_xp ); page_ptr = GET_PTR( page_xp ); // get extended pointers on "forks" and "lock" forks_xp = XPTR( page_cxy , &page_ptr->forks ); lock_xp = XPTR( page_cxy , &page_ptr->lock ); // get lock protecting "forks" counter remote_busylock_acquire( lock_xp ); // increment "forks" hal_remote_atomic_add( forks_xp , 1 ); // release lock protecting "forks" counter remote_busylock_release( lock_xp ); #if DEBUG_VMM_FORK_COPY cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_FORK_COPY < cycle ) printk("\n[%s] thread[%x,%x] copied vpn %x to child GPT / cycle %d\n", __FUNCTION__ , this->process->pid, this->trdid , vpn , cycle ); #endif } } } // end if no code & no stack } // end if no stack } // end loop on vsegs // release the parent VSL lock in read mode remote_rwlock_rd_release( parent_lock_xp ); // update child VMM with kernel vsegs error = hal_vmm_kernel_update( child_process ); if( error ) { printk("\n[ERROR] in %s : cannot update child VMM\n", __FUNCTION__ ); return -1; } // initialize the child VMM STACK allocator child_vmm->stack_mgr.bitmap = 0; child_vmm->stack_mgr.vpn_base = CONFIG_VMM_STACK_BASE; // initialize the child VMM MMAP allocator uint32_t i; child_vmm->mmap_mgr.vpn_base = CONFIG_VMM_HEAP_BASE; child_vmm->mmap_mgr.vpn_size = CONFIG_VMM_STACK_BASE - CONFIG_VMM_HEAP_BASE; child_vmm->mmap_mgr.first_free_vpn = CONFIG_VMM_HEAP_BASE; for( i = 0 ; i < 32 ; i++ ) { xlist_root_init( XPTR( local_cxy , &child_vmm->mmap_mgr.zombi_list[i] ) ); } // initialize instrumentation counters child_vmm->pgfault_nr = 0; // copy base addresses from parent VMM to child VMM child_vmm->args_vpn_base = (vpn_t)hal_remote_lpt(XPTR(parent_cxy, &parent_vmm->args_vpn_base)); child_vmm->envs_vpn_base = (vpn_t)hal_remote_lpt(XPTR(parent_cxy, &parent_vmm->envs_vpn_base)); child_vmm->heap_vpn_base = (vpn_t)hal_remote_lpt(XPTR(parent_cxy, &parent_vmm->heap_vpn_base)); child_vmm->code_vpn_base = (vpn_t)hal_remote_lpt(XPTR(parent_cxy, &parent_vmm->code_vpn_base)); child_vmm->data_vpn_base = (vpn_t)hal_remote_lpt(XPTR(parent_cxy, &parent_vmm->data_vpn_base)); child_vmm->entry_point = (intptr_t)hal_remote_lpt(XPTR(parent_cxy, &parent_vmm->entry_point)); hal_fence(); #if DEBUG_VMM_FORK_COPY cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_FORK_COPY < cycle ) printk("\n[%s] thread[%x,%x] exit successfully / cycle %d\n", __FUNCTION__ , this->process->pid, this->trdid , cycle ); #endif return 0; } // vmm_fork_copy() /////////////////////////////////////// void vmm_destroy( process_t * process ) { xptr_t vseg_xp; vseg_t * vseg; #if DEBUG_VMM_DESTROY uint32_t cycle = (uint32_t)hal_get_cycles(); thread_t * this = CURRENT_THREAD; if( DEBUG_VMM_DESTROY < cycle ) printk("\n[%s] thread[%x,%x] enter for process %x in cluster %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, process->pid, local_cxy, cycle ); #endif #if (DEBUG_VMM_DESTROY & 1 ) if( DEBUG_VMM_DESTROY < cycle ) hal_vmm_display( process , true ); #endif // get pointer on local VMM vmm_t * vmm = &process->vmm; // build extended pointer on VSL root, VSL lock and GPT lock xptr_t vsl_root_xp = XPTR( local_cxy , &vmm->vsegs_root ); xptr_t vsl_lock_xp = XPTR( local_cxy , &vmm->vsl_lock ); // take the VSL lock remote_rwlock_wr_acquire( vsl_lock_xp ); // scan the VSL to delete all registered vsegs // (we don't use a FOREACH in case of item deletion) xptr_t iter_xp; xptr_t next_xp; for( iter_xp = hal_remote_l64( vsl_root_xp ) ; iter_xp != vsl_root_xp ; iter_xp = next_xp ) { // save extended pointer on next item in xlist next_xp = hal_remote_l64( iter_xp ); // get pointers on current vseg in VSL vseg_xp = XLIST_ELEMENT( iter_xp , vseg_t , xlist ); vseg = GET_PTR( vseg_xp ); // delete vseg and release physical pages vmm_remove_vseg( process , vseg ); #if( DEBUG_VMM_DESTROY & 1 ) if( DEBUG_VMM_DESTROY < cycle ) printk("\n[%s] %s vseg deleted / vpn_base %x / vpn_size %d\n", __FUNCTION__ , vseg_type_str( vseg->type ), vseg->vpn_base, vseg->vpn_size ); #endif } // release the VSL lock remote_rwlock_wr_release( vsl_lock_xp ); // remove all registered MMAP vsegs // from zombi_lists in MMAP allocator uint32_t i; for( i = 0 ; i<32 ; i++ ) { // build extended pointer on zombi_list[i] xptr_t root_xp = XPTR( local_cxy , &vmm->mmap_mgr.zombi_list[i] ); // scan zombi_list[i] while( !xlist_is_empty( root_xp ) ) { vseg_xp = XLIST_FIRST( root_xp , vseg_t , xlist ); vseg = GET_PTR( vseg_xp ); #if( DEBUG_VMM_DESTROY & 1 ) if( DEBUG_VMM_DESTROY < cycle ) printk("\n[%s] found zombi vseg / vpn_base %x / vpn_size %d\n", __FUNCTION__ , vseg_type_str( vseg->type ), vseg->vpn_base, vseg->vpn_size ); #endif // clean vseg descriptor vseg->vmm = NULL; // remove vseg from zombi_list xlist_unlink( XPTR( local_cxy , &vseg->xlist ) ); // release vseg descriptor vseg_free( vseg ); #if( DEBUG_VMM_DESTROY & 1 ) if( DEBUG_VMM_DESTROY < cycle ) printk("\n[%s] zombi vseg released / vpn_base %x / vpn_size %d\n", __FUNCTION__ , vseg_type_str( vseg->type ), vseg->vpn_base, vseg->vpn_size ); #endif } } // release memory allocated to the GPT itself hal_gpt_destroy( &vmm->gpt ); #if DEBUG_VMM_DESTROY cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_DESTROY < cycle ) printk("\n[%s] thread[%x,%x] exit for process %x in cluster %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, process->pid, local_cxy , cycle ); #endif } // end vmm_destroy() ///////////////////////////////////////////////// vseg_t * vmm_check_conflict( process_t * process, vpn_t vpn_base, vpn_t vpn_size ) { vmm_t * vmm = &process->vmm; // scan the VSL vseg_t * vseg; xptr_t iter_xp; xptr_t vseg_xp; xptr_t root_xp = XPTR( local_cxy , &vmm->vsegs_root ); XLIST_FOREACH( root_xp , iter_xp ) { vseg_xp = XLIST_ELEMENT( iter_xp , vseg_t , xlist ); vseg = GET_PTR( vseg_xp ); if( ((vpn_base + vpn_size) > vseg->vpn_base) && (vpn_base < (vseg->vpn_base + vseg->vpn_size)) ) return vseg; } return NULL; } // end vmm_check_conflict() //////////////////////////////////////////////// vseg_t * vmm_create_vseg( process_t * process, vseg_type_t type, intptr_t base, uint32_t size, uint32_t file_offset, uint32_t file_size, xptr_t mapper_xp, cxy_t cxy ) { vseg_t * vseg; // created vseg pointer vpn_t vpn_base; // first page index vpn_t vpn_size; // number of pages covered by vseg error_t error; #if DEBUG_VMM_CREATE_VSEG thread_t * this = CURRENT_THREAD; uint32_t cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_CREATE_VSEG < cycle ) printk("\n[%s] thread[%x,%x] enter for process %x / %s / cxy %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, process->pid, vseg_type_str(type), cxy, cycle ); #endif // get pointer on VMM vmm_t * vmm = &process->vmm; // compute base, size, vpn_base, vpn_size, depending on vseg type // we use the VMM specific allocators for "stack", "file", "anon", & "remote" vsegs if( type == VSEG_TYPE_STACK ) { // get vpn_base and vpn_size from STACK allocator vmm_stack_alloc( vmm , base , &vpn_base , &vpn_size ); // compute vseg base and size from vpn_base and vpn_size base = vpn_base << CONFIG_PPM_PAGE_SHIFT; size = vpn_size << CONFIG_PPM_PAGE_SHIFT; } else if( type == VSEG_TYPE_FILE ) { // compute page index (in mapper) for first byte vpn_t vpn_min = file_offset >> CONFIG_PPM_PAGE_SHIFT; // compute page index (in mapper) for last byte vpn_t vpn_max = (file_offset + size - 1) >> CONFIG_PPM_PAGE_SHIFT; // compute offset in first page uint32_t offset = file_offset & CONFIG_PPM_PAGE_MASK; // compute number of pages required in virtual space vpn_t npages = vpn_max - vpn_min + 1; // get vpn_base and vpn_size from MMAP allocator error = vmm_mmap_alloc( vmm , npages , &vpn_base , &vpn_size ); if( error ) { printk("\n[ERROR] in %s : no vspace for mmap vseg / process %x in cluster %x\n", __FUNCTION__ , process->pid , local_cxy ); return NULL; } // set the vseg base (not always aligned for FILE) base = (vpn_base << CONFIG_PPM_PAGE_SHIFT) + offset; } else if( (type == VSEG_TYPE_ANON) || (type == VSEG_TYPE_REMOTE) ) { // compute number of required pages in virtual space vpn_t npages = size >> CONFIG_PPM_PAGE_SHIFT; if( size & CONFIG_PPM_PAGE_MASK) npages++; // get vpn_base and vpn_size from MMAP allocator error = vmm_mmap_alloc( vmm , npages , &vpn_base , &vpn_size ); if( error ) { printk("\n[ERROR] in %s : no vspace for mmap vseg / process %x in cluster %x\n", __FUNCTION__ , process->pid , local_cxy ); return NULL; } // set vseg base (always aligned for ANON or REMOTE) base = vpn_base << CONFIG_PPM_PAGE_SHIFT; } else // VSEG_TYPE_DATA, VSEG_TYPE_CODE or KERNEL vseg { uint32_t vpn_min = base >> CONFIG_PPM_PAGE_SHIFT; uint32_t vpn_max = (base + size - 1) >> CONFIG_PPM_PAGE_SHIFT; vpn_base = vpn_min; vpn_size = vpn_max - vpn_min + 1; } // check collisions vseg = vmm_check_conflict( process , vpn_base , vpn_size ); if( vseg != NULL ) { printk("\n[ERROR] in %s for process %x : new vseg [vpn_base %x / vpn_size %x]\n" " overlap existing vseg [vpn_base %x / vpn_size %x]\n", __FUNCTION__ , process->pid, vpn_base, vpn_size, vseg->vpn_base, vseg->vpn_size ); return NULL; } // allocate physical memory for vseg descriptor vseg = vseg_alloc(); if( vseg == NULL ) { printk("\n[ERROR] in %s for process %x : cannot allocate memory for vseg\n", __FUNCTION__ , process->pid ); return NULL; } #if DEBUG_VMM_CREATE_VSEG if( DEBUG_VMM_CREATE_VSEG < cycle ) printk("\n[%s] thread[%x,%x] : base %x / size %x / vpn_base %x / vpn_size %x\n", __FUNCTION__, this->process->pid, this->trdid, base, size, vpn_base, vpn_size ); #endif // initialize vseg descriptor vseg_init( vseg, type, base, size, vpn_base, vpn_size, file_offset, file_size, mapper_xp, cxy ); // build extended pointer on VSL lock xptr_t lock_xp = XPTR( local_cxy , &vmm->vsl_lock ); // take the VSL lock in write mode remote_rwlock_wr_acquire( lock_xp ); // attach vseg to VSL vmm_attach_vseg_to_vsl( vmm , vseg ); // release the VSL lock remote_rwlock_wr_release( lock_xp ); #if DEBUG_VMM_CREATE_VSEG cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_CREATE_VSEG < cycle ) printk("\n[%s] thread[%x,%x] exit / %s / cxy %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, vseg_type_str(type), cxy, cycle ); #endif return vseg; } // vmm_create_vseg() ////////////////////////////////////////// void vmm_remove_vseg( process_t * process, vseg_t * vseg ) { vmm_t * vmm; // local pointer on process VMM xptr_t gpt_xp; // extended pointer on GPT bool_t is_ref; // local process is reference process uint32_t vseg_type; // vseg type vpn_t vpn; // VPN of current PTE vpn_t vpn_min; // VPN of first PTE vpn_t vpn_max; // VPN of last PTE (excluded) ppn_t ppn; // current PTE ppn value uint32_t attr; // current PTE attributes xptr_t page_xp; // extended pointer on page descriptor cxy_t page_cxy; // page descriptor cluster page_t * page_ptr; // page descriptor pointer xptr_t count_xp; // extended pointer on page refcount // check arguments assert( (process != NULL), "process argument is NULL" ); assert( (vseg != NULL), "vseg argument is NULL" ); // compute is_ref is_ref = (GET_CXY( process->ref_xp ) == local_cxy); // get pointers on local process VMM vmm = &process->vmm; // build extended pointer on GPT gpt_xp = XPTR( local_cxy , &vmm->gpt ); // get relevant vseg infos vseg_type = vseg->type; vpn_min = vseg->vpn_base; vpn_max = vpn_min + vseg->vpn_size; #if DEBUG_VMM_REMOVE_VSEG uint32_t cycle = (uint32_t)hal_get_cycles(); thread_t * this = CURRENT_THREAD; if( DEBUG_VMM_REMOVE_VSEG < cycle ) printk("\n[%s] thread[%x,%x] enter / process %x / %s / base %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, process->pid, vseg_type_str(vseg->type), vseg->min, cycle ); #endif // loop on PTEs in GPT for( vpn = vpn_min ; vpn < vpn_max ; vpn++ ) { // get ppn and attr hal_gpt_get_pte( gpt_xp , vpn , &attr , &ppn ); if( attr & GPT_MAPPED ) // PTE is mapped { #if( DEBUG_VMM_REMOVE_VSEG & 1 ) if( DEBUG_VMM_REMOVE_VSEG < cycle ) printk("- unmap vpn %x / ppn %x / %s" , vpn , ppn, vseg_type_str(vseg_type) ); #endif // unmap GPT entry in local GPT hal_gpt_reset_pte( gpt_xp , vpn ); // get pointers on physical page descriptor page_xp = ppm_ppn2page( ppn ); page_cxy = GET_CXY( page_xp ); page_ptr = GET_PTR( page_xp ); // decrement page refcount count_xp = XPTR( page_cxy , &page_ptr->refcount ); hal_remote_atomic_add( count_xp , -1 ); // compute the ppn_release condition depending on vseg type bool_t ppn_release; if( (vseg_type == VSEG_TYPE_FILE) || (vseg_type == VSEG_TYPE_KCODE) || (vseg_type == VSEG_TYPE_KDATA) || (vseg_type == VSEG_TYPE_KDEV) ) { // no physical page release for FILE and KERNEL ppn_release = false; } else if( (vseg_type == VSEG_TYPE_CODE) || (vseg_type == VSEG_TYPE_STACK) ) { // always release physical page for private vsegs ppn_release = true; } else if( (vseg_type == VSEG_TYPE_ANON) || (vseg_type == VSEG_TYPE_REMOTE) ) { // release physical page if reference cluster ppn_release = is_ref; } else if( is_ref ) // vseg_type == DATA in reference cluster { // get extended pointers on forks and lock field in page descriptor xptr_t forks_xp = XPTR( page_cxy , &page_ptr->forks ); xptr_t lock_xp = XPTR( page_cxy , &page_ptr->lock ); // take lock protecting "forks" counter remote_busylock_acquire( lock_xp ); // get number of pending forks from page descriptor uint32_t forks = hal_remote_l32( forks_xp ); // decrement pending forks counter if required if( forks ) hal_remote_atomic_add( forks_xp , -1 ); // release lock protecting "forks" counter remote_busylock_release( lock_xp ); // release physical page if forks == 0 ppn_release = (forks == 0); } else // vseg_type == DATA not in reference cluster { // no physical page release if not in reference cluster ppn_release = false; } // release physical page to relevant kmem when required if( ppn_release ) ppm_remote_free_pages( page_cxy , page_ptr ); #if( DEBUG_VMM_REMOVE_VSEG & 1 ) if( DEBUG_VMM_REMOVE_VSEG < cycle ) { if( ppn_release ) printk(" / released to kmem\n" ); else printk("\n"); } #endif } } // remove vseg from VSL vmm_detach_vseg_from_vsl( vmm , vseg ); // release vseg descriptor depending on vseg type if( vseg_type == VSEG_TYPE_STACK ) { // release slot to local stack allocator vmm_stack_free( vmm , vseg ); // release vseg descriptor to local kmem vseg_free( vseg ); } else if( (vseg_type == VSEG_TYPE_ANON) || (vseg_type == VSEG_TYPE_FILE) || (vseg_type == VSEG_TYPE_REMOTE) ) { // release vseg to local mmap allocator vmm_mmap_free( vmm , vseg ); } else { // release vseg descriptor to local kmem vseg_free( vseg ); } #if DEBUG_VMM_REMOVE_VSEG cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_REMOVE_VSEG < cycle ) printk("[%s] thread[%x,%x] exit / process %x / %s / base %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, process->pid, vseg_type_str(vseg->type), vseg->min, cycle ); #endif } // end vmm_remove_vseg() /////////////////////////////////// void vmm_delete_vseg( pid_t pid, intptr_t vaddr ) { process_t * process; // local pointer on local process vseg_t * vseg; // local pointer on local vseg containing vaddr // get local pointer on local process descriptor process = cluster_get_local_process_from_pid( pid ); if( process == NULL ) { printk("\n[WARNING] in %s : cannot get local process descriptor\n", __FUNCTION__ ); return; } // get local pointer on local vseg containing vaddr vseg = vmm_vseg_from_vaddr( &process->vmm , vaddr ); if( vseg == NULL ) { printk("\n[WARNING] in %s : cannot get vseg descriptor\n", __FUNCTION__ ); return; } // call relevant function vmm_remove_vseg( process , vseg ); } // end vmm_delete_vseg ///////////////////////////////////////////// vseg_t * vmm_vseg_from_vaddr( vmm_t * vmm, intptr_t vaddr ) { xptr_t vseg_xp; vseg_t * vseg; xptr_t iter_xp; // get extended pointers on VSL lock and root xptr_t lock_xp = XPTR( local_cxy , &vmm->vsl_lock ); xptr_t root_xp = XPTR( local_cxy , &vmm->vsegs_root ); // get lock protecting the VSL remote_rwlock_rd_acquire( lock_xp ); // scan the list of vsegs in VSL XLIST_FOREACH( root_xp , iter_xp ) { // get pointers on vseg vseg_xp = XLIST_ELEMENT( iter_xp , vseg_t , xlist ); vseg = GET_PTR( vseg_xp ); // return success when match if( (vaddr >= vseg->min) && (vaddr < vseg->max) ) { // return success remote_rwlock_rd_release( lock_xp ); return vseg; } } // return failure remote_rwlock_rd_release( lock_xp ); return NULL; } // end vmm_vseg_from_vaddr() ///////////////////////////////////////////// error_t vmm_resize_vseg( process_t * process, intptr_t base, intptr_t size ) { error_t error; vseg_t * new; vpn_t vpn_min; vpn_t vpn_max; #if DEBUG_VMM_RESIZE_VSEG uint32_t cycle = (uint32_t)hal_get_cycles(); thread_t * this = CURRENT_THREAD; if( DEBUG_VMM_RESIZE_VSEG < cycle ) printk("\n[%s] thread[%x,%x] enter / process %x / base %x / size %d / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, process->pid, base, size, cycle ); #endif // get pointer on process VMM vmm_t * vmm = &process->vmm; intptr_t addr_min = base; intptr_t addr_max = base + size; // get pointer on vseg vseg_t * vseg = vmm_vseg_from_vaddr( vmm , base ); if( vseg == NULL) { printk("\n[ERROR] in %s : vseg(%x,%d) not found\n", __FUNCTION__, base , size ); return -1; } // resize depends on unmapped region base and size if( (vseg->min > addr_min) || (vseg->max < addr_max) ) // not included in vseg { printk("\n[ERROR] in %s : unmapped region[%x->%x[ not included in vseg[%x->%x[\n", __FUNCTION__, addr_min, addr_max, vseg->min, vseg->max ); error = -1; } else if( (vseg->min == addr_min) && (vseg->max == addr_max) ) // vseg must be deleted { #if( DEBUG_VMM_RESIZE_VSEG & 1 ) if( DEBUG_VMM_RESIZE_VSEG < cycle ) printk("\n[%s] unmapped region[%x->%x[ equal vseg[%x->%x[\n", __FUNCTION__, addr_min, addr_max, vseg->min, vseg->max ); #endif vmm_delete_vseg( process->pid , vseg->min ); #if( DEBUG_VMM_RESIZE_VSEG & 1 ) if( DEBUG_VMM_RESIZE_VSEG < cycle ) printk("\n[%s] thread[%x,%x] deleted vseg\n", __FUNCTION__, this->process->pid, this->trdid ); #endif error = 0; } else if( vseg->min == addr_min ) // vseg must be resized { #if( DEBUG_VMM_RESIZE_VSEG & 1 ) if( DEBUG_VMM_RESIZE_VSEG < cycle ) printk("\n[%s] unmapped region[%x->%x[ included in vseg[%x->%x[\n", __FUNCTION__, addr_min, addr_max, vseg->min, vseg->max ); #endif // update vseg min address vseg->min = addr_max; // update vpn_base and vpn_size vpn_min = vseg->min >> CONFIG_PPM_PAGE_SHIFT; vpn_max = (vseg->max - 1) >> CONFIG_PPM_PAGE_SHIFT; vseg->vpn_base = vpn_min; vseg->vpn_size = vpn_max - vpn_min + 1; #if( DEBUG_VMM_RESIZE_VSEG & 1 ) if( DEBUG_VMM_RESIZE_VSEG < cycle ) printk("\n[%s] thread[%x,%x] changed vseg_min\n", __FUNCTION__, this->process->pid, this->trdid ); #endif error = 0; } else if( vseg->max == addr_max ) // vseg must be resized { #if( DEBUG_VMM_RESIZE_VSEG & 1 ) if( DEBUG_VMM_RESIZE_VSEG < cycle ) printk("\n[%s] unmapped region[%x->%x[ included in vseg[%x->%x[\n", __FUNCTION__, addr_min, addr_max, vseg->min, vseg->max ); #endif // update vseg max address vseg->max = addr_min; // update vpn_base and vpn_size vpn_min = vseg->min >> CONFIG_PPM_PAGE_SHIFT; vpn_max = (vseg->max - 1) >> CONFIG_PPM_PAGE_SHIFT; vseg->vpn_base = vpn_min; vseg->vpn_size = vpn_max - vpn_min + 1; #if( DEBUG_VMM_RESIZE_VSEG & 1 ) if( DEBUG_VMM_RESIZE_VSEG < cycle ) printk("\n[%s] thread[%x,%x] changed vseg_max\n", __FUNCTION__, this->process->pid, this->trdid ); #endif error = 0; } else // vseg cut in three regions { #if( DEBUG_VMM_RESIZE_VSEG & 1 ) if( DEBUG_VMM_RESIZE_VSEG < cycle ) printk("\n[%s] unmapped region[%x->%x[ included in vseg[%x->%x[\n", __FUNCTION__, addr_min, addr_max, vseg->min, vseg->max ); #endif // resize existing vseg vseg->max = addr_min; // update vpn_base and vpn_size vpn_min = vseg->min >> CONFIG_PPM_PAGE_SHIFT; vpn_max = (vseg->max - 1) >> CONFIG_PPM_PAGE_SHIFT; vseg->vpn_base = vpn_min; vseg->vpn_size = vpn_max - vpn_min + 1; // create new vseg new = vmm_create_vseg( process, vseg->type, addr_min, (vseg->max - addr_max), vseg->file_offset, vseg->file_size, vseg->mapper_xp, vseg->cxy ); #if( DEBUG_VMM_RESIZE_VSEG & 1 ) if( DEBUG_VMM_RESIZE_VSEG < cycle ) printk("\n[%s] thread[%x,%x] replaced vseg by two smal vsegs\n", __FUNCTION__, this->process->pid, this->trdid ); #endif if( new == NULL ) error = -1; else error = 0; } #if DEBUG_VMM_RESIZE_VSEG if( DEBUG_VMM_RESIZE_VSEG < cycle ) printk("\n[%s] thread[%x,%x] exit / process %x / base %x / size %d / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, process->pid, base, size, cycle ); #endif return error; } // vmm_resize_vseg() /////////////////////////////////////////// error_t vmm_get_vseg( process_t * process, intptr_t vaddr, vseg_t ** found_vseg ) { xptr_t vseg_xp; vseg_t * vseg; vmm_t * vmm; error_t error; // get pointer on local VMM vmm = &process->vmm; // try to get vseg from local VMM vseg = vmm_vseg_from_vaddr( vmm , vaddr ); if( vseg == NULL ) // vseg not found in local cluster => try to get it from ref { // get extended pointer on reference process xptr_t ref_xp = process->ref_xp; // get cluster and local pointer on reference process cxy_t ref_cxy = GET_CXY( ref_xp ); process_t * ref_ptr = GET_PTR( ref_xp ); if( local_cxy == ref_cxy ) return -1; // local cluster is the reference // get extended pointer on reference vseg rpc_vmm_get_vseg_client( ref_cxy , ref_ptr , vaddr , &vseg_xp , &error ); if( error ) return -1; // vseg not found => illegal user vaddr // allocate a vseg in local cluster vseg = vseg_alloc(); if( vseg == NULL ) return -1; // cannot allocate a local vseg // initialise local vseg from reference vseg_init_from_ref( vseg , vseg_xp ); // build extended pointer on VSL lock xptr_t lock_xp = XPTR( local_cxy , &vmm->vsl_lock ); // take the VSL lock in write mode remote_rwlock_wr_acquire( lock_xp ); // register local vseg in local VSL vmm_attach_vseg_to_vsl( vmm , vseg ); // release the VSL lock remote_rwlock_wr_release( lock_xp ); } // success *found_vseg = vseg; return 0; } // end vmm_get_vseg() ////////////////////////////////////////////////////////////////////////////////////// // This static function compute the target cluster to allocate a physical page // for a given in a given , allocates the page and returns an extended // pointer on the allocated page descriptor. // The vseg cannot have the FILE type. ////////////////////////////////////////////////////////////////////////////////////// static xptr_t vmm_page_allocate( vseg_t * vseg, vpn_t vpn ) { #if DEBUG_VMM_PAGE_ALLOCATE uint32_t cycle = (uint32_t)hal_get_cycles(); thread_t * this = CURRENT_THREAD; if( DEBUG_VMM_PAGE_ALLOCATE < cycle ) printk("\n[%s] thread[%x,%x] enter for vpn %x / cycle %d\n", __FUNCTION__ , this->process->pid, this->trdid, vpn, cycle ); #endif xptr_t page_xp; cxy_t page_cxy; uint32_t index; uint32_t type = vseg->type; uint32_t flags = vseg->flags; uint32_t x_size = LOCAL_CLUSTER->x_size; uint32_t y_size = LOCAL_CLUSTER->y_size; // check vseg type assert( ( type != VSEG_TYPE_FILE ) , "illegal vseg type\n" ); if( flags & VSEG_DISTRIB ) // distributed => cxy depends on vpn LSB { index = vpn & ((x_size * y_size) - 1); page_cxy = HAL_CXY_FROM_XY( (index / y_size) , (index % y_size) ); // If the cluster selected from VPN's LSBs is empty, we select one randomly if ( cluster_is_active( page_cxy ) == false ) { page_cxy = cluster_random_select(); } } else // other cases => cxy specified in vseg { page_cxy = vseg->cxy; } // allocate a 4 Kbytes physical page from target cluster page_xp = ppm_remote_alloc_pages( page_cxy , 0 ); #if DEBUG_VMM_PAGE_ALLOCATE cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_PAGE_ALLOCATE < cycle ) printk("\n[%s] thread[%x,%x] exit for vpn %x / ppn %x / cluster %x / cycle %d\n", __FUNCTION__ , this->process->pid, this->trdid, vpn, ppm_page2ppn(page_xp), page_cxy, cycle ); #endif return page_xp; } // end vmm_page_allocate() //////////////////////////////////////// error_t vmm_get_one_ppn( vseg_t * vseg, vpn_t vpn, ppn_t * ppn ) { error_t error; xptr_t page_xp; // extended pointer on physical page descriptor uint32_t page_id; // missing page index in vseg mapper uint32_t type; // vseg type; type = vseg->type; page_id = vpn - vseg->vpn_base; #if DEBUG_VMM_GET_ONE_PPN uint32_t cycle = (uint32_t)hal_get_cycles(); thread_t * this = CURRENT_THREAD; // if( DEBUG_VMM_GET_ONE_PPN < cycle ) if( vpn == 0x40B ) printk("\n[%s] thread[%x,%x] enter for vpn %x / type %s / page_id %d / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, vpn, vseg_type_str(type), page_id, cycle ); #endif // FILE type : get the physical page from the file mapper if( type == VSEG_TYPE_FILE ) { // get extended pointer on mapper xptr_t mapper_xp = vseg->mapper_xp; assert( (mapper_xp != XPTR_NULL), "mapper not defined for a FILE vseg\n" ); // get extended pointer on page descriptor page_xp = mapper_remote_get_page( mapper_xp , page_id ); if ( page_xp == XPTR_NULL ) return EINVAL; } // Other types : allocate a physical page from target cluster, // as defined by vseg type and vpn value else { // allocate one physical page page_xp = vmm_page_allocate( vseg , vpn ); if( page_xp == XPTR_NULL ) return ENOMEM; // initialise missing page from .elf file mapper for DATA and CODE types // the vseg->mapper_xp field is an extended pointer on the .elf file mapper if( (type == VSEG_TYPE_CODE) || (type == VSEG_TYPE_DATA) ) { // get extended pointer on mapper xptr_t mapper_xp = vseg->mapper_xp; assert( (mapper_xp != XPTR_NULL), "mapper not defined for a CODE or DATA vseg\n" ); // compute missing page offset in vseg uint32_t offset = page_id << CONFIG_PPM_PAGE_SHIFT; // compute missing page offset in .elf file uint32_t elf_offset = vseg->file_offset + offset; #if (DEBUG_VMM_GET_ONE_PPN & 0x1) // if( DEBUG_VMM_GET_ONE_PPN < cycle ) if( vpn == 0x40B ) printk("\n[%s] thread[%x,%x] for vpn = %x / elf_offset = %x\n", __FUNCTION__, this->process->pid, this->trdid, vpn, elf_offset ); #endif // compute extended pointer on page base xptr_t base_xp = ppm_page2base( page_xp ); // file_size (in .elf mapper) can be smaller than vseg_size (BSS) uint32_t file_size = vseg->file_size; if( file_size < offset ) // missing page fully in BSS { #if (DEBUG_VMM_GET_ONE_PPN & 0x1) // if( DEBUG_VMM_GET_ONE_PPN < cycle ) if( vpn == 0x40B ) printk("\n[%s] thread[%x,%x] for vpn %x / fully in BSS\n", __FUNCTION__, this->process->pid, this->trdid, vpn ); #endif if( GET_CXY( page_xp ) == local_cxy ) { memset( GET_PTR( base_xp ) , 0 , CONFIG_PPM_PAGE_SIZE ); } else { hal_remote_memset( base_xp , 0 , CONFIG_PPM_PAGE_SIZE ); } } else if( file_size >= (offset + CONFIG_PPM_PAGE_SIZE) ) // fully in mapper { #if (DEBUG_VMM_GET_ONE_PPN & 0x1) // if( DEBUG_VMM_GET_ONE_PPN < cycle ) if( vpn == 0x40B ) printk("\n[%s] thread[%x,%x] for vpn %x / fully in mapper\n", __FUNCTION__, this->process->pid, this->trdid, vpn ); #endif error = mapper_move_kernel( mapper_xp, true, // to_buffer elf_offset, base_xp, CONFIG_PPM_PAGE_SIZE ); if( error ) return EINVAL; } else // both in mapper and in BSS : // - (file_size - offset) bytes from mapper // - (page_size + offset - file_size) bytes from BSS { #if (DEBUG_VMM_GET_ONE_PPN & 0x1) // if( DEBUG_VMM_GET_ONE_PPN < cycle ) if( vpn == 0x40B ) printk("\n[%s] thread[%x,%x] for vpn %x / both mapper & BSS\n" " %d bytes from mapper / %d bytes from BSS\n", __FUNCTION__, this->process->pid, this->trdid, vpn, file_size - offset , offset + CONFIG_PPM_PAGE_SIZE - file_size ); #endif // initialize mapper part error = mapper_move_kernel( mapper_xp, true, // to buffer elf_offset, base_xp, file_size - offset ); if( error ) return EINVAL; // initialize BSS part if( GET_CXY( page_xp ) == local_cxy ) { memset( GET_PTR( base_xp ) + file_size - offset , 0 , offset + CONFIG_PPM_PAGE_SIZE - file_size ); } else { hal_remote_memset( base_xp + file_size - offset , 0 , offset + CONFIG_PPM_PAGE_SIZE - file_size ); } } } // end initialisation for CODE or DATA types } // return ppn *ppn = ppm_page2ppn( page_xp ); #if DEBUG_VMM_GET_ONE_PPN cycle = (uint32_t)hal_get_cycles(); // if( DEBUG_VMM_GET_ONE_PPN < cycle ) if( vpn == 0x40B ) printk("\n[%s] thread[%x,%x] exit for vpn %x / ppn %x / cycle\n", __FUNCTION__ , this->process->pid, this->trdid , vpn , *ppn, cycle ); #endif return 0; } // end vmm_get_one_ppn() /////////////////////////////////////////////////// error_t vmm_handle_page_fault( process_t * process, vpn_t vpn ) { vseg_t * vseg; // vseg containing vpn uint32_t attr; // PTE_ATTR value ppn_t ppn; // PTE_PPN value uint32_t ref_attr; // PTE_ATTR value in reference GPT ppn_t ref_ppn; // PTE_PPN value in reference GPT cxy_t ref_cxy; // reference cluster for missing vpn process_t * ref_ptr; // reference process for missing vpn xptr_t local_gpt_xp; // extended pointer on local GPT xptr_t ref_gpt_xp; // extended pointer on reference GPT error_t error; // value returned by called functions thread_t * this = CURRENT_THREAD; #if (CONFIG_INSTRUMENTATION_PGFAULTS || DEBUG_VMM_HANDLE_PAGE_FAULT) uint32_t start_cycle = (uint32_t)hal_get_cycles(); #endif #if DEBUG_VMM_HANDLE_PAGE_FAULT if( vpn == 0x40b ) printk("\n[%s] thread[%x,%x] enter for vpn %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, vpn, start_cycle ); #endif #if (DEBUG_VMM_HANDLE_PAGE_FAULT & 1) hal_vmm_display( this->process , false ); #endif // get local vseg (access to reference VSL can be required) error = vmm_get_vseg( process, (intptr_t)vpn<pid, this->trdid ); return EXCP_USER_ERROR; } #if DEBUG_VMM_HANDLE_PAGE_FAULT uint32_t cycle = (uint32_t)hal_get_cycles(); if( vpn == 0x40b ) printk("\n[%s] thread[%x,%x] found vseg %s / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, vseg_type_str(vseg->type), cycle ); #endif // build extended pointer on local GPT local_gpt_xp = XPTR( local_cxy , &process->vmm.gpt ); // lock PTE in local GPT and get current PPN and attributes error = hal_gpt_lock_pte( local_gpt_xp, vpn, &attr, &ppn ); if( error ) { printk("\n[PANIC] in %s : cannot lock PTE in local GPT / vpn %x / process %x\n", __FUNCTION__ , vpn , process->pid ); return EXCP_KERNEL_PANIC; } #if DEBUG_VMM_HANDLE_PAGE_FAULT cycle = (uint32_t)hal_get_cycles(); if( vpn == 0x40b ) printk("\n[%s] thread[%x,%x] locked vpn %x in cluster %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, vpn, local_cxy, cycle ); #endif // handle page fault only if local PTE still unmapped after lock if( (attr & GPT_MAPPED) == 0 ) { // get reference process cluster and local pointer ref_cxy = GET_CXY( process->ref_xp ); ref_ptr = GET_PTR( process->ref_xp ); /////////////// private vseg or (local == reference) /////////////// => access only the local GPT if( (vseg->type == VSEG_TYPE_STACK) || (vseg->type == VSEG_TYPE_CODE) || (ref_cxy == local_cxy ) ) { #if DEBUG_VMM_HANDLE_PAGE_FAULT if( vpn == 0x40b ) printk("\n[%s] thread[%x,%x] : access local gpt : local_cxy %x / ref_cxy %x / type %s\n", __FUNCTION__, this->process->pid, this->trdid, local_cxy, ref_cxy, vseg_type_str(vseg->type) ); #endif // allocate and initialise a physical page error = vmm_get_one_ppn( vseg , vpn , &ppn ); if( error ) { printk("\n[ERROR] in %s : no physical page / process = %x / vpn = %x\n", __FUNCTION__ , process->pid , vpn ); // unlock PTE in local GPT hal_gpt_unlock_pte( local_gpt_xp , vpn ); return EXCP_KERNEL_PANIC; } // define attr from vseg flags attr = GPT_MAPPED | GPT_SMALL | GPT_READABLE; if( vseg->flags & VSEG_USER ) attr |= GPT_USER; if( vseg->flags & VSEG_WRITE ) attr |= GPT_WRITABLE; if( vseg->flags & VSEG_EXEC ) attr |= GPT_EXECUTABLE; if( vseg->flags & VSEG_CACHE ) attr |= GPT_CACHABLE; // set PTE to local GPT // it unlocks this PTE hal_gpt_set_pte( local_gpt_xp, vpn, attr, ppn ); #if (CONFIG_INSTRUMENTATION_PGFAULTS || DEBUG_VMM_HANDLE_PAGE_FAULT) uint32_t end_cycle = (uint32_t)hal_get_cycles(); #endif #if DEBUG_VMM_HANDLE_PAGE_FAULT if( vpn == 0x40b ) printk("\n[%s] thread[%x,%x] handled local pgfault / ppn %x / attr %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, ppn, attr, end_cycle ); #endif #if CONFIG_INSTRUMENTATION_PGFAULTS this->info.local_pgfault_nr++; this->info.local_pgfault_cost += (end_cycle - start_cycle); #endif return EXCP_NON_FATAL; } // end local GPT access /////////////////// public vseg and (local != reference) /////////////////// => access ref GPT to update local GPT else { #if DEBUG_VMM_HANDLE_PAGE_FAULT if( vpn == 0x40b ) printk("\n[%s] thread[%x,%x] access ref gpt : local_cxy %x / ref_cxy %x / type %s\n", __FUNCTION__, this->process->pid, this->trdid, local_cxy, ref_cxy, vseg_type_str(vseg->type) ); #endif // build extended pointer on reference GPT ref_gpt_xp = XPTR( ref_cxy , &ref_ptr->vmm.gpt ); // lock PTE in reference GPT and get current PPN and attributes error = hal_gpt_lock_pte( ref_gpt_xp, vpn, &ref_attr, &ref_ppn ); if( error ) { printk("\n[PANIC] in %s : cannot lock PTE in ref GPT / vpn %x / process %x\n", __FUNCTION__ , vpn , process->pid ); // unlock PTE in local GPT hal_gpt_unlock_pte( local_gpt_xp , vpn ); return EXCP_KERNEL_PANIC; } #if DEBUG_VMM_HANDLE_PAGE_FAULT if( vpn == 0x40b ) printk("\n[%s] thread[%x,%x] get pte from ref gpt / attr %x / ppn %x\n", __FUNCTION__, this->process->pid, this->trdid, ref_attr, ref_ppn ); #endif if( ref_attr & GPT_MAPPED ) // false page fault { // update local GPT from reference GPT values // this unlocks the PTE in local GPT hal_gpt_set_pte( local_gpt_xp, vpn, ref_attr, ref_ppn ); #if DEBUG_VMM_HANDLE_PAGE_FAULT if( vpn == 0x40b ) printk("\n[%s] thread[%x,%x] updated local gpt for a false pgfault\n", __FUNCTION__, this->process->pid, this->trdid ); #endif // unlock the PTE in reference GPT hal_gpt_unlock_pte( ref_gpt_xp, vpn ); #if DEBUG_VMM_HANDLE_PAGE_FAULT if( vpn == 0x40b ) printk("\n[%s] thread[%x,%x] unlock the ref gpt after a false pgfault\n", __FUNCTION__, this->process->pid, this->trdid ); #endif #if (CONFIG_INSTRUMENTATION_PGFAULTS || DEBUG_VMM_HANDLE_PAGE_FAULT) uint32_t end_cycle = (uint32_t)hal_get_cycles(); #endif #if DEBUG_VMM_HANDLE_PAGE_FAULT if( vpn == 0x40b ) printk("\n[%s] thread[%x,%x] handled false pgfault / ppn %x / attr %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, ref_ppn, ref_attr, end_cycle ); #endif #if CONFIG_INSTRUMENTATION_PGFAULTS this->info.false_pgfault_nr++; this->info.false_pgfault_cost += (end_cycle - start_cycle); #endif return EXCP_NON_FATAL; } else // true page fault { // allocate and initialise a physical page depending on the vseg type error = vmm_get_one_ppn( vseg , vpn , &ppn ); if( error ) { printk("\n[ERROR] in %s : no memory / process = %x / vpn = %x\n", __FUNCTION__ , process->pid , vpn ); // unlock PTE in local GPT and in reference GPT hal_gpt_unlock_pte( local_gpt_xp , vpn ); hal_gpt_unlock_pte( ref_gpt_xp , vpn ); return EXCP_KERNEL_PANIC; } // define attr from vseg flags attr = GPT_MAPPED | GPT_SMALL | GPT_READABLE; if( vseg->flags & VSEG_USER ) attr |= GPT_USER; if( vseg->flags & VSEG_WRITE ) attr |= GPT_WRITABLE; if( vseg->flags & VSEG_EXEC ) attr |= GPT_EXECUTABLE; if( vseg->flags & VSEG_CACHE ) attr |= GPT_CACHABLE; #if DEBUG_VMM_HANDLE_PAGE_FAULT if( vpn == 0x40b ) printk("\n[%s] thread[%x,%x] build a new PTE for a true pgfault\n", __FUNCTION__, this->process->pid, this->trdid ); #endif // set PTE in reference GPT // this unlock the PTE hal_gpt_set_pte( ref_gpt_xp, vpn, attr, ppn ); #if DEBUG_VMM_HANDLE_PAGE_FAULT if( vpn == 0x40b ) printk("\n[%s] thread[%x,%x] set new PTE in ref gpt for a true page fault\n", __FUNCTION__, this->process->pid, this->trdid ); #endif // set PTE in local GPT // this unlock the PTE hal_gpt_set_pte( local_gpt_xp, vpn, attr, ppn ); #if (CONFIG_INSTRUMENTATION_PGFAULTS || DEBUG_VMM_HANDLE_PAGE_FAULT) uint32_t end_cycle = (uint32_t)hal_get_cycles(); #endif #if DEBUG_VMM_HANDLE_PAGE_FAULT if( vpn == 0x40b ) printk("\n[%s] thread[%x,%x] handled global pgfault / ppn %x / attr %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, ppn, attr, end_cycle ); #endif #if CONFIG_INSTRUMENTATION_PGFAULTS this->info.global_pgfault_nr++; this->info.global_pgfault_cost += (end_cycle - start_cycle); #endif return EXCP_NON_FATAL; } } } else // page has been locally mapped by another concurrent thread { // unlock the PTE in local GPT hal_gpt_unlock_pte( local_gpt_xp , vpn ); #if (CONFIG_INSTRUMENTATION_PGFAULTS || DEBUG_VMM_HANDLE_PAGE_FAULT) uint32_t end_cycle = (uint32_t)hal_get_cycles(); #endif #if DEBUG_VMM_HANDLE_PAGE_FAULT if( vpn == 0x40b ) printk("\n[%s] handled by another thread / vpn %x / ppn %x / attr %x / cycle %d\n", __FUNCTION__, vpn, ppn, attr, end_cycle ); #endif #if CONFIG_INSTRUMENTATION_PGFAULTS this->info.false_pgfault_nr++; this->info.false_pgfault_cost += (end_cycle - start_cycle); #endif return EXCP_NON_FATAL; } } // end vmm_handle_page_fault() //////////////////////////////////////////// error_t vmm_handle_cow( process_t * process, vpn_t vpn ) { vseg_t * vseg; // vseg containing vpn xptr_t gpt_xp; // extended pointer on GPT (local or reference) gpt_t * gpt_ptr; // local pointer on GPT (local or reference) cxy_t gpt_cxy; // GPT cluster identifier uint32_t old_attr; // current PTE_ATTR value ppn_t old_ppn; // current PTE_PPN value uint32_t new_attr; // new PTE_ATTR value ppn_t new_ppn; // new PTE_PPN value cxy_t ref_cxy; // reference process cluster process_t * ref_ptr; // local pointer on reference process error_t error; thread_t * this = CURRENT_THREAD; #if DEBUG_VMM_HANDLE_COW uint32_t cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_HANDLE_COW < cycle ) printk("\n[%s] thread[%x,%x] enter for vpn %x / core[%x,%d] / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, vpn, local_cxy, this->core->lid, cycle ); #endif #if (DEBUG_VMM_HANDLE_PAGE_FAULT & 1) hal_vmm_display( process , true ); #endif // get local vseg error = vmm_get_vseg( process, (intptr_t)vpn<pid, this->trdid ); return EXCP_USER_ERROR; } #if DEBUG_VMM_HANDLE_COW if( DEBUG_VMM_HANDLE_COW < cycle ) printk("\n[%s] thread[%x,%x] get vseg %s\n", __FUNCTION__, this->process->pid, this->trdid, vseg_type_str(vseg->type) ); #endif // get reference process cluster and local pointer ref_cxy = GET_CXY( process->ref_xp ); ref_ptr = GET_PTR( process->ref_xp ); // build pointers on relevant GPT // - access only local GPT for a private vseg // - access reference GPT and all copies for a public vseg if( (vseg->type == VSEG_TYPE_STACK) || (vseg->type == VSEG_TYPE_CODE) ) { gpt_cxy = local_cxy; gpt_ptr = &process->vmm.gpt; gpt_xp = XPTR( gpt_cxy , gpt_ptr ); } else { gpt_cxy = ref_cxy; gpt_ptr = &ref_ptr->vmm.gpt; gpt_xp = XPTR( gpt_cxy , gpt_ptr ); } // lock target PTE in relevant GPT (local or reference) // and get current PTE value error = hal_gpt_lock_pte( gpt_xp, vpn, &old_attr, &old_ppn ); if( error ) { printk("\n[PANIC] in %s : cannot lock PTE in GPT / cxy %x / vpn %x / process %x\n", __FUNCTION__ , gpt_cxy, vpn , process->pid ); return EXCP_KERNEL_PANIC; } #if DEBUG_VMM_HANDLE_COW if( DEBUG_VMM_HANDLE_COW < cycle ) printk("\n[%s] thread[%x,%x] get pte for vpn %x : ppn %x / attr %x\n", __FUNCTION__, this->process->pid, this->trdid, vpn, old_ppn, old_attr ); #endif // return user error if COW attribute not set or PTE2 unmapped if( ((old_attr & GPT_COW) == 0) || ((old_attr & GPT_MAPPED) == 0) ) { hal_gpt_unlock_pte( gpt_xp , vpn ); return EXCP_USER_ERROR; } // get pointers on physical page descriptor xptr_t page_xp = ppm_ppn2page( old_ppn ); cxy_t page_cxy = GET_CXY( page_xp ); page_t * page_ptr = GET_PTR( page_xp ); // get extended pointers on forks and lock field in page descriptor xptr_t forks_xp = XPTR( page_cxy , &page_ptr->forks ); xptr_t forks_lock_xp = XPTR( page_cxy , &page_ptr->lock ); // take lock protecting "forks" counter remote_busylock_acquire( forks_lock_xp ); // get number of pending forks from page descriptor uint32_t forks = hal_remote_l32( forks_xp ); #if DEBUG_VMM_HANDLE_COW if( DEBUG_VMM_HANDLE_COW < cycle ) printk("\n[%s] thread[%x,%x] get forks = %d for vpn %x\n", __FUNCTION__, this->process->pid, this->trdid, forks, vpn ); #endif if( forks ) // pending fork => allocate a new page, and copy old to new { // decrement pending forks counter in page descriptor hal_remote_atomic_add( forks_xp , -1 ); // release lock protecting "forks" counter remote_busylock_release( forks_lock_xp ); // allocate a new physical page depending on vseg type page_xp = vmm_page_allocate( vseg , vpn ); if( page_xp == XPTR_NULL ) { printk("\n[PANIC] in %s : no memory for vpn %x in process %x\n", __FUNCTION__ , vpn, process->pid ); hal_gpt_unlock_pte( gpt_xp , vpn ); return EXCP_KERNEL_PANIC; } // compute allocated page PPN new_ppn = ppm_page2ppn( page_xp ); #if DEBUG_VMM_HANDLE_COW if( DEBUG_VMM_HANDLE_COW < cycle ) printk("\n[%s] thread[%x,%x] get new ppn %x for vpn %x\n", __FUNCTION__, this->process->pid, this->trdid, new_ppn, vpn ); #endif // copy old page content to new page hal_remote_memcpy( ppm_ppn2base( new_ppn ), ppm_ppn2base( old_ppn ), CONFIG_PPM_PAGE_SIZE ); #if DEBUG_VMM_HANDLE_COW if( DEBUG_VMM_HANDLE_COW < cycle ) printk("\n[%s] thread[%x,%x] copied old page to new page\n", __FUNCTION__, this->process->pid, this->trdid ); #endif } else // no pending fork => keep the existing page { // release lock protecting "forks" counter remote_busylock_release( forks_lock_xp ); #if(DEBUG_VMM_HANDLE_COW & 1) if( DEBUG_VMM_HANDLE_COW < cycle ) printk("\n[%s] thread[%x,%x] no pending forks / keep existing PPN %x\n", __FUNCTION__, this->process->pid, this->trdid, old_ppn ); #endif new_ppn = old_ppn; } // build new_attr : set WRITABLE, reset COW, reset LOCKED new_attr = (((old_attr | GPT_WRITABLE) & (~GPT_COW)) & (~GPT_LOCKED)); // update the relevant GPT(s) // - private vseg => update only the local GPT // - public vseg => update the reference GPT AND all the GPT copies if( (vseg->type == VSEG_TYPE_STACK) || (vseg->type == VSEG_TYPE_CODE) ) { // set the new PTE2 hal_gpt_set_pte( gpt_xp, vpn, new_attr, new_ppn ); } else { if( ref_cxy == local_cxy ) // reference cluster is local { vmm_global_update_pte( process, vpn, new_attr, new_ppn ); } else // reference cluster is remote { rpc_vmm_global_update_pte_client( ref_cxy, ref_ptr, vpn, new_attr, new_ppn ); } } #if DEBUG_VMM_HANDLE_COW cycle = (uint32_t)hal_get_cycles(); if( DEBUG_VMM_HANDLE_COW < cycle ) printk("\n[%s] thread[%x,%x] exit for vpn %x / core[%x,%d] / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, vpn, local_cxy, this->core->lid, cycle ); #endif return EXCP_NON_FATAL; } // end vmm_handle_cow()