/*
 * vmm.c - virtual memory manager related operations interface.
 *
 * Authors  Ghassan Almaless (2008,2009,2010,2011,2012)
 *          Mohamed Lamine Karaoui (2015)
 *          Alain Greiner (2016)
 *
 * Copyright (c) UPMC Sorbonne Universites
 *
 * This file is part of ALMOS-MKH.
 *
 * ALMOS-MKH is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2.0 of the License.
 *
 * ALMOS-MKH is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <kernel_config.h>
#include <hal_types.h>
#include <hal_special.h>
#include <hal_gpt.h>
#include <printk.h>
#include <rwlock.h>
#include <spinlock.h>
#include <list.h>
#include <bits.h>
#include <grdxt.h>
#include <process.h>
#include <thread.h>
#include <cluster.h>
#include <vseg.h>
#include <kmem.h>
#include <page.h>
#include <ppm.h>
#include <rpc.h>
#include <vmm.h>

//////////////////////////////////////////////////////////////////////////////////
//   Extern global variables
//////////////////////////////////////////////////////////////////////////////////

extern process_t process_zero;      // defined in cluster.c file

////////////////////////////////////
void vmm_init( process_t * process )
{
    error_t    error;
    vseg_t   * vseg_kentry;
    vseg_t   * vseg_args;
    vseg_t   * vseg_envs;
    vseg_t   * vseg_heap;
    intptr_t   base;
    intptr_t   size;

    // get pointer on VMM
    vmm_t * vmm = &process->vmm;

    // check UTILS zone size
    if( (CONFIG_VMM_KENTRY_SIZE +
         CONFIG_VMM_ARGS_SIZE   +
         CONFIG_VMM_ENVS_SIZE   ) > CONFIG_VMM_ELF_BASE )
    {
        printk("\n[PANIC] in %s : UTILS zone too small for process %x\n",
               __FUNCTION__ , process->pid );
        hal_core_sleep();
    }

    // check max number of stacks slots
    if( CONFIG_THREAD_MAX_PER_CLUSTER > 32 )
    {
        printk("\n[PANIC] in %s : max number of threads per cluster for a single process"
               " cannot be larger than 32\n", __FUNCTION__ );
        hal_core_sleep();
    }

    // check STACK zone size
    if( (CONFIG_VMM_STACK_SIZE * CONFIG_THREAD_MAX_PER_CLUSTER) >
        (CONFIG_VMM_VSPACE_SIZE - CONFIG_VMM_STACK_BASE) )
    {
        printk("\n[PANIC] in %s : STACK zone too small for process %x\n",
               __FUNCTION__ , process->pid );
        hal_core_sleep();
    }

    // initialize the rwlock protecting the vsegs list
    rwlock_init( &vmm->vsegs_lock );

    // initialize local list of vsegs and radix-tree
    vmm->vsegs_nr = 0;
    list_root_init( &vmm->vsegs_root );
    error = grdxt_init( &vmm->grdxt,
                        CONFIG_VMM_GRDXT_W1,
                        CONFIG_VMM_GRDXT_W2,
                        CONFIG_VMM_GRDXT_W3 );
    if( error )
    {
        printk("\n[PANIC] in %s : cannot initialize radix tree for process %x\n",
               __FUNCTION__ , process->pid );
        hal_core_sleep();
    }

    // register kentry vseg in VMM
    base = 1 << CONFIG_PPM_PAGE_SHIFT;
    size = CONFIG_VMM_KENTRY_SIZE << CONFIG_PPM_PAGE_SHIFT;

    vseg_kentry = vmm_create_vseg( process , base , size , VSEG_TYPE_CODE );

    if( vseg_kentry == NULL )
    {
        printk("\n[PANIC] in %s : cannot register kentry vseg for process %x\n",
               __FUNCTION__ , process->pid );
        hal_core_sleep();
    }

    vmm->kent_vpn_base = 1;

    // register the args vseg in VMM
    base = (CONFIG_VMM_KENTRY_SIZE + 1 ) << CONFIG_PPM_PAGE_SHIFT;
    size = CONFIG_VMM_ARGS_SIZE << CONFIG_PPM_PAGE_SHIFT;

    vseg_args = vmm_create_vseg( process , base , size , VSEG_TYPE_DATA );

    if( vseg_args == NULL )
    {
        printk("\n[PANIC] in %s : cannot register args vseg for process %x\n",
               __FUNCTION__ , process->pid );
        hal_core_sleep();
    }

    vmm->args_vpn_base = CONFIG_VMM_KENTRY_SIZE + 1;

    // register the envs vseg in VMM
    base = (CONFIG_VMM_KENTRY_SIZE + CONFIG_VMM_ARGS_SIZE + 1 ) << CONFIG_PPM_PAGE_SHIFT;
    size = CONFIG_VMM_ENVS_SIZE << CONFIG_PPM_PAGE_SHIFT;

    vseg_envs = vmm_create_vseg( process , base , size , VSEG_TYPE_DATA );

    if( vseg_envs == NULL )
    {
        printk("\n[PANIC] in %s : cannot register envs vseg for process %x\n",
               __FUNCTION__ , process->pid );
        hal_core_sleep();
    }

    vmm->envs_vpn_base = CONFIG_VMM_KENTRY_SIZE + CONFIG_VMM_ARGS_SIZE + 1;

    // register the heap vseg in VMM
    base = CONFIG_VMM_HEAP_BASE << CONFIG_PPM_PAGE_SHIFT;
    size = (CONFIG_VMM_MMAP_BASE - CONFIG_VMM_HEAP_BASE) << CONFIG_PPM_PAGE_SHIFT;

    vseg_heap = vmm_create_vseg( process , base , size , VSEG_TYPE_HEAP );

    if( vseg_heap == NULL )
    {
        printk("\n[PANIC] in %s : cannot register heap vseg for process %x\n",
               __FUNCTION__ , process->pid );
        hal_core_sleep();
    }

    vmm->heap_vpn_base = CONFIG_VMM_HEAP_BASE;

    // initialize generic page table
    error = hal_gpt_create( &vmm->gpt );

    if( error )
    {
        printk("\n[PANIC] in %s : cannot initialize page table\n", __FUNCTION__ );
        hal_core_sleep();
    }

    // initialize STACK allocator
    vmm->stack_mgr.bitmap   = 0;
    vmm->stack_mgr.vpn_base = CONFIG_VMM_STACK_BASE;

    // initialize MMAP allocator
    vmm->mmap_mgr.vpn_base       = CONFIG_VMM_MMAP_BASE;
    vmm->mmap_mgr.vpn_size       = CONFIG_VMM_STACK_BASE - CONFIG_VMM_MMAP_BASE;
    vmm->mmap_mgr.first_free_vpn = CONFIG_VMM_MMAP_BASE;

    uint32_t i;
    for( i = 0 ; i < 32 ; i++ ) list_root_init( &vmm->mmap_mgr.zombi_list[i] );

    // initialize instrumentation counters
    vmm->pgfault_nr = 0;
    vmm->u_err_nr   = 0;
    vmm->m_err_nr   = 0;

    hal_fence();
}
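
/*
 * Indicative layout of the user virtual address space built by vmm_init(),
 * expressed in pages, as it follows from the CONFIG_VMM_* constants used above.
 * The exact numeric boundaries depend on the kernel configuration and are only
 * sketched here for illustration.
 *
 *   vpn 0                   : unmapped (NULL page)
 *   vpn 1                   : kentry vseg  (CONFIG_VMM_KENTRY_SIZE pages)
 *   ...                     : args vseg    (CONFIG_VMM_ARGS_SIZE pages)
 *   ...                     : envs vseg    (CONFIG_VMM_ENVS_SIZE pages)
 *   CONFIG_VMM_ELF_BASE     : code & data vsegs (registered at ELF load time)
 *   CONFIG_VMM_HEAP_BASE    : heap vseg
 *   CONFIG_VMM_MMAP_BASE    : MMAP zone (ANON / FILE / REMOTE vsegs)
 *   CONFIG_VMM_STACK_BASE   : STACK zone (up to 32 slots of
 *                             CONFIG_VMM_STACK_SIZE pages each)
 *   CONFIG_VMM_VSPACE_SIZE  : end of user virtual space
 */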

//////////////////////////////////////////
error_t vmm_copy( process_t * dst_process,
                  process_t * src_process )
{
    error_t error;

    vmm_t * src_vmm = &src_process->vmm;
    vmm_t * dst_vmm = &dst_process->vmm;

    // take the src_vmm vsegs_lock
    rwlock_wr_lock( &src_vmm->vsegs_lock );

    // initialize dst_vmm vsegs_lock
    rwlock_init( &dst_vmm->vsegs_lock );

    // initialize the dst_vmm vsegs list and the radix tree
    dst_vmm->vsegs_nr = 0;
    list_root_init( &dst_vmm->vsegs_root );
    error = grdxt_init( &dst_vmm->grdxt,
                        CONFIG_VMM_GRDXT_W1,
                        CONFIG_VMM_GRDXT_W2,
                        CONFIG_VMM_GRDXT_W3 );
    if( error )
    {
        printk("\n[ERROR] in %s : cannot initialize radix tree for process %x\n",
               __FUNCTION__ , dst_process->pid );
        rwlock_wr_unlock( &src_vmm->vsegs_lock );
        return ENOMEM;
    }

    // loop on src_vmm list of vsegs to create
    // and register vsegs copies in dst_vmm
    list_entry_t * iter;
    vseg_t       * src_vseg;
    vseg_t       * dst_vseg;
    LIST_FOREACH( &src_vmm->vsegs_root , iter )
    {
        // get pointer on current src_vseg
        src_vseg = LIST_ELEMENT( iter , vseg_t , list );

        // allocate memory for a new dst_vseg
        dst_vseg = vseg_alloc();

        if( dst_vseg == NULL )
        {
            // release all allocated vsegs
            LIST_FOREACH( &dst_vmm->vsegs_root , iter )
            {
                dst_vseg = LIST_ELEMENT( iter , vseg_t , list );
                vseg_free( dst_vseg );
            }
            rwlock_wr_unlock( &src_vmm->vsegs_lock );
            return ENOMEM;
        }

        // copy src_vseg to dst_vseg
        vseg_init_from_ref( dst_vseg , XPTR( local_cxy , src_vseg ) );

        // register dst_vseg in dst_vmm
        vseg_attach( dst_vmm , dst_vseg );
    }

    // release the src_vmm vsegs_lock
    rwlock_wr_unlock( &src_vmm->vsegs_lock );

    // initialize generic page table
    error = hal_gpt_create( &dst_vmm->gpt );

    if( error )
    {
        printk("\n[ERROR] in %s : cannot initialize page table\n", __FUNCTION__ );
        return ENOMEM;
    }

    // initialize STACK allocator
    dst_vmm->stack_mgr.bitmap   = 0;
    dst_vmm->stack_mgr.vpn_base = CONFIG_VMM_STACK_BASE;

    // initialize MMAP allocator
    dst_vmm->mmap_mgr.vpn_base       = CONFIG_VMM_MMAP_BASE;
    dst_vmm->mmap_mgr.vpn_size       = CONFIG_VMM_STACK_BASE - CONFIG_VMM_MMAP_BASE;
    dst_vmm->mmap_mgr.first_free_vpn = CONFIG_VMM_MMAP_BASE;

    uint32_t i;
    for( i = 0 ; i < 32 ; i++ ) list_root_init( &dst_vmm->mmap_mgr.zombi_list[i] );

    // initialize instrumentation counters
    dst_vmm->pgfault_nr = 0;
    dst_vmm->u_err_nr   = 0;
    dst_vmm->m_err_nr   = 0;

    // copy base addresses
    dst_vmm->kent_vpn_base = src_vmm->kent_vpn_base;
    dst_vmm->args_vpn_base = src_vmm->args_vpn_base;
    dst_vmm->envs_vpn_base = src_vmm->envs_vpn_base;
    dst_vmm->heap_vpn_base = src_vmm->heap_vpn_base;
    dst_vmm->code_vpn_base = src_vmm->code_vpn_base;
    dst_vmm->data_vpn_base = src_vmm->data_vpn_base;

    dst_vmm->entry_point = src_vmm->entry_point;

    // HEAP TODO : new heap for child ???
    dst_vmm->heap_vseg = src_vmm->heap_vseg;

    // copy GPT content from src_vmm to dst_vmm, activating "Copy-On-Write"
    // TODO register Copy-On-Write in page descriptors
    bool_t cow = true;
    hal_gpt_copy( &dst_vmm->gpt , &src_vmm->gpt , cow );

    hal_fence();

    return 0;
}
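
/*
 * Illustrative (hypothetical) calling sequence for vmm_copy(), as it could
 * appear in a fork-like path: the child process descriptor is assumed to be
 * already allocated and registered; only the VMM replication step is shown.
 *
 *    error_t     error;
 *    process_t * child;      // hypothetical child process descriptor
 *    process_t * parent;     // hypothetical parent process descriptor
 *
 *    error = vmm_copy( child , parent );
 *    if( error )             // child has no usable address space
 *    {
 *        printk("\n[ERROR] cannot replicate VMM for child process %x\n", child->pid );
 *        return ENOMEM;
 *    }
 *    // from this point the child GPT is a Copy-On-Write clone of the parent GPT
 */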

///////////////////////////////////////
void vmm_destroy( process_t * process )
{
    vseg_t * vseg;

    // get pointer on VMM
    vmm_t * vmm = &process->vmm;

    // get lock protecting vseg list
    rwlock_wr_lock( &vmm->vsegs_lock );

    // remove all vsegs registered in vmm
    while( !list_is_empty( &vmm->vsegs_root ) )
    {
        vseg = LIST_FIRST( &vmm->vsegs_root , vseg_t , list );
        vseg_detach( vmm , vseg );
        vseg_free( vseg );
    }

    // delete vsegs radix_tree
    grdxt_destroy( &vmm->grdxt );

    // release lock
    rwlock_wr_unlock( &vmm->vsegs_lock );

    // remove all vsegs from zombi_lists in MMAP allocator
    uint32_t i;
    for( i = 0 ; i < 32 ; i++ )
    {
        while( !list_is_empty( &vmm->mmap_mgr.zombi_list[i] ) )
        {
            vseg = LIST_FIRST( &vmm->mmap_mgr.zombi_list[i] , vseg_t , list );
            vseg_detach( vmm , vseg );
            vseg_free( vseg );
        }
    }

    // release memory allocated to the local page table
    hal_gpt_destroy( &vmm->gpt );
}

/////////////////////////////////////////////////
vseg_t * vmm_check_conflict( process_t * process,
                             vpn_t       vpn_base,
                             vpn_t       vpn_size )
{
    vmm_t        * vmm = &process->vmm;
    vseg_t       * vseg;
    list_entry_t * iter;

    // scan the list of registered vsegs
    LIST_FOREACH( &vmm->vsegs_root , iter )
    {
        vseg = LIST_ELEMENT( iter , vseg_t , list );
        if( ((vpn_base + vpn_size) > vseg->vpn_base) &&
            (vpn_base < (vseg->vpn_base + vseg->vpn_size)) ) return vseg;
    }
    return NULL;
}

////////////////////////////////////////////////////////////////////////////////////////////
// This static function is called by the vmm_create_vseg() function, and implements
// the VMM STACK specific allocator.
////////////////////////////////////////////////////////////////////////////////////////////
// @ vmm      : pointer on VMM.
// @ vpn_base : (return value) first allocated page
// @ vpn_size : (return value) number of allocated pages
////////////////////////////////////////////////////////////////////////////////////////////
static error_t vmm_stack_alloc( vmm_t * vmm,
                                vpn_t * vpn_base,
                                vpn_t * vpn_size )
{
    // get stack allocator pointer
    stack_mgr_t * mgr = &vmm->stack_mgr;

    // get lock on stack allocator
    spinlock_lock( &mgr->lock );

    // get first free slot index in bitmap
    int32_t index = bitmap_ffc( &mgr->bitmap , 4 );
    if( (index < 0) || (index > 31) )
    {
        spinlock_unlock( &mgr->lock );
        return ENOMEM;
    }

    // update bitmap
    bitmap_set( &mgr->bitmap , index );

    // release lock on stack allocator
    spinlock_unlock( &mgr->lock );

    // returns vpn_base, vpn_size (the first page of the slot is not allocated)
    *vpn_base = mgr->vpn_base + index * CONFIG_VMM_STACK_SIZE + 1;
    *vpn_size = CONFIG_VMM_STACK_SIZE - 1;
    return 0;
}
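
/*
 * Worked example for the STACK allocator arithmetic above, with purely
 * illustrative values (the real values come from the kernel configuration):
 * assuming CONFIG_VMM_STACK_BASE = 0xC0000 and CONFIG_VMM_STACK_SIZE = 0x1000
 * pages, slot index 2 gives
 *
 *    vpn_base = 0xC0000 + 2 * 0x1000 + 1 = 0xC2001
 *    vpn_size = 0x1000 - 1               = 0x0FFF
 *
 * i.e. each slot keeps its first page unmapped, so a stack overflow into the
 * previous slot raises a page fault instead of silently corrupting it.
 */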

////////////////////////////////////////////////////////////////////////////////////////////
// This static function is called by the vmm_create_vseg() function, and implements
// the VMM MMAP specific allocator.
////////////////////////////////////////////////////////////////////////////////////////////
// @ vmm      : [in]  pointer on VMM.
// @ npages   : [in]  requested number of pages.
// @ vpn_base : [out] first allocated page.
// @ vpn_size : [out] actual number of allocated pages.
////////////////////////////////////////////////////////////////////////////////////////////
static error_t vmm_mmap_alloc( vmm_t * vmm,
                               vpn_t   npages,
                               vpn_t * vpn_base,
                               vpn_t * vpn_size )
{
    uint32_t   index;
    vseg_t   * vseg;
    vpn_t      base;
    vpn_t      size;
    vpn_t      free;

    // mmap vseg size must be a power of 2
    // compute actual size and index in zombi_list array
    size  = POW2_ROUNDUP( npages );
    index = bits_log2( size );

    // get mmap allocator pointer
    mmap_mgr_t * mgr = &vmm->mmap_mgr;

    // get lock on mmap allocator
    spinlock_lock( &mgr->lock );

    // get vseg from zombi_list or from mmap zone
    if( list_is_empty( &mgr->zombi_list[index] ) )     // from mmap zone
    {
        // check overflow
        free = mgr->first_free_vpn;
        if( (free + size) > (mgr->vpn_base + mgr->vpn_size) )
        {
            spinlock_unlock( &mgr->lock );
            return ENOMEM;
        }

        // update MMAP allocator
        mgr->first_free_vpn += size;

        // compute base
        base = free;
    }
    else                                               // from zombi_list
    {
        // get pointer on zombi vseg from zombi_list
        vseg = LIST_FIRST( &mgr->zombi_list[index] , vseg_t , list );

        // remove vseg from free-list
        list_unlink( &vseg->list );

        // compute base
        base = vseg->vpn_base;
    }

    // release lock on mmap allocator
    spinlock_unlock( &mgr->lock );

    // returns vpn_base, vpn_size
    *vpn_base = base;
    *vpn_size = size;
    return 0;
}

//////////////////////////////////////////////
vseg_t * vmm_create_vseg( process_t * process,
                          intptr_t    base,
                          intptr_t    size,
                          uint32_t    type )
{
    vseg_t * vseg;          // created vseg pointer
    vpn_t    vpn_base;      // vseg first page
    vpn_t    vpn_size;      // number of pages
    error_t  error;

    // get pointer on VMM
    vmm_t * vmm = &process->vmm;

    vmm_dmsg("\n[INFO] %s enter for process %x / base = %x / size = %x / type = %s\n",
             __FUNCTION__ , process->pid , base , size , vseg_type_str(type) );

    // compute base, size, vpn_base, vpn_size, depending on type
    // we use the VMM specific allocators for STACK and MMAP vsegs
    if( type == VSEG_TYPE_STACK )
    {
        // get vpn_base and vpn_size from STACK allocator
        error = vmm_stack_alloc( vmm , &vpn_base , &vpn_size );
        if( error )
        {
            printk("\n[ERROR] in %s : no vspace for stack vseg / process %x in cluster %x\n",
                   __FUNCTION__ , process->pid , local_cxy );
            return NULL;
        }

        // compute vseg base and size from vpn_base and vpn_size
        base = vpn_base << CONFIG_PPM_PAGE_SHIFT;
        size = vpn_size << CONFIG_PPM_PAGE_SHIFT;
    }
    else if( (type == VSEG_TYPE_ANON) ||
             (type == VSEG_TYPE_FILE) ||
             (type == VSEG_TYPE_REMOTE) )
    {
        // get vpn_base and vpn_size from MMAP allocator
        vpn_t npages = size >> CONFIG_PPM_PAGE_SHIFT;
        error = vmm_mmap_alloc( vmm , npages , &vpn_base , &vpn_size );
        if( error )
        {
            printk("\n[ERROR] in %s : no vspace for mmap vseg / process %x in cluster %x\n",
                   __FUNCTION__ , process->pid , local_cxy );
            return NULL;
        }

        // compute vseg base and size from vpn_base and vpn_size
        base = vpn_base << CONFIG_PPM_PAGE_SHIFT;
        size = vpn_size << CONFIG_PPM_PAGE_SHIFT;
    }
    else
    {
        vpn_base = ARROUND_DOWN( base , CONFIG_PPM_PAGE_SIZE ) >> CONFIG_PPM_PAGE_SHIFT;
        vpn_size = (ARROUND_UP( base + size , CONFIG_PPM_PAGE_SIZE ) >> CONFIG_PPM_PAGE_SHIFT)
                   - vpn_base;
    }

    // check collisions
    vseg = vmm_check_conflict( process , vpn_base , vpn_size );
    if( vseg != NULL )
    {
        printk("\n[ERROR] in %s for process %x : new vseg [vpn_base = %x / vpn_size = %x]\n"
               "  overlap existing vseg [vpn_base = %x / vpn_size = %x]\n",
               __FUNCTION__ , process->pid, vpn_base, vpn_size,
               vseg->vpn_base, vseg->vpn_size );
        return NULL;
    }

    // allocate physical memory for vseg descriptor
    vseg = vseg_alloc();
    if( vseg == NULL )
    {
        printk("\n[ERROR] in %s for process %x : cannot allocate memory for vseg\n",
               __FUNCTION__ , process->pid );
        return NULL;
    }

    // initialize vseg descriptor
    vseg_init( vseg , base, size , vpn_base , vpn_size , type , local_cxy , 0 , 0 );

    // update "heap_vseg" in VMM for an HEAP type vseg
    if( type == VSEG_TYPE_HEAP ) process->vmm.heap_vseg = vseg;

    // attach vseg to vmm
    rwlock_wr_lock( &vmm->vsegs_lock );
    vseg_attach( vmm , vseg );
    rwlock_wr_unlock( &vmm->vsegs_lock );

    vmm_dmsg("\n[INFO] : %s exit for process %x, vseg [%x, %x] has been mapped\n",
             __FUNCTION__ , process->pid , vseg->min , vseg->max );

    return vseg;
}
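
/*
 * Illustrative (hypothetical) use of vmm_create_vseg() by an mmap-like service
 * handler running in the reference cluster: for ANON / FILE / REMOTE types the
 * <base> argument is ignored, because the MMAP allocator chooses the virtual
 * base itself and rounds the size up to a power of 2 pages.
 *
 *    vseg_t * vseg = vmm_create_vseg( process,
 *                                     0,                               // base ignored for ANON
 *                                     count << CONFIG_PPM_PAGE_SHIFT,  // count pages requested
 *                                     VSEG_TYPE_ANON );
 *    if( vseg == NULL ) return ENOMEM;
 *    // vseg->vpn_base / vseg->vpn_size describe the actually reserved zone
 */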

/////////////////////////////////////
void vmm_remove_vseg( vseg_t * vseg )
{
    // get pointers on calling process and VMM
    thread_t  * this    = CURRENT_THREAD;
    process_t * process = this->process;
    vmm_t     * vmm     = &this->process->vmm;
    uint32_t    type    = vseg->type;

    // detach vseg from VMM
    rwlock_wr_lock( &vmm->vsegs_lock );
    vseg_detach( &process->vmm , vseg );
    rwlock_wr_unlock( &vmm->vsegs_lock );

    // release the stack slot to VMM stack allocator if STACK type
    if( type == VSEG_TYPE_STACK )
    {
        // get pointer on stack allocator
        stack_mgr_t * mgr = &vmm->stack_mgr;

        // compute slot index
        uint32_t index = ((vseg->vpn_base - mgr->vpn_base - 1) / CONFIG_VMM_STACK_SIZE);

        // update stacks_bitmap
        spinlock_lock( &mgr->lock );
        bitmap_clear( &mgr->bitmap , index );
        spinlock_unlock( &mgr->lock );
    }

    // release the vseg to VMM mmap allocator if MMAP type
    if( (type == VSEG_TYPE_ANON) || (type == VSEG_TYPE_FILE) || (type == VSEG_TYPE_REMOTE) )
    {
        // get pointer on mmap allocator
        mmap_mgr_t * mgr = &vmm->mmap_mgr;

        // compute zombi_list index
        uint32_t index = bits_log2( vseg->vpn_size );

        // update zombi_list
        spinlock_lock( &mgr->lock );
        list_add_first( &mgr->zombi_list[index] , &vseg->list );
        spinlock_unlock( &mgr->lock );
    }

    // release physical memory allocated for vseg descriptor if no MMAP type
    if( (type != VSEG_TYPE_ANON) && (type != VSEG_TYPE_FILE) && (type != VSEG_TYPE_REMOTE) )
    {
        vseg_free( vseg );
    }
}

//////////////////////////////////////////////
error_t vmm_map_kernel_vseg( vseg_t   * vseg,
                             uint32_t   attr )
{
    vpn_t      vpn;        // VPN of PTE to be set
    vpn_t      vpn_min;    // VPN of first PTE to be set
    vpn_t      vpn_max;    // VPN of last PTE to be set (excluded)
    ppn_t      ppn;        // PPN of allocated physical page
    uint32_t   order;      // ln( number of small pages for one single PTE )
    page_t   * page;
    error_t    error;

    // check vseg type : must be a kernel vseg
    uint32_t type = vseg->type;
    assert( ((type==VSEG_TYPE_KCODE) || (type==VSEG_TYPE_KDATA) || (type==VSEG_TYPE_KDEV)),
            __FUNCTION__ , "not a kernel vseg\n" );

    // get pointer on page table
    gpt_t * gpt = &process_zero.vmm.gpt;

    // define number of small pages per PTE
    if( attr & GPT_SMALL ) order = 0;   // 1 small page
    else                   order = 9;   // 512 small pages

    // loop on pages in vseg
    vpn_min = vseg->vpn_base;
    vpn_max = vpn_min + vseg->vpn_size;
    for( vpn = vpn_min ; vpn < vpn_max ; vpn++ )
    {
        // allocate a physical page from local PPM
        kmem_req_t req;
        req.type  = KMEM_PAGE;
        req.size  = order;
        req.flags = AF_KERNEL | AF_ZERO;
        page      = (page_t *)kmem_alloc( &req );
        if( page == NULL )
        {
            printk("\n[ERROR] in %s : cannot allocate physical memory\n", __FUNCTION__ );
            return ENOMEM;
        }

        // set page table entry
        ppn   = ppm_page2ppn( page );
        error = hal_gpt_set_pte( gpt , vpn , ppn , attr );
        if( error )
        {
            printk("\n[ERROR] in %s : cannot register PTE\n", __FUNCTION__ );
            return ENOMEM;
        }
    }

    return 0;
}
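
/*
 * Illustrative (hypothetical) use of vmm_map_kernel_vseg() during kernel
 * initialisation, for a device segment already registered in process_zero.
 * The attributes below are the GPT_* flags actually handled in this file;
 * GPT_SMALL forces one physical page per PTE, and GPT_CACHABLE is left out
 * to obtain an uncached device mapping.
 *
 *    error_t error = vmm_map_kernel_vseg( vseg_kdev,            // hypothetical KDEV vseg
 *                                         GPT_MAPPED | GPT_SMALL | GPT_WRITABLE );
 *    if( error ) hal_core_sleep();
 */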

/////////////////////////////////////////
void vmm_unmap_vseg( process_t * process,
                     vseg_t    * vseg )
{
    vpn_t    vpn;        // VPN of current PTE
    vpn_t    vpn_min;    // VPN of first PTE
    vpn_t    vpn_max;    // VPN of last PTE (excluded)

    // get pointer on process page table
    gpt_t * gpt = &process->vmm.gpt;

    // loop on pages in vseg
    vpn_min = vseg->vpn_base;
    vpn_max = vpn_min + vseg->vpn_size;
    for( vpn = vpn_min ; vpn < vpn_max ; vpn++ )
    {
        hal_gpt_reset_pte( gpt , vpn );
    }
}

/////////////////////////////////////////////
error_t vmm_resize_vseg( process_t * process,
                         intptr_t    base,
                         intptr_t    size )
{
    error_t error;

    // get pointer on process VMM
    vmm_t * vmm = &process->vmm;

    intptr_t addr_min = base;
    intptr_t addr_max = base + size;
    uint32_t shift    = CONFIG_PPM_PAGE_SHIFT;

    // get pointer on vseg
    vseg_t * vseg = grdxt_lookup( &vmm->grdxt , (uint32_t)(base >> shift) );

    if( vseg == NULL ) return EINVAL;

    // get VMM lock protecting vsegs list
    rwlock_wr_lock( &vmm->vsegs_lock );

    if( (vseg->min > addr_min) || (vseg->max < addr_max) )   // region not included in vseg
    {
        error = EINVAL;
    }
    else if( (vseg->min == addr_min) && (vseg->max == addr_max) )  // vseg must be removed
    {
        vmm_remove_vseg( vseg );
        error = 0;
    }
    else if( vseg->min == addr_min )                         // vseg must be resized
    {
        printk("\n[PANIC] in %s : resize not implemented yet\n", __FUNCTION__ );
        hal_core_sleep();
        error = 0;
    }
    else if( vseg->max == addr_max )                         // vseg must be resized
    {
        printk("\n[PANIC] in %s : resize not implemented yet\n", __FUNCTION__ );
        hal_core_sleep();
        error = 0;
    }
    else          // vseg cut in three regions => vseg must be resized & new vseg created
    {
        printk("\n[PANIC] in %s : resize not implemented yet\n", __FUNCTION__ );
        hal_core_sleep();
        error = 0;
    }

    // release VMM lock
    rwlock_wr_unlock( &vmm->vsegs_lock );

    return error;
}

///////////////////////////////////////////
vseg_t * vmm_get_vseg( process_t * process,
                       intptr_t    vaddr )
{
    // get pointer on process VMM
    vmm_t * vmm = &process->vmm;

    // get lock protecting the vseg list
    rwlock_rd_lock( &vmm->vsegs_lock );

    // get pointer on vseg from radix tree
    vseg_t * vseg = grdxt_lookup( &vmm->grdxt , (uint32_t)(vaddr >> CONFIG_PPM_PAGE_SHIFT) );

    // release the lock
    rwlock_rd_unlock( &vmm->vsegs_lock );

    return vseg;
}

/////////////////////////////////////////
error_t vmm_get_pte( process_t * process,
                     vpn_t       vpn,
                     uint32_t  * ret_attr,
                     ppn_t     * ret_ppn )
{
    vseg_t   * vseg;   // pointer on vseg containing VPN
    ppn_t      ppn;    // PPN from GPT entry
    uint32_t   attr;   // attributes from GPT entry
    error_t    error;

    // this function must be called by a thread running in the reference cluster
    assert( (GET_CXY( process->ref_xp ) == local_cxy ) , __FUNCTION__ ,
            " not called in the reference cluster\n" );

    // get VMM pointer
    vmm_t * vmm = &process->vmm;

    // access GPT to get PTE attributes and PPN
    hal_gpt_get_pte( &vmm->gpt , vpn , &attr , &ppn );

    // if PTE unmapped => allocate one small physical page to map it
    if( (attr & GPT_MAPPED) == 0 )
    {
        // get vseg pointer
        vseg = vmm_get_vseg( process , vpn << CONFIG_PPM_PAGE_SHIFT );

        if( vseg == NULL )   // vpn not in a registered vseg => illegal user address
        {
            printk("\n[ERROR] in %s : illegal vaddr / process = %x / vpn = %x\n",
                   __FUNCTION__ , process->pid , vpn );
            return EINVAL;
        }

        // select the target cluster for physical mapping
        uint32_t target_cxy;
        if( vseg->flags & VSEG_DISTRIB )   // depends on VPN LSB
        {
            uint32_t x_width = LOCAL_CLUSTER->x_width;
            uint32_t y_width = LOCAL_CLUSTER->y_width;
            target_cxy = vpn & ((1<<(x_width + y_width)) - 1);
        }
        else                               // defined in vseg descriptor
        {
            target_cxy = vseg->cxy;
        }

        // allocate memory for page fault
        kmem_req_t req;
        page_t   * page;
        if( target_cxy == local_cxy )      // target cluster is the local cluster
        {
            req.type  = KMEM_PAGE;
            req.size  = 0;
            req.flags = AF_NONE;
            page      = (page_t *)kmem_alloc( &req );

            error = ( page == NULL ) ? 1 : 0;
            ppn   = ppm_page2ppn( page );
        }
        else                               // target cluster is not the local cluster
        {
            rpc_pmem_get_pages_client( target_cxy , 0 , &error , &ppn );
        }

        if( error )
        {
            printk("\n[ERROR] in %s : cannot allocate memory / process = %x / vpn = %x\n",
                   __FUNCTION__ , process->pid , vpn );
            return ENOMEM;
        }

        // define GPT attributes from vseg flags
        attr = GPT_MAPPED | GPT_SMALL;
        if( vseg->flags & VSEG_USER  ) attr |= GPT_USER;
        if( vseg->flags & VSEG_WRITE ) attr |= GPT_WRITABLE;
        if( vseg->flags & VSEG_EXEC  ) attr |= GPT_EXECUTABLE;
        if( vseg->flags & VSEG_CACHE ) attr |= GPT_CACHABLE;

        // set the missing PTE in local VMM
        error = hal_gpt_set_pte( &vmm->gpt , vpn , ppn , attr );
        if( error )
        {
            printk("\n[ERROR] in %s : cannot register PTE / process = %x / vpn = %x\n",
                   __FUNCTION__ , process->pid , vpn );
            return ENOMEM;
        }
    }

    *ret_ppn  = ppn;
    *ret_attr = attr;
    return 0;
}
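
/*
 * Worked example for the VSEG_DISTRIB placement policy above, with purely
 * illustrative parameters: on a 4x4 mesh (x_width = 2, y_width = 2) the target
 * cluster identifier is taken from the 4 least significant bits of the VPN,
 * so consecutive pages are spread round-robin over the 16 clusters:
 *
 *    vpn = 0x30000  =>  target_cxy = 0x30000 & 0xF = 0x0
 *    vpn = 0x30001  =>  target_cxy = 0x30001 & 0xF = 0x1
 *    vpn = 0x3000F  =>  target_cxy = 0x3000F & 0xF = 0xF
 */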

///////////////////////////////////////////////////
error_t vmm_handle_page_fault( process_t * process,
                               vseg_t    * vseg,
                               vpn_t       vpn )
{
    uint32_t   attr;     // missing page attributes
    ppn_t      ppn;      // missing page PPN
    error_t    error;    // return value

    // get local VMM pointer
    vmm_t * vmm = &process->vmm;

    // get reference process cluster and local pointer
    cxy_t       ref_cxy = GET_CXY( process->ref_xp );
    process_t * ref_ptr = (process_t *)GET_PTR( process->ref_xp );

    // get missing PTE attributes and PPN
    if( local_cxy != ref_cxy )   // local cluster is not the reference cluster
    {
        rpc_vmm_get_pte_client( ref_cxy , ref_ptr , vpn , &attr , &ppn , &error );
    }
    else                         // local cluster is the reference cluster
    {
        error = vmm_get_pte( process , vpn , &attr , &ppn );
    }

    // check page allocation error
    if( error )
    {
        printk("\n[ERROR] in %s : cannot allocate memory / process = %x / vpn = %x\n",
               __FUNCTION__ , process->pid , vpn );
        return ENOMEM;
    }

    // set the missing PTE in local VMM
    error = hal_gpt_set_pte( &vmm->gpt , vpn , ppn , attr );
    if( error )
    {
        printk("\n[ERROR] in %s : cannot register PTE / process = %x / vpn = %x\n",
               __FUNCTION__ , process->pid , vpn );
        return ENOMEM;
    }

    return 0;
}

///////////////////////////////////////////
error_t vmm_v2p_translate( bool_t    ident,
                           void    * ptr,
                           paddr_t * paddr )
{
    process_t * process = CURRENT_THREAD->process;

    if( ident )   // identity mapping
    {
        *paddr = (paddr_t)PADDR( local_cxy , (lpa_t)ptr );
        return 0;
    }

    // access page table
    error_t  error;
    vpn_t    vpn;
    uint32_t attr;
    ppn_t    ppn;
    uint32_t offset;

    vpn    = (vpn_t)( (intptr_t)ptr >> CONFIG_PPM_PAGE_SHIFT );
    offset = (uint32_t)( ((intptr_t)ptr) & CONFIG_PPM_PAGE_MASK );

    if( local_cxy == GET_CXY( process->ref_xp) )   // calling process is reference process
    {
        error = vmm_get_pte( process, vpn , &attr , &ppn );
    }
    else                                           // calling process is not reference process
    {
        cxy_t       ref_cxy = GET_CXY( process->ref_xp );
        process_t * ref_ptr = (process_t *)GET_PTR( process->ref_xp );
        rpc_vmm_get_pte_client( ref_cxy , ref_ptr , vpn , &attr , &ppn , &error );
    }

    // set paddr
    *paddr = (((paddr_t)ppn) << CONFIG_PPM_PAGE_SHIFT) | offset;

    return error;
}
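
/*
 * Illustrative (hypothetical) use of vmm_v2p_translate() by a driver that needs
 * the physical address of a buffer before programming a DMA transfer; the
 * buffer pointer is assumed to be mapped in the calling process page table
 * (ident == false path).
 *
 *    paddr_t  buf_paddr;
 *    error_t  error = vmm_v2p_translate( false , buf_vaddr , &buf_paddr );
 *    if( error ) return error;   // VPN not mapped and cannot be mapped
 *    // buf_paddr = (PPN << CONFIG_PPM_PAGE_SHIFT) | page offset
 */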

/*
///////////////////////////////////////////////////////////////////
// Legacy ALMOS code, kept below for reference only (not compiled).
///////////////////////////////////////////////////////////////////

error_t vmm_inval_shared_page( vseg_t *vseg, vma_t vaddr, ppn_t ppn)
{
    pmm_page_info_t current;
    error_t         err;

    err = pmm_get_page(&vseg->vmm->pmm, vaddr, &current);

    if((err) || (current.ppn != ppn))
        goto ended;

    current.ppn     = 0;
    current.attr    = 0;
    current.cluster = NULL;

    err = pmm_set_page(&vseg->vmm->pmm, vaddr, &current);

ended:
    return err;
}

error_t vmm_update_shared_page( vseg_t *vseg, vma_t vaddr, ppn_t ppn)
{
    pmm_page_info_t current;
    error_t         err;

    err = pmm_get_page(&vseg->vmm->pmm, vaddr, &current);

    if((err) || (current.attr != 0))
        goto ended;

    current.ppn     = ppn;
    current.attr    = vseg->vm_pgprot;
    current.cluster = NULL; // this function is called after invalidate one

    err = pmm_set_page(&vseg->vmm->pmm, vaddr , &current);

ended:
    return err;
}

// Hypothesis: the vseg is shared-anon, mapper list is rdlocked, page is locked
error_t vmm_migrate_shared_page_seq( vseg_t *vseg, struct page_s *page, struct page_s **new)
{
    register vseg_t           *reg;
    register struct process_s *process;
    register struct process_s *this_process;
    struct page_s             *new_pg;
    struct list_entry         *iter;
    kmem_req_t                 req;
    vma_t                      vaddr;
    ppn_t                      ppn;
    error_t                    err;

    vaddr = (page->index << PMM_PAGE_SHIFT) + vseg->vm_start + vseg->vm_offset;
    ppn   = ppm_page2ppn(page);

    this_process = (new == NULL) ? NULL : current_process;

    iter = &vseg->vm_shared_list;
    err  = ECANCELED;

    // Invalidate All
    do
    {
        reg     = list_element(iter, vseg_t, vm_shared_list);
        process = vmm_get_process(reg->vmm);

        if(process != this_process)
        {
            err = vmm_inval_shared_page(reg, vaddr, ppn);
            if(err) goto fail_inval;
        }

        assert(vseg->vm_mapper.m_home_cid == current_cid);
        iter = list_next(&vseg->vm_mapper.m_reg_root, iter);
    } while(iter != NULL);

    req.type       = KMEM_PAGE;
    req.size       = 0;
    req.excep_code = AF_USER;

    new_pg = kmem_alloc(&req);
    *new   = new_pg;

    if(new_pg == NULL)
    {
        err = ENOMEM;
        goto fail_alloc;
    }

    page_copy(new_pg, page);

    page_lock(new_pg);

    new_pg->mapper = page->mapper;
    new_pg->index  = page->index;

    // TODO: do the complete job regarding dirty pages
    if(PAGE_IS(page, PG_DIRTY))
        PAGE_SET(new_pg, PG_DIRTY);

    ppn  = ppm_page2ppn(new_pg);
    iter = &vseg->vm_shared_list;

    // Update All
    do
    {
        reg     = list_element(iter, vseg_t, vm_shared_list);
        process = vmm_get_process(reg->vmm);

        if(process != this_process)
            (void) vmm_update_shared_page(reg, vaddr, ppn);

        assert(vseg->vm_mapper.m_home_cid == current_cid);
        iter = list_next(&vseg->vm_mapper.m_reg_root, iter);
    } while(iter != NULL);

    page_unlock(new_pg);

fail_alloc:
fail_inval:
    return err;
}

//TODO: revisit all manipulation of the page->refcount
///////////////////////////////////////////////////////////////
static inline error_t vmm_do_migrate( vseg_t          * vseg,
                                      pmm_page_info_t * pinfo,
                                      uint32_t          vaddr )
{
    kmem_req_t        req;
    pmm_page_info_t   current;
    page_t          * newpage;
    cluster_t       * cluster;
    thread_t        * this;
    error_t           err;
    ppn_t             ppn;

    assert( pinfo->ppn != 0 );

    ppn     = pinfo->ppn;
    this    = current_thread;
    newpage = NULL;
    cluster = current_cluster;

    current.attr = 0;
    current.ppn  = 0;

    err = pmm_lock_page(&vseg->vmm->pmm, vaddr, &current);

    if(err || (current.isAtomic == false) ||
       (current.ppn != ppn) || !(current.attr & PMM_MIGRATE))
    {
#if CONFIG_SHOW_SPURIOUS_PGFAULT
        printk(INFO, "%s: pid %d, tid %d, cpu %d, nothing to do for vaddr %x\n",
               __FUNCTION__, this->process->pid, this->info.order, cpu_get_id(), vaddr);
#endif
        this->info.spurious_pgfault_cntr ++;
        pmm_unlock_page(&vseg->vmm->pmm, vaddr, &current);
        pmm_tlb_flush_vaddr(vaddr, PMM_DATA);
        return 0;
    }

    if(!ppn_is_local(ppn))
    {
        req.type       = KMEM_PAGE;
        req.size       = 0;
        req.excep_code = AF_PGFAULT;

        newpage = kmem_alloc(&req);

        if(newpage)
        {
            newpage->mapper = NULL; //?

            ppn_copy(ppm_page2ppn(newpage), ppn);

            if(current.attr & PMM_COW)
            {
                current.attr |=  PMM_WRITE;
                current.attr &= ~(PMM_COW);
            }

            current.ppn = ppm_page2ppn(newpage);
        }
    }

    current.attr   |=  PMM_PRESENT;
    current.attr   &= ~(PMM_MIGRATE);
    current.attr   &= ~(PMM_LOCKED);
    current.cluster = NULL;

    // also unlock the table entry
    err = pmm_set_page(&vseg->vmm->pmm, vaddr, &current);

    if(err)
    {
        // TODO: we should defer the kmem_free call
        //page_unlock(page);
        (void)pmm_unlock_page(&vseg->vmm->pmm, vaddr, &current);
        req.ptr = newpage;
        kmem_free(&req);
        return err;
    }

    if(newpage)
    {
        ppn_refcount_down(ppn);
        current_thread->info.remote_pages_cntr ++;

#if CONFIG_SHOW_REMOTE_PGALLOC
        printk(INFO, "%s: pid %d, tid %x, cpu %d, cid %d: got new remote page from cluster %d (vaddr %x)\n",
               __FUNCTION__, current_process->pid, current_thread,
               cpu_get_id(), cluster->id, newpage->cid, vaddr);
#endif
    }

#if CONFIG_SHOW_VMMMGRT_MSG
    printk(INFO, "%s: pid %d, tid %d, cpu %d: asked to migrate page (vaddr %x) from cluster %d to cluster %d, err %d\n",
           __FUNCTION__, current_process->pid, current_thread->info.order,
           cpu_get_id(), vaddr, ppn_ppn2cid(ppn), cluster->id, err);
#endif

    return err;
}

error_t vmm_do_cow( vseg_t *vseg, pmm_page_info_t *pinfo, uint32_t vaddr)
{
    register struct page_s   *newpage;
    register struct page_s   *page;
    register struct thread_s *this;
    register error_t          err;
    register uint32_t         count;
    register bool_t           isCountDown;
    pmm_page_info_t           old;
    pmm_page_info_t           new;
    kmem_req_t                req;

    this        = current_thread;
    old.attr    = 0;
    newpage     = NULL;
    isCountDown = true;

    vmm_dmsg(2, "%s: pid %d, tid %d, cpu %d, vaddr %x\n",
             __FUNCTION__, this->process->pid, this->info.order, cpu_get_id(), vaddr);

    err = pmm_lock_page(&vseg->vmm->pmm, vaddr, &old);

    //TODO: check this condition
    if(err || (old.isAtomic == false) || !(old.attr & PMM_COW))
    {
#if CONFIG_SHOW_SPURIOUS_PGFAULT
        printk(INFO, "%s: pid %d, tid %d, cpu %d, nothing to do for vaddr %x\n",
               __FUNCTION__, this->process->pid, this->info.order, cpu_get_id(), vaddr);
#endif
        this->info.spurious_pgfault_cntr ++;
        pmm_tlb_flush_vaddr(vaddr, PMM_DATA);
        pmm_unlock_page(&vseg->vmm->pmm, vaddr, &old);
        return err;
        //goto VMM_COW_END;
    }

    // if the ppn is local and the other processes with which we share the page
    // have already done their COW, then use the old.ppn directly
    if(ppn_is_local(old.ppn))
    {
        page = ppm_ppn2page(&current_cluster->ppm, old.ppn);

        if(page->mapper == NULL)
        {
            count = page_refcount_get(page);

            if(count == 1)
            {
                newpage     = page;  // don't copy the page, use it directly
                isCountDown = false;

                vmm_dmsg(2, "%s: pid %d, tid %d, cpu %d, reuse same page for vaddr %x, pg_addr %x\n",
                         __FUNCTION__, this->process->pid, this->info.order,
                         cpu_get_id(), vaddr, ppm_page2addr(page));
            }
        }
        //else: we need to do the cow even if it's local!
    }

    // else: allocate newpage and copy the data from the remote node,
    // also refcount down the old ppn
    if(newpage == NULL)
    {
        req.type       = KMEM_PAGE;
        req.size       = 0;
        req.excep_code = AF_PGFAULT;

        if((newpage = kmem_alloc(&req)) == NULL)
        {
            (void)pmm_unlock_page(&vseg->vmm->pmm, vaddr, &old);
            return ENOMEM;
        }

        newpage->mapper = NULL;

        ppn_copy(ppm_page2ppn(newpage), old.ppn);

        assert(isCountDown);

        vmm_dmsg(2, "%s: pid %d, tid %d, cpu %d, newpage for vaddr %x, pg_addr %x\n",
                 __FUNCTION__, this->process->pid, this->info.order,
                 cpu_get_id(), vaddr, ppm_page2addr(newpage));

        if(newpage->cid != current_cid)
            this->info.remote_pages_cntr ++;
    }

    new.attr    = vseg->vm_pgprot | PMM_WRITE;
    new.attr   &= ~(PMM_COW | PMM_MIGRATE);
    new.ppn     = ppm_page2ppn(newpage);
    new.cluster = NULL;

    // this also unlocks the table entry (if no error)
    err = pmm_set_page(&vseg->vmm->pmm, vaddr, &new);

    if(err)
    {
        (void)pmm_unlock_page(&vseg->vmm->pmm, vaddr, &old);
        req.ptr = newpage;
        kmem_free(&req);
        vmm_dmsg(3, "%s: ended [ err %d ]\n", __FUNCTION__, err);
        return err;
    }

    if(isCountDown) ppn_refcount_down(old.ppn);

    vmm_dmsg(2, "%s, pid %d, tid %d, cpu %d, COW ended [vaddr %x]\n",
             __FUNCTION__, this->process->pid, this->info.order, cpu_get_id(), vaddr);

    return 0;
}

// refcount is taken on the file at mmap
static inline error_t vmm_do_mapped( vseg_t *vseg, uint32_t vaddr, uint32_t excep_code)
{
    ppn_t             ppn;
    error_t           err;
    uint32_t          index;
    bool_t            isDone;
    pmm_page_info_t   info;
    pmm_page_info_t   current;
    struct thread_s  *this;

    this = current_thread;

    current.attr = 1;
    current.ppn  = 1;
    isDone       = false;

    err = pmm_lock_page(&vseg->vmm->pmm, vaddr, &current);

    if(err) return err;

    if((current.isAtomic == false) || (current.attr != 0))
    {
#if CONFIG_SHOW_SPURIOUS_PGFAULT
        printk(INFO, "%s: pid %d, tid %d, cpu %d, nothing to do for vaddr %x\n",
               __FUNCTION__, this->process->pid, this->info.order, cpu_get_id(), vaddr);
#endif
        this->info.spurious_pgfault_cntr ++;
        pmm_tlb_flush_vaddr(vaddr, PMM_DATA);
        return 0;
    }

    index = ((vaddr - vseg->vm_start) + vseg->vm_offset) >> PMM_PAGE_SHIFT;

    // also holds a refcount!
    ppn = mapper_get_ppn(&vseg->vm_mapper, index, MAPPER_SYNC_OP);

    if(!ppn)
    {
        err = pmm_unlock_page(&vseg->vmm->pmm, vaddr, &current);
        assert(!err); //FIXME: liberate the ppn ...
        return (VFS_FILE_IS_NULL(vseg->vm_file)) ? EIO : ENOMEM;
    }

    info.attr    = vseg->vm_pgprot;
    info.ppn     = ppn;
    info.cluster = NULL;

    // also unlock the page
    err = pmm_set_page(&vseg->vmm->pmm, vaddr, &info);

    assert(!err); //FIXME: liberate the ppn and unlock the table entry ...
    //err = pmm_unlock_page(&vseg->vmm->pmm, vaddr, &current);

    return err;
}

/////////////////////////////////////////////////////
static inline error_t vmm_do_aod( vseg_t *vseg, uint32_t vaddr)
{
    register error_t            err;
    register struct page_s     *page;
    register struct cluster_s  *cluster;
    struct thread_s            *this;
    pmm_page_info_t             old;
    pmm_page_info_t             new;
    kmem_req_t                  req;

    page     = NULL;
    old.attr = 0;
    this     = current_thread;

    err = pmm_lock_page(&vseg->vmm->pmm, vaddr, &old);

    if(err) return err;

    if(old.isAtomic == false)
    {
        this->info.spurious_pgfault_cntr ++;
        pmm_tlb_flush_vaddr(vaddr, PMM_DATA);
        return 0;
    }

    req.type       = KMEM_PAGE;
    req.size       = 0;
    req.excep_code = AF_PGFAULT | AF_ZERO;

    if((page = kmem_alloc(&req)) == NULL)
    {
        (void)pmm_unlock_page(&vseg->vmm->pmm, vaddr, &old);
        return ENOMEM;
    }

    page->mapper = NULL;

    new.attr    = vseg->vm_pgprot;
    new.ppn     = ppm_page2ppn(page);
    new.cluster = NULL;

    err = pmm_set_page(&vseg->vmm->pmm, vaddr, &new);

    if(err) goto fail_set_pg;

    cluster = current_cluster;

    if(page->cid != cluster->id)
        this->info.remote_pages_cntr ++;

    return 0;

fail_set_pg:
    (void)pmm_unlock_page(&vseg->vmm->pmm, vaddr, &old);
    req.ptr = page;
    kmem_free(&req);

    vmm_dmsg(3, "%s: ended [ err %d ]\n", __FUNCTION__, err);
    return err;
}

VSEGION_PAGE_FAULT(vmm_default_pagefault)
{
    register struct thread_s  *this;
    register error_t           err;
    pmm_page_info_t            info;

    if((err = pmm_get_page(&vseg->vmm->pmm, vaddr, &info)))
        return err;

    if((info.attr != 0) && (info.ppn != 0))
    {
        if((info.attr & PMM_COW) && pmm_except_isWrite(excep_code))
        {
            err = vmm_do_cow(vseg, &info, vaddr);
            return err;
        }

        if(info.attr & PMM_MIGRATE)
            return vmm_do_migrate(vseg, &info, vaddr);

        if(info.attr & PMM_PRESENT)
        {
            this = current_thread;

#if CONFIG_SHOW_SPURIOUS_PGFAULT
            printk(WARNING, "WARNING: %s: pid %d, tid %d, cpu %d, excep_code %x but vaddr is valid %x, attr %x, ppn %x\n",
                   __FUNCTION__, this->process->pid, this->info.order, cpu_get_id(),
                   excep_code, vaddr, info.attr, info.ppn);
#endif

            current_thread->info.spurious_pgfault_cntr ++;
            pmm_tlb_flush_vaddr(vaddr, PMM_UNKNOWN);
            return 0;
        }

        current_thread->info.spurious_pgfault_cntr ++;
        pmm_tlb_flush_vaddr(vaddr, PMM_UNKNOWN);
        return 0;

#if 0
#if CONFIG_SHOW_VMM_ERROR_MSG
        printk(ERROR, "ERROR: %s: pid %d, cpu %d, unexpected page attributes configuration for vaddr %x, found: ppn %x, attr %x\n",
               __FUNCTION__, current_process->pid, cpu_get_id(), vaddr, info.ppn, info.attr);
#endif
        return EPERM;
#endif
    }

    if(!MAPPER_IS_NULL(vseg->vm_mapper))
        return vmm_do_mapped(vseg, vaddr, excep_code);

    return vmm_do_aod(vseg, vaddr);
}

*/