/*
 * ppm.c - Per-cluster Physical Pages Manager implementation
 *
 * Authors   Ghassan Almaless (2008,2009,2010,2011,2012)
 *           Alain Greiner    (2016,2017,2018)
 *
 * Copyright (c) UPMC Sorbonne Universites
 *
 * This file is part of ALMOS-MKH.
 *
 * ALMOS-MKH is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2.0 of the License.
 *
 * ALMOS-MKH is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

// NOTE: the original header names were lost during extraction; the include
// list below is reconstructed from the kernel services actually used in this file.
#include <kernel_config.h>
#include <hal_kernel_types.h>
#include <hal_special.h>
#include <hal_remote.h>
#include <printk.h>
#include <list.h>
#include <bits.h>
#include <page.h>
#include <dqdt.h>
#include <busylock.h>
#include <queuelock.h>
#include <remote_busylock.h>
#include <remote_queuelock.h>
#include <thread.h>
#include <cluster.h>
#include <vfs.h>
#include <ppm.h>

////////////////////////////////////////////////////////////////////////////////////////
//     functions to translate [ page <-> base <-> ppn ]
////////////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////
inline bool_t ppm_page_is_valid( page_t * page )
{
    ppm_t    * ppm  = &LOCAL_CLUSTER->ppm;
    uint32_t   pgnr = (uint32_t)( page - ppm->pages_tbl );

    // valid page indexes are 0 .. pages_nr-1
    return (pgnr < ppm->pages_nr);

}  // end ppm_page_is_valid()

/////////////////////////////////////////////
inline xptr_t ppm_page2base( xptr_t page_xp )
{
    ppm_t  * ppm      = &LOCAL_CLUSTER->ppm;

    cxy_t    page_cxy = GET_CXY( page_xp );
    page_t * page_ptr = GET_PTR( page_xp );

    void   * base_ptr = ppm->vaddr_base +
                        ((page_ptr - ppm->pages_tbl) << CONFIG_PPM_PAGE_SHIFT);

    return XPTR( page_cxy , base_ptr );

}  // end ppm_page2base()

/////////////////////////////////////////////
inline xptr_t ppm_base2page( xptr_t base_xp )
{
    ppm_t  * ppm      = &LOCAL_CLUSTER->ppm;

    cxy_t    base_cxy = GET_CXY( base_xp );
    void   * base_ptr = GET_PTR( base_xp );

    page_t * page_ptr = ppm->pages_tbl +
                        ((base_ptr - ppm->vaddr_base) >> CONFIG_PPM_PAGE_SHIFT);

    return XPTR( base_cxy , page_ptr );

}  // end ppm_base2page()

///////////////////////////////////////////
inline ppn_t ppm_page2ppn( xptr_t page_xp )
{
    ppm_t  * ppm      = &LOCAL_CLUSTER->ppm;

    cxy_t    page_cxy = GET_CXY( page_xp );
    page_t * page_ptr = GET_PTR( page_xp );

    paddr_t  paddr    = PADDR( page_cxy ,
                               (page_ptr - ppm->pages_tbl) << CONFIG_PPM_PAGE_SHIFT );

    return (ppn_t)(paddr >> CONFIG_PPM_PAGE_SHIFT);

}  // end ppm_page2ppn()

///////////////////////////////////////
inline xptr_t ppm_ppn2page( ppn_t ppn )
{
    ppm_t   * ppm   = &LOCAL_CLUSTER->ppm;

    paddr_t   paddr = ((paddr_t)ppn) << CONFIG_PPM_PAGE_SHIFT;

    cxy_t     cxy   = CXY_FROM_PADDR( paddr );
    lpa_t     lpa   = LPA_FROM_PADDR( paddr );

    return XPTR( cxy , &ppm->pages_tbl[lpa >> CONFIG_PPM_PAGE_SHIFT] );

}  // end ppm_ppn2page()

///////////////////////////////////////
inline xptr_t ppm_ppn2base( ppn_t ppn )
{
    ppm_t   * ppm   = &LOCAL_CLUSTER->ppm;

    paddr_t   paddr = ((paddr_t)ppn) << CONFIG_PPM_PAGE_SHIFT;

    cxy_t     cxy   = CXY_FROM_PADDR( paddr );
    lpa_t     lpa   = LPA_FROM_PADDR( paddr );

    return XPTR( cxy , (void *)ppm->vaddr_base + lpa );

}  // end ppm_ppn2base()

///////////////////////////////////////////
inline ppn_t ppm_base2ppn( xptr_t base_xp )
{
    ppm_t  * ppm      = &LOCAL_CLUSTER->ppm;

    cxy_t    base_cxy = GET_CXY( base_xp );
    void   * base_ptr = GET_PTR( base_xp );

    paddr_t  paddr    = PADDR( base_cxy , (base_ptr - ppm->vaddr_base) );

    return (ppn_t)(paddr >> CONFIG_PPM_PAGE_SHIFT);

}  // end ppm_base2ppn()
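////////////////////////////////////////////////////////////////////////////////////////
// Usage sketch (illustrative only, assuming CONFIG_PPM_PAGE_SHIFT == 12): the three
// representations of the same physical page can be derived from each other, and the
// translations above are mutually inverse.
//
//   page_t * page    = ppm->pages_tbl + 5;              // descriptor of page index 5
//   xptr_t   page_xp = XPTR( local_cxy , page );        // extended pointer on it
//   xptr_t   base_xp = ppm_page2base( page_xp );        // vaddr_base + (5 << 12)
//   ppn_t    ppn     = ppm_page2ppn( page_xp );         // physical page number
//   // round trip : ppm_ppn2page( ppn ) == page_xp
////////////////////////////////////////////////////////////////////////////////////////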
////////////////////////////////////////////////////////////////////////////////////////
//     functions to allocate / release physical pages
////////////////////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////
void ppm_free_pages_nolock( page_t * page )
{
    page_t   * buddy;            // searched buddy page descriptor
    uint32_t   buddy_index;      // buddy page index
    page_t   * current;          // current (merged) page descriptor
    uint32_t   current_index;    // current (merged) page index
    uint32_t   current_order;    // current (merged) page order

    ppm_t    * ppm       = &LOCAL_CLUSTER->ppm;
    page_t   * pages_tbl = ppm->pages_tbl;

    assert( !page_is_flag( page , PG_FREE ) ,
            "page already released : ppn = %x\n" ,
            ppm_page2ppn( XPTR( local_cxy , page ) ) );

    assert( !page_is_flag( page , PG_RESERVED ) ,
            "reserved page : ppn = %x\n" ,
            ppm_page2ppn( XPTR( local_cxy , page ) ) );

    // update released page descriptor flags
    page_set_flag( page , PG_FREE );

    // search the buddy page descriptor
    // - merge with current page descriptor if found
    // - exit to release the current page descriptor if not found
    current       = page;
    current_index = (uint32_t)( page - ppm->pages_tbl );
    for( current_order = page->order ;
         current_order < CONFIG_PPM_MAX_ORDER ;
         current_order++ )
    {
        buddy_index = current_index ^ (1 << current_order);
        buddy       = pages_tbl + buddy_index;

        // stop merging if buddy is not a free block of the same order
        if( !page_is_flag( buddy , PG_FREE ) || (buddy->order != current_order) ) break;

        // remove buddy from free list
        list_unlink( &buddy->list );
        ppm->free_pages_nr[current_order] --;

        // merge buddy with current
        buddy->order   = 0;
        current_index &= buddy_index;
    }

    // update merged page descriptor order
    current        = pages_tbl + current_index;
    current->order = current_order;

    // insert current in free list
    list_add_first( &ppm->free_pages_root[current_order] , &current->list );
    ppm->free_pages_nr[current_order] ++;

}  // end ppm_free_pages_nolock()
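////////////////////////////////////////////////////////////////////////////////////////
// Worked example (illustrative only): releasing the order-0 page of index 6 (0b110).
// Its order-0 buddy is index 6 ^ 1 = 7. If page 7 is free with order 0, the two pages
// merge into the order-1 block of index 6 & 7 = 6. The order-1 buddy of that block is
// index 6 ^ 2 = 4; if the order-1 block of index 4 is also free, the merge continues
// into the order-2 block of index 6 & 4 = 4, and so on, up to CONFIG_PPM_MAX_ORDER
// or the first missing buddy.
////////////////////////////////////////////////////////////////////////////////////////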
////////////////////////////////////////////
page_t * ppm_alloc_pages( uint32_t order )
{
    uint32_t   current_order;
    page_t   * remaining_block;
    uint32_t   current_size;

#if DEBUG_PPM_ALLOC_PAGES
thread_t * this  = CURRENT_THREAD;
uint32_t   cycle = (uint32_t)hal_get_cycles();
if( DEBUG_PPM_ALLOC_PAGES < cycle )
printk("\n[%s] thread[%x,%x] enter for %d page(s) / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, 1<<order, cycle );
#endif

    ppm_t * ppm = &LOCAL_CLUSTER->ppm;

    // check order
    assert( (order < CONFIG_PPM_MAX_ORDER) , "illegal order argument = %d\n" , order );

    page_t * block = NULL;

    // take lock protecting free lists
    busylock_acquire( &ppm->free_lock );

    // find a free block of order equal or larger than the requested order
    for( current_order = order ; current_order < CONFIG_PPM_MAX_ORDER ; current_order ++ )
    {
        if( !list_is_empty( &ppm->free_pages_root[current_order] ) )
        {
            block = LIST_FIRST( &ppm->free_pages_root[current_order] , page_t , list );
            list_unlink( &block->list );
            break;
        }
    }

    if( block == NULL )   // return failure
    {
        // release lock protecting free lists
        busylock_release( &ppm->free_lock );

#if DEBUG_PPM_ALLOC_PAGES
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_PPM_ALLOC_PAGES < cycle )
printk("\n[%s] thread[%x,%x] cannot allocate %d page(s) / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, 1<<order, cycle );
#endif

        return NULL;
    }

    // update free-lists after removing a block
    ppm->free_pages_nr[current_order] --;
    current_size = (1 << current_order);

    // split the removed block in smaller sub-blocks if required
    // and update the free-lists accordingly
    while( current_order > order )
    {
        current_order --;
        current_size >>= 1;

        remaining_block        = block + current_size;
        remaining_block->order = current_order;

        list_add_first( &ppm->free_pages_root[current_order] , &remaining_block->list );
        ppm->free_pages_nr[current_order] ++;
    }

    // update page descriptor
    page_clear_flag( block , PG_FREE );
    page_refcount_up( block );
    block->order = order;

    // release lock protecting free lists
    busylock_release( &ppm->free_lock );

    // update DQDT
    dqdt_increment_pages( order );

#if DEBUG_PPM_ALLOC_PAGES
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_PPM_ALLOC_PAGES < cycle )
printk("\n[%s] thread[%x,%x] exit for %d page(s) / ppn = %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid,
1<<order, ppm_page2ppn( XPTR( local_cxy , block ) ), cycle );
#endif

    return block;

}  // end ppm_alloc_pages()

////////////////////////////////////
void ppm_free_pages( page_t * page )
{
    ppm_t  * ppm   = &LOCAL_CLUSTER->ppm;

    // save the block order before releasing : the buddy merging done by
    // ppm_free_pages_nolock() can modify page->order
    uint32_t order = page->order;

#if DEBUG_PPM_FREE_PAGES
thread_t * this  = CURRENT_THREAD;
uint32_t   cycle = (uint32_t)hal_get_cycles();
if( DEBUG_PPM_FREE_PAGES < cycle )
printk("\n[%s] thread[%x,%x] enter for %d page(s) / ppn %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid,
1<<order, ppm_page2ppn( XPTR( local_cxy , page ) ), cycle );
#endif

#if(DEBUG_PPM_FREE_PAGES & 0x1)
if( DEBUG_PPM_FREE_PAGES < cycle )
ppm_print("enter ppm_free_pages");
#endif

    // get lock protecting free_pages[] array
    busylock_acquire( &ppm->free_lock );

    ppm_free_pages_nolock( page );

    // release lock protecting free_pages[] array
    busylock_release( &ppm->free_lock );

    // update DQDT
    dqdt_decrement_pages( order );

#if DEBUG_PPM_FREE_PAGES
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_PPM_FREE_PAGES < cycle )
printk("\n[%s] thread[%x,%x] exit for %d page(s) / ppn %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid,
1<<order, ppm_page2ppn( XPTR( local_cxy , page ) ), cycle );
#endif

#if(DEBUG_PPM_FREE_PAGES & 0x1)
if( DEBUG_PPM_FREE_PAGES < cycle )
ppm_print("exit ppm_free_pages");
#endif

}  // end ppm_free_pages()

///////////////////////////////
void ppm_print( char * string )
{
    uint32_t       order;
    list_entry_t * iter;
    page_t       * page;

    ppm_t * ppm = &LOCAL_CLUSTER->ppm;

    // get lock protecting free lists
    busylock_acquire( &ppm->free_lock );

    printk("\n*** PPM in cluster %x / %s / %d pages ***\n",
    local_cxy , string , ppm->pages_nr );

    for( order = 0 ; order < CONFIG_PPM_MAX_ORDER ; order++ )
    {
        printk("- order = %d / free_pages = %d\t: ",
        order , ppm->free_pages_nr[order] );

        LIST_FOREACH( &ppm->free_pages_root[order] , iter )
        {
            page = LIST_ELEMENT( iter , page_t , list );
            printk("%x," , page - ppm->pages_tbl );
        }

        printk("\n");
    }

    // release lock protecting free lists
    busylock_release( &ppm->free_lock );

}  // end ppm_print()

///////////////////////////////////////
error_t ppm_assert_order( ppm_t * ppm )
{
    uint32_t       order;
    list_entry_t * iter;
    page_t       * page;

    for( order = 0 ; order < CONFIG_PPM_MAX_ORDER ; order++ )
    {
        if( list_is_empty( &ppm->free_pages_root[order] ) ) continue;

        LIST_FOREACH( &ppm->free_pages_root[order] , iter )
        {
            page = LIST_ELEMENT( iter , page_t , list );
            if( page->order != order ) return -1;
        }
    }

    return 0;

}  // end ppm_assert_order()
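////////////////////////////////////////////////////////////////////////////////////////
// Usage sketch (illustrative only): a caller needing 4 contiguous physical pages in
// the local cluster requests order 2, and must handle the NULL return value, since
// the allocator fails when no free block of sufficient order exists.
//
//   page_t * page = ppm_alloc_pages( 2 );               // 1<<2 = 4 contiguous pages
//   if( page == NULL ) return -1;                       // allocation failure
//   xptr_t base_xp = ppm_page2base( XPTR( local_cxy , page ) );
//   /* ... use the block ... */
//   ppm_free_pages( page );                             // release the 4 pages
////////////////////////////////////////////////////////////////////////////////////////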
//////////////////////////////////////////////////////////////////////////////////////
//     functions to handle dirty physical pages
//////////////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////////
bool_t ppm_page_do_dirty( xptr_t page_xp )
{
    bool_t done = false;

    // get page cluster and local pointer
    page_t * page_ptr = GET_PTR( page_xp );
    cxy_t    page_cxy = GET_CXY( page_xp );

    // get local pointer on PPM (same in all clusters)
    ppm_t * ppm = &LOCAL_CLUSTER->ppm;

    // build extended pointers on page lock, page flags, and PPM dirty list lock
    xptr_t page_lock_xp  = XPTR( page_cxy , &page_ptr->lock  );
    xptr_t page_flags_xp = XPTR( page_cxy , &page_ptr->flags );
    xptr_t dirty_lock_xp = XPTR( page_cxy , &ppm->dirty_lock );

    // lock the remote PPM dirty_list
    remote_queuelock_acquire( dirty_lock_xp );

    // lock the remote page
    remote_busylock_acquire( page_lock_xp );

    // get remote page flags
    uint32_t flags = hal_remote_l32( page_flags_xp );

    if( (flags & PG_DIRTY) == 0 )
    {
        // set dirty flag in page descriptor
        hal_remote_s32( page_flags_xp , flags | PG_DIRTY );

        // The PPM dirty list is a LOCAL list !!!
        // Inserting a new page in this list requires four pointer updates.
        // We can use the standard LIST API when the page is local,
        // but we cannot use the standard API if the page is remote...

        if( page_cxy == local_cxy )   // locally update the PPM dirty list
        {
            list_add_first( &ppm->dirty_root , &page_ptr->list );
        }
        else                          // remotely update the PPM dirty list
        {
            // get local and remote pointers on "root" list entry
            list_entry_t * root    = &ppm->dirty_root;
            xptr_t         root_xp = XPTR( page_cxy , root );

            // get local and remote pointers on "page" list entry
            list_entry_t * list    = &page_ptr->list;
            xptr_t         list_xp = XPTR( page_cxy , list );

            // get local and remote pointers on first dirty page
            list_entry_t * dirt    = hal_remote_lpt( XPTR( page_cxy , &root->next ) );
            xptr_t         dirt_xp = XPTR( page_cxy , dirt );

            // set root.next, list.next, list.pred, dirt.pred in remote cluster
            hal_remote_spt( root_xp                    , list );
            hal_remote_spt( list_xp                    , dirt );
            hal_remote_spt( list_xp + sizeof(intptr_t) , root );
            hal_remote_spt( dirt_xp + sizeof(intptr_t) , list );
        }

        done = true;
    }

    // unlock the remote page
    remote_busylock_release( page_lock_xp );

    // unlock the remote PPM dirty_list
    remote_queuelock_release( dirty_lock_xp );

    return done;

}  // end ppm_page_do_dirty()
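////////////////////////////////////////////////////////////////////////////////////////
// Remote insertion sketch (illustrative only): the four hal_remote_spt() writes above
// replicate list_add_first() on the remote cluster, inserting <list> as the new head
// of the dirty list:
//
//   before : root <-> dirt <-> ...
//   after  : root <-> list <-> dirt <-> ...
//
//   root.next = list   |   list.next = dirt
//   list.pred = root   |   dirt.pred = list
//
// The pred field is addressed at offset sizeof(intptr_t) from the list entry, which
// assumes the list_entry_t layout { next ; pred }.
////////////////////////////////////////////////////////////////////////////////////////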
////////////////////////////////////////////
bool_t ppm_page_undo_dirty( xptr_t page_xp )
{
    bool_t done = false;

    // get page cluster and local pointer
    page_t * page_ptr = GET_PTR( page_xp );
    cxy_t    page_cxy = GET_CXY( page_xp );

    // get local pointer on PPM (same in all clusters)
    ppm_t * ppm = &LOCAL_CLUSTER->ppm;

    // build extended pointers on page lock, page flags, and PPM dirty list lock
    xptr_t page_lock_xp  = XPTR( page_cxy , &page_ptr->lock  );
    xptr_t page_flags_xp = XPTR( page_cxy , &page_ptr->flags );
    xptr_t dirty_lock_xp = XPTR( page_cxy , &ppm->dirty_lock );

    // lock the remote PPM dirty_list
    remote_queuelock_acquire( dirty_lock_xp );

    // lock the remote page
    remote_busylock_acquire( page_lock_xp );

    // get remote page flags
    uint32_t flags = hal_remote_l32( page_flags_xp );

    if( flags & PG_DIRTY )   // page is dirty
    {
        // reset dirty flag in page descriptor
        hal_remote_s32( page_flags_xp , flags & (~PG_DIRTY) );

        // The PPM dirty list is a LOCAL list !!!
        // Removing a page from this list requires four pointer updates.
        // We can use the standard LIST API when the page is local,
        // but we cannot use the standard API if the page is remote...

        if( page_cxy == local_cxy )   // locally update the PPM dirty list
        {
            list_unlink( &page_ptr->list );
        }
        else                          // remotely update the PPM dirty list
        {
            // get local and remote pointers on "page" list entry
            list_entry_t * list    = &page_ptr->list;
            xptr_t         list_xp = XPTR( page_cxy , list );

            // get local and remote pointers on "next" page list entry
            list_entry_t * next    = hal_remote_lpt( list_xp );
            xptr_t         next_xp = XPTR( page_cxy , next );

            // get local and remote pointers on "pred" page list entry
            list_entry_t * pred    = hal_remote_lpt( list_xp + sizeof(intptr_t) );
            xptr_t         pred_xp = XPTR( page_cxy , pred );

            // set pred.next, list.next, list.pred, next.pred in remote cluster
            hal_remote_spt( pred_xp                    , next );
            hal_remote_spt( list_xp                    , NULL );
            hal_remote_spt( list_xp + sizeof(intptr_t) , NULL );
            hal_remote_spt( next_xp + sizeof(intptr_t) , pred );
        }

        done = true;
    }

    // unlock the remote page
    remote_busylock_release( page_lock_xp );

    // unlock the remote PPM dirty_list
    remote_queuelock_release( dirty_lock_xp );

    return done;

}  // end ppm_page_undo_dirty()

/////////////////////////////////
void ppm_sync_dirty_pages( void )
{
    ppm_t * ppm = &LOCAL_CLUSTER->ppm;

    // get local pointer on PPM dirty_root
    list_entry_t * dirty_root = &ppm->dirty_root;

    // build extended pointer on PPM dirty_lock
    xptr_t dirty_lock_xp = XPTR( local_cxy , &ppm->dirty_lock );

    // get the PPM dirty_list lock
    remote_queuelock_acquire( dirty_lock_xp );

    while( !list_is_empty( &ppm->dirty_root ) )
    {
        page_t * page    = LIST_FIRST( dirty_root , page_t , list );
        xptr_t   page_xp = XPTR( local_cxy , page );

        // build extended pointer on page lock
        xptr_t page_lock_xp = XPTR( local_cxy , &page->lock );

        // get the page lock
        remote_busylock_acquire( page_lock_xp );

        // sync the page : the page is expected to be removed from the dirty
        // list as a side effect of the sync, otherwise this loop would not terminate
        vfs_fs_move_page( page_xp , false );   // from mapper to device

        // release the page lock
        remote_busylock_release( page_lock_xp );
    }

    // release the PPM dirty_list lock
    remote_queuelock_release( dirty_lock_xp );

}  // end ppm_sync_dirty_pages()
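////////////////////////////////////////////////////////////////////////////////////////
// Usage sketch (illustrative only): a typical client is a mapper that modified a page
// in cluster <page_cxy>; it registers the page in that cluster's dirty list after the
// write, and the kernel running in that cluster later flushes its whole dirty list:
//
//   ppm_page_do_dirty( XPTR( page_cxy , page ) );       // mark page as dirty
//   /* ... */
//   ppm_sync_dirty_pages();                             // called in cluster page_cxy
////////////////////////////////////////////////////////////////////////////////////////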