/* * ppm.c - Physical Pages Manager implementation * * Authors Ghassan Almaless (2008,2009,2010,2011,2012) * Alain Greiner (2016,2017,2018,2019) * * Copyright (c) UPMC Sorbonne Universites * * This file is part of ALMOS-MKH. * * ALMOS-MKH.is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2.0 of the License. * * ALMOS-MKH.is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ALMOS-MKH.; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include //////////////////////////////////////////////////////////////////////////////////////// // global variables //////////////////////////////////////////////////////////////////////////////////////// extern chdev_directory_t chdev_dir; // allocated in kernel_init.c //////////////////////////////////////////////////////////////////////////////////////// // functions to translate [ page <-> base <-> ppn ] //////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////// inline xptr_t ppm_page2base( xptr_t page_xp ) { ppm_t * ppm = &LOCAL_CLUSTER->ppm; cxy_t page_cxy = GET_CXY( page_xp ); page_t * page_ptr = GET_PTR( page_xp ); void * base_ptr = ppm->vaddr_base + ((page_ptr - ppm->pages_tbl)<ppm; cxy_t base_cxy = GET_CXY( base_xp ); void * base_ptr = GET_PTR( base_xp ); page_t * page_ptr = ppm->pages_tbl + ((base_ptr - ppm->vaddr_base)>>CONFIG_PPM_PAGE_SHIFT); return XPTR( base_cxy , page_ptr ); } // end ppm_base2page() /////////////////////////////////////////// inline ppn_t ppm_page2ppn( xptr_t page_xp ) { ppm_t * ppm = &LOCAL_CLUSTER->ppm; cxy_t page_cxy = GET_CXY( page_xp ); page_t * page_ptr = GET_PTR( page_xp ); paddr_t paddr = PADDR( page_cxy , (page_ptr - ppm->pages_tbl)<> CONFIG_PPM_PAGE_SHIFT); } // end hal_page2ppn() /////////////////////////////////////// inline xptr_t ppm_ppn2page( ppn_t ppn ) { ppm_t * ppm = &LOCAL_CLUSTER->ppm; paddr_t paddr = ((paddr_t)ppn) << CONFIG_PPM_PAGE_SHIFT; cxy_t cxy = CXY_FROM_PADDR( paddr ); lpa_t lpa = LPA_FROM_PADDR( paddr ); return XPTR( cxy , &ppm->pages_tbl[lpa>>CONFIG_PPM_PAGE_SHIFT] ); } // end hal_ppn2page /////////////////////////////////////// inline xptr_t ppm_ppn2base( ppn_t ppn ) { ppm_t * ppm = &LOCAL_CLUSTER->ppm; paddr_t paddr = ((paddr_t)ppn) << CONFIG_PPM_PAGE_SHIFT; cxy_t cxy = CXY_FROM_PADDR( paddr ); lpa_t lpa = LPA_FROM_PADDR( paddr ); return XPTR( cxy , (void *)ppm->vaddr_base + lpa ); } // end ppm_ppn2base() /////////////////////////////////////////// inline ppn_t ppm_base2ppn( xptr_t base_xp ) { ppm_t * ppm = &LOCAL_CLUSTER->ppm; cxy_t base_cxy = GET_CXY( base_xp ); void * base_ptr = GET_PTR( base_xp ); paddr_t paddr = PADDR( base_cxy , (base_ptr - ppm->vaddr_base) ); return (ppn_t)(paddr >> CONFIG_PPM_PAGE_SHIFT); } // end ppm_base2ppn() //////////////////////////////////////////////////////////////////////////////////////// // functions to allocate / release physical pages //////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////// void ppm_free_pages_nolock( page_t * page ) { page_t * buddy; // searched buddy page descriptor uint32_t buddy_index; // buddy page index in page_tbl[] page_t * current; // current (merged) page descriptor uint32_t current_index; // current (merged) page index in page_tbl[] uint32_t current_order; // current (merged) page order ppm_t * ppm = &LOCAL_CLUSTER->ppm; page_t * pages_tbl = ppm->pages_tbl; assert( !page_is_flag( page , PG_FREE ) , "page already released : ppn = %x\n" , ppm_page2ppn( XPTR( local_cxy , page ) ) ); assert( !page_is_flag( page , PG_RESERVED ) , "reserved page : ppn = %x\n" , ppm_page2ppn( XPTR( local_cxy , page ) ) ); // set FREE flag in released page descriptor page_set_flag( page , PG_FREE ); // initialise loop variables current = page; current_order = page->order; current_index = page - ppm->pages_tbl; // search the buddy page descriptor // - merge with current page if buddy found // - exit to release the current page when buddy not found while( current_order < CONFIG_PPM_MAX_ORDER ) { // compute buddy page index and page descriptor buddy_index = current_index ^ (1 << current_order); buddy = pages_tbl + buddy_index; // exit loop if buddy not found in current free list if( !page_is_flag( buddy , PG_FREE ) || (buddy->order != current_order) ) break; // remove buddy page from current free_list list_unlink( &buddy->list ); ppm->free_pages_nr[current_order] --; // reset order field in buddy page descriptor buddy->order = 0; // compute next (merged) page index in page_tbl[] current_index &= buddy_index; // compute next (merged) page order current_order++; // compute next (merged) page descripror current = pages_tbl + current_index; } // update order field for merged page descriptor current->order = current_order; // insert merged page in relevant free list list_add_first( &ppm->free_pages_root[current_order] , ¤t->list ); ppm->free_pages_nr[current_order] ++; } // end ppm_free_pages_nolock() //////////////////////////////////////////// page_t * ppm_alloc_pages( uint32_t order ) { page_t * current_block; uint32_t current_order; uint32_t current_size; page_t * found_block; thread_t * this = CURRENT_THREAD; #if DEBUG_PPM_ALLOC_PAGES uint32_t cycle = (uint32_t)hal_get_cycles(); #endif #if (DEBUG_PPM_ALLOC_PAGES & 1) if( DEBUG_PPM_ALLOC_PAGES < cycle ) { printk("\n[%s] thread[%x,%x] enter for %d page(s) in cluster %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, 1<ppm; // check order assert( (order < CONFIG_PPM_MAX_ORDER) , "illegal order argument = %d\n" , order ); //build extended pointer on lock protecting remote PPM xptr_t lock_xp = XPTR( local_cxy , &ppm->free_lock ); // take lock protecting free lists remote_busylock_acquire( lock_xp ); current_block = NULL; current_order = order; // search a free block equal or larger than requested size while( current_order < CONFIG_PPM_MAX_ORDER ) { // get local pointer on the root of relevant free_list (same in all clusters) list_entry_t * root = &ppm->free_pages_root[current_order]; if( !list_is_empty( root ) ) { // get first free block in this free_list current_block = LIST_FIRST( root , page_t , list ); // remove this block from this free_list list_unlink( ¤t_block->list ); ppm->free_pages_nr[current_order] --; // register pointer on found block found_block = current_block; // compute found block size current_size = (1 << current_order); break; } // increment loop index current_order++; } if( current_block == NULL ) // return failure if no free block found { // release lock protecting free lists remote_busylock_release( lock_xp ); printk("\n[%s] thread[%x,%x] cannot allocate %d page(s) in cluster %x\n", __FUNCTION__, this->process->pid, this->trdid, 1< order ) { // update size and order current_order --; current_size >>= 1; // update order fiels in new free block current_block = found_block + current_size; current_block->order = current_order; // insert new free block in relevant free_list list_add_first( &ppm->free_pages_root[current_order] , ¤t_block->list ); ppm->free_pages_nr[current_order] ++; } // update found block page descriptor page_clear_flag( found_block , PG_FREE ); page_refcount_up( found_block ); found_block->order = order; // release lock protecting free lists remote_busylock_release( lock_xp ); // update DQDT dqdt_increment_pages( local_cxy , order ); #if DEBUG_PPM_ALLOC_PAGES if( DEBUG_PPM_ALLOC_PAGES < cycle ) { printk("\n[%s] thread[%x,%x] allocated %d page(s) in cluster %x / ppn %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, 1<ppm; #if DEBUG_PPM_FREE_PAGES thread_t * this = CURRENT_THREAD; uint32_t cycle = (uint32_t)hal_get_cycles(); #endif #if ( DEBUG_PPM_FREE_PAGES & 1 ) if( DEBUG_PPM_FREE_PAGES < cycle ) { printk("\n[%s] thread[%x,%x] enter for %d page(s) in cluster %x / ppn %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, 1<order, local_cxy, ppm_page2ppn(XPTR(local_cxy , page)), cycle ); ppm_remote_display( local_cxy ); #endif //build extended pointer on lock protecting free_lists xptr_t lock_xp = XPTR( local_cxy , &ppm->free_lock ); // get lock protecting free_pages[] array remote_busylock_acquire( lock_xp ); ppm_free_pages_nolock( page ); // release lock protecting free_lists remote_busylock_release( lock_xp ); // update DQDT dqdt_decrement_pages( local_cxy , page->order ); #if DEBUG_PPM_FREE_PAGES if( DEBUG_PPM_FREE_PAGES < cycle ) { printk("\n[%s] thread[%x,%x] released %d page(s) in cluster %x / ppn %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, 1<order, local_cxy, ppm_page2ppn(XPTR(local_cxy , page)) , cycle ); ppm_remote_display( local_cxy ); } #endif } // end ppm_free_pages() ///////////////////////////////////////////// void * ppm_remote_alloc_pages( cxy_t cxy, uint32_t order ) { uint32_t current_order; uint32_t current_size; page_t * current_block; page_t * found_block; thread_t * this = CURRENT_THREAD; #if DEBUG_PPM_REMOTE_ALLOC_PAGES uint32_t cycle = (uint32_t)hal_get_cycles(); #endif #if ( DEBUG_PPM_REMOTE_ALLOC_PAGES & 1 ) if( DEBUG_PPM_REMOTE_ALLOC_PAGES < cycle ) { printk("\n[%s] thread[%x,%x] enter for %d small page(s) in cluster %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, 1<ppm; //build extended pointer on lock protecting remote PPM xptr_t lock_xp = XPTR( cxy , &ppm->free_lock ); // take lock protecting free lists in remote cluster remote_busylock_acquire( lock_xp ); current_block = NULL; current_order = order; // search a free block equal or larger than requested size while( current_order < CONFIG_PPM_MAX_ORDER ) { // get local pointer on the root of relevant free_list (same in all clusters) list_entry_t * root = &ppm->free_pages_root[current_order]; if( !list_remote_is_empty( cxy , root ) ) // list non empty => success { // get local pointer on first free page descriptor in remote cluster current_block = LIST_REMOTE_FIRST( cxy, root , page_t , list ); // remove first free page from the free-list in remote cluster list_remote_unlink( cxy , ¤t_block->list ); hal_remote_atomic_add( XPTR( cxy , &ppm->free_pages_nr[current_order] ), -1 ); // register found block found_block = current_block; // compute found block size current_size = (1 << current_order); break; } // increment loop index current_order++; } if( current_block == NULL ) // return failure { // release lock protecting free lists remote_busylock_release( lock_xp ); printk("\n[ERROR] in %s : thread[%x,%x] cannot allocate %d page(s) in cluster %x\n", __FUNCTION__, this->process->pid, this->trdid, 1< order ) { // update order and size current_order --; current_size >>= 1; // update new free block order field in remote cluster current_block = found_block + current_size; hal_remote_s32( XPTR( cxy , ¤t_block->order ) , current_order ); // get local pointer on the root of the relevant free_list in remote cluster list_entry_t * root = &ppm->free_pages_root[current_order]; // insert new free block in this free_list list_remote_add_first( cxy , root, ¤t_block->list ); // update free-list number of items in remote cluster hal_remote_atomic_add( XPTR(cxy , &ppm->free_pages_nr[current_order]), 1 ); } // update refcount, flags and order fields in found block page_remote_clear_flag( XPTR( cxy , found_block ), PG_FREE ); page_remote_refcount_up( XPTR( cxy , found_block ) ); hal_remote_s32( XPTR( cxy , &found_block->order ) , order ); // release lock protecting free lists in remote cluster remote_busylock_release( lock_xp ); // update DQDT page counter in remote cluster dqdt_increment_pages( cxy , order ); #if DEBUG_PPM_REMOTE_ALLOC_PAGES if( DEBUG_PPM_REMOTE_ALLOC_PAGES < cycle ) { printk("\n[%s] thread[%x,%x] allocated %d page(s) in cluster %x / ppn %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, 1<process->pid, this->trdid, 1<order, page_cxy, ppm_page2ppn(XPTR( page_cxy , page_ptr )), cycle ); ppm_remote_display( page_cxy ); } #endif // build extended pointer on released page descriptor page_xp = XPTR( page_cxy , page_ptr ); // get local pointer on PPM (same in all clusters) ppm_t * ppm = &LOCAL_CLUSTER->ppm; // build extended pointer on lock protecting remote PPM xptr_t lock_xp = XPTR( page_cxy , &ppm->free_lock ); // get local pointer on remote PPM page_tbl[] array page_t * pages_tbl = hal_remote_lpt( XPTR( page_cxy , &ppm->pages_tbl ) ); // get lock protecting free_pages in remote cluster remote_busylock_acquire( lock_xp ); assert( !page_remote_is_flag( page_xp , PG_FREE ) , "page already released : ppn = %x\n" , ppm_page2ppn(XPTR( page_cxy , page_ptr ) ) ); assert( !page_remote_is_flag( page_xp , PG_RESERVED ) , "reserved page : ppn = %x\n" , ppm_page2ppn(XPTR( page_cxy , page_ptr ) ) ); // set the FREE flag in released page descriptor page_remote_set_flag( page_xp , PG_FREE ); // initialise loop variables current_ptr = page_ptr; current_order = hal_remote_l32( XPTR( page_cxy , &page_ptr->order ) ); current_index = page_ptr - ppm->pages_tbl; // search the buddy page descriptor // - merge with current page descriptor if buddy found // - exit to release the current page descriptor if buddy not found while( current_order < CONFIG_PPM_MAX_ORDER ) { // compute buddy page index and local pointer on page descriptor buddy_index = current_index ^ (1 << current_order); buddy_ptr = pages_tbl + buddy_index; // exit loop if buddy not found if( !page_remote_is_flag( XPTR( page_cxy , buddy_ptr ) , PG_FREE ) || (buddy_order != current_order) ) break; // remove buddy page from its free list in remote cluster list_remote_unlink( page_cxy , &buddy_ptr->list ); hal_remote_atomic_add( XPTR( page_cxy , &ppm->free_pages_nr[current_order] ) , -1 ); // reset order field in buddy page descriptor hal_remote_s32( XPTR( page_cxy , &buddy_ptr->order ) , 0 ); // compute next (merged) page index in page_tbl[] current_index &= buddy_index; // compute next (merged) page order current_order++; // compute next (merged) page descripror current_ptr = pages_tbl + current_index; } // end loop on order // update current (merged) page descriptor order field current_ptr = pages_tbl + current_index; hal_remote_s32( XPTR( page_cxy , ¤t_ptr->order ) , current_order ); // insert current (merged) page into relevant free list list_remote_add_first( page_cxy , &ppm->free_pages_root[current_order] , ¤t_ptr->list ); hal_remote_atomic_add( XPTR( page_cxy , &ppm->free_pages_nr[current_order] ) , 1 ); // release lock protecting free_pages[] array remote_busylock_release( lock_xp ); // update DQDT dqdt_decrement_pages( page_cxy , page_ptr->order ); #if DEBUG_PPM_REMOTE_FREE_PAGES if( DEBUG_PPM_REMOTE_FREE_PAGES < cycle ) { printk("\n[%s] thread[%x,%x] released %d page(s) in cluster %x / ppn %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, 1<order, page_cxy, ppm_page2ppn(XPTR( page_cxy , page_ptr ) ), cycle ); ppm_remote_display( page_cxy ); } #endif } // end ppm_remote_free_pages() //////////////////////////////////// void ppm_remote_display( cxy_t cxy ) { uint32_t order; list_entry_t * iter; xptr_t page_xp; ppm_t * ppm = &LOCAL_CLUSTER->ppm; // get remote PPM general parameters uint32_t pages_nr = hal_remote_l32( XPTR( cxy , &ppm->pages_nr ) ); void * vaddr_base = hal_remote_lpt( XPTR( cxy , &ppm->vaddr_base ) ); void * pages_tbl = hal_remote_lpt( XPTR( cxy , &ppm->pages_tbl ) ); // build extended pointer on lock protecting remote PPM xptr_t ppm_lock_xp = XPTR( cxy , &ppm->free_lock ); // get pointers on TXT0 chdev xptr_t txt0_xp = chdev_dir.txt_tx[0]; cxy_t txt0_cxy = GET_CXY( txt0_xp ); chdev_t * txt0_ptr = GET_PTR( txt0_xp ); // build extended pointer on remote TXT0 lock xptr_t txt_lock_xp = XPTR( txt0_cxy , &txt0_ptr->wait_lock ); // get PPM lock remote_busylock_acquire( ppm_lock_xp ); // get TXT0 lock remote_busylock_acquire( txt_lock_xp ); nolock_printk("\n***** PPM in cluster %x / %d pages / page_tbl %x / vaddr_base %x\n", local_cxy, pages_nr, pages_tbl, vaddr_base ); for( order = 0 ; order < CONFIG_PPM_MAX_ORDER ; order++ ) { // get number of free pages for free_list[order] in remote cluster uint32_t n = hal_remote_l32( XPTR( cxy , &ppm->free_pages_nr[order] ) ); // display direct free_list[order] nolock_printk("- forward : order = %d / n = %d\t: ", order , n ); LIST_REMOTE_FOREACH( cxy , &ppm->free_pages_root[order] , iter ) { page_xp = XPTR( cxy , LIST_ELEMENT( iter , page_t , list ) ); nolock_printk("%x," , ppm_page2ppn( page_xp ) ); } nolock_printk("\n"); } // release TXT0 lock remote_busylock_release( txt_lock_xp ); // release PPM lock remote_busylock_release( ppm_lock_xp ); } //////////////////////////////// error_t ppm_assert_order( void ) { uint32_t order; list_entry_t * iter; page_t * page; ppm_t * ppm = &LOCAL_CLUSTER->ppm; for( order=0 ; order < CONFIG_PPM_MAX_ORDER ; order++ ) { if( list_is_empty( &ppm->free_pages_root[order] ) ) continue; LIST_FOREACH( &ppm->free_pages_root[order] , iter ) { page = LIST_ELEMENT( iter , page_t , list ); if( page->order != order ) return -1; } } return 0; } ////////////////////////////////////////////////////////////////////////////////////// // functions to handle dirty physical pages ////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////// bool_t ppm_page_do_dirty( xptr_t page_xp ) { bool_t done = false; // get page cluster and local pointer page_t * page_ptr = GET_PTR( page_xp ); cxy_t page_cxy = GET_CXY( page_xp ); // get local pointer on PPM (same in all clusters) ppm_t * ppm = &LOCAL_CLUSTER->ppm; // build extended pointers on page lock, page flags, and PPM dirty list lock xptr_t page_lock_xp = XPTR( page_cxy , &page_ptr->lock ); xptr_t page_flags_xp = XPTR( page_cxy , &page_ptr->flags ); xptr_t dirty_lock_xp = XPTR( page_cxy , &ppm->dirty_lock ); // lock the remote PPM dirty_list remote_queuelock_acquire( dirty_lock_xp ); // lock the remote page remote_busylock_acquire( page_lock_xp ); // get remote page flags uint32_t flags = hal_remote_l32( page_flags_xp ); if( (flags & PG_DIRTY) == 0 ) { // set dirty flag in page descriptor hal_remote_s32( page_flags_xp , flags | PG_DIRTY ); // insert the page in the remote dirty list list_remote_add_first( page_cxy , &ppm->dirty_root , &page_ptr->list ); done = true; } // unlock the remote page remote_busylock_release( page_lock_xp ); // unlock the remote PPM dirty_list remote_queuelock_release( dirty_lock_xp ); return done; } // end ppm_page_do_dirty() //////////////////////////////////////////// bool_t ppm_page_undo_dirty( xptr_t page_xp ) { bool_t done = false; // get page cluster and local pointer page_t * page_ptr = GET_PTR( page_xp ); cxy_t page_cxy = GET_CXY( page_xp ); // get local pointer on PPM (same in all clusters) ppm_t * ppm = &LOCAL_CLUSTER->ppm; // build extended pointers on page lock, page flags, and PPM dirty list lock xptr_t page_lock_xp = XPTR( page_cxy , &page_ptr->lock ); xptr_t page_flags_xp = XPTR( page_cxy , &page_ptr->flags ); xptr_t dirty_lock_xp = XPTR( page_cxy , &ppm->dirty_lock ); // lock the remote PPM dirty_list remote_queuelock_acquire( XPTR( page_cxy , &ppm->dirty_lock ) ); // lock the remote page remote_busylock_acquire( page_lock_xp ); // get remote page flags uint32_t flags = hal_remote_l32( page_flags_xp ); if( (flags & PG_DIRTY) ) // page is dirty { // reset dirty flag in page descriptor hal_remote_s32( page_flags_xp , flags & (~PG_DIRTY) ); // remove the page from remote dirty list list_remote_unlink( page_cxy , &page_ptr->list ); done = true; } // unlock the remote page remote_busylock_release( page_lock_xp ); // unlock the remote PPM dirty_list remote_queuelock_release( dirty_lock_xp ); return done; } // end ppm_page_undo_dirty() ///////////////////////////////// void ppm_sync_dirty_pages( void ) { ppm_t * ppm = &LOCAL_CLUSTER->ppm; // get local pointer on PPM dirty_root list_entry_t * dirty_root = &ppm->dirty_root; // build extended pointer on PPM dirty_lock xptr_t dirty_lock_xp = XPTR( local_cxy , &ppm->dirty_lock ); // get the PPM dirty_list lock remote_queuelock_acquire( dirty_lock_xp ); while( !list_is_empty( &ppm->dirty_root ) ) { page_t * page = LIST_FIRST( dirty_root , page_t , list ); xptr_t page_xp = XPTR( local_cxy , page ); // build extended pointer on page lock xptr_t page_lock_xp = XPTR( local_cxy , &page->lock ); // get the page lock remote_busylock_acquire( page_lock_xp ); // sync the page vfs_fs_move_page( page_xp , false ); // from mapper to device // release the page lock remote_busylock_release( page_lock_xp ); } // release the PPM dirty_list lock remote_queuelock_release( dirty_lock_xp ); } // end ppm_sync_dirty_pages()