/* * almosmkh.c - User level ALMOS-MKH specific library implementation. * * Author Alain Greiner (2016,2017,2018,2019,2020) * * Copyright (c) UPMC Sorbonne Universites * * This file is part of ALMOS-MKH. * * ALMOS-MKH is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2.0 of the License. * * ALMOS-MKH is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ALMOS-MKH; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_REMOTE_MALLOC 0 #define DEBUG_PTHREAD_PARALLEL 0 ////////////////////////////////////////////////////////////////////////////////////// ///////////// Non standard system calls /////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////// ////////////////////////// int fg( unsigned int pid ) { return hal_user_syscall( SYS_FG, (reg_t)pid, 0, 0, 0 ); } ////////////////////////////// int is_fg( unsigned int pid, unsigned int * owner ) { return hal_user_syscall( SYS_IS_FG, (reg_t)pid, (reg_t)owner, 0, 0 ); } /////////////////////////////////////////////// int get_config( struct hard_config_s * config ) { return hal_user_syscall( SYS_GET, GET_CONFIG, (reg_t)config, 0, 0); } //////////////////////////////////// int get_core_id( unsigned int * cxy, unsigned int * lid ) { return hal_user_syscall( SYS_GET, GET_CORE_ID, (reg_t)cxy, (reg_t)lid, 0 ); } ///////////////////////////////////// int get_nb_cores( unsigned int cxy, unsigned int * ncores ) { return 
hal_user_syscall( SYS_GET, GET_NB_CORES, (reg_t)cxy, (reg_t)ncores, 0 ); } /////////////////////////////////////////// int get_best_core( unsigned int base_cxy, unsigned int level, unsigned int * cxy, unsigned int * lid ) { return hal_user_syscall( SYS_GET, GET_BEST_CORE, (reg_t)((base_cxy << 16) | (level & 0xFFFF)), (reg_t)cxy, (reg_t)lid ); } ////////////////////////////////////////////// int get_processes( unsigned int cxy, unsigned int owned, char * u_buf, unsigned int size ) { return hal_user_syscall( SYS_GET, GET_PROCESSES, (reg_t)((cxy << 16) | (owned & 0xFFFF)), (reg_t)u_buf, (reg_t)size ); } /////////////////////////////////////////// int get_cycle( unsigned long long * cycle ) { return hal_user_syscall( SYS_GET, GET_CYCLE, (reg_t)cycle, 0, 0 ); } /////////////////////////////////////////// int get_thread_info( thread_info_t * info ) { return hal_user_syscall( SYS_GET, GET_THREAD_INFO, (reg_t)info, 0, 0 ); } ////////////////////////////////// int place_fork( unsigned int cxy ) { return hal_user_syscall( SYS_PLACE_FORK, (reg_t)cxy, 0, 0, 0 ); } ///////////////////////////////// int utls( unsigned int operation, unsigned int value ) { return hal_user_syscall( SYS_UTLS, (reg_t)operation, (reg_t)value, 0, 0 ); } //////////////////////////////////////// void get_uint32( unsigned int * buffer ) { unsigned int i; int c; // ASCII character value unsigned char buf[32]; unsigned int save = 0; unsigned int value = 0; unsigned int done = 0; unsigned int overflow = 0; unsigned int length = 0; // get characters while (done == 0) { // read one character c = getchar(); // analyse this character if ( ((c > 0x2F) && (c < 0x3A)) || // 0 to 9 ((c > 0x40) && (c < 0x47)) || // A to F ((c > 0x60) && (c < 0x67)) || // a to f (((c == 0x58) || (c == 0x78)) && (length == 1)) ) // X or x { putchar( c ); // echo if ( c > 0x60 ) c = c - 0x20; // to upper case buf[length] = (unsigned char)c; length++; } else if (c == 0x0A) // LF character { done = 1; } else if ( (c == 0x7F) || // DEL 
character (c == 0x08) ) // BS character { if ( length > 0 ) { length--; printf("\b \b"); // BS / / BS } } else if ( c == 0 ) // EOF character { overflow = 1; done = 1; } // test buffer overflow if ( length >= 32 ) { overflow = 1; done = 1; } } // end while characters // string to int conversion with overflow detection if ( overflow == 0 ) { // test (decimal / hexa) if( (buf[0] == 0x30) && ((buf[1] == 0x58) || (buf[1] == 0x78)) ) // hexa input { for (i = 2; (i < length) && (overflow == 0) ; i++) { if( buf[i] < 0x40 ) value = (value << 4) + (buf[i] - 0x30); else value = (value << 4) + (buf[i] - 0x37); if (value < save) overflow = 1; save = value; } } else // decimal input { for (i = 0; (i < length) && (overflow == 0) ; i++) { value = (value * 10) + (buf[i] - 0x30); if (value < save) overflow = 1; save = value; } } } // write value to buffer if ( overflow == 0 ) { // return value *buffer = value; } else { // cancel all echo characters for (i = 0; i < length ; i++) { printf("\b \b"); // BS / / BS } // echo character '0' putchar( '0' ); // return 0 value *buffer = 0; } } // end get_uint32() /////////////////////////////// int get_string( char * string, int maxlen ) { int c; int length = 0; while( 1 ) { // check buffer overflow if( length >= maxlen-1 ) { // cancel all echo characters while( length ) { printf("\b \b"); // cancel one echo character length--; } } // read one character c = getchar(); // analyse this character if ( (c >= 0x20) && (c < 0x7F) ) // printable character { putchar( c ); // echo string[length] = (char)c; // register character in string length++; // update length } else if( c == 0x0A ) // LF character marks end of string { putchar( c ); // echo string[length] = 0; // register NUL character in string return length + 1; } else if ( (c == 0x7F) || // DEL character (c == 0x08) ) // BS character { if ( length > 0 ) { printf("\b \b"); // cancel one echo character length--; } } } } // end get_string() 
////////////////////////////////////////////////////////////////////////////////////// /////////////// non standard debug functions /////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////// void display_string( char * string ) { hal_user_syscall( SYS_DISPLAY, DISPLAY_STRING, (reg_t)string, 0, 0 ); } ///////////////////////////////////////////////////// int display_vmm( unsigned int cxy, unsigned int pid, unsigned int mapping ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_VMM, (reg_t)cxy, (reg_t)pid, (reg_t)mapping ); } //////////////////////////////////// int display_sched( unsigned int cxy, unsigned int lid ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_SCHED, (reg_t)cxy, (reg_t)lid, 0 ); } //////////////////////////////////////////////// int display_cluster_processes( unsigned int cxy, unsigned int owned ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_CLUSTER_PROCESSES, (reg_t)cxy, (reg_t)owned, 0 ); } //////////////////////////////////////// int display_busylocks( unsigned int pid, unsigned int trdid ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_BUSYLOCKS, (reg_t)pid, (reg_t)trdid, 0 ); } ///////////////////////// int display_chdev( void ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_CHDEV, 0, 0, 0 ); } /////////////////////// int display_vfs( void ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_VFS, 0, 0, 0 ); } //////////////////////////////////////////////// int display_txt_processes( unsigned int txt_id ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_TXT_PROCESSES, (reg_t)txt_id, 0, 0 ); } //////////////////////// int display_dqdt( void ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_DQDT, 0, 0, 0 ); } /////////////////////////////////////// int display_mapper( char * path, unsigned int page_id, unsigned int nbytes) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_MAPPER, (reg_t)path, (reg_t)page_id, (reg_t)nbytes ); } 
/////////////////////////////////////// int display_barrier( unsigned int pid ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_BARRIER, (reg_t)pid, 0, 0 ); } /////////////////////////////////////// int display_fat( unsigned int min_slot, unsigned int nb_slots ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_FAT, (reg_t)min_slot, (reg_t)nb_slots, 0 ); } ///////////////////////////////////// int display_socket( unsigned int pid, unsigned int fdid ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_SOCKET, (reg_t)pid, (reg_t)fdid, 0 ); } //////////////////////////////////////// int display_fd_array( unsigned int pid ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_FD, (reg_t)pid, 0, 0 ); } /////////////////////////////////////////// int display_fbf_windows( unsigned int pid ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_WINDOWS, (reg_t)pid, 0, 0 ); } /////////////////////////////// int trace( unsigned int active, unsigned int cxy, unsigned int lid ) { return hal_user_syscall( SYS_TRACE, (reg_t)active, (reg_t)cxy, (reg_t)lid, 0 ); } ///////////////// void idbg( void ) { char cmd; while( 1 ) { // display prompt printf("\n[idbg] cmd = "); // get a one character command cmd = (char)getchar(); // display all busylocks owned by thread(pid,trdid) if( cmd == 'b' ) { unsigned int pid; unsigned int trdid; printf("b / pid = "); get_uint32( &pid ); printf(" / trdid = "); get_uint32( &trdid ); display_busylocks( pid , trdid ); } // return to calling process else if( cmd == 'c' ) { printf("c\n"); break; } // display FAT mapper(min,slots) else if( cmd == 'f' ) { unsigned int min; unsigned int slots; printf(" / min = "); get_uint32( &min ); printf(" / slots = "); get_uint32( &slots ); display_fat( min , slots ); } // list all supported commands else if( cmd == 'h' ) { printf("h\n" "- b : display on TXT0 busylocks taken by thread[pid,trdid]\n" "- c : resume calling process execution\n" "- f : display on TXT0 FAT mapper[min_slot,nb_slotss]\n" "- h : list of supported commands\n" "- 
m : display on TXT0 mapper[path,page,nbytes]\n" "- p : display on TXT0 process descriptors in cluster[cxy]\n" "- q : display on TXT0 DQDT state\n" "- s : display on TXT0 scheduler state for core[cxy,lid]\n" "- t : display on TXT0 process decriptors attached to TXT[tid]\n" "- v : display on TXT0 VMM state for process[cxy,pid]\n" "- x : force calling process to exit\n" "- y : activate/desactivate trace for core[cxy,lid]\n" ); } // display MAPPER(path,page,nbytes) else if( cmd == 'm' ) { char path[128]; unsigned int page; unsigned int nbytes; printf("m / path = "); get_string( path , 128 ); printf(" / page = "); get_uint32( &page ); printf(" / nbytes = "); get_uint32( &nbytes ); display_mapper( path , page , nbytes ); } // display all processes in cluster(cxy) else if( cmd == 'p' ) { unsigned int cxy; printf("p / cxy = "); get_uint32( &cxy ); display_cluster_processes( cxy , 0 ); } // display DQDT else if( cmd == 'q' ) { printf("q\n"); display_dqdt(); } // display scheduler state for core(cxy,lid) else if( cmd == 's' ) { unsigned int cxy; unsigned int lid; printf("s / cxy = "); get_uint32( &cxy ); printf(" / lid = "); get_uint32( &lid ); display_sched( cxy , lid ); } // display all processes attached to TXT(txt_id) else if( cmd == 't' ) { unsigned int txt_id; printf("t / txt_id = "); get_uint32( &txt_id ); display_txt_processes( txt_id ); } // display vmm state for process(cxy, pid) else if( cmd == 'v' ) { unsigned int cxy; unsigned int pid; unsigned int map; printf("v / cxy = "); get_uint32( &cxy ); printf(" / pid = "); get_uint32( &pid ); printf(" / mapping = "); get_uint32( &map ); display_vmm( cxy , pid , map ); } // force the calling process to exit else if( cmd == 'x' ) { printf("x\n"); exit( 0 ); } // activate scheduler trace for core(cxy,lid) else if( cmd == 'y' ) { unsigned int active; unsigned int cxy; unsigned int lid; printf("y / active = "); get_uint32( &active ); printf(" / cxy = "); get_uint32( &cxy ); printf(" / lid = "); get_uint32( &lid ); trace( 
active , cxy , lid ); } } // en while } // end idbg() ///////////////////////////////////////////////////////////////////////////////////////// /////////////// non standard remote_malloc //////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////// // Global variable defining the allocator array (one per cluster) // This array (about 16 Kbytes ) will be stored in the data segment // of any application linked with this libray. ///////////////////////////////////////////////////////////////////////////////////////// malloc_store_t store[MALLOC_MAX_CLUSTERS]; // Macro returning the smallest power of 2 larger or equal to size value #define GET_SIZE_INDEX(size) (size <= 0x00000001) ? 0 :\ (size <= 0x00000002) ? 1 :\ (size <= 0x00000004) ? 2 :\ (size <= 0x00000008) ? 3 :\ (size <= 0x00000010) ? 4 :\ (size <= 0x00000020) ? 5 :\ (size <= 0x00000040) ? 6 :\ (size <= 0x00000080) ? 7 :\ (size <= 0x00000100) ? 8 :\ (size <= 0x00000200) ? 9 :\ (size <= 0x00000400) ? 10 :\ (size <= 0x00000800) ? 11 :\ (size <= 0x00001000) ? 12 :\ (size <= 0x00002000) ? 13 :\ (size <= 0x00004000) ? 14 :\ (size <= 0x00008000) ? 15 :\ (size <= 0x00010000) ? 16 :\ (size <= 0x00020000) ? 17 :\ (size <= 0x00040000) ? 18 :\ (size <= 0x00080000) ? 19 :\ (size <= 0x00100000) ? 20 :\ (size <= 0x00200000) ? 21 :\ (size <= 0x00400000) ? 22 :\ (size <= 0x00800000) ? 23 :\ (size <= 0x01000000) ? 24 :\ (size <= 0x02000000) ? 25 :\ (size <= 0x04000000) ? 26 :\ (size <= 0x08000000) ? 27 :\ (size <= 0x10000000) ? 28 :\ (size <= 0x20000000) ? 29 :\ (size <= 0x40000000) ? 30 :\ (size <= 0x80000000) ? 31 :\ 32 //////////////////////////////////////////////////////////////////////////////////////////// // This static function display the current state of the allocator in cluster . 
////////////////////////////////////////////////////////////////////////////////////////////

#if DEBUG_REMOTE_MALLOC
// Prints the base and size of store[cxy], then, for each of the 32 free[] lists,
// the addresses of all chained blocks : the first word of a free block contains
// the address of the next free block, and 0 marks the end of a list.
static void display_free_array( unsigned int cxy )
{
    unsigned int next;      // address of current block in the scanned list
    unsigned int id;        // index in the free[] array

    printf("\n***** store[%x] base = %x / size = %x\n",
    cxy , store[cxy].store_base, store[cxy].store_size );

    for ( id = 0 ; id < 32 ; id++ )
    {
        next = store[cxy].free[id];
        printf(" - free[%d] = " , id );

        // follow the chained list up to the 0 terminator
        while ( next != 0 )
        {
            printf("%x | ", next );
            next = (*(unsigned int*)next);
        }
        printf("0\n");
    }
}  // end display_free_array()
#endif


////////////////////////////////////////////////////////////////////////////////////////////
// This static function initialises the store in the cluster identified by the <cxy>
// argument. It is called by the remote_malloc() function when a specific store(x,y)
// is accessed for the first time.
// It uses the mmap( MAP_REMOTE ) syscall to allocate a new vseg mapped in cluster (cxy).
////////////////////////////////////////////////////////////////////////////////////////////
// @ cxy        : target cluster identifier (fixed format).
// @ store_size : store size (bytes).
// # return without setting the initialized field in store(cxy) if failure.
////////////////////////////////////////////////////////////////////i////////////////////// static void store_init( unsigned int cxy, unsigned int store_size ) { unsigned int store_base; // store base address unsigned int free_index; // index in free[array] unsigned int alloc_base; // alloc[] array base unsigned int alloc_size; // alloc[] array size unsigned int alloc_index; // index in alloc[array] unsigned int iter; // iterator #if DEBUG_REMOTE_MALLOC unsigned int core_cxy; unsigned int core_lid; get_core_id( &core_cxy , &core_lid ); printf("\n[%s] core[%x,%d] enter for store[%x] / size = %x\n", __FUNCTION__, core_cxy, core_lid, cxy, store_size ); #endif // get index in free[] array from size free_index = GET_SIZE_INDEX( store_size ); // check store size power of 2 if( store_size != (unsigned int)(1<= alloc_index ; iter-- ) { store[cxy].free[iter] = base; ptr = (unsigned int*)base; *ptr = 0; base = base + (1<free[searched_index-1]; store->free[searched_index-1] = (unsigned int)new; if ( searched_index == requested_index + 1 ) // terminal case: return lower half block { return vaddr; } else // non terminal case : lower half block must be split again { return split_block( store, vaddr, searched_index-1, requested_index ); } } // end split_block() ////////////////////////////////////////////////////// static unsigned int get_block( malloc_store_t * store, unsigned int searched_index, unsigned int requested_index ) { // test terminal case if ( (unsigned int)(1< store->store_size ) // failure { return 0; } else // search a block in free[searched_index] { unsigned int vaddr = store->free[searched_index]; if ( vaddr == 0 ) // block not found : search in free[searched_index+1] { return get_block( store, searched_index+1, requested_index ); } else // block found : pop it from free[searched_index] { // pop the block from free[searched_index] unsigned int next = *((unsigned int*)vaddr); store->free[searched_index] = next; // test if the block must be split if ( 
searched_index == requested_index ) // no split required { return vaddr; } else // split is required { return split_block( store, vaddr, searched_index, requested_index ); } } } } // end get_block() //////////////////////////////////////// void * remote_malloc( unsigned int size, unsigned int cxy ) { int error; #if DEBUG_REMOTE_MALLOC unsigned int core_cxy; unsigned int core_lid; get_core_id( &core_cxy , &core_lid ); printf("\n[%s] core[%x,%d] enter for size = %x / target_cxy = %x\n", __FUNCTION__ , core_cxy, core_lid, size , cxy ); #endif // check arguments if( size == 0 ) { printf("\n[ERROR] in %s : requested size = 0 \n", __FUNCTION__ ); return NULL; } if( cxy >= MALLOC_MAX_CLUSTERS ) { printf("\n[ERROR] in %s : illegal cluster %x\n", __FUNCTION__ , cxy ); return NULL; } // initializes target store if required if( store[cxy].initialized != MALLOC_INITIALIZED ) { store_init( cxy , MALLOC_LOCAL_STORE_SIZE ); if( store[cxy].initialized != MALLOC_INITIALIZED ) { printf("\n[ERROR] in %s : cannot allocate store in cluster %x\n", __FUNCTION__ , cxy ); return NULL; } } // normalize size if ( size < MALLOC_MIN_BLOCK_SIZE ) size = MALLOC_MIN_BLOCK_SIZE; // compute requested_index for the free[] array unsigned int requested_index = GET_SIZE_INDEX( size ); // take the lock protecting access to store[cxy] error = pthread_mutex_lock( &store[cxy].mutex ); if( error ) { printf("\n[ERROR] in %s : cannot take the lock protecting store in cluster %x\n", __FUNCTION__ , cxy ); return NULL; } // call the recursive function get_block unsigned int base = get_block( &store[cxy], requested_index, requested_index ); // check block found if (base == 0) { pthread_mutex_unlock( &store[cxy].mutex ); printf("\n[ERROR] in %s : no more space in cluster %x\n", __FUNCTION__ , cxy ); return NULL; } // compute pointer in alloc[] array unsigned offset = (base - store[cxy].store_base) / MALLOC_MIN_BLOCK_SIZE; unsigned char * ptr = (unsigned char*)(store[cxy].alloc_base + offset); // update alloc_array 
*ptr = requested_index; // release the lock pthread_mutex_unlock( &store[cxy].mutex ); #if DEBUG_REMOTE_MALLOC printf("\n[%s] core[%x,%d] exit / base = %x / size = %x / from store[%x]\n", __FUNCTION__, core_cxy, core_lid, base , size , cxy ); #endif return (void*) base; } // end remote_malloc() ////////////////////////////////////////// void * remote_calloc ( unsigned int count, unsigned int size, unsigned int cxy ) { void * ptr = remote_malloc( count * size , cxy ); memset( ptr , 0 , count * size ); return ptr; } ////////////////////////////////// void * remote_realloc( void * ptr, unsigned int size, unsigned int cxy ) { // simple allocation when (ptr == NULL) if( ptr == NULL ) { return remote_malloc( size , cxy ); } // simple free when (size == 0) if( size == 0 ) { remote_free( ptr , cxy ); return NULL; } // check cxy and ptr in general case if( cxy >= MALLOC_MAX_CLUSTERS ) { printf("\n[ERROR] in %s : illegal cluster index %x\n", __FUNCTION__ , cxy ); return NULL; } unsigned int base = (unsigned int)ptr; if( (base < store[cxy].store_base) || (base >= (store[cxy].store_base + store[cxy].store_size)) ) { printf("\n[ERROR] in %s : illegal pointer = %x\n", __FUNCTION__, ptr ); return NULL; } // compute index in free[] array int index = (base - store[cxy].store_base) / MALLOC_MIN_BLOCK_SIZE; // compute old size char * pchar = (char *) (store[cxy].alloc_base + index); unsigned int old_size = (unsigned int)(1 << ((int) *pchar)); // allocate a new block void * new_ptr = remote_malloc( size , cxy ); // save old data to new block int min_size = (int)((size < old_size) ? 
size : old_size); memcpy( new_ptr, ptr, min_size ); // release old block remote_free( ptr , cxy ); return new_ptr; } // end remote_realloc() ////////////////////////////////////////////////////// static void update_free_array( malloc_store_t * store, unsigned int base, unsigned int size_index ) { // This recursive function try to merge the released block // with the companion block if this companion block is free. // This companion has the same size, and almost the same address // (only one address bit is different) // - If the companion is not in free[size_index], // the released block is pushed in free[size_index]. // - If the companion is found, it is evicted from free[size_index] // and the merged bloc is pushed in the free[size_index+1]. // compute released block size unsigned int size = 1<free[size_index]; unsigned int prev = (unsigned int)&store->free[size_index]; while ( iter ) { if ( iter == companion_base ) { found = 1; break; } prev = iter; iter = *(unsigned int*)iter; } if ( found == 0 ) // Companion not found => push in free[size_index] { *(unsigned int*)base = store->free[size_index]; store->free[size_index] = base; } else // Companion found : merge { // evict the searched block from free[size_index] *(unsigned int*)prev = *(unsigned int*)iter; // call the update_free() function for free[size_index+1] update_free_array( store, merged_base , size_index+1 ); } } // end update_free_array() //////////////////////////////////// void remote_free( void * ptr, unsigned int cxy ) { #if DEBUG_REMOTE_MALLOC printf("\n[MALLOC] %s : enter for block = %x / cxy = %x\n", __FUNCTION__, ptr, cxy ); #endif unsigned int base = (unsigned int)ptr; // check cxy value if( cxy >= MALLOC_MAX_CLUSTERS ) { printf("\n[ERROR] in %s : illegal cluster index %x\n", __FUNCTION__ , cxy ); return; } // check ptr value if( (base < store[cxy].store_base) || (base >= (store[cxy].store_base + store[cxy].store_size)) ) { printf("\n[ERROR] in %s : illegal pointer for released block = %x\n", 
__FUNCTION__, ptr ); return; } // get the lock protecting store[cxy] pthread_mutex_lock( &store[cxy].mutex ); // compute released block index in alloc[] array unsigned index = (base - store[cxy].store_base ) / MALLOC_MIN_BLOCK_SIZE; // get the released block size_index unsigned char* pchar = (unsigned char*)(store[cxy].alloc_base + index); unsigned int size_index = (unsigned int)*pchar; // check block is allocated if ( size_index == 0 ) { pthread_mutex_unlock( &store[cxy].mutex ); printf("\n[ERROR] in %s : released block not allocated / ptr = %x\n", __FUNCTION__, ptr ); return; } // check released block alignment if ( base % (1 << size_index) ) { pthread_mutex_unlock( &store[cxy].mutex ); printf("\n[ERROR] in %s : released block not aligned / ptr = %x\n", __FUNCTION__, ptr ); return; } // reset the alloc[index] entry *pchar = 0; // call the recursive function update_free_array() update_free_array( &store[cxy], base, size_index ); // release the lock pthread_mutex_unlock( &store[cxy].mutex ); #if DEBUG_REMOTE_MALLOC printf("\n[MALLOC] %s : conmpletes for block = %x / cxy = %x\n", __FUNCTION__, ptr, cxy ); #endif } // end remote_free() ///////////////////////////////////////////////////////////////////////////////////////// /////////////// non standard pthread_parallel_create ////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////// #define X_MAX 16 // max number of clusters in a row #define Y_MAX 16 // max number of clusters in a column #define CLUSTERS_MAX X_MAX * Y_MAX // max number of clusters #define LEVEL_MAX 5 // max level of DQT #define CORES_MAX 4 // max number of cores per cluster ///////////////////////////////////////////////////////////////////////////////////////// // Global variables // // WARNING : arguments of the pthread_create() function MUST be global variables. 
///////////////////////////////////////////////////////////////////////////////////////// // 2D array of threads attributes / indexed by [cid][level] __attribute__((aligned(4096))) pthread_attr_t pthread_build_attr[CLUSTERS_MAX][LEVEL_MAX]; // 2D array of threads arguments / indexed by [cid][level] __attribute__((aligned(4096))) pthread_parallel_build_args_t pthread_build_args[CLUSTERS_MAX][LEVEL_MAX]; // 1D array of threads attributes / indexed by [tid] __attribute__((aligned(4096))) pthread_attr_t pthread_work_attr[CLUSTERS_MAX * CORES_MAX]; // 1D array of threads arguments / indexed by [tid] __attribute__((aligned(4096))) pthread_parallel_work_args_t pthread_work_args[CLUSTERS_MAX * CORES_MAX]; // kernel thread identifier / unused, but required by pthread_create() __attribute__((aligned(4096))) pthread_t trdid; /////////////////////////////////////////////////////////////////////////// static void pthread_recursive_build( pthread_parallel_build_args_t * args ) { // get arguments unsigned int cid = args->cid; unsigned int level = args->level; unsigned int parent_cid = args->parent_cid; pthread_barrier_t * parent_barrier = args->parent_barrier; unsigned int root_level = args->root_level; void * work_func = args->work_func; unsigned int x_size = args->x_size; unsigned int y_size = args->y_size; unsigned int ncores = args->ncores; #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%d][%d] enters / parent_cid %d / work_func %x\n", __FUNCTION__, cid , level , parent_cid , work_func ); #endif // set error default value in pthread_build_args[cid][level] pthread_build_args[cid][level].error = 0; // get cxy from cid unsigned int cxy = HAL_CXY_FROM_XY( cid / y_size , cid % y_size ); // allocate the parent/child barrier in local cluster pthread_barrier_t * barrier = (pthread_barrier_t *)malloc( sizeof(pthread_barrier_t) ); if( barrier == NULL ) { printf("\n[ERROR] in %s : cannot allocate barrier for thread[%d][%d]\n", __FUNCTION__ , cid , level ); // report error to parent 
pthread_build_args[parent_cid][level+1].error = 1; } /////////////////////////////////////////////////////////// if( level == 0 ) // children are threads { // check number of cores in local cluster unsigned int actual_ncores; get_nb_cores( cxy , &actual_ncores ); if( actual_ncores != ncores ) { printf("\n[ERROR] in %s : actual_ncores (%d) in cluster %x\n", __FUNCTION__ , actual_ncores, cxy ); // report error to parent pthread_build_args[parent_cid][level+1].error = 1; } // initializes barrier for (ncores + 1) in flat mode if( pthread_barrier_init( barrier , NULL , ncores + 1 ) ) { printf("\n[ERROR] in %s : cannot init barrier for thread[%d][%d]\n", __FUNCTION__ , cid , level ); // report error to parent pthread_build_args[parent_cid][level+1].error = 1; } #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%d][%d] initialized barrier / %d children\n", __FUNCTION__, cid, level, ncores ); #endif unsigned int lid; // core local index for thread unsigned int tid; // thread continuous index // thread creates ncores threads for ( lid = 0 ; lid < ncores ; lid++ ) { // compute work thread tid tid = (cid * ncores) + lid; // set attributes for thread[tid] pthread_work_attr[tid].attributes = PT_ATTR_DETACH | PT_ATTR_CLUSTER_DEFINED | PT_ATTR_CORE_DEFINED; pthread_work_attr[tid].cxy = cxy; pthread_work_attr[tid].lid = lid; // set tid and barrier arguments for thread[tid] pthread_work_args[tid].tid = tid; pthread_work_args[tid].barrier = barrier; // create thread if ( pthread_create( &trdid, // unused &pthread_work_attr[tid], work_func, &pthread_work_args[tid] ) ) { printf("\n[ERROR] in %s : thread[%d][%d] cannot create thread[%d]\n", __FUNCTION__ , cid , level , tid ); // report error to parent pthread_build_args[parent_cid][level+1].error = 1; } #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%d][%d] created thread[%d]\n", __FUNCTION__, cid, level, tid ); #endif } // wait on barrier until all children threads completed if( pthread_barrier_wait( barrier ) ) { printf("\n[ERROR] 
in %s / barrier for thread[%x][%d]\n", __FUNCTION__ , cid , level ); // report error to parent pthread_build_args[parent_cid][level+1].error = 1; } #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%d][%d] resume after children completion\n", __FUNCTION__ , cid , level ); #endif } // end level == 0 //////////////////////////////////////////////////////////// else // children are "build" threads { // the 4 children threads can be linked to any core in each // sub-macro-cluster[i][j] with [ij] in {00,01,10,11} unsigned int parent_x; // X coordinate of parent macro-cluster unsigned int parent_y; // Y coordinate of parent macro-cluster unsigned int child_x; // X coordinate of child macro-cluster unsigned int child_y; // Y coordinate of child macro-cluster unsigned int child_cid[2][2]; // selected cluster cid for child[i][j] unsigned int child_cxy[2][2]; // selected cluster cxy for child[i][j] unsigned int child_lid[2][2]; // selected core index for child[i][j] int child_sts[2][2]; // -1 if error / 0 if success / +1 if no core unsigned int i; // loop index for children unsigned int j; // loop index for children unsigned int nb_children = 0; // actual number of children (can be < 4) // get parent macro-cluster mask and half-size from level unsigned int mask = (1 << level) - 1; unsigned int half = (level > 0) ? (1 << (level - 1)) : 0; // get parent macro-cluster coordinates parent_x = HAL_X_FROM_CXY( cxy ) & ~mask; parent_y = HAL_Y_FROM_CXY( cxy ) & ~mask; // First step : select core for each child thread for (i = 0 ; i < 2 ; i++) { // compute child macro-cluster X coordinate child_x = (i == 0) ? parent_x : (parent_x + half); for (j = 0 ; j < 2 ; j++) { // compute child macro-cluster Y coordinate child_y = (j == 0) ? 
parent_y : (parent_y + half); // select the best core in macro-cluster unsigned int best_cxy; unsigned int best_lid; child_sts[i][j] = get_best_core( HAL_CXY_FROM_XY( child_x , child_y ), level-1, &best_cxy, &best_lid ); if( child_sts[i][j] < 0 ) // failure => report error { printf("\n[ERROR] in %s : child[%d,%d] of thread[%d,%d]\n", __FUNCTION__ , i , j , cid , level ); // report error to parent pthread_build_args[parent_cid][level+1].error = 1; } else if (child_sts[i][j] > 0 ) // macro-cluster empty => does nothing { } else // core found { child_cxy[i][j] = best_cxy; child_lid[i][j] = best_lid; child_cid[i][j] = (HAL_X_FROM_CXY(best_cxy) * y_size) + HAL_Y_FROM_CXY( best_cxy); nb_children++; #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%d][%d] select core[%x][%d] for child[%d][%d]\n", __FUNCTION__ , cid , level , best_cxy , best_lid , i , j ); #endif } } // end for j } // end for i // second step : initialize barrier for (nb_children + 1) in flat mode if( pthread_barrier_init( barrier , NULL , nb_children + 1 ) ) { printf("\n[ERROR] in %s : cannot init barrier for thread[%d][%d]\n", __FUNCTION__ , cid , level ); // report error to parent pthread_build_args[parent_cid][level+1].error = 1; } #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%d][%d] initialized barrier / %d children\n", __FUNCTION__, cid, level, nb_children ); #endif // Third step : actually create the children threads for (i = 0 ; i < 2 ; i++) { for (j = 0 ; j < 2 ; j++) { // thread is created only if macro-cluster is active if( child_sts[i][j] == 0 ) { unsigned int tgt_cid = child_cid[i][j]; unsigned int tgt_lid = child_lid[i][j]; unsigned int tgt_cxy = child_cxy[i][j]; // set child thread attributes pthread_build_attr[tgt_cid][level-1].attributes = PT_ATTR_DETACH | PT_ATTR_CLUSTER_DEFINED | PT_ATTR_CORE_DEFINED; pthread_build_attr[tgt_cid][level-1].cxy = tgt_cxy; pthread_build_attr[tgt_cid][level-1].lid = tgt_lid; // propagate build function arguments from parent to child 
pthread_build_args[tgt_cid][level-1].cid = tgt_cid; pthread_build_args[tgt_cid][level-1].level = level-1; pthread_build_args[tgt_cid][level-1].parent_cid = cid; pthread_build_args[tgt_cid][level-1].parent_barrier = barrier; pthread_build_args[tgt_cid][level-1].root_level = root_level; pthread_build_args[tgt_cid][level-1].work_func = work_func; pthread_build_args[tgt_cid][level-1].x_size = x_size; pthread_build_args[tgt_cid][level-1].y_size = y_size; pthread_build_args[tgt_cid][level-1].ncores = ncores; // create thread if( pthread_create( &trdid, &pthread_build_attr[tgt_cid][level-1], &pthread_recursive_build, &pthread_build_args[tgt_cid][level-1] ) ) { printf("\n[ERROR] in %s : cannot create thread[%x][%d]\n", __FUNCTION__ , child_cid , level -1 ); // report error to parent pthread_build_args[parent_cid][level+1].error = 1; } #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%d][%d] created thread[%d][%d] on core[%x,%d]\n", __FUNCTION__, cid, level, tgt_cid, (level - 1), tgt_cxy, tgt_lid ); #endif } //end if sts[x][y] } // end for y } // end for x // wait on barrier until all children threads completed if( pthread_barrier_wait( barrier ) ) { printf("\n[ERROR] in %s / barrier for thread[%d][%d]\n", __FUNCTION__ , cid , level ); // report error to parent pthread_build_args[parent_cid][level+1].error = 1; } #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%x][%d] resume after children completion\n", __FUNCTION__, cid, level ); #endif } // end level > 0 // report error to parent when required if( pthread_build_args[cid][level].error ) { pthread_build_args[parent_cid][level+1].error = 1; } // all threads - but the root - signal completion to parent thread and exit if( level < root_level ) { if( pthread_barrier_wait( parent_barrier ) ) { printf("\n[ERROR] in %s / parent barrier for thread[%d][%d]\n", __FUNCTION__ , cid , level ); // report error to parent pthread_build_args[parent_cid][level+1].error = 1; } #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%x][%d] 
exit\n", __FUNCTION__, cid , level ); #endif // thread exit pthread_exit( NULL ); } } // end pthread_recursive_build() ////////////////////////////////////////////////////// int pthread_parallel_create( unsigned int root_level, void * work_func ) { #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] enter / root_level %d / func %x\n", __FUNCTION__, root_level, work_func ); #endif // get platform parameters hard_config_t config; get_config( &config ); unsigned int x_size = config.x_size; unsigned int y_size = config.y_size; unsigned int ncores = config.ncores; // get calling thread cluster identifier unsigned int root_cxy; unsigned int root_lid; // unused, but required by get_core_id() get_core_id( &root_cxy , &root_lid ); // get calling thread continuous index unsigned int x = HAL_X_FROM_CXY( root_cxy ); unsigned int y = HAL_Y_FROM_CXY( root_cxy ); unsigned int root_cid = (y_size * x) + y; // set the build function arguments for the root thread pthread_build_args[root_cid][root_level].cid = root_cid; pthread_build_args[root_cid][root_level].level = root_level; pthread_build_args[root_cid][root_level].parent_cid = -1; pthread_build_args[root_cid][root_level].parent_barrier = NULL; pthread_build_args[root_cid][root_level].root_level = root_level; pthread_build_args[root_cid][root_level].work_func = work_func; pthread_build_args[root_cid][root_level].x_size = x_size; pthread_build_args[root_cid][root_level].y_size = y_size; pthread_build_args[root_cid][root_level].ncores = ncores; // call the recursive function pthread_recursive_build( &pthread_build_args[root_cid][root_level] ); // check error when execution completes if( pthread_build_args[root_cid][root_level].error ) { printf("\n[error] in %s\n", __FUNCTION__ ); return -1; } return 0; } // end pthread_parallel_create() ///////////////////////////////////////////////////////////////////////////////////////// /////////////// non standard Frame Buffer related syscalls 
///////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////// int fbf_get_config( int * width, int * height, int * type ) { return hal_user_syscall( SYS_FBF, (reg_t)FBF_GET_CONFIG, (reg_t)width, (reg_t)height, (reg_t)type ); } //////////////////////////// int fbf_read( void * buffer, int length, int offset ) { printf("[WARNING] the <%s> syscall is deprecated\n", __FUNCTION__ ); return hal_user_syscall( SYS_FBF, (reg_t)FBF_DIRECT_READ, (reg_t)buffer, (reg_t)length, (reg_t)offset ); } ///////////////////////////// int fbf_write( void * buffer, int length, int offset ) { printf("[WARNING] the <%s> syscall is deprecated\n", __FUNCTION__ ); return hal_user_syscall( SYS_FBF, (reg_t)FBF_DIRECT_WRITE, (reg_t)buffer, (reg_t)length, (reg_t)offset ); } ////////////////////////////////////// int fbf_create_window( int l_zero, int p_zero, int nlines, int npixels, void ** buffer ) { return hal_user_syscall( SYS_FBF, (reg_t)FBF_CREATE_WINDOW, (reg_t)((l_zero << 16) | p_zero), (reg_t)((nlines << 16) | npixels), (reg_t)buffer ); } /////////////////////////////// int fbf_active_window( int wid, int active ) { return hal_user_syscall( SYS_FBF, (reg_t)FBF_ACTIVE_WINDOW, (reg_t)wid, (reg_t)active, 0 ); } ///////////////////////////////// int fbf_delete_window( int wid ) { return hal_user_syscall( SYS_FBF, (reg_t)FBF_DELETE_WINDOW, (reg_t)wid, 0, 0 ); } ////////////////////////////// int fbf_move_window( int wid, int l_zero, int p_zero ) { return hal_user_syscall( SYS_FBF, (reg_t)FBF_MOVE_WINDOW, (reg_t)wid, (reg_t)l_zero, (reg_t)p_zero ); } //////////////////////////////// int fbf_resize_window( int wid, int width, int height ) { return hal_user_syscall( SYS_FBF, (reg_t)FBF_RESIZE_WINDOW, (reg_t)wid, (reg_t)width, (reg_t)height ); } ///////////////////////////////// int fbf_refresh_window( int wid, int line_first, int line_last ) { return hal_user_syscall( SYS_FBF, (reg_t)FBF_REFRESH_WINDOW, (reg_t)wid, (reg_t)line_first, 
(reg_t)line_last ); } ///////////////////////////////// int fbf_front_window( int wid ) { return hal_user_syscall( SYS_FBF, (reg_t)FBF_FRONT_WINDOW, (reg_t)wid, 0, 0 ); } // Local Variables: // tab-width: 4 // c-basic-offset: 4 // c-file-offsets:((innamespace . 0)(inline-open . 0)) // indent-tabs-mode: nil // End: // vim: filetype=c:expandtab:shiftwidth=4:tabstop=4:softtabstop=4