/* * almosmkh.c - User level ALMOS-MKH specific library implementation. * * Author Alain Greiner (2016,2017,2018,2019) * * Copyright (c) UPMC Sorbonne Universites * * This file is part of ALMOS-MKH. * * ALMOS-MKH is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2.0 of the License. * * ALMOS-MKH is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ALMOS-MKH; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #define DEBUG_REMOTE_MALLOC 0 #define DEBUG_PTHREAD_PARALLEL 1 ////////////////////////////////////////////////////////////////////////////////////// ///////////// Non standard system calls /////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////// ////////////////////////// int fg( unsigned int pid ) { return hal_user_syscall( SYS_FG, (reg_t)pid, 0, 0, 0 ); } ////////////////////////////// int is_fg( unsigned int pid, unsigned int * owner ) { return hal_user_syscall( SYS_IS_FG, (reg_t)pid, (reg_t)owner, 0, 0 ); } ////////////////////////////////////// int get_config( unsigned int * x_size, unsigned int * y_size, unsigned int * ncores ) { return hal_user_syscall( SYS_GET_CONFIG, (reg_t)x_size, (reg_t)y_size, (reg_t)ncores, 0 ); } //////////////////////////////////// int get_core_id( unsigned int * cxy, unsigned int * lid ) { return hal_user_syscall( SYS_GET_CORE_ID, (reg_t)cxy, (reg_t)lid, 0, 0 ); } ///////////////////////////////////// int get_nb_cores( unsigned int cxy, unsigned int * ncores ) { return hal_user_syscall( SYS_GET_NB_CORES, (reg_t)cxy, (reg_t)ncores, 0, 0 ); } /////////////////////////////////////////// int get_best_core( unsigned int base_cxy, unsigned int level, unsigned int * cxy, unsigned int * lid ) { return hal_user_syscall( SYS_GET_BEST_CORE, (reg_t)base_cxy, (reg_t)level, (reg_t)cxy, (reg_t)lid ); } /////////////////////////////////////////// int get_cycle( unsigned long long * cycle ) { return hal_user_syscall( SYS_GET_CYCLE, (reg_t)cycle, 0, 0, 0 ); } ////////////////////////////////// int place_fork( unsigned int cxy ) { return hal_user_syscall( SYS_PLACE_FORK, (reg_t)cxy, 0, 0, 0 ); } ///////////////////////////////// int utls( unsigned int operation, unsigned int value ) { return hal_user_syscall( SYS_UTLS, (reg_t)operation, (reg_t)value, 0, 0 ); } /////////////////////////////// unsigned int get_uint32( void ) { unsigned int i; int c; // ASCII character value unsigned char buf[32]; unsigned int save = 0; unsigned int value = 0; unsigned int done = 0; unsigned int overflow = 0; unsigned int length = 0; // get characters while (done == 0) { // read one character c = getchar(); // analyse this character if ( ((c > 0x2F) && (c < 0x3A)) || // 0 to 9 ((c > 0x40) && (c < 0x47)) || // A to F ((c > 0x60) && (c < 0x67)) || // a to f (((c == 0x58) || (c == 0x78)) && (length == 1)) ) // X or x { putchar( c ); // echo if ( c > 0x60 ) c = c - 0x20; // to upper case buf[length] = (unsigned char)c; length++; } else if (c == 0x0A) // LF character { done = 1; } else if ( (c == 0x7F) || // DEL character (c == 0x08) ) // BS character { if ( length > 0 ) { length--; printf("\b \b"); // BS / / BS } } else if ( c == 0 ) // EOF character { return -1; } // test buffer overflow if ( length >= 32 ) { overflow = 1; done = 1; } } // end while characters // string to int conversion with overflow detection if ( overflow == 0 ) { // test (decimal / hexa) if( (buf[0] == 0x30) && (buf[1] == 0x58) ) // hexadecimal input { for (i = 2; (i < length) && (overflow == 0) ; i++) { if( buf[i] < 0x40 ) value = (value << 4) + (buf[i] - 0x30); else value = (value << 4) + (buf[i] - 0x37); if (value < save) overflow = 1; save = value; } } else // decimal input { for (i = 0; (i < length) && (overflow == 0) ; i++) { value = (value * 10) + (buf[i] - 0x30); if (value < save) overflow = 1; save = value; } } } // final evaluation if ( overflow == 0 ) { // return value return value; } else { // cancel all echo characters for (i = 0; i < length ; i++) { printf("\b \b"); // BS / / BS } // echo character '0' putchar( '0' ); // return 0 value return 0; } } // end get_uint32() ////////////////////////////// int get_string( char * string, int maxlen ) { int c; int done = 0; int length = 0; while( done == 0 ) { // check buffer overflow if( length >= maxlen-1 ) { return -1; // return failure } // read one character c = getchar(); // analyse this character if ( (c >= 0x20) && (c < 0x7F) ) // printable character { putchar( c ); // echo string[length] = (char)c; // register character in string length++; // update length } else if (c == 0x0A) // LF character marks end of string { done = 1; } else if ( (c == 0x7F) || // DEL character (c == 0x08) ) // BS character { if ( length > 0 ) { length--; printf("\b \b"); // BS / / BS } } else if ( c == 0 ) // EOF character { return -1; // return failure } } // set NUL character in string and return success string[length] = 0; return 0; } // end get_string() ////////////////////////////////////////////////////////////////////////////////////// /////////////// non standard debug functions /////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////// void display_string( char * string ) { hal_user_syscall( SYS_DISPLAY, DISPLAY_STRING, (reg_t)string, 0, 0 ); } ///////////////////////////////////////////////////// int display_vmm( unsigned int cxy, unsigned int pid ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_VMM, (reg_t)cxy, (reg_t)pid, 0 ); } //////////////////////////////////// int display_sched( unsigned int cxy, unsigned int lid ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_SCHED, (reg_t)cxy, (reg_t)lid, 0 ); } //////////////////////////////////////////////// int display_cluster_processes( unsigned int cxy, unsigned int owned ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_CLUSTER_PROCESSES, (reg_t)cxy, (reg_t)owned, 0 ); } //////////////////////////////////////// int display_busylocks( unsigned int pid, unsigned int trdid ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_BUSYLOCKS, (reg_t)pid, (reg_t)trdid, 0 ); } ///////////////////////// int display_chdev( void ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_CHDEV, 0, 0, 0 ); } /////////////////////// int display_vfs( void ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_VFS, 0, 0, 0 ); } //////////////////////////////////////////////// int display_txt_processes( unsigned int txt_id ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_TXT_PROCESSES, (reg_t)txt_id, 0, 0 ); } //////////////////////// int display_dqdt( void ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_DQDT, 0, 0, 0 ); } /////////////////////////////////////// int display_mapper( char * path, unsigned int page_id, unsigned int nbytes) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_MAPPER, (reg_t)path, (reg_t)page_id, (reg_t)nbytes ); } /////////////////////////////////////// int display_barrier( unsigned int pid ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_BARRIER, (reg_t)pid, 0, 0 ); } /////////////////////////////////////// int display_fat( unsigned int page_id, unsigned int nb_entries ) { return hal_user_syscall( SYS_DISPLAY, DISPLAY_FAT, (reg_t)page_id, (reg_t)nb_entries, 0 ); } /////////////////////////////// int trace( unsigned int active, unsigned int cxy, unsigned int lid ) { return hal_user_syscall( SYS_TRACE, (reg_t)active, (reg_t)cxy, (reg_t)lid, 0 ); } ///////////////// void idbg( void ) { char cmd; while( 1 ) { // display prompt printf("\n[idbg] cmd = "); // get a one character command cmd = (char)getchar(); // display all busylocks owned by thread(pid,trdid) if( cmd == 'b' ) { printf("b / pid = "); unsigned int pid = get_uint32(); printf(" / trdid = "); unsigned int trdid = get_uint32(); display_busylocks( pid , trdid ); } // return to calling process else if( cmd == 'c' ) { printf("c\n"); break; } // display FAT mapper(page,entries) else if( cmd == 'f' ) { printf("f / page = "); unsigned int page = get_uint32(); printf(" / entries = "); unsigned int entries = get_uint32(); display_fat( page , entries ); } // list all supported commands else if( cmd == 'h' ) { printf("h\n" "- b : display on TXT0 busylocks taken by thread[pid,trdid]\n" "- c : resume calling process execution\n" "- f : display on TXT0 FAT mapper[page,entries]\n" "- h : list of supported commands\n" "- m : display on TXT0 mapper[path,page,nbytes]\n" "- p : display on TXT0 process descriptors in cluster[cxy]\n" "- q : display on TXT0 DQDT state\n" "- s : display on TXT0 scheduler state for core[cxy,lid]\n" "- t : display on TXT0 process decriptors attached to TXT[tid]\n" "- v : display on TXT0 VMM state for process[cxy,pid]\n" "- x : force calling process to exit\n" "- y : activate/desactivate trace for core[cxy,lid]\n" ); } // display MAPPER(path,page,nbytes) else if( cmd == 'm' ) { char path[128]; printf("m / path = "); int error = get_string( path , 128 ); printf(" / page = "); unsigned int page = get_uint32(); printf(" / nbytes = "); unsigned int nbytes = get_uint32(); if( error == 0 ) display_mapper( path , page , nbytes ); } // display all processes in cluster(cxy) else if( cmd == 'p' ) { printf("p / cxy = "); unsigned int cxy = get_uint32(); display_cluster_processes( cxy , 0 ); } // display DQDT else if( cmd == 'q' ) { printf("q\n"); display_dqdt(); } // display scheduler state for core(cxy,lid) else if( cmd == 's' ) { printf("s / cxy = "); unsigned int cxy = get_uint32(); printf(" / lid = "); unsigned int lid = get_uint32(); display_sched( cxy , lid ); } // display all processes attached to TXT(txt_id) else if( cmd == 't' ) { printf("t / txt_id = "); unsigned int txt_id = get_uint32(); display_txt_processes( txt_id ); } // display vmm state for process(cxy, pid) else if( cmd == 'v' ) { printf("v / cxy = "); unsigned int cxy = get_uint32(); printf(" / pid = "); unsigned int pid = get_uint32(); display_vmm( cxy , pid ); } // force the calling process to exit else if( cmd == 'x' ) { printf("x\n"); exit( 0 ); } // activate scheduler trace for core(cxy,lid) else if( cmd == 'y' ) { printf("y / active = "); unsigned int active = get_uint32(); printf(" / cxy = "); unsigned int cxy = get_uint32(); printf(" / lid = "); unsigned int lid = get_uint32(); trace( active , cxy , lid ); } } // en while } // end idbg() ///////////////////////////////////////////////////////////////////////////////////////// /////////////// non standard remote_malloc //////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////// // Global variable defining the allocator array (one per cluster) // This array (about 16 Kbytes ) will be stored in the data segment // of any application linked with this libray. ///////////////////////////////////////////////////////////////////////////////////////// malloc_store_t store[MALLOC_MAX_CLUSTERS]; // Macro returning the smallest power of 2 larger or equal to size value #define GET_SIZE_INDEX(size) (size <= 0x00000001) ? 0 :\ (size <= 0x00000002) ? 1 :\ (size <= 0x00000004) ? 2 :\ (size <= 0x00000008) ? 3 :\ (size <= 0x00000010) ? 4 :\ (size <= 0x00000020) ? 5 :\ (size <= 0x00000040) ? 6 :\ (size <= 0x00000080) ? 7 :\ (size <= 0x00000100) ? 8 :\ (size <= 0x00000200) ? 9 :\ (size <= 0x00000400) ? 10 :\ (size <= 0x00000800) ? 11 :\ (size <= 0x00001000) ? 12 :\ (size <= 0x00002000) ? 13 :\ (size <= 0x00004000) ? 14 :\ (size <= 0x00008000) ? 15 :\ (size <= 0x00010000) ? 16 :\ (size <= 0x00020000) ? 17 :\ (size <= 0x00040000) ? 18 :\ (size <= 0x00080000) ? 19 :\ (size <= 0x00100000) ? 20 :\ (size <= 0x00200000) ? 21 :\ (size <= 0x00400000) ? 22 :\ (size <= 0x00800000) ? 23 :\ (size <= 0x01000000) ? 24 :\ (size <= 0x02000000) ? 25 :\ (size <= 0x04000000) ? 26 :\ (size <= 0x08000000) ? 27 :\ (size <= 0x10000000) ? 28 :\ (size <= 0x20000000) ? 29 :\ (size <= 0x40000000) ? 30 :\ (size <= 0x80000000) ? 31 :\ 32 //////////////////////////////////////////////////////////////////////////////////////////// // This static function display the current state of the allocator in cluster . //////////////////////////////////////////////////////////////////////////////////////////// #if DEBUG_REMOTE_MALLOC static void display_free_array( unsigned int cxy ) { unsigned int next; unsigned int id; unsigned int iter; printf("\n***** store[%x] base = %x / size = %x\n", cxy , store[cxy].store_base, store[cxy].store_size ); for ( id = 0 ; id < 32 ; id++ ) { next = store[cxy].free[id]; printf(" - free[%d] = " , id ); iter = 0; while ( next != 0 ) { printf("%x | ", next ); next = (*(unsigned int*)next); iter++; } printf("0\n"); } } // end display_free_array() #endif ////////////////////////////////////////////////////////////////////i////////////////////// // This static function initialises the store in the cluster identified by the // arguments. It is called by the malloc() or remote_malloc when a specific store(x,y) // is accessed for the first time by a remote() or remote_malloc() request. // It uses the mmap( MAP_REMOTE ) syscall to allocate a new vseg mapped in cluster (cxy). ////////////////////////////////////////////////////////////////////i////////////////////// // @ cxy : target cluster identifier (fixed format). // @ store_size : store size (bytes). // # return without setting the initialized field in store(cxy) if failure. ////////////////////////////////////////////////////////////////////i////////////////////// static void store_init( unsigned int cxy, unsigned int store_size ) { unsigned int store_base; // store base address unsigned int free_index; // index in free[array] unsigned int alloc_base; // alloc[] array base unsigned int alloc_size; // alloc[] array size unsigned int alloc_index; // index in alloc[array] unsigned int iter; // iterator #if DEBUG_REMOTE_MALLOC unsigned int core_cxy; unsigned int core_lid; get_core_id( &core_cxy , &core_lid ); printf("\n[%s] core[%x,%d] enter for store[%x] / size = %x\n", __FUNCTION__, core_cxy, core_lid, cxy, store_size ); #endif // get index in free[] array from size free_index = GET_SIZE_INDEX( store_size ); // check store size power of 2 if( store_size != (unsigned int)(1<= alloc_index ; iter-- ) { store[cxy].free[iter] = base; ptr = (unsigned int*)base; *ptr = 0; base = base + (1<free[searched_index-1]; store->free[searched_index-1] = (unsigned int)new; if ( searched_index == requested_index + 1 ) // terminal case: return lower half block { return vaddr; } else // non terminal case : lower half block must be split again { return split_block( store, vaddr, searched_index-1, requested_index ); } } // end split_block() ////////////////////////////////////////////////////// static unsigned int get_block( malloc_store_t * store, unsigned int searched_index, unsigned int requested_index ) { // test terminal case if ( (unsigned int)(1< store->store_size ) // failure { return 0; } else // search a block in free[searched_index] { unsigned int vaddr = store->free[searched_index]; if ( vaddr == 0 ) // block not found : search in free[searched_index+1] { return get_block( store, searched_index+1, requested_index ); } else // block found : pop it from free[searched_index] { // pop the block from free[searched_index] unsigned int next = *((unsigned int*)vaddr); store->free[searched_index] = next; // test if the block must be split if ( searched_index == requested_index ) // no split required { return vaddr; } else // split is required { return split_block( store, vaddr, searched_index, requested_index ); } } } } // end get_block() //////////////////////////////////////// void * remote_malloc( unsigned int size, unsigned int cxy ) { int error; #if DEBUG_REMOTE_MALLOC unsigned int core_cxy; unsigned int core_lid; get_core_id( &core_cxy , &core_lid ); printf("\n[%s] core[%x,%d] enter for size = %x / target_cxy = %x\n", __FUNCTION__ , core_cxy, core_lid, size , cxy ); #endif // check arguments if( size == 0 ) { printf("\n[ERROR] in %s : requested size = 0 \n", __FUNCTION__ ); return NULL; } if( cxy >= MALLOC_MAX_CLUSTERS ) { printf("\n[ERROR] in %s : illegal cluster %x\n", __FUNCTION__ , cxy ); return NULL; } // initializes target store if required if( store[cxy].initialized != MALLOC_INITIALIZED ) { store_init( cxy , MALLOC_LOCAL_STORE_SIZE ); if( store[cxy].initialized != MALLOC_INITIALIZED ) { printf("\n[ERROR] in %s : cannot allocate store in cluster %x\n", __FUNCTION__ , cxy ); return NULL; } } // normalize size if ( size < MALLOC_MIN_BLOCK_SIZE ) size = MALLOC_MIN_BLOCK_SIZE; // compute requested_index for the free[] array unsigned int requested_index = GET_SIZE_INDEX( size ); // take the lock protecting access to store[cxy] error = pthread_mutex_lock( &store[cxy].mutex ); if( error ) { printf("\n[ERROR] in %s : cannot take the lock protecting store in cluster %x\n", __FUNCTION__ , cxy ); return NULL; } // call the recursive function get_block unsigned int base = get_block( &store[cxy], requested_index, requested_index ); // check block found if (base == 0) { pthread_mutex_unlock( &store[cxy].mutex ); printf("\n[ERROR] in %s : no more space in cluster %x\n", __FUNCTION__ , cxy ); return NULL; } // compute pointer in alloc[] array unsigned offset = (base - store[cxy].store_base) / MALLOC_MIN_BLOCK_SIZE; unsigned char * ptr = (unsigned char*)(store[cxy].alloc_base + offset); // update alloc_array *ptr = requested_index; // release the lock pthread_mutex_unlock( &store[cxy].mutex ); #if DEBUG_REMOTE_MALLOC printf("\n[%s] core[%x,%d] exit / base = %x / size = %x / from store[%x]\n", __FUNCTION__, core_cxy, core_lid, base , size , cxy ); #endif return (void*) base; } // end remote_malloc() ////////////////////////////////////////// void * remote_calloc ( unsigned int count, unsigned int size, unsigned int cxy ) { void * ptr = remote_malloc( count * size , cxy ); memset( ptr , 0 , count * size ); return ptr; } ////////////////////////////////// void * remote_realloc( void * ptr, unsigned int size, unsigned int cxy ) { // simple allocation when (ptr == NULL) if( ptr == NULL ) { return remote_malloc( size , cxy ); } // simple free when (size == 0) if( size == 0 ) { remote_free( ptr , cxy ); return NULL; } // check cxy and ptr in general case if( cxy >= MALLOC_MAX_CLUSTERS ) { printf("\n[ERROR] in %s : illegal cluster index %x\n", __FUNCTION__ , cxy ); return NULL; } unsigned int base = (unsigned int)ptr; if( (base < store[cxy].store_base) || (base >= (store[cxy].store_base + store[cxy].store_size)) ) { printf("\n[ERROR] in %s : illegal pointer = %x\n", __FUNCTION__, ptr ); return NULL; } // compute index in free[] array int index = (base - store[cxy].store_base) / MALLOC_MIN_BLOCK_SIZE; // compute old size char * pchar = (char *) (store[cxy].alloc_base + index); unsigned int old_size = (unsigned int)(1 << ((int) *pchar)); // allocate a new block void * new_ptr = remote_malloc( size , cxy ); // save old data to new block int min_size = (int)((size < old_size) ? size : old_size); memcpy( new_ptr, ptr, min_size ); // release old block remote_free( ptr , cxy ); return new_ptr; } // end remote_realloc() ////////////////////////////////////////////////////// static void update_free_array( malloc_store_t * store, unsigned int base, unsigned int size_index ) { // This recursive function try to merge the released block // with the companion block if this companion block is free. // This companion has the same size, and almost the same address // (only one address bit is different) // - If the companion is not in free[size_index], // the released block is pushed in free[size_index]. // - If the companion is found, it is evicted from free[size_index] // and the merged bloc is pushed in the free[size_index+1]. // compute released block size unsigned int size = 1<free[size_index]; unsigned int prev = (unsigned int)&store->free[size_index]; while ( iter ) { if ( iter == companion_base ) { found = 1; break; } prev = iter; iter = *(unsigned int*)iter; } if ( found == 0 ) // Companion not found => push in free[size_index] { *(unsigned int*)base = store->free[size_index]; store->free[size_index] = base; } else // Companion found : merge { // evict the searched block from free[size_index] *(unsigned int*)prev = *(unsigned int*)iter; // call the update_free() function for free[size_index+1] update_free_array( store, merged_base , size_index+1 ); } } // end update_free_array() //////////////////////////////////// void remote_free( void * ptr, unsigned int cxy ) { #if DEBUG_REMOTE_MALLOC printf("\n[MALLOC] %s : enter for block = %x / cxy = %x\n", __FUNCTION__, ptr, cxy ); #endif unsigned int base = (unsigned int)ptr; // check cxy value if( cxy >= MALLOC_MAX_CLUSTERS ) { printf("\n[ERROR] in %s : illegal cluster index %x\n", __FUNCTION__ , cxy ); return; } // check ptr value if( (base < store[cxy].store_base) || (base >= (store[cxy].store_base + store[cxy].store_size)) ) { printf("\n[ERROR] in %s : illegal pointer for released block = %x\n", __FUNCTION__, ptr ); return; } // get the lock protecting store[cxy] pthread_mutex_lock( &store[cxy].mutex ); // compute released block index in alloc[] array unsigned index = (base - store[cxy].store_base ) / MALLOC_MIN_BLOCK_SIZE; // get the released block size_index unsigned char* pchar = (unsigned char*)(store[cxy].alloc_base + index); unsigned int size_index = (unsigned int)*pchar; // check block is allocated if ( size_index == 0 ) { pthread_mutex_unlock( &store[cxy].mutex ); printf("\n[ERROR] in %s : released block not allocated / ptr = %x\n", __FUNCTION__, ptr ); return; } // check released block alignment if ( base % (1 << size_index) ) { pthread_mutex_unlock( &store[cxy].mutex ); printf("\n[ERROR] in %s : released block not aligned / ptr = %x\n", __FUNCTION__, ptr ); return; } // reset the alloc[index] entry *pchar = 0; // call the recursive function update_free_array() update_free_array( &store[cxy], base, size_index ); // release the lock pthread_mutex_unlock( &store[cxy].mutex ); #if DEBUG_REMOTE_MALLOC printf("\n[MALLOC] %s : conmpletes for block = %x / cxy = %x\n", __FUNCTION__, ptr, cxy ); #endif } // end remote_free() ///////////////////////////////////////////////////////////////////////////////////////// /////////////// non standard pthread_parallel_create ////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////// #define X_MAX 16 // max number of clusters in a row #define Y_MAX 16 // max number of clusters in a column #define CLUSTERS_MAX X_MAX * Y_MAX #define LEVEL_MAX 5 #define CORES_MAX 4 // max number of cores per cluster typedef struct build_args_s { unsigned char cxy; // this thread cluster identifier unsigned char level; // this thread level in quad-tree unsigned char parent_cxy; // parent thread cluster identifier unsigned char root_level; // quad-tree root level void * work_func; // pointer on work function pointer void * work_args_array; // pointer on 2D array of pointers pthread_barrier_t * parent_barriers_array; // pointer on 1D array of barriers unsigned int error; // return value : 0 if success } build_args_t; ///////////////////////////////////////////////////////////////////////////////////////// // Global variables used for inter-thread communications ///////////////////////////////////////////////////////////////////////////////////////// pthread_attr_t build_attr [CLUSTERS_MAX][LEVEL_MAX]; // POSIX thread attributes build_args_t build_args [CLUSTERS_MAX][LEVEL_MAX]; // build function arguments pthread_barrier_t build_barrier[CLUSTERS_MAX][LEVEL_MAX]; // parent/child synchro pthread_attr_t work_attr [CLUSTERS_MAX][CORES_MAX]; // POSIX thread attributes ////////////////////////////////////////////////////////// static void pthread_recursive_build( build_args_t * args ) { unsigned int trdid; // unused (required by pthread_create() // get arguments unsigned int cxy = args->cxy; unsigned int level = args->level; unsigned int parent_cxy = args->parent_cxy; unsigned int root_level = args->root_level; void * work_func = args->work_func; void * work_args_array = args->work_args_array; pthread_barrier_t * parent_barriers_array = args->parent_barriers_array; // set error default value build_args[cxy][level].error = 0; /////////////////////////////////////////////////////////// if( level == 0 ) // children are "work" threads { unsigned int lid; // core local index unsigned int ncores; // number of cores in a cluster // get number of cores per cluster get_nb_cores( cxy , &ncores ); // kill process if no active core in cluster // TODO this "if" should be replaced by an "assert" [AG] if( ncores == 0 ) { printf("\n[PANIC] in %s : no active core in cluster %x\n", __FUNCTION__ , cxy ); // report error to parent build_args[parent_cxy][level+1].error = 1; // kill process exit( EXIT_FAILURE ); } // initialize the parent_barrier if( pthread_barrier_init( &parent_barriers_array[cxy] , NULL , ncores + 1 ) ) { printf("\n[ERROR] in %s : cannot initialise barrier for build thread[%x][%d]\n", __FUNCTION__ , cxy , level ); // report error to parent build_args[parent_cxy][level+1].error = 1; } #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%x][%d] created barrier / %d children\n", __FUNCTION__, cxy, level, ncores + 1 ); #endif // create (ncores) "work" threads for ( lid = 0 ; lid < ncores ; lid++ ) { // set attributes for thread[cxy][lid] work_attr[cxy][lid].attributes = PT_ATTR_DETACH | PT_ATTR_CLUSTER_DEFINED | PT_ATTR_CORE_DEFINED; work_attr[cxy][lid].cxy = cxy; work_attr[cxy][lid].lid = lid; // compute pointer on thread[cxy][lid] arguments void * work_args = *((void **)work_args_array + (cxy * CORES_MAX) + lid); // create thread if ( pthread_create( &trdid, // unused &work_attr[cxy][lid], work_func, work_args ) ) { printf("\n[ERROR] in %s : cannot create work thread[%x,%x]\n", __FUNCTION__ , cxy , lid ); // report error to parent build_args[parent_cxy][level+1].error = 1; } #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%x][%d] created thread[%x][%d]\n", __FUNCTION__, cxy, level, cxy, lid ); #endif } // wait on barrier until "work" children threads completed if( pthread_barrier_wait( &parent_barriers_array[cxy] ) ) { printf("\n[ERROR] in %s / first barrier for thread[%x][%d]\n", __FUNCTION__ , cxy , level ); // report error to parent build_args[parent_cxy][level+1].error = 1; } #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%x][%d] resume after children completion\n", __FUNCTION__, cxy, level ); #endif } // end level == 0 //////////////////////////////////////////////////////////// else // children are "build" threads { // the 4 children threads can be created in any core of each quarters // of the parent macro-cluster unsigned int parent_x; // X coordinate of parent macro-cluster unsigned int parent_y; // Y coordinate of parent macro-cluster unsigned int child_x; // X coordinate of child macro-cluster unsigned int child_y; // Y coordinate of child macro-cluster unsigned int child_cxy[2][2]; // selected cluster for child thread unsigned int child_lid[2][2]; // selected core index for child thread int child_sts[2][2]; // -1 if error / 0 if success / +1 if not found unsigned int x; // X loop index for children unsigned int y; // Y loop index for children unsigned int nb_children = 0; // get parent macro-cluster mask and half-size from level unsigned int mask = (1 << level) - 1; unsigned int half = (level > 0) ? (1 << (level - 1)) : 0; // get parent macro-cluster coordinates parent_x = HAL_X_FROM_CXY( cxy ) & ~mask; parent_y = HAL_Y_FROM_CXY( cxy ) & ~mask; // get child_cxy and child_lid for up to 4 children threads : 00 / 01 / 10 / 11 for (x = 0 ; x < 2 ; x++) { // compute child macro-cluster X coordinate child_x = (x == 0) ? parent_x : (parent_x + half); for (y = 0 ; y < 2 ; y++) { // compute child macro-cluster Y coordinate child_y = (y == 0) ? parent_y : (parent_y + half); // select the best core in macro-cluster child_sts[x][y] = get_best_core( HAL_CXY_FROM_XY( child_x , child_y ), level-1, &child_cxy[x][y], &child_lid[x][y] ); if( child_sts[x][y] < 0 ) // failure => report error { printf("\n[ERROR] in %s : illegal arguments for thread[%x,%x]\n", __FUNCTION__ , cxy , level ); // report error to parent build_args[parent_cxy][level+1].error = 1; } else if (child_sts[x][y] > 0 ) // macro-cluster undefined => does nothing { } else // core found { nb_children++; } } // end for y } // end for x // kill process if no active core in cluster // TODO this "if" should be replaced by an "assert" [AG] if( nb_children == 0 ) { printf("\n[PANIC] in %s : no active core in macro cluster [%x,%d]\n", __FUNCTION__ , cxy , level ); // report error to parent build_args[parent_cxy][level+1].error = 1; // kill process exit( EXIT_FAILURE ); } // initialize the barrier for (nb_children + 1) if( pthread_barrier_init( &build_barrier[cxy][level], NULL , nb_children + 1 ) ) { printf("\n[error] in %s : cannot initialise barrier for build thread[%x][%d]\n", __FUNCTION__ , cxy , level ); // report error to parent build_args[parent_cxy][level+1].error = 1; } #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%x][%d] created barrier / %d children\n", __FUNCTION__, cxy, level, nb_children + 1 ); #endif // create 1 to 4 children threads for (x = 0 ; x < 2 ; x++) { for (y = 0 ; y < 2 ; y++) { // thread is created only if macro-cluster is active if( child_sts[x][y] == 0 ) { unsigned int tgt_cxy = child_cxy[x][y]; unsigned int tgt_lid = child_lid[x][y]; // set child thread attributes build_attr[tgt_cxy][level-1].attributes = PT_ATTR_DETACH | PT_ATTR_CLUSTER_DEFINED | PT_ATTR_CORE_DEFINED; build_attr[tgt_cxy][level-1].cxy = tgt_cxy; build_attr[tgt_cxy][level-1].lid = tgt_lid; // propagate build function arguments build_args[tgt_cxy][level-1].cxy = child_cxy[x][y]; build_args[tgt_cxy][level-1].level = level-1; build_args[tgt_cxy][level-1].parent_cxy = cxy; build_args[tgt_cxy][level-1].root_level = root_level; build_args[tgt_cxy][level-1].work_func = work_func; build_args[tgt_cxy][level-1].work_args_array = work_args_array; build_args[tgt_cxy][level-1].parent_barriers_array = parent_barriers_array; // create thread if( pthread_create( &trdid, &build_attr[tgt_cxy][level-1], &pthread_recursive_build, &build_args[tgt_cxy][level-1] ) ) { printf("\n[ERROR] in %s : cannot create build thread[%x][%d]\n", __FUNCTION__ , child_cxy , level -1 ); // report error to parent build_args[parent_cxy][level+1].error = 1; } #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%x][%d] created thread[%x][%d] on core[%x,%d]\n", __FUNCTION__, cxy, level, tgt_cxy, level - 1, tgt_cxy, tgt_lid ); #endif } //end if sts[x][y] } // end for y } // end for x // wait on barrier until "build" children threads completed if( pthread_barrier_wait( &build_barrier[cxy][level] ) ) { printf("\n[ERROR] in %s / first barrier for thread[%x][%d]\n", __FUNCTION__ , cxy , level ); // report error to parent build_args[parent_cxy][level+1].error = 1; } #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%x][%d] resume after children completion\n", __FUNCTION__, cxy, level ); #endif } // end level > 0 // report error to parent when required if( build_args[cxy][level].error ) { build_args[parent_cxy][level+1].error = 1; } // all threads - but the root - // signal completion to parent thread and exit if( level < root_level ) { if( pthread_barrier_wait( &build_barrier[parent_cxy][level+1] ) ) { printf("\n[ERROR] in %s / second barrier for thread[%x][%d]\n", __FUNCTION__ , cxy , level ); // report error to parent build_args[parent_cxy][level+1].error = 1; } #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] thread[%x][%d] exit\n", __FUNCTION__, cxy , level ); #endif // "build" thread exit pthread_exit( NULL ); } } // end pthread_recursive_build() /////////////////////////////////////////////////////// int pthread_parallel_create( unsigned int root_level, void * work_func, void * work_args_array, void * parent_barriers_array ) { unsigned int root_cxy; unsigned int root_lid; // unused, but required by get_core_id() #if DEBUG_PTHREAD_PARALLEL printf("\n[%s] enter / root_level %d / func %x / args %x / barriers %x\n", __FUNCTION__, root_level, work_func, work_args_array, parent_barriers_array ); #endif // get calling thread cluster get_core_id( &root_cxy , &root_lid ); // set the build function arguments for the root thread build_args[root_cxy][root_level].cxy = root_cxy; build_args[root_cxy][root_level].level = root_level; build_args[root_cxy][root_level].root_level = root_level; build_args[root_cxy][root_level].work_func = work_func; build_args[root_cxy][root_level].work_args_array = work_args_array; build_args[root_cxy][root_level].parent_barriers_array = parent_barriers_array; // call the recursive build function pthread_recursive_build( &build_args[root_cxy][root_level] ); // check error if( build_args[root_cxy][root_level].error ) { printf("\n[error] in %s\n", __FUNCTION__ ); return -1; } return 0; } // end pthread_parallel_create() // Local Variables: // tab-width: 4 // c-basic-offset: 4 // c-file-offsets:((innamespace . 0)(inline-open . 0)) // indent-tabs-mode: nil // End: // vim: filetype=c:expandtab:shiftwidth=4:tabstop=4:softtabstop=4