/* * pthread.c - User side pthread related functions implementation. * * Author Alain Greiner (2016,2017) * * Copyright (c) UPMC Sorbonne Universites * * This file is part of ALMOS-MKH. * * ALMOS-MKH is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2.0 of the License. * * ALMOS-MKH is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ALMOS-MKH; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #define PTHREAD_MUTEX_DEBUG 0 #define PTHREAD_BARRIER_DEBUG 0 #define reg_t int //////////////////////////////////////////////////////////////////////////////////////////// // Threads //////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////// int pthread_create( pthread_t * trdid, const pthread_attr_t * attr, void * start_func, void * start_args ) { return hal_user_syscall( SYS_THREAD_CREATE, (reg_t)trdid, (reg_t)attr, (reg_t)start_func, (reg_t)start_args ); } ///////////////////////////////////// int pthread_join( pthread_t trdid, void ** exit_value ) { return hal_user_syscall( SYS_THREAD_JOIN, (reg_t)exit_value, 0, 0, 0 ); } /////////////////////////////////////// int pthread_detach( pthread_t trdid ) { return hal_user_syscall( SYS_THREAD_DETACH, (reg_t)trdid, 0, 0, 0 ); } ///////////////////////////////////// int pthread_exit( void * exit_value ) { return hal_user_syscall( SYS_THREAD_EXIT, (reg_t)exit_value, 0, 0, 0 ); } /////////////////// int pthread_yield() { return hal_user_syscall( SYS_THREAD_YIELD, 0, 0, 0, 0 ); } //////////////////////////////////////////////////////////////////////////////////////////// // Barriers //////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////// // This recursive function initializes the SQT nodes // traversing the SQT from root to bottom //////////////////////////////////////////////////////////////////////////////////////////// static void sqt_barrier_build( pthread_barrier_t * barrier, unsigned int x, unsigned int y, unsigned int level, sqt_node_t * parent, unsigned int x_size, unsigned int y_size, unsigned int nthreads ) { // get target node address sqt_node_t * node = barrier->node[x][y][level]; if (level == 0 ) // terminal case { // initializes target node node->arity = nthreads; node->count = nthreads; node->sense = 0; node->level = 0; node->parent = parent; node->child[0] = NULL; node->child[1] = NULL; node->child[2] = NULL; node->child[3] = NULL; #if PTHREAD_BARRIER_DEBUG printf("\n[BARRIER] %s : sqt_node[%d][%d][%d] : arity = %d / parent = %x\n" "child0 = %x / child1 = %x / child2 = %x / child3 = %x\n", __FUNCTION__, x, y, level, node->arity, node->parent, node->child[0], node->child[1], node->child[2], node->child[3] ); #endif } else // non terminal case { unsigned int cx[4]; // x coordinate for children unsigned int cy[4]; // y coordinate for children unsigned int arity = 0; unsigned int i; // the child0 coordinates are equal to the parent coordinates // other children coordinates are incremented depending on the level value cx[0] = x; cy[0] = y; cx[1] = x; cy[1] = y + (1 << (level-1)); cx[2] = x + (1 << (level-1)); cy[2] = y; cx[3] = x + (1 << (level-1)); cy[3] = y + (1 << (level-1)); // initializes parent node taken into account the actual number of childs // child pointer is NULL if coordinates outside the mesh for ( i = 0 ; i < 4 ; i++ ) { if ( (cx[i] < x_size) && (cy[i] < y_size) ) { node->child[i] = barrier->node[cx[i]][cy[i]][level-1]; arity++; } else node->child[i] = NULL; } node->arity = arity; node->count = arity; node->sense = 0; node->level = level; node->parent = parent; #if PTHREAD_BARRIER_DEBUG printf("\n[BARRIER] %s : sqt_node[%d][%d][%d] : arity = %d / parent = %x\n" "child0 = %x / child1 = %x / child2 = %x / child3 = %x\n", __FUNCTION__, x, y, level, node->arity, node->parent, node->child[0], node->child[1], node->child[2], node->child[3] ); #endif // recursive calls for children nodes for ( i = 0 ; i < 4 ; i++ ) { if ( (cx[i] < x_size) && (cy[i] < y_size) ) sqt_barrier_build( barrier, cx[i], cy[i], level-1, node, x_size, y_size, nthreads ); } } } // end sqt_barrier_build() //////////////////////////////////////////////////////////////// int pthread_barrier_init( pthread_barrier_t * barrier, const pthread_barrierattr_t * attr, unsigned int count ) { unsigned int x_size; unsigned int y_size; unsigned int nthreads; if( attr != NULL ) { x_size = attr->x_size; y_size = attr->y_size; nthreads = attr->nthreads; } else { x_size = 1; y_size = 1; nthreads = count; } // check attributes assert( x_size <= QDT_XMAX ); assert( y_size <= QDT_YMAX ); assert( x_size * y_size * nthreads == count ); // compute SQT levels unsigned int levels; unsigned int z = (x_size > y_size) ? x_size : y_size; levels = (z < 2) ? 1 : (z < 3) ? 2 : (z < 5) ? 3 : (z < 9) ? 4 : 5; #if PTHREAD_BARRIER_DEBUG unsigned int side = (z < 2) ? 1 : (z < 3) ? 2 : (z < 5) ? 4 : (z < 9) ? 8 : 16; printf("\n[BARRIER] %s : x_size = %d / y_size = %d / levels = %d / side = %d\n", __FUNCTION__ , x_size , y_size , levels , side ); #endif // allocates memory for the SQT nodes and initializes SQT nodes pointers array // the actual number of SQT nodes in a cluster(x,y) depends on (x,y): // At least 1 node / at most 5 nodes unsigned int x; // x coordinate for one SQT node unsigned int y; // y coordinate for one SQT node unsigned int l; // level for one SQT node for ( x = 0 ; x < x_size ; x++ ) { for ( y = 0 ; y < y_size ; y++ ) { unsigned int cxy = (x<node[x][y][l] = node; #if PTHREAD_BARRIER_DEBUG printf("\n[BARRIER] %s : sqt_node[%d][%d][%d] : vaddr = %x\n", __FUNCTION__ , x , y , l , node ); #endif } } } } // recursively initialize all SQT nodes from root to bottom sqt_barrier_build( barrier, 0, 0, levels-1, NULL, x_size, y_size, nthreads ); hal_user_fence(); return 0; } // end pthread_barrier_init ////////////////////////////////////////////////////////////////////////////////////////// // This recursive function decrements the distributed "count" variables, // traversing the SQT from bottom to root. // The last arrived thread reset the local node before returning. ////////////////////////////////////////////////////////////////////////////////////////// static void sqt_barrier_decrement( sqt_node_t * node ) { #if PTHREAD_BARRIER_DEBUG unsigned int cxy; unsigned int lid; get_core( &cxy , &lid ); printf("\n[BARRIER] %s : core[%x,%d] decrement SQT barrier node %x :\n" " level = %d / parent = %x / arity = %d / sense = %d / count = %d\n", __FUNCTION__ , cxy , lid , (unsigned int)node , node->level , node->parent, node->arity , node->sense , node->count ); #endif unsigned int expected; // compute expected sense value if ( node->sense == 0) expected = 1; else expected = 0; // atomically decrement count int count = hal_user_atomic_add( (int *)&node->count , -1 ); // last arrived thread makes the recursive call if ( count == 1 ) // last thread { // decrement the parent node if the current node is not the root if ( node->parent != NULL ) sqt_barrier_decrement( node->parent ); // reset the current node node->sense = expected; node->count = node->arity; #if PTHREAD_BARRIER_DEBUG printf("\n[BARRIER] %s : core[%x,%d] reset SQT barrier node %x :\n" " level = %d / arity = %d / sense = %d / count = %d\n", __FUNCTION__ , cxy , lid , (unsigned int)node , node->level , node->arity , node->sense , node->count ); #endif return; } else // not the last thread { // poll sense while( 1 ) { if( node->sense == expected ) break; } return; } } // end sqt_barrier_decrement() /////////////////////////////////////////////////////// int pthread_barrier_wait( pthread_barrier_t * barrier ) { // get calling core cluster unsigned int cxy; unsigned int lid; get_core( &cxy , &lid ); // get calling core coordinate unsigned int x = cxy >> QDT_YWIDTH; unsigned int y = cxy & QDT_YMASK; #if PTHREAD_BARRIER_DEBUG printf("\n[BARRIER] %s : enter for core[%x,%d] / barrier = %x / node = %x\n", __FUNCTION__ , cxy , lid , barrier, barrier->node[x][y][0] ); #endif // recursively decrement count from bottom to root sqt_barrier_decrement( barrier->node[x][y][0] ); hal_user_fence(); return 0; } // end pthread_barrier_wait() //////////////////////////////////////////////////////////////////////////////////////////// // Mutexes //////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////// int pthread_mutex_init( pthread_mutex_t * mutex, const pthread_mutexattr_t * attr ) { if( attr != NULL ) { printf("\n[ERROR] in %s : argument not supported\n", __FUNCTION__); return -1; } mutex->current = 0; mutex->free = 0; #if PTHEAD_MUTEX_DEBUG unsigned int cxy; unsigned int lid; get_core( &cxy , &lid ); printf("\n[MUTEX DEBUG] %s : core[%x,%d] initializes mutex %x\n", __FUNCTION__, cxy, lid, mutex ); #endif return 0; } ///////////////////////////////////////////////// int pthread_mutex_lock( pthread_mutex_t * mutex ) { unsigned int ticket; // get next free ticket ticket = (unsigned int)hal_user_atomic_add( (int *)&mutex->free, 1 ); #if PTHREAD_MUTEX_DEBUG unsigned int cxy; unsigned int lid; get_core( &cxy , &lid ); printf("\n[MUTEX DEBUG] %s : core[%x,%d] get ticket %d\n", " / mutex = %x / current = %d / free = %d\n", __FUNCTION__, cxy, lid, ticket, mutex, mutex->current, mutex->free ); #endif // poll the current index while( 1 ) { if( mutex->current == ticket) break; } #if PTHREAD_MUTEX_DEBUG printf("\n[MUTEX DEBUG] %s : core[%x,%d] get mutex %x / current = %d / free = %d\n", __FUNCTION__, cxy, lid, mutex, mutex->current, mutex->free ); #endif return 0; } //////////////////////////////////////////////////// int pthread_mutex_trylock( pthread_mutex_t * mutex ) { unsigned int ticket; // get next free ticket ticket = (unsigned int)hal_user_atomic_add( (int *)&mutex->free, 1 ); #if PTHREAD_MUTEX_DEBUG unsigned int cxy; unsigned int lid; get_core( &cxy, &lid ); printf("\n[MUTEX DEBUG] %s : core[%x,%d] get ticket = %d" " / mutex = %x / current = %d / free = %d\n", __FUNCTION__, cxy, lid, ticket, mutex, mutex->current, mutex->free ); #endif // test ticket if( ticket == mutex->current ) return 0; // success else return -1; // failure } /////////////////////////////////////////////////// int pthread_mutex_unlock( pthread_mutex_t * mutex ) { hal_user_fence(); mutex->current = mutex->current + 1; #if PTHREAD_MUTEX_DEBUG unsigned int cxy; unsigned int lid; get_core( &cxy , &lid ); printf("\n[MUTEX_DEBUG] %s : core[%x,%d] releases mutex %x" " / current = %d / free = %d\n", __FUNCTION__, cxy, lid, mutex, mutex->current, mutex->free ); #endif return 0; } // Local Variables: // tab-width: 4 // c-basic-offset: 4 // c-file-offsets:((innamespace . 0)(inline-open . 0)) // indent-tabs-mode: nil // End: // vim: filetype=c:expandtab:shiftwidth=4:tabstop=4:softtabstop=4