Changeset 623 for trunk/kernel/libk/remote_barrier.c
- Timestamp:
- Mar 6, 2019, 4:37:15 PM (5 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/kernel/libk/remote_barrier.c
r619 r623 245 245 } // end generic_barrier_wait() 246 246 247 247 ///////////////////////////////////////////////////// 248 void generic_barrier_display( xptr_t gen_barrier_xp ) 249 { 250 // get cluster and local pointer 251 generic_barrier_t * gen_barrier_ptr = GET_PTR( gen_barrier_xp ); 252 cxy_t gen_barrier_cxy = GET_CXY( gen_barrier_xp ); 253 254 // get barrier type and extend pointer 255 bool_t is_dqt = hal_remote_l32( XPTR( gen_barrier_cxy , &gen_barrier_ptr->is_dqt ) ); 256 void * extend = hal_remote_lpt( XPTR( gen_barrier_cxy , &gen_barrier_ptr->extend ) ); 257 258 // buil extended pointer on the implementation specific barrier descriptor 259 xptr_t barrier_xp = XPTR( gen_barrier_cxy , extend ); 260 261 // display barrier state 262 if( is_dqt ) dqt_barrier_display( barrier_xp ); 263 else simple_barrier_display( barrier_xp ); 264 } 248 265 249 266 … … 454 471 455 472 } // end simple_barrier_wait() 473 474 ///////////////////////////////////////////////// 475 void simple_barrier_display( xptr_t barrier_xp ) 476 { 477 // get cluster and local pointer on simple barrier 478 simple_barrier_t * barrier_ptr = GET_PTR( barrier_xp ); 479 cxy_t barrier_cxy = GET_CXY( barrier_xp ); 480 481 // get barrier global parameters 482 uint32_t current = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->current ) ); 483 uint32_t arity = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->arity ) ); 484 485 printk("\n***** simple barrier : %d arrived threads on %d *****\n", 486 current, arity ); 487 488 } // end simple_barrier_display() 489 490 456 491 457 492 … … 493 528 494 529 // check x_size and y_size arguments 495 assert( (z <= 16) , "DQT dqthlarger than (16*16)\n");530 assert( (z <= 16) , "DQT mesh size larger than (16*16)\n"); 496 531 497 532 // check RPC descriptor size … … 973 1008 } // end dqt_barrier_wait() 974 1009 975 976 //////////////////////////////////////////////////////////////////////////////////////////// 977 // DQT static functions 978 
//////////////////////////////////////////////////////////////////////////////////////////// 979 980 981 ////////////////////////////////////////////////////////////////////////////////////////// 982 // This recursive function decrements the distributed "count" variables, 983 // traversing the DQT from bottom to root. 984 // The last arrived thread reset the local node before returning. 985 ////////////////////////////////////////////////////////////////////////////////////////// 986 static void dqt_barrier_increment( xptr_t node_xp ) 987 { 988 uint32_t expected; 989 uint32_t sense; 990 uint32_t arity; 991 992 thread_t * this = CURRENT_THREAD; 993 994 // get node cluster and local pointer 995 dqt_node_t * node_ptr = GET_PTR( node_xp ); 996 cxy_t node_cxy = GET_CXY( node_xp ); 997 998 // build relevant extended pointers 999 xptr_t arity_xp = XPTR( node_cxy , &node_ptr->arity ); 1000 xptr_t sense_xp = XPTR( node_cxy , &node_ptr->sense ); 1001 xptr_t current_xp = XPTR( node_cxy , &node_ptr->current ); 1002 xptr_t lock_xp = XPTR( node_cxy , &node_ptr->lock ); 1003 xptr_t root_xp = XPTR( node_cxy , &node_ptr->root ); 1004 1005 #if DEBUG_BARRIER_WAIT 1006 uint32_t cycle = (uint32_t)hal_get_cycles(); 1007 uint32_t level = hal_remote_l32( XPTR( node_cxy, &node_ptr->level ) ); 1008 if( cycle > DEBUG_BARRIER_WAIT ) 1009 printk("\n[%s] thread[%x,%x] increments DQT node(%d,%d,%d) / cycle %d\n", 1010 __FUNCTION__ , this->process->pid, this->trdid, 1011 HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level ); 1012 #endif 1013 1014 // get extended pointer on parent node 1015 xptr_t parent_xp = hal_remote_l64( XPTR( node_cxy , &node_ptr->parent_xp ) ); 1016 1017 // take busylock 1018 remote_busylock_acquire( lock_xp ); 1019 1020 // get sense and arity values from barrier descriptor 1021 sense = hal_remote_l32( sense_xp ); 1022 arity = hal_remote_l32( arity_xp ); 1023 1024 // compute expected value 1025 expected = (sense == 0) ? 
1 : 0; 1026 1027 // increment current number of arrived threads / get value before increment 1028 uint32_t current = hal_remote_atomic_add( current_xp , 1 ); 1029 1030 // last arrived thread reset the local node, makes the recursive call 1031 // on parent node, and reactivates all waiting thread when returning. 1032 // other threads block, register in queue, and deschedule. 1033 1034 if ( current == (arity - 1) ) // last thread 1035 { 1036 1037 #if DEBUG_BARRIER_WAIT 1038 if( cycle > DEBUG_BARRIER_WAIT ) 1039 printk("\n[%s] thread[%x,%x] reset DQT node(%d,%d,%d)\n", 1040 __FUNCTION__ , this->process->pid, this->trdid, 1041 HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level ); 1042 #endif 1043 // reset the current node 1044 hal_remote_s32( sense_xp , expected ); 1045 hal_remote_s32( current_xp , 0 ); 1046 1047 // release busylock protecting the current node 1048 remote_busylock_release( lock_xp ); 1049 1050 // recursive call on parent node when current node is not the root 1051 if( parent_xp != XPTR_NULL) dqt_barrier_increment( parent_xp ); 1052 1053 // unblock all waiting threads on this node 1054 while( xlist_is_empty( root_xp ) == false ) 1055 { 1056 // get pointers on first waiting thread 1057 xptr_t thread_xp = XLIST_FIRST( root_xp , thread_t , wait_list ); 1058 cxy_t thread_cxy = GET_CXY( thread_xp ); 1059 thread_t * thread_ptr = GET_PTR( thread_xp ); 1060 1061 #if (DEBUG_BARRIER_WAIT & 1) 1062 trdid_t trdid = hal_remote_l32( XPTR( thread_cxy , &thread_ptr->trdid ) ); 1063 process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) ); 1064 pid_t pid = hal_remote_l32( XPTR( thread_cxy , &process->pid ) ); 1065 if( cycle > DEBUG_BARRIER_WAIT ) 1066 printk("\n[%s] thread[%x,%x] unblock thread[%x,%x]\n", 1067 __FUNCTION__, this->process->pid, this->trdid, pid, trdid ); 1068 #endif 1069 // remove waiting thread from queue 1070 xlist_unlink( XPTR( thread_cxy , &thread_ptr->wait_list ) ); 1071 1072 // unblock waiting thread 1073 
thread_unblock( thread_xp , THREAD_BLOCKED_USERSYNC ); 1074 } 1075 } 1076 else // not the last thread 1077 { 1078 // get extended pointer on xlist entry from thread 1079 xptr_t entry_xp = XPTR( local_cxy , &this->wait_list ); 1080 1081 // register calling thread in barrier waiting queue 1082 xlist_add_last( root_xp , entry_xp ); 1083 1084 // block calling thread 1085 thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_USERSYNC ); 1086 1087 // release busylock protecting the remote_barrier 1088 remote_busylock_release( lock_xp ); 1089 1090 #if DEBUG_BARRIER_WAIT 1091 if( cycle > DEBUG_BARRIER_WAIT ) 1092 printk("\n[%s] thread[%x,%x] blocks on node(%d,%d,%d)\n", 1093 __FUNCTION__ , this->process->pid, this->trdid, 1094 HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level ); 1095 #endif 1096 // deschedule 1097 sched_yield("blocked on barrier"); 1098 } 1099 1100 return; 1101 1102 } // end dqt_barrier_decrement() 1103 1104 #if DEBUG_BARRIER_CREATE 1105 1106 //////////////////////////////////////////////////////////////////////////////////////////// 1107 // This debug function displays all DQT nodes in all clusters. 1108 //////////////////////////////////////////////////////////////////////////////////////////// 1109 // @ barrier_xp : extended pointer on DQT barrier descriptor. 
1110 //////////////////////////////////////////////////////////////////////////////////////////// 1111 static void dqt_barrier_display( xptr_t barrier_xp ) 1010 ////////////////////////////////////////////// 1011 void dqt_barrier_display( xptr_t barrier_xp ) 1112 1012 { 1113 1013 // get cluster and local pointer on DQT barrier … … 1147 1047 uint32_t level = hal_remote_l32( XPTR( node_cxy , &node_ptr->level )); 1148 1048 uint32_t arity = hal_remote_l32( XPTR( node_cxy , &node_ptr->arity )); 1049 uint32_t count = hal_remote_l32( XPTR( node_cxy , &node_ptr->current )); 1149 1050 xptr_t pa_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->parent_xp )); 1150 1051 xptr_t c0_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[0] )); … … 1153 1054 xptr_t c3_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[3] )); 1154 1055 1155 printk(" . level %d : (%x,%x) / arity%d / P(%x,%x) / C0(%x,%x)"1056 printk(" . level %d : (%x,%x) / %d on %d / P(%x,%x) / C0(%x,%x)" 1156 1057 " C1(%x,%x) / C2(%x,%x) / C3(%x,%x)\n", 1157 level, node_cxy, node_ptr, arity,1058 level, node_cxy, node_ptr, count, arity, 1158 1059 GET_CXY(pa_xp), GET_PTR(pa_xp), 1159 1060 GET_CXY(c0_xp), GET_PTR(c0_xp), … … 1167 1068 } // end dqt_barrier_display() 1168 1069 1169 #endif 1070 1071 ////////////////////////////////////////////////////////////////////////////////////////// 1072 // This static (recursive) function is called by the dqt_barrier_wait() function. 1073 // It traverses the DQT from bottom to root, and decrements the "current" variables. 1074 // For each traversed node, it blocks and deschedules if it is not the last expected 1075 // thread. The last arrived thread reset the local node before returning. 
1076 ////////////////////////////////////////////////////////////////////////////////////////// 1077 static void dqt_barrier_increment( xptr_t node_xp ) 1078 { 1079 uint32_t expected; 1080 uint32_t sense; 1081 uint32_t arity; 1082 1083 thread_t * this = CURRENT_THREAD; 1084 1085 // get node cluster and local pointer 1086 dqt_node_t * node_ptr = GET_PTR( node_xp ); 1087 cxy_t node_cxy = GET_CXY( node_xp ); 1088 1089 // build relevant extended pointers 1090 xptr_t arity_xp = XPTR( node_cxy , &node_ptr->arity ); 1091 xptr_t sense_xp = XPTR( node_cxy , &node_ptr->sense ); 1092 xptr_t current_xp = XPTR( node_cxy , &node_ptr->current ); 1093 xptr_t lock_xp = XPTR( node_cxy , &node_ptr->lock ); 1094 xptr_t root_xp = XPTR( node_cxy , &node_ptr->root ); 1095 1096 #if DEBUG_BARRIER_WAIT 1097 uint32_t cycle = (uint32_t)hal_get_cycles(); 1098 uint32_t level = hal_remote_l32( XPTR( node_cxy, &node_ptr->level ) ); 1099 if( cycle > DEBUG_BARRIER_WAIT ) 1100 printk("\n[%s] thread[%x,%x] increments DQT node(%d,%d,%d) / cycle %d\n", 1101 __FUNCTION__ , this->process->pid, this->trdid, 1102 HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level ); 1103 #endif 1104 1105 // get extended pointer on parent node 1106 xptr_t parent_xp = hal_remote_l64( XPTR( node_cxy , &node_ptr->parent_xp ) ); 1107 1108 // take busylock 1109 remote_busylock_acquire( lock_xp ); 1110 1111 // get sense and arity values from barrier descriptor 1112 sense = hal_remote_l32( sense_xp ); 1113 arity = hal_remote_l32( arity_xp ); 1114 1115 // compute expected value 1116 expected = (sense == 0) ? 1 : 0; 1117 1118 // increment current number of arrived threads / get value before increment 1119 uint32_t current = hal_remote_atomic_add( current_xp , 1 ); 1120 1121 // last arrived thread reset the local node, makes the recursive call 1122 // on parent node, and reactivates all waiting thread when returning. 1123 // other threads block, register in queue, and deschedule. 
1124 1125 if ( current == (arity - 1) ) // last thread 1126 { 1127 1128 #if DEBUG_BARRIER_WAIT 1129 if( cycle > DEBUG_BARRIER_WAIT ) 1130 printk("\n[%s] thread[%x,%x] reset DQT node(%d,%d,%d)\n", 1131 __FUNCTION__ , this->process->pid, this->trdid, 1132 HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level ); 1133 #endif 1134 // reset the current node 1135 hal_remote_s32( sense_xp , expected ); 1136 hal_remote_s32( current_xp , 0 ); 1137 1138 // release busylock protecting the current node 1139 remote_busylock_release( lock_xp ); 1140 1141 // recursive call on parent node when current node is not the root 1142 if( parent_xp != XPTR_NULL) dqt_barrier_increment( parent_xp ); 1143 1144 // unblock all waiting threads on this node 1145 while( xlist_is_empty( root_xp ) == false ) 1146 { 1147 // get pointers on first waiting thread 1148 xptr_t thread_xp = XLIST_FIRST( root_xp , thread_t , wait_list ); 1149 cxy_t thread_cxy = GET_CXY( thread_xp ); 1150 thread_t * thread_ptr = GET_PTR( thread_xp ); 1151 1152 #if (DEBUG_BARRIER_WAIT & 1) 1153 trdid_t trdid = hal_remote_l32( XPTR( thread_cxy , &thread_ptr->trdid ) ); 1154 process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) ); 1155 pid_t pid = hal_remote_l32( XPTR( thread_cxy , &process->pid ) ); 1156 if( cycle > DEBUG_BARRIER_WAIT ) 1157 printk("\n[%s] thread[%x,%x] unblock thread[%x,%x]\n", 1158 __FUNCTION__, this->process->pid, this->trdid, pid, trdid ); 1159 #endif 1160 // remove waiting thread from queue 1161 xlist_unlink( XPTR( thread_cxy , &thread_ptr->wait_list ) ); 1162 1163 // unblock waiting thread 1164 thread_unblock( thread_xp , THREAD_BLOCKED_USERSYNC ); 1165 } 1166 } 1167 else // not the last thread 1168 { 1169 // get extended pointer on xlist entry from thread 1170 xptr_t entry_xp = XPTR( local_cxy , &this->wait_list ); 1171 1172 // register calling thread in barrier waiting queue 1173 xlist_add_last( root_xp , entry_xp ); 1174 1175 // block calling thread 1176 thread_block( 
XPTR( local_cxy , this ) , THREAD_BLOCKED_USERSYNC ); 1177 1178 // release busylock protecting the remote_barrier 1179 remote_busylock_release( lock_xp ); 1180 1181 #if DEBUG_BARRIER_WAIT 1182 if( cycle > DEBUG_BARRIER_WAIT ) 1183 printk("\n[%s] thread[%x,%x] blocks on node(%d,%d,%d)\n", 1184 __FUNCTION__ , this->process->pid, this->trdid, 1185 HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level ); 1186 #endif 1187 // deschedule 1188 sched_yield("blocked on barrier"); 1189 } 1190 1191 return; 1192 1193 } // end dqt_barrier_increment() 1194 1195
Note: See TracChangeset
for help on using the changeset viewer.