Ignore:
Timestamp:
Mar 6, 2019, 4:37:15 PM (5 years ago)
Author:
alain
Message:

Introduce three new types of vsegs (KCODE,KDATA,KDEV)
to map the kernel vsegs in the process VSL and GPT.
This now used by both the TSAR and the I86 architectures.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/kernel/libk/remote_barrier.c

    r619 r623  
    245245}  // end generic_barrier_wait()
    246246
    247 
     247/////////////////////////////////////////////////////
     248void generic_barrier_display( xptr_t gen_barrier_xp )
     249{
     250    // get cluster and local pointer
     251    generic_barrier_t * gen_barrier_ptr = GET_PTR( gen_barrier_xp );
     252    cxy_t               gen_barrier_cxy = GET_CXY( gen_barrier_xp );
     253
     254    // get barrier type and extend pointer
     255    bool_t  is_dqt = hal_remote_l32( XPTR( gen_barrier_cxy , &gen_barrier_ptr->is_dqt ) );
     256    void  * extend = hal_remote_lpt( XPTR( gen_barrier_cxy , &gen_barrier_ptr->extend ) );
     257
     258    // buil extended pointer on the implementation specific barrier descriptor
     259    xptr_t barrier_xp = XPTR( gen_barrier_cxy , extend );
     260
     261    // display barrier state
     262    if( is_dqt ) dqt_barrier_display( barrier_xp );
     263    else         simple_barrier_display( barrier_xp );
     264}
    248265
    249266
     
    454471
    455472}  // end simple_barrier_wait()
     473
     474/////////////////////////////////////////////////
     475void simple_barrier_display( xptr_t  barrier_xp )
     476{
     477    // get cluster and local pointer on simple barrier
     478    simple_barrier_t * barrier_ptr = GET_PTR( barrier_xp );
     479    cxy_t              barrier_cxy = GET_CXY( barrier_xp );
     480
     481    // get barrier global parameters
     482    uint32_t current  = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->current ) );
     483    uint32_t arity    = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->arity   ) );
     484
     485    printk("\n***** simple barrier : %d arrived threads on %d *****\n",
     486    current, arity );
     487
     488}   // end simple_barrier_display()
     489
     490
    456491
    457492
     
    493528
    494529// check x_size and y_size arguments
    495 assert( (z <= 16) , "DQT dqth larger than (16*16)\n");
     530assert( (z <= 16) , "DQT mesh size larger than (16*16)\n");
    496531
    497532// check RPC descriptor size
     
    9731008}  // end dqt_barrier_wait()
    9741009
    975 
    976 ////////////////////////////////////////////////////////////////////////////////////////////
    977 //          DQT static functions
    978 ////////////////////////////////////////////////////////////////////////////////////////////
    979 
    980 
    981 //////////////////////////////////////////////////////////////////////////////////////////
    982 // This recursive function decrements the distributed "count" variables,
    983 // traversing the DQT from bottom to root.
    984 // The last arrived thread reset the local node before returning.
    985 //////////////////////////////////////////////////////////////////////////////////////////
    986 static void dqt_barrier_increment( xptr_t  node_xp )
    987 {
    988     uint32_t   expected;
    989     uint32_t   sense;
    990     uint32_t   arity;
    991 
    992     thread_t * this = CURRENT_THREAD;
    993 
    994     // get node cluster and local pointer
    995     dqt_node_t * node_ptr = GET_PTR( node_xp );
    996     cxy_t        node_cxy = GET_CXY( node_xp );
    997 
    998     // build relevant extended pointers
    999     xptr_t  arity_xp   = XPTR( node_cxy , &node_ptr->arity );
    1000     xptr_t  sense_xp   = XPTR( node_cxy , &node_ptr->sense );
    1001     xptr_t  current_xp = XPTR( node_cxy , &node_ptr->current );
    1002     xptr_t  lock_xp    = XPTR( node_cxy , &node_ptr->lock );
    1003     xptr_t  root_xp    = XPTR( node_cxy , &node_ptr->root );
    1004 
    1005 #if DEBUG_BARRIER_WAIT
    1006 uint32_t   cycle = (uint32_t)hal_get_cycles();
    1007 uint32_t   level = hal_remote_l32( XPTR( node_cxy, &node_ptr->level ) );
    1008 if( cycle > DEBUG_BARRIER_WAIT )
    1009 printk("\n[%s] thread[%x,%x] increments DQT node(%d,%d,%d) / cycle %d\n",
    1010 __FUNCTION__ , this->process->pid, this->trdid,
    1011 HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level );
    1012 #endif
    1013 
    1014     // get extended pointer on parent node
    1015     xptr_t  parent_xp  = hal_remote_l64( XPTR( node_cxy , &node_ptr->parent_xp ) );
    1016 
    1017     // take busylock
    1018     remote_busylock_acquire( lock_xp );
    1019    
    1020     // get sense and arity values from barrier descriptor
    1021     sense = hal_remote_l32( sense_xp );
    1022     arity = hal_remote_l32( arity_xp );
    1023 
    1024     // compute expected value
    1025     expected = (sense == 0) ? 1 : 0;
    1026 
    1027     // increment current number of arrived threads / get value before increment
    1028     uint32_t current = hal_remote_atomic_add( current_xp , 1 );
    1029 
    1030     // last arrived thread reset the local node, makes the recursive call
    1031     // on parent node, and reactivates all waiting thread when returning.
    1032     // other threads block, register in queue, and deschedule.
    1033 
    1034     if ( current == (arity - 1) )                        // last thread 
    1035     {
    1036 
    1037 #if DEBUG_BARRIER_WAIT
    1038 if( cycle > DEBUG_BARRIER_WAIT )
    1039 printk("\n[%s] thread[%x,%x] reset DQT node(%d,%d,%d)\n",
    1040 __FUNCTION__ , this->process->pid, this->trdid,
    1041 HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level );
    1042 #endif
    1043         // reset the current node
    1044         hal_remote_s32( sense_xp   , expected );
    1045         hal_remote_s32( current_xp , 0 );
    1046 
    1047         // release busylock protecting the current node
    1048         remote_busylock_release( lock_xp );
    1049 
    1050         // recursive call on parent node when current node is not the root
    1051         if( parent_xp != XPTR_NULL) dqt_barrier_increment( parent_xp );
    1052 
    1053         // unblock all waiting threads on this node
    1054         while( xlist_is_empty( root_xp ) == false )
    1055         {
    1056             // get pointers on first waiting thread
    1057             xptr_t     thread_xp  = XLIST_FIRST( root_xp , thread_t , wait_list );
    1058             cxy_t      thread_cxy = GET_CXY( thread_xp );
    1059             thread_t * thread_ptr = GET_PTR( thread_xp );
    1060 
    1061 #if (DEBUG_BARRIER_WAIT & 1)
    1062 trdid_t     trdid   = hal_remote_l32( XPTR( thread_cxy , &thread_ptr->trdid ) );
    1063 process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) );
    1064 pid_t       pid     = hal_remote_l32( XPTR( thread_cxy , &process->pid ) );
    1065 if( cycle > DEBUG_BARRIER_WAIT )
    1066 printk("\n[%s] thread[%x,%x] unblock thread[%x,%x]\n",
    1067 __FUNCTION__, this->process->pid, this->trdid, pid, trdid );
    1068 #endif
    1069             // remove waiting thread from queue
    1070             xlist_unlink( XPTR( thread_cxy , &thread_ptr->wait_list ) );
    1071 
    1072             // unblock waiting thread
    1073             thread_unblock( thread_xp , THREAD_BLOCKED_USERSYNC );
    1074         }
    1075     }
    1076     else                                               // not the last thread
    1077     {
    1078         // get extended pointer on xlist entry from thread
    1079         xptr_t  entry_xp = XPTR( local_cxy , &this->wait_list );
    1080        
    1081         // register calling thread in barrier waiting queue
    1082         xlist_add_last( root_xp , entry_xp );
    1083 
    1084         // block calling thread
    1085         thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_USERSYNC );
    1086 
    1087         // release busylock protecting the remote_barrier
    1088         remote_busylock_release( lock_xp );
    1089 
    1090 #if DEBUG_BARRIER_WAIT
    1091 if( cycle > DEBUG_BARRIER_WAIT )
    1092 printk("\n[%s] thread[%x,%x] blocks on node(%d,%d,%d)\n",
    1093 __FUNCTION__ , this->process->pid, this->trdid,
    1094 HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level );
    1095 #endif
    1096         // deschedule
    1097         sched_yield("blocked on barrier");
    1098     }
    1099 
    1100     return;
    1101 
    1102 } // end dqt_barrier_decrement()
    1103 
    1104 #if DEBUG_BARRIER_CREATE
    1105 
    1106 ////////////////////////////////////////////////////////////////////////////////////////////
    1107 // This debug function displays all DQT nodes in all clusters.
    1108 ////////////////////////////////////////////////////////////////////////////////////////////
    1109 // @ barrier_xp   : extended pointer on DQT barrier descriptor.
    1110 ////////////////////////////////////////////////////////////////////////////////////////////
    1111 static void dqt_barrier_display( xptr_t  barrier_xp )
     1010//////////////////////////////////////////////
     1011void dqt_barrier_display( xptr_t  barrier_xp )
    11121012{
    11131013    // get cluster and local pointer on DQT barrier
     
    11471047                     uint32_t level = hal_remote_l32( XPTR( node_cxy , &node_ptr->level       ));
    11481048                     uint32_t arity = hal_remote_l32( XPTR( node_cxy , &node_ptr->arity       ));
     1049                     uint32_t count = hal_remote_l32( XPTR( node_cxy , &node_ptr->current     ));
    11491050                     xptr_t   pa_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->parent_xp   ));
    11501051                     xptr_t   c0_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[0] ));
     
    11531054                     xptr_t   c3_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[3] ));
    11541055
    1155                      printk("   . level %d : (%x,%x) / arity %d / P(%x,%x) / C0(%x,%x)"
     1056                     printk("   . level %d : (%x,%x) / %d on %d / P(%x,%x) / C0(%x,%x)"
    11561057                            " C1(%x,%x) / C2(%x,%x) / C3(%x,%x)\n",
    1157                      level, node_cxy, node_ptr, arity,
     1058                     level, node_cxy, node_ptr, count, arity,
    11581059                     GET_CXY(pa_xp), GET_PTR(pa_xp),
    11591060                     GET_CXY(c0_xp), GET_PTR(c0_xp),
     
    11671068}   // end dqt_barrier_display()
    11681069
    1169 #endif
     1070
     1071//////////////////////////////////////////////////////////////////////////////////////////
     1072// This static (recursive) function is called by the dqt_barrier_wait() function.
     1073// It traverses the DQT from bottom to root, and decrements the "current" variables.
     1074// For each traversed node, it blocks and deschedules if it is not the last expected
     1075//  thread. The last arrived thread reset the local node before returning.
     1076//////////////////////////////////////////////////////////////////////////////////////////
     1077static void dqt_barrier_increment( xptr_t  node_xp )
     1078{
     1079    uint32_t   expected;
     1080    uint32_t   sense;
     1081    uint32_t   arity;
     1082
     1083    thread_t * this = CURRENT_THREAD;
     1084
     1085    // get node cluster and local pointer
     1086    dqt_node_t * node_ptr = GET_PTR( node_xp );
     1087    cxy_t        node_cxy = GET_CXY( node_xp );
     1088
     1089    // build relevant extended pointers
     1090    xptr_t  arity_xp   = XPTR( node_cxy , &node_ptr->arity );
     1091    xptr_t  sense_xp   = XPTR( node_cxy , &node_ptr->sense );
     1092    xptr_t  current_xp = XPTR( node_cxy , &node_ptr->current );
     1093    xptr_t  lock_xp    = XPTR( node_cxy , &node_ptr->lock );
     1094    xptr_t  root_xp    = XPTR( node_cxy , &node_ptr->root );
     1095
     1096#if DEBUG_BARRIER_WAIT
     1097uint32_t   cycle = (uint32_t)hal_get_cycles();
     1098uint32_t   level = hal_remote_l32( XPTR( node_cxy, &node_ptr->level ) );
     1099if( cycle > DEBUG_BARRIER_WAIT )
     1100printk("\n[%s] thread[%x,%x] increments DQT node(%d,%d,%d) / cycle %d\n",
     1101__FUNCTION__ , this->process->pid, this->trdid,
     1102HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level );
     1103#endif
     1104
     1105    // get extended pointer on parent node
     1106    xptr_t  parent_xp  = hal_remote_l64( XPTR( node_cxy , &node_ptr->parent_xp ) );
     1107
     1108    // take busylock
     1109    remote_busylock_acquire( lock_xp );
     1110   
     1111    // get sense and arity values from barrier descriptor
     1112    sense = hal_remote_l32( sense_xp );
     1113    arity = hal_remote_l32( arity_xp );
     1114
     1115    // compute expected value
     1116    expected = (sense == 0) ? 1 : 0;
     1117
     1118    // increment current number of arrived threads / get value before increment
     1119    uint32_t current = hal_remote_atomic_add( current_xp , 1 );
     1120
     1121    // last arrived thread reset the local node, makes the recursive call
     1122    // on parent node, and reactivates all waiting thread when returning.
     1123    // other threads block, register in queue, and deschedule.
     1124
     1125    if ( current == (arity - 1) )                        // last thread 
     1126    {
     1127
     1128#if DEBUG_BARRIER_WAIT
     1129if( cycle > DEBUG_BARRIER_WAIT )
     1130printk("\n[%s] thread[%x,%x] reset DQT node(%d,%d,%d)\n",
     1131__FUNCTION__ , this->process->pid, this->trdid,
     1132HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level );
     1133#endif
     1134        // reset the current node
     1135        hal_remote_s32( sense_xp   , expected );
     1136        hal_remote_s32( current_xp , 0 );
     1137
     1138        // release busylock protecting the current node
     1139        remote_busylock_release( lock_xp );
     1140
     1141        // recursive call on parent node when current node is not the root
     1142        if( parent_xp != XPTR_NULL) dqt_barrier_increment( parent_xp );
     1143
     1144        // unblock all waiting threads on this node
     1145        while( xlist_is_empty( root_xp ) == false )
     1146        {
     1147            // get pointers on first waiting thread
     1148            xptr_t     thread_xp  = XLIST_FIRST( root_xp , thread_t , wait_list );
     1149            cxy_t      thread_cxy = GET_CXY( thread_xp );
     1150            thread_t * thread_ptr = GET_PTR( thread_xp );
     1151
     1152#if (DEBUG_BARRIER_WAIT & 1)
     1153trdid_t     trdid   = hal_remote_l32( XPTR( thread_cxy , &thread_ptr->trdid ) );
     1154process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) );
     1155pid_t       pid     = hal_remote_l32( XPTR( thread_cxy , &process->pid ) );
     1156if( cycle > DEBUG_BARRIER_WAIT )
     1157printk("\n[%s] thread[%x,%x] unblock thread[%x,%x]\n",
     1158__FUNCTION__, this->process->pid, this->trdid, pid, trdid );
     1159#endif
     1160            // remove waiting thread from queue
     1161            xlist_unlink( XPTR( thread_cxy , &thread_ptr->wait_list ) );
     1162
     1163            // unblock waiting thread
     1164            thread_unblock( thread_xp , THREAD_BLOCKED_USERSYNC );
     1165        }
     1166    }
     1167    else                                               // not the last thread
     1168    {
     1169        // get extended pointer on xlist entry from thread
     1170        xptr_t  entry_xp = XPTR( local_cxy , &this->wait_list );
     1171       
     1172        // register calling thread in barrier waiting queue
     1173        xlist_add_last( root_xp , entry_xp );
     1174
     1175        // block calling thread
     1176        thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_USERSYNC );
     1177
     1178        // release busylock protecting the remote_barrier
     1179        remote_busylock_release( lock_xp );
     1180
     1181#if DEBUG_BARRIER_WAIT
     1182if( cycle > DEBUG_BARRIER_WAIT )
     1183printk("\n[%s] thread[%x,%x] blocks on node(%d,%d,%d)\n",
     1184__FUNCTION__ , this->process->pid, this->trdid,
     1185HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level );
     1186#endif
     1187        // deschedule
     1188        sched_yield("blocked on barrier");
     1189    }
     1190
     1191    return;
     1192
     1193} // end dqt_barrier_decrement()
     1194
     1195
Note: See TracChangeset for help on using the changeset viewer.