Changeset 619 for trunk/kernel/libk


Ignore:
Timestamp:
Feb 12, 2019, 1:15:47 PM (3 years ago)
Author:
alain
Message:

1) Fix a bug in KSH : after the "load" command,

the [ksh] prompt is now printed after completion
of the loaded application.

2) Fix a bug in vmm_handle_cow() : the copy-on-write

now uses hal_remote_memcpy() to replicate the page content.


Location:
trunk/kernel/libk
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/kernel/libk/remote_barrier.c

    r581 r619  
    22 * remote_barrier.c -  POSIX barrier implementation.
    33 *
    4  * Author   Alain Greiner (2016,2017,2018)
     4 * Author   Alain Greiner (2016,2017,2018,2019)
    55 *
    66 * Copyright (c) UPMC Sorbonne Universites
     
    2323
    2424#include <hal_kernel_types.h>
     25#include <hal_macros.h>
    2526#include <hal_remote.h>
    2627#include <hal_irqmask.h>
     
    3334#include <remote_barrier.h>
    3435
     36////////////////////////////////////////////////////
     37//  generic (implementation independent) functions
     38////////////////////////////////////////////////////
    3539
    3640///////////////////////////////////////////////////
    37 xptr_t remote_barrier_from_ident( intptr_t  ident )
     41xptr_t generic_barrier_from_ident( intptr_t  ident )
    3842{
    3943    // get pointer on local process_descriptor
    4044    process_t * process = CURRENT_THREAD->process;
    4145
    42     // get extended pointer on reference process
    43     xptr_t      ref_xp = process->ref_xp;
    44 
    45     // get cluster and local pointer on reference process
     46    // get pointers on reference process
     47    xptr_t         ref_xp  = process->ref_xp;
    4648    cxy_t          ref_cxy = GET_CXY( ref_xp );
    4749    process_t    * ref_ptr = (process_t *)GET_PTR( ref_xp );
     
    5153
    5254    // scan reference process barriers list
    53     xptr_t             iter_xp;
    54     xptr_t             barrier_xp;
    55     cxy_t              barrier_cxy;
    56     remote_barrier_t * barrier_ptr;
    57     intptr_t           current;
    58     bool_t             found = false;
     55    xptr_t              iter_xp;
     56    xptr_t              barrier_xp;
     57    cxy_t               barrier_cxy;
     58    generic_barrier_t * barrier_ptr;
     59    intptr_t            current;
     60    bool_t              found = false;
    5961
    6062    XLIST_FOREACH( root_xp , iter_xp )
    6163    {
    62         barrier_xp  = XLIST_ELEMENT( iter_xp , remote_barrier_t , list );
     64        barrier_xp  = XLIST_ELEMENT( iter_xp , generic_barrier_t , list );
    6365        barrier_cxy = GET_CXY( barrier_xp );
    64         barrier_ptr = (remote_barrier_t *)GET_PTR( barrier_xp );
     66        barrier_ptr = (generic_barrier_t *)GET_PTR( barrier_xp );
    6567        current     = (intptr_t)hal_remote_lpt( XPTR( barrier_cxy , &barrier_ptr->ident ) );
    6668        if( ident == current )
     
    7375    if( found == false )  return XPTR_NULL;
    7476    else                  return barrier_xp;
    75 }
    76 
    77 //////////////////////////////////////////////
    78 error_t remote_barrier_create( intptr_t ident,
    79                                uint32_t count )
     77
     78} // end generic_barrier_from_ident()
     79
     80//////////////////////////////////////////////////////////////
     81error_t generic_barrier_create( intptr_t                ident,
     82                                uint32_t                count,
     83                                pthread_barrierattr_t * attr )
     84{
     85    xptr_t              gen_barrier_xp;   // extended pointer on generic barrier descriptor
     86    generic_barrier_t * gen_barrier_ptr;  // local pointer on generic barrier descriptor
     87    void              * barrier;          // local pointer on implementation barrier descriptor     
     88    kmem_req_t          req;              // kmem request
     89
     90    // get pointer on local process_descriptor
     91    process_t * process = CURRENT_THREAD->process;
     92
     93    // get pointers on reference process
     94    xptr_t         ref_xp  = process->ref_xp;
     95    cxy_t          ref_cxy = GET_CXY( ref_xp );
     96    process_t    * ref_ptr = (process_t *)GET_PTR( ref_xp );
     97
     98    // allocate memory for generic barrier descriptor
     99    if( ref_cxy == local_cxy )                         // reference cluster is local
     100    {
     101        req.type          = KMEM_GEN_BARRIER;
     102        req.flags         = AF_ZERO;
     103        gen_barrier_ptr   = kmem_alloc( &req );
     104        gen_barrier_xp    = XPTR( local_cxy , gen_barrier_ptr );
     105    }
     106    else                                               // reference cluster is remote
     107    {
     108        rpc_kcm_alloc_client( ref_cxy,
     109                              KMEM_GEN_BARRIER,
     110                              &gen_barrier_xp );
     111        gen_barrier_ptr = GET_PTR( gen_barrier_xp );
     112    }
     113
     114    if( gen_barrier_ptr == NULL )
     115    {
     116        printk("\n[ERROR] in %s : cannot create generic barrier\n", __FUNCTION__ );
     117        return -1;
     118    }
     119
     120    // create implementation specific barrier descriptor
     121    if( attr == NULL )                                    // simple barrier implementation
     122    {
     123        // create simple barrier descriptor
     124         barrier = simple_barrier_create( count );
     125
     126        if( barrier == NULL )
     127        {
     128            printk("\n[ERROR] in %s : cannot create simple barrier\n", __FUNCTION__);
     129            return -1;
     130        }
     131    }
     132    else                                                  // QDT barrier implementation
     133    {
     134        uint32_t x_size   = attr->x_size;
     135        uint32_t y_size   = attr->y_size;
     136        uint32_t nthreads = attr->nthreads;
     137
     138        // check attributes / count
     139        if( (x_size * y_size * nthreads) != count )
     140        {
     141            printk("\n[ERROR] in %s : count(%d) != x_size(%d) * y_size(%d) * nthreads(%d)\n",
     142            __FUNCTION__, count, x_size, y_size, nthreads );
     143            return -1;
     144        }
     145
     146        // create DQT barrier descriptor
     147        barrier = dqt_barrier_create( x_size , y_size , nthreads );
     148
     149        if( barrier == NULL )
     150        {
     151            printk("\n[ERROR] in %s : cannot create DQT barrier descriptor\n", __FUNCTION__);
     152            return -1;
     153        }
     154    }
     155
     156    // initialize the generic barrier descriptor
     157    hal_remote_spt( XPTR( ref_cxy , &gen_barrier_ptr->ident  ) , (void*)ident );
     158    hal_remote_s32( XPTR( ref_cxy , &gen_barrier_ptr->is_dqt ) , (attr != NULL) );
     159    hal_remote_spt( XPTR( ref_cxy , &gen_barrier_ptr->extend ) , barrier );
     160
     161    // build extended pointers on lock, root and entry for reference process xlist
     162    xptr_t root_xp  = XPTR( ref_cxy , &ref_ptr->barrier_root );
     163    xptr_t lock_xp  = XPTR( ref_cxy , &ref_ptr->sync_lock );
     164    xptr_t entry_xp = XPTR( ref_cxy , &gen_barrier_ptr->list );
     165
     166    // register barrier in reference process xlist of barriers
     167    remote_busylock_acquire( lock_xp );
     168    xlist_add_first( root_xp , entry_xp );
     169    remote_busylock_release( lock_xp );
     170
     171    return 0;
     172
     173}  // en generic_barrier_create()
     174
     175/////////////////////////////////////////////////////
     176void generic_barrier_destroy( xptr_t gen_barrier_xp )
     177{
     178    kmem_req_t  req;              // kmem request
     179
     180    // get pointer on local process_descriptor
     181    process_t * process = CURRENT_THREAD->process;
     182
     183    // get pointers on reference process
     184    xptr_t      ref_xp  = process->ref_xp;
     185    cxy_t       ref_cxy = GET_CXY( ref_xp );
     186    process_t * ref_ptr = GET_PTR( ref_xp );
     187
     188    // get cluster and local pointer on generic barrier descriptor
     189    generic_barrier_t * gen_barrier_ptr = GET_PTR( gen_barrier_xp );
     190    cxy_t               gen_barrier_cxy = GET_CXY( gen_barrier_xp );
     191
     192    // get barrier type and extension pointer
     193    bool_t  is_dqt = hal_remote_l32( XPTR( gen_barrier_cxy , &gen_barrier_ptr->is_dqt ) );
     194    void  * extend = hal_remote_lpt( XPTR( gen_barrier_cxy , &gen_barrier_ptr->extend ) );
     195
     196    // build extended pointer on implementation dependent barrier descriptor
     197    xptr_t barrier_xp = XPTR( gen_barrier_cxy , extend );
     198
     199    // delete the implementation specific barrier
     200    if( is_dqt ) dqt_barrier_destroy( barrier_xp );
     201    else         simple_barrier_destroy( barrier_xp );
     202
     203    // build extended pointers on lock and entry for reference process xlist
     204    xptr_t  lock_xp  = XPTR( ref_cxy , &ref_ptr->sync_lock );
     205    xptr_t  entry_xp = XPTR( gen_barrier_cxy , &gen_barrier_ptr->list );
     206
     207    // remove barrier from reference process xlist
     208    remote_busylock_acquire( lock_xp );
     209    xlist_unlink( entry_xp );
     210    remote_busylock_release( lock_xp );
     211
     212    // release memory allocated to barrier descriptor
     213    if( gen_barrier_cxy == local_cxy )           
     214    {
     215        req.type          = KMEM_GEN_BARRIER;
     216        req.ptr           = gen_barrier_ptr;
     217        kmem_free( &req );
     218    }
     219    else         
     220    {
     221        rpc_kcm_free_client( gen_barrier_cxy,
     222                             gen_barrier_ptr,
     223                             KMEM_GEN_BARRIER );
     224    }
     225}  // end generic_barrier_destroy()
     226
     227//////////////////////////////////////////////////
     228void generic_barrier_wait( xptr_t gen_barrier_xp )
     229{
     230    // get generic barrier descriptor cluster and pointer
     231    cxy_t               gen_barrier_cxy = GET_CXY( gen_barrier_xp );
     232    generic_barrier_t * gen_barrier_ptr = GET_PTR( gen_barrier_xp );
     233
     234    // get implementation type and extend local pointer
     235    bool_t  is_dqt = hal_remote_l32( XPTR( gen_barrier_cxy , &gen_barrier_ptr->is_dqt ) );
     236    void  * extend = hal_remote_lpt( XPTR( gen_barrier_cxy , &gen_barrier_ptr->extend ) );
     237
     238    // build extended pointer on implementation specific barrier descriptor
     239    xptr_t barrier_xp = XPTR( gen_barrier_cxy , extend );
     240
     241    // call the relevant wait function
     242    if( is_dqt ) dqt_barrier_wait( barrier_xp );
     243    else         simple_barrier_wait( barrier_xp );
     244   
     245}  // end generic_barrier_wait()
     246
     247
     248
     249
     250
     251/////////////////////////////////////////////////////////////
     252//      simple barrier functions
     253/////////////////////////////////////////////////////////////
     254
     255///////////////////////////////////////////////////////////
     256simple_barrier_t * simple_barrier_create( uint32_t  count )
    80257{
    81258    xptr_t             barrier_xp;
    82     remote_barrier_t * barrier_ptr;
    83 
    84     // get pointer on local process descriptor
     259    simple_barrier_t * barrier;
     260
     261    // get pointer on local client process descriptor
    85262    thread_t  * this    = CURRENT_THREAD;
    86263    process_t * process = this->process;
    87264
    88 #if DEBUG_BARRIER
     265    // get reference process cluster
     266    xptr_t         ref_xp  = process->ref_xp;
     267    cxy_t          ref_cxy = GET_CXY( ref_xp );
     268
     269    // allocate memory for simple barrier descriptor
     270    if( ref_cxy == local_cxy )                        // reference is local
     271    {
     272        kmem_req_t req;
     273        req.type      = KMEM_SMP_BARRIER;
     274        req.flags     = AF_ZERO;
     275        barrier       = kmem_alloc( &req );
     276        barrier_xp    = XPTR( local_cxy , barrier );
     277    }
     278    else                                             // reference is remote
     279    {
     280        rpc_kcm_alloc_client( ref_cxy,
     281                              KMEM_SMP_BARRIER,
     282                              &barrier_xp );
     283        barrier = GET_PTR( barrier_xp );
     284    }
     285
     286    if( barrier == NULL ) return NULL;
     287
     288    // initialise simple barrier descriptor
     289    hal_remote_s32      ( XPTR( ref_cxy , &barrier->arity )      , count );
     290    hal_remote_s32      ( XPTR( ref_cxy , &barrier->current    ) , 0 );
     291    hal_remote_s32      ( XPTR( ref_cxy , &barrier->sense      ) , 0 );
     292
     293    xlist_root_init     ( XPTR( ref_cxy , &barrier->root ) );
     294    remote_busylock_init( XPTR( ref_cxy , &barrier->lock ) , LOCK_BARRIER_STATE );
     295
     296#if DEBUG_BARRIER_CREATE
    89297uint32_t cycle = (uint32_t)hal_get_cycles();
    90 if( cycle > DEBUG_BARRIER )
    91 printk("\n[DBG] %s : thread %x in process %x enter / count %d / cycle %d\n",
    92 __FUNCTION__, this->trdid, process->pid, count, cycle );
    93 #endif
    94 
    95     // get extended pointer on reference process
    96     xptr_t      ref_xp = process->ref_xp;
    97 
    98     // get reference process cluster and local pointer
    99     cxy_t       ref_cxy = GET_CXY( ref_xp );
    100     process_t * ref_ptr = GET_PTR( ref_xp );
    101 
    102     // allocate memory for barrier descriptor
    103     if( ref_cxy == local_cxy )                  // local cluster is the reference
    104     {
    105         kmem_req_t req;
    106         req.type      = KMEM_BARRIER;
    107         req.flags     = AF_ZERO;
    108         barrier_ptr   = kmem_alloc( &req );
    109         barrier_xp    = XPTR( local_cxy , barrier_ptr );
    110     }
    111     else                                       // reference is remote
    112     {
    113         rpc_kcm_alloc_client( ref_cxy , KMEM_BARRIER , &barrier_xp );
    114         barrier_ptr = (remote_barrier_t *)GET_PTR( barrier_xp );
    115     }
    116 
    117     if( barrier_ptr == NULL ) return ENOMEM;
    118 
    119     // initialise barrier
    120     hal_remote_s32( XPTR( ref_cxy , &barrier_ptr->nb_threads ) , count );
    121     hal_remote_s32( XPTR( ref_cxy , &barrier_ptr->current    ) , 0 );
    122     hal_remote_s32( XPTR( ref_cxy , &barrier_ptr->sense      ) , 0 );
    123     hal_remote_spt( XPTR( ref_cxy , &barrier_ptr->ident      ) , (void*)ident );
    124 
    125     xlist_root_init( XPTR( ref_cxy , &barrier_ptr->root ) );
    126 
    127     // register  barrier in reference process xlist
    128     xptr_t root_xp  = XPTR( ref_cxy , &ref_ptr->barrier_root );
    129     xptr_t entry_xp = XPTR( ref_cxy , &barrier_ptr->list );
    130 
    131     remote_busylock_acquire( XPTR( ref_cxy , &ref_ptr->sync_lock ) );
    132     xlist_add_first( root_xp , entry_xp );
    133     remote_busylock_release( XPTR( ref_cxy , &ref_ptr->sync_lock ) );
    134 
    135 #if DEBUG_BARRIER
    136 cycle = (uint32_t)hal_get_cycles();
    137 if( cycle > DEBUG_BARRIER )
    138 printk("\n[DBG] %s : thread %x in process %x exit / barrier %x in cluster %x / cycle %d\n",
    139 __FUNCTION__, this->trdid, process->pid, barrier_ptr, ref_cxy, cycle );
    140 #endif
    141 
    142     return 0;
    143 
    144 }  // end remote_barrier_create()
     298if( cycle > DEBUG_BARRIER_CREATE )
     299printk("\n[%s] thread[%x,%x] created barrier (%x,%x) / count %d / cycle %d\n",
     300__FUNCTION__, process->pid, this->trdid, ref_cxy, barrier, count, cycle );
     301#endif
     302
     303    return barrier;
     304
     305}  // end simple_barrier_create()
    145306
    146307////////////////////////////////////////////////
    147 void remote_barrier_destroy( xptr_t barrier_xp )
     308void simple_barrier_destroy( xptr_t barrier_xp )
    148309{
    149     // get pointer on local process descriptor
    150     process_t * process = CURRENT_THREAD->process;
    151 
    152     // get extended pointer on reference process
    153     xptr_t      ref_xp = process->ref_xp;
    154 
    155     // get reference process cluster and local pointer
    156     cxy_t       ref_cxy = GET_CXY( ref_xp );
    157     process_t * ref_ptr = (process_t *)GET_PTR( ref_xp );
    158 
    159310    // get barrier cluster and local pointer
    160311    cxy_t              barrier_cxy = GET_CXY( barrier_xp );
    161     remote_barrier_t * barrier_ptr = (remote_barrier_t *)GET_PTR( barrier_xp );
    162 
    163     // remove barrier from reference process xlist
    164     remote_busylock_acquire( XPTR( ref_cxy , &ref_ptr->sync_lock ) );
    165     xlist_unlink( XPTR( barrier_cxy , &barrier_ptr->list ) );
    166     remote_busylock_release( XPTR( ref_cxy , &ref_ptr->sync_lock ) );
     312    simple_barrier_t * barrier_ptr = GET_PTR( barrier_xp );
    167313
    168314    // release memory allocated for barrier descriptor
    169     if( barrier_cxy == local_cxy )                        // reference is local
     315    if( barrier_cxy == local_cxy )
    170316    {
    171317        kmem_req_t  req;
    172         req.type = KMEM_BARRIER;
     318        req.type = KMEM_SMP_BARRIER;
    173319        req.ptr  = barrier_ptr;
    174320        kmem_free( &req );
    175321    }
    176     else                                                  // reference is remote
    177     {
    178         rpc_kcm_free_client( barrier_cxy , barrier_ptr , KMEM_BARRIER );
    179     }
    180 }  // end remote_barrier_destroy()
     322    else 
     323    {
     324        rpc_kcm_free_client( barrier_cxy,
     325                             barrier_ptr,
     326                             KMEM_SMP_BARRIER );
     327    }
     328
     329#if DEBUG_BARRIER_DESTROY
     330uint32_t    cycle   = (uint32_t)hal_get_cycles();
     331thread_t  * this    = CURRENT_THREAD;
     332process_t * process = this->process;
     333if( cycle > DEBUG_BARRIER_DESTROY )
     334printk("\n[%s] thread[%x,%x] deleted barrier (%x,%x) / cycle %d\n",
     335__FUNCTION__, process->pid, this->trdid, barrier_ptr, barrier_cxy, cycle );
     336#endif
     337
     338}  // end simple_barrier_destroy()
    181339
    182340/////////////////////////////////////////////
    183 void remote_barrier_wait( xptr_t barrier_xp )
     341void simple_barrier_wait( xptr_t barrier_xp )
    184342{
    185343    uint32_t  expected;
    186344    uint32_t  sense;
    187345    uint32_t  current;
    188     uint32_t  nb_threads;
     346    uint32_t  arity;
    189347    xptr_t    root_xp;
    190348    xptr_t    lock_xp;
    191349    xptr_t    current_xp;
    192350    xptr_t    sense_xp;
    193     xptr_t    nb_threads_xp;
     351    xptr_t    arity_xp;
    194352
    195353    // get pointer on calling thread
     
    200358
    201359    // get cluster and local pointer on remote barrier
    202     remote_barrier_t * barrier_ptr = GET_PTR( barrier_xp );
     360    simple_barrier_t * barrier_ptr = GET_PTR( barrier_xp );
    203361    cxy_t              barrier_cxy = GET_CXY( barrier_xp );
    204362
    205 #if DEBUG_BARRIER
     363#if DEBUG_BARRIER_WAIT
    206364uint32_t cycle = (uint32_t)hal_get_cycles();
    207 if( cycle > DEBUG_BARRIER )
    208 printk("\n[DBG] %s : thread %x in process %x enter / barrier %x in cluster %x / cycle %d\n",
    209 __FUNCTION__, this->trdid, this->process->pid, barrier_ptr, barrier_cxy, cycle );
    210 #endif
    211 
    212     // compute extended pointers on various barrier fields
    213     lock_xp       = XPTR( barrier_cxy , &barrier_ptr->lock );
    214     root_xp       = XPTR( barrier_cxy , &barrier_ptr->root );
    215     current_xp    = XPTR( barrier_cxy , &barrier_ptr->current );
    216     sense_xp      = XPTR( barrier_cxy , &barrier_ptr->sense );
    217     nb_threads_xp = XPTR( barrier_cxy , &barrier_ptr->nb_threads );
    218 
    219     // take busylock protecting the remote_barrier
     365if( cycle > DEBUG_BARRIER_WAIT )
     366printk("\n[%s] thread[%x,%x] enter / barrier (%x,%x) / cycle %d\n",
     367__FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle );
     368#endif
     369
     370    // build extended pointers on various barrier descriptor fields
     371    lock_xp    = XPTR( barrier_cxy , &barrier_ptr->lock );
     372    root_xp    = XPTR( barrier_cxy , &barrier_ptr->root );
     373    current_xp = XPTR( barrier_cxy , &barrier_ptr->current );
     374    sense_xp   = XPTR( barrier_cxy , &barrier_ptr->sense );
     375    arity_xp   = XPTR( barrier_cxy , &barrier_ptr->arity );
     376
     377    // take busylock protecting the barrier state
    220378    remote_busylock_acquire( lock_xp );
    221379
    222 #if (DEBUG_BARRIER & 1)
    223 cycle = (uint32_t)hal_get_cycles();
    224 if( cycle > DEBUG_BARRIER )
    225 printk("\n[DBG] %s : thread %x in process %x get lock / cycle %d\n",
    226 __FUNCTION__, this->trdid, this->process->pid, cycle );
    227 #endif
    228 
    229     // get sense and nb_threads values from barrier descriptor
    230     sense      = hal_remote_l32( sense_xp );
    231     nb_threads = hal_remote_l32( nb_threads_xp );
     380    // get sense and arity values from barrier descriptor
     381    sense = hal_remote_l32( sense_xp );
     382    arity = hal_remote_l32( arity_xp );
    232383
    233384    // compute expected value
     
    235386    else              expected = 0;
    236387
    237 #if (DEBUG_BARRIER & 1)
    238 cycle = (uint32_t)hal_get_cycles();
    239 if( cycle > DEBUG_BARRIER )
    240 printk("\n[DBG] %s : thread %x in process %x / count %d / sense %d / cycle %d\n",
    241 __FUNCTION__, this->trdid, this->process->pid, nb_threads, sense, cycle );
    242 #endif
    243 
    244     // atomically increment current, and get value before increment
     388    // increment current number of arrived threads / get value before increment
    245389    current = hal_remote_atomic_add( current_xp , 1 );
    246390
     
    248392    // other threads block, register in queue, and deschedule
    249393
    250     if( current == (nb_threads-1) )                       // last thread
     394    if( current == (arity - 1) )                       // last thread
    251395    {
    252396        hal_remote_s32( current_xp , 0 );
     
    261405            thread_t * thread_ptr = GET_PTR( thread_xp );
    262406
    263 #if (DEBUG_BARRIER & 1)
    264 cycle = (uint32_t)hal_get_cycles();
    265 if( cycle > DEBUG_BARRIER )
    266 printk("\n[DBG] %s : thread %x in process %x / unblock thread %x / cycle %d\n",
    267 __FUNCTION__, this->trdid, this->process->pid, thread_ptr, cycle );
     407#if (DEBUG_BARRIER_WAIT & 1)
     408trdid_t     trdid   = hal_remote_l32( XPTR( thread_cxy , &thread_ptr->trdid ) );
     409process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) );
     410pid_t       pid     = hal_remote_l32( XPTR( thread_cxy , &process->pid ) );
     411if( cycle > DEBUG_BARRIER_WAIT )
     412printk("\n[%s] thread[%x,%x] unblocks thread[%x,%x]\n",
     413__FUNCTION__, this->process->pid, this->trdid, pid, trdid );
    268414#endif
    269415
     
    275421        }
    276422
     423        // release busylock protecting the barrier
     424        remote_busylock_release( lock_xp );
     425    }
     426    else                                             // not the last thread
     427    {
     428
     429#if (DEBUG_BARRIER_WAIT & 1)
     430if( cycle > DEBUG_BARRIER_WAIT )
     431printk("\n[%s] thread[%x,%x] blocks\n",
     432__FUNCTION__, this->process->pid, this->trdid );
     433#endif
     434
     435        // register calling thread in barrier waiting queue
     436        xlist_add_last( root_xp , XPTR( local_cxy , &this->wait_list ) );
     437
     438        // block calling thread
     439        thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_USERSYNC );
     440
    277441        // release busylock protecting the remote_barrier
    278442        remote_busylock_release( lock_xp );
    279     }
    280     else                                             // not the last thread
    281     {
    282 
    283 #if (DEBUG_BARRIER & 1)
     443
     444        // deschedule
     445        sched_yield("blocked on barrier");
     446    }
     447
     448#if DEBUG_BARRIER_WAIT
    284449cycle = (uint32_t)hal_get_cycles();
    285 if( cycle > DEBUG_BARRIER )
    286 printk("\n[DBG] %s : thread %x in process %x / blocked / cycle %d\n",
    287 __FUNCTION__, this->trdid, this->process->pid, cycle );
    288 #endif
    289 
     450if( cycle > DEBUG_BARRIER_WAIT )
     451printk("\n[%s] thread[%x,%x] exit / barrier (%x,%x) / cycle %d\n",
     452__FUNCTION__, this->trdid, this->process->pid, barrier_cxy, barrier_ptr, cycle );
     453#endif
     454
     455}  // end simple_barrier_wait()
     456
     457
     458/////////////////////////////////////////////////////////////
     459//      DQT barrier functions
     460/////////////////////////////////////////////////////////////
     461
     462static void dqt_barrier_increment( xptr_t node_xp );
     463
     464#if DEBUG_BARRIER_CREATE
     465static void dqt_barrier_display( xptr_t  barrier_xp );
     466#endif
     467
     468///////////////////////////////////////////////////////
     469dqt_barrier_t * dqt_barrier_create( uint32_t    x_size,
     470                                    uint32_t    y_size,
     471                                    uint32_t    nthreads )
     472{
     473    page_t        * dqt_page;
     474    xptr_t          dqt_page_xp;     
     475    page_t        * rpc_page;
     476    xptr_t          rpc_page_xp;     
     477    dqt_barrier_t * barrier;       // local pointer on DQT barrier descriptor
     478    xptr_t          barrier_xp;    // extended pointer on DQT barrier descriptor
     479    uint32_t        z;             // actual DQT size == max(x_size,y_size)
     480    uint32_t        levels;        // actual number of DQT levels
     481    kmem_req_t      req;           // kmem request
     482    xptr_t          rpc_xp;        // extended pointer on RPC descriptors array
     483    rpc_desc_t    * rpc;           // pointer on RPC descriptors array
     484    uint32_t        responses;     // responses counter for parallel RPCs
     485    reg_t           save_sr;       // for critical section
     486    uint32_t        x;             // X coordinate in QDT mesh
     487    uint32_t        y;             // Y coordinate in QDT mesh
     488    uint32_t        l;             // level coordinate
     489
     490    // compute size and number of DQT levels
     491    z      = (x_size > y_size) ? x_size : y_size;
     492    levels = (z < 2) ? 1 : (z < 3) ? 2 : (z < 5) ? 3 : (z < 9) ? 4 : 5;
     493
     494// check x_size and y_size arguments
     495assert( (z <= 16) , "DQT dqth larger than (16*16)\n");
     496
     497// check RPC descriptor size
     498assert( (sizeof(rpc_desc_t) <= 128), "RPC descriptor  larger than 128 bytes\n");
     499
     500// check size of an array of 5 DQT nodes
     501assert( (sizeof(dqt_node_t) * 5 <= 512 ), "array of DQT nodes larger than 512 bytes\n");
     502
     503// check size of DQT barrier descriptor
     504assert( (sizeof(dqt_barrier_t) <= 0x4000 ), "DQT barrier descriptor larger than 4 pages\n");
     505
     506    // get pointer on local client process descriptor
     507    thread_t  * this    = CURRENT_THREAD;
     508    process_t * process = this->process;
     509
     510#if DEBUG_BARRIER_CREATE
     511uint32_t   cycle = (uint32_t)hal_get_cycles();
     512if( cycle > DEBUG_BARRIER_CREATE )
     513printk("\n[%s] thread[%x,%x] enter : x_size %d / y_size %d / levels %d / cycle %d\n",
     514__FUNCTION__, process->pid, this->trdid, x_size, y_size, levels, cycle );
     515#endif
     516
     517    // get reference process cluster
     518    xptr_t         ref_xp  = process->ref_xp;
     519    cxy_t          ref_cxy = GET_CXY( ref_xp );
     520
     521    // 1. allocate memory for DQT barrier descriptor in reference cluster
     522    if( ref_cxy == local_cxy )                   
     523     {
     524        req.type     = KMEM_PAGE;
     525        req.size     = 2;               // 4 pages == 16 Kbytes
     526        req.flags    = AF_ZERO;
     527        dqt_page     = kmem_alloc( &req );
     528        dqt_page_xp  = XPTR( local_cxy , dqt_page );
     529    }
     530    else                                         
     531    {
     532        rpc_pmem_get_pages_client( ref_cxy,
     533                                   2,
     534                                   &dqt_page );
     535        dqt_page_xp  = XPTR( ref_cxy , dqt_page );
     536    }
     537
     538    if( dqt_page == NULL ) return NULL;
     539
     540    // get pointers on DQT barrier descriptor
     541    barrier_xp = ppm_page2base( dqt_page_xp );
     542    barrier    = GET_PTR( barrier_xp );
     543
     544    // initialize global parameters in DQT barrier descriptor
     545    hal_remote_s32( XPTR( ref_cxy , &barrier->x_size   ) , x_size );
     546    hal_remote_s32( XPTR( ref_cxy , &barrier->y_size   ) , x_size );
     547    hal_remote_s32( XPTR( ref_cxy , &barrier->nthreads ) , nthreads );
     548
     549#if DEBUG_BARRIER_CREATE
     550if( cycle > DEBUG_BARRIER_CREATE )
     551printk("\n[%s] thread[%x,%x] created DQT barrier descriptor at (%x,%x)\n",
     552__FUNCTION__, process->pid, this->trdid, ref_cxy, barrier );
     553#endif
     554
     555    // 2. allocate memory from local cluster for an array of 256 RPCs descriptors
     556    //    cannot share the RPC descriptor, because the returned argument is not shared
     557    req.type    = KMEM_PAGE;
     558    req.size    = 3;            // 8 pages == 32 Kbytes
     559    req.flags   = AF_ZERO;
     560    rpc_page    = kmem_alloc( &req );
     561    rpc_page_xp = XPTR( local_cxy , rpc_page );
     562
     563    // get pointers on RPC descriptors array
     564    rpc_xp    = ppm_page2base( rpc_page_xp );
     565    rpc       = GET_PTR( rpc_xp );
     566
     567#if DEBUG_BARRIER_CREATE
     568if( cycle > DEBUG_BARRIER_CREATE )
     569printk("\n[%s] thread[%x,%x] created RPC descriptors array at (%x,%s)\n",
     570__FUNCTION__, process->pid, this->trdid, local_cxy, rpc );
     571#endif
     572
     573    // 3. send parallel RPCs to all existing clusters covered by the DQT
     574    //    to allocate memory for an array of 5 DQT nodes in each cluster
     575    //    (5 nodes per cluster <= 512 bytes per cluster)
     576
     577    responses = 0;    // initialize RPC responses counter
     578
     579    // mask IRQs
     580    hal_disable_irq( &save_sr);
     581
     582    // client thread blocks itself
     583    thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_RPC );
     584
     585    for ( x = 0 ; x < x_size ; x++ )
     586    {
     587        for ( y = 0 ; y < y_size ; y++ )
     588        {
     589            // send RPC to existing clusters only
     590            if( LOCAL_CLUSTER->cluster_info[x][y] )
     591            {
     592                cxy_t cxy = HAL_CXY_FROM_XY( x , y );   // target cluster identifier
     593
     594                // build a specific RPC descriptor for each target cluster
     595                rpc[cxy].rsp       = &responses;
     596                rpc[cxy].blocking  = false;
     597                rpc[cxy].index     = RPC_KCM_ALLOC;
     598                rpc[cxy].thread    = this;
     599                rpc[cxy].lid       = this->core->lid;
     600                rpc[cxy].args[0]   = (uint64_t)KMEM_512_BYTES; 
     601
     602                // atomically increment expected responses counter
     603                hal_atomic_add( &responses , 1 );
     604
     605                // send a non-blocking RPC to allocate 512 bytes in target cluster
     606                rpc_send( cxy , &rpc[cxy] );
     607            }
     608        }
     609    }
     610
     611#if DEBUG_BARRIER_CREATE
     612if( cycle > DEBUG_BARRIER_CREATE )
     613printk("\n[%s] thread[%x,%x] sent all RPC requests to allocate dqt_nodes array\n",
     614__FUNCTION__, process->pid, this->trdid );
     615#endif
     616
     617    // client thread deschedule
     618    sched_yield("blocked on parallel rpc_kcm_alloc");
     619
     620    // restore IRQs
     621    hal_restore_irq( save_sr);
     622
     623    // 4. initialize the node_xp[x][y][l] array in DQT barrier descriptor
     624    //    the node_xp[x][y][0] value is available in rpc.args[1]
     625
     626#if DEBUG_BARRIER_CREATE
     627if( cycle > DEBUG_BARRIER_CREATE )
     628printk("\n[%s] thread[%x,%x] initialises array of pointers on dqt_nodes\n",
     629__FUNCTION__, process->pid, this->trdid );
     630#endif
     631
     632    for ( x = 0 ; x < x_size ; x++ )
     633    {
     634        for ( y = 0 ; y < y_size ; y++ )
     635        {
     636            cxy_t    cxy      = HAL_CXY_FROM_XY( x , y );   // target cluster identifier
     637            xptr_t   array_xp = (xptr_t)rpc[cxy].args[1];   // x_pointer on node array
     638            uint32_t offset   = sizeof( dqt_node_t );       // size of a DQT node
     639               
     640            // set values into the node_xp[x][y][l] array
     641            for ( l = 0 ; l < levels ; l++ )
     642            {
     643                xptr_t  node_xp = array_xp + (offset * l);
     644                hal_remote_s64( XPTR( ref_cxy , &barrier->node_xp[x][y][l] ), node_xp );
     645
     646#if DEBUG_BARRIER_CREATE
     647if( cycle > DEBUG_BARRIER_CREATE )
     648printk(" - dqt_node_xp[%d,%d,%d] = (%x,%x) / &dqt_node_xp = %x\n",
     649x , y , l , GET_CXY( node_xp ), GET_PTR( node_xp ), &barrier->node_xp[x][y][l] );
     650#endif
     651            }
     652        }
     653    }
     654
     655    // 5. release memory locally allocated for the RPCs array
     656    req.type  = KMEM_PAGE;
     657    req.ptr   = rpc_page;
     658    kmem_free( &req );
     659
     660#if DEBUG_BARRIER_CREATE
     661if( cycle > DEBUG_BARRIER_CREATE )
     662printk("\n[%s] thread[%x,%x] released memory for RPC descriptors array\n",
     663__FUNCTION__, process->pid, this->trdid );
     664#endif
     665
     666    // 6. initialise all distributed DQT nodes using remote accesses
     667    //    and the pointers stored in the node_xp[x][y][l] array
     668    for ( x = 0 ; x < x_size ; x++ )
     669    {
     670        for ( y = 0 ; y < y_size ; y++ )
     671        {
     672            // initialize existing clusters only
     673            if( LOCAL_CLUSTER->cluster_info[x][y] )
     674            {
     675                for ( l = 0 ; l < levels ; l++ )
     676                {
     677                                    xptr_t    parent_xp;
     678                    xptr_t    child_xp[4];
     679                    uint32_t  arity = 0;
     680
     681                    // get DQT node pointers
     682                    xptr_t       node_xp  = hal_remote_l64( XPTR( ref_cxy,
     683                                            &barrier->node_xp[x][y][l] ) );
     684                    cxy_t        node_cxy = GET_CXY( node_xp );
     685                    dqt_node_t * node_ptr = GET_PTR( node_xp );
     686
     687                    // compute arity and child_xp[i]
     688                    if (l == 0 )                            // bottom DQT node
     689                    {
     690                        arity       = nthreads;
     691
     692                        child_xp[0] = XPTR_NULL;
     693                        child_xp[1] = XPTR_NULL;
     694                        child_xp[2] = XPTR_NULL;
     695                        child_xp[3] = XPTR_NULL;
     696                    }
     697                    else                                    // not a bottom DQT node
     698                    {
     699                        arity = 0;
     700
     701                        // only few non-bottom nodes must be initialised
     702                        if( ((x & ((1<<l)-1)) == 0) && ((y & ((1<<l)-1)) == 0) )
     703                        {
     704                            uint32_t cx[4];       // x coordinate for children
     705                            uint32_t cy[4];       // y coordinate for children
     706                            uint32_t i;
     707
     708                            // the child0 coordinates are equal to the parent coordinates
     709                            // other children coordinates depend on the level value
     710                            cx[0] = x;
     711                            cy[0] = y;
     712
     713                            cx[1] = x;
     714                            cy[1] = y + (1 << (l-1));
     715
     716                            cx[2] = x + (1 << (l-1));
     717                            cy[2] = y;
     718
     719                            cx[3] = x + (1 << (l-1));
     720                            cy[3] = y + (1 << (l-1));
     721
     722                            for ( i = 0 ; i < 4 ; i++ )
     723                            {
     724                                // child pointer is NULL if  outside the mesh
     725                                if ( (cx[i] < x_size) && (cy[i] < y_size) )
     726                                {
     727                                    // get child_xp[i]
     728                                    child_xp[i] = hal_remote_l64( XPTR( ref_cxy,
     729                                                  &barrier->node_xp[cx[i]][cy[i]][l-1] ) );
     730
     731                                    // increment arity
     732                                    arity++;
     733                                }
     734                                else
     735                                {
     736                                    child_xp[i] = XPTR_NULL;
     737                                }
     738                            }
     739                        }
     740                    }
     741
     742                    // compute parent_xp
     743                    if( l == (levels - 1) )                      // root DQT node
     744                    {
     745                        parent_xp = XPTR_NULL;
     746                    }
     747                    else                                          // not the root
     748                    {
     749                        uint32_t px = 0;           // parent X coordinate
     750                        uint32_t py = 0;           // parent Y coordinate
     751                        bool_t   found = false;
     752
     753                        // compute macro_cluster x_min, x_max, y_min, y_max               
     754                        uint32_t x_min = x & ~((1<<(l+1))-1);
     755                        uint32_t x_max = x_min + (1<<(l+1));
     756                        uint32_t y_min = y & ~((1<<(l+1))-1);
     757                        uint32_t y_max = y_min + (1<<(l+1));
     758
     759                        // scan all clusters in macro-cluster[x][y][l] / take first active
     760                        for( px = x_min ; px < x_max ; px++ )
     761                        {
     762                            for( py = y_min ; py < y_max ; py++ )
     763                            {
     764                                if( LOCAL_CLUSTER->cluster_info[px][py] ) found = true;
     765                                if( found ) break;
     766                            }
     767                            if( found ) break;
     768                        }
     769
     770                        parent_xp = hal_remote_l64( XPTR( ref_cxy ,
     771                                    &barrier->node_xp[px][py][l+1] ) );
     772                    }
     773
     774                    // initializes  the DQT node
     775                    hal_remote_s32( XPTR( node_cxy , &node_ptr->arity )       , arity );   
     776                    hal_remote_s32( XPTR( node_cxy , &node_ptr->current )     , 0 );   
     777                    hal_remote_s32( XPTR( node_cxy , &node_ptr->sense )       , 0 );   
     778                    hal_remote_s32( XPTR( node_cxy , &node_ptr->level )       , l );   
     779                    hal_remote_s64( XPTR( node_cxy , &node_ptr->parent_xp )   , parent_xp );
     780                    hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[0] ) , child_xp[0] );
     781                    hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[1] ) , child_xp[1] );
     782                    hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[2] ) , child_xp[2] );
     783                    hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[3] ) , child_xp[3] );
     784
     785                    xlist_root_init( XPTR( node_cxy , &node_ptr->root ) );
     786
     787                    remote_busylock_init( XPTR( node_cxy , &node_ptr->lock ),
     788                                          LOCK_BARRIER_STATE );
     789                }
     790            }
     791        }
     792    }
     793
     794#if DEBUG_BARRIER_CREATE
     795cycle = (uint32_t)hal_get_cycles();
     796if( cycle > DEBUG_BARRIER_CREATE )
     797printk("\n[%s] thread[%x,%x] completed DQT barrier initialisation / cycle %d\n",
     798__FUNCTION__, process->pid, this->trdid, cycle );
     799dqt_barrier_display( barrier_xp );
     800#endif
     801
     802    return barrier;
     803
     804}  // end dqt_barrier_create()
     805
     806///////////////////////////////////////////////
        // Destroys the DQT barrier identified by <barrier_xp> :
        // - sends one non-blocking RPC_KCM_FREE to each existing cluster covered by the DQT,
        //   to release the dqt_nodes array registered in node_xp[x][y][0],
        // - the calling thread blocks itself on THREAD_BLOCKED_RPC before sending the RPCs,
        //   and deschedules after sending them (presumably unblocked when all responses
        //   have been received - confirm in the RPC server code),
        // - releases the local RPC descriptors array, then the barrier descriptor itself.
        // @ barrier_xp : extended pointer on the DQT barrier descriptor.
        // NOTE(review): the kmem_alloc() result is used without a NULL check - confirm
        // that the allocation cannot fail here.
        ///////////////////////////////////////////////
     807void dqt_barrier_destroy( xptr_t   barrier_xp )
     808{
     809    page_t     * rpc_page;                 // page descriptor for the RPC descriptors array
     810    xptr_t       rpc_page_xp;              // extended pointer on this page descriptor
     811    rpc_desc_t * rpc;                      // local pointer on RPC descriptors array
     812    xptr_t       rpc_xp;                   // extended pointer on RPC descriptor array
     813    reg_t        save_sr;                  // for critical section
     814    kmem_req_t   req;                      // kmem request
     815
     816    thread_t * this = CURRENT_THREAD;
     817
     818    // get DQT barrier descriptor cluster and local pointer
     819    dqt_barrier_t * barrier_ptr = GET_PTR( barrier_xp );
     820    cxy_t           barrier_cxy = GET_CXY( barrier_xp );
     821
     822#if DEBUG_BARRIER_DESTROY
     823uint32_t   cycle = (uint32_t)hal_get_cycles();
     824if( cycle > DEBUG_BARRIER_DESTROY )
     825printk("\n[%s] thread[%x,%x] enter for barrier (%x,%x) / cycle %d\n",
     826__FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle );
     827#endif
     828
     829    // get x_size and y_size global parameters
     830    uint32_t x_size = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->x_size ) );
     831    uint32_t y_size = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->y_size ) );
     832
     833    // 1. allocate memory from local cluster for an array of 256 RPCs descriptors
     834    //    cannot share the RPC descriptor, because the "buf" argument is not shared
     835    req.type    = KMEM_PAGE;
     836    req.size    = 3;            // 8 pages == 32 Kbytes (presumably size is a log2 number of pages)
     837    req.flags   = AF_ZERO;
     838    rpc_page    = kmem_alloc( &req );
     839    rpc_page_xp = XPTR( local_cxy , rpc_page );
     840
     841    // get pointers on RPC descriptors array
     842    rpc_xp    = ppm_page2base( rpc_page_xp );
     843    rpc       = GET_PTR( rpc_xp );
     844   
     845    // 2. send parallel RPCs to all existing clusters covered by the DQT
     846    //    to release memory allocated for the arrays of DQT nodes in each cluster
     847
     848    uint32_t responses = 0;    // initialize RPC responses counter
     849
     850    // mask IRQs
     851    hal_disable_irq( &save_sr);
     852
     853    // client thread blocks itself
     854    thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_RPC );
     855
     856    uint32_t x , y;
     857   
     858#if DEBUG_BARRIER_DESTROY
     859if( cycle > DEBUG_BARRIER_DESTROY )
     860printk("\n[%s] thread[%x,%x] send RPCs to release the distributed dqt_node array\n",
     861__FUNCTION__, this->process->pid, this->trdid );
     862#endif
     863
     864    for ( x = 0 ; x < x_size ; x++ )
     865    {
     866        for ( y = 0 ; y < y_size ; y++ )
     867        {
     868            // send RPC to existing cluster only
     869            if( LOCAL_CLUSTER->cluster_info[x][y] )
     870            {
     871                // compute target cluster identifier
     872                cxy_t   cxy       = HAL_CXY_FROM_XY( x , y );
     873
     874                // get local pointer on dqt_nodes array in target cluster 
     875                xptr_t  buf_xp_xp = XPTR( barrier_cxy , &barrier_ptr->node_xp[x][y][0] );
     876                xptr_t  buf_xp    = hal_remote_l64( buf_xp_xp );
     877                void  * buf       = GET_PTR( buf_xp );
     878
                        // the dqt_nodes array of cluster (x,y) must be located in cluster (x,y)
     879assert( (cxy == GET_CXY(buf_xp)) , "bad extended pointer on dqt_nodes array\n" );
     880
     881                // build a specific RPC descriptor (one descriptor per target cluster, indexed by cxy)
     882                rpc[cxy].rsp       = &responses;
     883                rpc[cxy].blocking  = false;
     884                rpc[cxy].index     = RPC_KCM_FREE;
     885                rpc[cxy].thread    = this;
     886                rpc[cxy].lid       = this->core->lid;
     887                rpc[cxy].args[0]   = (uint64_t)(intptr_t)buf; 
     888                rpc[cxy].args[1]   = (uint64_t)KMEM_512_BYTES; 
     889
     890                // atomically increment expected responses counter
     891                hal_atomic_add( &responses , 1 );
     892           
     893#if DEBUG_BARRIER_DESTROY
     894if( cycle > DEBUG_BARRIER_DESTROY )
     895printk(" - target cluster(%d,%d) / buffer %x\n", x, y, buf );
     896#endif
     897                // send a non-blocking RPC to release 512 bytes in target cluster
     898                rpc_send( cxy , &rpc[cxy] );
     899            }
     900        }
     901    }
     902
     903    // client thread deschedule
     904    sched_yield("blocked on parallel rpc_kcm_free");
     905
     906    // restore IRQs
     907    hal_restore_irq( save_sr);
     908
     909    // 3. release memory locally allocated for the RPC descriptors array
     910    req.type  = KMEM_PAGE;
     911    req.ptr   = rpc_page;
     912    kmem_free( &req );
     913
     914    // 4. release memory allocated for barrier descriptor
     915    xptr_t   page_xp = ppm_base2page( barrier_xp );
     916    page_t * page    = GET_PTR( page_xp );
     917
     918    if( barrier_cxy == local_cxy )                   // barrier is local : direct release
     919    {
     920        req.type      = KMEM_PAGE;
     921        req.ptr       = page;
     922        kmem_free( &req );
     923    }
     924    else                                             // barrier is remote : release through RPC
     925    {
     926        rpc_pmem_release_pages_client( barrier_cxy,
     927                                       page );
     928    }
     929
     930#if DEBUG_BARRIER_DESTROY
     931cycle = (uint32_t)hal_get_cycles();
     932if( cycle > DEBUG_BARRIER_DESTROY )
     933printk("\n[%s] thread[%x,%x] exit for barrier (%x,%x) / cycle %d\n",
     934__FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle );
     935#endif
     936
     937}  // end dqt_barrier_destroy()
     938
     939////////////////////////////////////////////
     940void dqt_barrier_wait( xptr_t   barrier_xp )
     941{
     942    thread_t * this = CURRENT_THREAD;
     943
     944    // check calling thread can yield
     945    thread_assert_can_yield( this , __FUNCTION__ );
     946
     947    // get cluster and local pointer on DQT barrier descriptor
     948    dqt_barrier_t * barrier_ptr = GET_PTR( barrier_xp );
     949    cxy_t           barrier_cxy = GET_CXY( barrier_xp );
     950
     951#if DEBUG_BARRIER_WAIT
     952uint32_t cycle = (uint32_t)hal_get_cycles();
     953if( cycle > DEBUG_BARRIER_WAIT )
     954printk("\n[%s] thread[%x,%x] enter / barrier (%x,%x) / cycle %d\n",
     955__FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle );
     956#endif
     957
     958    // get extended pointer on local bottom DQT node
     959    uint32_t x       = HAL_X_FROM_CXY( local_cxy );
     960    uint32_t y       = HAL_Y_FROM_CXY( local_cxy );
     961    xptr_t   node_xp = hal_remote_l64( XPTR( barrier_cxy , &barrier_ptr->node_xp[x][y][0] ) );
     962
     963    // call recursive function to traverse DQT from bottom to root
     964    dqt_barrier_increment( node_xp );
     965
     966#if DEBUG_BARRIER_WAIT
     967cycle = (uint32_t)hal_get_cycles();
     968if( cycle > DEBUG_BARRIER_WAIT )
     969printk("\n[%s] thread[%x,%x] exit / barrier (%x,%x) / cycle %d\n",
     970__FUNCTION__, this->trdid, this->process->pid, barrier_cxy, barrier_ptr, cycle );
     971#endif
     972
     973}  // end dqt_barrier_wait()
     974
     975
     976////////////////////////////////////////////////////////////////////////////////////////////
     977//          DQT static functions
     978////////////////////////////////////////////////////////////////////////////////////////////
     979
     980
     981//////////////////////////////////////////////////////////////////////////////////////////
     982// This recursive function decrements the distributed "count" variables,
     983// traversing the DQT from bottom to root.
     984// The last arrived thread reset the local node before returning.
     985//////////////////////////////////////////////////////////////////////////////////////////
     986static void dqt_barrier_increment( xptr_t  node_xp )
     987{
     988    uint32_t   expected;
     989    uint32_t   sense;
     990    uint32_t   arity;
     991
     992    thread_t * this = CURRENT_THREAD;
     993
     994    // get node cluster and local pointer
     995    dqt_node_t * node_ptr = GET_PTR( node_xp );
     996    cxy_t        node_cxy = GET_CXY( node_xp );
     997
     998    // build relevant extended pointers
     999    xptr_t  arity_xp   = XPTR( node_cxy , &node_ptr->arity );
     1000    xptr_t  sense_xp   = XPTR( node_cxy , &node_ptr->sense );
     1001    xptr_t  current_xp = XPTR( node_cxy , &node_ptr->current );
     1002    xptr_t  lock_xp    = XPTR( node_cxy , &node_ptr->lock );
     1003    xptr_t  root_xp    = XPTR( node_cxy , &node_ptr->root );
     1004
     1005#if DEBUG_BARRIER_WAIT
     1006uint32_t   cycle = (uint32_t)hal_get_cycles();
     1007uint32_t   level = hal_remote_l32( XPTR( node_cxy, &node_ptr->level ) );
     1008if( cycle > DEBUG_BARRIER_WAIT )
     1009printk("\n[%s] thread[%x,%x] increments DQT node(%d,%d,%d) / cycle %d\n",
     1010__FUNCTION__ , this->process->pid, this->trdid,
     1011HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level );
     1012#endif
     1013
     1014    // get extended pointer on parent node
     1015    xptr_t  parent_xp  = hal_remote_l64( XPTR( node_cxy , &node_ptr->parent_xp ) );
     1016
     1017    // take busylock
     1018    remote_busylock_acquire( lock_xp );
     1019   
     1020    // get sense and arity values from barrier descriptor
     1021    sense = hal_remote_l32( sense_xp );
     1022    arity = hal_remote_l32( arity_xp );
     1023
     1024    // compute expected value
     1025    expected = (sense == 0) ? 1 : 0;
     1026
     1027    // increment current number of arrived threads / get value before increment
     1028    uint32_t current = hal_remote_atomic_add( current_xp , 1 );
     1029
     1030    // last arrived thread reset the local node, makes the recursive call
     1031    // on parent node, and reactivates all waiting thread when returning.
     1032    // other threads block, register in queue, and deschedule.
     1033
     1034    if ( current == (arity - 1) )                        // last thread 
     1035    {
     1036
     1037#if DEBUG_BARRIER_WAIT
     1038if( cycle > DEBUG_BARRIER_WAIT )
     1039printk("\n[%s] thread[%x,%x] reset DQT node(%d,%d,%d)\n",
     1040__FUNCTION__ , this->process->pid, this->trdid,
     1041HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level );
     1042#endif
     1043        // reset the current node
     1044        hal_remote_s32( sense_xp   , expected );
     1045        hal_remote_s32( current_xp , 0 );
     1046
     1047        // release busylock protecting the current node
     1048        remote_busylock_release( lock_xp );
     1049
     1050        // recursive call on parent node when current node is not the root
     1051        if( parent_xp != XPTR_NULL) dqt_barrier_increment( parent_xp );
     1052
     1053        // unblock all waiting threads on this node
     1054        while( xlist_is_empty( root_xp ) == false )
     1055        {
     1056            // get pointers on first waiting thread
     1057            xptr_t     thread_xp  = XLIST_FIRST( root_xp , thread_t , wait_list );
     1058            cxy_t      thread_cxy = GET_CXY( thread_xp );
     1059            thread_t * thread_ptr = GET_PTR( thread_xp );
     1060
     1061#if (DEBUG_BARRIER_WAIT & 1)
     1062trdid_t     trdid   = hal_remote_l32( XPTR( thread_cxy , &thread_ptr->trdid ) );
     1063process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) );
     1064pid_t       pid     = hal_remote_l32( XPTR( thread_cxy , &process->pid ) );
     1065if( cycle > DEBUG_BARRIER_WAIT )
     1066printk("\n[%s] thread[%x,%x] unblock thread[%x,%x]\n",
     1067__FUNCTION__, this->process->pid, this->trdid, pid, trdid );
     1068#endif
     1069            // remove waiting thread from queue
     1070            xlist_unlink( XPTR( thread_cxy , &thread_ptr->wait_list ) );
     1071
     1072            // unblock waiting thread
     1073            thread_unblock( thread_xp , THREAD_BLOCKED_USERSYNC );
     1074        }
     1075    }
     1076    else                                               // not the last thread
     1077    {
     1078        // get extended pointer on xlist entry from thread
     1079        xptr_t  entry_xp = XPTR( local_cxy , &this->wait_list );
     1080       
    2901081        // register calling thread in barrier waiting queue
    291         xlist_add_last( root_xp , XPTR( local_cxy , &this->wait_list ) );
     1082        xlist_add_last( root_xp , entry_xp );
    2921083
    2931084        // block calling thread
     
    2971088        remote_busylock_release( lock_xp );
    2981089
     1090#if DEBUG_BARRIER_WAIT
     1091if( cycle > DEBUG_BARRIER_WAIT )
     1092printk("\n[%s] thread[%x,%x] blocks on node(%d,%d,%d)\n",
     1093__FUNCTION__ , this->process->pid, this->trdid,
     1094HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level );
     1095#endif
    2991096        // deschedule
    3001097        sched_yield("blocked on barrier");
    3011098    }
    3021099
    303 #if DEBUG_BARRIER
    304 cycle = (uint32_t)hal_get_cycles();
    305 if( cycle > DEBUG_BARRIER )
    306 printk("\n[DBG] %s : thread %x in process %x exit / barrier %x in cluster %x / cycle %d\n",
    307 __FUNCTION__, this->trdid, this->process->pid, barrier_ptr, barrier_cxy, cycle );
    308 #endif
    309 
    310 }  // end remote_barrier_wait()
     1100    return;
     1101
     1102} // end dqt_barrier_decrement()
     1103
     1104#if DEBUG_BARRIER_CREATE
     1105
     1106////////////////////////////////////////////////////////////////////////////////////////////
     1107// This debug function displays all DQT nodes in all clusters.
     1108////////////////////////////////////////////////////////////////////////////////////////////
     1109// @ barrier_xp   : extended pointer on DQT barrier descriptor.
     1110////////////////////////////////////////////////////////////////////////////////////////////
     1111static void dqt_barrier_display( xptr_t  barrier_xp )
     1112{
     1113    // get cluster and local pointer on DQT barrier
     1114    dqt_barrier_t * barrier_ptr = GET_PTR( barrier_xp );
     1115    cxy_t           barrier_cxy = GET_CXY( barrier_xp );
     1116
     1117    // get barrier global parameters
     1118    uint32_t x_size   = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->x_size ) );
     1119    uint32_t y_size   = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->y_size ) );
     1120    uint32_t nthreads = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->nthreads ) );
     1121
     1122    // compute size and number of DQT levels
     1123    uint32_t z      = (x_size > y_size) ? x_size : y_size;
     1124    uint32_t levels = (z < 2) ? 1 : (z < 3) ? 2 : (z < 5) ? 3 : (z < 9) ? 4 : 5;
     1125
     1126    printk("\n***** DQT barrier : x_size %d / y_size %d / nthreads %d / levels %d *****\n",
     1127    x_size, y_size, nthreads, levels );
     1128
     1129    uint32_t x , y , l;
     1130
     1131    for ( x = 0 ; x < x_size ; x++ )
     1132    {
     1133        for ( y = 0 ; y < y_size ; y++ )
     1134        {
     1135            printk(" - cluster[%d,%d]\n", x , y );
     1136
     1137            for ( l = 0 ; l < levels ; l++ )
     1138            {
     1139                // get pointers on target node
     1140                xptr_t       node_xp  = hal_remote_l64( XPTR( barrier_cxy ,
     1141                                        &barrier_ptr->node_xp[x][y][l] ) );
     1142                dqt_node_t * node_ptr = GET_PTR( node_xp );
     1143                cxy_t        node_cxy = GET_CXY( node_xp );
     1144
     1145                if( node_xp != XPTR_NULL )
     1146                {
     1147                     uint32_t level = hal_remote_l32( XPTR( node_cxy , &node_ptr->level       ));
     1148                     uint32_t arity = hal_remote_l32( XPTR( node_cxy , &node_ptr->arity       ));
     1149                     xptr_t   pa_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->parent_xp   ));
     1150                     xptr_t   c0_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[0] ));
     1151                     xptr_t   c1_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[1] ));
     1152                     xptr_t   c2_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[2] ));
     1153                     xptr_t   c3_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[3] ));
     1154
     1155                     printk("   . level %d : (%x,%x) / arity %d / P(%x,%x) / C0(%x,%x)"
     1156                            " C1(%x,%x) / C2(%x,%x) / C3(%x,%x)\n",
     1157                     level, node_cxy, node_ptr, arity,
     1158                     GET_CXY(pa_xp), GET_PTR(pa_xp),
     1159                     GET_CXY(c0_xp), GET_PTR(c0_xp),
     1160                     GET_CXY(c1_xp), GET_PTR(c1_xp),
     1161                     GET_CXY(c2_xp), GET_PTR(c2_xp),
     1162                     GET_CXY(c3_xp), GET_PTR(c3_xp) );
     1163                }
     1164            }
     1165        }
     1166    }
     1167}   // end dqt_barrier_display()
     1168
     1169#endif
  • trunk/kernel/libk/remote_barrier.h

    r581 r619  
    22 * remote_barrier.h - POSIX barrier definition.               
    33 *
    4  * Author  Alain Greiner (2016,2017,2018)
     4 * Author  Alain Greiner (2016,2017,2018,2019)
    55 *
    66 * Copyright (c) UPMC Sorbonne Universites
     
    2929#include <remote_busylock.h>
    3030#include <xlist.h>
     31#include <shared_pthread.h>
    3132
    3233/***************************************************************************************
    33  *          This file defines a POSIX compliant barrier.
     34 *       This file defines two implementations for a POSIX compliant barrier.
    3435 *
    3536 * It is used by multi-threaded user applications to synchronise threads running in
    36  * different clusters, as all access functions uses hal_remote_l32() / hal_remote_s32()
    37  * remote access primitives.
    38  *
    39  * A barrier is declared by a given user process as a "pthread_barrier_t" global variable.
    40  * This user type is implemented as an unsigned long, but the value is not used by the
    41  * kernel. ALMOS-MKH uses only the barrier virtual address as an identifier.
    42  * For each user barrier, ALMOS-MKH creates a kernel "remote_barrier_t" structure,
    43  * dynamically allocated in the reference cluster by the remote_barrier_create() function,
    44  * and destroyed by the remote_barrier_destroy() function, using RPC if the calling thread
    45  * is not running in the reference cluster.
    46  *
    47  * The blocking "remote_barrier_wait()" function implements a descheduling policy when
    48  * the calling thread is not the last expected thread: the calling thread is registered
    49  * in a waiting queue, rooted in the barrier structure, and the the calling thread
    50  * is blocked on the THREAD_BLOCKED_USERSYNC condition. The last arrived thread
    51  * unblocks all registtered waiting threads.
     37 * different clusters. Access functions use RPCs for barrier creation/destruction,
     38 * and use remote access primitives for actual synchronisation (wait function).
     39 *
     40 * A barrier is declared by a given user process as a "pthread_barrier_t" user variable.
     41 * This user type is implemented in user space as an unsigned long, but the value is not
     42 * used by the kernel. ALMOS-MKH uses only the barrier virtual address as an identifier.
     43 * For each user barrier, ALMOS-MKH creates a kernel structure, dynamically allocated
     44 * by the "generic_barrier_create()" function, destroyed by the "generic_barrier_destroy()"
     45 * function, and used by the "generic_barrier_wait()" function.
     46 *
     47 * Implementation note:
     48 * ALMOS-MKH supports two barrier implementations:
     49 *
     50 * 1) simple_barrier_t
     51 *    If the pointer on the barrier attributes is NULL, the barrier is implemented as
     52 *    a shared variable localized in the reference process cluster.
     53 *    There is a risk of contention when the number of synchronizing threads is large.
     54 *
     55 * 2) dqt_barrier_t
     56 *    If the (x_size, y_size, nthreads) arguments are defined in the barrier attributes,
     57 *    the barrier is implemented as a hierarchical quad-tree covering all clusters in the
     58 *    (x_size * y_size) mesh, including cluster (0,0), with nthreads per cluster, and called
     59 *    DQT : Distributed Quad Tree. This DQT implementation supposes a regular architecture,
     60 *    and a strong constraint on the threads placement: exactly "nthreads" threads per
     61 *    cluster in the (x_size * y_size) mesh.
     62 *
     63 * For both implementations, the blocking "generic_barrier_wait()" function implements
     64 * a descheduling policy when the calling thread is not the last expected thread:
     65 * the calling thread is registered in a waiting queue, rooted in the barrier structure,
     66 * and the calling thread is blocked on the THREAD_BLOCKED_USERSYNC condition.
     67 * The last arrived thread unblocks all registered waiting threads.
    5268 * **************************************************************************************/
    5369
    54 /*****************************************************************************************
    55  * This structure defines the barrier descriptor.
    56  * - It contains an xlist of all barriers dynamically created by a given process,
    57  *   rooted in the reference process descriptor.
    58  * - It contains the root of another xlist to register all arrived threads.
    59  ****************************************************************************************/
    60 
    61 typedef struct remote_barrier_s
    62 {
    63     remote_busylock_t  lock;          /*! lock protecting list of waiting threads       */
    64     intptr_t           ident;         /*! virtual address in user space == identifier   */
    65     uint32_t           current;       /*! number of arrived threads                     */
    66     uint32_t           sense;         /*! barrier state (toggle)                        */
    67     uint32_t           nb_threads;    /*! number of expected threads                    */
    68     xlist_entry_t      list;          /*! member of list of barriers in same process    */
    69     xlist_entry_t      root;          /*! root of list of waiting threads               */
     70
     71
     72/*****************************************************************************************
     73 *                 generic barrier descriptor and access functions
     74 *****************************************************************************************
     75 * This generic structure is used by both the simple and the DQT implementations.
     76 * It is implemented in the reference process cluster, and contains
     77 * - the barrier identifier,
     78 * - the implementation type (simple or DQT),
     79 * - an xlist implementing the set of barriers dynamically created by a given process,
     80 * - a pointer on the implementation specific descriptor (simple_barrier / dqt_barrier).
     81 ****************************************************************************************/
     82
     83typedef struct generic_barrier_s
     84{
     85    intptr_t              ident;      /*! virtual address in user space == identifier   */
     86    xlist_entry_t         list;       /*! member of list of barriers in same process    */
     87    bool_t                is_dqt;     /*! DQT implementation when true                  */
     88    void                * extend;     /*! implementation specific barrier descriptor    */
    7089}
    71 remote_barrier_t;
    72 
     90generic_barrier_t;
    7391
    7492/*****************************************************************************************
     
    7694 * by its virtual address in a given user process. It makes an associative search,
    7795 * scanning the list of barriers rooted in the reference process descriptor.
     96 * It can be used for both simple and DQT barriers, registered in the same list.
    7897 *****************************************************************************************
    7998 * @ ident    : barrier virtual address, used as identifier.
    8099 * @ returns extended pointer on barrier if success / returns XPTR_NULL if not found.
    81100 ****************************************************************************************/
    82 xptr_t remote_barrier_from_ident( intptr_t  ident );
    83 
    84 /*****************************************************************************************
    85  * This function implement the pthread_barrier_init() syscall.
    86  * It allocates memory for the barrier descriptor in the reference cluster for
    87  * the calling process, it initializes the barrier state, and register it in the
    88  * list of barriers owned by the reference process.
    89  *****************************************************************************************
    90  * @ count       : number of expected threads.
    91  * @ ident       : barrier identifier (virtual address in user space).
    92  * @ return 0 if success / return ENOMEM if failure.
    93  ****************************************************************************************/
    94 error_t remote_barrier_create( intptr_t ident,
    95                                uint32_t count );
    96 
    97 /*****************************************************************************************
    98  * This function implement the pthread_barrier_destroy() syscall.
    99  * It releases thr memory allocated for the barrier descriptor, and remove the barrier
    100  * from the list of barriers owned by the reference process.
    101  *****************************************************************************************
    102  * @ barrier_xp  : extended pointer on barrier descriptor.
    103  ****************************************************************************************/
    104 void remote_barrier_destroy( xptr_t   barrier_xp );
    105 
    106 /*****************************************************************************************
    107  * This function implement the pthread_barrier_wait() syscall.
    108  * It returns only when the number of expected threads (registered in the barrier
    109  * dexcriptor) reach the barrier.
    110  *****************************************************************************************
    111  * @ barrier_xp   : extended pointer on barrier descriptor.
    112  ****************************************************************************************/
    113 void remote_barrier_wait( xptr_t   barrier_xp );
     101xptr_t generic_barrier_from_ident( intptr_t  ident );
     102
     103/*****************************************************************************************
     104 * This function implements the pthread_barrier_init() syscall.
     105 * It allocates and initialises the generic barrier descriptor in the reference process
     106 * cluster, and - depending on the <attr> argument, calls the relevant (simple or DQT)
     107 * function to allocate and initialize the implementation dependent barrier descriptor.
     108 * Finally, it registers the barrier in the reference process xlist of user barriers.
     109 * It can be called by a thread running in any cluster, as it uses RPC if required.
     110 *****************************************************************************************
     111 * @ ident    : barrier virtual address, used as identifier.
     112 * @ count    : number of expected threads.
     113 * @ attr     : barrier attributes (x_size,y_size,nthreads), used by DQT implementation.
     114 * @ returns 0 if success / returns -1 if not found.
     115 ****************************************************************************************/
     116error_t generic_barrier_create( intptr_t                ident,
     117                                uint32_t                count,
     118                                pthread_barrierattr_t * attr );
     119
     120/*****************************************************************************************
     121 * This function implements the pthread_barrier_destroy() syscall.
     122 * It calls the relevant function (simple or DQT) to release the memory allocated for
     123 * the implementation specific barrier descriptor, and releases the memory allocated
     124 * for the generic barrier descriptor.
     125 * It removes the barrier from the list of barriers rooted in the reference process.
     126 * It can be called by a thread running in any cluster, as it uses RPC if required.
     127 *****************************************************************************************
     128 * @ gen_barrier_xp  : extended pointer on generic barrier descriptor.
     129 ****************************************************************************************/
     130void generic_barrier_destroy( xptr_t gen_barrier_xp );
     131
     132/*****************************************************************************************
     133 * This blocking function implements the pthread_barrier_wait() syscall.
     134 * It calls the relevant function (simple or DQT) depending on the implementation,
     135 * and returns only when all expected threads reach the barrier.
     136 * It can be called by a thread running in any cluster, as it uses remote accesses.
     137 *****************************************************************************************
     138 * @ gen_barrier_xp   : extended pointer on generic barrier descriptor.
     139 ****************************************************************************************/
     140void generic_barrier_wait( xptr_t gen_barrier_xp );
     141
     142
     143
     144
     145
     146
     147/*****************************************************************************************
     148 *                        simple barrier descriptor
     149 *****************************************************************************************
     150 * This structure defines the simple barrier descriptor. It is localized in the process
     151 * reference cluster, as an extension of the generic barrier descriptor.
     152 * It implements a toggle barrier remotely accessed by all threads.
     153 * It contains the root of the xlist registering all arrived threads.
     154 ****************************************************************************************/
     155
     156typedef struct simple_barrier_s
     157{
     158    remote_busylock_t  lock;          /*! lock protecting list of waiting threads       */
     159    uint32_t           current;       /*! number of arrived threads                     */
     160    uint32_t           sense;         /*! barrier state (toggle)                        */
     161    uint32_t           arity;         /*! number of expected threads                    */
     162    xlist_entry_t      root;          /*! root of list of waiting threads               */
     163}
     164simple_barrier_t;
     165
     166/*****************************************************************************************
     167 * This function allocates memory for the simple barrier descriptor in the reference
     168 * cluster of the calling process. It initializes the barrier state and returns
     169 * a local pointer on the created simple barrier descriptor in reference cluster.
     170 * It can be called by a thread running in any cluster, as it uses RPC if required.
     171 *****************************************************************************************
     172 * @ count          : [in] number of expected threads.
     173 * @ return Local pointer on barrier descriptor if success / return NULL if failure.
     174 ****************************************************************************************/
     175simple_barrier_t * simple_barrier_create( uint32_t  count );
     176
     177/*****************************************************************************************
     178 * This function releases the memory allocated for the simple barrier descriptor.
     179 * It can be called by a thread running in any cluster, as it uses RPC if required.
     180 *****************************************************************************************
     181 * @ barrier_xp  : extended pointer on simple barrier descriptor.
     182 ****************************************************************************************/
     183void simple_barrier_destroy( xptr_t   barrier_xp );
     184
     185/*****************************************************************************************
     186 * This blocking function returns only when all expected threads reach the barrier.
     187 * It can be called by a thread running in any cluster, as it uses remote accesses.
     188 * Waiting threads use a descheduling policy.
     189 *****************************************************************************************
     190 * @ barrier_xp   : extended pointer on simple barrier descriptor.
     191 ****************************************************************************************/
     192void simple_barrier_wait( xptr_t   barrier_xp );
     193
     194
     195
     196
     197
     198/*****************************************************************************************
     199 *                              dqt_barrier
     200 *****************************************************************************************
     201 * These structures define the hierarchical DQT barrier, physically distributed in a
     202 * mesh of clusters defined by the (x_size, y_size, nthreads) arguments:
     203 *   . The involved clusters form a mesh [x_size * y_size]
     204 *   . The lower left involved cluster is cluster(0,0) 
     205 *   . The number of threads per cluster is the same in all clusters.
     206 *
     207 * Implementation note:
     208 * - The quad tree is implemented as a three-dimensional array of node[x][y][l]
     209 *   . [x][y] are the cluster coordinates / max values are (DQT_XMAX-1), (DQT_YMAX-1)
     210 *   . [l] is the node level / 0 for terminal nodes / (DQT_LMAX-1) for the root node
     211 * - The dqt_barrier_t is the global barrier descriptor, allocated in the reference
     212 *   process cluster as an extension of the generic barrier descriptor. It contains a
     213 *   3D array of extended pointers on all DQT nodes implementing the DQT barrier.
     214 * - The dqt_node_t is a local barrier implementing a toggle barrier between all threads
     215 *   of a given cluster (for a terminal node), or between all representatives of the four
     216 *   children nodes (for a non terminal node).
     217 ****************************************************************************************/
     218
     219#define  DQT_XMAX    16               // max number of clusters in a row
     220#define  DQT_YMAX    16               // max number of clusters in a column
     221#define  DQT_LMAX    5                // max depth of the quad tree
     222
     223typedef struct dqt_node_s
     224{
     225    remote_busylock_t  lock;          /*! lock protecting list of waiting threads       */
     226    volatile uint32_t  sense;         /*! barrier state (toggle)                        */
     227    volatile uint32_t  current;       /*! number of locally arrived threads             */
     228    uint32_t           arity;         /*! total number of locally expected threads      */
     229    uint32_t           level;         /*! hierarchical level (0 is bottom)              */
     230    xptr_t             parent_xp;     /*! x_pointer on parent node (NULL for root)      */
     231    xptr_t             child_xp[4];   /*! x_pointer on children node (NULL for bottom)  */
     232    xlist_entry_t      root;          /*! root of list of waiting threads               */
     233}
     234dqt_node_t;
     235
     236typedef struct dqt_barrier_s
     237{
     238    xptr_t    node_xp[DQT_XMAX][DQT_YMAX][DQT_LMAX];  /*! array of xptr on DQT nodes    */
     239
     240    uint32_t  x_size;                 /*! number of clusters in one row of DQT mesh     */
     241    uint32_t  y_size;                 /*! number of clusters in one column of DQT mesh  */
     242    uint32_t  nthreads;               /*! number of expected threads in one cluster     */
     243}
     244dqt_barrier_t;
     245
     246/*****************************************************************************************
     247 * This function allocates memory for the DQT barrier descriptor in the reference cluster
     248 * of the calling process. It also allocates memory in all clusters of the DQT mesh,
     249 * to store up to 5 DQT nodes per cluster.
     250 * It initializes the barrier descriptor, including initialisation of the parent/children
     251 * extended pointers in the distributed DQT nodes.
     252 * It returns a local pointer on the DQT barrier descriptor in reference cluster.
     253 * It can be called by a thread running in any cluster, as it uses RPCs for memory
     254 * allocation, and remote access for DQT initialisation.
     255 *****************************************************************************************
     256 * @ x_size      : [in] number of clusters in a line of DQT mesh.
     257 * @ y_size      : [in] number of clusters in a column of DQT mesh.
     258 * @ nthreads    : [in] number of threads per cluster.
     259 * @ return Local pointer on barrier descriptor if success / return NULL if failure.
     260 ****************************************************************************************/
     261dqt_barrier_t * dqt_barrier_create( uint32_t  x_size,
     262                                    uint32_t  y_size,
     263                                    uint32_t  nthreads );
     264
     265/*****************************************************************************************
     266 * This function releases all memory allocated for the DQT barrier descriptor.
     267 * It removes the barrier from the list of barriers rooted in the reference process.
     268 * It can be called by a thread running in any cluster, as it uses RPCs.
     269 *****************************************************************************************
     270 * @ barrier_xp  : extended pointer on DQT barrier descriptor.
     271 ****************************************************************************************/
     272void dqt_barrier_destroy( xptr_t   barrier_xp );
     273
     274/*****************************************************************************************
     275 * This blocking function returns only when all expected threads reach the barrier.
     276 * It can be called by a thread running in any cluster, as it uses remote accesses.
     277 * Waiting threads use a descheduling policy.
     278 *****************************************************************************************
     279 * @ barrier_xp   : extended pointer on DQT barrier descriptor.
     280 ****************************************************************************************/
     281void dqt_barrier_wait( xptr_t   barrier_xp );
     282
    114283
    115284
  • trunk/kernel/libk/remote_busylock.c

    r600 r619  
    101101    (XPTR( local_cxy , this ) == DEBUG_BUSYLOCK_THREAD_XP) )
    102102{
    103     // get cluster and local pointer of target thread
    104     cxy_t      thread_cxy = GET_CXY( DEBUG_BUSYLOCK_THREAD_XP );
    105     thread_t * thread_ptr = GET_PTR( DEBUG_BUSYLOCK_THREAD_XP );
    106 
    107     // display message on kernel TXT0
    108103    printk("\n[%s] thread[%x,%x] ACQUIRE lock %s\n",
    109     __FUNCTION_, this->process->pid, this->trdid, lock_type_str[type] );
     104    __FUNCTION__, this->process->pid, this->trdid, lock_type_str[type] );
    110105}
    111106#endif
     
    149144    (XPTR( local_cxy , this ) == DEBUG_BUSYLOCK_THREAD_XP) )
    150145{
    151     // get cluster and local pointer of target thread
    152     cxy_t      thread_cxy = GET_CXY( DEBUG_BUSYLOCK_THREAD_XP );
    153     thread_t * thread_ptr = GET_PTR( DEBUG_BUSYLOCK_THREAD_XP );
    154 
    155     // display message on kernel TXT0
    156146    printk("\n[%s] thread[%x,%x] RELEASE lock %s\n",
    157147    __FUNCTION__, this->process->pid, this->trdid, lock_type_str[type] );
  • trunk/kernel/libk/remote_busylock.h

    r603 r619  
    4242 *   makes an atomic increment on a "ticket" allocator, and keep polling the "current"
    4343 *   value  until current == ticket.
    44 
     44 *
    4545 * - To release the lock, the owner thread increments the "current" value,
    4646 *   decrements its busylocks counter.
  • trunk/kernel/libk/remote_mutex.c

    r611 r619  
    138138thread_t * this = CURRENT_THREAD;
    139139if( (uint32_t)hal_get_cycles() > DEBUG_MUTEX )
    140 printk("\n[DBG] %s : thread %x in %x process / mutex(%x,%x)\n",
    141 __FUNCTION__, this->trdid, this->process->pid, local_cxy, mutex_ptr );
     140printk("\n[%s] : thread[%x,%x] created mutex(%x,%x)\n",
     141__FUNCTION__, this->process->pid, this->trdid, local_cxy, mutex_ptr );
    142142#endif
    143143
     
    173173    remote_queuelock_release( XPTR( ref_cxy , &ref_ptr->sync_lock ) );
    174174
    175     // release memory allocated for mutexaphore descriptor
     175    // release memory allocated for mutex descriptor
    176176    if( mutex_cxy == local_cxy )                            // reference is local
    177177    {
     
    183183    else                                                  // reference is remote
    184184    {
    185         rpc_kcm_free_client( mutex_cxy , mutex_ptr , KMEM_BARRIER );
     185        rpc_kcm_free_client( mutex_cxy , mutex_ptr , KMEM_MUTEX );
    186186    }
    187187
     
    226226thread_t * this = CURRENT_THREAD;
    227227if( (uint32_t)hal_get_cycles() > DEBUG_MUTEX )
    228 printk("\n[DBG] %s : thread %x in process %x SUCCESS on mutex(%x,%x)\n",
    229 __FUNCTION__, this->trdid, this->process->pid, mutex_cxy, mutex_ptr );
     228printk("\n[%s] thread[%x,%x] SUCCESS on mutex(%x,%x)\n",
     229__FUNCTION__, this->process->pid, this->trdid, mutex_cxy, mutex_ptr );
    230230#endif
    231231
     
    247247thread_t * this = CURRENT_THREAD;
    248248if( (uint32_t)hal_get_cycles() > DEBUG_MUTEX )
    249 printk("\n[DBG] %s : thread %x in process %x BLOCKED on mutex(%x,%x)\n",
    250 __FUNCTION__, this->trdid, this->process->pid, mutex_cxy, mutex_ptr );
     249printk("\n[%s] thread[%x,%x] BLOCKED on mutex(%x,%x)\n",
     250__FUNCTION__, this->process->pid, this->trdid, mutex_cxy, mutex_ptr );
    251251#endif
    252252
     
    296296thread_t * this = CURRENT_THREAD;
    297297if( (uint32_t)hal_get_cycles() > DEBUG_MUTEX )
    298 printk("\n[DBG] %s : thread %x in %x process EXIT / mutex(%x,%x)\n",
    299 __FUNCTION__, this->trdid, this->process->pid, mutex_cxy, mutex_ptr );
     298printk("\n[%s] thread[%x,%x] EXIT / mutex(%x,%x)\n",
     299__FUNCTION__, this->process->pid, this->trdid, mutex_cxy, mutex_ptr );
    300300#endif
    301301
     
    320320process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) );
    321321pid_t       pid     = hal_remote_l32( XPTR( thread_cxy , &process->pid ) );
    322 printk("\n[DBG] %s : thread %x in process %x UNBLOCK thread %x in process %d / mutex(%x,%x)\n",
    323 __FUNCTION__, this->trdid, this->process->pid, trdid, pid, mutex_cxy, mutex_ptr );
     322printk("\n[%s] thread[%x,%x] UNBLOCK thread %x in process %d / mutex(%x,%x)\n",
     323__FUNCTION__, this->process->pid, this->trdid, trdid, pid, mutex_cxy, mutex_ptr );
    324324}
    325325#endif
     
    371371thread_t * this = CURRENT_THREAD;
    372372if( (uint32_t)hal_get_cycles() > DEBUG_QUEUELOCK )
    373 printk("\n[DBG] %s : SUCCESS for thread %x in process %x / mutex(%x,%x)\n",
    374 __FUNCTION__, this->trdid, this->process->pid, mutex_cxy, mutex_ptr );
     373printk("\n[%s] SUCCESS for thread[%x,%x] / mutex(%x,%x)\n",
     374__FUNCTION__, this->process->pid, this->trdid, mutex_cxy, mutex_ptr );
    375375#endif
    376376        // release busylock protecting mutex state
     
    385385thread_t * this = CURRENT_THREAD;
    386386if( (uint32_t)hal_get_cycles() > DEBUG_QUEUELOCK )
    387 printk("\n[DBG] %s : FAILURE for thread %x in process %x / mutex(%x,%x)\n",
    388 __FUNCTION__, this->trdid, this->process->pid, mutex_cxy, mutex_ptr );
     387printk("\n[%s] FAILURE for thread[%x,%x] / mutex(%x,%x)\n",
     388__FUNCTION__, this->process->pid, this->trdid, mutex_cxy, mutex_ptr );
    389389#endif
    390390        // release busylock protecting mutex state
  • trunk/kernel/libk/user_dir.c

    r614 r619  
    286286            printk("\n[ERROR] in %s : cannot map vpn %x in GPT\n",
    287287            __FUNCTION__, (vpn + page_id) );
    288             // use the non blocking RPC to delete the remote vseg
    289             rpc_desc_t     desc;
    290             desc.index     = RPC_VMM_DELETE_VSEG;
    291             desc.responses = 1;
    292             desc.thread    = CURRENT_THREAD;
    293             desc.lid       = CURRENT_THREAD->core->lid;
    294             desc.blocking  = true;
    295             desc.args[0]   = ref_pid;
    296             desc.args[1]   = vpn << CONFIG_PPM_PAGE_SHIFT;
    297             rpc_vmm_delete_vseg_client( ref_cxy , &desc );
     288
     289            // delete the vseg
     290            if( ref_cxy == local_cxy) vmm_delete_vseg( ref_pid, vpn<<CONFIG_PPM_PAGE_SHIFT );
     291            else rpc_vmm_delete_vseg_client( ref_cxy, ref_pid, vpn<<CONFIG_PPM_PAGE_SHIFT );
     292
    298293            // release the user_dir descriptor
    299294            req.type = KMEM_DIR;
     
    387382    lpid_t         lpid;       // process local index
    388383    rpc_desc_t     rpc;        // rpc descriptor
     384    uint32_t       responses;  // response counter
    389385     
    390386    // get pointers on calling process & thread
     
    441437    thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_RPC );
    442438
    443     // initialize RPC descriptor shared fields
    444     rpc.responses = 0;
     439    // initialize responses counter
     440    responses = 0;
     441
     442    // initialize a shared RPC descriptor
     443    // can be shared, because no out arguments
     444    rpc.rsp       = &responses;
    445445    rpc.blocking  = false;
    446446    rpc.index     = RPC_VMM_DELETE_VSEG;
     
    461461
    462462        // atomically increment responses counter
    463         hal_atomic_add( (void *)&rpc.responses , 1 );
    464 
    465         // call RPC 
    466         rpc_vmm_delete_vseg_client( process_cxy , &rpc );
    467 
    468     }  // end list of copies
     463        hal_atomic_add( &responses , 1 );
     464
     465        // send RPC to target cluster 
     466        rpc_send( process_cxy , &rpc );
     467    }
    469468
    470469    // release the lock protecting process copies
     
    472471
    473472    // client thread deschedule
    474     sched_yield("blocked on rpc_vmm_unmap_vseg");
     473    sched_yield("blocked on rpc_vmm_delete_vseg");
    475474 
    476475    // restore IRQs
Note: See TracChangeset for help on using the changeset viewer.