Ignore:
Timestamp:
Jun 26, 2019, 11:42:37 AM (17 months ago)
Author:
alain
Message:

This version is a major evolution: the physical memory allocators,
defined in the kmem.c, ppm.c, and kcm.c files, have been modified
to support remote accesses. The RPCs that were previously used
to allocate physical memory in a remote cluster have been removed.
This has been done to cure a deadlock in case of concurrent page faults.

This version 2.2 has been tested on a (4 clusters / 2 cores per cluster)
TSAR architecture, for both the "sort" and the "fft" applications.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/hal/tsar_mips32/core/hal_context.c

    r625 r635  
    22 * hal_context.c - implementation of Thread Context API for TSAR-MIPS32
    33 *
    4  * Author  Alain Greiner    (2016)
     4 * Author  Alain Greiner    (2016,2017,2018,2019)
    55 *
    66 * Copyright (c)  UPMC Sorbonne Universites
     
    3030#include <printk.h>
    3131#include <vmm.h>
     32#include <bits.h>
    3233#include <core.h>
    3334#include <cluster.h>
     
    3637
    3738/////////////////////////////////////////////////////////////////////////////////////////
    38 //       Define various SR initialisation values for TSAR-MIPS32
     39//       Define various SR initialisation values for the TSAR-MIPS32 architecture.
    3940/////////////////////////////////////////////////////////////////////////////////////////
    4041
     
    4445
    4546/////////////////////////////////////////////////////////////////////////////////////////
    46 // This structure defines the CPU context for TSAR MIPS32.
     47// This structure defines the CPU context for the TSAR-MIPS32 architecture.
    4748// The following registers are saved/restored at each context switch:
    4849// - GPR : all, but (zero, k0, k1), plus (hi, lo)
     
    5152//
    5253// WARNING : check the two CONFIG_CPU_CTX_SIZE & CONFIG_FPU_CTX_SIZE configuration
    53 //           parameterss when modifying this structure.
     54//           parameters when modifying this structure.
    5455/////////////////////////////////////////////////////////////////////////////////////////
    5556
     
    101102
    102103/////////////////////////////////////////////////////////////////////////////////////////
    103 // This structure defines the fpu_context for TSAR MIPS32.
     104// This structure defines the fpu_context for the TSAR MIPS32 architecture.
    104105/////////////////////////////////////////////////////////////////////////////////////////
    105106
     
    124125    // allocate memory for cpu_context
    125126    kmem_req_t  req;
    126     req.type   = KMEM_CPU_CTX;
     127    req.type   = KMEM_KCM;
     128    req.order  = bits_log2( sizeof(hal_cpu_context_t) );
    127129    req.flags  = AF_KERNEL | AF_ZERO;
    128130
    129     hal_cpu_context_t * context = (hal_cpu_context_t *)kmem_alloc( &req );
     131    hal_cpu_context_t * context = kmem_alloc( &req );
     132
    130133    if( context == NULL ) return -1;
    131134
     
    175178void hal_cpu_context_fork( xptr_t child_xp )
    176179{
    177     // get pointer on calling thread
    178     thread_t * this = CURRENT_THREAD;
    179 
     180    cxy_t               parent_cxy;        // parent thread cluster
     181    thread_t          * parent_ptr;        // local pointer on parent thread
     182    hal_cpu_context_t * parent_context;    // local pointer on parent cpu_context
     183    uint32_t          * parent_uzone;      // local_pointer on parent uzone (in kernel stack)
     184    char              * parent_ksp;        // kernel stack pointer on parent kernel stack
     185    uint32_t            parent_us_base;    // parent user stack base value
     186
     187    cxy_t               child_cxy;         // parent thread cluster
     188    thread_t          * child_ptr;         // local pointer on child thread
     189    hal_cpu_context_t * child_context;     // local pointer on child cpu_context
     190    uint32_t          * child_uzone;       // local_pointer on child uzone (in kernel stack)
     191    char              * child_ksp;         // kernel stack pointer on child kernel stack
     192    uint32_t            child_us_base;     // child user stack base value
     193
     194    process_t         * child_process;     // local pointer on child processs
     195    uint32_t            child_pt_ppn;      // PPN of child process PT1
     196    vseg_t            * child_us_vseg;     // local pointer on child user stack vseg
     197   
    180198    // allocate a local CPU context in parent kernel stack
    181     hal_cpu_context_t  context;
    182 
    183     // get local parent thread cluster and local pointer
    184     cxy_t      parent_cxy = local_cxy;
    185     thread_t * parent_ptr = CURRENT_THREAD;
    186 
    187     // get remote child thread cluster and local pointer
    188     cxy_t      child_cxy = GET_CXY( child_xp );
    189     thread_t * child_ptr = GET_PTR( child_xp );
    190 
    191     // get local pointer on remote child cpu context
    192     char * child_context_ptr = hal_remote_lpt( XPTR(child_cxy , &child_ptr->cpu_context) );
     199    hal_cpu_context_t context;
     200
     201    // get (local) parent thread cluster and local pointer
     202    parent_cxy = local_cxy;
     203    parent_ptr = CURRENT_THREAD;
     204
     205    // get (remote) child thread cluster and local pointer
     206    child_cxy = GET_CXY( child_xp );
     207    child_ptr = GET_PTR( child_xp );
     208
     209    // get local pointer on (local) parent CPU context
     210    parent_context = parent_ptr->cpu_context;
     211
     212    // get local pointer on (remote) child CPU context
     213    child_context = hal_remote_lpt( XPTR(child_cxy , &child_ptr->cpu_context) );
    193214
    194215    // get local pointer on remote child process
    195     process_t * process = hal_remote_lpt( XPTR(child_cxy , &child_ptr->process) );
     216    child_process = hal_remote_lpt( XPTR(child_cxy , &child_ptr->process) );
    196217
    197218    // get ppn of remote child process page table
    198     uint32_t pt_ppn = hal_remote_l32( XPTR(child_cxy , &process->vmm.gpt.ppn) );
    199 
    200     // get local pointer on parent uzone from parent thread descriptor
    201     uint32_t * parent_uzone = parent_ptr->uzone_current;
    202 
    203     // compute  local pointer on child uzone
    204     uint32_t * child_uzone  = (uint32_t *)( (intptr_t)parent_uzone +
    205                                             (intptr_t)child_ptr    -
    206                                             (intptr_t)parent_ptr  );
     219    child_pt_ppn = hal_remote_l32( XPTR(child_cxy , &child_process->vmm.gpt.ppn) );
     220
     221    // get local pointer on local parent uzone (in parent kernel stack)
     222    parent_uzone = parent_ptr->uzone_current;
     223
     224    // compute local pointer on remote child uzone (in child kernel stack)
     225    child_uzone  = (uint32_t *)( (intptr_t)parent_uzone +
     226                                 (intptr_t)child_ptr    -
     227                                 (intptr_t)parent_ptr  );
    207228
    208229    // update the uzone pointer in child thread descriptor
     
    213234if( DEBUG_HAL_CONTEXT < cycle )
    214235printk("\n[%s] thread[%x,%x] parent_uzone %x / child_uzone %x / cycle %d\n",
    215 __FUNCTION__, this->process->pid, this->trdid, parent_uzone, child_uzone, cycle );
    216 #endif
    217 
    218     // copy parent kernel stack to child thread descriptor
     236__FUNCTION__, parent_ptr->process->pid, parent_ptr->trdid, parent_uzone, child_uzone, cycle );
     237#endif
     238
     239    // get user stack base for parent thread
     240    parent_us_base = parent_ptr->user_stack_vseg->min;
     241
     242    // get user stack base for child thread
     243    child_us_vseg  = hal_remote_lpt( XPTR( child_cxy , &child_ptr->user_stack_vseg ) );
     244    child_us_base  = hal_remote_l32( XPTR( child_cxy , &child_us_vseg->min ) );
     245
     246#if DEBUG_HAL_CONTEXT
     247if( DEBUG_HAL_CONTEXT < cycle )
     248printk("\n[%s] thread[%x,%x] parent_ustack_base %x / child_ustack_base %x\n",
     249__FUNCTION__, parent_ptr->process->pid, parent_ptr->trdid, parent_us_base, child_us_base );
     250#endif
     251
     252    // get current value of kernel stack pointer in parent kernel stack
     253    parent_ksp = (char *)hal_get_sp();
     254
     255    // compute value of kernel stack pointer in child kernel stack
     256    child_ksp  = (char *)((intptr_t)parent_ksp +
     257                          (intptr_t)child_ptr  -
     258                          (intptr_t)parent_ptr );
     259
     260#if DEBUG_HAL_CONTEXT
     261if( DEBUG_HAL_CONTEXT < cycle )
     262printk("\n[%s] thread[%x,%x] parent_ksp %x / child_ksp %x\n",
     263__FUNCTION__, parent_ptr->process->pid, parent_ptr->trdid, parent_ksp, child_ksp );
     264#endif
     265
     266    // compute number of bytes to be copied, depending on current value of parent_ksp
     267    uint32_t size = (uint32_t)parent_ptr + CONFIG_THREAD_DESC_SIZE - (uint32_t)parent_ksp;   
     268
     269    // copy parent kernel stack content to child thread descriptor
    219270    // (this includes the uzone, that is allocated in the kernel stack)
    220     char * parent_ksp = (char *)hal_get_sp();
    221     char * child_ksp  = (char *)((intptr_t)parent_ksp +
    222                                  (intptr_t)child_ptr  -
    223                                  (intptr_t)parent_ptr );
    224 
    225     uint32_t size = (uint32_t)parent_ptr + CONFIG_THREAD_DESC_SIZE - (uint32_t)parent_ksp;
    226 
    227271    hal_remote_memcpy( XPTR( child_cxy , child_ksp ),
    228272                       XPTR( local_cxy , parent_ksp ),
     
    230274
    231275#if DEBUG_HAL_CONTEXT
    232 cycle = (uint32_t)hal_get_cycles();
    233 printk("\n[%s] thread[%x,%x] copied kstack from parent %x to child %x / cycle %d\n",
    234 __FUNCTION__, this->process->pid, this->trdid, parent_ptr, child_ptr, cycle );
    235 #endif
    236 
    237     // patch the user stack pointer slot in the child uzone[UZ_SP]
    238     // because parent and child use the same offset to access the user stack,
    239     // but parent and child do not have the same user stack base address.
    240     uint32_t parent_us_base = parent_ptr->user_stack_vseg->min;
    241     vseg_t * child_us_vseg  = hal_remote_lpt( XPTR( child_cxy , &child_ptr->user_stack_vseg ) );
    242     uint32_t child_us_base  = hal_remote_l32( XPTR( child_cxy , &child_us_vseg->min ) );
    243     uint32_t parent_usp     = parent_uzone[UZ_SP];
    244     uint32_t child_usp      = parent_usp + child_us_base - parent_us_base;
    245 
    246     hal_remote_s32( XPTR( child_cxy , &child_uzone[UZ_SP] ) , child_usp );
    247 
    248 #if DEBUG_HAL_CONTEXT
    249 cycle = (uint32_t)hal_get_cycles();
    250 printk("\n[%s] thread[%x,%x] parent_usp %x / child_usp %x / cycle %d\n",
    251 __FUNCTION__, this->process->pid, this->trdid, parent_usp, child_usp, cycle );
    252 #endif
    253 
    254     // save current values of CPU registers to local CPU context
     276if( DEBUG_HAL_CONTEXT < cycle )
     277printk("\n[%s] thread[%x,%x] copied kstack from parent (%x) to child (%x)\n",
     278__FUNCTION__, parent_ptr->process->pid, parent_ptr->trdid, parent_ptr, child_ptr );
     279#endif
     280
     281    // save current values of CPU registers to local copy of CPU context
    255282    hal_do_cpu_save( &context );
    256283
    257     // From this point, both parent and child can execute the following code,
     284    // update  three slots in this local CPU context
     285    context.sp_29   = (uint32_t)child_ksp;
     286    context.c0_th   = (uint32_t)child_ptr;
     287    context.c2_ptpr = (uint32_t)child_pt_ppn >> 1;
     288
     289    // From this point, both parent and child execute the following code,
    258290    // but child thread will only execute it after being unblocked by parent thread.
    259291    // They can be distinguished by the (CURRENT_THREAD,local_cxy) values,
    260292    // and we must re-initialise the calling thread pointer from c0_th register
    261293
    262     this = CURRENT_THREAD;
     294    thread_t * this = CURRENT_THREAD;
    263295
    264296    if( (this == parent_ptr) && (local_cxy == parent_cxy) )   // parent thread
    265297    {
    266         // patch 4 slots in the local CPU context: the sp_29 / c0_th / C0_sr / c2_ptpr
    267         // slots are not identical in parent and child
    268         context.sp_29   = context.sp_29 + (intptr_t)child_ptr - (intptr_t)parent_ptr;
    269         context.c0_th   = (uint32_t)child_ptr;
    270         context.c0_sr   = SR_SYS_MODE;
    271         context.c2_ptpr = pt_ppn >> 1;
    272 
    273         // copy this patched context to remote child context
    274         hal_remote_memcpy( XPTR( child_cxy , child_context_ptr ),
     298        // parent thread must update four slots in child uzone
     299        // - UZ_TH   : parent and child have different threads descriptors
     300        // - UZ_SP   : parent and child have different user stack base addresses.
     301        // - UZ_PTPR : parent and child use different Generic Page Tables
     302
     303        // parent thread computes values for child thread
     304        uint32_t child_sp    = parent_uzone[UZ_SP]  + child_us_base - parent_us_base;
     305        uint32_t child_th    = (uint32_t)child_ptr;
     306        uint32_t child_ptpr  = (uint32_t)child_pt_ppn >> 1;
     307
     308#if DEBUG_HAL_CONTEXT
     309if( DEBUG_HAL_CONTEXT < cycle )
     310printk("\n[%s] thread[%x,%x] : parent_uz_sp %x / child_uz_sp %x\n",
     311__FUNCTION__, parent_ptr->process->pid, parent_ptr->trdid,
     312parent_uzone[UZ_SP], child_sp );
     313#endif
     314
     315        // parent thread updates the child uzone
     316        hal_remote_s32( XPTR( child_cxy , &child_uzone[UZ_SP]   ) , child_sp );
     317        hal_remote_s32( XPTR( child_cxy , &child_uzone[UZ_TH]   ) , child_th );
     318        hal_remote_s32( XPTR( child_cxy , &child_uzone[UZ_PTPR] ) , child_ptpr );
     319
     320        // parent thread copies the local context to remote child context
     321        hal_remote_memcpy( XPTR( child_cxy , child_context ),
    275322                           XPTR( local_cxy  , &context ) ,
    276323                           sizeof( hal_cpu_context_t ) );
    277324#if DEBUG_HAL_CONTEXT
     325if( DEBUG_HAL_CONTEXT < cycle )
     326printk("\n[%s] thread[%x,%x] copied parent CPU context to child CPU context\n",
     327__FUNCTION__, parent_ptr->process->pid, parent_ptr->trdid );
     328#endif
     329
     330        // parent thread unblocks child thread
     331        thread_unblock( XPTR( child_cxy , child_ptr ) , THREAD_BLOCKED_GLOBAL );
     332
     333#if DEBUG_HAL_CONTEXT
    278334cycle = (uint32_t)hal_get_cycles();
    279 printk("\n[%s] thread[%x,%x] copied CPU context to child / cycle %d\n",
    280 __FUNCTION__, this->process->pid, this->trdid, cycle );
    281 #endif
    282 
    283         // parent thread unblock child thread
    284         thread_unblock( XPTR( child_cxy , child_ptr ) , THREAD_BLOCKED_GLOBAL );
    285 
    286 #if DEBUG_HAL_CONTEXT
    287 cycle = (uint32_t)hal_get_cycles();
    288 printk("\n[%s] thread[%x,%x] unblocked child thread / cycle %d\n",
    289 __FUNCTION__, this->process->pid, this->trdid, cycle );
     335trdid_t child_trdid = hal_remote_l32( XPTR( child_cxy , &child_ptr->trdid ) );
     336pid_t   child_pid   = hal_remote_l32( XPTR( child_cxy , &child_process->pid ) );
     337printk("\n[%s] thread[%x,%x] unblocked child thread[%x,%x] / cycle %d\n",
     338__FUNCTION__, parent_ptr->process->pid, parent_ptr->trdid, child_pid, child_trdid, cycle );
    290339#endif
    291340
     
    347396    if( ctx != NULL )
    348397    {   
    349         req.type = KMEM_CPU_CTX;
     398        req.type = KMEM_KCM;
    350399        req.ptr  = ctx;
    351400        kmem_free( &req );
     
    366415    // allocate memory for fpu_context
    367416    kmem_req_t  req;
    368     req.type   = KMEM_FPU_CTX;
     417    req.type   = KMEM_KCM;
    369418    req.flags  = AF_KERNEL | AF_ZERO;
    370 
    371     hal_fpu_context_t * context = (hal_fpu_context_t *)kmem_alloc( &req );
     419    req.order  = bits_log2( sizeof(hal_fpu_context_t) );
     420
     421    hal_fpu_context_t * context = kmem_alloc( &req );
     422
    372423    if( context == NULL ) return -1;
    373424
     
    414465    if( context != NULL )
    415466    {   
    416         req.type = KMEM_FPU_CTX;
     467        req.type = KMEM_KCM;
    417468        req.ptr  = context;
    418469        kmem_free( &req );
Note: See TracChangeset for help on using the changeset viewer.