/* * hal_context.c - implementation of Thread Context API for TSAR-MIPS32 * * Author Alain Greiner (2016) * * Copyright (c) UPMC Sorbonne Universites * * This file is part of ALMOS-MKH. * * ALMOS-MKH is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2.0 of the License. * * ALMOS-MKH is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ALMOS-MKH; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include ///////////////////////////////////////////////////////////////////////////////////////// // Define various SR initialisation values for TSAR-MIPS32 ///////////////////////////////////////////////////////////////////////////////////////// #define SR_USR_MODE 0x0000FF13 #define SR_USR_MODE_FPU 0x2000FF13 #define SR_SYS_MODE 0x0000FF01 ///////////////////////////////////////////////////////////////////////////////////////// // This structure defines the CPU context for TSAR MIPS32. // The following registers are saved/restored at each context switch: // - GPR : all, but (zero, k0, k1), plus (hi, lo) // - CP0 : c0_th , c0_sr , c0_epc // - CP2 : c2_ptpr , c2_mode // // WARNING : check the two CONFIG_CPU_CTX_SIZE & CONFIG_FPU_CTX_SIZE configuration // parameters when modifying this structure. 
///////////////////////////////////////////////////////////////////////////////////////// typedef struct hal_cpu_context_s { uint32_t c0_epc; // slot 0 uint32_t at_01; // slot 1 uint32_t v0_02; // slot 2 uint32_t v1_03; // slot 3 uint32_t a0_04; // slot 4 uint32_t a1_05; // slot 5 uint32_t a2_06; // slot 6 uint32_t a3_07; // slot 7 uint32_t t0_08; // slot 8 uint32_t t1_09; // slot 9 uint32_t t2_10; // slot 10 uint32_t t3_11; // slot 11 uint32_t t4_12; // slot 12 uint32_t t5_13; // slot 13 uint32_t t6_14; // slot 14 uint32_t t7_15; // slot 15 uint32_t s0_16; // slot 16 uint32_t s1_17; // slot 17 uint32_t s2_18; // slot 18 uint32_t s3_19; // slot 19 uint32_t s4_20; // slot 20 uint32_t s5_21; // slot 21 uint32_t s6_22; // slot 22 uint32_t s7_23; // slot 23 uint32_t t8_24; // slot 24 uint32_t t9_25; // slot 25 uint32_t hi_26; // slot 26 uint32_t lo_27; // slot 27 uint32_t gp_28; // slot 28 uint32_t sp_29; // slot 29 uint32_t s8_30; // slot 30 uint32_t ra_31; // slot 31 uint32_t c2_ptpr; // slot 32 uint32_t c2_mode; // slot 33 uint32_t c0_sr; // slot 34 uint32_t c0_th; // slot 35 } hal_cpu_context_t; ///////////////////////////////////////////////////////////////////////////////////////// // This structure defines the fpu_context for TSAR MIPS32. 
///////////////////////////////////////////////////////////////////////////////////////// typedef struct hal_fpu_context_s { uint32_t fpu_regs[32]; } hal_fpu_context_t; ///////////////////////////////////////////////////////////////////////////////////////// // CPU context related functions ///////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////// error_t hal_cpu_context_alloc( thread_t * thread ) { assert( (sizeof(hal_cpu_context_t) <= CONFIG_CPU_CTX_SIZE) , "illegal CPU context size" ); // allocate memory for cpu_context kmem_req_t req; req.type = KMEM_CPU_CTX; req.flags = AF_KERNEL | AF_ZERO; hal_cpu_context_t * context = (hal_cpu_context_t *)kmem_alloc( &req ); if( context == NULL ) return -1; // link to thread thread->cpu_context = (void *)context; return 0; } // end hal_cpu_context_alloc() ///////////////////////////////////////////////// // The following context slots are initialised // GPR : a0_04 / sp_29 / ra_31 // CP0 : c0_sr / c0_th / c0_epc // CP2 : c2_ptpr / c2_mode ///////////////////////////////////////////////// void hal_cpu_context_init( thread_t * thread ) { hal_cpu_context_t * context = (hal_cpu_context_t *)thread->cpu_context; assert( (context != NULL ), "CPU context not allocated" ); // initialisation depends on thread type if( thread->type == THREAD_USER ) { context->a0_04 = (uint32_t)thread->entry_args; context->sp_29 = (uint32_t)thread->user_stack_vseg->max - 8; context->ra_31 = (uint32_t)&hal_kentry_eret; context->c0_epc = (uint32_t)thread->entry_func; context->c0_sr = SR_USR_MODE; context->c0_th = (uint32_t)thread; context->c2_ptpr = (uint32_t)((thread->process->vmm.gpt.ppn) >> 1); context->c2_mode = 0xF; } else // kernel thread { context->a0_04 = (uint32_t)thread->entry_args; context->sp_29 = (uint32_t)thread->k_stack_base + (uint32_t)thread->k_stack_size - 8; context->ra_31 = (uint32_t)thread->entry_func; context->c0_sr = SR_SYS_MODE; context->c0_th = 
(uint32_t)thread; context->c2_ptpr = (uint32_t)((thread->process->vmm.gpt.ppn) >> 1); context->c2_mode = 0x3; } } // end hal_cpu_context_init() //////////////////////////////////////////// void hal_cpu_context_fork( xptr_t child_xp ) { // get pointer on calling thread thread_t * this = CURRENT_THREAD; // allocate a local CPU context in parent kernel stack hal_cpu_context_t context; // get local parent thread cluster and local pointer cxy_t parent_cxy = local_cxy; thread_t * parent_ptr = CURRENT_THREAD; // get remote child thread cluster and local pointer cxy_t child_cxy = GET_CXY( child_xp ); thread_t * child_ptr = GET_PTR( child_xp ); // get local pointer on remote child cpu context char * child_context_ptr = hal_remote_lpt( XPTR(child_cxy , &child_ptr->cpu_context) ); // get local pointer on remote child process process_t * process = hal_remote_lpt( XPTR(child_cxy , &child_ptr->process) ); // get ppn of remote child process page table uint32_t pt_ppn = hal_remote_l32( XPTR(child_cxy , &process->vmm.gpt.ppn) ); // get local pointer on parent uzone from parent thread descriptor uint32_t * parent_uzone = parent_ptr->uzone_current; // compute local pointer on child uzone uint32_t * child_uzone = (uint32_t *)( (intptr_t)parent_uzone + (intptr_t)child_ptr - (intptr_t)parent_ptr ); // update the uzone pointer in child thread descriptor hal_remote_spt( XPTR( child_cxy , &child_ptr->uzone_current ) , child_uzone ); #if DEBUG_HAL_CONTEXT uint32_t cycle = (uint32_t)hal_get_cycles(); if( DEBUG_HAL_CONTEXT < cycle ) printk("\n[%s] thread[%x,%x] parent_uzone %x / child_uzone %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, parent_uzone, child_uzone, cycle ); #endif // copy parent kernel stack to child thread descriptor // (this includes the uzone, that is allocated in the kernel stack) char * parent_ksp = (char *)hal_get_sp(); char * child_ksp = (char *)((intptr_t)parent_ksp + (intptr_t)child_ptr - (intptr_t)parent_ptr ); uint32_t size = (uint32_t)parent_ptr + 
CONFIG_THREAD_DESC_SIZE - (uint32_t)parent_ksp; hal_remote_memcpy( XPTR( child_cxy , child_ksp ), XPTR( local_cxy , parent_ksp ), size ); #if DEBUG_HAL_CONTEXT cycle = (uint32_t)hal_get_cycles(); printk("\n[%s] thread[%x,%x] copied kstack from parent %x to child %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, parent_ptr, child_ptr, cycle ); #endif // patch the user stack pointer slot in the child uzone[UZ_SP] // because parent and child use the same offset to access the user stack, // but parent and child do not have the same user stack base address. uint32_t parent_us_base = parent_ptr->user_stack_vseg->min; vseg_t * child_us_vseg = hal_remote_lpt( XPTR( child_cxy , &child_ptr->user_stack_vseg ) ); uint32_t child_us_base = hal_remote_l32( XPTR( child_cxy , &child_us_vseg->min ) ); uint32_t parent_usp = parent_uzone[UZ_SP]; uint32_t child_usp = parent_usp + child_us_base - parent_us_base; hal_remote_s32( XPTR( child_cxy , &child_uzone[UZ_SP] ) , child_usp ); #if DEBUG_HAL_CONTEXT cycle = (uint32_t)hal_get_cycles(); printk("\n[%s] thread[%x,%x] parent_usp %x / child_usp %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, parent_usp, child_usp, cycle ); #endif // save current values of CPU registers to local CPU context hal_do_cpu_save( &context ); // From this point, both parent and child can execute the following code, // but child thread will only execute it after being unblocked by parent thread. 
// They can be distinguished by the (CURRENT_THREAD,local_cxy) values, // and we must re-initialise the calling thread pointer from c0_th register this = CURRENT_THREAD; if( (this == parent_ptr) && (local_cxy == parent_cxy) ) // parent thread { // patch 4 slots in the local CPU context: the sp_29 / c0_th / C0_sr / c2_ptpr // slots are not identical in parent and child context.sp_29 = context.sp_29 + (intptr_t)child_ptr - (intptr_t)parent_ptr; context.c0_th = (uint32_t)child_ptr; context.c0_sr = SR_SYS_MODE; context.c2_ptpr = pt_ppn >> 1; // copy this patched context to remote child context hal_remote_memcpy( XPTR( child_cxy , child_context_ptr ), XPTR( local_cxy , &context ) , sizeof( hal_cpu_context_t ) ); #if DEBUG_HAL_CONTEXT cycle = (uint32_t)hal_get_cycles(); printk("\n[%s] thread[%x,%x] copied CPU context to child / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif // parent thread unblock child thread thread_unblock( XPTR( child_cxy , child_ptr ) , THREAD_BLOCKED_GLOBAL ); #if DEBUG_HAL_CONTEXT cycle = (uint32_t)hal_get_cycles(); printk("\n[%s] thread[%x,%x] unblocked child thread / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif } } // end hal_cpu_context_fork() ////////////////////////////////////////////// void hal_cpu_context_exec( thread_t * thread ) { // re_initialize CPU context hal_cpu_context_init( thread ); // restore CPU registers ... 
and jump to user code hal_do_cpu_restore( (hal_cpu_context_t *)thread->cpu_context ); } // end hal_cpu_context_exec() ///////////////////////////////////////////////// void hal_cpu_context_display( xptr_t thread_xp ) { hal_cpu_context_t * ctx; // get thread cluster and local pointer cxy_t cxy = GET_CXY( thread_xp ); thread_t * ptr = GET_PTR( thread_xp ); // get context pointer ctx = (hal_cpu_context_t *)hal_remote_lpt( XPTR( cxy , &ptr->cpu_context ) ); // get relevant context slots values uint32_t sp_29 = hal_remote_l32( XPTR( cxy , &ctx->sp_29 ) ); uint32_t ra_31 = hal_remote_l32( XPTR( cxy , &ctx->ra_31 ) ); uint32_t c0_sr = hal_remote_l32( XPTR( cxy , &ctx->c0_sr ) ); uint32_t c0_epc = hal_remote_l32( XPTR( cxy , &ctx->c0_epc ) ); uint32_t c0_th = hal_remote_l32( XPTR( cxy , &ctx->c0_th ) ); uint32_t c2_ptpr = hal_remote_l32( XPTR( cxy , &ctx->c2_ptpr ) ); uint32_t c2_mode = hal_remote_l32( XPTR( cxy , &ctx->c2_mode ) ); printk("\n***** CPU context for thread %x in process %x / cycle %d\n" " sp_29 = %X ra_31 = %X\n" " c0_sr = %X c0_epc = %X c0_th = %X\n" " c2_ptpr = %X c2_mode = %X\n", ptr, ptr->process->pid, (uint32_t)hal_get_cycles(), sp_29 , ra_31, c0_sr , c0_epc , c0_th, c2_ptpr , c2_mode ); } // end hal_cpu_context_display() ///////////////////////////////////////////////// void hal_cpu_context_destroy( thread_t * thread ) { kmem_req_t req; hal_cpu_context_t * ctx = thread->cpu_context; // release CPU context if required if( ctx != NULL ) { req.type = KMEM_CPU_CTX; req.ptr = ctx; kmem_free( &req ); } } // end hal_cpu_context_destroy() ////////////////////////////////////////////////// error_t hal_fpu_context_alloc( thread_t * thread ) { assert( (sizeof(hal_fpu_context_t) <= CONFIG_FPU_CTX_SIZE) , "illegal CPU context size" ); // allocate memory for fpu_context kmem_req_t req; req.type = KMEM_FPU_CTX; req.flags = AF_KERNEL | AF_ZERO; hal_fpu_context_t * context = (hal_fpu_context_t *)kmem_alloc( &req ); if( context == NULL ) return -1; // link to thread 
thread->fpu_context = (void *)context; return 0; } // end hal_fpu_context_alloc() ////////////////////////////////////////////// void hal_fpu_context_init( thread_t * thread ) { hal_fpu_context_t * context = thread->fpu_context; assert( (context != NULL) , "fpu context not allocated" ); memset( context , 0 , sizeof(hal_fpu_context_t) ); } ////////////////////////////////////////// void hal_fpu_context_copy( thread_t * dst, thread_t * src ) { assert( (src != NULL) , "src thread pointer is NULL\n"); assert( (dst != NULL) , "dst thread pointer is NULL\n"); // get fpu context pointers hal_fpu_context_t * src_context = src->fpu_context; hal_fpu_context_t * dst_context = dst->fpu_context; // copy CPU context from src to dst memcpy( dst_context , src_context , sizeof(hal_fpu_context_t) ); } // end hal_fpu_context_copy() ///////////////////////////////////////////////// void hal_fpu_context_destroy( thread_t * thread ) { kmem_req_t req; hal_fpu_context_t * context = thread->fpu_context; // release FPU context if required if( context != NULL ) { req.type = KMEM_FPU_CTX; req.ptr = context; kmem_free( &req ); } } // end hal_fpu_context_destroy() ////////////////////////////////////////////// void hal_fpu_context_save( xptr_t thread_xp ) { // allocate a local FPU context in kernel stack hal_fpu_context_t src_context; // get remote child cluster and local pointer cxy_t thread_cxy = GET_CXY( thread_xp ); thread_t * thread_ptr = GET_PTR( thread_xp ); asm volatile( ".set noreorder \n" "swc1 $f0, 0*4(%0) \n" "swc1 $f1, 1*4(%0) \n" "swc1 $f2, 2*4(%0) \n" "swc1 $f3, 3*4(%0) \n" "swc1 $f4, 4*4(%0) \n" "swc1 $f5, 5*4(%0) \n" "swc1 $f6, 6*4(%0) \n" "swc1 $f7, 7*4(%0) \n" "swc1 $f8, 8*4(%0) \n" "swc1 $f9, 9*4(%0) \n" "swc1 $f10, 10*4(%0) \n" "swc1 $f11, 11*4(%0) \n" "swc1 $f12, 12*4(%0) \n" "swc1 $f13, 13*4(%0) \n" "swc1 $f14, 14*4(%0) \n" "swc1 $f15, 15*4(%0) \n" "swc1 $f16, 16*4(%0) \n" "swc1 $f17, 17*4(%0) \n" "swc1 $f18, 18*4(%0) \n" "swc1 $f19, 19*4(%0) \n" "swc1 $f20, 20*4(%0) \n" 
"swc1 $f21, 21*4(%0) \n" "swc1 $f22, 22*4(%0) \n" "swc1 $f23, 23*4(%0) \n" "swc1 $f24, 24*4(%0) \n" "swc1 $f25, 25*4(%0) \n" "swc1 $f26, 26*4(%0) \n" "swc1 $f27, 27*4(%0) \n" "swc1 $f28, 28*4(%0) \n" "swc1 $f29, 29*4(%0) \n" "swc1 $f30, 30*4(%0) \n" "swc1 $f31, 31*4(%0) \n" ".set reorder \n" : : "r"(&src_context) ); // get local pointer on target thread FPU context void * dst_context = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->fpu_context ) ); // copy local context to remote child context) hal_remote_memcpy( XPTR( thread_cxy , dst_context ), XPTR( local_cxy , &src_context ), sizeof( hal_fpu_context_t ) ); } // end hal_fpu_context_save() ///////////////////////////////////////////////// void hal_fpu_context_restore( thread_t * thread ) { // get pointer on FPU context and cast to uint32_t uint32_t ctx = (uint32_t)thread->fpu_context; asm volatile( ".set noreorder \n" "lwc1 $f0, 0*4(%0) \n" "lwc1 $f1, 1*4(%0) \n" "lwc1 $f2, 2*4(%0) \n" "lwc1 $f3, 3*4(%0) \n" "lwc1 $f4, 4*4(%0) \n" "lwc1 $f5, 5*4(%0) \n" "lwc1 $f6, 6*4(%0) \n" "lwc1 $f7, 7*4(%0) \n" "lwc1 $f8, 8*4(%0) \n" "lwc1 $f9, 9*4(%0) \n" "lwc1 $f10, 10*4(%0) \n" "lwc1 $f11, 11*4(%0) \n" "lwc1 $f12, 12*4(%0) \n" "lwc1 $f13, 13*4(%0) \n" "lwc1 $f14, 14*4(%0) \n" "lwc1 $f15, 15*4(%0) \n" "lwc1 $f16, 16*4(%0) \n" "lwc1 $f17, 17*4(%0) \n" "lwc1 $f18, 18*4(%0) \n" "lwc1 $f19, 19*4(%0) \n" "lwc1 $f20, 20*4(%0) \n" "lwc1 $f21, 21*4(%0) \n" "lwc1 $f22, 22*4(%0) \n" "lwc1 $f23, 23*4(%0) \n" "lwc1 $f24, 24*4(%0) \n" "lwc1 $f25, 25*4(%0) \n" "lwc1 $f26, 26*4(%0) \n" "lwc1 $f27, 27*4(%0) \n" "lwc1 $f28, 28*4(%0) \n" "lwc1 $f29, 29*4(%0) \n" "lwc1 $f30, 30*4(%0) \n" "lwc1 $f31, 31*4(%0) \n" ".set reorder \n" : : "r"(ctx) ); } // end hal_cpu_context_restore()