//////////////////////////////////////////////////////////////////////////////////////// // File : stdio.c // Written by Alain Greiner // Date : janvier 2014 // // This file defines various functions that can be used by applications to access // peripherals, for the TSAR multi-processors multi_clusters architecture. // There is NO separation between application code and system code, as the // application are running in kernel mode without system calls. // This basic GIET does not support virtual memory, and does not support multi-tasking. // // The supported peripherals are: // - the SoClib multi_tty // - The SoCLib frame_buffer // - The SoCLib block_device // // The following parameters must be defined in the hard_config.h file. // - X_SIZE : number of clusters in a row // - Y_SIZE : number of clusters in a column // - X_WIDTH : number of bits for X field in proc_id // - Y_WIDTH : number of bits for Y field in proc_id // - NB_PROCS_MAX : max number of processor per cluster // - NB_TTY_CHANNELS : max number of TTY channels // - USE_EXT_IO : use external peripherals if not zero // // The follobing base addresses must be defined in the ldscript // - seg_tty_base // - seg_fbf_base // - seg_ioc_base //////////////////////////////////////////////////////////////////////////////////////// #include "stdio.h" #if !defined(NB_PROCS_MAX) #error: you must define NB_PROCS_MAX in the hard_config.h file #endif #if !defined(USE_EXT_IO) #error: you must define USE_EXT_IO in the hard_config.h file #endif #if !defined(X_SIZE) #error: you must define X_SIZE in the hard_config.h file #endif #if !defined(Y_SIZE) #error: you must define Y_SIZE in the hard_config.h file #endif #if !defined(X_WIDTH) #error: you must define X_WIDTH in the hard_config.h file #endif #if (X_WIDTH != 4) #error: The X_WIDTH parameter must be equal to 4 #endif #if !defined(Y_WIDTH) #error: you must define X_WIDTH in the hard_config.h file #endif #if (X_WIDTH != 4) #error: The Y_WIDTH parameter must be equal to 4 #endif #if !defined(NB_TTY_CHANNELS) #error: you must define NB_TTY_CHANNELS in the hard_config.h file #endif #define NB_LOCKS 256 #define NB_BARRIERS 16 #define in_drivers __attribute__((section (".drivers"))) #define in_unckdata __attribute__((section (".unckdata"))) ////////////////////////////////////////////////////////////// // various informations that must be defined in ldscript ////////////////////////////////////////////////////////////// struct plouf; extern struct plouf seg_tty_base; extern struct plouf seg_fbf_base; extern struct plouf seg_ioc_base; extern struct plouf seg_mmc_base; extern struct plouf seg_ramdisk_base; //////////////////////////////////////////////////////////////////////////////////////// // Global uncachable variables for synchronization between drivers and ISRs //////////////////////////////////////////////////////////////////////////////////////// in_unckdata int volatile _ioc_lock = 0; in_unckdata int volatile _ioc_done = 0; in_unckdata int volatile _ioc_status; in_unckdata char volatile _tty_get_buf[NB_TTY_CHANNELS]; in_unckdata int volatile _tty_get_full[NB_TTY_CHANNELS] = { [0 ... NB_TTY_CHANNELS-1] = 0 }; //////////////////////////////////////////////////////////////////////////////////////// // Global uncachable variables for inter-task barriers //////////////////////////////////////////////////////////////////////////////////////// in_unckdata int volatile _barrier_value[NB_BARRIERS] = { [0 ... NB_BARRIERS-1] = 0 }; in_unckdata int volatile _barrier_count[NB_BARRIERS] = { [0 ... NB_BARRIERS-1] = 0 }; in_unckdata int volatile _barrier_lock[NB_BARRIERS] = { [0 ... NB_BARRIERS-1] = 0 }; //////////////////////////////////////////////////////////////////////////////////////// // Global uncachable variables for spin_locks using LL/C instructions //////////////////////////////////////////////////////////////////////////////////////// in_unckdata int volatile _spin_lock[NB_LOCKS] = { [0 ... NB_LOCKS-1] = 0 }; //////////////////////////////////////////////////////////////////////////////////////// // Memcopy taken from MutekH. //////////////////////////////////////////////////////////////////////////////////////// in_drivers void* _memcpy( void* _dst, const void* _src, unsigned int size ) { unsigned int *dst = _dst; const unsigned int *src = _src; if ( ! ((unsigned int)dst & 3) && ! ((unsigned int)src & 3) ) { while (size > 3) { *dst++ = *src++; size -= 4; } } unsigned char *cdst = (unsigned char*)dst; unsigned char *csrc = (unsigned char*)src; while (size--) { *cdst++ = *csrc++; } return _dst; } //////////////////////////////////////////////////////////////////////////////////////// // Memcopy using extended addresses //////////////////////////////////////////////////////////////////////////////////////// in_drivers void _extended_memcpy( unsigned int dst_cluster, unsigned int dst_address, unsigned int src_cluster, unsigned int src_address, unsigned int length ) { if ( (dst_address & 0x3) || (src_address & 0x3) || (length & 0x3) ) { _tty_get_lock( 0 ); _tty_puts( "ERROR in _extended_memcpy()" ); _tty_release_lock( 0 ); _exit(); } unsigned int i; unsigned int word; for ( i = 0 ; i < length ; i = i+4 ) { word = _word_extended_read( src_cluster, (src_address + i) ); _word_extended_write( dst_cluster, (dst_address + i), word ); } } //////////////////////////////////////////////////////////////////////////////////////// // Access CP0 and returns processor ident // No more than 1024 processors... //////////////////////////////////////////////////////////////////////////////////////// in_drivers unsigned int _procid() { unsigned int ret; asm volatile( "mfc0 %0, $15, 1": "=r"(ret) ); return (ret & 0x3FF); } //////////////////////////////////////////////////////////////////////////////////////// // Access CP0 and returns processor time //////////////////////////////////////////////////////////////////////////////////////// in_drivers unsigned int _proctime() { unsigned int ret; asm volatile( "mfc0 %0, $9": "=r"(ret) ); return ret; } //////////////////////////////////////////////////////////////////////////////////////// // Returns the number of processsors controled by the GIET //////////////////////////////////////////////////////////////////////////////////////// in_drivers unsigned int _procnumber() { return (unsigned int)(NB_PROCS_MAX * X_SIZE * Y_SIZE); } //////////////////////////////////////////////////////////////////////////////////////// // Returns pseudo-random number //////////////////////////////////////////////////////////////////////////////////////// in_drivers unsigned int _rand() { unsigned int x = _proctime(); if((x & 0xF) > 7) return (x*x & 0xFFFF); else return (x*x*x & 0xFFFF); } //////////////////////////////////////////////////////////////////////////////////////// // Access CP0 and mask IRQs //////////////////////////////////////////////////////////////////////////////////////// in_drivers void _it_mask() { int tmp; asm volatile("mfc0 %0, $12" : "=r" (tmp) ); asm volatile("ori %0, %0, 1" : "=r" (tmp) ); asm volatile("mtc0 %0, $12" : "=r" (tmp) ); } //////////////////////////////////////////////////////////////////////////////////////// // Access CP0 and enable IRQs //////////////////////////////////////////////////////////////////////////////////////// in_drivers void _it_enable() { int tmp; asm volatile("mfc0 %0, $12" : "=r" (tmp) ); asm volatile("addi %0, %0, -1" : "=r" (tmp) ); asm volatile("mtc0 %0, $12" : "=r" (tmp) ); } ////////////////////////////////////////////////////////////////////// // Invalidate all cache lines corresponding to a memory buffer. // This is used by the block_device driver. ///////////////////////////////////////////////////////////////////////// in_drivers void _dcache_buf_invalidate(const void * buffer, size_t size) { size_t i; size_t dcache_line_size; // retrieve dcache line size from config register (bits 12:10) asm volatile("mfc0 %0, $16, 1" : "=r" (dcache_line_size)); dcache_line_size = 2 << ((dcache_line_size>>10) & 0x7); // iterate on lines to invalidate each one of them for ( i=0; i> Y_WIDTH; unsigned int y = (proc_id / NB_PROCS_MAX) & ((1<> 4; } _tty_puts( buf ); } /////////////////////////////////////////////////////////////////////////////// // This function displays a 32 bits unsigned int as a decimal string on TTY0. /////////////////////////////////////////////////////////////////////////////// in_drivers void _tty_putd( unsigned int val ) { static const char DecTab[] = "0123456789"; char buf[11]; unsigned int i; unsigned int first; buf[10] = 0; for (i = 0; i < 10; i++) { if ((val != 0) || (i == 0)) { buf[9 - i] = DecTab[val % 10]; first = 9 - i; } else { break; } val /= 10; } _tty_puts( &buf[first] ); } ////////////////////////////////////////////////////////////////////////////// // This function try to take the hardwired lock protecting exclusive access // to TTY terminal identified by the channel argument. // It returns only when the lock has been successfully taken. ////////////////////////////////////////////////////////////////////////////// in_drivers void _tty_get_lock( unsigned int channel ) { if ( USE_EXT_IO ) // extended addressing to cluster_io { unsigned int cluster_io = ((X_SIZE-1)<> Y_WIDTH; y = (proc_id / NB_PROCS_MAX) & ((1<= NB_TTY_CHANNELS ) { _tty_get_lock( 0 ); _tty_puts( "ERROR in _tty_getc(): TTY index too large\n" ); _tty_release_lock( 0 ); _exit(); } while( _tty_read( buf, channel ) == 0 ) asm volatile("nop"); } ////////////////////////////////////////////////////////////////////////////// // Fetch a string of decimal characters (most significant digit first) // to build a 32 bits unsigned int. // The terminal index is implicitely defined by the processor ID. // This is a blocking function. // The decimal characters are written in a 32 characters buffer // until a or character is read. // The character is interpreted, and previous characters can be // cancelled. All others characters are ignored. // When the or character is received, the string is converted // to an unsigned int value. If the number of decimal digit is too large // for the 32 bits range, the zero value is returned. ////////////////////////////////////////////////////////////////////////////// in_drivers void _tty_getw( unsigned int* word_buffer ) { char buf[32]; char byte; char cancel_string[3] = { 0x08, 0x20, 0x08 }; char zero = 0x30; unsigned int save = 0; unsigned int val = 0; unsigned int done = 0; unsigned int overflow = 0; unsigned int max = 0; unsigned int proc_id = _procid(); unsigned int i; unsigned int channel; unsigned int x; unsigned int y; unsigned int l; // check TTY channel l = (proc_id % NB_PROCS_MAX); x = (proc_id / NB_PROCS_MAX) >> Y_WIDTH; y = (proc_id / NB_PROCS_MAX) & ((1<= NB_TTY_CHANNELS ) { _tty_get_lock( 0 ); _tty_puts( "ERROR in _tty_getw(): TTY index too large\n" ); _tty_release_lock( 0 ); _exit(); } while( done == 0 ) { _tty_read( &byte, channel ); if (( byte > 0x2F) && (byte < 0x3A)) // decimal character { buf[max] = byte; max++; _tty_write( &byte, 1, channel ); } else if ( (byte == 0x0A) || (byte == 0x0D) ) // LF or CR character { done = 1; } else if ( byte == 0x7F ) // DEL character { if (max > 0) { max--; // cancel the character _tty_write( cancel_string, 3, channel ); } } } // end while // string conversion for( i=0 ; i> Y_WIDTH; y = (proc_id / NB_PROCS_MAX) & ((1<= NB_LOCKS ) { _tty_get_lock( 0 ); _tty_puts( "ERROR in _release_lock()" ); _tty_release_lock( 0 ); _exit(); } _spin_lock[index] = 0; } /////////////////////////////////////////////////////////////////////////////////////// // Try to take a software spin-lock. // This is a blocking call, as there is a busy-waiting loop, // until the lock is granted to the requester. // There is an internal delay of about 100 cycles between // two successive lock read, to avoid bus saturation. /////////////////////////////////////////////////////////////////////////////////////// in_drivers void _get_lock(size_t index) { if( index >= NB_LOCKS ) { _tty_get_lock( 0 ); _tty_puts( "ERROR in _get_lock()" ); _tty_release_lock( 0 ); _exit(); } register int delay = ((_proctime() +_procid()) & 0xF) << 4; register int * plock = (int *) &_spin_lock[index]; asm volatile ("_locks_llsc: \n" "ll $2, 0(%0) \n" // $2 <= _locks_lock "bnez $2, _locks_delay \n" // random delay if busy "li $3, 1 \n" // prepare argument for sc "sc $3, 0(%0) \n" // try to set _locks_busy "bnez $3, _locks_ok \n" // exit if atomic "_locks_delay: \n" "move $4, %1 \n" // $4 <= delay "_locks_loop: \n" "addi $4, $4, -1 \n" // $4 <= $4 - 1 "beqz $4, _locks_loop \n" // test end delay "j _locks_llsc \n" // retry "_locks_ok: \n" ::"r"(plock),"r"(delay):"$2","$3","$4"); } ////////////////////////////////////////////////////////////////////////////////////// // This function makes a cooperative initialisation of the barrier: // - barrier_count[index] <= N // - barrier_lock[index] <= 0 // All tasks try to initialize the barrier, but the initialisation // is done by only one task, using LL/SC instructions. // This cooperative initialisation is questionnable, // because the barrier can ony be initialised once... ////////////////////////////////////////////////////////////////////////////////////// in_drivers void _barrier_init(unsigned int index, unsigned int value) { register int* pinit = (int*)&_barrier_value[index]; register int* pcount = (int*)&_barrier_count[index]; register int* plock = (int*)&_barrier_lock[index]; if ( index >= NB_BARRIERS ) { _tty_get_lock( 0 ); _tty_puts( "ERROR in _barrier_init()" ); _tty_release_lock( 0 ); _exit(); } // parallel initialisation using atomic instructions LL/SC asm volatile ("_barrier_init_test: \n" "ll $2, 0(%0) \n" // read barrier_value "bnez $2, _barrier_init_done \n" "move $3, %3 \n" "sc $3, 0(%0) \n" // try to write barrier_value "beqz $3, _barrier_init_test \n" "move $3, %3 \n" "sw $3, 0(%1) \n" // barrier_count <= barrier_value "move $3, $0 \n" // "sw $3, 0(%2) \n" // barrier_lock <= 0 "_barrier_init_done: \n" ::"r"(pinit),"r"(pcount),"r"(plock),"r"(value):"$2","$3"); } ////////////////////////////////////////////////////////////////////////////////////// // This blocking function uses a busy_wait technics (on the barrier_lock value), // because the GIET does not support dynamic scheduling/descheduling of tasks. // The barrier state is actually defined by two variables: // _barrier_count[index] define the number of particpants that are waiting // _barrier_lock[index] define the bool variable whose value is polled // The last participant change the value of _barrier_lock[index] to release the barrier... // There is at most 16 independant barriers, and an error is returned // if the barrier index is larger than 15. ////////////////////////////////////////////////////////////////////////////////////// in_drivers void _barrier_wait(unsigned int index) { register int* pcount = (int*)&_barrier_count[index]; register int count; int lock = _barrier_lock[index]; if ( index >= NB_BARRIERS ) { _tty_get_lock( 0 ); _tty_puts( "ERROR in _barrier_wait()" ); _tty_release_lock( 0 ); _exit(); } // parallel decrement _barrier_count[index] using atomic instructions LL/SC // input : pointer on _barrier_count[index] // output : count = _barrier_count[index] (before decrementation) asm volatile ("_barrier_decrement: \n" "ll %0, 0(%1) \n" "addi $3, %0, -1 \n" "sc $3, 0(%1) \n" "beqz $3, _barrier_decrement \n" :"=&r"(count) :"r"(pcount) :"$2","$3"); // the last task re-initializes the barrier_ count variable // and the barrier_lock variable, waking up all other waiting tasks if ( count == 1 ) // last task { _barrier_count[index] = _barrier_value[index]; asm volatile( "sync" ); _barrier_lock[index] = (lock == 0) ? 1 : 0; } else // other tasks { while ( lock == _barrier_lock[index] ) asm volatile("nop"); } } // Local Variables: // tab-width: 4; // c-basic-offset: 4; // c-file-offsets:((innamespace . 0)(inline-open . 0)); // indent-tabs-mode: nil; // End: // // vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4