//////////////////////////////////////////////////////////////////////////////////////// // File : stdio.c // Written by Alain Greiner // Date : janvier 2014 // // This file define varions functions that can be used by applications to access // peripherals, for the TSAR multi-processors multi_clusters architecture. // There is NO separation between application code and system code, as the // application are running in kernel mode without system calls. // This basic GIET does not support virtual memory, and does not support multi-tasking. // // The supported peripherals are: // - the SoClib multi_tty // - The SoCLib frame_buffer // - The SoCLib block_device // // The following parameters must be defined in the hard_config.h file. // - X_SIZE : number of clusters in a row // - Y_SIZE : number of clusters in a column // - X_WIDTH : number of bits for X field in proc_id // - Y_WIDTH : number of bits for Y field in proc_id // - NB_PROCS_MAX : max number of processor per cluster // - NB_TTY_CHANNELS : max number of TTY channels // // The follobing base addresses must be defined in the ldscript // - seg_tty_base // - seg_fbf_base // - seg_ioc_base //////////////////////////////////////////////////////////////////////////////////////// #include "stdio.h" #define NB_LOCKS 256 #define NB_BARRIERS 16 #define in_drivers __attribute__((section (".drivers"))) #define in_unckdata __attribute__((section (".unckdata"))) ////////////////////////////////////////////////////////////// // various informations that must be defined in ldscript ////////////////////////////////////////////////////////////// struct plouf; extern struct plouf seg_tty_base; extern struct plouf seg_fbf_base; extern struct plouf seg_ioc_base; extern struct plouf seg_mmc_base; //////////////////////////////////////////////////////////////////////////////////////// // Global uncachable variables for synchronization between drivers and ISRs //////////////////////////////////////////////////////////////////////////////////////// in_unckdata int volatile _ioc_lock = 0; in_unckdata int volatile _ioc_done = 0; in_unckdata int volatile _ioc_status; in_unckdata char volatile _tty_get_buf[NB_TTY_CHANNELS]; in_unckdata int volatile _tty_get_full[NB_TTY_CHANNELS] = { [0 ... NB_TTY_CHANNELS-1] = 0 }; //////////////////////////////////////////////////////////////////////////////////////// // Global uncachable variables for inter-task barriers //////////////////////////////////////////////////////////////////////////////////////// in_unckdata int volatile _barrier_value[NB_BARRIERS] = { [0 ... NB_BARRIERS-1] = 0 }; in_unckdata int volatile _barrier_count[NB_BARRIERS] = { [0 ... NB_BARRIERS-1] = 0 }; in_unckdata int volatile _barrier_lock[NB_BARRIERS] = { [0 ... NB_BARRIERS-1] = 0 }; //////////////////////////////////////////////////////////////////////////////////////// // Global uncachable variables for spin_locks using LL/C instructions //////////////////////////////////////////////////////////////////////////////////////// in_unckdata int volatile _spin_lock[NB_LOCKS] = { [0 ... NB_LOCKS-1] = 0 }; //////////////////////////////////////////////////////////////////////////////////////// // Taken from MutekH. //////////////////////////////////////////////////////////////////////////////////////// in_drivers void* _memcpy( void* _dst, const void* _src, unsigned int size ) { unsigned int *dst = _dst; const unsigned int *src = _src; if ( ! ((unsigned int)dst & 3) && ! ((unsigned int)src & 3) ) { while (size > 3) { *dst++ = *src++; size -= 4; } } unsigned char *cdst = (unsigned char*)dst; unsigned char *csrc = (unsigned char*)src; while (size--) { *cdst++ = *csrc++; } return _dst; } //////////////////////////////////////////////////////////////////////////////////////// // Access CP0 and returns processor ident // No more than 1024 processors... //////////////////////////////////////////////////////////////////////////////////////// in_drivers unsigned int _procid() { unsigned int ret; asm volatile( "mfc0 %0, $15, 1": "=r"(ret) ); return (ret & 0x3FF); } //////////////////////////////////////////////////////////////////////////////////////// // Access CP0 and returns processor time //////////////////////////////////////////////////////////////////////////////////////// in_drivers unsigned int _proctime() { unsigned int ret; asm volatile( "mfc0 %0, $9": "=r"(ret) ); return ret; } //////////////////////////////////////////////////////////////////////////////////////// // Returns the number of processsors controled by the GIET //////////////////////////////////////////////////////////////////////////////////////// in_drivers unsigned int _procnumber() { return (unsigned int)(NB_PROCS_MAX * X_SIZE * Y_SIZE); } //////////////////////////////////////////////////////////////////////////////////////// // Returns pseudo-random number //////////////////////////////////////////////////////////////////////////////////////// in_drivers unsigned int _rand() { unsigned int x = _proctime(); if((x & 0xF) > 7) return (x*x & 0xFFFF); else return (x*x*x & 0xFFFF); } //////////////////////////////////////////////////////////////////////////////////////// // Access CP0 and mask IRQs //////////////////////////////////////////////////////////////////////////////////////// in_drivers void _it_mask() { int tmp; asm volatile("mfc0 %0, $12" : "=r" (tmp) ); asm volatile("ori %0, %0, 1" : "=r" (tmp) ); asm volatile("mtc0 %0, $12" : "=r" (tmp) ); } //////////////////////////////////////////////////////////////////////////////////////// // Access CP0 and enable IRQs //////////////////////////////////////////////////////////////////////////////////////// in_drivers void _it_enable() { int tmp; asm volatile("mfc0 %0, $12" : "=r" (tmp) ); asm volatile("addi %0, %0, -1" : "=r" (tmp) ); asm volatile("mtc0 %0, $12" : "=r" (tmp) ); } ////////////////////////////////////////////////////////////////////// // Invalidate all cache lines corresponding to a memory buffer. // This is used by the block_device driver. ///////////////////////////////////////////////////////////////////////// in_drivers void _dcache_buf_invalidate(const void * buffer, size_t size) { size_t i; size_t dcache_line_size; // retrieve dcache line size from config register (bits 12:10) asm volatile("mfc0 %0, $16, 1" : "=r" (dcache_line_size)); dcache_line_size = 2 << ((dcache_line_size>>10) & 0x7); // iterate on lines to invalidate each one of them for ( i=0; i> Y_WIDTH; unsigned int y = (proc_id / NB_PROCS_MAX) & ((1<> 4; } _tty_puts( buf ); } /////////////////////////////////////////////////////////////////////////////// // This function displays a 32 bits unsigned int as a decimal string on TTY0. /////////////////////////////////////////////////////////////////////////////// in_drivers void _tty_putd( unsigned int val ) { static const char DecTab[] = "0123456789"; char buf[11]; unsigned int i; unsigned int first; buf[10] = 0; for (i = 0; i < 10; i++) { if ((val != 0) || (i == 0)) { buf[9 - i] = DecTab[val % 10]; first = 9 - i; } else { break; } val /= 10; } _tty_puts( &buf[first] ); } ////////////////////////////////////////////////////////////////////////////// // This function try to take the hardwired lock protecting exclusive access // to TTY terminal identified by the channel argument. // It returns only when the lock has been successfully taken. ////////////////////////////////////////////////////////////////////////////// in_drivers void _tty_get_lock( unsigned int channel ) { unsigned int* tty_address = (unsigned int *) &seg_tty_base; while ( tty_address[channel * TTY_SPAN + TTY_CONFIG] ) asm volatile("nop"); } ////////////////////////////////////////////////////////////////////////////// // This function releases the hardwired lock protecting exclusive access // to TTY terminal identified by the channel argument. ////////////////////////////////////////////////////////////////////////////// in_drivers void _tty_release_lock( unsigned int channel ) { unsigned int* tty_address = (unsigned int *) &seg_tty_base; tty_address[channel * TTY_SPAN + TTY_CONFIG] = 0; } ////////////////////////////////////////////////////////////////////////////// // This function fetch a single ascii character from a terminal // implicitely defined by the processor ID. // It is a blocking function. ////////////////////////////////////////////////////////////////////////////// in_drivers void _tty_getc( char* buf ) { unsigned int proc_id = _procid(); unsigned int channel; unsigned int l; unsigned int x; unsigned int y; // compute TTY terminal index if ( NB_TTY_CHANNELS == 1 ) { channel = 0; } else { l = (proc_id % NB_PROCS_MAX); x = (proc_id / NB_PROCS_MAX) >> Y_WIDTH; y = (proc_id / NB_PROCS_MAX) & ((1<= NB_TTY_CHANNELS ) { _tty_get_lock( 0 ); _tty_puts( "ERROR in _tty_getc()\n" ); _tty_release_lock( 0 ); _exit(); } } while( _tty_read( buf, channel ) == 0 ) asm volatile("nop"); } ////////////////////////////////////////////////////////////////////////////// // Fetch a string of decimal characters (most significant digit first) // to build a 32 bits unsigned int. // The terminal index is implicitely defined by the processor ID. // This is a blocking function. // The decimal characters are written in a 32 characters buffer // until a or character is read. // The character is interpreted, and previous characters can be // cancelled. All others characters are ignored. // When the or character is received, the string is converted // to an unsigned int value. If the number of decimal digit is too large // for the 32 bits range, the zero value is returned. ////////////////////////////////////////////////////////////////////////////// in_drivers void _tty_getw( unsigned int* word_buffer ) { char buf[32]; char byte; char cancel_string[3] = { 0x08, 0x20, 0x08 }; char zero = 0x30; unsigned int save = 0; unsigned int val = 0; unsigned int done = 0; unsigned int overflow = 0; unsigned int max = 0; unsigned int proc_id = _procid(); unsigned int i; unsigned int channel; unsigned int l; unsigned int x; unsigned int y; // compute TTY terminal index if ( NB_TTY_CHANNELS == 1 ) { channel = 0; } else { l = (proc_id % NB_PROCS_MAX); x = (proc_id / NB_PROCS_MAX) >> Y_WIDTH; y = (proc_id / NB_PROCS_MAX) & ((1<= NB_TTY_CHANNELS ) { _tty_get_lock( 0 ); _tty_puts( "ERROR in _tty_getw()\n" ); _tty_release_lock( 0 ); _exit(); } } while( done == 0 ) { _tty_read( &byte, channel ); if (( byte > 0x2F) && (byte < 0x3A)) // decimal character { buf[max] = byte; max++; _tty_write( &byte, 1, channel ); } else if ( (byte == 0x0A) || (byte == 0x0D) ) // LF or CR character { done = 1; } else if ( byte == 0x7F ) // DEL character { if (max > 0) { max--; // cancel the character _tty_write( cancel_string, 3, channel ); } } } // end while // string conversion for( i=0 ; i> Y_WIDTH; y = (proc_id / NB_PROCS_MAX) & ((1<= NB_TTY_CHANNELS ) { _tty_get_lock( 0 ); _tty_puts("ERROR in _tty_printf() for proc[" ); _tty_putd( x ); _tty_puts(","); _tty_putd( y ); _tty_puts(","); _tty_putd( l ); _tty_puts("] / TTY channel too large = "); _tty_putd( channel ); _tty_puts("\n"); _tty_release_lock( 0 ); _exit(); } } // take the TTY lock _tty_get_lock( channel ); printf_text: while (*format) { unsigned int i; for (i = 0; format[i] && format[i] != '%'; i++) ; if (i) { _tty_write( format, i, channel ); format += i; } if (*format == '%') { format++; goto printf_arguments; } } // end while va_end( ap ); // release lock _tty_release_lock( 0 ); return; printf_arguments: { int val = va_arg(ap, long); char buf[20]; char* pbuf; unsigned int len = 0; static const char HexaTab[] = "0123456789ABCDEF"; unsigned int i; switch (*format++) { case ('c'): // char conversion len = 1; buf[0] = val; pbuf = buf; break; case ('d'): // decimal signed integer if (val < 0) { val = -val; _tty_write( "_" , 1, channel ); } case ('u'): // decimal unsigned integer for( i=0 ; i<10 ; i++) { buf[9-i] = HexaTab[val % 10]; if (!(val /= 10)) break; } len = i+1; pbuf = &buf[9-i]; break; case ('x'): // hexadecimal integer _tty_write( "0x", 2, channel ); for( i=0 ; i<8 ; i++) { buf[7-i] = HexaTab[val % 16U]; if (!(val /= 16U)) break; } len = i+1; pbuf = &buf[7-i]; break; case ('s'): // string { char *str = (char*)val; while ( str[len] ) len++; pbuf = (char*)val; } break; default: goto printf_text; } // end switch _tty_write( pbuf, len, channel ); goto printf_text; } } // end printf() ////////////////////////////////////////////////////////////////////////////////////// // These functions are the ISRs that must be executed when an IRQ is activated // by the TTY: _tty_isr_X is associated to channel [X]. // It save the character in the communication buffer _tty_get_buf[X], // and set the set/reset variable _tty_get_full[X]. // A character is lost if the buffer is full when the ISR is executed. ////////////////////////////////////////////////////////////////////////////////////// in_drivers void _tty_isr_indexed(size_t index) { char* base = (char*)&seg_tty_base; char* tty_address = (char*)(base + index*TTY_SPAN*4); _tty_get_buf[index] = tty_address[TTY_READ*4]; // save character and reset IRQ _tty_get_full[index] = 1; // signals character available } in_drivers void _tty_isr_00() { _tty_isr_indexed(0); } in_drivers void _tty_isr_01() { _tty_isr_indexed(1); } in_drivers void _tty_isr_02() { _tty_isr_indexed(2); } in_drivers void _tty_isr_03() { _tty_isr_indexed(3); } in_drivers void _tty_isr_04() { _tty_isr_indexed(4); } in_drivers void _tty_isr_05() { _tty_isr_indexed(5); } in_drivers void _tty_isr_06() { _tty_isr_indexed(6); } in_drivers void _tty_isr_07() { _tty_isr_indexed(7); } in_drivers void _tty_isr_08() { _tty_isr_indexed(8); } in_drivers void _tty_isr_09() { _tty_isr_indexed(9); } in_drivers void _tty_isr_10() { _tty_isr_indexed(10); } in_drivers void _tty_isr_11() { _tty_isr_indexed(11); } in_drivers void _tty_isr_12() { _tty_isr_indexed(12); } in_drivers void _tty_isr_13() { _tty_isr_indexed(13); } in_drivers void _tty_isr_14() { _tty_isr_indexed(14); } in_drivers void _tty_isr_15() { _tty_isr_indexed(15); } in_drivers void _tty_isr_16() { _tty_isr_indexed(16); } in_drivers void _tty_isr_17() { _tty_isr_indexed(17); } in_drivers void _tty_isr_18() { _tty_isr_indexed(18); } in_drivers void _tty_isr_19() { _tty_isr_indexed(19); } in_drivers void _tty_isr_20() { _tty_isr_indexed(20); } in_drivers void _tty_isr_21() { _tty_isr_indexed(21); } in_drivers void _tty_isr_22() { _tty_isr_indexed(22); } in_drivers void _tty_isr_23() { _tty_isr_indexed(23); } in_drivers void _tty_isr_24() { _tty_isr_indexed(24); } in_drivers void _tty_isr_25() { _tty_isr_indexed(25); } in_drivers void _tty_isr_26() { _tty_isr_indexed(26); } in_drivers void _tty_isr_27() { _tty_isr_indexed(27); } in_drivers void _tty_isr_28() { _tty_isr_indexed(28); } in_drivers void _tty_isr_29() { _tty_isr_indexed(29); } in_drivers void _tty_isr_30() { _tty_isr_indexed(30); } in_drivers void _tty_isr_31() { _tty_isr_indexed(31); } ////////////////////////////////////////////////////////////////////////////////////////// // I/O BLOCK_DEVICE // The three functions below use the three variables _ioc_lock _ioc_done, // and _ioc_status for synchronisation. // - As the IOC component can be used by several programs running in parallel, // the _ioc_lock variable guaranties exclusive access to the device. // The _ioc_read() and _ioc_write() functions use atomic LL/SC to get the lock. // and set _ioc_lock to a non zero value. // The _ioc_write() and _ioc_read() functions are blocking, polling the _ioc_lock // variable until the device is available. // - When the tranfer is completed, the ISR routine activated by the IOC IRQ // set the _ioc_done variable to a non-zero value. Possible address errors detected // by the IOC peripheral are reported by the ISR in the _ioc_status variable. // The _ioc_completed() function is polling the _ioc_done variable, waiting for // tranfer conpletion. When the completion is signaled, the _ioc_completed() function // reset the _ioc_done variable to zero, and releases the _ioc_lock variable. /////////////////////////////////////////////////////////////////////////////////////// // This blocking function is used by the _ioc_read() and _ioc_write() functions // to get _ioc_lock using LL/SC. /////////////////////////////////////////////////////////////////////////////////////// in_drivers void _ioc_get_lock() { register unsigned int* plock = (unsigned int*)&_ioc_lock; asm volatile ("_ioc_llsc: \n" "ll $2, 0(%0) \n" // $2 <= _ioc_lock "bnez $2, _ioc_llsc \n" // retry if busy "li $3, 1 \n" // prepare argument for sc "sc $3, 0(%0) \n" // try to set _ioc_busy "beqz $3, _ioc_llsc \n" // retry if not atomic ::"r"(plock):"$2","$3"); } ////////////////////////////////////////////////////////////////////////////////////// // Transfer data from a memory buffer to the block_device. // - lba : first block index on the disk // - buffer : base address of the memory buffer // - count : number of blocks to be transfered // The source buffer must be in user address space. /////////////////////////////////////////////////////////////////////////////////////// in_drivers void _ioc_write( size_t lba, void* buffer, size_t count, size_t ext ) { volatile unsigned int* ioc_address = (unsigned int*)&seg_ioc_base; // get the lock _ioc_get_lock(); // block_device configuration ioc_address[BLOCK_DEVICE_BUFFER] = (unsigned int)buffer; ioc_address[BLOCK_DEVICE_BUFFER_EXT] = ext; ioc_address[BLOCK_DEVICE_COUNT] = count; ioc_address[BLOCK_DEVICE_LBA] = lba; ioc_address[BLOCK_DEVICE_IRQ_ENABLE] = 1; ioc_address[BLOCK_DEVICE_OP] = BLOCK_DEVICE_WRITE; } /////////////////////////////////////////////////////////////////////////////////////// // Transfer data from a file on the block device to a memory buffer. // - lba : first block index on the disk // - buffer : base address of the memory buffer // - count : number of blocks to be transfered // The destination buffer must be in user address space. // All cache lines corresponding to the the target buffer must be invalidated // for cache coherence. /////////////////////////////////////////////////////////////////////////////////////// in_drivers void _ioc_read( size_t lba, void* buffer, size_t count, size_t ext ) { volatile unsigned int* ioc_address = (unsigned int*)&seg_ioc_base; // get the lock _ioc_get_lock(); // block_device configuration ioc_address[BLOCK_DEVICE_BUFFER] = (unsigned int)buffer; ioc_address[BLOCK_DEVICE_BUFFER_EXT] = ext; ioc_address[BLOCK_DEVICE_COUNT] = count; ioc_address[BLOCK_DEVICE_LBA] = lba; ioc_address[BLOCK_DEVICE_IRQ_ENABLE] = 1; ioc_address[BLOCK_DEVICE_OP] = BLOCK_DEVICE_READ; } /////////////////////////////////////////////////////////////////////////////////////// // This blocking function cheks completion of an I/O transfer and reports errors. // It returns 0 if the transfer is successfully completed. // It returns -1 if an error has been reported. /////////////////////////////////////////////////////////////////////////////////////// in_drivers void _ioc_completed() { // waiting for completion while (_ioc_done == 0) asm volatile("nop"); // reset synchronisation variables _ioc_done = 0; _ioc_lock = 0; if( (_ioc_status != BLOCK_DEVICE_READ_SUCCESS) && (_ioc_status != BLOCK_DEVICE_WRITE_SUCCESS) ) { _tty_get_lock( 0 ); _tty_puts( "ERROR in _ioc_completed()\n"); _tty_release_lock( 0 ); _exit(); } } ////////////////////////////////////////////////////////////////////////////////////// // This ISR must be executed when an IRQ is activated by IOC to signal completion. // It acknowledge the IRQ using the ioc base address, save the status in _ioc_status, // and set the _ioc_done variable to signal completion. // This variable is defined in the drivers.c file. ////////////////////////////////////////////////////////////////////////////////////// in_drivers void _ioc_isr() { int* ioc_address = (int*)&seg_ioc_base; _ioc_status = ioc_address[BLOCK_DEVICE_STATUS]; // save status & reset IRQ _ioc_done = 1; // signals completion } ////////////////////////////////////////////////////////////////////////////////////// // This ISR must be executed when an IRQ is activated by MEMC to signal // an error detected by the TSAR memory cache after a write transaction. // It displays an error message on the TTY terminal allocated to the processor // executing the ISR. ////////////////////////////////////////////////////////////////////////////////////// in_drivers void _mmc_isr() { int* mmc_address = (int*)&seg_mmc_base; unsigned int cluster_xy = _procid() / NB_PROCS_MAX; _tty_printf( "WRITE ERROR signaled by Memory Cache in cluster %x\n", cluster_xy ); } ////////////////////////////////////////////////////////////////////////////////////// // FRAME_BUFFER // The _fb_sync_write & _fb_sync_read functions use a memcpy strategy to implement // the transfer between a data buffer and the frame buffer. // They are blocking until completion of the transfer. ////////////////////////////////////////////////////////////////////////////////////// // _fb_sync_write() // Transfer data from an user buffer to the frame_buffer device with a memcpy. // - offset : offset (in bytes) in the frame buffer // - buffer : base address of the memory buffer // - length : number of bytes to be transfered ////////////////////////////////////////////////////////////////////////////////////// in_drivers void _fb_sync_write( size_t offset, void* buffer, size_t length, size_t ext ) { volatile char* fb = (char*)(void*)&seg_fbf_base + offset; char* ub = buffer; _memcpy( (void*)fb, (void*)ub, length ); } /////////////////////////////////////////////////////////////////////////////////////// // _fb_sync_read() // Transfer data from the frame_buffer device to an user buffer with a memcpy. // - offset : offset (in bytes) in the frame buffer // - buffer : base address of the memory buffer // - length : number of bytes to be transfered ////////////////////////////////////////////////////////////////////////////////////// in_drivers void _fb_sync_read( size_t offset, void* buffer, size_t length, size_t ext ) { volatile char* fb = (char*)(void*)&seg_fbf_base + offset; char* ub = buffer; _memcpy( (void*)ub, (void*)fb, length ); } /////////////////////////////////////////////////////////////////////////////////////// // Release a software spin-lock /////////////////////////////////////////////////////////////////////////////////////// in_drivers void _release_lock(size_t index) { if( index >= NB_LOCKS ) { _tty_get_lock( 0 ); _tty_puts( "ERROR in _release_lock()" ); _tty_release_lock( 0 ); _exit(); } _spin_lock[index] = 0; } /////////////////////////////////////////////////////////////////////////////////////// // Try to take a software spin-lock. // This is a blocking call, as there is a busy-waiting loop, // until the lock is granted to the requester. // There is an internal delay of about 100 cycles between // two successive lock read, to avoid bus saturation. /////////////////////////////////////////////////////////////////////////////////////// in_drivers void _get_lock(size_t index) { if( index >= NB_LOCKS ) { _tty_get_lock( 0 ); _tty_puts( "ERROR in _get_lock()" ); _tty_release_lock( 0 ); _exit(); } register int delay = ((_proctime() +_procid()) & 0xF) << 4; register int * plock = (int *) &_spin_lock[index]; asm volatile ("_locks_llsc: \n" "ll $2, 0(%0) \n" // $2 <= _locks_lock "bnez $2, _locks_delay \n" // random delay if busy "li $3, 1 \n" // prepare argument for sc "sc $3, 0(%0) \n" // try to set _locks_busy "bnez $3, _locks_ok \n" // exit if atomic "_locks_delay: \n" "move $4, %1 \n" // $4 <= delay "_locks_loop: \n" "addi $4, $4, -1 \n" // $4 <= $4 - 1 "beqz $4, _locks_loop \n" // test end delay "j _locks_llsc \n" // retry "_locks_ok: \n" ::"r"(plock),"r"(delay):"$2","$3","$4"); } ////////////////////////////////////////////////////////////////////////////////////// // This function makes a cooperative initialisation of the barrier: // - barrier_count[index] <= N // - barrier_lock[index] <= 0 // All tasks try to initialize the barrier, but the initialisation // is done by only one task, using LL/SC instructions. // This cooperative initialisation is questionnable, // because the barrier can ony be initialised once... ////////////////////////////////////////////////////////////////////////////////////// in_drivers void _barrier_init(unsigned int index, unsigned int value) { register int* pinit = (int*)&_barrier_value[index]; register int* pcount = (int*)&_barrier_count[index]; register int* plock = (int*)&_barrier_lock[index]; if ( index >= NB_BARRIERS ) { _tty_get_lock( 0 ); _tty_puts( "ERROR in _barrier_init()" ); _tty_release_lock( 0 ); _exit(); } // parallel initialisation using atomic instructions LL/SC asm volatile ("_barrier_init_test: \n" "ll $2, 0(%0) \n" // read barrier_value "bnez $2, _barrier_init_done \n" "move $3, %3 \n" "sc $3, 0(%0) \n" // try to write barrier_value "beqz $3, _barrier_init_test \n" "move $3, %3 \n" "sw $3, 0(%1) \n" // barrier_count <= barrier_value "move $3, $0 \n" // "sw $3, 0(%2) \n" // barrier_lock <= 0 "_barrier_init_done: \n" ::"r"(pinit),"r"(pcount),"r"(plock),"r"(value):"$2","$3"); } ////////////////////////////////////////////////////////////////////////////////////// // This blocking function uses a busy_wait technics (on the barrier_lock value), // because the GIET does not support dynamic scheduling/descheduling of tasks. // The barrier state is actually defined by two variables: // _barrier_count[index] define the number of particpants that are waiting // _barrier_lock[index] define the bool variable whose value is polled // The last participant change the value of _barrier_lock[index] to release the barrier... // There is at most 16 independant barriers, and an error is returned // if the barrier index is larger than 15. ////////////////////////////////////////////////////////////////////////////////////// in_drivers void _barrier_wait(unsigned int index) { register int* pcount = (int*)&_barrier_count[index]; register int count; int lock = _barrier_lock[index]; if ( index >= NB_BARRIERS ) { _tty_get_lock( 0 ); _tty_puts( "ERROR in _barrier_wait()" ); _tty_release_lock( 0 ); _exit(); } // parallel decrement _barrier_count[index] using atomic instructions LL/SC // input : pointer on _barrier_count[index] // output : count = _barrier_count[index] (before decrementation) asm volatile ("_barrier_decrement: \n" "ll %0, 0(%1) \n" "addi $3, %0, -1 \n" "sc $3, 0(%1) \n" "beqz $3, _barrier_decrement \n" :"=&r"(count) :"r"(pcount) :"$2","$3"); // the last task re-initializes the barrier_ count variable // and the barrier_lock variable, waking up all other waiting tasks if ( count == 1 ) // last task { _barrier_count[index] = _barrier_value[index]; asm volatile( "sync" ); _barrier_lock[index] = (lock == 0) ? 1 : 0; } else // other tasks { while ( lock == _barrier_lock[index] ) asm volatile("nop"); } } // Local Variables: // tab-width: 4; // c-basic-offset: 4; // c-file-offsets:((innamespace . 0)(inline-open . 0)); // indent-tabs-mode: nil; // End: // // vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4