Changeset 588 for trunk/user/fft/fft.c


Ignore:
Timestamp:
Nov 1, 2018, 12:44:35 PM (5 years ago)
Author:
alain
Message:

Introduce a signal-based synchronization between the INIT and KSH processes
to serialize the creation of multiple KSH[i] processes.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/user/fft/fft.c

    r582 r588  
    2929//
    3030// This application uses 4 shared data arrays, that are distributed
    31 // in all clusters (one sub-buffer per cluster):
     31// in all clusters (one buffer per cluster):
    3232// - data[N] contains N input data points, with 2 double per point.
    3333// - trans[N] contains N intermediate data points, 2 double per point.
     
    8686// parameters
    8787
    88 #define DEFAULT_M               6
     88#define DEFAULT_M               14              // 16 K data points
    8989#define MODE                    COSIN
    9090#define CHECK                   0
    91 #define DEBUG_MAIN              1               // trace main() function (detailed if odd)
    92 #define DEBUG_FFT1D             1               // trace FFT1D() function (detailed if odd)
     91#define DEBUG_MAIN              0               // trace main() function (detailed if odd)
     92#define DEBUG_SLAVE             0               // trace slave() function (detailed if odd)
     93#define DEBUG_FFT1D             0               // trace FFT1D() function (detailed if odd)
    9394#define DEBUG_ROW               0               // trace FFTRow() function (detailed if odd)
    9495#define PRINT_ARRAY             0
     
    9798#define SWAP(a,b) { double tmp; tmp = a; a = b; b = tmp; }
    9899
    99 /////////////////////////////////////////////////////////////////////////////////
     100/////////////////////////////////////////////////////////////////////////////////////
     101//             structure containing the arguments for the slave() function
     102/////////////////////////////////////////////////////////////////////////////////////
     103
     104typedef struct args_s
     105{
     106    unsigned int   tid;                    // thread continuous index
     107    unsigned int   main_tid;               // main thread continuous index
     108}
     109args_t;
     110
     111/////////////////////////////////////////////////////////////////////////////////////
    100112//             global variables
    101 /////////////////////////////////////////////////////////////////////////////////
     113/////////////////////////////////////////////////////////////////////////////////////
    102114
    103115unsigned int   x_size;                     // number of clusters per row in the mesh
     
    120132
    121133// instrumentation counters
    122 long           parallel_time[THREADS_MAX]; // total computation time (per thread)
    123 long           sync_time[THREADS_MAX];     // cumulative waiting time in barriers (per thread)
    124 long           init_time;                  // initialisation time (in main)
     134unsigned int   parallel_time[THREADS_MAX]; // total computation time (per thread)
     135unsigned int   sync_time[THREADS_MAX];     // cumulated waiting time in barriers (per thread)
     136unsigned int   init_time;                  // initialisation time (in main)
    125137
    126138// synchronisation barrier (all threads)
     
    131143pthread_t       trdid[THREADS_MAX];        // kernel threads identifiers
    132144pthread_attr_t  attr[THREADS_MAX];         // POSIX thread attributes
    133 unsigned int    args[THREADS_MAX];         // slave function arguments
     145args_t          args[THREADS_MAX];         // slave function arguments
    134146
    135147/////////////////////////////////////////////////////////////////////////////////
     
    137149/////////////////////////////////////////////////////////////////////////////////
    138150
    139 void slave( unsigned int * tid );
     151void slave( args_t * args );
    140152
    141153double CheckSum( void );
     
    215227
    216228    unsigned long long  start_init_cycle;
    217     unsigned long long  start_exec_cycle;
    218     unsigned long long  end_exec_cycle;
     229    unsigned long long  end_init_cycle;
    219230
    220231#if CHECK
     
    224235   
    225236    // get FFT application start cycle
    226     if( get_cycle( &start_init_cycle ) )
    227     {
    228         printf("[FFT ERROR] cannot get start cycle\n");
    229     }
     237    get_cycle( &start_init_cycle );
    230238
    231239    // get platform parameters to compute nthreads & nclusters
     
    279287    main_tid = (((main_x * y_size) + main_y) * ncores) + main_lid;
    280288
    281     printf("\n[FFT] main starts on core[%x,%d] / %d complex points / %d thread(s)\n",
    282     main_cxy, main_lid, N, nthreads );
     289    printf("\n[FFT] starts on core[%x,%d] / %d complex points / %d thread(s) / PID %x\n",
     290    main_cxy, main_lid, N, nthreads, getpid() );
    283291
    284292    // allocate memory for the distributed data[i], trans[i], umain[i], twid[i] buffers
     
    307315    InitT( twid );
    308316
    309     printf("\n[FFT] main complete arrays init\n");
     317    printf("\n[FFT] main completes arrays init\n");
    310318
    311319#if CHECK
     
    344352        for (y = 0 ; y < y_size ; y++)
    345353        {
     354            // compute cluster identifier
     355            cxy = HAL_CXY_FROM_XY( x , y );
     356
    346357            for ( lid = 0 ; lid < ncores ; lid++ )
    347358            {
     
    351362                // set thread attributes
    352363                attr[tid].attributes = PT_ATTR_CLUSTER_DEFINED | PT_ATTR_CORE_DEFINED;
    353                 attr[tid].cxy        = HAL_CXY_FROM_XY( x , y );
     364                attr[tid].cxy        = cxy;
    354365                attr[tid].lid        = lid;
    355366
    356367                // set slave function argument
    357                 args[tid] = tid;
     368                args[tid].tid      = tid;
     369                args[tid].main_tid = main_tid;
    358370
    359371                // create thread
     
    368380                        exit( 0 );
    369381                    }
    370 #if DEBUG_MAIN
    371 printf("\n[FFT] main created thread %x on core %d in cluster(%d,%d) \n", tid, lid, x, y );
     382#if DEBUG_MAIN
     383unsigned long long debug_cycle;
     384get_cycle( &debug_cycle );
     385printf("\n[FFT] main created thread %x on core[%x,%d] / cycle %d\n",
     386tid, cxy, lid, (unsigned int)debug_cycle );
    372387#endif
    373388                }
     
    377392
    378393    // register sequencial initalisation completion cycle
    379     get_cycle( &start_exec_cycle );
    380     init_time = (long)(start_exec_cycle - start_init_cycle);
    381     printf("\n[FFT] main enter parallel execution\n");
     394    get_cycle( &end_init_cycle );
     395    init_time = (unsigned int)(end_init_cycle - start_init_cycle);
     396
     397    printf("\n[FFT] main enters parallel execution\n");
    382398   
    383     // main execute itself the slave() function
     399    // main itself executes the slave() function
    384400    slave( &args[main_tid] );
    385401
     
    396412                if( tid != main_tid )
    397413                {
    398 #if DEBUG_MAIN
    399 printf("\n[FFT] main join thread %x\n", trdid[tid] );
    400 #endif
    401414                    if( pthread_join( trdid[tid] , NULL ) )
    402415                    {
    403                         printf("\n[FFT ERROR] joining thread %x\n", trdid[tid] );
     416                        printf("\n[FFT ERROR] in main thread joining thread %x\n", tid );
    404417                        exit( 0 );
    405418                    }
     419                   
     420#if DEBUG_MAIN
     421printf("\n[FFT] main thread %d joined thread %d\n", main_tid, tid );
     422#endif
    406423
    407424                }
     
    409426        }
    410427    }
    411 
    412     // register parallel execution completion cycle
    413     get_cycle( &end_exec_cycle );
    414     printf("\n[FFT] complete parallel execution / cycle %d\n", (long)end_exec_cycle );
    415428
    416429#if PRINT_ARRAY
     
    433446
    434447    // open instrumentation file
    435     FILE * f = fopen( string , NULL );
    436     if ( f == NULL )
    437     {
    438         printf("\n[FFT ERROR] cannot open instrumentation file %s\n", string );
    439         exit( 0 );
    440     }
     448//  FILE * f = fopen( string , NULL );
     449//  if ( f == NULL )
     450//  {
     451//      printf("\n[FFT ERROR] cannot open instrumentation file %s\n", string );
     452//      exit( 0 );
     453//  }
    441454
    442455    snprintf( string , 256 , "\n[FFT] instrumentation : (%dx%dx%d) threads / %d points\n",
     
    445458    // display on terminal, and save to instrumentation file
    446459    printf( "%s" , string );
    447     fprintf( f , string );
    448 
     460//  fprintf( f , string );
     461
     462    for (tid = 0 ; tid < nthreads ; tid++)
     463    {
     464        snprintf( string , 256 , "\ntid %d : Init %d / Parallel %d / Sync %d\n",
     465        tid, init_time, parallel_time[tid], sync_time[tid] );
     466
     467        // display on terminal, and save to instrumentation file
     468        printf("%s" , string );
     469//      fprintf( f , string );
     470    }
     471
     472    // close instrumentation file and exit
     473//  fclose( f );
     474                             
     475
     476/*
    449477    long min_para = parallel_time[0];
    450478    long max_para = parallel_time[0];
     
    461489
    462490    snprintf( string , 256 , "\n      Init       Parallel   Barrier\n"
    463                              "MIN : %d  |  %d  |  %d   (cycles)\n"
    464                              "MAX : %d  |  %d  |  %d   (cycles)\n",
     491                             "MIN : %d\t | %d\t | %d\t   (cycles)\n"
     492                             "MAX : %d\t | %d\t | %d\t   (cycles)\n",
    465493                             (int)init_time, (int)min_para, (int)min_sync,
    466494                             (int)init_time, (int)max_para, (int)max_sync );
    467 
    468     // display on terminal, and save to instrumentation file
    469     printf("%s" , string );
    470     fprintf( f , string );
    471 
    472     // close instrumentation file and exit
    473     fclose( f );
    474 
    475     exit( 0 );
     495*/
     496
     497    pthread_exit( NULL );
    476498
    477499} // end main()
     
    480502// This function is executed in parallel by all threads.
    481503///////////////////////////////////////////////////////////////
    482 void slave( unsigned int * tid )
     504void slave( args_t * args )
    483505{
    484506    unsigned int   i;
    485     unsigned int   MyNum;           // continuous thread index
     507    unsigned int   MyNum;           // this thread index
     508    unsigned int   MainNum;         // main thread index
    486509    unsigned int   MyFirst;         // index first row allocated to thread
    487510    unsigned int   MyLast;          // index last row allocated to thread
     
    495518    unsigned long long  barrier_stop;
    496519
    497     MyNum = *tid;
    498 
    499     // BARRIER before parallel exec
    500     pthread_barrier_wait( &barrier );
     520    MyNum   = args->tid;
     521    MainNum = args->main_tid;
    501522
    502523    // initialise instrumentation
    503524    get_cycle( &parallel_start );
     525
     526#if DEBUG_SLAVE
     527printf("\n[FFT] %s : thread %x enter / cycle %d\n",
     528__FUNCTION__, MyNum, (unsigned int)parallel_start );
     529#endif
    504530
    505531    // allocate and initialise local array upriv[]
     
    526552    get_cycle( &barrier_stop );
    527553
    528     sync_time[MyNum] = (long)(barrier_stop - barrier_start);
     554    sync_time[MyNum] += (barrier_stop - barrier_start);
    529555
    530556#if CHECK
     
    540566    // register computation time
    541567    get_cycle( &parallel_stop );
    542     parallel_time[MyNum] = (long)(parallel_stop - parallel_start);
    543 
    544     // exit if MyNum != 0
    545     if( MyNum ) pthread_exit( 0 );
     568    parallel_time[MyNum] = (parallel_stop - parallel_start);
     569
     570#if DEBUG_SLAVE
     571printf("\n[FFT] %s : thread %x exit / parallel_time %d / sync_time %d / cycle %d\n",
     572__FUNCTION__, MyNum, parallel_time[MyNum], sync_time[MyNum], (unsigned int)parallel_stop );
     573#endif
     574
     575    // exit only if MyNum != MainNum
     576    if( MyNum != MainNum ) pthread_exit( NULL );
    546577
    547578}  // end slave()
     
    772803
    773804#if DEBUG_FFT1D
    774 printf("\n[FFT] %s : thread %x enter / first %d / last %d\n",
    775 __FUNCTION__, MyNum, MyFirst, MyLast );
     805unsigned long long cycle;
     806get_cycle( &cycle );
     807printf("\n[FFT] %s : thread %x enter / first %d / last %d / cycle %d\n",
     808__FUNCTION__, MyNum, MyFirst, MyLast, (unsigned int)cycle );
    776809#endif
    777810
     
    780813
    781814#if( DEBUG_FFT1D & 1 )
    782 unsigned long long cycle;
    783815get_cycle( &cycle );
    784816printf("\n[FFT] %s : thread %x after first transpose / cycle %d\n",
     
    891923if( PRINT_ARRAY ) PrintArray( x , N );
    892924#endif
    893 
    894925
    895926}  // end FFT1D()
Note: See TracChangeset for help on using the changeset viewer.