Changeset 657 for trunk/user/transpose


Ignore:
Timestamp:
Mar 18, 2020, 11:16:59 PM (4 years ago)
Author:
alain
Message:

Introduce remote_buf.c/.h & socket.c/.h files.
Update dev_nic.c/.h files.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/user/transpose/transpose.c

    r656 r657  
    1515// A core is identified by two indexes [cxy,lid] : cxy is the cluster identifier,
    1616// (that is NOT required to be a continuous index), and lid is the local core index,
    17 // (that must be in the [Ø,NCORES-1] range).
     17// (that must be in the [0,NCORES-1] range).
    1818//
    1919// The main() function can run on any core in any cluster. This main thread
    20 // makes the initialisations, uses the pthread_create() syscall to launch (NTHREADS-1)
    21 // other threads in "attached" mode running in parallel the execute() function, calls
    22 // himself the execute() function, wait completion of the (NTHREADS-1) other threads
    23 // with a pthread_join(), and finally calls the instrument() function to display
    24 // and register the instrumentation results when execution is completed.
    25 // All threads run the execute() function, but each thread transposes only
    26 // (NLINES / NTHREADS) lines. This requires that NLINES == k * NTHREADS.
     20// makes the initialisations, loads the input file to the "image_in" buffer,
     21// launches the working threads, calls the instrument() function when all working
     22// threads complete, and saves the result "image_out" buffer to the output file.
    2723//
    28 // The number N of working threads is always defined by the number of cores availables
     24// The number of working threads is always defined by the number of cores available
    2925// in the architecture, but this application supports three placement modes.
    3026// In all modes, the working threads are identified by the [tid] continuous index
     
    5147//   per core, and the same relation between the thread[tid] and the core[cxy][lpid].
    5248//   
    53 // The buf_in[x,y] and buf_out[put buffers containing the direct and transposed images
    54 // are distributed in clusters: each thread[cid][0] allocate a local input buffer
    55 // and load in this buffer all lines that must be handled by the threads sharing the
    56 // same cid, from the mapper of the input image file.
    57 // In the execute function, all threads in the group defined by the cid index read pixels
    58 // from the local buf_in[cid] buffer, and write pixels to all remote buf_out[cid] buffers.
    59 // Finally, each thread displays a part of the transposed image to the frame buffer.
     49// Each working thread[cid][lid] runs the "execute" function, that uses the "buf_in" and
     50// "buf_out" local buffers, containing the direct and transposed images:
     51// Each thread[cid][0] allocates two buf_in[cid] and buf_out[cid] buffers, loads from
     52// "image_in" to buf_in[cid] all lines that must be handled by the threads sharing the
     53// same cid, and finally saves from buf_out[cid] to "image_out" all lines that have been
     54// transposed to buf_out[cid].
     55// Each thread[cid][lid] in the group defined by the cid index reads pixels from the
     56// local buf_in[cid] buffer, and writes pixels to all remote buf_out[cid] buffers.
    6057//
    6158// - The image  must fit the frame buffer size, that must be power of 2.
     
    8279#define THREADS_MAX           (X_MAX * Y_MAX * CORES_MAX)  // max number of threads
    8380
    84 #define IMAGE_SIZE            512                          // image size
    8581#define IMAGE_TYPE            420                          // pixel encoding type
    86 #define INPUT_FILE_PATH       "/misc/couple_512.raw"       // input file pathname
    87 #define OUTPUT_FILE_PATH      "/misc/transposed_512.raw"   // output file pathname
     82
     83//#define IMAGE_SIZE            128                          // image size
     84//#define INPUT_FILE_PATH       "/misc/images_128.raw"       // input file pathname
     85//#define OUTPUT_FILE_PATH      "/misc/transposed_128.raw"   // output file pathname
     86
     87//#define IMAGE_SIZE            256                          // image size
     88//#define INPUT_FILE_PATH       "/misc/lena_256.raw"         // input file pathname
     89#//define OUTPUT_FILE_PATH      "/misc/transposed_256.raw"   // output file pathname
     90
     91//#define IMAGE_SIZE            512                          // image size
     92//#define INPUT_FILE_PATH       "/misc/couple_512.raw"       // input file pathname
     93//#define OUTPUT_FILE_PATH      "/misc/transposed_512.raw"   // output file pathname
     94
     95#define IMAGE_SIZE            1024                         // image size
     96#define INPUT_FILE_PATH       "/misc/philips_1024.raw"     // input file pathname
     97#define OUTPUT_FILE_PATH      "/misc/transposed_1024.raw"  // output file pathname
    8898
    8999#define SAVE_RESULT_FILE      0                            // save result image on disk
    90 #define USE_DQT_BARRIER       1                            // quad-tree barrier if non zero
     100#define USE_DQT_BARRIER       0                            // quad-tree barrier if non zero
    91101
    92102#define NO_PLACEMENT          0                            // uncontrolled thread placement
    93 #define EXPLICIT_PLACEMENT    0                            // explicit threads placement
    94 #define PARALLEL_PLACEMENT    1                            // parallel threads placement
    95 
    96 #define VERBOSE_MAIN          0                            // main function print comments
    97 #define VERBOSE_EXEC          0                            // exec function print comments
    98 #define VERBOSE_INSTRU        0                            // instru function print comments
     103#define EXPLICIT_PLACEMENT    1                            // explicit threads placement
     104#define PARALLEL_PLACEMENT    0                            // parallel threads placement
     105
     106#define VERBOSE_MAIN          1                            // main function print comments
     107#define VERBOSE_MAIN_DETAILED 0                            // main function print comments
     108#define VERBOSE_EXEC          1                            // exec function print comments
    99109
    100110
     
    109119// instrumentation counters for each thread in each cluster
    110120// indexed by [cid][lid] : cluster continuous index / thread local index
     121unsigned int ALOC_START[CLUSTERS_MAX][CORES_MAX] = {{ 0 }};
     122unsigned int ALOC_END  [CLUSTERS_MAX][CORES_MAX] = {{ 0 }};
    111123unsigned int LOAD_START[CLUSTERS_MAX][CORES_MAX] = {{ 0 }};
    112124unsigned int LOAD_END  [CLUSTERS_MAX][CORES_MAX] = {{ 0 }};
    113125unsigned int TRSP_START[CLUSTERS_MAX][CORES_MAX] = {{ 0 }};
    114126unsigned int TRSP_END  [CLUSTERS_MAX][CORES_MAX] = {{ 0 }};
    115 unsigned int DISP_START[CLUSTERS_MAX][CORES_MAX] = {{ 0 }};
    116 unsigned int DISP_END  [CLUSTERS_MAX][CORES_MAX] = {{ 0 }};
    117 
    118 // pointer on buffer containing the input image, maped by the main to the input file
    119 unsigned char *  image_in;
    120 
    121 // pointer on buffer containing the output image, maped by the main to the output file
    122 unsigned char *  image_out;
    123 
    124 // arrays of pointers on distributed buffers indexed by [cid] : cluster continuous index
    125 unsigned char *  buf_in_ptr [CLUSTERS_MAX];
    126 unsigned char *  buf_out_ptr[CLUSTERS_MAX];
     127unsigned int SAVE_START[CLUSTERS_MAX][CORES_MAX] = {{ 0 }};
     128unsigned int SAVE_END  [CLUSTERS_MAX][CORES_MAX] = {{ 0 }};
     129unsigned int FREE_START[CLUSTERS_MAX][CORES_MAX] = {{ 0 }};
     130unsigned int FREE_END  [CLUSTERS_MAX][CORES_MAX] = {{ 0 }};
     131
     132// buffer containing the input image, loaded by the main from input file
     133unsigned char  image_in[IMAGE_SIZE * IMAGE_SIZE];
     134
     135// buffer containing the output image, saved by the main to output file
     136unsigned char  image_out[IMAGE_SIZE * IMAGE_SIZE];
     137
     138// arrays of pointers on distributed buffers indexed by [cid]
     139unsigned char *  buf_in [CLUSTERS_MAX];
     140unsigned char *  buf_out[CLUSTERS_MAX];
     141
     142// pointer and identifier for dynamically allocated FBF window
     143void   *  win_buf;
     144int       wid;
    127145
    128146// synchronisation barrier (all working threads)
     
    205223    }
    206224       
    207     // main thread get identifiers for core executing main
     225    // get identifiers for core executing main
    208226    unsigned int  cxy_main;
    209227    unsigned int  lid_main;
     
    214232    unsigned int nthreads  = nclusters * ncores;
    215233
    216     // main thread get FBF size and type
     234    if( nthreads > IMAGE_SIZE )
     235    {
     236        printf("\n[transpose error] number of threads larger than number of lines\n");
     237        exit( 0 );
     238    }
     239
     240    // get FBF size and type
    217241    unsigned int   fbf_width;
    218242    unsigned int   fbf_height;
     
    220244    fbf_get_config( &fbf_width , &fbf_height , &fbf_type );
    221245
    222     if( (fbf_width != IMAGE_SIZE) || (fbf_height != IMAGE_SIZE) || (fbf_type != IMAGE_TYPE) )
     246    if( (fbf_width < IMAGE_SIZE) || (fbf_height < IMAGE_SIZE) || (fbf_type != IMAGE_TYPE) )
    223247    {
    224248        printf("\n[transpose error] image does not fit FBF size or type\n");
     
    226250    }
    227251
    228     if( nthreads > IMAGE_SIZE )
    229     {
    230         printf("\n[transpose error] number of threads larger than number of lines\n");
    231         exit( 0 );
    232     }
    233 
    234     unsigned int npixels = IMAGE_SIZE * IMAGE_SIZE;
     252    // define total number of pixels
     253    int npixels = IMAGE_SIZE * IMAGE_SIZE;
    235254
    236255    // define instrumentation file name
    237256    if( NO_PLACEMENT )
    238257    {
    239         printf("\n[transpose] %d cluster(s) / %d core(s) / FBF[%d*%d] / PID %x / NO_PLACE\n",
    240         nclusters, ncores, fbf_width, fbf_height, getpid() );
     258        printf("\n[transpose] %d cluster(s) / %d core(s) / <%s> / PID %x / NO_PLACE\n",
     259        nclusters, ncores, INPUT_FILE_PATH , getpid() );
    241260
    242261        // build instrumentation file name
     
    251270    if( EXPLICIT_PLACEMENT )
    252271    {
    253         printf("\n[transpose] %d cluster(s) / %d core(s) / FBF[%d*%d] / PID %x / EXPLICIT\n",
    254         nclusters, ncores, fbf_width, fbf_height, getpid() );
     272        printf("\n[transpose] %d cluster(s) / %d core(s) / <%s> / PID %x / EXPLICIT\n",
     273        nclusters, ncores, INPUT_FILE_PATH , getpid() );
    255274
    256275        // build instrumentation file name
     
    265284    if( PARALLEL_PLACEMENT )
    266285    {
    267         printf("\n[transpose] %d cluster(s) / %d core(s) / FBF[%d*%d] / PID %x / PARALLEL\n",
    268         nclusters, ncores, fbf_width, fbf_height, getpid() );
     286        printf("\n[transpose] %d cluster(s) / %d core(s) / <%s> / PID %x / PARALLEL\n",
     287        nclusters, ncores, INPUT_FILE_PATH , getpid() );
    269288
    270289        // build instrumentation file name
     
    277296    }
    278297
     298    // open a window in FBF
     299    wid = fbf_create_window( 0,             // l_zero
     300                             0,             // p_zero
     301                             IMAGE_SIZE,    // lines
     302                             IMAGE_SIZE,    // pixels
     303                             &win_buf );
     304    if( wid < 0)
     305    {
     306        printf("\n[transpose error] cannot open FBF window\n");
     307        exit( 0 );
     308    }
     309
     310#if  VERBOSE_MAIN
     311printf("\n[transpose] main on core[%x,%d] created FBF window %d / buffer %x\n",
     312cxy_main, lid_main, wid , win_buf );
     313#endif
     314
    279315    // open instrumentation file
    280316    snprintf( pathname , 64 , "/home/%s", filename );
    281317    FILE * f = fopen( pathname , NULL );
     318
    282319    if ( f == NULL )
    283320    {
    284         printf("\n[transpose error] cannot open instrumentation file %s\n", pathname );
     321        printf("\n[transpose error] cannot open instru file %s\n", pathname );
    285322        exit( 0 );
    286323    }
     
    312349
    313350#if  VERBOSE_MAIN
    314 printf("\n[transpose] main on core[%x,%d] completes barrier initialisation\n",
     351printf("\n[transpose] main on core[%x,%d] completed barrier initialisation\n",
    315352cxy_main, lid_main );
    316353#endif
    317354
    318     // main thread open input file
     355    // open input file
    319356    int fd_in = open( INPUT_FILE_PATH , O_RDONLY , 0 );
    320357
     
    329366#endif
    330367
    331     // main thread map image_in buffer to input image file
    332     image_in = (unsigned char *)mmap( NULL,
    333                                       npixels,
    334                                       PROT_READ,
    335                                       MAP_FILE | MAP_SHARED,
    336                                       fd_in,
    337                                       0 );     // offset
    338     if ( image_in == NULL )
    339     {
    340         printf("\n[transpose error] main cannot map buffer to file %s\n", INPUT_FILE_PATH );
    341         exit( 0 );
    342     }
    343 
    344 #if  VERBOSE_MAIN
    345 printf("\n[transpose] main map buffer to file <%s>\n", INPUT_FILE_PATH );
    346 #endif
    347 
    348     // main thread display input image on FBF
    349     if( fbf_write( image_in,
    350                    npixels,
    351                    0 ) )
    352     {
    353         printf("\n[transpose error] main cannot access FBF\n");
    354         exit( 0 );
    355     }
    356 
    357 #if SAVE_RESULT_IMAGE
    358 
    359     // main thread open output file
     368    // open output file
    360369    int fd_out = open( OUTPUT_FILE_PATH , O_CREAT , 0 );
    361370
     
    366375    }
    367376
     377    // move input image to input buffer
     378    if( read( fd_in , image_in , npixels ) != npixels )
     379    {
     380        printf("\n[transpose error] main cannot read input image\n");
     381        exit( 0 );
     382    }
     383
    368384#if  VERBOSE_MAIN
    369 printf("\n[transpose] main open file <%s> / fd = %d\n", OUTPUT_FILE_PATH , fd_out );
    370 #endif
    371 
    372     // main thread map image_out buffer to output image file
    373     image_out = (unsigned char *)mmap( NULL,
    374                                        npixels,
    375                                        PROT_WRITE,
    376                                        MAP_FILE | MAP_SHARED,
    377                                        fd_out,
    378                                        0 );     // offset
    379     if ( image_out == NULL )
    380     {
    381         printf("\n[transpose error] main cannot map buf_out to file %s\n", OUTPUT_FILE_PATH );
    382         exit( 0 );
    383     }
    384 
    385 #if  VERBOSE_MAIN
    386 printf("\n[transpose] main map buffer to file <%s>\n", OUTPUT_FILE_PATH );
    387 #endif
    388 
    389 #endif  // SAVE_RESULT_IMAGE
     385printf("\n[transpose] main moved file <%s> to buf_in\n", INPUT_FILE_PATH );
     386#endif
    390387
    391388    /////////////////////////////////////////////////////////////////////////////////////
     
    417414                }
    418415
    419 #if VERBOSE_MAIN
     416#if VERBOSE_MAIN_DETAILED
    420417printf("\n[transpose] main created thread %d\n", tid );
    421418#endif
     
    450447            }
    451448
    452 #if VERBOSE_MAIN 
    453 printf("\n[transpose] main successfully joined thread %x\n", tid );
     449#if VERBOSE_MAIN_DETAILED
     450printf("\n[transpose] main joined thread %x\n", tid );
    454451#endif
    455452       
     
    500497                            exit( 0 );
    501498                        }
    502 #if VERBOSE_MAIN
     499
     500#if VERBOSE_MAIN_DETAILED
    503501printf("\n[transpose] main created thread[%d] on core[%x,%d]\n", tid, cxy, l );
    504502#endif
     
    536534                    exit( 0 );
    537535                }
    538 #if VERBOSE_MAIN
    539 printf("\n[transpose] main joined thread %d on core[%x,%d]\n", tid , cxy , l );
     536
     537#if VERBOSE_MAIN_DETAILED
     538printf("\n[transpose] main joined thread %d\n", tid );
    540539#endif
    541540            }
     
    567566    /////////////////////////////////////////////////////////////////////////////
    568567
    569     // main thread register instrumentation results
     568    // register instrumentation results
    570569    instrument( f , filename );
    571570
    572     // main thread close input file
     571#if VERBOSE_MAIN
     572printf("\n[transpose] main completed instrumentation\n");
     573#endif
     574
     575/*
     576    printf("\n> ");
     577    getchar();
     578
     579    // move window
     580    if( fbf_move_window( wid , 100 , 100 ) )
     581    {
     582        printf("\n[transpose error] main cannot move FBF window\n");
     583        exit( 0 );
     584    }
     585
     586    printf("\n> ");
     587    getchar();
     588*/   
     589    // save image_out to output file
     590    if( write( fd_out , image_out , npixels ) != npixels )
     591    {
     592        printf("\n[transpose error] main cannot write output image\n");
     593        exit( 0 );
     594    }
     595
     596#if VERBOSE_MAIN
     597printf("\n[transpose] main saved buf_out to output file\n");
     598#endif
     599
     600    // close input file
    573601    close( fd_in );
    574602
    575 #if SAVE_RESULT_IMAGE
    576 
    577     // main thread close output file
     603#if VERBOSE_MAIN
     604printf("\n[transpose] main closed input file\n");
     605#endif
     606
     607    // close output file
    578608    close( fd_out );
    579609
    580 #endif
    581 
    582     // main close instrumentation file
     610#if VERBOSE_MAIN
     611printf("\n[transpose] main closed output file\n");
     612#endif
     613
     614    // close instrumentation file
    583615    fclose( f );
     616
     617#if VERBOSE_MAIN
     618printf("\n[transpose] main closed instrumentation file\n");
     619#endif
     620
     621    // delete FBF window
     622    if( fbf_delete_window( wid ) )
     623    {
     624        printf("\n[transpose error] main cannot delete FBF window\n");
     625        exit( 0 );
     626    }
    584627
    585628    // main thread suicide
     
    597640{
    598641    unsigned long long   date;
     642    unsigned int         l;         // line index for loop
     643    unsigned int         p;         // pixel index for loop
     644    int                  error;
     645
     646    unsigned char      * wbuf = win_buf;
    599647 
    600     unsigned int l;                         // line index for loop
    601     unsigned int p;                         // pixel index for loop
    602 
    603648    pthread_parallel_work_args_t * args = (pthread_parallel_work_args_t *)arguments;
    604649
     
    613658    // get thread abstract identifiers
    614659    unsigned int tid = args->tid;
    615     unsigned int cid = tid / ncores;   
    616     unsigned int lid = tid % ncores;
     660    unsigned int cid = tid / ncores;    // abstract cluster index
     661    unsigned int lid = tid % ncores;    // local thread index
    617662
    618663#if VERBOSE_EXEC
     
    620665unsigned int lpid;
    621666get_core_id( &cxy , &lpid );   // get core physical identifiers
     667#endif
     668
     669#if VERBOSE_EXEC
    622670printf("\n[transpose] exec[%d] on core[%x,%d] enters parallel exec\n",
    623671tid , cxy , lpid );
     
    625673
    626674    get_cycle( &date );
    627     LOAD_START[cid][lid] = (unsigned int)date;
    628 
    629     // build total number of pixels per image
     675    ALOC_START[cid][lid] = (unsigned int)date;
     676
     677    // compute total number of pixels per image
    630678    unsigned int npixels = IMAGE_SIZE * IMAGE_SIZE;     
    631679
    632     // build total number of threads and clusters
     680    // compute total number of threads and clusters
    633681    unsigned int nclusters = x_size * y_size;
    634682    unsigned int nthreads  = nclusters * ncores;
    635683
    636     unsigned int buf_size = npixels / nclusters;     // number of bytes in buf_in & buf_out
    637     unsigned int offset   = cid * buf_size;       // offset in file (bytes)
    638 
    639     unsigned char  * buf_in = NULL;        // private pointer on local input buffer
    640     unsigned char  * buf_out = NULL;       // private pointer on local output buffer
    641 
    642     // Each thread[cid,0] allocate a local buffer buf_in, and register
    643     // the base adress in the global variable buf_in_ptr[cid]
    644     // this local buffer is shared by all threads with the same cid
     684    // compute number of pixels per cid & per thread
     685    unsigned int pixels_per_cid = npixels / nclusters;
     686    unsigned int pixels_per_lid = pixels_per_cid / ncores;
     687
     688    // compute first and last line per thread
     689    unsigned int lines_per_cid = pixels_per_cid / IMAGE_SIZE;
     690    unsigned int lines_per_lid = pixels_per_lid / IMAGE_SIZE;
     691
     692    unsigned int line_first = (cid * lines_per_cid) + (lid * lines_per_lid);
     693    unsigned int line_last  = line_first + lines_per_lid;
     694
     695    // Each thread[cid,0] allocates two local buffers, and registers the base
     696    // addresses in the global arrays buf_in[cid] & buf_out[cid].
     697   
    645698    if( lid == 0 )
    646699    {
    647700        // allocate buf_in
    648         buf_in = (unsigned char *)malloc( buf_size );
    649 
    650         if( buf_in == NULL )
     701        buf_in[cid] = (unsigned char *)malloc( pixels_per_cid );
     702
     703        if( buf_in[cid] == NULL )
    651704        {
    652705            printf("\n[transpose error] thread[%d] cannot allocate buf_in\n", tid );
     
    654707        }
    655708
    656         // register buf_in buffer in global array of pointers
    657         buf_in_ptr[cid] = buf_in;
    658 
    659709#if VERBOSE_EXEC
    660710printf("\n[transpose] exec[%d] on core[%x,%d] allocated buf_in = %x\n",
     
    662712#endif
    663713
    664     }
    665 
    666     // Each thread[cid,0] copy relevant part of the image_in to buf_in
    667     if( lid == 0 )
    668     {
    669         memcpy( buf_in,
    670                 image_in + offset,
    671                 buf_size );
    672     }
    673 
    674 #if VERBOSE_EXEC
    675 printf("\n[transpose] exec[%d] on core[%x,%d] loaded buf_in[%d]\n",
    676 tid , cxy , lpid , cid );
    677 #endif
    678 
    679     // Each thread[cid,0] allocate a local buffer buf_out, and register
    680     // the base adress in the global variable buf_out_ptr[cid]
    681     if( lid == 0 )
    682     {
    683714        // allocate buf_out
    684         buf_out = (unsigned char *)malloc( buf_size );
    685 
    686         if( buf_out == NULL )
     715        buf_out[cid] = (unsigned char *)malloc( pixels_per_cid );
     716
     717        if( buf_out[cid] == NULL )
    687718        {
    688719            printf("\n[transpose error] thread[%d] cannot allocate buf_in\n", tid );
     
    690721        }
    691722
    692         // register buf_in buffer in global array of pointers
    693         buf_out_ptr[cid] = buf_out;
    694 
    695723#if VERBOSE_EXEC
    696724printf("\n[transpose] exec[%d] on core[%x,%d] allocated buf_out = %x\n",
     
    699727
    700728    }
    701    
     729
     730    get_cycle( &date );
     731    ALOC_END[cid][lid] = (unsigned int)date;
     732
     733    /////////////////////////////////
     734    pthread_barrier_wait( &barrier );
     735    /////////////////////////////////
     736
     737    get_cycle( &date );
     738    LOAD_START[cid][lid] = (unsigned int)date;
     739
     740    // all threads copy relevant part of the image_in to buf_in[cid]
     741    memcpy( buf_in[cid] + (lid * pixels_per_lid),
     742            image_in + (cid * pixels_per_cid) + (lid * pixels_per_lid),
     743            pixels_per_lid );
     744
     745#if VERBOSE_EXEC
     746printf("\n[transpose] exec[%d] on core[%x,%d] loaded buf_in[%d]\n",
     747tid , cxy , lpid , cid );
     748#endif
     749
     750    // all local threads copy part of buf_in[cid] to FBF window for display
     751    memcpy( wbuf + (cid * pixels_per_cid) + (lid * pixels_per_lid),
     752            buf_in[cid] + (lid * pixels_per_lid),
     753            pixels_per_lid );
     754
     755#if  VERBOSE_EXEC
     756printf("\n[transpose] exec[%d] on core[%x,%d] loaded buf_in to FBF (first %d / last %d)\n",
     757tid , cxy , lpid , line_first , line_last );
     758#endif
     759
     760    // refresh window
     761    error = fbf_refresh_window( wid , line_first , line_last );
     762
     763    if( error )
     764    {
     765        printf("\n[transpose error] exec[%d] cannot refresh FBF window\n", tid );
     766        exit( 0 );
     767    }
     768
    702769    get_cycle( &date );
    703770    LOAD_END[cid][lid] = (unsigned int)date;
     
    705772    /////////////////////////////////
    706773    pthread_barrier_wait( &barrier );
     774    /////////////////////////////////
    707775
    708776    get_cycle( &date );
    709777    TRSP_START[cid][lid] = (unsigned int)date;
    710778
    711     // All threads contribute to parallel transpose from buf_in to buf_out
     779    // All threads contribute to parallel transpose from buf_in to buf_out:
    712780    // each thread makes the transposition for nlt lines (nlt = IMAGE_SIZE/nthreads)
    713781    // from line [tid*nlt] to line [(tid + 1)*nlt - 1]
    714782    // (p,l) are the absolute pixel coordinates in the source image
    715     // (l,p) are the absolute pixel coordinates in the source image
    716     // (p,l) are the absolute pixel coordinates in the dest image
    717 
    718     get_cycle( &date );
    719     TRSP_START[cid][lid] = (unsigned int)date;
     783    // (l,p) are the absolute pixel coordinates in the dest image
    720784
    721785    unsigned int nlt   = IMAGE_SIZE / nthreads;    // number of lines per thread
     
    729793    unsigned char byte;
    730794
    731     unsigned int first = tid * nlt;     // first line index for a given thread
     795    unsigned int first = tid * nlt;        // first line index for a given thread
    732796    unsigned int last  = first + nlt;      // last line index for a given thread
    733797
     
    742806            src_index = (l % nlc) * IMAGE_SIZE + p;
    743807
    744             byte        = buf_in_ptr[src_cid][src_index];
     808            byte = buf_in[src_cid][src_index];
    745809
    746810            // write one byte to remote buf_out
     
    748812            dst_index = (p % nlc) * IMAGE_SIZE + l;
    749813
    750             buf_out_ptr[dst_cid][dst_index] = byte;
     814            buf_out[dst_cid][dst_index] = byte;
    751815        }
    752816    }
     
    762826    /////////////////////////////////
    763827    pthread_barrier_wait( &barrier );
     828    /////////////////////////////////
    764829
    765830    get_cycle( &date );
    766     DISP_START[cid][lid] = (unsigned int)date;
    767 
    768     // All threads contribute to parallel display
    769     // from local buf_out to frame buffer
    770     unsigned int  npt   = npixels / nthreads;   // number of pixels per thread
    771 
    772     if( fbf_write( &buf_out_ptr[cid][lid * npt],
    773                    npt,
    774                    npt * tid ) )
    775     {
    776         printf("\n[transpose error] thread[%d] cannot access FBF\n", tid );
    777         pthread_exit( &THREAD_EXIT_FAILURE );
    778     }
    779 
    780 #if VERBOSE_EXEC
    781 printf("\n[transpose] exec[%d] on core [%x,%d] completes display\n",
    782 tid, cxy , lpid );
    783 #endif
    784 
    785     get_cycle( &date );
    786     DISP_END[cid][lid] = (unsigned int)date;
    787 
    788     /////////////////////////////////
    789     pthread_barrier_wait( &barrier );
    790 
    791 #if SAVE_RESULT_IMAGE
    792 
    793     // Each thread[cid,0] copy buf_out to relevant part of image_out
    794     if( lid == 0 )
    795     {
    796         memcpy( image_out + offset,
    797                 buf_out,
    798                 buf_size );
    799     }
     831    SAVE_START[cid][lid] = (unsigned int)date;
     832
     833    // each local thread copies part of buf_out[cid] to FBF window for display
     834    memcpy( wbuf + (cid * pixels_per_cid) + (lid * pixels_per_lid),
     835            buf_out[cid] + (lid * pixels_per_lid),
     836            pixels_per_lid );
     837
     838#if  VERBOSE_EXEC
     839printf("\n[transpose] exec[%d] on core[%x,%d] loaded buf_out to FBF (first %d / last %d)\n",
     840tid , cxy , lpid , line_first , line_last );
     841#endif
     842
     843    // refresh window
     844    error = fbf_refresh_window( wid , line_first , line_last );
     845
     846    if( error )
     847    {
     848        printf("\n[transpose error] exec[%d] cannot refresh FBF window\n", tid );
     849        exit( 0 );
     850    }
     851
     852    // each local thread copy relevant part of buf_out to image_out
     853    memcpy( image_out + (cid * pixels_per_cid) + (lid * pixels_per_lid),
     854            buf_out[cid] + (lid * pixels_per_lid),
     855            pixels_per_lid );
    800856
    801857#if VERBOSE_EXEC
     
    804860#endif
    805861
    806 #endif
    807 
    808     // Each thread[cid,0] releases local buffer buf_out
     862    get_cycle( &date );
     863    SAVE_END[cid][lid] = (unsigned int)date;
     864
     865    /////////////////////////////////
     866    pthread_barrier_wait( &barrier );
     867    /////////////////////////////////
     868
     869    get_cycle( &date );
     870    FREE_START[cid][lid] = (unsigned int)date;
     871
     872    // Each thread[cid,0] release local buffers buf_in & buf_out
     873
    809874    if( lid == 0 )
    810875    {
    811         // release buf_out
    812         free( buf_in );
    813         free( buf_out );
    814     }
     876        // release local buffers
     877        free( buf_in[cid] );
     878        free( buf_out[cid] );
     879
     880#if VERBOSE_EXEC
     881printf("\n[transpose] exec[%d] on core[%x,%d] released buf_in & buf_out\n",
     882tid , cxy , lpid );
     883#endif
     884
     885    }
     886
     887    get_cycle( &date );
     888    FREE_END[cid][lid] = (unsigned int)date;
     889
     890    /////////////////////////////////
     891    pthread_barrier_wait( &barrier );
     892    /////////////////////////////////
    815893   
    816894    // thread termination depends on the placement policy
     
    829907        // <work> threads are running in attached mode
    830908        // each thread, but the main, simply exits
    831         if ( tid != tid_main )  pthread_exit( &THREAD_EXIT_SUCCESS );
     909        if ( tid != tid_main ) 
     910        {
     911
     912#if VERBOSE_EXEC
     913printf("\n[transpose] exec[%d] on core[%x,%d] exit\n",
     914tid , cxy , lpid );
     915#endif
     916            pthread_exit( &THREAD_EXIT_SUCCESS );
     917        }
    832918    }
    833919
     
    838924
    839925
    840 ///////////////////////////
     926//////////////////////////
    841927void instrument( FILE * f,
    842928                 char * filename )
    843929{
    844     unsigned int x, y, l;
    845 
    846 #if VERBOSE_EXEC
    847 printf("\n[transpose] main enters instrument\n" );
    848 #endif
    849 
     930    unsigned int cid;
     931    unsigned int l;
     932
     933    unsigned int min_aloc_start = 0xFFFFFFFF;
     934    unsigned int max_aloc_start = 0;
     935    unsigned int min_aloc_ended = 0xFFFFFFFF;
     936    unsigned int max_aloc_ended = 0;
    850937    unsigned int min_load_start = 0xFFFFFFFF;
    851938    unsigned int max_load_start = 0;
     
    856943    unsigned int min_trsp_ended = 0xFFFFFFFF;
    857944    unsigned int max_trsp_ended = 0;
    858     unsigned int min_disp_start = 0xFFFFFFFF;
    859     unsigned int max_disp_start = 0;
    860     unsigned int min_disp_ended = 0xFFFFFFFF;
    861     unsigned int max_disp_ended = 0;
     945    unsigned int min_save_start = 0xFFFFFFFF;
     946    unsigned int max_save_start = 0;
     947    unsigned int min_save_ended = 0xFFFFFFFF;
     948    unsigned int max_save_ended = 0;
     949    unsigned int min_free_start = 0xFFFFFFFF;
     950    unsigned int max_free_start = 0;
     951    unsigned int min_free_ended = 0xFFFFFFFF;
     952    unsigned int max_free_ended = 0;
    862953 
    863     for (x = 0; x < x_size; x++)
    864     {
    865         for (y = 0; y < y_size; y++)
     954    for (cid = 0; cid < (x_size * y_size) ; cid++)
     955    {
     956        for ( l = 0 ; l < ncores ; l++ )
    866957        {
    867             unsigned int cid = y_size * x + y;
    868 
    869             for ( l = 0 ; l < ncores ; l++ )
    870             {
    871                 if (LOAD_START[cid][l] < min_load_start)  min_load_start = LOAD_START[cid][l];
    872                 if (LOAD_START[cid][l] > max_load_start)  max_load_start = LOAD_START[cid][l];
    873                 if (LOAD_END[cid][l]   < min_load_ended)  min_load_ended = LOAD_END[cid][l];
    874                 if (LOAD_END[cid][l]   > max_load_ended)  max_load_ended = LOAD_END[cid][l];
    875                 if (TRSP_START[cid][l] < min_trsp_start)  min_trsp_start = TRSP_START[cid][l];
    876                 if (TRSP_START[cid][l] > max_trsp_start)  max_trsp_start = TRSP_START[cid][l];
    877                 if (TRSP_END[cid][l]   < min_trsp_ended)  min_trsp_ended = TRSP_END[cid][l];
    878                 if (TRSP_END[cid][l]   > max_trsp_ended)  max_trsp_ended = TRSP_END[cid][l];
    879                 if (DISP_START[cid][l] < min_disp_start)  min_disp_start = DISP_START[cid][l];
    880                 if (DISP_START[cid][l] > max_disp_start)  max_disp_start = DISP_START[cid][l];
    881                 if (DISP_END[cid][l]   < min_disp_ended)  min_disp_ended = DISP_END[cid][l];
    882                 if (DISP_END[cid][l]   > max_disp_ended)  max_disp_ended = DISP_END[cid][l];
    883             }
     958            if (ALOC_START[cid][l] < min_aloc_start)  min_aloc_start = ALOC_START[cid][l];
     959            if (ALOC_START[cid][l] > max_aloc_start)  max_aloc_start = ALOC_START[cid][l];
     960            if (ALOC_END[cid][l]   < min_aloc_ended)  min_aloc_ended = ALOC_END[cid][l];
     961            if (ALOC_END[cid][l]   > max_aloc_ended)  max_aloc_ended = ALOC_END[cid][l];
     962            if (LOAD_START[cid][l] < min_load_start)  min_load_start = LOAD_START[cid][l];
     963            if (LOAD_START[cid][l] > max_load_start)  max_load_start = LOAD_START[cid][l];
     964            if (LOAD_END[cid][l]   < min_load_ended)  min_load_ended = LOAD_END[cid][l];
     965            if (LOAD_END[cid][l]   > max_load_ended)  max_load_ended = LOAD_END[cid][l];
     966            if (TRSP_START[cid][l] < min_trsp_start)  min_trsp_start = TRSP_START[cid][l];
     967            if (TRSP_START[cid][l] > max_trsp_start)  max_trsp_start = TRSP_START[cid][l];
     968            if (TRSP_END[cid][l]   < min_trsp_ended)  min_trsp_ended = TRSP_END[cid][l];
     969            if (TRSP_END[cid][l]   > max_trsp_ended)  max_trsp_ended = TRSP_END[cid][l];
     970            if (SAVE_START[cid][l] < min_save_start)  min_save_start = SAVE_START[cid][l];
     971            if (SAVE_START[cid][l] > max_save_start)  max_save_start = SAVE_START[cid][l];
     972            if (SAVE_END[cid][l]   < min_save_ended)  min_save_ended = SAVE_END[cid][l];
     973            if (SAVE_END[cid][l]   > max_save_ended)  max_save_ended = SAVE_END[cid][l];
     974            if (FREE_START[cid][l] < min_free_start)  min_free_start = FREE_START[cid][l];
     975            if (FREE_START[cid][l] > max_free_start)  max_free_start = FREE_START[cid][l];
     976            if (FREE_END[cid][l]   < min_free_ended)  min_free_ended = FREE_END[cid][l];
     977            if (FREE_END[cid][l]   > max_free_ended)  max_free_ended = FREE_END[cid][l];
    884978        }
    885979    }
     
    887981    printf( "\n ------ %s ------\n" , filename );
    888982    fprintf( f , "\n ------ %s ------\n" , filename );
     983
     984    printf( " - ALOC_START : min = %d / max = %d / delta = %d\n",
     985           min_aloc_start, max_aloc_start, max_aloc_start-min_aloc_start );
     986    fprintf( f , " - ALOC_START : min = %d / max = %d / delta = %d\n",
     987           min_aloc_start, max_aloc_start, max_aloc_start-min_aloc_start );
     988
     989    printf( " - ALOC_END   : min = %d / max = %d / delta = %d\n",
     990           min_aloc_start, max_aloc_start, max_aloc_start-min_aloc_start );
     991    fprintf( f , " - ALOC_END   : min = %d / max = %d / delta = %d\n",
     992           min_aloc_start, max_aloc_start, max_aloc_start-min_aloc_start );
    889993
    890994    printf( " - LOAD_START : min = %d / max = %d / delta = %d\n",
     
    9081012           min_trsp_ended, max_trsp_ended, max_trsp_ended-min_trsp_ended );
    9091013
    910     printf( " - DISP_START : min = %d / max = %d / delta = %d\n",
    911            min_disp_start, max_disp_start, max_disp_start-min_disp_start );
    912     fprintf( f , " - DISP_START : min = %d / max = %d / delta = %d\n",
    913            min_disp_start, max_disp_start, max_disp_start-min_disp_start );
    914 
    915     printf( " - DISP_END   : min = %d / max = %d / delta = %d\n",
    916            min_disp_ended, max_disp_ended, max_disp_ended-min_disp_ended );
    917     fprintf( f , " - DISP_END   : min = %d / max = %d / delta = %d\n",
    918            min_disp_ended, max_disp_ended, max_disp_ended-min_disp_ended );
    919 
    920     printf( "\n   Sequencial = %d / Parallel = %d\n", SEQUENCIAL_TIME, PARALLEL_TIME );
    921     fprintf( f , "\n   Sequencial = %d / Parallel = %d\n", SEQUENCIAL_TIME, PARALLEL_TIME );
    922 
     1014    printf( " - SAVE_START : min = %d / max = %d / delta = %d\n",
     1015           min_save_start, max_save_start, max_save_start-min_save_start );
     1016    fprintf( f , " - SAVE_START : min = %d / max = %d / delta = %d\n",
     1017           min_save_start, max_save_start, max_save_start-min_save_start );
     1018
     1019    printf( " - SAVE_END   : min = %d / max = %d / delta = %d\n",
     1020           min_save_ended, max_save_ended, max_save_ended-min_save_ended );
     1021    fprintf( f , " - SAVE_END   : min = %d / max = %d / delta = %d\n",
     1022           min_save_ended, max_save_ended, max_save_ended-min_save_ended );
     1023
     1024    printf( " - FREE_START : min = %d / max = %d / delta = %d\n",
     1025           min_free_start, max_free_start, max_free_start-min_free_start );
     1026    fprintf( f , " - FREE_START : min = %d / max = %d / delta = %d\n",
     1027           min_free_start, max_free_start, max_free_start-min_free_start );
     1028
     1029    printf( " - FREE_END   : min = %d / max = %d / delta = %d\n",
     1030           min_free_start, max_free_start, max_free_start-min_free_start );
     1031    fprintf( f , " - FREE_END   : min = %d / max = %d / delta = %d\n",
     1032           min_free_start, max_free_start, max_free_start-min_free_start );
     1033
     1034
     1035    printf( "\n   Sequencial %d"
     1036            "\n   Parallel   %d"
     1037            "\n   Alloc      %d"
     1038            "\n   Load       %d"
     1039            "\n   Transpose  %d"
     1040            "\n   Save       %d"
     1041            "\n   Free       %d\n" ,
     1042            SEQUENCIAL_TIME / 1000 ,
     1043            PARALLEL_TIME / 1000 ,
     1044            (max_aloc_ended - min_aloc_start) / 1000 ,
     1045            (max_load_ended - min_load_start) / 1000 ,
     1046            (max_trsp_ended - min_trsp_start) / 1000 ,
     1047            (max_save_ended - min_save_start) / 1000 ,
     1048            (max_free_ended - min_free_start) / 1000 );
     1049
     1050    fprintf( f , "\n   Sequencial %d"
     1051            "\n   Parallel   %d"
     1052            "\n   Alloc      %d"
     1053            "\n   Load       %d"
     1054            "\n   Transpose  %d"
     1055            "\n   Save       %d"
     1056            "\n   Free       %d\n" ,
     1057            SEQUENCIAL_TIME / 1000 ,
     1058            PARALLEL_TIME / 1000 ,
     1059            (max_aloc_ended - min_aloc_start) / 1000 ,
     1060            (max_load_ended - min_load_start) / 1000 ,
     1061            (max_trsp_ended - min_trsp_start) / 1000 ,
     1062            (max_save_ended - min_save_start) / 1000 ,
     1063            (max_free_ended - min_free_start) / 1000 );
    9231064}  // end instrument()
    9241065
Note: See TracChangeset for help on using the changeset viewer.