Ignore:
Timestamp:
Feb 12, 2014, 9:51:23 AM (10 years ago)
Author:
alain
Message:
  • Update the giet_tsar to support the vci_iopic component in the tsar_generic_leti platform.
  • Modify the soft_transpose_giet application to make the graphic display on the frame buffer optional and to introduce a systematic auto-check.
Location:
trunk/softs/soft_transpose_giet
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/softs/soft_transpose_giet/ldscript

    r623 r629  
    1010peripherals are not present in the architecture */
    1111
    12 seg_reset_base  = 0x10000000;       /* le code de boot */
     12seg_reset_base  = 0x00000000;       /* boot code */
    1313
    14 seg_kcode_base  = 0x00001000;       /* le code du système */
    15 seg_kdata_base  = 0x00010000;       /* les donnees du système */
    16 seg_kunc_base   = 0x00020000;       /* les données non cachées du système */
     14seg_kcode_base  = 0x00010000;       /* kernel code */
     15seg_kdata_base  = 0x00020000;       /* kernel cacheable data */
     16seg_kunc_base   = 0x00030000;       /* kernel uncacheable data */
    1717
    18 seg_code_base   = 0x00030000;       /* le code utilisateur */
    19 seg_data_base   = 0x00040000;       /* les données utilisateur */
     18seg_code_base   = 0x00040000;       /* application code */
     19seg_data_base   = 0x00050000;       /* application data */
    2020
    21 seg_heap_base   = 0x00100000;       /* le tas utilisateur */
    22 seg_stack_base  = 0x00400000;       /* la pile utilisateur */
     21seg_heap_base   = 0x00100000;       /* heaps for application tasks */
     22seg_stack_base  = 0x00300000;       /* stacks */
    2323
    24 seg_xcu_base    = 0xF0000000;       /* controleur XCU */
    25 seg_dma_base    = 0xF1000000;       /* controleur DMA */
    26 seg_tty_base    = 0xF2000000;       /* controleur TTY */
    27 seg_fbf_base    = 0xF3000000;       /* controleur FBF */
    28 seg_ioc_base    = 0xF4000000;       /* controleur IOC */
     24seg_xcu_base    = 0xF0000000;       /* controler XCU */
     25seg_tty_base    = 0xF4000000;       /* controler TTY */
     26seg_fbf_base    = 0xF3000000;       /* controler FBF */
     27seg_ioc_base    = 0xF2000000;       /* controler IOC */
     28seg_nic_base    = 0xF7000000;       /* controler NIC */
     29seg_cma_base    = 0xF8000000;       /* controler CMA */
     30seg_pic_base    = 0xF9000000;       /* controler PIC */
     31seg_mmc_base    = 0xE0000000;       /* config    MMC */
    2932
    3033
  • trunk/softs/soft_transpose_giet/main.c

    r248 r629  
    11
     2#include "hard_config.h"
    23#include "stdio.h"
    34#include "limits.h"
    45#include "../giet_tsar/block_device.h"
    56
    6 #define NL              512
    7 #define NP              512
    8 #define NB_IMAGES       1
    9 #define NB_CLUSTER_MAX  256
    10 
    11 #define PRINTF(...)      ({ if (proc_id == 0) { tty_printf(__VA_ARGS__); } })
    12 
    13 //#define DISPLAY_ONLY
    14 
    15 ///////////////////////////////////////////
     7#define NL              128
     8#define NP              128
     9#define NB_IMAGES       5
     10
     11#define PRINTF(...)      ({ if (lpid == 0) { _tty_printf(__VA_ARGS__); } })
     12
     13#define DISPLAY_OK
     14
    1615// tricks to read parameters from ldscript
    17 ///////////////////////////////////////////
    18 
    19 struct plaf;
    20 
    21 extern struct plouf seg_ioc_base;
     16extern struct plaf seg_ioc_base;
    2217extern struct plaf seg_heap_base;
    23 extern struct plaf NB_PROCS;
    24 extern struct plaf NB_CLUSTERS;
     18
     19// global variables stored in seg_data (cluster 0)
     20
     21// instrumentation counters for each processor
     22unsigned int LOAD_START[256][4];
     23unsigned int LOAD_END  [256][4];
     24unsigned int TRSP_START[256][4];
     25unsigned int TRSP_END  [256][4];
     26unsigned int DISP_START[256][4];
     27unsigned int DISP_END  [256][4];
     28
     29// checksum variables
     30unsigned check_line_before[NL];
     31unsigned check_line_after[NL];
    2532
    2633/////////////
    27 void main(){
    28    unsigned int frame = 0;
    29    unsigned int date  = 0;
    30 
    31    unsigned int c; // cluster index for loops
    32    unsigned int l; // line index for loops
    33    unsigned int p; // pixel index for loops
    34 
    35    unsigned int proc_id       = procid();                      // processor id
    36    unsigned int nlocal_procs  = (unsigned int) &NB_PROCS;      // number of processors per cluster
    37    unsigned int nclusters     = (unsigned int) &NB_CLUSTERS;   // number of clusters
    38    unsigned int local_id      = proc_id % nlocal_procs;        // local processor id
    39    unsigned int cluster_id    = proc_id / nlocal_procs;        // cluster id
    40    unsigned int base          = (unsigned int) &seg_heap_base; // base address for shared buffers
    41    unsigned int increment     = 0x80000000 / nclusters * 2;    // cluster increment
    42    unsigned int nglobal_procs = nclusters * nlocal_procs;      // number of tasks
    43    unsigned int npixels       = NP * NL;                       // number of pixel per frame
    44    
    45    unsigned int * ioc_address = (unsigned int *) &seg_ioc_base;
    46    unsigned int block_size    = ioc_address[BLOCK_DEVICE_BLOCK_SIZE];
    47    unsigned int nblocks       = npixels / block_size;   // number of blocks per frame
    48 
    49    PRINTF("\n *** Entering main at cycle %d ***\n\n", proctime());
     34void main()
     35{
     36    unsigned int image = 0;
     37
     38    unsigned int l;                                             // line index for loops
     39    unsigned int p;                                             // pixel index for loops
     40
     41    unsigned int * ioc_address = (unsigned int *) &seg_ioc_base;
     42    unsigned int block_size    = ioc_address[BLOCK_DEVICE_BLOCK_SIZE];
     43
     44    unsigned int proc_id     = _procid();                       // processor id
     45    unsigned int nclusters   = X_SIZE*Y_SIZE;                   // number of clusters
     46    unsigned int lpid        = proc_id % NB_PROCS_MAX;          // local processor id
     47    unsigned int cluster_xy  = proc_id / NB_PROCS_MAX;          // cluster index (8 bits format)
     48    unsigned int x           = cluster_xy >> Y_WIDTH;           // x coordinate
     49    unsigned int y           = cluster_xy & ((1<<Y_WIDTH)-1);   // y coordinate
     50    unsigned int ntasks      = nclusters * NB_PROCS_MAX;        // number of tasks
     51    unsigned int npixels     = NP * NL;                         // number of pixel per image
     52    unsigned int nblocks     = npixels / block_size;            // number of blocks per image
     53
     54    // task_id is a "continuous" index for the the task running on processor (x,y,lpid)
     55    unsigned int task_id = (((x * Y_SIZE) + y) * NB_PROCS_MAX) + lpid;
     56
     57    // cluster_id is a "continuous" index for cluster(x,y)
     58    unsigned int cluster_id  = (x * Y_SIZE) + y;               
     59
     60    PRINTF("\n *** Proc 0 in cluster [%d,%d] enters main at cycle %d ***\n\n",
     61           x, y, _proctime());
    5062
    5163   //  parameters checking
    52    if ((nlocal_procs != 1) && (nlocal_procs != 2) && (nlocal_procs != 4)){
    53       PRINTF("NB_PROCS must be 1, 2 or 4\n");
    54       exit(1);
     64   if ((NB_PROCS_MAX != 1) && (NB_PROCS_MAX != 2) && (NB_PROCS_MAX != 4))
     65   {
     66      PRINTF("NB_PROCS_MAX must be 1, 2 or 4\n");
     67      _exit();
    5568   }
    5669   if ((nclusters != 1) && (nclusters != 2) && (nclusters != 4) && (nclusters != 8) &&
    5770         (nclusters != 16) && (nclusters != 32) && (nclusters != 64) && (nclusters != 128) &&
    58          (nclusters != 256)){
     71         (nclusters != 256))
     72   {
    5973      PRINTF("NB_CLUSTERS must be a power of 1 between 1 and 256\n");
    60       exit(1);
     74      _exit();
    6175   }
    62    if (nglobal_procs > 1024){
    63       PRINTF("NB_PROCS * NB_CLUSTERS cannot be larger than 1024\n");
    64       exit(1);
    65    }
    66    if (proc_id >= nglobal_procs){
    67       PRINTF("processor id %d larger than NB_CLUSTERS*NB_PROCS\n", proc_id);
    68       exit(1);
    69    }
    70 
    71    // Arrays of pointers on the shared, distributed buffers containing the frames
    72    // These arrays are indexed by the cluster index (sized for the worst case : 256 clusters)
    73    unsigned char * A[NB_CLUSTER_MAX];
    74    unsigned char * B[NB_CLUSTER_MAX];
    75 
    76    // Arrays of pointers on the instrumentation arrays
    77    // These arrays are indexed by the cluster index (sized for the worst case : 256 clusters)
    78    // each pointer points on the base adress of an array of NPROCS unsigned int
    79    unsigned int * LOAD_START[NB_CLUSTER_MAX];
    80    unsigned int * LOAD_END[NB_CLUSTER_MAX];
    81    unsigned int * TRSP_START[NB_CLUSTER_MAX];
    82    unsigned int * TRSP_END[NB_CLUSTER_MAX];
    83    unsigned int * DISP_START[NB_CLUSTER_MAX];
    84    unsigned int * DISP_END[NB_CLUSTER_MAX];
    85 
    86    // shared buffers address definition
    87    // from the seg_heap_base and increment depending on the cluster index
    88    // These arrays of pointers are identical and replicated in the stack of each task
    89    for (c = 0; c < nclusters; c++){
    90       A[c]          = (unsigned char *) (base                                  + increment * c);
    91       B[c]          = (unsigned char *) (base +     npixels                    + increment * c);
    92       LOAD_START[c] = (unsigned int *)  (base + 2 * npixels                    + increment * c);
    93       LOAD_END[c]   = (unsigned int *)  (base + 2 * npixels +     nlocal_procs + increment * c);
    94       TRSP_START[c] = (unsigned int *)  (base + 2 * npixels + 2 * nlocal_procs + increment * c);
    95       TRSP_END[c]   = (unsigned int *)  (base + 2 * npixels + 3 * nlocal_procs + increment * c);
    96       DISP_START[c] = (unsigned int *)  (base + 2 * npixels + 4 * nlocal_procs + increment * c);
    97       DISP_END[c]   = (unsigned int *)  (base + 2 * npixels + 5 * nlocal_procs + increment * c);
    98    }
     76
     77   // pointers on the distributed buffers containing the images,
     78   // allocated in the heap segment: each buffer contains 256 Kbytes
     79   unsigned char* buf_in  = (unsigned char*)&seg_heap_base;
     80   unsigned char* buf_out = buf_in + 0x00100000;
    9981
    10082   PRINTF("NB_CLUSTERS     = %d\n", nclusters);
    101    PRINTF("NB_LOCAL_PROCS  = %d\n", nlocal_procs);
    102    PRINTF("NB_GLOBAL_PROCS = %d\n", nglobal_procs);
     83   PRINTF("NB_LOCAL_PROCS  = %d\n", NB_PROCS_MAX);
     84   PRINTF("NB_TASKS        = %d\n", ntasks);
    10385   PRINTF("NB_PIXELS       = %d\n", npixels);
    10486   PRINTF("BLOCK_SIZE      = %d\n", block_size);
    10587   PRINTF("NB_BLOCKS       = %d\n\n", nblocks);
    10688
    107 
    108    PRINTF("*** Starting barrier init at cycle %d ***\n", proctime());
     89   PRINTF("*** Proc 0 in cluster [%d,%d] starts barrier init at cycle %d\n",
     90          x, y, _proctime());
    10991
    11092   //  barriers initialization
    111    barrier_init(0, nglobal_procs);
    112    barrier_init(1, nglobal_procs);
    113    barrier_init(2, nglobal_procs);
    114 
    115    PRINTF("*** Completing barrier init at cycle %d ***\n", proctime());
    116 
    117    // Main loop (on frames)
    118    while (frame < NB_IMAGES){
    119       // pseudo parallel load from disk to A[c] buffer : nblocks/nclusters blocks
    120       // only task running on processor with (local_id == 0) does it
    121 
    122       if (local_id == 0){
    123          int p;
    124 
    125          date = proctime();
    126          PRINTF("\n*** Starting load for frame %d at cycle %d\n", frame, date);
     93   _barrier_init(0, ntasks);
     94   _barrier_init(1, ntasks);
     95   _barrier_init(2, ntasks);
     96   _barrier_init(3, ntasks);
     97
     98   PRINTF("*** Proc 0 in cluster [%d,%d] completes barrier init at cycle %d\n",
     99          x, y, _proctime());
     100
     101   // Main loop (on images)
     102   while (image < NB_IMAGES)
     103   {
     104      // pseudo parallel load from disk to buf_in buffer : nblocks/nclusters blocks
     105      // only task running on processor with (lpid == 0) does it
     106
     107      LOAD_START[cluster_id][lpid] = _proctime();
     108
     109      if (lpid == 0)
     110      {
     111         _ioc_read( ((image * nblocks) + ((nblocks * cluster_id) / nclusters)),
     112                    buf_in,
     113                    (nblocks / nclusters),
     114                    cluster_xy );
     115
     116         PRINTF("\n*** Proc 0 in cluster [%d,%d] starts load for image %d at cycle %d\n",
     117                x, y, image, _proctime() );
     118
     119         _ioc_completed();
     120
     121         PRINTF("*** Proc 0 in cluster [%d,%d] completes load for image %d at cycle %d\n",
     122                x, y, image, _proctime() );
     123      }
     124
     125      LOAD_END[cluster_id][lpid] = _proctime();
     126
     127      _barrier_wait(0);
     128
     129      // parallel transpose from buf_in to buf_out buffers
     130      // each processor makes the transposition for (NL/ntasks) lines
     131      // (p,l) are the pixel coordinates in the source image
     132
     133      PRINTF("\n*** proc 0 in cluster [%d,%d] starts transpose for image %d at cycle %d\n",
     134             x, y, image, _proctime());
     135
     136      TRSP_START[cluster_id][lpid] = _proctime();
     137
     138      unsigned int nlt   = NL / ntasks;                // number of lines per processor
     139      unsigned int first = task_id * nlt;              // first line index
     140      unsigned int last  = first + nlt;                // last line index
     141      unsigned int nlines_clusters = NL / nclusters;   // number of lines per cluster
     142      unsigned int npix_clusters   = NP / nclusters;   // number of pixels per cluster
     143
     144      unsigned int src_cluster;
     145      unsigned int src_index;
     146      unsigned int dst_cluster;
     147      unsigned int dst_index;
     148
     149      unsigned int word;
     150
     151      for (l = first; l < last; l++)
     152      {
     153         PRINTF("    - processing line %d\n", l);
     154
     155         check_line_before[l] = 0;
    127156         
    128          for (p = 0; p < nlocal_procs; p++){
    129             LOAD_START[cluster_id][p] = date;
    130          }
    131          if (ioc_read(frame * nblocks + nblocks * cluster_id / nclusters, A[cluster_id], nblocks / nclusters)){
    132             PRINTF("echec ioc_read\n");
    133             exit();
    134          }
    135          if (ioc_completed()){
    136             PRINTF("echec ioc_completed\n");
    137             exit();
    138          }
    139 
    140          date = proctime();
    141          PRINTF("*** Completing load for frame %d at cycle %d\n", frame, date);
    142          for (p = 0; p < nlocal_procs; p++){
    143             LOAD_END[cluster_id][p] = date;
     157         // in each iteration we read one word an write four bytes
     158         for (p = 0 ; p < NP ; p = p+4)
     159         {
     160            // read one word, with extended address from local buffer
     161            src_cluster = cluster_xy;
     162            src_index   = (l % nlines_clusters) * NP + p;
     163            word = _word_extended_read( src_cluster,
     164                                        (unsigned int)&buf_in[src_index] );
     165
     166            unsigned char byte0 = (unsigned char)( word      & 0x000000FF);
     167            unsigned char byte1 = (unsigned char)((word>>8)  & 0x000000FF);
     168            unsigned char byte2 = (unsigned char)((word>>16) & 0x000000FF);
     169            unsigned char byte3 = (unsigned char)((word>>24) & 0x000000FF);
     170
     171            // compute checksum
     172            check_line_before[l] = check_line_before[l] + byte0 + byte1 + byte2 + byte3;
     173
     174            // write four bytes with extended address to four remote buffers
     175            dst_cluster = (((p / npix_clusters) / Y_SIZE) << Y_WIDTH) +
     176                           ((p / npix_clusters) % Y_SIZE);
     177            dst_index   = (p % npix_clusters) * NL + l;
     178            _byte_extended_write( dst_cluster,
     179                                  (unsigned int)&buf_out[dst_index],
     180                                  byte0 );
     181
     182            dst_cluster = ((((p+1) / npix_clusters) / Y_SIZE) << Y_WIDTH) +
     183                           (((p+1) / npix_clusters) % Y_SIZE);
     184            dst_index   = ((p+1) % npix_clusters) * NL + l;
     185            _byte_extended_write( dst_cluster,
     186                                  (unsigned int)&buf_out[dst_index],
     187                                  byte1 );
     188
     189            dst_cluster = ((((p+2) / npix_clusters) / Y_SIZE) << Y_WIDTH) +
     190                           (((p+2) / npix_clusters) % Y_SIZE);
     191            dst_index   = ((p+2) % npix_clusters) * NL + l;
     192            _byte_extended_write( dst_cluster,
     193                                  (unsigned int)&buf_out[dst_index],
     194                                  byte2 );
     195
     196            dst_cluster = ((((p+3) / npix_clusters) / Y_SIZE) << Y_WIDTH) +
     197                           (((p+3) / npix_clusters) % Y_SIZE);
     198            dst_index   = ((p+3) % npix_clusters) * NL + l;
     199            _byte_extended_write( dst_cluster,
     200                                  (unsigned int)&buf_out[dst_index],
     201                                  byte3 );
    144202         }
    145203      }
    146204
    147       barrier_wait(0);
    148 
    149       // parallel transpose from A to B buffers
    150       // each processor makes the transposition for (NL/nglobal_procs) lines
    151       // (p,l) are the (x,y) pixel coordinates in the source frame
    152 
    153 #ifndef DISPLAY_ONLY
    154       date = proctime();
    155       PRINTF("\n*** Starting transpose for frame %d at cycle %d\n", frame, date);
    156       TRSP_START[cluster_id][local_id] = date;
    157 
    158       unsigned int nlt   = NL / nglobal_procs; // Nombre de ligne à traiter par processeur
    159       unsigned int first = proc_id * nlt;      // Index de la première ligne à traiter pour le proc courant (celui qui exécute le code)
    160       unsigned int last  = first + nlt;        // Index de la dernière ligne
    161       unsigned int nlines_clusters = NL / nclusters; // Nombre de lignes à traiter par cluster
    162       unsigned int npix_clusters   = NP / nclusters; // Nombre de pixels par ligne à traiter par cluster
    163 
    164       for (l = first; l < last; l++){
    165          PRINTF("    - processing line %d\n", l);
    166          for (p = 0; p < NP; p++){
    167             unsigned int source_index   = (l % nlines_clusters) * NP + p;
    168             unsigned int dest_cluster   = p / npix_clusters;
    169             unsigned int dest_index     = (p % npix_clusters) * NL + l;
    170             B[dest_cluster][dest_index] = A[cluster_id][source_index];
     205      PRINTF("*** proc 0 in cluster [%d,%d] complete transpose for image %d at cycle %d\n",
     206             x, y, image, _proctime() );
     207
     208      TRSP_END[cluster_id][lpid] = _proctime();
     209
     210      _barrier_wait(1);
     211
     212      // optional parallel display from local buf_out to frame buffer
     213
     214#ifdef DISPLAY_OK
     215
     216      PRINTF("\n*** proc 0 in cluster [%d,%d] starts display for image %d at cycle %d\n",
     217             x, y, image, _proctime() );
     218
     219      DISP_START[cluster_id][lpid] = _proctime();
     220
     221      unsigned int npxt = npixels / ntasks;   // number of pixels per task
     222      unsigned int buffer = (unsigned int)buf_out + npxt*lpid;
     223
     224      _fb_sync_write( npxt * task_id, buffer, npxt, cluster_xy );
     225
     226      PRINTF("*** Proc 0 in cluster [%d,%d] completes display for image %d at cycle %d\n",
     227             x, y, image, _proctime() );
     228
     229      DISP_END[cluster_id][lpid] = _proctime();
     230
     231      _barrier_wait(2);
     232
     233#endif
     234
     235      // Instrumentation and checksum (done by processor 0 in cluster 0)
     236      if (proc_id == 0)
     237      {
     238         PRINTF("\n*** Proc [0,0,0] starts checks for image %d at cycle %d\n\n",
     239                  image, _proctime() );
     240
     241         unsigned int success = 1;
     242
     243         for ( l = 0 ; l < NL ; l++ )
     244         {
     245            check_line_after[l] = 0;
     246
     247            for ( p = 0 ; p < NP ; p++ )
     248            {
     249               // read one byte in remote buffer
     250               src_cluster = (((p / npix_clusters) / Y_SIZE) << Y_WIDTH) +
     251                             ((p / npix_clusters) % Y_SIZE);
     252               src_index   = (p % npix_clusters) * NL + l;
     253
     254               unsigned char byte = _byte_extended_read( src_cluster,
     255                                                         (unsigned int)&buf_out[src_index] );
     256
     257               check_line_after[l] = check_line_after[l] + byte;
     258            }
     259
     260            PRINTF(" - l = %d / before = %d / after = %d \n",
     261                   l, check_line_before[l], check_line_after[l] );
     262
     263            if ( check_line_before[l] != check_line_after[l] ) success = 0;
    171264         }
    172       }
    173 
    174       date = proctime();
    175       PRINTF("*** Completing transpose for frame %d at cycle %d\n", frame, date);
    176       TRSP_END[cluster_id][local_id] = date;
    177       barrier_wait(1);
    178 #endif
    179 
    180       // parallel display from B[c] to frame buffer
    181       // each processor uses its private dma to display NL*NP/nglobal_procs pixels
    182 
    183       date = proctime();
    184       PRINTF("\n*** Starting display for frame %d at cycle %d\n", frame, date);
    185       DISP_START[cluster_id][local_id] = date;
    186 
    187       unsigned int npxt = npixels / nglobal_procs;   // number of pixels per proc
    188 
    189 #ifndef DISPLAY_ONLY
    190       if (fb_write(npxt * proc_id, B[cluster_id] + npxt * local_id, npxt)){
    191          PRINTF("[%d]: echec fb_sync_write\n", proc_id);
    192          exit();
    193       }
    194 #else
    195       if (fb_write(npxt * proc_id, A[cluster_id] + npxt * local_id, npxt)){
    196          PRINTF("[%d]: echec fb_sync_write\n", proc_id);
    197          exit();
    198       }
    199 #endif
    200 
    201       if (fb_completed()){
    202          PRINTF("[%d]: echec fb_completed\n", proc_id);
    203          exit();
    204       }
    205 
    206       date = proctime();
    207       PRINTF("*** Completing display for frame %d at cycle %d\n", frame, date);
    208       DISP_END[cluster_id][local_id] = date;
    209 
    210       barrier_wait(2);
    211 
    212       // Instrumentation (done by processor 0 in cluster 0)
    213       if (local_id == 0){
    214          date = proctime();
    215          PRINTF("\n*** Starting Instrumentation for frame %d at cycle %d\n\n", frame, date);
     265
     266         if ( success ) PRINTF("\n*** proc [0,0,0] : CHECKSUM OK \n\n");
     267         else           PRINTF("\n*** proc [0,0,0] : CHECKSUM KO \n\n");
    216268
    217269         int cc, pp;
     
    229281         unsigned int max_disp_ended = 0;
    230282
    231          for (cc = 0; cc < nclusters; cc++){
    232             for (pp = 0; pp < nlocal_procs; pp++){
    233                if (LOAD_START[cc][pp] < min_load_start){
    234                   min_load_start = LOAD_START[cc][pp];
    235                }
    236                if (LOAD_START[cc][pp] > max_load_start){
    237                   max_load_start = LOAD_START[cc][pp];
    238                }
    239                if (LOAD_END[cc][pp] < min_load_ended){
    240                   min_load_ended = LOAD_END[cc][pp];
    241                }
    242                if (LOAD_END[cc][pp] > max_load_ended){
    243                   max_load_ended = LOAD_END[cc][pp];
    244                }
    245 
    246                if (TRSP_START[cc][pp] < min_trsp_start){
    247                   min_trsp_start = TRSP_START[cc][pp];
    248                }
    249                if (TRSP_START[cc][pp] > max_trsp_start){
    250                   max_trsp_start = TRSP_START[cc][pp];
    251                }
    252                if (TRSP_END[cc][pp] < min_trsp_ended){
    253                   min_trsp_ended = TRSP_END[cc][pp];
    254                }
    255                if (TRSP_END[cc][pp] > max_trsp_ended){
    256                   max_trsp_ended = TRSP_END[cc][pp];
    257                }
    258 
    259                if (DISP_START[cc][pp] < min_disp_start){
    260                   min_disp_start = DISP_START[cc][pp];
    261                }
    262                if (DISP_START[cc][pp] > max_disp_start){
    263                   max_disp_start = DISP_START[cc][pp];
    264                }
    265                if (DISP_END[cc][pp] < min_disp_ended){
    266                   min_disp_ended = DISP_END[cc][pp];
    267                }
    268                if (DISP_END[cc][pp] > max_disp_ended){
    269                   max_disp_ended = DISP_END[cc][pp];
    270                }
     283         for (cc = 0; cc < nclusters; cc++)
     284         {
     285            for (pp = 0; pp < NB_PROCS_MAX; pp++)
     286            {
     287               if (LOAD_START[cc][pp] < min_load_start)  min_load_start = LOAD_START[cc][pp];
     288               if (LOAD_START[cc][pp] > max_load_start)  max_load_start = LOAD_START[cc][pp];
     289               if (LOAD_END[cc][pp]   < min_load_ended)  min_load_ended = LOAD_END[cc][pp];
     290               if (LOAD_END[cc][pp]   > max_load_ended)  max_load_ended = LOAD_END[cc][pp];
     291               if (TRSP_START[cc][pp] < min_trsp_start)  min_trsp_start = TRSP_START[cc][pp];
     292               if (TRSP_START[cc][pp] > max_trsp_start)  max_trsp_start = TRSP_START[cc][pp];
     293               if (TRSP_END[cc][pp]   < min_trsp_ended)  min_trsp_ended = TRSP_END[cc][pp];
     294               if (TRSP_END[cc][pp]   > max_trsp_ended)  max_trsp_ended = TRSP_END[cc][pp];
     295               if (DISP_START[cc][pp] < min_disp_start)  min_disp_start = DISP_START[cc][pp];
     296               if (DISP_START[cc][pp] > max_disp_start)  max_disp_start = DISP_START[cc][pp];
     297               if (DISP_END[cc][pp]   < min_disp_ended)  min_disp_ended = DISP_END[cc][pp];
     298               if (DISP_END[cc][pp]   > max_disp_ended)  max_disp_ended = DISP_END[cc][pp];
    271299            }
    272300         }
    273301
    274302         PRINTF(" - LOAD_START : min = %d / max = %d / med = %d / delta = %d\n",
    275                min_load_start, max_load_start, (min_load_start+max_load_start)/2, max_load_start-min_load_start);
     303               min_load_start, max_load_start, (min_load_start+max_load_start)/2,
     304               max_load_start-min_load_start);
     305
    276306         PRINTF(" - LOAD_END   : min = %d / max = %d / med = %d / delta = %d\n",
    277                min_load_ended, max_load_ended, (min_load_ended+max_load_ended)/2, max_load_ended-min_load_ended);
     307               min_load_ended, max_load_ended, (min_load_ended+max_load_ended)/2,
     308               max_load_ended-min_load_ended);
    278309
    279310         PRINTF(" - TRSP_START : min = %d / max = %d / med = %d / delta = %d\n",
    280                min_trsp_start, max_trsp_start, (min_trsp_start+max_trsp_start)/2, max_trsp_start-min_trsp_start);
     311               min_trsp_start, max_trsp_start, (min_trsp_start+max_trsp_start)/2,
     312               max_trsp_start-min_trsp_start);
     313
    281314         PRINTF(" - TRSP_END   : min = %d / max = %d / med = %d / delta = %d\n",
    282                min_trsp_ended, max_trsp_ended, (min_trsp_ended+max_trsp_ended)/2, max_trsp_ended-min_trsp_ended);
     315               min_trsp_ended, max_trsp_ended, (min_trsp_ended+max_trsp_ended)/2,
     316               max_trsp_ended-min_trsp_ended);
    283317
    284318         PRINTF(" - DISP_START : min = %d / max = %d / med = %d / delta = %d\n",
    285                min_disp_start, max_disp_start, (min_disp_start+max_disp_start)/2, max_disp_start-min_disp_start);
     319               min_disp_start, max_disp_start, (min_disp_start+max_disp_start)/2,
     320               max_disp_start-min_disp_start);
     321
    286322         PRINTF(" - DISP_END   : min = %d / max = %d / med = %d / delta = %d\n",
    287                min_disp_ended, max_disp_ended, (min_disp_ended+max_disp_ended)/2, max_disp_ended-min_disp_ended);
    288 
    289          PRINTF(" - BARRIER TRSP/DISP = %d\n", min_disp_start - max_trsp_ended);
     323               min_disp_ended, max_disp_ended, (min_disp_ended+max_disp_ended)/2,
     324               max_disp_ended-min_disp_ended);
    290325      }
    291       frame++;
    292 
    293    } // end while frame     
    294 
    295    PRINTF("*** End of main ***\n");
    296 
    297    while(1);
     326
     327      image++;
     328
     329      _barrier_wait( 3 );
     330   } // end while image     
     331
     332
     333   _exit();
     334
    298335} // end main()
    299336
Note: See TracChangeset for help on using the changeset viewer.