Changeset 764 for soft


Ignore:
Timestamp:
Jan 19, 2016, 1:19:14 PM (8 years ago)
Author:
alain
Message:

Modify the transpose application to replace the
giet_fat_read() / giet_fat_write() system calls
with the giet_fat_mmap() system call, in order to
avoid one copy of data in user space.

The read/write version is still available as transpose_rw.c

Location:
soft/giet_vm/applications/transpose
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • soft/giet_vm/applications/transpose/transpose.c

    r712 r764  
    44// author : Alain Greiner
    55///////////////////////////////////////////////////////////////////////////////////////
    6 // This multi-threaded aplication transposes a raw image (one pbyte per pixel).
     6// This multi-threaded application reads a raw image (one byte per pixel)
     7// stored on disk, transposes it, displays the result on the frame buffer,
     8// and stores the transposed image on disk.
     9// The input image can be interactively selected if the INTERACTIVE flag is set.
    710// It can run on a multi-processors, multi-clusters architecture, with one thread
    811// per processor, and uses the POSIX threads API.
    9 // It does not use the CMA to display the result image.
     12// It uses the giet_fat_mmap() to directly access the input and output files
     13// in the kernel files cache. It does not use the CMA to display the result image.
    1014//
    1115// The main() function can be launched on any processor P[x,y,l].
     
    1519// when the parallel execution is completed.
    1620//
    17 // The input and output buffers containing the image are distributed in clusters.
    18 //
    19 // The execute() function read a set of lines from an input file on disk,
    20 // to the local buffer buf_in[x][y], transpose it, write the result to a remote buffer
    21 // buf_out[x'][y'], display the content of the local buffer buf_out[x][y] to the
    22 // frame buffer, and store it on disk to another output file.
     21// The buf_in[x,y] and buf_out[x,y] buffers containing the direct and transposed images
     22// are distributed in clusters:
     23// In each cluster[x,y], the thread running on processor P[x,y,0] uses the giet_fat_mmap()
     24// function to map the buf_in[x,y] and buf_out[x,y] buffers containing a set of lines.
     25// Then, all threads in cluster[x,y] read pixels from the local buf_in[x,y] buffer, and
     26// write the pixels to the remote buf_out[x,y] buffers. Finally, each thread displays
     27// a part of the transposed image to the frame buffer.
    2328//
    2429// - The image size must fit the frame buffer size.
     
    2631// - The number of clusters  must be a power of 2 no larger than 256.
    2732// - The number of processors per cluster must be a power of 2 no larger than 4.
     33// - The number of clusters cannot be larger than (image_size * image_size) / 4096,
     34//   because the size of buf_in[x,y] and buf_out[x,y] must be multiple of 4096.
     35//
     36// The transpose_rw.c file contains a variant that uses the giet_fat_read()
     37// and giet_fat_write() system calls to access the files.
    2838///////////////////////////////////////////////////////////////////////////////////////
    2939
     
    3343#include "malloc.h"
    3444
    35 #define BLOCK_SIZE            512                         // block size on disk
    36 #define X_MAX                 16                          // max number of clusters in row
    37 #define Y_MAX                 16                          // max number of clusters in column
    38 #define PROCS_MAX             4                           // max number of procs per cluster
    39 #define CLUSTER_MAX           (X_MAX * Y_MAX)             // max number of clusters
    40 #define IMAGE_SIZE            256                         // default image size
    41 #define INPUT_FILE_PATH       "/misc/lena_256.raw"        // default input file pathname
    42 #define OUTPUT_FILE_PATH      "/home/lena_transposed.raw" // default output file pathname
     45#define BLOCK_SIZE            512                          // block size on disk
     46#define X_MAX                 16                           // max number of clusters in row
     47#define Y_MAX                 16                           // max number of clusters in column
     48#define PROCS_MAX             4                            // max number of procs per cluster
     49#define CLUSTER_MAX           (X_MAX * Y_MAX)              // max number of clusters
     50#define IMAGE_SIZE            256                          // default image size
     51#define INPUT_FILE_PATH       "/misc/lena_256.raw"         // default input file pathname
     52#define OUTPUT_FILE_PATH      "/home/lena_transposed.raw"  // default output file pathname
     53#define INTERACTIVE           0                            // interactive capture of filenames
     54#define VERBOSE               0                            // print comments on TTY
     55
    4356
    4457// macro to use a shared TTY
     
    5265
    5366// instrumentation counters for each processor in each cluster
    54 unsigned int LOAD_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
    55 unsigned int LOAD_END  [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
     67unsigned int MMAP_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
     68unsigned int MMAP_END  [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
    5669unsigned int TRSP_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
    5770unsigned int TRSP_END  [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
    5871unsigned int DISP_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
    5972unsigned int DISP_END  [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
    60 unsigned int STOR_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
    61 unsigned int STOR_END  [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
    6273
    6374// arrays of pointers on distributed buffers
     
    6677unsigned char*  buf_out[CLUSTER_MAX];
    6778
    68 // checksum variables
    69 unsigned check_line_before[1024];
    70 unsigned check_line_after[1024];
    71 
    7279// lock protecting shared TTY
    7380user_lock_t  tty_lock;
     
    8087char          output_file_name[256];
    8188unsigned int  image_size;
     89
     90// input & output file descriptors
     91int  fd_in;
     92int  fd_out;
    8293
    8394////////////////////////////////////////////
     
    105116    unsigned int nthreads      = x_size * y_size * nprocs;      // number of threads
    106117    unsigned int npixels       = image_size * image_size;       // pixels per image
    107     int          fd_in         = 0;                             // initial file descriptor
    108     int          fd_out        = 0;                             // output file descriptor
    109118    unsigned int cluster_id    = (x_id * y_size) + y_id;        // "continuous" index   
    110     unsigned int thread_id     = (cluster_id * nprocs) + p_id;  // "continuous" thread index
     119    unsigned int thread_id     = (cluster_id * nprocs) + p_id;  // "continuous" index
    111120
    112121    // parallel load of image:
    113     // allocate buf_in and buf_out distributed buffers (one buf_in & one buf_out per cluster).
    114     // open input and output files, and load the relevant lines in local buf_in.
    115     // only thread running on processor[x,y,0] does it.
    116 
    117     LOAD_START[x_id][y_id][p_id] = giet_proctime();
     122    // thread running on processor[x,y,0]
     123    // map input & output files in buf_in & buf_out buffers.
     124
     125    MMAP_START[x_id][y_id][p_id] = giet_proctime();
    118126
    119127    if ( p_id == 0 )
    120128    {
    121         buf_in[cluster_id]  = remote_malloc( npixels/nclusters, x_id, y_id );
    122         buf_out[cluster_id] = remote_malloc( npixels/nclusters, x_id, y_id );
    123 
    124         if ( (x_id==0) && (y_id==0) )
    125         {
    126             printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes buffer allocation at cycle %d\n",
    127                    x_id, y_id, p_id, giet_proctime() );
    128         }
    129 
    130         // open input file
    131         fd_in = giet_fat_open( input_file_name , O_RDONLY );  // read_only
    132         if ( fd_in < 0 )
    133         {
    134             printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot open file %s\n",
    135                    x_id , y_id , p_id , input_file_name );
    136             giet_pthread_exit(" open() failure");
    137         }
    138         else if ( (x_id==0) && (y_id==0) )
    139         {
    140             printf("\n[TRANSPOSE] Proc [0,0,0] open file %s / fd = %d\n",
    141                    input_file_name , fd_in );
    142         }
    143 
    144         // open output file
    145         fd_out = giet_fat_open( output_file_name , O_CREATE );   // create if required
    146         if ( fd_out < 0 )
    147         {
    148             printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot open file %s\n",
    149                             x_id , y_id , p_id , output_file_name );
    150             giet_pthread_exit(" open() failure");
    151         }
    152         else if ( (x_id==0) && (y_id==0) )
    153         {
    154             printf("\n[TRANSPOSE] Proc [0,0,0] open file %s / fd = %d\n",
    155                    output_file_name , fd_out );
    156         }
    157 
    158 
    159         unsigned int offset = ((npixels*cluster_id)/nclusters);
    160         if ( giet_fat_lseek( fd_in,
    161                              offset,
    162                              SEEK_SET ) != offset )
    163         {
    164             printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot seek fd = %d\n",
    165                    x_id , y_id , p_id , fd_in );
    166             giet_pthread_exit(" seek() failure");
    167         }
    168 
    169         unsigned int pixels = npixels / nclusters;
    170         if ( giet_fat_read( fd_in,
    171                             buf_in[cluster_id],
    172                             pixels ) != pixels )
    173         {
    174             printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot read fd = %d\n",
    175                    x_id , y_id , p_id , fd_in );
    176             giet_pthread_exit(" read() failure");
    177         }
    178 
    179         if ( (x_id==0) && (y_id==0) )
    180         {
    181             printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes load at cycle %d\n",
    182                    x_id, y_id, p_id, giet_proctime() );
    183         }
    184     }
    185 
    186     LOAD_END[x_id][y_id][p_id] = giet_proctime();
     129        // map buf_in and buf_out
     130        unsigned int length = npixels / nclusters;
     131        unsigned int offset = length * cluster_id;
     132       
     133        buf_in[cluster_id] =  giet_fat_mmap( NULL,
     134                                             length,
     135                                             MAP_PROT_READ,
     136                                             MAP_SHARED,
     137                                             fd_in,
     138                                             offset );
     139        if ( buf_in[cluster_id] == NULL )
     140        {
     141            printf("\n[TRANSPOSE ERROR] Thread[%d,%d,%d] cannot map input file\n",
     142                   x_id , y_id , p_id );
     143            giet_pthread_exit( NULL );
     144        }
     145                 
     146        if TRANSPOSE_DEBUG
     147        printf("\n@@@ Thread[%d,%d,%d] call mmap : length = %x / offset = %x / buf_in = %x\n",
     148               x_id , y_id , p_id , length , offset , buf_in[cluster_id] );
     149           
     150        buf_out[cluster_id] = giet_fat_mmap( NULL,
     151                                             length,
     152                                             MAP_PROT_WRITE,
     153                                             MAP_SHARED,
     154                                             fd_out,
     155                                             offset );
     156        if ( buf_out[cluster_id] == NULL )
     157        {
     158            printf("\n[TRANSPOSE ERROR] Thread[%d,%d,%d] cannot map output file\n",
     159                   x_id , y_id , p_id );
     160            giet_pthread_exit( NULL );
     161        }
     162                   
     163        if TRANSPOSE_DEBUG
     164        printf("\n@@@ Thread[%d,%d,%d] call mmap : length = %x / offset = %x / buf_out = %x\n",
     165               x_id , y_id , p_id , length , offset , buf_out[cluster_id] );
     166       
     167    }
     168
     169    MMAP_END[x_id][y_id][p_id] = giet_proctime();
    187170
    188171    /////////////////////////////
     
    212195    for ( l = first ; l < last ; l++ )
    213196    {
    214         check_line_before[l] = 0;
    215      
    216197        // in each iteration we transfer one byte
    217198        for ( p = 0 ; p < image_size ; p++ )
     
    222203            byte        = buf_in[src_cluster][src_index];
    223204
    224             // compute checksum
    225             check_line_before[l] = check_line_before[l] + byte;
    226 
    227205            // write one byte to remote buf_out
    228206            dst_cluster = p / nlc;
     
    234212    if ( (p_id == 0) && (x_id==0) && (y_id==0) )
    235213    {
    236         printf("\n[TRANSPOSE] proc [%d,%d,%d] completes transpose at cycle %d\n",
     214        printf("\n[TRANSPOSE] Thread[%d,%d,%d] completes transpose at cycle %d\n",
    237215        x_id, y_id, p_id, giet_proctime() );
    238216    }
     
    257235    if ( (x_id==0) && (y_id==0) && (p_id==0) )
    258236    {
    259         printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes display at cycle %d\n",
     237        printf("\n[TRANSPOSE] Thread[%d,%d,%d] completes display at cycle %d\n",
    260238               x_id, y_id, p_id, giet_proctime() );
    261239    }
     
    267245    /////////////////////////////
    268246
    269     // parallel store : buf_out buffers to disk
    270     // only thread running on processor(x,y,0) does it
    271 
    272     STOR_START[x_id][y_id][p_id] = giet_proctime();
    273 
    274     if ( p_id == 0 )
    275     {
    276         unsigned int offset = ((npixels*cluster_id)/nclusters);
    277         if ( giet_fat_lseek( fd_out,
    278                              offset,
    279                              SEEK_SET ) != offset )
    280         {
    281             printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot seek fr = %d\n",
    282                    x_id , y_id , p_id , fd_out );
    283             giet_pthread_exit(" seek() failure");
    284         }
    285 
    286         unsigned int pixels = npixels / nclusters;
    287         if ( giet_fat_write( fd_out,
    288                              buf_out[cluster_id],
    289                              pixels ) != pixels )
    290         {
    291             printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot write fd = %d\n",
    292                    x_id , y_id , p_id , fd_out );
    293             giet_pthread_exit(" write() failure");
    294         }
    295 
    296         if ( (x_id==0) && (y_id==0) )
    297         {
    298             printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes store at cycle %d\n",
    299                    x_id, y_id, p_id, giet_proctime() );
    300         }
    301     }
    302 
    303     STOR_END[x_id][y_id][p_id] = giet_proctime();
    304 
    305     // In each cluster, only thread running on Processor[x,y,0] releases
    306     // the distributed buffers and close the file descriptors.
    307 
    308     if ( p_id==0 )
    309     {
    310         free( buf_in[cluster_id] );
    311         free( buf_out[cluster_id] );
    312 
    313         giet_fat_close( fd_in );
    314         giet_fat_close( fd_out );
    315     }
    316 
     247    // all threads, but thread[0,0,0], suicide
    317248    if ( (x_id != 0) || (y_id != 0) || (p_id != 0) )
    318249    giet_pthread_exit( "completed" );
     
    342273    unsigned int min_disp_ended = 0xFFFFFFFF;
    343274    unsigned int max_disp_ended = 0;
    344     unsigned int min_stor_start = 0xFFFFFFFF;
    345     unsigned int max_stor_start = 0;
    346     unsigned int min_stor_ended = 0xFFFFFFFF;
    347     unsigned int max_stor_ended = 0;
    348275
    349276    for (x = 0; x < x_size; x++)
     
    353280            for ( l = 0 ; l < nprocs ; l++ )
    354281            {
    355                 if (LOAD_START[x][y][l] < min_load_start)  min_load_start = LOAD_START[x][y][l];
    356                 if (LOAD_START[x][y][l] > max_load_start)  max_load_start = LOAD_START[x][y][l];
    357                 if (LOAD_END[x][y][l]   < min_load_ended)  min_load_ended = LOAD_END[x][y][l];
    358                 if (LOAD_END[x][y][l]   > max_load_ended)  max_load_ended = LOAD_END[x][y][l];
     282                if (MMAP_START[x][y][l] < min_load_start)  min_load_start = MMAP_START[x][y][l];
     283                if (MMAP_START[x][y][l] > max_load_start)  max_load_start = MMAP_START[x][y][l];
     284                if (MMAP_END[x][y][l]   < min_load_ended)  min_load_ended = MMAP_END[x][y][l];
     285                if (MMAP_END[x][y][l]   > max_load_ended)  max_load_ended = MMAP_END[x][y][l];
    359286                if (TRSP_START[x][y][l] < min_trsp_start)  min_trsp_start = TRSP_START[x][y][l];
    360287                if (TRSP_START[x][y][l] > max_trsp_start)  max_trsp_start = TRSP_START[x][y][l];
     
    365292                if (DISP_END[x][y][l]   < min_disp_ended)  min_disp_ended = DISP_END[x][y][l];
    366293                if (DISP_END[x][y][l]   > max_disp_ended)  max_disp_ended = DISP_END[x][y][l];
    367                 if (STOR_START[x][y][l] < min_stor_start)  min_stor_start = STOR_START[x][y][l];
    368                 if (STOR_START[x][y][l] > max_stor_start)  max_stor_start = STOR_START[x][y][l];
    369                 if (STOR_END[x][y][l]   < min_stor_ended)  min_stor_ended = STOR_END[x][y][l];
    370                 if (STOR_END[x][y][l]   > max_stor_ended)  max_stor_ended = STOR_END[x][y][l];
    371294            }
    372295        }
     
    375298    printf("\n   ---------------- Instrumentation Results ---------------------\n");
    376299
    377     printf(" - LOAD_START : min = %d / max = %d / med = %d / delta = %d\n",
     300    printf(" - MMAP_START : min = %d / max = %d / med = %d / delta = %d\n",
    378301           min_load_start, max_load_start, (min_load_start+max_load_start)/2,
    379302           max_load_start-min_load_start);
    380303
    381     printf(" - LOAD_END   : min = %d / max = %d / med = %d / delta = %d\n",
     304    printf(" - MMAP_END   : min = %d / max = %d / med = %d / delta = %d\n",
    382305           min_load_ended, max_load_ended, (min_load_ended+max_load_ended)/2,
    383306           max_load_ended-min_load_ended);
     
    398321           min_disp_ended, max_disp_ended, (min_disp_ended+max_disp_ended)/2,
    399322           max_disp_ended-min_disp_ended);
    400 
    401     printf(" - STOR_START : min = %d / max = %d / med = %d / delta = %d\n",
    402            min_stor_start, max_stor_start, (min_stor_start+max_stor_start)/2,
    403            max_stor_start-min_stor_start);
    404 
    405     printf(" - STOR_END   : min = %d / max = %d / med = %d / delta = %d\n",
    406            min_stor_ended, max_stor_ended, (min_stor_ended+max_stor_ended)/2,
    407            max_stor_ended-min_stor_ended);
    408323
    409324}  // end instrument()
     
    422337    unsigned int y_id;                          // y cluster coordinate
    423338    unsigned int p_id;                          // local processor index
    424 
    425339    giet_proc_xyp( &x_id , &y_id , &p_id );
    426340
     
    429343    unsigned int y_size;                       // number of clusters in a column
    430344    unsigned int nprocs;                       // number of processors per cluster
    431 
    432345    giet_procs_number( &x_size , &y_size , &nprocs );
    433346
     
    436349
    437350    giet_pthread_assert( ((x_size == 1) || (x_size == 2) || (x_size == 4) ||
    438                   (x_size == 8) || (x_size == 16)),
     351                          (x_size == 8) || (x_size == 16)),
    439352                         "[TRANSPOSE ERROR] x_size must be 1,2,4,8,16");
    440353
    441354    giet_pthread_assert( ((y_size == 1) || (y_size == 2) || (y_size == 4) ||
    442                   (y_size == 8) || (y_size == 16)),
     355                          (y_size == 8) || (y_size == 16)),
    443356                         "[TRANSPOSE ERROR] y_size must be 1,2,4,8,16");
    444357
     
    456369    giet_fbf_size( &width , &height );
    457370
    458     // enter interactive part if required
    459371    printf("\n[TRANSPOSE] start at cycle %d on %d cores / FBF = %d * %d pixels\n",
    460372           giet_proctime(), nthreads , width , height );
    461373
    462     // input_file_name, output_file_name, and size  acquisition
    463     printf("\n[TRANSPOSE] enter path for input file / default is : %s\n> ", INPUT_FILE_PATH ); 
    464     giet_tty_gets( input_file_name , 256 );
    465     printf("\n");
    466 
    467     if ( strcmp( input_file_name , "" ) == 0 ) strcpy( input_file_name , INPUT_FILE_PATH );
    468 
    469     printf("\n[TRANSPOSE] enter path for output file / default is : %s\n> ", OUTPUT_FILE_PATH ); 
    470     giet_tty_gets( output_file_name , 256 );
    471     printf("\n");
    472 
    473     if ( strcmp( output_file_name , "" ) == 0 ) strcpy( output_file_name , OUTPUT_FILE_PATH );
    474 
    475     printf("\n[TRANSPOSE] enter image size / default is : %d\n> ", IMAGE_SIZE ); 
    476     giet_tty_getw( &image_size );
    477     printf("\n");
    478    
    479     if ( image_size == 0 ) image_size = IMAGE_SIZE;
    480 
     374    if ( INTERACTIVE ) // input_file_name, output_file_name, and size  acquisition
     375    {
     376        printf("\n[TRANSPOSE] enter path for input file / default is : %s\n> ", INPUT_FILE_PATH ); 
     377        giet_tty_gets( input_file_name , 256 );
     378        printf("\n");
     379        if ( strcmp( input_file_name , "" ) == 0 ) strcpy( input_file_name , INPUT_FILE_PATH );
     380
     381        printf("\n[TRANSPOSE] enter path for output file / default is : %s\n> ", OUTPUT_FILE_PATH ); 
     382        giet_tty_gets( output_file_name , 256 );
     383        printf("\n");
     384        if ( strcmp( output_file_name , "" ) == 0 ) strcpy( output_file_name , OUTPUT_FILE_PATH );
     385
     386        printf("\n[TRANSPOSE] enter image size / default is : %d\n> ", IMAGE_SIZE ); 
     387        giet_tty_getw( &image_size );
     388        printf("\n");
     389        if ( image_size == 0 ) image_size = IMAGE_SIZE;
     390    }
     391    else
     392    {
     393        strcpy( input_file_name , INPUT_FILE_PATH );
     394        strcpy( output_file_name , OUTPUT_FILE_PATH );
     395        image_size = IMAGE_SIZE;
     396    }
     397
     398    // check image size / number of clusters
     399    giet_pthread_assert( ((((image_size * image_size) / (x_size * y_size)) & 0xFFF) == 0) ,
     400                         "[TRANSPOSE ERROR] pixels per cluster must be multiple of 4096");
     401   
    481402    printf("\n[TRANSPOSE] input = %s / output = %s / size = %d\n",
    482403           input_file_name, output_file_name, image_size );
    483404
    484     giet_pthread_assert( (nprocs * x_size * y_size <= image_size ),
    485                          "[TRANSPOSE ERROR] number of threads larger than number of lines");
    486 
    487405    // distributed heap initialisation
    488406    for ( x = 0 ; x < x_size ; x++ )
     
    494412    }
    495413
     414    // open input and output files
     415    fd_in = giet_fat_open( input_file_name , O_RDONLY );  // read_only
     416    if ( fd_in < 0 )
     417    {
     418        printf("\n[TRANSPOSE ERROR] main cannot open file %s\n", input_file_name );
     419        giet_pthread_exit( NULL );
     420    }
     421    else
     422    {
     423        printf("\n[TRANSPOSE] main open file %s / fd = %d\n", input_file_name , fd_in );
     424    }
     425
     426    fd_out = giet_fat_open( output_file_name , O_CREATE );   // create if required
     427    if ( fd_out < 0 )
     428    {
     429        printf("\n[TRANSPOSE ERROR] main cannot open file %s\n", output_file_name );
     430        giet_pthread_exit(" open() failure");
     431    }
     432    else
     433    {
     434        printf("\n[TRANSPOSE] main open file %s / fd = %d\n", output_file_name , fd_out );
     435    }
     436
    496437    // allocate thread[] array
    497438    pthread_t* thread = malloc( nthreads * sizeof(pthread_t) );
     
    501442
    502443    // Initialisation completed
    503     printf("\n[TRANSPOSE] initialisation completed at cycle %d\n", giet_proctime() );
     444    printf("\n[TRANSPOSE] main completes initialisation\n");
    504445   
    505446    // launch other threads to run execute() function
     
    537478    instrument( x_size , y_size , nprocs );
    538479
     480    // close input and output files
     481    giet_fat_close( fd_in );
     482    giet_fat_close( fd_out );
     483
     484    // suicide
    539485    giet_pthread_exit( "completed" );
    540486   
  • soft/giet_vm/applications/transpose/transpose.py

    r708 r764  
    3535    # define vsegs base & size
    3636    code_base  = 0x10000000
    37     code_size  = 0x00010000     # 64 Kbytes (per cluster)
     37    code_size  = 0x00010000     # 64 Kbytes  (256 Mbytes max)
    3838   
    3939    data_base  = 0x20000000
    40     data_size  = 0x00010000     # 64 Kbytes (non replicated)
     40    data_size  = 0x00010000     # 64 Kbytes  (256 Mbytes max)
     41
     42    mmap_base  = 0x30000000
     43    mmap_size  = 0x10000000     # 256 Mbytes (non mapped)
    4144
    4245    stack_base = 0x40000000
    43     stack_size = 0x00010000     # 64 Kbytes (per thread)
     46    stack_size = 0x00010000     # 64 Kbytes per thread  (64 Mbytes max)
    4447
    4548    heap_base  = 0x60000000
    46     heap_size  = 0x00200000     # 2 Mbytes (per cluster)
     49    heap_size  = 0x00200000     # 2 Mbytes per cluster  (512 Mbytes max)
    4750
    4851    # create vspace
    49     vspace = mapping.addVspace( name = 'transpose', startname = 'trsp_data', active = False )
     52    vspace = mapping.addVspace( name = 'transpose', startname = 'trsp_data', active = True )
    5053   
    5154    # data vseg : shared (only in cluster[0,0])
     
    5457                     binpath = 'bin/transpose/appli.elf',
    5558                     local = False )
     59
     60    # mmap vseg : non mapped in physical memory
     61    mapping.addVseg( vspace, 'trsp_mmap', mmap_base , mmap_size,
     62                     'C_WU', vtype = 'MMAP', local = False )
    5663
    5764    # code vsegs : local (one copy in each cluster)
     
    7582                    proc_id = (((x * y_size) + y) * nprocs) + p
    7683                    base    = stack_base + (proc_id * stack_size)
    77 
     84                    size    = stack_size - 4096
    7885                    mapping.addVseg( vspace, 'trsp_stack_%d_%d_%d' % (x,y,p),
    79                                      base, stack_size, 'C_WU', vtype = 'BUFFER',
     86                                     base , size , 'C_WU', vtype = 'BUFFER',
    8087                                     x = x , y = y , pseg = 'RAM',
    81                                      local = True, big = True )
     88                                     local = True )
    8289
    8390    # heap vsegs: distributed non local (all heap vsegs can be accessed by all tasks)
Note: See TracChangeset for help on using the changeset viewer.