Ignore:
Timestamp:
Jul 8, 2015, 3:57:15 PM (9 years ago)
Author:
alain
Message:

Modify all applications to support two new rules:
1) introduce a local Makefile for each application.
2) change the "application.elf" name to "application/appli.elf" in the "application.py" file.
Introduce the shell application.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • soft/giet_vm/applications/ocean/main.C

    r581 r589  
    1 /*************************************************************************/
    2 /*                                                                       */
    3 /*  Copyright (c) 1994 Stanford University                               */
    4 /*                                                                       */
    5 /*  All rights reserved.                                                 */
    6 /*                                                                       */
    7 /*  Permission is given to use, copy, and modify this software for any   */
    8 /*  non-commercial purpose as long as this copyright notice is not       */
    9 /*  removed.  All other uses, including redistribution in whole or in    */
    10 /*  part, are forbidden without prior written permission.                */
    11 /*                                                                       */
    12 /*  This software is provided with absolutely no warranty and no         */
    13 /*  support.                                                             */
    14 /*                                                                       */
    15 /*************************************************************************/
     1#line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET"
    162
    17 /*************************************************************************/
    18 /*                                                                       */
    19 /*  SPLASH Ocean Code                                                    */
    20 /*                                                                       */
    21 /*  This application studies the role of eddy and boundary currents in   */
    22 /*  influencing large-scale ocean movements.  This implementation uses   */
    23 /*  dynamically allocated four-dimensional arrays for grid data storage. */
    24 /*                                                                       */
    25 /*  Command line options:                                                */
    26 /*                                                                       */
    27 /*     -mM : Simulate MxM ocean. M must be (power of 2) +2.              */
    28 /*     -nN : N = number of threads. N must be power of 2.                */
    29 /*     -eE : E = error tolerance for iterative relaxation.               */
    30 /*     -rR : R = distance between grid points in meters.                 */
    31 /*     -tT : T = timestep in seconds.                                    */
    32 /*     -s  : Print timing statistics.                                    */
    33 /*     -o  : Print out relaxation residual values.                       */
    34 /*     -h  : Print out command line options.                             */
    35 /*                                                                       */
    36 /*  Default: OCEAN -m514 -n4 -e1e-7 -r20000.0 -t28800.0                  */
    37 /*                                                                       */
    38 /*  NOTE: This code works under both the FORK and SPROC models.          */
    39 /*                                                                       */
    40 /*************************************************************************/
    41 
    42 MAIN_ENV    /* NOTE(review): environment macro defined outside this file - confirm what it expands to */
    43 
    44 #define DEFAULT_M        514    /* default grid dimension (initial value of main_im; -m option) */
    45 #define DEFAULT_N        4    /* default number of threads (initial value of nprocs; -n option) */
    46 #define DEFAULT_E        1e-7    /* default error tolerance for the iterative relaxation (-e option) */
    47 #define DEFAULT_T    28800.0    /* default timestep in seconds (initial value of dtau; -t option) */
    48 #define DEFAULT_R    20000.0    /* default distance between grid points in meters (initial value of res; -r option) */
    49 #define UP               0    /* direction codes 0..7; presumably indices into the 8-entry gp[i].neighbors array - confirm */
    50 #define DOWN             1
    51 #define LEFT             2
    52 #define RIGHT            3
    53 #define UPLEFT           4
    54 #define UPRIGHT          5
    55 #define DOWNLEFT         6
    56 #define DOWNRIGHT        7
    57 #define PAGE_SIZE     4096    /* size in bytes of the gp[i].pad buffer allocated in main() */
    58 
    59 #include <stdio.h>
    60 #include <math.h>
    61 #include <stdlib.h>
    62 
    63 #include "decs.h"
    64 
    65 struct multi_struct *multi;    /* allocated in main(); its err_multi field is reset there */
    66 struct global_struct *global;    /* allocated in main(); holds trackstart, psibi and starttime */
    67 struct locks_struct *locks;    /* allocated in main(); every lock it holds is initialised there */
    68 struct bars_struct *bars;    /* allocated in main(); its barrier(s) are initialised there */
    69 
    70 struct Global_Private *main_gp;    /* nprocs + 1 records; main() fills entries 0 .. nprocs-1 */
    71 double ****main_psi;    /* four-star tables: nprocs entries, each pointing to a 2-entry sub-table of grids */
    72 double ****main_psim;
    73 double ***main_psium;    /* three-star tables: nprocs entries, one grid block per processor */
    74 double ***main_psilm;
    75 double ***main_psib;
    76 double ***main_ga;
    77 double ***main_gb;
    78 double ****main_work1;
    79 double ***main_work2;
    80 double ***main_work3;
    81 double ****main_work4;
    82 double ****main_work5;
    83 double ***main_work6;
    84 double ****main_work7;
    85 double ***main_oldga;
    86 double ***main_oldgb;
    87 double ****main_q_multi;    /* one single allocation sized in main() for all levels and all processors */
    88 double ****main_rhs_multi;    /* same size as main_q_multi */
    89 double ****temparray;    /* nprocs entries, each a 2-entry sub-table of grids */
    90 double ***tauz;    /* nprocs entries, one grid block per processor */
    91 long *main_imx;    /* numlev entries: grid dimension per multigrid level (top level = im) */
    92 long *main_jmx;    /* numlev entries: grid dimension per multigrid level (top level = jm) */
    93 
    94 long nprocs = DEFAULT_N;    /* number of threads; set by the -n option */
    95 const double h1 = 1000.0;    /* used with h3, h, f0 and gpr in main() to compute eig2 */
    96 const double h3 = 4000.0;
    97 const double h = 5000.0;
    98 const double lf = -5.12e11;
    99 double res = DEFAULT_R;    /* distance between grid points in meters; set by the -r option */
    100 double dtau = DEFAULT_T;    /* timestep in seconds; set by the -t option */
    101 const double f0 = 8.3e-5;
    102 const double beta = 2.0e-11;
    103 const double gpr = 0.02;
    104 double ysca;    /* set in main() to jmm1 * res */
    105 long oim;    /* set in main() to the full grid dimension im */
    106 long jmm1;    /* set in main() to jm - 1 (full grid dimension minus one) */
    107 double tolerance = DEFAULT_E;    /* error tolerance for the relaxation; set by the -e option */
    108 const double pi = 3.141592653589793;
    109 const double t0 = 0.5e-4;
    110 const double outday0 = 1.0;
    111 const double outday1 = 2.0;
    112 const double outday2 = 2.0;
    113 const double outday3 = 2.0;
    114 const double maxwork = 10000.0;
    115 double factjacob;    /* set in main() to -1 / (12 * res * res) */
    116 double factlap;    /* set in main() to 1 / (res * res) */
    117 
    118 // TODO: replicate this:
    119 double *main_lev_res;    /* numlev entries: resolution per level, doubled at each coarser level */
    120 double *main_lev_tol;    /* numlev entries; main() only sets the top-level entry (to tolerance) */
    121 double *main_i_int_coeff;    /* numlev entries: 1 / (imx[level] - 1) */
    122 double *main_j_int_coeff;    /* numlev entries: 1 / (jmx[level] - 1) */
    123 long *main_xpts_per_proc;    /* numlev entries: (jmx[level] - 2) / xprocs */
    124 long *main_ypts_per_proc;    /* numlev entries: (imx[level] - 2) / yprocs */
    125 long main_xprocs;    /* copy of xprocs saved at the end of the setup in main() */
    126 long main_yprocs;    /* copy of yprocs saved at the end of the setup in main() */
    127 long main_numlev;    /* copy of numlev saved at the end of the setup in main() */
    128 double main_eig2;    /* copy of eig2 saved at the end of the setup in main() */
    129 long main_im = DEFAULT_M;    /* initial grid dimension; later overwritten with the per-processor im */
    130 long main_jm;    /* set in main() to the per-processor jm */
    131 
    132 long minlevel;    /* 0, or (coarsest level with fewer than 2 points per processor) + 1 */
    133 long do_stats = 1;    /* statistics printing flag; toggled by the -s option */
    134 long do_output = 0;    /* multigrid output flag; toggled by the -o option */
    135 long *ids_procs;    /* nprocs entries; ids_procs[i] == i */
    136 
    137 
    138 __attribute__ ((constructor)) int main(int argc, char *argv[])
    139 {
    140     long i;
    141     long j;
    142     long k;
    143     long x_part;
    144     long y_part;
    145     long d_size;
    146     long itemp;
    147     long jtemp;
    148     double procsqrt;
    149     long temp = 0;
    150     double min_total;
    151     double max_total;
    152     double avg_total;
    153     double avg_wait;
    154     double max_wait;
    155     double min_wait;
    156     double min_multi;
    157     double max_multi;
    158     double avg_multi;
    159     double min_frac;
    160     double max_frac;
    161     double avg_frac;
    162     long imax_wait;
    163     long imin_wait;
    164     long ch;
    165     unsigned long long computeend;
    166     unsigned long long start;
    167     im = main_im;
    168    
    169     CLOCK(start);
    170 
    171     while ((ch = getopt(argc, argv, "m:n:e:r:t:soh")) != -1) {
    172         switch (ch) {
    173         case 'm':
    174             im = atoi(optarg);
    175             if (log_2(im - 2) == -1) {
    176                 printerr("Grid must be ((power of 2)+2) in each dimension\n");
    177                 exit(-1);
    178             }
    179             break;
    180         case 'n':
    181             nprocs = atoi(optarg);
    182             if (nprocs < 1) {
    183                 printerr("N must be >= 1\n");
    184                 exit(-1);
    185             }
    186             if (log_2(nprocs) == -1) {
    187                 printerr("N must be a power of 2\n");
    188                 exit(-1);
    189             }
    190             break;
    191         case 'e':
    192             tolerance = atof(optarg);
    193             break;
    194         case 'r':
    195             res = atof(optarg);
    196             break;
    197         case 't':
    198             dtau = atof(optarg);
    199             break;
    200         case 's':
    201             do_stats = !do_stats;
    202             break;
    203         case 'o':
    204             do_output = !do_output;
    205             break;
    206         case 'h':
    207             printf("Usage: ocean <options>\n\n");
    208             printf("options:\n");
    209             printf("  -mM : Simulate MxM ocean.  M must be (power of 2) + 2 (default = %d).\n", DEFAULT_M);
    210             printf("  -nN : N = number of threads. N must be power of 2 (default = %d).\n", DEFAULT_N);
    211             printf("  -eE : E = error tolerance for iterative relaxation (default = %f).\n", DEFAULT_E);
    212             printf("  -rR : R = distance between grid points in meters (default = %f).\n", DEFAULT_R);
    213             printf("  -tT : T = timestep in seconds (default = %f).\n", DEFAULT_T);
    214             printf("  -s  : Print timing statistics.\n");
    215             printf("  -o  : Print out relaxation residual values.\n");
    216             printf("  -h  : Print out command line options.\n\n");
    217             exit(0);
    218             break;
    219         }
    220     }
    221 
    222     MAIN_INITENV
    223    
    224     jm = im;
    225 
    226     printf("\n");
    227     printf("Ocean simulation with W-cycle multigrid solver\n");
    228     printf("    Processors                         : %1ld\n", nprocs);
    229     printf("    Grid size                          : %1ld x %1ld\n", im, jm);
    230     printf("    Grid resolution (meters)           : %0.2f\n", res);
    231     printf("    Time between relaxations (seconds) : %0.0f\n", dtau);
    232     printf("    Error tolerance                    : %0.7g\n", tolerance);
    233     printf("\n");
    234 
    235     xprocs = 0;
    236     yprocs = 0;
    237 
    238     procsqrt = sqrt((double) nprocs);
    239     j = (long) procsqrt;
    240 
    241     while ((xprocs == 0) && (j > 0)) {
    242         k = nprocs / j;
    243         if (k * j == nprocs) {
    244             if (k > j) {
    245                 xprocs = j;
    246                 yprocs = k;
    247             } else {
    248                 xprocs = k;
    249                 yprocs = j;
    250             }
    251         }
    252         j--;
    253     }
    254 
    255     if (xprocs == 0) {
    256         printerr("Could not find factors for subblocking\n");
    257         exit(-1);
    258     }
    259 
    260     minlevel = 0;
    261     itemp = 1;
    262     jtemp = 1;
    263     numlev = 0;
    264     minlevel = 0;
    265 
    266     while (itemp < (im - 2)) {
    267         itemp = itemp * 2;
    268         jtemp = jtemp * 2;
    269         if ((itemp / yprocs > 1) && (jtemp / xprocs > 1)) {
    270             numlev++;
    271         }
    272     }
    273 
    274     if (numlev == 0) {
    275         printerr("Must have at least 2 grid points per processor in each dimension\n");
    276         exit(-1);
    277     }
    278 
    279     main_imx = (long *) G_MALLOC(numlev * sizeof(long), 0);
    280     main_jmx = (long *) G_MALLOC(numlev * sizeof(long), 0);
    281     main_lev_res = (double *) G_MALLOC(numlev * sizeof(double), 0);
    282     main_lev_tol = (double *) G_MALLOC(numlev * sizeof(double), 0);
    283     main_i_int_coeff = (double *) G_MALLOC(numlev * sizeof(double), 0);
    284     main_j_int_coeff = (double *) G_MALLOC(numlev * sizeof(double), 0);
    285     main_xpts_per_proc = (long *) G_MALLOC(numlev * sizeof(long), 0);
    286     main_ypts_per_proc = (long *) G_MALLOC(numlev * sizeof(long), 0);
    287     ids_procs = (long *) G_MALLOC(nprocs * sizeof(long), 0);
    288    
    289     imx = main_imx;
    290     jmx = main_jmx;
    291     lev_res = main_lev_res;
    292     lev_tol = main_lev_tol;
    293     i_int_coeff = main_i_int_coeff;
    294     j_int_coeff = main_j_int_coeff;
    295     xpts_per_proc = main_xpts_per_proc;
    296     ypts_per_proc = main_ypts_per_proc;
    297 
    298     for (i = 0; i < nprocs; i++) {
    299         ids_procs[i] = i;
    300     }
    301 
    302     imx[numlev - 1] = im;
    303     jmx[numlev - 1] = jm;
    304     lev_res[numlev - 1] = res;
    305     lev_tol[numlev - 1] = tolerance;
    306 
    307     for (i = numlev - 2; i >= 0; i--) {
    308         imx[i] = ((imx[i + 1] - 2) / 2) + 2;
    309         jmx[i] = ((jmx[i + 1] - 2) / 2) + 2;
    310         lev_res[i] = lev_res[i + 1] * 2;
    311     }
    312 
    313     for (i = 0; i < numlev; i++) {
    314         xpts_per_proc[i] = (jmx[i] - 2) / xprocs;
    315         ypts_per_proc[i] = (imx[i] - 2) / yprocs;
    316     }
    317     for (i = numlev - 1; i >= 0; i--) {
    318         if ((xpts_per_proc[i] < 2) || (ypts_per_proc[i] < 2)) {
    319             minlevel = i + 1;
    320             break;
    321         }
    322     }
    323 
    324     for (i = 0; i < numlev; i++) {
    325         temp += imx[i];
    326     }
    327     temp = 0;
    328     j = 0;
    329     for (k = 0; k < numlev; k++) {
    330         for (i = 0; i < imx[k]; i++) {
    331             j++;
    332             temp += jmx[k];
    333         }
    334     }
    335 
    336     d_size = nprocs * sizeof(double ***);
    337     main_psi = (double ****) G_MALLOC(d_size, 0);
    338     main_psim = (double ****) G_MALLOC(d_size, 0);
    339     main_work1 = (double ****) G_MALLOC(d_size, 0);
    340     main_work4 = (double ****) G_MALLOC(d_size, 0);
    341     main_work5 = (double ****) G_MALLOC(d_size, 0);
    342     main_work7 = (double ****) G_MALLOC(d_size, 0);
    343     temparray = (double ****) G_MALLOC(d_size, -1);
    344 
    345     psi = main_psi;
    346     psim = main_psim;
    347     work1 = main_work1;
    348     work4 = main_work4;
    349     work5 = main_work5;
    350     work7 = main_work7;
    351 
    352     d_size = 2 * sizeof(double **);
    353     for (i = 0; i < nprocs; i++) {
    354         psi[i] = (double ***) G_MALLOC(d_size, i);
    355         psim[i] = (double ***) G_MALLOC(d_size, i);
    356         work1[i] = (double ***) G_MALLOC(d_size, i);
    357         work4[i] = (double ***) G_MALLOC(d_size, i);
    358         work5[i] = (double ***) G_MALLOC(d_size, i);
    359         work7[i] = (double ***) G_MALLOC(d_size, i);
    360         temparray[i] = (double ***) G_MALLOC(d_size, i);
    361     }
    362 
    363     d_size = nprocs * sizeof(double **);
    364     main_psium = (double ***) G_MALLOC(d_size, 0);
    365     main_psilm = (double ***) G_MALLOC(d_size, 0);
    366     main_psib = (double ***) G_MALLOC(d_size, 0);
    367     main_ga = (double ***) G_MALLOC(d_size, 0);
    368     main_gb = (double ***) G_MALLOC(d_size, 0);
    369     main_work2 = (double ***) G_MALLOC(d_size, 0);
    370     main_work3 = (double ***) G_MALLOC(d_size, 0);
    371     main_work6 = (double ***) G_MALLOC(d_size, 0);
    372     tauz = (double ***) G_MALLOC(d_size, 0);
    373     main_oldga = (double ***) G_MALLOC(d_size, 0);
    374     main_oldgb = (double ***) G_MALLOC(d_size, 0);
    375 
    376     psium = main_psium;
    377     psilm = main_psilm;
    378     psib = main_psib;
    379     ga = main_ga;
    380     gb = main_gb;
    381     work2 = main_work2;
    382     work3 = main_work3;
    383     work6 = main_work6;
    384     oldga = main_oldga;
    385     oldgb = main_oldgb;
    386 
    387     main_gp = (struct Global_Private *) G_MALLOC((nprocs + 1) * sizeof(struct Global_Private), -1);
    388     gp = main_gp;
    389 
    390     for (i = 0; i < nprocs; i++) {
    391         gp[i].pad = (char *) G_MALLOC(PAGE_SIZE * sizeof(char), i);
    392         gp[i].rel_num_x = (long *) G_MALLOC(numlev * sizeof(long), i);
    393         gp[i].rel_num_y = (long *) G_MALLOC(numlev * sizeof(long), i);
    394         gp[i].eist = (long *) G_MALLOC(numlev * sizeof(long), i);
    395         gp[i].ejst = (long *) G_MALLOC(numlev * sizeof(long), i);
    396         gp[i].oist = (long *) G_MALLOC(numlev * sizeof(long), i);
    397         gp[i].ojst = (long *) G_MALLOC(numlev * sizeof(long), i);
    398         gp[i].rlist = (long *) G_MALLOC(numlev * sizeof(long), i);
    399         gp[i].rljst = (long *) G_MALLOC(numlev * sizeof(long), i);
    400         gp[i].rlien = (long *) G_MALLOC(numlev * sizeof(long), i);
    401         gp[i].rljen = (long *) G_MALLOC(numlev * sizeof(long), i);
    402         gp[i].neighbors = (long *) G_MALLOC(8 * sizeof(long), i);
    403         gp[i].rownum = (long *) G_MALLOC(sizeof(long), i);
    404         gp[i].colnum = (long *) G_MALLOC(sizeof(long), i);
    405         gp[i].lpid = (long *) G_MALLOC(sizeof(long), i);
    406         gp[i].multi_time = (double *) G_MALLOC(sizeof(double), i);
    407         gp[i].total_time = (double *) G_MALLOC(sizeof(double), i);
    408         gp[i].sync_time = (double *) G_MALLOC(sizeof(double), i);
    409         gp[i].process_time = (double *) G_MALLOC(sizeof(double), i);
    410         gp[i].step_start = (double *) G_MALLOC(sizeof(double), i);
    411         gp[i].steps_time = (double *) G_MALLOC(10 * sizeof(double), i);
    412         *gp[i].multi_time = 0;
    413         *gp[i].total_time = 0;
    414         *gp[i].sync_time = 0;
    415         *gp[i].process_time = 0;
    416         *gp[i].lpid = i;
    417     }
    418 
    419     subblock();
    420 
    421     x_part = (jm - 2) / xprocs + 2;
    422     y_part = (im - 2) / yprocs + 2;
    423 
    424     d_size = x_part * y_part * sizeof(double) + y_part * sizeof(double *);
    425 
    426     global = (struct global_struct *) G_MALLOC(sizeof(struct global_struct), -1);
    427 
    428     for (i = 0; i < nprocs; i++) {
    429         psi[i][0] = (double **) G_MALLOC(d_size, i);
    430         psi[i][1] = (double **) G_MALLOC(d_size, i);
    431         psim[i][0] = (double **) G_MALLOC(d_size, i);
    432         psim[i][1] = (double **) G_MALLOC(d_size, i);
    433         psium[i] = (double **) G_MALLOC(d_size, i);
    434         psilm[i] = (double **) G_MALLOC(d_size, i);
    435         psib[i] = (double **) G_MALLOC(d_size, i);
    436         ga[i] = (double **) G_MALLOC(d_size, i);
    437         gb[i] = (double **) G_MALLOC(d_size, i);
    438         work1[i][0] = (double **) G_MALLOC(d_size, i);
    439         work1[i][1] = (double **) G_MALLOC(d_size, i);
    440         work2[i] = (double **) G_MALLOC(d_size, i);
    441         work3[i] = (double **) G_MALLOC(d_size, i);
    442         work4[i][0] = (double **) G_MALLOC(d_size, i);
    443         work4[i][1] = (double **) G_MALLOC(d_size, i);
    444         work5[i][0] = (double **) G_MALLOC(d_size, i);
    445         work5[i][1] = (double **) G_MALLOC(d_size, i);
    446         work6[i] = (double **) G_MALLOC(d_size, i);
    447         work7[i][0] = (double **) G_MALLOC(d_size, i);
    448         work7[i][1] = (double **) G_MALLOC(d_size, i);
    449         temparray[i][0] = (double **) G_MALLOC(d_size, i);
    450         temparray[i][1] = (double **) G_MALLOC(d_size, i);
    451         tauz[i] = (double **) G_MALLOC(d_size, i);
    452         oldga[i] = (double **) G_MALLOC(d_size, i);
    453         oldgb[i] = (double **) G_MALLOC(d_size, i);
    454     }
    455 
    456     oim = im;
    457     //f = (double *) G_MALLOC(oim*sizeof(double), 0);
    458     multi = (struct multi_struct *) G_MALLOC(sizeof(struct multi_struct), -1);
    459 
    460     d_size = numlev * sizeof(double **);
    461     if (numlev % 2 == 1) {      /* To make sure that the actual data
    462                                    starts double word aligned, add an extra
    463                                    pointer */
    464         d_size += sizeof(double **);
    465     }
    466     for (i = 0; i < numlev; i++) {
    467         d_size += ((imx[i] - 2) / yprocs + 2) * ((jmx[i] - 2) / xprocs + 2) * sizeof(double) + ((imx[i] - 2) / yprocs + 2) * sizeof(double *);
    468     }
    469 
    470     d_size *= nprocs;
    471 
    472     if (nprocs % 2 == 1) {      /* To make sure that the actual data
    473                                    starts double word aligned, add an extra
    474                                    pointer */
    475         d_size += sizeof(double ***);
    476     }
    477 
    478     d_size += nprocs * sizeof(double ***);
    479     main_q_multi = (double ****) G_MALLOC(d_size, -1);
    480     main_rhs_multi = (double ****) G_MALLOC(d_size, -1);
    481     q_multi = main_q_multi;
    482     rhs_multi = main_rhs_multi;
    483 
    484 
    485     locks = (struct locks_struct *) G_MALLOC(sizeof(struct locks_struct), -1);
    486     bars = (struct bars_struct *) G_MALLOC(sizeof(struct bars_struct), -1);
    487 
    488     LOCKINIT(locks->idlock)
    489     LOCKINIT(locks->psiailock)
    490     LOCKINIT(locks->psibilock)
    491     LOCKINIT(locks->donelock)
    492     LOCKINIT(locks->error_lock)
    493     LOCKINIT(locks->bar_lock)
    494 #if defined(MULTIPLE_BARRIERS)
    495     BARINIT(bars->iteration, nprocs)
    496     BARINIT(bars->gsudn, nprocs)
    497     BARINIT(bars->p_setup, nprocs)
    498     BARINIT(bars->p_redph, nprocs)
    499     BARINIT(bars->p_soln, nprocs)
    500     BARINIT(bars->p_subph, nprocs)
    501     BARINIT(bars->sl_prini, nprocs)
    502     BARINIT(bars->sl_psini, nprocs)
    503     BARINIT(bars->sl_onetime, nprocs)
    504     BARINIT(bars->sl_phase_1, nprocs)
    505     BARINIT(bars->sl_phase_2, nprocs)
    506     BARINIT(bars->sl_phase_3, nprocs)
    507     BARINIT(bars->sl_phase_4, nprocs)
    508     BARINIT(bars->sl_phase_5, nprocs)
    509     BARINIT(bars->sl_phase_6, nprocs)
    510     BARINIT(bars->sl_phase_7, nprocs)
    511     BARINIT(bars->sl_phase_8, nprocs)
    512     BARINIT(bars->sl_phase_9, nprocs)
    513     BARINIT(bars->sl_phase_10, nprocs)
    514     BARINIT(bars->error_barrier, nprocs)
    515 #else
    516     BARINIT(bars->barrier, nprocs)
    517 #endif
    518     link_all();
    519 
    520     multi->err_multi = 0.0;
    521     i_int_coeff[0] = 0.0;
    522     j_int_coeff[0] = 0.0;
    523 
    524     for (i = 0; i < numlev; i++) {
    525         i_int_coeff[i] = 1.0 / (imx[i] - 1);
    526         j_int_coeff[i] = 1.0 / (jmx[i] - 1);
    527     }
    528 
    529     /*
    530        initialize constants and variables
    531 
    532        id is a global shared variable that has fetch-and-add operations
    533        performed on it by processes to obtain their pids.   
    534      */
    535 
    536     //global->id = 0;
    537     global->trackstart = 0;
    538     global->psibi = 0.0;
    539 
    540     factjacob = -1. / (12. * res * res);
    541     factlap = 1. / (res * res);
    542     eig2 = -h * f0 * f0 / (h1 * h3 * gpr);
    543 
    544     jmm1 = jm - 1;
    545     ysca = ((double) jmm1) * res;
    546     im = (imx[numlev - 1] - 2) / yprocs + 2;
    547     jm = (jmx[numlev - 1] - 2) / xprocs + 2;
    548    
    549     main_im = im;
    550     main_jm = jm;
    551     main_numlev = numlev;
    552     main_xprocs = xprocs;
    553     main_yprocs = yprocs;
    554     main_eig2 = eig2;
    555 
    556     if (do_output) {
    557         printf("              MULTIGRID OUTPUTS\n");
    558     }
    559 
    560     CREATE(slave, nprocs);
    561     WAIT_FOR_END(nprocs);
    562     CLOCK(computeend);
    563 
    564     printf("\n");
    565     printf("                PROCESS STATISTICS\n");
    566     printf("                  Total          Multigrid         Multigrid\n");
    567     printf(" Proc             Time             Time            Fraction\n");
    568     printf("    0   %15.0f    %15.0f        %10.3f\n", (*gp[0].total_time), (*gp[0].multi_time), (*gp[0].multi_time) / (*gp[0].total_time));
    569 
    570     if (do_stats) {
    571         double phase_time;
    572         min_total = max_total = avg_total = (*gp[0].total_time);
    573         min_multi = max_multi = avg_multi = (*gp[0].multi_time);
    574         min_frac = max_frac = avg_frac = (*gp[0].multi_time) / (*gp[0].total_time);
    575         avg_wait = *gp[0].sync_time;
    576         max_wait = *gp[0].sync_time;
    577         min_wait = *gp[0].sync_time;
    578         imax_wait = 0;
    579         imin_wait = 0;
    580 
    581         for (i = 1; i < nprocs; i++) {
    582             if ((*gp[i].total_time) > max_total) {
    583                 max_total = (*gp[i].total_time);
    584             }
    585             if ((*gp[i].total_time) < min_total) {
    586                 min_total = (*gp[i].total_time);
    587             }
    588             if ((*gp[i].multi_time) > max_multi) {
    589                 max_multi = (*gp[i].multi_time);
    590             }
    591             if ((*gp[i].multi_time) < min_multi) {
    592                 min_multi = (*gp[i].multi_time);
    593             }
    594             if ((*gp[i].multi_time) / (*gp[i].total_time) > max_frac) {
    595                 max_frac = (*gp[i].multi_time) / (*gp[i].total_time);
    596             }
    597             if ((*gp[i].multi_time) / (*gp[i].total_time) < min_frac) {
    598                 min_frac = (*gp[i].multi_time) / (*gp[i].total_time);
    599             }
    600             avg_total += (*gp[i].total_time);
    601             avg_multi += (*gp[i].multi_time);
    602             avg_frac += (*gp[i].multi_time) / (*gp[i].total_time);
    603             avg_wait += (*gp[i].sync_time);
    604             if (max_wait < (*gp[i].sync_time)) {
    605                 max_wait = (*gp[i].sync_time);
    606                 imax_wait = i;
    607             }
    608             if (min_wait > (*gp[i].sync_time)) {
    609                 min_wait = (*gp[i].sync_time);
    610                 imin_wait = i;
    611             }
    612         }
    613         avg_total = avg_total / nprocs;
    614         avg_multi = avg_multi / nprocs;
    615         avg_frac = avg_frac / nprocs;
    616         avg_wait = avg_wait / nprocs;
    617         for (i = 1; i < nprocs; i++) {
    618             printf("  %3ld   %15.0f    %15.0f        %10.3f\n", i, (*gp[i].total_time), (*gp[i].multi_time), (*gp[i].multi_time) / (*gp[i].total_time));
    619         }
    620         printf("  Avg   %15.0f    %15.0f        %10.3f\n", avg_total, avg_multi, avg_frac);
    621         printf("  Min   %15.0f    %15.0f        %10.3f\n", min_total, min_multi, min_frac);
    622         printf("  Max   %15.0f    %15.0f        %10.3f\n", max_total, max_multi, max_frac);
    623        
    624         printf("\n\n                  Sync\n");
    625         printf(" Proc      Time        Fraction\n");
    626         for (i = 0; i < nprocs; i++) {
    627             printf("  %ld        %u      %f\n", i, (unsigned int)*gp[i].sync_time, *gp[i].sync_time / ((long)(*gp[i].total_time)));
    628         }
    629 
    630         printf("  Avg   %f   %f\n", avg_wait, (double) avg_wait / (long) (computeend - global->trackstart));
    631         printf("  Min   %f   %f\n", min_wait, (double) min_wait / (long) (*gp[imin_wait].total_time));
    632         printf("  Max   %f   %f\n", max_wait, (double) max_wait / (long) (*gp[imax_wait].total_time));
    633 
    634         printf("\nPhases Avg :\n\n");
    635         for (i = 0; i < 10; i++) {
    636             phase_time = 0;
    637             for (j = 0; j < nprocs; j++) {
    638                 phase_time += gp[j].steps_time[i];
    639             }
    640             phase_time /= (double) nprocs;
    641             printf("  %d = %f (fraction %f)\n", i + 1, phase_time, phase_time / (long) (computeend - global->trackstart));
    642         }
    643     }
    644     printf("\n");
    645 
    646     global->starttime = start;
    647     printf("                       TIMING INFORMATION\n");
    648     printf("[NPROCS]           : %16ld\n", nprocs);
    649     printf("[START1]           : %16llu\n", global->starttime);
    650     printf("[START2]           : %16llu\n", global->trackstart);
    651     printf("[END]              : %16llu\n", computeend);
    652     printf("[TOTAL]            : %16llu\n", computeend - global->starttime);    // With init
    653     printf("[PARALLEL_COMPUTE] : %16llu\n", computeend - global->trackstart);   // Without init
    654     printf("(excludes first timestep)\n");
    655     printf("\n");
    656 
    657     MAIN_END
    658    
    659 }
    660 
    /*
     * Exact base-2 logarithm of a positive power of two.
     *
     * Parameters:
     *   number - value to test.
     *
     * Returns: the exponent e such that number == 2^e when number is a power
     * of two with 0 <= e <= 50, and -1 for every other input (zero, negative
     * values, values that are not powers of two, and exponents above 50).
     *
     * The power-of-two test is done on the bits of number rather than by
     * repeated doubling of a counter, so no intermediate value can overflow
     * a signed long whatever its width.
     */
    long log_2(long number)
    {
        const long max_exponent = 50;   /* largest exponent reported */
        long out = 0;

        /* a positive power of two has exactly one bit set */
        if (number <= 0 || (number & (number - 1)) != 0) {
            return (-1);
        }

        /* count how many times the single set bit must be shifted down */
        while (number > 1) {
            number >>= 1;
            out++;
        }

        if (out > max_exponent) {
            return (-1);
        }
        return (out);
    }
    682 
    /*
     * Report an error message on the standard error stream.
     *
     * Parameters:
     *   s - message text; it is printed after the prefix "ERROR: " and is
     *       followed by a newline.  The callers in this file pass messages
     *       that already end with a newline, so a blank line follows them.
     */
    void printerr(char *s)
    {
        FILE *err_stream = stderr;

        (void) fprintf(err_stream, "ERROR: %s\n", s);
    }
    687 
    688 
    689 // Local Variables:
    690 // tab-width: 4
    691 // c-basic-offset: 4
    692 // c-file-offsets:((innamespace . 0)(inline-open . 0))
    693 // indent-tabs-mode: nil
    694 // End:
    695 
    696 // vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4
Note: See TracChangeset for help on using the changeset viewer.