// Image filter test program; the only function defined here is main().
//
// What the visible text establishes:
//   - NB_PROCS, NB_CLUSTERS and seg_heap_base are read by taking the address
//     of extern symbols (the file's own comment says they come from the
//     ldscript).
//   - PRINTF expands to "if(lid==0) tty_printf", so it only prints when the
//     local variable lid is 0.
//   - main() halts in "while(1);" when NB_PROCS is not 1, 2 or 4, when
//     NB_CLUSTERS is not one of 4, 8, 16, 32, 64, 128 or 256, when pid >= ntasks, or
//     when NL or NP is not a multiple of NB_CLUSTERS.
//   - Five per-cluster pointer arrays A, B, C, D, Z (256 entries each) are
//     declared and accessed through the TA/TB/TC/TD/TZ macros.
//   - Progress messages announce a horizontal filter, a vertical filter and a
//     display phase; the display phase calls fb_write() and main() then spins
//     forever.
//
// NOTE(review): this copy of the file is damaged and cannot compile as is.
//   1. Line breaks have been lost: the file is three physical lines, and in C
//      everything after the first "//" on a line is comment text. The compiler
//      therefore sees only the #include and, on the second line,
//      "tasks int nblocks = (NP*NL*PIXEL_SIZE)/BLOCK_SIZE;" (the word "tasks"
//      is the tail of the preceding "number of tasks" comment).
//   2. Text appears to be missing at three places: right after "for( c=0 ; c",
//      after "for ( l=first ; l" and after "for ( p=first ; p". Presumably
//      everything between a '<' and the next '>' was stripped; TODO confirm
//      against a pristine copy.
//   3. Judging by the surviving comments and messages, the lost text held the
//      buffer address setup, both filter loop nests and most of the display
//      loop. Restore the file from version control; do not hand-repair it.
#include "stdio.h" //////////////////////////////////// // Image parameters #define PIXEL_SIZE 2 #define NL 1024 #define NP 1024 #define BLOCK_SIZE 1024 #define PRINTF if(lid==0) tty_printf #define TA(c,l,p) (A[c][((NP)*(l))+(p)]) #define TB(c,p,l) (B[c][((NL)*(p))+(l)]) #define TC(c,l,p) (C[c][((NP)*(l))+(p)]) #define TD(c,l,p) (D[c][((NP)*(l))+(p)]) #define TZ(c,l,p) (Z[c][((NP)*(l))+(p)]) #define max(x,y) ((x) > (y) ? (x) : (y)) #define min(x,y) ((x) < (y) ? (x) : (y)) /////////////////////////////////////////// // tricks to read parameters from ldscript /////////////////////////////////////////// struct plaf; extern struct plaf seg_heap_base; extern struct plaf NB_PROCS; extern struct plaf NB_CLUSTERS; ///////////// void main() { ////////////////////////////////// // convolution kernel parameters // The content of this section is // Philips proprietary information. /////////////////////////////////// int vnorm = 115; int vf[35]; vf[0] = 1; vf[1] = 1; vf[2] = 2; vf[3] = 2; vf[4] = 2; vf[5] = 2; vf[6] = 3; vf[7] = 3; vf[8] = 3; vf[9] = 4; vf[10] = 4; vf[11] = 4; vf[12] = 4; vf[13] = 5; vf[14] = 5; vf[15] = 5; vf[16] = 5; vf[17] = 5; vf[18] = 5; vf[19] = 5; vf[20] = 5; vf[21] = 5; vf[22] = 4; vf[23] = 4; vf[24] = 4; vf[25] = 4; vf[26] = 3; vf[27] = 3; vf[28] = 3; vf[29] = 2; vf[30] = 2; vf[31] = 2; vf[32] = 2; vf[33] = 1; vf[34] = 1; int hrange = 100; int hnorm = 201; unsigned int date = 0; unsigned int delta = 0; int c; // cluster index for loops int l; // line index for loops int p; // pixel index for loops int x; // filter index for loops int pid = procid(); // processor id int nprocs = (int)&NB_PROCS; // number of processors per cluster int nclusters = (int)&NB_CLUSTERS; // number of clusters int lid = pid%nprocs; // local task id int cid = pid/nprocs; // cluster task id int base = (unsigned int)&seg_heap_base; // base address for shared buffers int increment = (0x80000000 / nclusters) * 2; // cluster increment int ntasks = nclusters * nprocs; // number of 
tasks int nblocks = (NP*NL*PIXEL_SIZE)/BLOCK_SIZE; // number of blocks per image int lines_per_task = NL/ntasks; // number of lines per task int lines_per_cluster = NL/nclusters; // number of lines per cluster int pixels_per_task = NP/ntasks; // number of columns per task int pixels_per_cluster = NP/nclusters; // number of columns per cluster int first, last; PRINTF("\n*** Processor %d entering main at cycle %d ***\n\n", pid, proctime()); ////////////////////////// // parameters checking if( (nprocs != 1) && (nprocs != 2) && (nprocs != 4) ) { PRINTF("NB_PROCS must be 1, 2 or 4\n"); while(1); } if( (nclusters != 4) && (nclusters != 8) && (nclusters != 16) && (nclusters != 32) && (nclusters != 64) && (nclusters !=128) && (nclusters != 256) ) { PRINTF("NB_CLUSTERS must be a power of 2 between 4 and 256\n"); while(1); } if( pid >= ntasks ) { PRINTF("processor id %d larger than NB_CLUSTERS*NB_PROCS\n", pid); while(1); } if ( NL % nclusters != 0 ) { PRINTF("NB_CLUSTERS must be a divider of NL"); while(1); } if( NP % nclusters != 0 ) { PRINTF("NB_CLUSTERS must be a divider of NP"); while(1); } ////////////////////////////////////////////////////////////////// // Arrays of pointers on the shared, distributed buffers // containing the images (sized for the worst case : 256 clusters) unsigned short* A[256]; int* B[256]; int* C[256]; int* D[256]; unsigned char* Z[256]; // The shared, distributed buffers addresses are computed // from the seg_heap_base value defined in the ldscript file // and from the cluster increment = 4Gbytes/nclusters. 
// These arrays of pointers are identical and // replicated in the stack of each task for( c=0 ; cNP-1) TA(cid,l,z) == TA(cid,l,NP-1) delta = proctime() - date; date = date + delta; PRINTF("\n*** Starting horizontal filter at cycle %d (%d)\n", date, delta); // l = absolute line index / p = absolute pixel index // first & last define which lines are handled by a given task(cid,lid) first = (cid*nprocs + lid)*lines_per_task; last = first + lines_per_task; for ( l=first ; lNL-1) TB(cid,p,x) == TB(cid,p,NL-1) delta = proctime() - date; date = date + delta; PRINTF("\n*** starting vertical filter at cycle %d (%d)\n", date, delta); // l = absolute line index / p = absolute pixel index // first & last define which pixels are handled by a given task(cid,lid) first = (cid*nprocs + lid)*pixels_per_task; last = first + pixels_per_task; for ( p=first ; p>8) & 0xFF); } fb_write(NP*(cid*lines_per_cluster+l), &TZ(cid,l,0), NP); } delta = proctime() - date; date = date + delta; PRINTF("*** Completing display at cycle %d (%d)\n", date, delta); while(1); } // end main()