[444] | 1 | /* |
---|
| 2 | (C) Copyright IBM Corp. 2008 |
---|
| 3 | |
---|
| 4 | All rights reserved. |
---|
| 5 | |
---|
| 6 | Redistribution and use in source and binary forms, with or without |
---|
| 7 | modification, are permitted provided that the following conditions are met: |
---|
| 8 | |
---|
| 9 | * Redistributions of source code must retain the above copyright notice, |
---|
| 10 | this list of conditions and the following disclaimer. |
---|
| 11 | * Redistributions in binary form must reproduce the above copyright |
---|
| 12 | notice, this list of conditions and the following disclaimer in the |
---|
| 13 | documentation and/or other materials provided with the distribution. |
---|
| 14 | * Neither the name of IBM nor the names of its contributors may be |
---|
| 15 | used to endorse or promote products derived from this software without |
---|
| 16 | specific prior written permission. |
---|
| 17 | |
---|
| 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
---|
| 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
| 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
| 21 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
---|
| 22 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
---|
| 23 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
---|
| 24 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
---|
| 25 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
---|
| 26 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
---|
| 27 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
---|
| 28 | POSSIBILITY OF SUCH DAMAGE. |
---|
| 29 | |
---|
| 30 | Author: Ken Werner <ken.werner@de.ibm.com> |
---|
| 31 | */ |
---|
| 32 | |
---|
| 33 | #include <stdio.h> |
---|
| 34 | #include <stdlib.h> |
---|
| 35 | #include <string.h> |
---|
| 36 | #include <unistd.h> |
---|
| 37 | #include <sys/types.h> |
---|
| 38 | #include <sys/stat.h> |
---|
| 39 | #include <sys/uio.h> |
---|
| 40 | #include <fcntl.h> |
---|
| 41 | #include <ea.h> |
---|
| 42 | #include <spu_intrinsics.h> |
---|
| 43 | #include <spu_mfcio.h> |
---|
| 44 | #include <spu_timer.h> |
---|
| 45 | #include <limits.h> |
---|
| 46 | #include <sys/linux_syscalls.h> |
---|
| 47 | |
---|
/* Magic cookie stored in the cookie field of the gmon.out header. */
#define GMON_MAGIC_COOKIE "gmon"

/* Version number stored in the gmon.out header. */
#define GMON_VERSION 1

/* Fraction of text space to allocate for histogram counters: one counter
   covers HISTFRACTION bytes of text. */
#define HISTFRACTION 4

/* Histogram counter type. */
#define HISTCOUNTER unsigned short

/* Fraction of text space to allocate for "from" hash buckets. HASHFRACTION is
   based on the minimum number of bytes of separation between two subroutine
   call points in the object code. */
#define HASHFRACTION 4

/* Percentage of the text size used as the number of tostructs to allocate;
   MINARCS is the lower bound. */
#define ARCDENSITY 3

/* Minimum number of arcs (tostructs) to allocate. */
#define MINARCS 50

/* Rounding macros: round x down / up to a multiple of y.  Both macros
   evaluate y more than once. */
#define ROUNDDOWN(x,y) (((x)/(y))*(y))
#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))

/* Sampling rate in Hertz.  Despite the name this is a frequency: the timer
   interval is computed as spu_timebase () / SAMPLE_INTERVAL. */
#define SAMPLE_INTERVAL 100

/* Tag definitions for the gmon.out sub headers. */
#define GMON_TAG_TIME_HIST 0
#define GMON_TAG_CG_ARC 1
---|
| 81 | |
---|
/* One call-graph arc target, kept in the tos array. */
struct tostruct
{
  uintptr_t selfpc;		/* Address of the function that was entered. */
  long count;			/* Number of times the arc was traversed. */
  unsigned short link;		/* Index of the next tos entry on the same
				   chain, 0 ends the chain.  In tos[0] this
				   field holds the last index handed out. */
};
---|
| 88 | |
---|
/* File header written at offset 0 of the gmon.out file. */
struct gmon_hdr
{
  char cookie[4];		/* Filled from GMON_MAGIC_COOKIE. */
  int32_t version;		/* Filled from GMON_VERSION. */
  char spare[3 * 4];		/* Unused; written as zeros. */
};
---|
| 95 | |
---|
/* Histogram sub header; written right after a GMON_TAG_TIME_HIST tag byte.
   Packed because the structure is written to the file as is. */
struct gmon_hist_hdr
{
  uintptr_t low_pc;		/* Lowest address covered by the histogram. */
  uintptr_t high_pc;		/* Upper end of the covered address range. */
  int32_t hist_size;		/* Number of histogram bins. */
  int32_t prof_rate;		/* Sampling rate in Hertz. */
  char dimen[15];		/* Name of the unit, e.g. "seconds". */
  char dimen_abbrev;		/* One character abbreviation of the unit. */
} __attribute__ ((packed));
---|
| 105 | |
---|
/* Call-graph arc record; written right after a GMON_TAG_CG_ARC tag byte.
   Packed because the structure is written to the file as is. */
struct rawarc
{
  uintptr_t raw_frompc;		/* Address of the call site (caller). */
  uintptr_t raw_selfpc;		/* Address of the called function. */
  long raw_count;		/* Number of times the arc was traversed. */
} __attribute__ ((packed));
---|
| 112 | |
---|
/* Start and end of the text section.  Only the addresses of these symbols
   are used. */
extern char _start;
extern char _etext;

/* EAR entry for the starting address of SPE executable image.  The asm
   statement below reserves 16 bytes for the _EAR_ object in the .toe
   section; its value is used as part of the gmon file name. */
extern const unsigned long long _EAR_;
asm (".section .toe,\"a\",@nobits\n\r"
     ".align 4\n\r"
     ".type _EAR_, @object\n\r"
     ".size _EAR_, 16\n" "_EAR_: .space 16\n" ".previous");
---|
| 123 | |
---|
/* froms are indexing tos: froms[i] holds the index of the first tos entry of
   the chain that belongs to call-site slot i, or 0 if there is none. */
static __ea unsigned short *froms;
static __ea struct tostruct *tos = 0;
/* Number of entries allocated for the tos array. */
static long tolimit = 0;
/* Profiled address range (rounded start and end of the text section) and its
   size in bytes. */
static uintptr_t s_lowpc = 0;
static uintptr_t s_highpc = 0;
static unsigned long s_textsize = 0;

/* File descriptor of the gmon.out file. */
static int fd;
/* Size of the histogram data in bytes. */
static int hist_size;
/* Identifier of the timer that drives __sample. */
static int timer_id;
---|
| 135 | |
---|
| 136 | void |
---|
| 137 | __sample (int id) |
---|
| 138 | { |
---|
| 139 | unsigned int pc; |
---|
| 140 | unsigned int pc_backup; |
---|
| 141 | off_t offset; |
---|
| 142 | unsigned short val; |
---|
| 143 | |
---|
| 144 | if (id != timer_id) |
---|
| 145 | return; |
---|
| 146 | |
---|
| 147 | /* Fetch program counter. */ |
---|
| 148 | pc = spu_read_srr0 () & ~3; |
---|
| 149 | pc_backup = pc; |
---|
| 150 | if (pc < s_lowpc || pc > s_highpc) |
---|
| 151 | return; |
---|
| 152 | pc -= (uintptr_t) & _start; |
---|
| 153 | offset = pc / HISTFRACTION * sizeof (HISTCOUNTER) + sizeof (struct gmon_hdr) |
---|
| 154 | + 1 + sizeof (struct gmon_hist_hdr); |
---|
| 155 | |
---|
| 156 | /* Read, increment and write the counter. */ |
---|
| 157 | if (pread (fd, &val, 2, offset) != 2) |
---|
| 158 | { |
---|
| 159 | perror ("can't read the histogram"); |
---|
| 160 | return; |
---|
| 161 | } |
---|
| 162 | if (val < USHRT_MAX) |
---|
| 163 | ++val; |
---|
| 164 | if (pwrite (fd, &val, 2, offset) != 2) |
---|
| 165 | { |
---|
| 166 | perror ("can't write the histogram"); |
---|
| 167 | } |
---|
| 168 | } |
---|
| 169 | |
---|
| 170 | static void |
---|
| 171 | write_histogram (int fd) |
---|
| 172 | { |
---|
| 173 | struct gmon_hist_hdr hist_hdr; |
---|
| 174 | u_char tag = GMON_TAG_TIME_HIST; |
---|
| 175 | hist_hdr.low_pc = s_lowpc; |
---|
| 176 | hist_hdr.high_pc = s_highpc; |
---|
| 177 | hist_hdr.hist_size = hist_size / sizeof (HISTCOUNTER); /* Amount of bins. */ |
---|
| 178 | hist_hdr.prof_rate = 100; /* Hertz. */ |
---|
| 179 | strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen)); |
---|
| 180 | hist_hdr.dimen_abbrev = 's'; |
---|
| 181 | struct iovec iov[2] = { |
---|
| 182 | {&tag, sizeof (tag)}, |
---|
| 183 | {&hist_hdr, sizeof (struct gmon_hist_hdr)} |
---|
| 184 | }; |
---|
| 185 | if (writev (fd, iov, 2) != sizeof (struct gmon_hist_hdr) + sizeof (tag)) |
---|
| 186 | perror ("can't write the histogram header"); |
---|
| 187 | |
---|
| 188 | /* Skip the already written histogram data. */ |
---|
| 189 | lseek (fd, hist_size, SEEK_CUR); |
---|
| 190 | } |
---|
| 191 | |
---|
| 192 | static void |
---|
| 193 | write_callgraph (int fd) |
---|
| 194 | { |
---|
| 195 | int fromindex, endfrom; |
---|
| 196 | uintptr_t frompc; |
---|
| 197 | int toindex; |
---|
| 198 | struct rawarc rawarc; |
---|
| 199 | u_char tag = GMON_TAG_CG_ARC; |
---|
| 200 | endfrom = s_textsize / (HASHFRACTION * sizeof (*froms)); |
---|
| 201 | for (fromindex = 0; fromindex < endfrom; ++fromindex) |
---|
| 202 | { |
---|
| 203 | if (froms[fromindex]) |
---|
| 204 | { |
---|
| 205 | frompc = s_lowpc + (fromindex * HASHFRACTION * sizeof (*froms)); |
---|
| 206 | for (toindex = froms[fromindex]; toindex != 0; |
---|
| 207 | toindex = tos[toindex].link) |
---|
| 208 | { |
---|
| 209 | rawarc.raw_frompc = frompc; |
---|
| 210 | rawarc.raw_selfpc = tos[toindex].selfpc; |
---|
| 211 | rawarc.raw_count = tos[toindex].count; |
---|
| 212 | struct iovec iov[2] = { |
---|
| 213 | {&tag, sizeof (tag)}, |
---|
| 214 | {&rawarc, sizeof (struct rawarc)} |
---|
| 215 | }; |
---|
| 216 | if (writev (fd, iov, 2) != sizeof (tag) + sizeof (struct rawarc)) |
---|
| 217 | perror ("can't write the callgraph"); |
---|
| 218 | } |
---|
| 219 | } |
---|
| 220 | } |
---|
| 221 | } |
---|
| 222 | |
---|
| 223 | void |
---|
| 224 | __mcleanup (void) |
---|
| 225 | { |
---|
| 226 | struct gmon_hdr ghdr; |
---|
| 227 | |
---|
| 228 | /* Disable sampling. */ |
---|
| 229 | spu_timer_stop (timer_id); |
---|
| 230 | spu_timer_free (timer_id); |
---|
| 231 | spu_clock_stop (); |
---|
| 232 | |
---|
| 233 | /* Jump to the beginning of the gmon.out file. */ |
---|
| 234 | if (lseek (fd, 0, SEEK_SET) == -1) |
---|
| 235 | { |
---|
| 236 | perror ("Cannot seek to the beginning of the gmon.out file."); |
---|
| 237 | close (fd); |
---|
| 238 | return; |
---|
| 239 | } |
---|
| 240 | |
---|
| 241 | /* Write the gmon.out header. */ |
---|
| 242 | memset (&ghdr, '\0', sizeof (struct gmon_hdr)); |
---|
| 243 | memcpy (&ghdr.cookie[0], GMON_MAGIC_COOKIE, sizeof (ghdr.cookie)); |
---|
| 244 | ghdr.version = GMON_VERSION; |
---|
| 245 | if (write (fd, &ghdr, sizeof (struct gmon_hdr)) == -1) |
---|
| 246 | { |
---|
| 247 | perror ("Cannot write the gmon header to the gmon.out file."); |
---|
| 248 | close (fd); |
---|
| 249 | return; |
---|
| 250 | } |
---|
| 251 | |
---|
| 252 | /* Write the sampling buffer (histogram). */ |
---|
| 253 | write_histogram (fd); |
---|
| 254 | |
---|
| 255 | /* Write the call graph. */ |
---|
| 256 | write_callgraph (fd); |
---|
| 257 | |
---|
| 258 | close (fd); |
---|
| 259 | } |
---|
| 260 | |
---|
| 261 | void |
---|
| 262 | __monstartup (unsigned long long spu_id) |
---|
| 263 | { |
---|
| 264 | char filename[64]; |
---|
| 265 | s_lowpc = |
---|
| 266 | ROUNDDOWN ((uintptr_t) & _start, HISTFRACTION * sizeof (HISTCOUNTER)); |
---|
| 267 | s_highpc = |
---|
| 268 | ROUNDUP ((uintptr_t) & _etext, HISTFRACTION * sizeof (HISTCOUNTER)); |
---|
| 269 | s_textsize = s_highpc - s_lowpc; |
---|
| 270 | |
---|
| 271 | hist_size = s_textsize / HISTFRACTION * sizeof (HISTCOUNTER); |
---|
| 272 | |
---|
| 273 | /* Allocate froms. */ |
---|
| 274 | froms = malloc_ea (s_textsize / HASHFRACTION); |
---|
| 275 | if (froms == NULL) |
---|
| 276 | { |
---|
| 277 | fprintf (stderr, "Cannot allocate ea memory for the froms array.\n"); |
---|
| 278 | return; |
---|
| 279 | } |
---|
| 280 | memset_ea (froms, 0, s_textsize / HASHFRACTION); |
---|
| 281 | |
---|
| 282 | /* Determine tolimit. */ |
---|
| 283 | tolimit = s_textsize * ARCDENSITY / 100; |
---|
| 284 | if (tolimit < MINARCS) |
---|
| 285 | tolimit = MINARCS; |
---|
| 286 | |
---|
| 287 | /* Allocate tos. */ |
---|
| 288 | tos = malloc_ea (tolimit * sizeof (struct tostruct)); |
---|
| 289 | if (tos == NULL) |
---|
| 290 | { |
---|
| 291 | fprintf (stderr, "Cannot allocate ea memory for the tos array.\n"); |
---|
| 292 | return; |
---|
| 293 | } |
---|
| 294 | memset_ea (tos, 0, tolimit * sizeof (struct tostruct)); |
---|
| 295 | |
---|
| 296 | /* Determine the gmon.out file name. */ |
---|
| 297 | if (spu_id) |
---|
| 298 | snprintf (filename, sizeof (filename), "gmon-%d-%llu-%llu.out", |
---|
| 299 | linux_getpid (), spu_id, _EAR_); |
---|
| 300 | else |
---|
| 301 | strncpy (filename, "gmon.out", sizeof (filename)); |
---|
| 302 | /* Open the gmon.out file. */ |
---|
| 303 | fd = open (filename, O_RDWR | O_CREAT | O_TRUNC, 0644); |
---|
| 304 | if (fd == -1) |
---|
| 305 | { |
---|
| 306 | char errstr[128]; |
---|
| 307 | snprintf (errstr, sizeof (errstr), "Cannot open file: %s", filename); |
---|
| 308 | perror (errstr); |
---|
| 309 | return; |
---|
| 310 | } |
---|
| 311 | /* Truncate the file up to the size where the histogram fits in. */ |
---|
| 312 | if (ftruncate (fd, |
---|
| 313 | sizeof (struct gmon_hdr) + 1 + |
---|
| 314 | sizeof (struct gmon_hist_hdr) + hist_size) == -1) |
---|
| 315 | { |
---|
| 316 | char errstr[128]; |
---|
| 317 | snprintf (errstr, sizeof (errstr), "Cannot truncate file: %s", filename); |
---|
| 318 | perror (errstr); |
---|
| 319 | return; |
---|
| 320 | } |
---|
| 321 | |
---|
| 322 | /* Start the histogram sampler. */ |
---|
| 323 | spu_slih_register (MFC_DECREMENTER_EVENT, spu_clock_slih); |
---|
| 324 | timer_id = spu_timer_alloc (spu_timebase () / SAMPLE_INTERVAL, __sample); |
---|
| 325 | spu_clock_start (); |
---|
| 326 | spu_timer_start (timer_id); |
---|
| 327 | |
---|
| 328 | atexit (__mcleanup); |
---|
| 329 | } |
---|
| 330 | |
---|
| 331 | void |
---|
| 332 | __mcount_internal (uintptr_t frompc, uintptr_t selfpc) |
---|
| 333 | { |
---|
| 334 | /* sefpc: the address of the function just entered. */ |
---|
| 335 | /* frompc: the caller of the function just entered. */ |
---|
| 336 | unsigned int mach_stat; |
---|
| 337 | __ea unsigned short *frompcindex; |
---|
| 338 | unsigned short toindex; |
---|
| 339 | __ea struct tostruct *top; |
---|
| 340 | __ea struct tostruct *prevtop; |
---|
| 341 | |
---|
| 342 | /* Save current state and disable interrupts. */ |
---|
| 343 | mach_stat = spu_readch(SPU_RdMachStat); |
---|
| 344 | spu_idisable (); |
---|
| 345 | |
---|
| 346 | /* Sanity checks. */ |
---|
| 347 | if (frompc < s_lowpc || frompc > s_highpc) |
---|
| 348 | goto done; |
---|
| 349 | frompc -= s_lowpc; |
---|
| 350 | if (frompc > s_textsize) |
---|
| 351 | goto done; |
---|
| 352 | |
---|
| 353 | /* frompc indexes into the froms array the value at that position indexes |
---|
| 354 | into the tos array. */ |
---|
| 355 | frompcindex = &froms[(frompc) / (HASHFRACTION * sizeof (*froms))]; |
---|
| 356 | toindex = *frompcindex; |
---|
| 357 | if (toindex == 0) |
---|
| 358 | { |
---|
| 359 | /* First time traversing this arc link of tos[0] incremented. */ |
---|
| 360 | toindex = ++tos[0].link; |
---|
| 361 | /* Sanity check. */ |
---|
| 362 | if (toindex >= tolimit) |
---|
| 363 | { |
---|
| 364 | --tos[0].link; |
---|
| 365 | goto done; |
---|
| 366 | } |
---|
| 367 | /* Save the index into the froms array for the next time we traverse this arc. */ |
---|
| 368 | *frompcindex = toindex; |
---|
| 369 | top = &tos[toindex]; |
---|
| 370 | /* Sets the address of the function just entered. */ |
---|
| 371 | top->selfpc = selfpc; |
---|
| 372 | top->count = 1; |
---|
| 373 | top->link = 0; |
---|
| 374 | goto done; |
---|
| 375 | } |
---|
| 376 | |
---|
| 377 | /* toindex points to a tostruct */ |
---|
| 378 | top = &tos[toindex]; |
---|
| 379 | if (top->selfpc == selfpc) |
---|
| 380 | { |
---|
| 381 | /* The arc is at front of the chain. This is the most common case. */ |
---|
| 382 | top->count++; |
---|
| 383 | goto done; |
---|
| 384 | } |
---|
| 385 | |
---|
| 386 | /* top->selfpc != selfpc |
---|
| 387 | The pc we have got is not the pc we already stored (i.e. multiple function |
---|
| 388 | calls to the same fuction within a function. The arc is not at front of |
---|
| 389 | the chain. */ |
---|
| 390 | for (;;) |
---|
| 391 | { |
---|
| 392 | if (top->link == 0) |
---|
| 393 | { |
---|
| 394 | /* We are at the end of the chain and selfpc was not found. Thus we create |
---|
| 395 | a new tostruct and link it to the head of the chain. */ |
---|
| 396 | toindex = ++tos[0].link; |
---|
| 397 | /* Sanity check. */ |
---|
| 398 | if (toindex >= tolimit) |
---|
| 399 | { |
---|
| 400 | --tos[0].link; |
---|
| 401 | goto done; |
---|
| 402 | } |
---|
| 403 | top = &tos[toindex]; |
---|
| 404 | top->selfpc = selfpc; |
---|
| 405 | top->count = 1; |
---|
| 406 | /* Link back to the old tos entry. */ |
---|
| 407 | top->link = *frompcindex; |
---|
| 408 | /* Store a link to the new top in the froms array which makes the |
---|
| 409 | current tos head of the chain. */ |
---|
| 410 | *frompcindex = toindex; |
---|
| 411 | goto done; |
---|
| 412 | } |
---|
| 413 | else |
---|
| 414 | { |
---|
| 415 | /* Otherwise check the next arc on the chain. */ |
---|
| 416 | prevtop = top; |
---|
| 417 | top = &tos[top->link]; |
---|
| 418 | if (top->selfpc == selfpc) |
---|
| 419 | { |
---|
| 420 | /* selfpc matches; increment its count. */ |
---|
| 421 | top->count++; |
---|
| 422 | /* Move it to the head of the chain. */ |
---|
| 423 | /* Save previous tos index. */ |
---|
| 424 | toindex = prevtop->link; |
---|
| 425 | /* Link the former to to the current tos. */ |
---|
| 426 | prevtop->link = top->link; |
---|
| 427 | /* Link back to the old tos entry. */ |
---|
| 428 | top->link = *frompcindex; |
---|
| 429 | /* Store a link to the new top in the froms array which makes the |
---|
| 430 | current tos head of the chain. */ |
---|
| 431 | *frompcindex = toindex; |
---|
| 432 | goto done; |
---|
| 433 | } |
---|
| 434 | } |
---|
| 435 | } |
---|
| 436 | done: |
---|
| 437 | /* Enable interrupts if necessary. */ |
---|
| 438 | if (__builtin_expect (mach_stat & 1, 0)) |
---|
| 439 | spu_ienable (); |
---|
| 440 | } |
---|