/*- * Copyright (c) 2014-2015 Netflix, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include "eval_expr.h" __FBSDID("$FreeBSD$"); static int max_pmc_counters = 1; static int run_all = 0; #define MAX_COUNTER_SLOTS 1024 #define MAX_NLEN 64 #define MAX_CPU 64 static int verbose = 0; extern char **environ; extern struct expression *master_exp; struct expression *master_exp=NULL; #define PMC_INITIAL_ALLOC 512 extern char **valid_pmcs; char **valid_pmcs = NULL; extern int valid_pmc_cnt; int valid_pmc_cnt=0; extern int pmc_allocated_cnt; int pmc_allocated_cnt=0; /* * The following two varients on popen and pclose with * the cavet that they get you the PID so that you * can supply it to pclose so it can send a SIGTERM * to the process. */ static FILE * my_popen(const char *command, const char *dir, pid_t *p_pid) { FILE *io_out, *io_in; int pdesin[2], pdesout[2]; char *argv[4]; pid_t pid; char cmd[4]; char cmd2[1024]; char arg1[4]; if ((strcmp(dir, "r") != 0) && (strcmp(dir, "w") != 0)) { errno = EINVAL; return(NULL); } if (pipe(pdesin) < 0) return (NULL); if (pipe(pdesout) < 0) { (void)close(pdesin[0]); (void)close(pdesin[1]); return (NULL); } strcpy(cmd, "sh"); strcpy(arg1, "-c"); strcpy(cmd2, command); argv[0] = cmd; argv[1] = arg1; argv[2] = cmd2; argv[3] = NULL; switch (pid = fork()) { case -1: /* Error. */ (void)close(pdesin[0]); (void)close(pdesin[1]); (void)close(pdesout[0]); (void)close(pdesout[1]); return (NULL); /* NOTREACHED */ case 0: /* Child. */ /* Close out un-used sides */ (void)close(pdesin[1]); (void)close(pdesout[0]); /* Now prepare the stdin of the process */ close(0); (void)dup(pdesin[0]); (void)close(pdesin[0]); /* Now prepare the stdout of the process */ close(1); (void)dup(pdesout[1]); /* And lets do stderr just in case */ close(2); (void)dup(pdesout[1]); (void)close(pdesout[1]); /* Now run it */ execve("/bin/sh", argv, environ); exit(127); /* NOTREACHED */ } /* Parent; assume fdopen can't fail. 
*/ /* Store the pid */ *p_pid = pid; if (strcmp(dir, "r") != 0) { io_out = fdopen(pdesin[1], "w"); (void)close(pdesin[0]); (void)close(pdesout[0]); (void)close(pdesout[1]); return(io_out); } else { /* Prepare the input stream */ io_in = fdopen(pdesout[0], "r"); (void)close(pdesout[1]); (void)close(pdesin[0]); (void)close(pdesin[1]); return (io_in); } } /* * pclose -- * Pclose returns -1 if stream is not associated with a `popened' command, * if already `pclosed', or waitpid returns an error. */ static void my_pclose(FILE *io, pid_t the_pid) { int pstat; pid_t pid; /* * Find the appropriate file pointer and remove it from the list. */ (void)fclose(io); /* Die if you are not dead! */ kill(the_pid, SIGTERM); do { pid = wait4(the_pid, &pstat, 0, (struct rusage *)0); } while (pid == -1 && errno == EINTR); } struct counters { struct counters *next_cpu; char counter_name[MAX_NLEN]; /* Name of counter */ int cpu; /* CPU we are on */ int pos; /* Index we are filling to. */ uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */ uint64_t sum; /* Summary of entries */ }; extern struct counters *glob_cpu[MAX_CPU]; struct counters *glob_cpu[MAX_CPU]; extern struct counters *cnts; struct counters *cnts=NULL; extern int ncnts; int ncnts=0; extern int (*expression)(struct counters *, int); int (*expression)(struct counters *, int); static const char *threshold=NULL; static const char *command; struct cpu_entry { const char *name; const char *thresh; const char *command; int (*func)(struct counters *, int); int counters_required; }; struct cpu_type { char cputype[32]; int number; struct cpu_entry *ents; void (*explain)(const char *name); }; extern struct cpu_type the_cpu; struct cpu_type the_cpu; static void explain_name_sb(const char *name) { const char *mythresh; if (strcmp(name, "allocstall1") == 0) { printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; } else if (strcmp(name, "allocstall2") == 0) { printf("Examine 
PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; } else if (strcmp(name, "br_miss") == 0) { printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .2"; } else if (strcmp(name, "splitload") == 0) { printf("Examine MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .1"; } else if (strcmp(name, "splitstore") == 0) { printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); mythresh = "thresh >= .01"; } else if (strcmp(name, "contested") == 0) { printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .05"; } else if (strcmp(name, "blockstorefwd") == 0) { printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .05"; } else if (strcmp(name, "cache2") == 0) { printf("Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"); printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"); printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"); printf("**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n"); mythresh = "thresh >= .2"; } else if (strcmp(name, "cache1") == 0) { printf("Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .2"; } else if (strcmp(name, "dtlbmissload") == 0) { printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); mythresh = "thresh >= .1"; } else if (strcmp(name, "frontendstall") == 0) { printf("Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); mythresh = "thresh >= .15"; } else if (strcmp(name, "clears") == 0) { printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); printf(" MACHINE_CLEARS.SMC + \n"); printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / 
CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .02"; } else if (strcmp(name, "microassist") == 0) { printf("Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); mythresh = "thresh >= .05"; } else if (strcmp(name, "aliasing_4k") == 0) { printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .1"; } else if (strcmp(name, "fpassist") == 0) { printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); mythresh = "look for a excessive value"; } else if (strcmp(name, "otherassistavx") == 0) { printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "look for a excessive value"; } else if (strcmp(name, "otherassistsse") == 0) { printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "look for a excessive value"; } else if (strcmp(name, "eff1") == 0) { printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); mythresh = "thresh < .9"; } else if (strcmp(name, "eff2") == 0) { printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); mythresh = "thresh > 1.0"; } else if (strcmp(name, "dtlbmissstore") == 0) { printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); mythresh = "thresh >= .05"; } else { printf("Unknown name:%s\n", name); mythresh = "unknown entry"; } printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); } static void explain_name_ib(const char *name) { const char *mythresh; if (strcmp(name, "br_miss") == 0) { printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"); printf(" MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"); printf("/ (4 * CPU_CLK_UNHALTED.THREAD))))\n"); mythresh = "thresh >= .2"; } else if (strcmp(name, "eff1") == 0) { printf("Examine 
(UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); mythresh = "thresh < .9"; } else if (strcmp(name, "eff2") == 0) { printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); mythresh = "thresh > 1.0"; } else if (strcmp(name, "cache1") == 0) { printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .2"; } else if (strcmp(name, "cache2") == 0) { printf("Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .2"; } else if (strcmp(name, "itlbmiss") == 0) { printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; } else if (strcmp(name, "icachemiss") == 0) { printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; } else if (strcmp(name, "lcpstall") == 0) { printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; } else if (strcmp(name, "datashare") == 0) { printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; } else if (strcmp(name, "blockstorefwd") == 0) { printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .05"; } else if (strcmp(name, "splitload") == 0) { printf("Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"); printf(" LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .1"; } else if (strcmp(name, "splitstore") == 0) { printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); mythresh = "thresh >= .01"; } else if (strcmp(name, "aliasing_4k") == 0) { printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .1"; } else if (strcmp(name, "dtlbmissload") == 0) { printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); printf(" / 
CPU_CLK_UNHALTED.THREAD_P)\n"); mythresh = "thresh >= .1"; } else if (strcmp(name, "dtlbmissstore") == 0) { printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); mythresh = "thresh >= .05"; } else if (strcmp(name, "contested") == 0) { printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .05"; } else if (strcmp(name, "clears") == 0) { printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); printf(" MACHINE_CLEARS.SMC + \n"); printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .02"; } else if (strcmp(name, "microassist") == 0) { printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); mythresh = "thresh >= .05"; } else if (strcmp(name, "fpassist") == 0) { printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); mythresh = "look for a excessive value"; } else if (strcmp(name, "otherassistavx") == 0) { printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "look for a excessive value"; } else if (strcmp(name, "otherassistsse") == 0) { printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "look for a excessive value"; } else { printf("Unknown name:%s\n", name); mythresh = "unknown entry"; } printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); } static void explain_name_has(const char *name) { const char *mythresh; if (strcmp(name, "eff1") == 0) { printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); mythresh = "thresh < .75"; } else if (strcmp(name, "eff2") == 0) { printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); mythresh = "thresh > 1.0"; } else if (strcmp(name, "itlbmiss") == 0) { printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 
mythresh = "thresh > .05"; } else if (strcmp(name, "icachemiss") == 0) { printf("Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; } else if (strcmp(name, "lcpstall") == 0) { printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; } else if (strcmp(name, "cache1") == 0) { printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .2"; } else if (strcmp(name, "cache2") == 0) { printf("Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"); printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"); printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"); printf(" / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .2"; } else if (strcmp(name, "contested") == 0) { printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .05"; } else if (strcmp(name, "datashare") == 0) { printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; } else if (strcmp(name, "blockstorefwd") == 0) { printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .05"; } else if (strcmp(name, "splitload") == 0) { printf("Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .1"; } else if (strcmp(name, "splitstore") == 0) { printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); mythresh = "thresh >= .01"; } else if (strcmp(name, "aliasing_4k") == 0) { printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .1"; } else if (strcmp(name, "dtlbmissload") == 0) { printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); mythresh = "thresh >= .1"; } else if (strcmp(name, "br_miss") == 0) { printf("Examine (20 
* BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n"); mythresh = "thresh >= .2"; } else if (strcmp(name, "clears") == 0) { printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); printf(" MACHINE_CLEARS.SMC + \n"); printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .02"; } else if (strcmp(name, "microassist") == 0) { printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); mythresh = "thresh >= .05"; } else if (strcmp(name, "fpassist") == 0) { printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); mythresh = "look for a excessive value"; } else if (strcmp(name, "otherassistavx") == 0) { printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "look for a excessive value"; } else if (strcmp(name, "otherassistsse") == 0) { printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "look for a excessive value"; } else { printf("Unknown name:%s\n", name); mythresh = "unknown entry"; } printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); } static struct counters * find_counter(struct counters *base, const char *name) { struct counters *at; int len; at = base; len = strlen(name); while(at) { if (strncmp(at->counter_name, name, len) == 0) { return(at); } at = at->next_cpu; } printf("Can't find counter %s\n", name); printf("We have:\n"); at = base; while(at) { printf("- %s\n", at->counter_name); at = at->next_cpu; } exit(-1); } static int allocstall1(struct counters *cpu, int pos) { /* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/ int ret; struct counters *partial; struct counters *unhalt; double un, par, res; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW"); if (pos != -1) { par = partial->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } 
else { par = partial->sum * 1.0; un = unhalt->sum * 1.0; } res = par/un; ret = printf("%1.3f", res); return(ret); } static int allocstall2(struct counters *cpu, int pos) { /* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ int ret; struct counters *partial; struct counters *unhalt; double un, par, res; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP"); if (pos != -1) { par = partial->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { par = partial->sum * 1.0; un = unhalt->sum * 1.0; } res = par/un; ret = printf("%1.3f", res); return(ret); } static int br_mispredict(struct counters *cpu, int pos) { struct counters *brctr; struct counters *unhalt; int ret; /* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ double br, un, con, res; con = 20.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); if (pos != -1) { br = brctr->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { br = brctr->sum * 1.0; un = unhalt->sum * 1.0; } res = (con * br)/un; ret = printf("%1.3f", res); return(ret); } static int br_mispredictib(struct counters *cpu, int pos) { struct counters *brctr; struct counters *unhalt; struct counters *clear, *clear2, *clear3; struct counters *uops; struct counters *recv; struct counters *iss; /* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/ int ret; /* * (BR_MISP_RETIRED.ALL_BRANCHES / * (BR_MISP_RETIRED.ALL_BRANCHES + * MACHINE_CLEAR.COUNT) * * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD))) * */ double br, cl, cl2, cl3, uo, re, un, con, res, is; con = 4.0; unhalt = find_counter(cpu, 
"CPU_CLK_UNHALTED.THREAD_P"); brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); iss = find_counter(cpu, "UOPS_ISSUED.ANY"); recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); if (pos != -1) { br = brctr->vals[pos] * 1.0; cl = clear->vals[pos] * 1.0; cl2 = clear2->vals[pos] * 1.0; cl3 = clear3->vals[pos] * 1.0; uo = uops->vals[pos] * 1.0; re = recv->vals[pos] * 1.0; is = iss->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { br = brctr->sum * 1.0; cl = clear->sum * 1.0; cl2 = clear2->sum * 1.0; cl3 = clear3->sum * 1.0; uo = uops->sum * 1.0; re = recv->sum * 1.0; is = iss->sum * 1.0; un = unhalt->sum * 1.0; } res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un))); ret = printf("%1.3f", res); return(ret); } static int br_mispredict_broad(struct counters *cpu, int pos) { struct counters *brctr; struct counters *unhalt; struct counters *clear; struct counters *uops; struct counters *uops_ret; struct counters *recv; int ret; double br, cl, uo, uo_r, re, con, un, res; con = 4.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); uops = find_counter(cpu, "UOPS_ISSUED.ANY"); uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); if (pos != -1) { un = unhalt->vals[pos] * 1.0; br = brctr->vals[pos] * 1.0; cl = clear->vals[pos] * 1.0; uo = uops->vals[pos] * 1.0; uo_r = uops_ret->vals[pos] * 1.0; re = recv->vals[pos] * 1.0; } else { un = unhalt->sum * 1.0; br = brctr->sum * 1.0; cl = clear->sum * 1.0; uo = uops->sum * 1.0; uo_r = uops_ret->sum * 1.0; re = recv->sum * 1.0; } res = br / (br + cl) * (uo - uo_r + con * re) / (un * con); ret = printf("%1.3f", 
res); return(ret); } static int splitloadib(struct counters *cpu, int pos) { int ret; struct counters *mem; struct counters *l1d, *ldblock; struct counters *unhalt; double un, memd, res, l1, ldb; /* * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", */ unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS"); l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING"); ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR"); if (pos != -1) { memd = mem->vals[pos] * 1.0; l1 = l1d->vals[pos] * 1.0; ldb = ldblock->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { memd = mem->sum * 1.0; l1 = l1d->sum * 1.0; ldb = ldblock->sum * 1.0; un = unhalt->sum * 1.0; } res = ((l1 / memd) * ldb)/un; ret = printf("%1.3f", res); return(ret); } static int splitload(struct counters *cpu, int pos) { int ret; struct counters *mem; struct counters *unhalt; double con, un, memd, res; /* 4 - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ con = 5.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS"); if (pos != -1) { memd = mem->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { memd = mem->sum * 1.0; un = unhalt->sum * 1.0; } res = (memd * con)/un; ret = printf("%1.3f", res); return(ret); } static int splitload_sb(struct counters *cpu, int pos) { int ret; struct counters *mem; struct counters *unhalt; double con, un, memd, res; /* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ con = 5.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS"); if (pos != -1) { memd = mem->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { memd = mem->sum * 1.0; un = unhalt->sum * 1.0; } res = 
(memd * con)/un; ret = printf("%1.3f", res); return(ret); } static int splitstore_sb(struct counters *cpu, int pos) { /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */ int ret; struct counters *mem_split; struct counters *mem_stores; double memsplit, memstore, res; mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES"); mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES"); if (pos != -1) { memsplit = mem_split->vals[pos] * 1.0; memstore = mem_stores->vals[pos] * 1.0; } else { memsplit = mem_split->sum * 1.0; memstore = mem_stores->sum * 1.0; } res = memsplit/memstore; ret = printf("%1.3f", res); return(ret); } static int splitstore(struct counters *cpu, int pos) { /* 5 - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */ int ret; struct counters *mem_split; struct counters *mem_stores; double memsplit, memstore, res; mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES"); mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES"); if (pos != -1) { memsplit = mem_split->vals[pos] * 1.0; memstore = mem_stores->vals[pos] * 1.0; } else { memsplit = mem_split->sum * 1.0; memstore = mem_stores->sum * 1.0; } res = memsplit/memstore; ret = printf("%1.3f", res); return(ret); } static int contested(struct counters *cpu, int pos) { /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ int ret; struct counters *mem; struct counters *unhalt; double con, un, memd, res; con = 60.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); if (pos != -1) { memd = mem->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { memd = mem->sum * 1.0; un = unhalt->sum * 1.0; } res = (memd * con)/un; ret = printf("%1.3f", res); return(ret); } static int contested_has(struct counters *cpu, int pos) { /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh 
>.05) */ int ret; struct counters *mem; struct counters *unhalt; double con, un, memd, res; con = 84.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); if (pos != -1) { memd = mem->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { memd = mem->sum * 1.0; un = unhalt->sum * 1.0; } res = (memd * con)/un; ret = printf("%1.3f", res); return(ret); } static int contestedbroad(struct counters *cpu, int pos) { /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ int ret; struct counters *mem; struct counters *mem2; struct counters *unhalt; double con, un, memd, memtoo, res; con = 84.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS"); if (pos != -1) { memd = mem->vals[pos] * 1.0; memtoo = mem2->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { memd = mem->sum * 1.0; memtoo = mem2->sum * 1.0; un = unhalt->sum * 1.0; } res = ((memd * con) + memtoo)/un; ret = printf("%1.3f", res); return(ret); } static int blockstoreforward(struct counters *cpu, int pos) { /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/ int ret; struct counters *ldb; struct counters *unhalt; double con, un, ld, res; con = 13.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD"); if (pos != -1) { ld = ldb->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { ld = ldb->sum * 1.0; un = unhalt->sum * 1.0; } res = (ld * con)/un; ret = printf("%1.3f", res); return(ret); } static int cache2(struct counters *cpu, int pos) { /* ** Suspect *** * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) */ int ret; struct counters *mem1, 
*mem2, *mem3; struct counters *unhalt; double con1, con2, con3, un, me_1, me_2, me_3, res; con1 = 26.0; con2 = 43.0; con3 = 60.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/ mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); if (pos != -1) { me_1 = mem1->vals[pos] * 1.0; me_2 = mem2->vals[pos] * 1.0; me_3 = mem3->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { me_1 = mem1->sum * 1.0; me_2 = mem2->sum * 1.0; me_3 = mem3->sum * 1.0; un = unhalt->sum * 1.0; } res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un; ret = printf("%1.3f", res); return(ret); } static int datasharing(struct counters *cpu, int pos) { /* * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) */ int ret; struct counters *mem; struct counters *unhalt; double con, res, me, un; con = 43.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); if (pos != -1) { me = mem->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { me = mem->sum * 1.0; un = unhalt->sum * 1.0; } res = (me * con)/un; ret = printf("%1.3f", res); return(ret); } static int datasharing_has(struct counters *cpu, int pos) { /* * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) */ int ret; struct counters *mem; struct counters *unhalt; double con, res, me, un; con = 72.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); if (pos != -1) { me = mem->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { me = mem->sum * 1.0; un = unhalt->sum * 1.0; } res = (me * con)/un; ret = printf("%1.3f", res); return(ret); } static int cache2ib(struct counters *cpu, int pos) { /* * (29 * 
MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) */ int ret; struct counters *mem; struct counters *unhalt; double con, un, me, res; con = 29.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); if (pos != -1) { me = mem->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { me = mem->sum * 1.0; un = unhalt->sum * 1.0; } res = (con * me)/un; ret = printf("%1.3f", res); return(ret); } static int cache2has(struct counters *cpu, int pos) { /* * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \ * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) * / CPU_CLK_UNHALTED.THREAD_P */ int ret; struct counters *mem1, *mem2, *mem3; struct counters *unhalt; double con1, con2, con3, un, me1, me2, me3, res; con1 = 36.0; con2 = 72.0; con3 = 84.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); if (pos != -1) { me1 = mem1->vals[pos] * 1.0; me2 = mem2->vals[pos] * 1.0; me3 = mem3->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { me1 = mem1->sum * 1.0; me2 = mem2->sum * 1.0; me3 = mem3->sum * 1.0; un = unhalt->sum * 1.0; } res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un; ret = printf("%1.3f", res); return(ret); } static int cache2broad(struct counters *cpu, int pos) { /* * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) */ int ret; struct counters *mem; struct counters *unhalt; double con, un, me, res; con = 36.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT"); if (pos != -1) { me = mem->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { me = mem->sum * 1.0; un = unhalt->sum * 1.0; } res = (con * me)/un; ret = printf("%1.3f", res); 
return(ret); } static int cache1(struct counters *cpu, int pos) { /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ int ret; struct counters *mem; struct counters *unhalt; double con, un, me, res; con = 180.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS"); if (pos != -1) { me = mem->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { me = mem->sum * 1.0; un = unhalt->sum * 1.0; } res = (me * con)/un; ret = printf("%1.3f", res); return(ret); } static int cache1ib(struct counters *cpu, int pos) { /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ int ret; struct counters *mem; struct counters *unhalt; double con, un, me, res; con = 180.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM"); if (pos != -1) { me = mem->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { me = mem->sum * 1.0; un = unhalt->sum * 1.0; } res = (me * con)/un; ret = printf("%1.3f", res); return(ret); } static int cache1broad(struct counters *cpu, int pos) { /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ int ret; struct counters *mem; struct counters *unhalt; double con, un, me, res; con = 180.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS"); if (pos != -1) { me = mem->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { me = mem->sum * 1.0; un = unhalt->sum * 1.0; } res = (me * con)/un; ret = printf("%1.3f", res); return(ret); } static int dtlb_missload(struct counters *cpu, int pos) { /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */ int ret; struct counters *dtlb_m, *dtlb_d; struct counters *unhalt; double con, un, d1, d2, res; con = 7.0; unhalt = 
find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT"); dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION"); if (pos != -1) { d1 = dtlb_m->vals[pos] * 1.0; d2 = dtlb_d->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { d1 = dtlb_m->sum * 1.0; d2 = dtlb_d->sum * 1.0; un = unhalt->sum * 1.0; } res = ((d1 * con) + d2)/un; ret = printf("%1.3f", res); return(ret); } static int dtlb_missstore(struct counters *cpu, int pos) { /* * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / * CPU_CLK_UNHALTED.THREAD_P (t >= .1) */ int ret; struct counters *dtsb_m, *dtsb_d; struct counters *unhalt; double con, un, d1, d2, res; con = 7.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT"); dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION"); if (pos != -1) { d1 = dtsb_m->vals[pos] * 1.0; d2 = dtsb_d->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { d1 = dtsb_m->sum * 1.0; d2 = dtsb_d->sum * 1.0; un = unhalt->sum * 1.0; } res = ((d1 * con) + d2)/un; ret = printf("%1.3f", res); return(ret); } static int itlb_miss(struct counters *cpu, int pos) { /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P IB */ int ret; struct counters *itlb; struct counters *unhalt; double un, d1, res; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); if (pos != -1) { d1 = itlb->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { d1 = itlb->sum * 1.0; un = unhalt->sum * 1.0; } res = d1/un; ret = printf("%1.3f", res); return(ret); } static int itlb_miss_broad(struct counters *cpu, int pos) { /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P */ int ret; struct counters *itlb; struct counters *unhalt; struct counters *four_k; double un, d1, res, k; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 
four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K"); if (pos != -1) { d1 = itlb->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; k = four_k->vals[pos] * 1.0; } else { d1 = itlb->sum * 1.0; un = unhalt->sum * 1.0; k = four_k->sum * 1.0; } res = (7.0 * k + d1)/un; ret = printf("%1.3f", res); return(ret); } static int icache_miss(struct counters *cpu, int pos) { /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */ int ret; struct counters *itlb, *icache; struct counters *unhalt; double un, d1, ic, res; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); icache = find_counter(cpu, "ICACHE.IFETCH_STALL"); if (pos != -1) { d1 = itlb->vals[pos] * 1.0; ic = icache->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { d1 = itlb->sum * 1.0; ic = icache->sum * 1.0; un = unhalt->sum * 1.0; } res = (ic-d1)/un; ret = printf("%1.3f", res); return(ret); } static int icache_miss_has(struct counters *cpu, int pos) { /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */ int ret; struct counters *icache; struct counters *unhalt; double un, con, ic, res; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); icache = find_counter(cpu, "ICACHE.MISSES"); con = 36.0; if (pos != -1) { ic = icache->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { ic = icache->sum * 1.0; un = unhalt->sum * 1.0; } res = (con * ic)/un; ret = printf("%1.3f", res); return(ret); } static int lcp_stall(struct counters *cpu, int pos) { /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */ int ret; struct counters *ild; struct counters *unhalt; double un, d1, res; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); ild = find_counter(cpu, "ILD_STALL.LCP"); if (pos != -1) { d1 = ild->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { d1 = ild->sum * 1.0; un = unhalt->sum * 1.0; } res = d1/un; ret = printf("%1.3f", res); return(ret); } static int frontendstall(struct counters *cpu, int pos) { /* 12 - 
IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */ int ret; struct counters *idq; struct counters *unhalt; double con, un, id, res; con = 4.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE"); if (pos != -1) { id = idq->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { id = idq->sum * 1.0; un = unhalt->sum * 1.0; } res = id/(un * con); ret = printf("%1.3f", res); return(ret); } static int clears(struct counters *cpu, int pos) { /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 ) * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/ int ret; struct counters *clr1, *clr2, *clr3; struct counters *unhalt; double con, un, cl1, cl2, cl3, res; con = 100.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); if (pos != -1) { cl1 = clr1->vals[pos] * 1.0; cl2 = clr2->vals[pos] * 1.0; cl3 = clr3->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { cl1 = clr1->sum * 1.0; cl2 = clr2->sum * 1.0; cl3 = clr3->sum * 1.0; un = unhalt->sum * 1.0; } res = ((cl1 + cl2 + cl3) * con)/un; ret = printf("%1.3f", res); return(ret); } static int clears_broad(struct counters *cpu, int pos) { int ret; struct counters *clr1, *clr2, *clr3, *cyc; struct counters *unhalt; double con, un, cl1, cl2, cl3, cy, res; con = 100.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); if (pos != -1) { cl1 = clr1->vals[pos] * 1.0; cl2 = clr2->vals[pos] * 1.0; cl3 = clr3->vals[pos] * 1.0; cy = cyc->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { cl1 = clr1->sum * 1.0; cl2 = clr2->sum * 1.0; 
cl3 = clr3->sum * 1.0; cy = cyc->sum * 1.0; un = unhalt->sum * 1.0; } /* Formula not listed but extrapulated to add the cy ?? */ res = ((cl1 + cl2 + cl3 + cy) * con)/un; ret = printf("%1.3f", res); return(ret); } static int microassist(struct counters *cpu, int pos) { /* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */ int ret; struct counters *idq; struct counters *unhalt; double un, id, res, con; con = 4.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); idq = find_counter(cpu, "IDQ.MS_UOPS"); if (pos != -1) { id = idq->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { id = idq->sum * 1.0; un = unhalt->sum * 1.0; } res = id/(un * con); ret = printf("%1.3f", res); return(ret); } static int microassist_broad(struct counters *cpu, int pos) { int ret; struct counters *idq; struct counters *unhalt; struct counters *uopiss; struct counters *uopret; double un, id, res, con, uoi, uor; con = 4.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); idq = find_counter(cpu, "IDQ.MS_UOPS"); uopiss = find_counter(cpu, "UOPS_ISSUED.ANY"); uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); if (pos != -1) { id = idq->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; uoi = uopiss->vals[pos] * 1.0; uor = uopret->vals[pos] * 1.0; } else { id = idq->sum * 1.0; un = unhalt->sum * 1.0; uoi = uopiss->sum * 1.0; uor = uopret->sum * 1.0; } res = (uor/uoi) * (id/(un * con)); ret = printf("%1.3f", res); return(ret); } static int aliasing(struct counters *cpu, int pos) { /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ int ret; struct counters *ld; struct counters *unhalt; double un, lds, con, res; con = 5.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); if (pos != -1) { lds = ld->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { lds = ld->sum * 1.0; un = unhalt->sum * 1.0; } res = (lds * con)/un; ret = printf("%1.3f", res); return(ret); 
} static int aliasing_broad(struct counters *cpu, int pos) { /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ int ret; struct counters *ld; struct counters *unhalt; double un, lds, con, res; con = 7.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); if (pos != -1) { lds = ld->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { lds = ld->sum * 1.0; un = unhalt->sum * 1.0; } res = (lds * con)/un; ret = printf("%1.3f", res); return(ret); } static int fpassists(struct counters *cpu, int pos) { /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */ int ret; struct counters *fp; struct counters *inst; double un, fpd, res; inst = find_counter(cpu, "INST_RETIRED.ANY_P"); fp = find_counter(cpu, "FP_ASSIST.ANY"); if (pos != -1) { fpd = fp->vals[pos] * 1.0; un = inst->vals[pos] * 1.0; } else { fpd = fp->sum * 1.0; un = inst->sum * 1.0; } res = fpd/un; ret = printf("%1.3f", res); return(ret); } static int otherassistavx(struct counters *cpu, int pos) { /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ int ret; struct counters *oth; struct counters *unhalt; double un, ot, con, res; con = 75.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE"); if (pos != -1) { ot = oth->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { ot = oth->sum * 1.0; un = unhalt->sum * 1.0; } res = (ot * con)/un; ret = printf("%1.3f", res); return(ret); } static int otherassistsse(struct counters *cpu, int pos) { int ret; struct counters *oth; struct counters *unhalt; double un, ot, con, res; /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ con = 75.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX"); if (pos != -1) { ot = oth->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { ot = oth->sum * 1.0; un = unhalt->sum * 1.0; 
} res = (ot * con)/un; ret = printf("%1.3f", res); return(ret); } static int efficiency1(struct counters *cpu, int pos) { int ret; struct counters *uops; struct counters *unhalt; double un, ot, con, res; /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/ con = 4.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); if (pos != -1) { ot = uops->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { ot = uops->sum * 1.0; un = unhalt->sum * 1.0; } res = ot/(con * un); ret = printf("%1.3f", res); return(ret); } static int efficiency2(struct counters *cpu, int pos) { int ret; struct counters *uops; struct counters *unhalt; double un, ot, res; /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/ unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); uops = find_counter(cpu, "INST_RETIRED.ANY_P"); if (pos != -1) { ot = uops->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; } else { ot = uops->sum * 1.0; un = unhalt->sum * 1.0; } res = un/ot; ret = printf("%1.3f", res); return(ret); } #define SANDY_BRIDGE_COUNT 20 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = { /*01*/ { "allocstall1", "thresh > .05", "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1", allocstall1, 2 }, /* -- not defined for SB right (partial-rat_stalls) 02*/ { "allocstall2", "thresh > .05", "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1", allocstall2, 2 }, /*03*/ { "br_miss", "thresh >= .2", "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", br_mispredict, 2 }, /*04*/ { "splitload", "thresh >= .1", "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", splitload_sb, 2 }, /* 05*/ { "splitstore", "thresh >= .01", "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", splitstore_sb, 2 }, /*06*/ { "contested", "thresh >= .05", "pmcstat -s 
MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", contested, 2 },
/*07*/ { "blockstorefwd", "thresh >= .05", "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", blockstoreforward, 2 },
/*08*/ { "cache2", "thresh >= .2", "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", cache2, 4 },
/*09*/ { "cache1", "thresh >= .2", "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", cache1, 2 },
/*10*/ { "dtlbmissload", "thresh >= .1", "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", dtlb_missload, 3 },
/*11*/ { "dtlbmissstore", "thresh >= .05", "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", dtlb_missstore, 3 },
/*12*/ { "frontendstall", "thresh >= .15", "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1", frontendstall, 2 },
/*13*/ { "clears", "thresh >= .02", "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", clears, 4 },
/*14*/ { "microassist", "thresh >= .05", "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", microassist, 2 },
/*15*/ { "aliasing_4k", "thresh >= .1", "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", aliasing, 2 },
/*16*/ { "fpassist", "look for a excessive value", "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", fpassists, 2 },
/*17*/ { "otherassistavx", "look for a excessive value", "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", otherassistavx, 2},
/*18*/ { "otherassistsse", "look for a excessive value", "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", otherassistsse, 2 },
/*19*/ { "eff1", "thresh < .9", "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", efficiency1, 2 },
/*20*/ { "eff2", "thresh > 1.0", "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", efficiency2, 2 },
};

/*
 * Test table installed by set_ivybridge().  Judging by the initializers
 * and by how set_expression() reads the entries, each row holds, in
 * order: the test name matched against the user's request, the
 * threshold hint text, the pmcstat command that collects the events,
 * the function that evaluates the collected counters, and the number of
 * PMC counters the command needs (checked against max_pmc_counters).
 * IVY_BRIDGE_COUNT must equal the number of rows.
 */
#define IVY_BRIDGE_COUNT 21
static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
/*1*/ { "eff1", "thresh < .75", "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", efficiency1, 2 },
/*2*/ { "eff2", "thresh > 1.0", "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", efficiency2, 2 },
/*3*/ { "itlbmiss", "thresh > .05", "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", itlb_miss, 2 },
/*4*/ { "icachemiss", "thresh > .05", "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", icache_miss, 3 },
/*5*/ { "lcpstall", "thresh > .05", "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", lcp_stall, 2 },
/*6*/ { "cache1", "thresh >= .2", "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", cache1ib, 2 },
/*7*/ { "cache2", "thresh >= .2", "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", cache2ib, 2 },
/*8*/ { "contested", "thresh >= .05", "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", contested, 2 },
/*9*/ { "datashare", "thresh >= .05", "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", datasharing, 2 },
/*10*/ { "blockstorefwd", "thresh >= .05", "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", blockstoreforward, 2 },
/*11*/ { "splitload", "thresh >= .1", "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", splitloadib, 4 },
/*12*/ { "splitstore", "thresh >= .01", "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1", splitstore, 2 },
/*13*/ { "aliasing_4k", "thresh >= .1", "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s 
CPU_CLK_UNHALTED.THREAD_P -w 1", aliasing, 2 },
/*14*/ { "dtlbmissload", "thresh >= .1", "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", dtlb_missload , 3},
/*15*/ { "dtlbmissstore", "thresh >= .05", "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", dtlb_missstore, 3 },
/*16*/ { "br_miss", "thresh >= .2", "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", br_mispredictib, 8 },
/*17*/ { "clears", "thresh >= .02", "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", clears, 4 },
/*18*/ { "microassist", "thresh >= .05", "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", microassist, 2 },
/*19*/ { "fpassist", "look for a excessive value", "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", fpassists, 2 },
/*20*/ { "otherassistavx", "look for a excessive value", "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", otherassistavx , 2},
/*21*/ { "otherassistsse", "look for a excessive value", "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", otherassistsse, 2 },
};

/*
 * Test table installed by set_haswell().  Judging by the initializers
 * and by how set_expression() reads the entries, each row holds, in
 * order: the test name matched against the user's request, the
 * threshold hint text, the pmcstat command that collects the events,
 * the function that evaluates the collected counters, and the number of
 * PMC counters the command needs (checked against max_pmc_counters).
 * HASWELL_COUNT must equal the number of rows.
 */
#define HASWELL_COUNT 20
static struct cpu_entry haswell[HASWELL_COUNT] = {
/*1*/ { "eff1", "thresh < .75", "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", efficiency1, 2 },
/*2*/ { "eff2", "thresh > 1.0", "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", efficiency2, 2 },
/*3*/ { "itlbmiss", "thresh > .05", "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", itlb_miss, 2 },
/*4*/ { "icachemiss", "thresh > .05", "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1", icache_miss_has, 2 },
/*5*/ { "lcpstall", "thresh > .05", "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", lcp_stall, 2 },
/*6*/ { "cache1", "thresh >= .2", "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", cache1ib, 2 },
/*7*/ { "cache2", "thresh >= .2", "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", cache2has, 4 },
/*8*/ { "contested", "thresh >= .05", "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", contested_has, 2 },
/*9*/ { "datashare", "thresh >= .05", "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", datasharing_has, 2 },
/*10*/ { "blockstorefwd", "thresh >= .05", "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", blockstoreforward, 2 },
/*11*/ { "splitload", "thresh >= .1", "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1", splitload , 2},
/*12*/ { "splitstore", "thresh >= .01", "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1", splitstore, 2 },
/*13*/ { "aliasing_4k", "thresh >= .1", "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", aliasing, 2 },
/*14*/ { "dtlbmissload", "thresh >= .1", "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", dtlb_missload, 3 },
/*15*/ { "br_miss", "thresh >= .2", "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", br_mispredict, 2 },
/*16*/ { "clears", "thresh >= .02", "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", clears, 4 },
/*17*/ { "microassist", "thresh >= .05", "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", microassist, 2 },
/*18*/ { "fpassist", "look for a excessive value", "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 
fpassists, 2 }, /*19*/ { "otherassistavx", "look for a excessive value", "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", otherassistavx, 2 }, /*20*/ { "otherassistsse", "look for a excessive value", "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", otherassistsse, 2 }, }; static void explain_name_broad(const char *name) { const char *mythresh; if (strcmp(name, "eff1") == 0) { printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); mythresh = "thresh < .75"; } else if (strcmp(name, "eff2") == 0) { printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); mythresh = "thresh > 1.0"; } else if (strcmp(name, "itlbmiss") == 0) { printf("Examine (7 * ITLB_MISSES_STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; } else if (strcmp(name, "icachemiss") == 0) { printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n"); mythresh = "thresh > .05"; } else if (strcmp(name, "lcpstall") == 0) { printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; } else if (strcmp(name, "cache1") == 0) { printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .1"; } else if (strcmp(name, "cache2") == 0) { printf("Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n"); mythresh = "thresh >= .2"; } else if (strcmp(name, "contested") == 0) { printf("Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .05"; } else if (strcmp(name, "datashare") == 0) { printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; } else if (strcmp(name, "blockstorefwd") == 0) { printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .05"; 
} else if (strcmp(name, "aliasing_4k") == 0) { printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .1"; } else if (strcmp(name, "dtlbmissload") == 0) { printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); mythresh = "thresh >= .1"; } else if (strcmp(name, "br_miss") == 0) { printf("Examine BR_MISP_RETIRED.ALL_BRANCHS_PS / (BR_MISP_RETIED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n"); printf(" (UOPS_ISSUEDF.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n"); printf("CPU_CLK_UNHALTED.THREAD * 4)\n"); mythresh = "thresh >= .2"; } else if (strcmp(name, "clears") == 0) { printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); printf(" MACHINE_CLEARS.SMC + \n"); printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .02"; } else if (strcmp(name, "fpassist") == 0) { printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); mythresh = "look for a excessive value"; } else if (strcmp(name, "otherassistavx") == 0) { printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "look for a excessive value"; } else if (strcmp(name, "microassist") == 0) { printf("Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); mythresh = "thresh >= .05"; } else { printf("Unknown name:%s\n", name); mythresh = "unknown entry"; } printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); } #define BROADWELL_COUNT 17 static struct cpu_entry broadwell[BROADWELL_COUNT] = { /*1*/ { "eff1", "thresh < .75", "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", efficiency1, 2 }, /*2*/ { "eff2", "thresh > 1.0", "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", efficiency2, 2 }, /*3*/ { "itlbmiss", 
"thresh > .05", "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1", itlb_miss_broad, 3 }, /*4*/ { "icachemiss", "thresh > .05", "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1", icache_miss_has, 2 }, /*5*/ { "lcpstall", "thresh > .05", "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", lcp_stall, 2 }, /*6*/ { "cache1", "thresh >= .1", "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", cache1broad, 2 }, /*7*/ { "cache2", "thresh >= .2", "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", cache2broad, 2 }, /*8*/ { "contested", "thresh >= .05", "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1", contestedbroad, 2 }, /*9*/ { "datashare", "thresh >= .05", "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", datasharing_has, 2 }, /*10*/ { "blockstorefwd", "thresh >= .05", "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", blockstoreforward, 2 }, /*11*/ { "aliasing_4k", "thresh >= .1", "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", aliasing_broad, 2 }, /*12*/ { "dtlbmissload", "thresh >= .1", "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", dtlb_missload, 3 }, /*13*/ { "br_miss", "thresh >= .2", "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", br_mispredict_broad, 7 }, /*14*/ { "clears", "thresh >= .02", "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", clears_broad, 5 }, /*15*/ { "fpassist", "look for a excessive value", "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", fpassists, 2 }, /*16*/ 
{ "otherassistavx", "look for a excessive value", "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", otherassistavx, 2 }, /*17*/ { "microassist", "thresh >= .2", "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1", microassist_broad, 4 }, }; static void set_sandybridge(void) { strcpy(the_cpu.cputype, "SandyBridge PMC"); the_cpu.number = SANDY_BRIDGE_COUNT; the_cpu.ents = sandy_bridge; the_cpu.explain = explain_name_sb; } static void set_ivybridge(void) { strcpy(the_cpu.cputype, "IvyBridge PMC"); the_cpu.number = IVY_BRIDGE_COUNT; the_cpu.ents = ivy_bridge; the_cpu.explain = explain_name_ib; } static void set_haswell(void) { strcpy(the_cpu.cputype, "HASWELL PMC"); the_cpu.number = HASWELL_COUNT; the_cpu.ents = haswell; the_cpu.explain = explain_name_has; } static void set_broadwell(void) { strcpy(the_cpu.cputype, "HASWELL PMC"); the_cpu.number = BROADWELL_COUNT; the_cpu.ents = broadwell; the_cpu.explain = explain_name_broad; } static int set_expression(const char *name) { int found = 0, i; for(i=0 ; i< the_cpu.number; i++) { if (strcmp(name, the_cpu.ents[i].name) == 0) { found = 1; expression = the_cpu.ents[i].func; command = the_cpu.ents[i].command; threshold = the_cpu.ents[i].thresh; if (the_cpu.ents[i].counters_required > max_pmc_counters) { printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n", the_cpu.ents[i].name, the_cpu.ents[i].counters_required, max_pmc_counters); printf("Sorry this test can not be run\n"); if (run_all == 0) { exit(-1); } else { return(-1); } } break; } } if (!found) { printf("For CPU type %s we have no expression:%s\n", the_cpu.cputype, name); exit(-1); } return(0); } static int validate_expression(char *name) { int i, found; found = 0; for(i=0 ; i< the_cpu.number; i++) { if (strcmp(name, the_cpu.ents[i].name) == 0) { found = 1; break; } } if (!found) { return(-1); } return (0); } static void do_expression(struct counters *cpu, 
int pos)
{
	/* No expression has been selected, so there is nothing to evaluate. */
	if (expression == NULL)
		return;
	/* Call the selected expression function with the same arguments. */
	(*expression)(cpu, pos);
}

/*
 * Parse header field number idx, whose text starts at p, into cnts[idx].
 * A field that does not start with "s/" is reported and ignored.
 */
static void
process_header(int idx, char *p)
{
	struct counters *up;
	int i, len, nlen;
	/*
	 * Given header element idx, at p in
	 * form 's/NN/nameof'
	 * process the entry to pull out the name and
	 * the CPU number.
	 */
	if (strncmp(p, "s/", 2)) {
		printf("Check -- invalid header no s/ in %s\n", p);
		return;
	}
	up = &cnts[idx];
	/* The CPU number is the decimal value that follows the "s/" prefix. */
	up->cpu = strtol(&p[2], NULL, 10);
	len = strlen(p);
	/*
	 * NOTE(review): the loop header below is incomplete in this copy
	 * of the file; text appears to be missing between "i" and
	 * "counter_name".  Restore it from the upstream source before
	 * building.  The surviving else branch copies at most
	 * MAX_NLEN-1 bytes of the name into up->counter_name.
	 */
	for (i=2; icounter_name, &p[(i+1)]);
			} else {
				/*
				 * NOTE(review): strncpy does not terminate
				 * the destination on truncation -- confirm
				 * counter_name is zeroed beforehand.
				 */
				strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
			}
		}
	}
}

/*
 * Read the header line from io as the first step of building the
 * counter array.  If the line can not be read a message with errno is
 * printed and the function returns without building anything.
 */
static void
build_counters_from_header(FILE *io)
{
	char buffer[8192], *p;
	int i, len, cnt;
	size_t mlen;

	/* We have a new start, lets
	 * setup our headers and cpus.
	 */
	if (fgets(buffer, sizeof(buffer), io) == NULL) {
		printf("First line can't be read from file err:%d\n", errno);
		return;
	}
	/*
	 * Ok output is an array of counters. Once
	 * we start to read the values in we must
	 * put them in there slot to match there CPU and
	 * counter being updated. We create a mass array
	 * of the counters, filling in the CPU and
	 * counter name.
	 */
	/* How many do we get?
*/ len = strlen(buffer); for (i=0, cnt=0; inext_cpu) { /* Already laced in */ continue; } lace_cpu = cpat->cpu; if (lace_cpu >= MAX_CPU) { printf("CPU %d to big\n", lace_cpu); continue; } if (glob_cpu[lace_cpu] == NULL) { glob_cpu[lace_cpu] = cpat; } else { /* Already processed this cpu */ continue; } /* Ok look forward for cpu->cpu and link in */ for(j=(i+1); jnext_cpu) { continue; } if (at->cpu == lace_cpu) { /* Found one */ cpat->next_cpu = at; cpat = at; } } } } static void process_file(char *filename) { FILE *io; int i; int line_at, not_done; pid_t pid_of_command=0; if (filename == NULL) { io = my_popen(command, "r", &pid_of_command); } else { io = fopen(filename, "r"); if (io == NULL) { printf("Can't process file %s err:%d\n", filename, errno); return; } } build_counters_from_header(io); if (cnts == NULL) { /* Nothing we can do */ printf("Nothing to do -- no counters built\n"); if (io) { fclose(io); } return; } lace_cpus_together(); print_header(); if (verbose) { for (i=0; i= max_to_collect) { not_done = 0; } if (filename == NULL) { int cnt; /* For the ones we dynamically open we print now */ for(i=0, cnt=0; i> 12) | ((eax & 0xF0) >> 4)); printf("CPU model is 0x%x id:0x%lx\n", model, eax); switch (eax & 0xF00) { case 0x500: /* Pentium family processors */ printf("Intel Pentium P5\n"); goto not_supported; break; case 0x600: /* Pentium Pro, Celeron, Pentium II & III */ switch (model) { case 0x1: printf("Intel Pentium P6\n"); goto not_supported; break; case 0x3: case 0x5: printf("Intel PII\n"); goto not_supported; break; case 0x6: case 0x16: printf("Intel CL\n"); goto not_supported; break; case 0x7: case 0x8: case 0xA: case 0xB: printf("Intel PIII\n"); goto not_supported; break; case 0x9: case 0xD: printf("Intel PM\n"); goto not_supported; break; case 0xE: printf("Intel CORE\n"); goto not_supported; break; case 0xF: printf("Intel CORE2\n"); goto not_supported; break; case 0x17: printf("Intel CORE2EXTREME\n"); goto not_supported; break; case 0x1C: /* Per Intel 
document 320047-002. */ printf("Intel ATOM\n"); goto not_supported; break; case 0x1A: case 0x1E: /* * Per Intel document 253669-032 9/2009, * pages A-2 and A-57 */ case 0x1F: /* * Per Intel document 253669-032 9/2009, * pages A-2 and A-57 */ printf("Intel COREI7\n"); goto not_supported; break; case 0x2E: printf("Intel NEHALEM\n"); goto not_supported; break; case 0x25: /* Per Intel document 253669-033US 12/2009. */ case 0x2C: /* Per Intel document 253669-033US 12/2009. */ printf("Intel WESTMERE\n"); goto not_supported; break; case 0x2F: /* Westmere-EX, seen in wild */ printf("Intel WESTMERE\n"); goto not_supported; break; case 0x2A: /* Per Intel document 253669-039US 05/2011. */ printf("Intel SANDYBRIDGE\n"); set_sandybridge(); break; case 0x2D: /* Per Intel document 253669-044US 08/2012. */ printf("Intel SANDYBRIDGE_XEON\n"); set_sandybridge(); break; case 0x3A: /* Per Intel document 253669-043US 05/2012. */ printf("Intel IVYBRIDGE\n"); set_ivybridge(); break; case 0x3E: /* Per Intel document 325462-045US 01/2013. */ printf("Intel IVYBRIDGE_XEON\n"); set_ivybridge(); break; case 0x3F: /* Per Intel document 325462-045US 09/2014. */ printf("Intel HASWELL (Xeon)\n"); set_haswell(); break; case 0x3C: /* Per Intel document 325462-045US 01/2013. */ case 0x45: case 0x46: printf("Intel HASWELL\n"); set_haswell(); break; case 0x4e: case 0x5e: printf("Intel SKY-LAKE\n"); goto not_supported; break; case 0x3D: case 0x47: printf("Intel BROADWELL\n"); set_broadwell(); break; case 0x4f: case 0x56: printf("Intel BROADWEL (Xeon)\n"); set_broadwell(); break; case 0x4D: /* Per Intel document 330061-001 01/2014. 
*/ printf("Intel ATOM_SILVERMONT\n"); goto not_supported; break; default: printf("Intel model 0x%x is not known -- sorry\n", model); goto not_supported; break; } break; case 0xF00: /* P4 */ printf("Intel unknown model %d\n", model); goto not_supported; break; } do_cpuid(0xa, 0, reg); max_pmc_counters = (reg[3] & 0x0000000f) + 1; printf("We have %d PMC counters to work with\n", max_pmc_counters); /* Ok lets load the list of all known PMC's */ io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command); if (valid_pmcs == NULL) { /* Likely */ pmc_allocated_cnt = PMC_INITIAL_ALLOC; sz = sizeof(char *) * pmc_allocated_cnt; valid_pmcs = malloc(sz); if (valid_pmcs == NULL) { printf("No memory allocation fails at startup?\n"); exit(-1); } memset(valid_pmcs, 0, sz); } while (fgets(linebuf, sizeof(linebuf), io) != NULL) { if (linebuf[0] != '\t') { /* sometimes headers ;-) */ continue; } len = strlen(linebuf); if (linebuf[(len-1)] == '\n') { /* Likely */ linebuf[(len-1)] = 0; } str = &linebuf[1]; len = strlen(str) + 1; valid_pmcs[valid_pmc_cnt] = malloc(len); if (valid_pmcs[valid_pmc_cnt] == NULL) { printf("No memory2 allocation fails at startup?\n"); exit(-1); } memset(valid_pmcs[valid_pmc_cnt], 0, len); strcpy(valid_pmcs[valid_pmc_cnt], str); valid_pmc_cnt++; if (valid_pmc_cnt >= pmc_allocated_cnt) { /* Got to expand -- unlikely */ char **more; sz = sizeof(char *) * (pmc_allocated_cnt * 2); more = malloc(sz); if (more == NULL) { printf("No memory3 allocation fails at startup?\n"); exit(-1); } memset(more, 0, sz); memcpy(more, valid_pmcs, sz); pmc_allocated_cnt *= 2; free(valid_pmcs); valid_pmcs = more; } } my_pclose(io, pid_of_command); return; not_supported: printf("Not supported\n"); exit(-1); } static void explain_all(void) { int i; printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype); printf("-------------------------------------------------------------\n"); for(i=0; itype == TYPE_VALUE_PMC) { cnt_pmc++; } at = at->next; } if 
(cnt_pmc == 0) {
		printf("No PMC's in your expression -- nothing to do!!\n");
		exit(0);
	}
	/* Room for one name pointer per PMC reference counted in cnt_pmc. */
	mal = cnt_pmc * sizeof(char *);
	vars = malloc(mal);
	if (vars == NULL) {
		printf("No memory\n");
		exit(-1);
	}
	memset(vars, 0, mal);
	at = exp;
	/*
	 * Walk the expression list and hand every PMC name to add_it_to();
	 * alloced_pmcs advances only when it returns non-zero (presumably
	 * meaning the name was not yet in vars -- confirm against
	 * add_it_to()).
	 */
	while (at) {
		if (at->type == TYPE_VALUE_PMC) {
			if(add_it_to(vars, alloced_pmcs, at->name)) {
				alloced_pmcs++;
			}
		}
		at = at->next;
	}
	/* Now we have a unique list in vars so create our command */
	mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */
	/*
	 * NOTE(review): text is missing from this copy of the file inside
	 * the loop header below; the end of the command builder and the
	 * start of the user expression evaluator are lost.  Restore from
	 * the upstream source before building.
	 */
	for(i=0; itype == TYPE_VALUE_PMC) {
			var = find_counter(cpu, at->name);
			if (var == NULL) {
				printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
				exit(-1);
			}
			/* A pos of -1 selects the counter sum instead of vals[pos]. */
			if (pos != -1) {
				at->value = var->vals[pos] * 1.0;
			} else {
				at->value = var->sum * 1.0;
			}
		}
		at = at->next;
	}
	/* Evaluate master_exp, print the result and return what printf returned. */
	res = run_expr(master_exp, 1, NULL);
	ret = printf("%1.3f", res);
	return(ret);
}

/*
 * Install a user supplied expression: evaluation goes through
 * user_expr, the command is built from exp by build_command_for_exp()
 * and the threshold text is a fixed string.
 */
static void
set_manual_exp(struct expression *exp)
{
	expression = user_expr;
	command = build_command_for_exp(exp);
	threshold = "User defined threshold";
}

/*
 * Announce a test run over the valid_pmc_cnt PMC names that were
 * loaded into valid_pmcs.
 */
static void
run_tests(void)
{
	int i, lenout;
	printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
	printf("------------------------------------------------------------------------\n");
	/*
	 * NOTE(review): text is missing from this copy of the file inside
	 * the loop header below; the rest of this function and the start
	 * of the option parsing code are lost.  Restore from upstream.
	 */
	for(i=0; i MAX_COUNTER_SLOTS) {
				/* You can't collect more than max in array */
				max_to_collect = MAX_COUNTER_SLOTS;
			}
			break;
		case 'v':
			verbose++;
			break;
		case 'h':
			help_only = 1;
			break;
		case 'i':
			filename = optarg;
			break;
		case '?':
		default:
		use:
			printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -?
-H]\n", argv[0]); printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n"); printf("-v -- verbose dump debug type things -- you don't want this\n"); printf("-m N -- maximum to collect is N measurements\n"); printf("-e expr-name -- Do expression expr-name\n"); printf("-E 'your expression' -- Do your expression\n"); printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n"); printf("-H -- Don't run anything, just explain all canned expressions\n"); printf("-T -- Test all PMC's defined by this processor\n"); printf("-A -- Run all canned tests\n"); return(0); break; } } if ((run_all == 0) && (name == NULL) && (filename == NULL) && (test_mode == 0) && (master_exp == NULL)) { printf("Without setting an expression we cannot dynamically gather information\n"); printf("you must supply a filename (and you probably want verbosity)\n"); goto use; } if (run_all && max_to_collect > 10) { max_to_collect = 3; } if (test_mode) { run_tests(); return(0); } printf("*********************************\n"); if ((master_exp == NULL) && name) { (*the_cpu.explain)(name); } else if (master_exp) { printf("Examine your expression "); print_exp(master_exp); printf("User defined threshold\n"); } if (help_only) { return(0); } if (run_all) { more: name = the_cpu.ents[test_at].name; printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh); test_at++; if (set_expression(name) == -1) { if (test_at >= the_cpu.number) { goto done; } else goto more; } } process_file(filename); if (verbose >= 2) { for (i=0; i 1) { for(i=0, cnt=0; i