#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <float.h>

/* Number of slots in the tag name table. */
#define LISTSIZE 1024
/* Size in bytes of the strategy-file buffer.  Parenthesized so the macro
   expands as one value when it is used inside a larger expression. */
#define BUFSIZE (LISTSIZE*1024)
/* Raw text of the strategy file; it is tokenized in place. */
char buffer[BUFSIZE];
/* Tag names, indexed by tag number. */
char *tag[LISTSIZE];
/* Number of entries of tag[] in use. */
int tag_cnt;
/* Bit mask of the tags selected for this run. */
int tag_set;

/* Element kinds returned by get_element().  T_EOF is 0, so it also ends
   the "while (get_element())" parse loop.  The algorithm kinds
   (T_LIST, T_GREED, T_ILIST, T_ALL) are also used as bit positions in the
   'algs' masks. */
enum {T_EOF=0, T_PUSH, T_POP,
      T_LIST, T_GREED, T_ILIST, T_ALL,
      T_STRING
};

struct desc_t;
typedef struct {
  int is_flag;
  char *str;
  struct desc_t *desc;
  int tag;
} flag_t;

typedef struct desc_t {
  int algs;
  flag_t *flags[64];
  int flags_cnt;
} desc_t;

/* Measurements obtained for one set of flags. */
typedef struct {
  double r[128];  /* results; r[0] is the value the comparisons look at */
  int n;          /* number of repetitions */
} try_t;

/* A flag string together with the measurements obtained for it. */
typedef struct {
  char *f;   /* flags */
  try_t *t;  /* measurements for 'f' */
} result_t;

/* Benchmark result layouts understood by try(): B_SIMPLE reads one value
   per repetition, B_NET2_IP reads 11 groups of values and B_NET2_TCP reads
   3 groups, which are then combined into a single index. */
enum {B_SIMPLE=0, B_NET2_IP, B_NET2_TCP
};

/* Stack of the descriptors currently open while parsing, and its depth. */
desc_t *desc_stack[128];
int desc_top;
/* Verbosity level; higher values print more diagnostics. */
int verbose;
/* Directory used as the root for the "/f-test/" benchmark scripts. */
char fmdir[1024];
/* NOTE(review): presumably the value of the flgs= option; its use is not
   visible in this part of the file. */
char *flgs;
/* Values passed on the command line of the build script by try(). */
char *bd;
char *Htools;
char *tools;
char *prefix;
/* Target machine and user, passed to the result script by try(). */
char *tn;
char *tu;
/* Multilib type; first argument of the build script. */
char *ml;
/* Paths of the build and result scripts selected by setup_bmk(). */
char *build_sh;
char *result_sh;
/* Improvement threshold in percent; 0.0 selects the t-test in is_better(). */
double thres;
/* Number of repetitions used when thres is not 0.0. */
int rep;
/* Non-zero when a lower result is better. */
int minimize;
/* One of the B_xxx benchmark types. */
int bmk_type;

/* Last token returned by strtok(); NULL before the first token. */
char *token;
/* Algorithm bits collected so far for the next "{" group. */
int algs;

static result_t exec_algs (desc_t *d, char *fbase);

/* Dump flag string and tags with indentation.
   Prints S preceded by TAB indentation steps (two spaces each).  When T is
   not negative it is taken as a tag bit mask and the names of the tags whose
   bits are set are appended after " .".  A negative T prints S only.  */
static void
dump_ident (int tab, char *s, int t)
{
  int i;

  for (i = 0; i < tab; i++)
    printf ("  ");
  printf ("%s", s);
  if (t >= 0) {
    printf (" .");
    /* T is not negative here, so bit 31 is clear: stopping at bit 30 prints
       the same tags and avoids the undefined shift 1 << 31.  */
    for (i = 0; i < 31; i++)
      if ((t & (1 << i)) && tag[i] != NULL)
	printf ("%s ", tag[i]);
  }
  printf("\n");
}

/* Print descriptor D and everything nested in it.  The algorithms attached
   to D are printed at indentation TAB, its entries one step deeper.  */
static void
dump_flags (desc_t *d, int tab)
{
  static const struct {
    int kind;
    char *label;
  } alg_names[] = {
    { T_LIST,  "alg: LIST" },
    { T_GREED, "alg: GREED" },
    { T_ILIST, "alg: I-LIST" },
    { T_ALL,   "alg: ALL" }
  };
  const int mask = d->algs;
  flag_t *entry;
  int k;

  /* Algorithms selected for this descriptor */
  for (k = 0; k < 4; k++)
    if (mask & 1 << alg_names[k].kind)
      dump_ident (tab, alg_names[k].label, -1);

  /* Entries: plain flags are printed, nested descriptors are expanded */
  k = 0;
  while (k < d->flags_cnt) {
    entry = d->flags[k];
    if (!entry->is_flag)
      dump_flags (entry->desc, tab + 1);
    else
      dump_ident (tab + 1, entry->str, entry->tag);
    k++;
  }
}

/* Install a new tag.
   Returns the index of tag S in tag[]; a copy of S is appended to the table
   when it is not there yet.  Tag indices are used as bit positions in int
   masks (1 << index), so only indices 0..30 are handed out: a larger index
   would shift into or past the sign bit.  The program exits when the table
   is full or memory runs out.  */
static int
install_tag (char *s)
{
  /* Number of tags that fit in an int mask without using the sign bit */
  enum { MAX_TAGS = 31 };
  int i;

  for (i = 0; i < tag_cnt; i++)
    if (strcmp (tag[i], s) == 0)
      return i;
  if (tag_cnt >= MAX_TAGS) {
    printf ("Too many tags (maximum is %d): %s\n", (int) MAX_TAGS, s);
    exit (1);
  }
  tag[i] = strdup (s);
  if (tag[i] == NULL) {
    printf ("Out of memory installing tag: %s\n", s);
    exit (1);
  }
  tag_cnt++;
  return i;
}

/* Install every tag of the comma separated list S and return the mask with
   one bit set per tag.  S is modified while it is scanned but every comma is
   put back before returning.  An empty name after the last comma is not
   installed.  */
static int
install_taglist (char *s)
{
  char *cur = s;
  char *comma;
  int mask = 0;
  int idx;

  while (*cur != '\0') {
    comma = strchr (cur, ',');
    if (comma != NULL)
      *comma = '\0';		/* isolate the current name */
    idx = install_tag (cur);
    mask |= 1 << idx;
    if (verbose > 3) printf ("         %s\n", tag[idx]);
    if (comma == NULL)
      break;			/* that was the last tag */
    *comma = ',';
    cur = comma + 1;
  }
  return mask;
}

/* Install a flag.
   Returns a newly allocated entry holding a copy of flag string S and tag
   mask T (-1 when the flag has no tag list).  The program exits when memory
   cannot be obtained.  */
static flag_t *
install_flag (char *s, int t)
{
  flag_t *p;

  p = malloc (sizeof *p);
  if (p == NULL) {
    fprintf (stderr, "Out of memory installing flag: %s\n", s);
    exit (1);
  }
  p->is_flag = 1;
  p->str = strdup (s);
  if (p->str == NULL) {
    fprintf (stderr, "Out of memory installing flag: %s\n", s);
    exit (1);
  }
  p->desc = NULL;
  p->tag = t;
  return p;
}

/* Return the next blank separated token of the strategy text, or NULL when
   there is none.  While 'token' is NULL the scan (re)starts at the beginning
   of 'buffer'; otherwise it continues where strtok() stopped.  */
static char *
get_token (void)
{
  char *start = (token == NULL) ? buffer : NULL;

  token = strtok (start, " \t\n");
  return token;
}

/* Read the next element of the strategy file and update the parser state.
   Returns the T_xxx kind of the element: T_EOF at the end of the input or
   after an unknown token, which stops the caller's parse loop.
   NOTE(review): desc_stack[], flags[] and flag_str are written without
   bounds checks, and "}" or a string outside any "{" indexes below the
   stack; the input file is trusted.  */
static int
get_element (void)
{
  char flag_str[1024];
  char *tk;
  char *t1, *t2;
  desc_t *d, *d2;
  flag_t *f;
  int t;
  int i;

  tk = get_token ();
 L1:
  if (tk == NULL) {
    if (verbose > 3) printf ("T_EOF\n");
    return T_EOF;
  }

  if (strcmp (tk, "{") == 0) {
    /* Open a group: it takes the algorithms named just before the brace */
    if (verbose > 3) printf ("T_PUSH\n");
    if (desc_top > 0 && (desc_stack [desc_top-1]->algs & (1 << T_ALL))) {
      printf ("We do not allow nested alg inside \"all\"\n");
      exit (1);
    }
    d = (desc_t *)malloc (sizeof (desc_t));
    desc_stack [desc_top] = d;
    desc_top++;
    d->algs = algs;
    algs = 0;
    d->flags_cnt = 0;
    return T_PUSH;

  } else if (strncmp (tk, "}", 1) == 0) {
    /* Close a group; "}.a,b" also carries a tag list */
    if (verbose > 3) printf ("T_POP\n");
    t = -1;
    if (*(tk+1) == '.') {
      t = install_taglist (tk+2);
    }
    desc_top--;
    d = desc_stack[desc_top];
    if (desc_top > 0) {
      /* Nested group: append it to the enclosing group as a non-flag entry */
      d2 = desc_stack[desc_top - 1];
      f = (flag_t *) malloc (sizeof (flag_t));
      f->is_flag = 0;
      f->str = NULL;
      f->desc = d;
      f->tag = 0;
      d2->flags[d2->flags_cnt] = f;
      d2->flags_cnt++;
    }
    /* Flags without their own tag list take the tag list of the group */
    for (i = 0; i < d->flags_cnt; i++) {
      if (d->flags[i]->is_flag && d->flags[i]->tag == -1)
	d->flags[i]->tag = t;
    }
    return T_POP;

  } else if (strcmp (tk, "list") == 0) {
    if (verbose > 3) printf ("T_LIST\n");
    algs |= 1 << T_LIST;
    return T_LIST;

  } else if (strcmp (tk, "greed") == 0) {
    if (verbose > 3) printf ("T_GREED\n");
    algs |= 1 << T_GREED;
    return T_GREED;

  } else if (strcmp (tk, "i-list") == 0) {
    if (verbose > 3) printf ("T_ILIST\n");
    algs |= 1 << T_ILIST;
    return T_ILIST;

  } else if (strcmp (tk, "all") == 0) {
    /* "all" replaces the algorithms collected so far instead of adding */
    if (verbose > 3) printf ("T_ALL\n");
    algs = 1 << T_ALL;
    return T_ALL;

  } else if (*tk == '"') {
    /* Quoted flag string.  The terminator written by strtok() is turned
       back into a blank so that the text up to the closing quote, which may
       contain blanks, can be copied as a whole.  */
    t2 = flag_str;
    *(tk+strlen(tk)) = ' ';
    t1 = tk + 1;
    while (*t1 != '\0' && *t1 != '"') {
      *t2++ = *t1++;
    }
    *t2 = '\0';
    if (verbose > 3) printf ("T_STRING \"%s\"\n", flag_str);
    t = -1;
    if (*(t1+1) == '.') {
      /* A ".taglist" follows the closing quote */
      token = strtok (t1+2, " \t\n");
      t = install_taglist (token);
    } else {
      /* Make it point to end of string */
      token = strtok (t1, " \t\n");
    }
    d = desc_stack[desc_top - 1];
    d->flags[d->flags_cnt] = install_flag(flag_str, t);
    d->flags_cnt++;
    return T_STRING;

  } else if (strncmp (tk, "/*", 2) == 0) {
    /* Comment: undo the strtok() terminator and skip to the closing
       marker.  NOTE(review): an unterminated comment is not detected.  */
    *(tk+strlen(tk)) = ' ';
    tk += 2;
    while (strncmp (tk, "*/", 2) != 0)
      tk++;
    tk += 2;
    /* Start again after comment */
    token = strtok (tk, " \t\n");
    tk = token;
    goto L1;

  }
  printf ("Unkown token: %s\n", tk);
  return T_EOF;
}

/* Load and parse strategy file FNAME, then select the tags named in the
   comma separated list CNAMES (may be NULL; it is modified by strtok()).
   Every tag of CNAMES must already appear in the file.
   Returns 0 on success and 1 when the file cannot be opened or read, does
   not fit in 'buffer', or a requested tag is unknown.  */
static int
load_file (char *fname, char *cnames)
{
  FILE *fin;
  size_t fsize;
  int i, last;
  char *p;

  fin = fopen (fname, "r");
  if (fin == NULL) {
    fprintf (stderr, "Cannot open file %s\n", fname);
    return 1;
  }
  fsize = fread (buffer, 1, BUFSIZE, fin);
  /* The whole file is in memory now; do not leak the stream on any path */
  fclose (fin);
  if (fsize == 0 || fsize >= BUFSIZE-1) {
    fprintf (stderr, "Cannot load file %s\n", fname);
    return 1;
  }
  /* The tokenizer needs a terminated string */
  buffer[fsize] = '\0';

  /* Parse flags file */
  while (get_element())
    ;

  /* Now check that all cnames exist on file */
  if (cnames) {
    last = tag_cnt;
    p = strtok (cnames, ",");
    while (p) {
      /* An index at or past 'last' means the tag was created just now */
      i = install_tag (p);
      if (i >= last) {
	printf ("Tag \"%s\" is not present on file: %s\n",
		p, fname);
	return 1;
      }
      tag_set |= 1 << i;
      p = strtok (NULL, ",");
    }
  }
  if (verbose > 0) printf ("Tags set:   %08X\n", tag_set);

  return 0;
}

/* Print the command line help on standard output.  */
static void
print_usage (void)
{
  static const char *const help[] = {
    "fmining <options>\n",
    "\n",
    "Where <options> is one or more of:\n",
    "  s=<file>         Strategy file to use\n",
    "  flgs=<flags>     Add <flags> to all tests\n",
    "                   e.g.: -O3, -m32/-m64, -static\n",
    "  tn=<machine>     target machine name\n",
    "  tu=<user>        target user name\n",
    "\n",
    "  bd=<dir>         base dir. Default is /local/`id -nu`/tools_test\n",
    "  Htools=<path>    Tools installation dir\n",
    "  tools=<path>     Installation sub-directory for given tools\n",
    "  prefix=<triplet> Prefix for given tools\n",
    "  ml=<type>        multilib type (32, 64, nof, S32, S64 Snof)\n",
    "\n",
    "  fm=<benchmark>   Program to flag mine\n",
    "  th=<threshold>   Percentage of test results to consider a change\n",
    "\n",
    "  tag=<tags>       List of identifiers used in the strategy file\n"
  };
  size_t k;

  for (k = 0; k < sizeof help / sizeof help[0]; k++)
    printf ("%s", help[k]);
}

/* Return a newly allocated string holding S1, S2 and S3 joined without any
   separator.  The caller owns the result.  */
static char *
glue3 (char *s1, char *s2, char *s3)
{
  const size_t n1 = strlen (s1);
  const size_t n2 = strlen (s2);
  const size_t n3 = strlen (s3);
  char *out = malloc (n1 + n2 + n3 + 1);

  memcpy (out, s1, n1);
  memcpy (out + n1, s2, n2);
  memcpy (out + n1 + n2, s3, n3 + 1);	/* copies the terminator too */
  return out;
}

/* Return a newly allocated string "S1 S2" (one blank in between).  The
   caller owns the result.  */
static char *
cat2 (char *s1, char *s2)
{
  const size_t n1 = strlen (s1);
  const size_t n2 = strlen (s2);
  char *out = malloc (n1 + n2 + 2);
  char *w = out;

  memcpy (w, s1, n1);
  w += n1;
  *w++ = ' ';
  memcpy (w, s2, n2 + 1);	/* copies the terminator too */
  return out;
}

/* Return a newly allocated string "S1 S2 S3" (single blanks in between).
   The caller owns the result.  */
static char *
cat3 (char *s1, char *s2, char *s3)
{
  char *part[3];
  size_t len[3];
  char *out, *w;
  int k;

  part[0] = s1;
  part[1] = s2;
  part[2] = s3;
  for (k = 0; k < 3; k++)
    len[k] = strlen (part[k]);

  out = malloc (len[0] + len[1] + len[2] + 3);
  w = out;
  for (k = 0; k < 3; k++) {
    if (k > 0)
      *w++ = ' ';
    memcpy (w, part[k], len[k]);
    w += len[k];
  }
  *w = '\0';
  return out;
}

/* Return a newly allocated string "S1 S2 S3 S4" (single blanks in between).
   The caller owns the result.  */
static char *
cat4 (char *s1, char *s2, char *s3, char *s4)
{
  char *part[4];
  size_t len[4];
  size_t total = 0;
  char *out, *w;
  int k;

  part[0] = s1;
  part[1] = s2;
  part[2] = s3;
  part[3] = s4;
  for (k = 0; k < 4; k++) {
    len[k] = strlen (part[k]);
    total += len[k];
  }

  out = malloc (total + 4);
  w = out;
  for (k = 0; k < 4; k++) {
    if (k > 0)
      *w++ = ' ';
    memcpy (w, part[k], len[k]);
    w += len[k];
  }
  *w = '\0';
  return out;
}

/* Overlay of a double and two ints, used to inspect the bits of a double.
   NOTE(review): which of i[0] / i[1] holds the sign bit depends on the
   byte order of the host. */
typedef union {
  double d;
  int i[2];
} d_cast;

static const double F_PI = M_PI;
/* Relative precision of a double.  It is the tolerance of the convergence
   test in the continued fraction, so it must be the machine epsilon;
   Euler's number (M_E) would accept the first iterations as converged.  */
static const double fMachEps = DBL_EPSILON;  // ::std::numeric_limits<double>::epsilon()
static const double fMaxGammaArgument = 171.624376956302;  // found experimental
static const int false = 0;
static const int true = 1;
/* log (DBL_MAX) and log (DBL_MIN); both are assigned in main().  */
static double fLogDblMax;
static double fLogDblMin;

/* Evaluate the rational function of the Lanczos approximation at fZ: the
   quotient of two degree 12 polynomials, both evaluated with the Horner
   scheme.  For fZ above 1 both polynomials are evaluated in 1/fZ with the
   coefficients reversed (numerator and denominator cancelled by fZ^12).  */
static double
lcl_getLanczosSum (double fZ)
{
  static const double num[13] = {
    23531376880.41075968857200767445163675473,
    42919803642.64909876895789904700198885093,
    35711959237.35566804944018545154716670596,
    17921034426.03720969991975575445893111267,
    6039542586.35202800506429164430729792107,
    1439720407.311721673663223072794912393972,
    248874557.8620541565114603864132294232163,
    31426415.58540019438061423162831820536287,
    2876370.628935372441225409051620849613599,
    186056.2653952234950402949897160456992822,
    8071.672002365816210638002902272250613822,
    210.8242777515793458725097339207133627117,
    2.506628274631000270164908177133837338626
  };
  static const double den[13] = {
    0,
    39916800,
    120543840,
    150917976,
    105258076,
    45995730,
    13339535,
    2637558,
    357423,
    32670,
    1925,
    66,
    1
  };
  double accNum;
  double accDen;
  int k;

  if (!(fZ <= 1.0)) {
    /* Reverse coefficient order, powers of 1/fZ */
    const double inv = 1/fZ;

    accNum = num[0];
    accDen = den[0];
    k = 1;
    while (k <= 12) {
      accNum *= inv;
      accNum += num[k];
      accDen *= inv;
      accDen += den[k];
      k++;
    }
  } else {
    /* Highest coefficient first, powers of fZ */
    accNum = num[12];
    accDen = den[12];
    k = 11;
    while (k >= 0) {
      accNum *= fZ;
      accNum += num[k];
      accDen *= fZ;
      accDen += den[k];
      k--;
    }
  }
  return accNum/accDen;
}

/* Evaluate the continued fraction used by GetBetaDist() for argument fX and
   parameters fA, fB.  The loop stops when two successive approximations
   differ by less than fMachEps relative to the previous one, or after
   50000 steps.  */
static double
lcl_GetBetaHelperContFrac (double fX, double fA, double fB)
{
  // loop security, normal cases converge in less than 100 iterations.
  // FIXME: many iterations are needed for fX near the mean; no better
  // algorithm is known here.
  const double kMaxIter = 50000.0;
  double numPrev = 1.0;
  double denPrev = 1.0;
  double numCur;
  double denCur;
  double scale;
  double frac;
  double fracNext = 1.0;
  double step = 1.0;
  int done = 0;

  denCur = 1.0 - (fA+fB)/(fA+1.0)*fX;
  if (denCur != 0.0) {
    numCur = 1.0;
    scale = 1.0/denCur;
    frac = numCur*scale;
  } else {
    numCur = 0.0;
    scale = 1.0;
    frac = 1.0;
  }

  for (;;) {
    const double apl2m = fA + 2.0*step;
    const double evenCoef = step*(fB-step)*fX/((apl2m-1.0)*apl2m);
    const double oddCoef = -(fA+step)*(fA+fB+step)*fX/(apl2m*(apl2m+1.0));

    numPrev = (numCur+evenCoef*numPrev)*scale;
    denPrev = (denCur+evenCoef*denPrev)*scale;
    numCur = numPrev + oddCoef*numCur*scale;
    denCur = denPrev + oddCoef*denCur*scale;
    if (denCur != 0.0) {
      /* Renormalize and test convergence only when it is safe to divide */
      scale = 1.0/denCur;
      fracNext = numCur*scale;
      done = (fabs(frac-fracNext) < fabs(frac)*fMachEps);
    }
    frac = fracNext;
    step += 1.0;
    if (!(step < kMaxIter && !done))
      break;
  }
  return frac;
}

/* Logarithm of the Lanczos approximation at fZ:
   log (lanczosSum (fZ)) + (fZ - 0.5) * log (fZ + g - 0.5) - (fZ + g - 0.5).  */
static double
lcl_GetLogGammaHelper (double fZ)
{
  const double kLanczosG = 6.024680040776729583740234375;
  const double shifted = fZ + kLanczosG - 0.5;
  const double logSum = log (lcl_getLanczosSum (fZ));

  return logSum + (fZ-0.5) * log (shifted) - shifted;
}

/* Return 1 when the sign bit of D is set (negative values, including -0.0)
   and 0 otherwise.  The standard signbit() macro is used so the answer does
   not depend on which half of the double the host stores first.  */
static int
isSignBitSet (double d)
{
  return signbit (d) != 0;
}

static int const n10Count = 16;
static double const n10s[2][16] = {
  { 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8,
    1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16 },
  { 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8,
    1e-9, 1e-10, 1e-11, 1e-12, 1e-13, 1e-14, 1e-15, 1e-16 }
};

static double
getN10Exp (int nExp)
{
  if (nExp < 0) {
    if (-nExp <= n10Count)
      return n10s[1][-nExp-1];
    else
      return pow (10.0, nExp);
  } else if (nExp > 0) {
    if (nExp <= n10Count)
      return n10s[0][nExp-1];
    else
      return pow (10.0, nExp);
  }
  else // ( nExp == 0 )
    return 1.0;
}

/* Round fValue to 15 significant decimal digits.  Zero and HUGE_VAL are
   returned unchanged.  The sign is stripped before scaling and restored on
   the result.  */
double
approxValue (double fValue)
{
  int negative;
  int digits;
  double scale;

  // We don't handle these conditions.  Bail out.
  if (fValue == 0.0 || fValue == HUGE_VAL)
    return fValue;

  negative = isSignBitSet (fValue);
  if (negative)
    fValue = -fValue;

  /* Scale so that 15 digits sit in front of the decimal point */
  digits = floor (log10 (fValue));
  digits = 14 - digits;
  scale = getN10Exp (digits);

  fValue *= scale;
  fValue = round (fValue);
  fValue /= scale;

  if (negative)
    return -fValue;
  return fValue;
}

/* Lanczos approximation of the gamma function at fZ.  The power term is
   applied in two halves around the division by exp() to avoid intermediate
   overflow.  For fZ up to 20 that is a whole number the result is rounded
   to the nearest integer.  */
double
lcl_GetGammaHelper (double fZ)
{
  const double kLanczosG = 6.024680040776729583740234375;
  const double shifted = fZ + kLanczosG - 0.5;
  // avoid intermediate overflow
  const double halfPower = pow (shifted, fZ / 2 - 0.25);
  double result = lcl_getLanczosSum(fZ);

  result *= halfPower;
  result /= exp (shifted);
  result *= halfPower;
  if (fZ <= 20.0 && fZ == floor (approxValue (fZ)))
    result = round(result);
  return result;
}

/* Gamma function of fZ.
   Arguments below 1 are shifted into the range handled by
   lcl_GetGammaHelper() with Gamma(x) = Gamma(x+1)/x; arguments below -0.5
   use Euler's reflection formula and return 0.0 when the result
   underflows.  */
static double
GetGamma (double fZ)
{
  const double fLogPi = log (F_PI);

  if (fZ >= 1.0)
    return lcl_GetGammaHelper (fZ);

  if (fZ >= 0.5)  // shift to x>=1 using Gamma(x)=Gamma(x+1)/x
    return lcl_GetGammaHelper (fZ+1) / fZ;

  if (fZ >= -0.5)
    // shift to x>=1, might overflow
    return lcl_GetGammaHelper (fZ+2) / (fZ+1) / fZ;

  // fZ<-0.5
  // Use Euler's reflection formula: gamma(x)= pi/ ( gamma(1-x)*sin(pi*x) )
  double fLogDivisor = lcl_GetLogGammaHelper (1-fZ) + log (fabs (sin (F_PI*fZ)));
  if (fLogDivisor - fLogPi >= fLogDblMax)     // underflow
    return 0.0;

  /* The logarithms dropped the sign of the sine; put it back */
  return exp (fLogPi - fLogDivisor) * ((sin (F_PI*fZ) < 0.0) ? -1.0 : 1.0);
}

/* Beta function of fAlpha and fBeta.  While the sum of both arguments stays
   below fMaxGammaArgument it is computed from three gamma values; otherwise
   the Lanczos terms are combined directly so that no single gamma value has
   to be representable.  */
static double
GetBeta (double fAlpha, double fBeta)
{
  /* fA takes the larger argument, fB the other one */
  const double fA = (fAlpha > fBeta) ? fAlpha : fBeta;
  const double fB = (fAlpha > fBeta) ? fBeta : fAlpha;
  const double kLanczosG = 6.024680040776729583740234375; //see GetGamma
  double shift;
  double sumShift;
  double lanczos;
  double ratioA;
  double ratioB;

  if (fA+fB < fMaxGammaArgument) // simple case
    return GetGamma(fA) / GetGamma(fA+fB)*GetGamma(fB);

  // need logarithm
  // GetLogGamma is not accurate enough, back to Lanczos for all three
  // GetGamma and arrange factors newly.
  shift = kLanczosG - 0.5;
  lanczos = lcl_getLanczosSum (fA);
  lanczos /= lcl_getLanczosSum (fA+fB);
  lanczos *= lcl_getLanczosSum (fB);
  sumShift = fA+fB+shift;
  lanczos *= sqrt ((sumShift/(fA+shift))/(fB+shift));
  ratioA = fB/(fA+shift); // (fA+shift)/sumShift = 1 / ( 1 + fB/(fA+shift))
  ratioB = fA/(fB+shift);
  return exp (-fA * log1p(ratioA) - fB * log1p (ratioB) - shift) * lanczos;
}

/* Natural logarithm of the beta function of fAlpha and fBeta, assembled
   from the Lanczos terms of the three gamma values involved.  */
static double
GetLogBeta (double fAlpha, double fBeta)
{
  /* fA takes the larger argument, fB the other one */
  const double fA = (fAlpha > fBeta) ? fAlpha : fBeta;
  const double fB = (fAlpha > fBeta) ? fBeta : fAlpha;
  const double kLanczosG = 6.024680040776729583740234375; //see GetGamma
  const double shift = kLanczosG - 0.5;
  double lanczos;
  double logLanczos;
  double sumShift;
  double ratioA;
  double ratioB;
  double result;

  lanczos = lcl_getLanczosSum (fA);
  lanczos /= lcl_getLanczosSum (fA+fB);
  lanczos *= lcl_getLanczosSum (fB);
  logLanczos = log (lanczos);
  sumShift = fA+fB+shift;
  logLanczos += 0.5*(log (sumShift) - log (fA+shift) - log (fB+shift));
  ratioA = fB/(fA+shift); // (fA+shift)/sumShift = 1 / ( 1 + fB/(fA+shift))
  ratioB = fA/(fB+shift);
  result = -fA * log1p(ratioA) - fB * log1p (ratioB) - shift;
  result += logLanczos;
  return result;
}

/* Density of the beta distribution with parameters fA and fB at fX.
   fA == 1 and fB == 1 have closed forms; outside of the open interval (0,1)
   the density is 0 for the remaining cases.  The general case uses powers
   directly when no part over- or underflows and logarithms otherwise.  */
static double
GetBetaDistPDF (double fX, double fA, double fB)
{
  double logX;
  double logY;
  double expA;
  double expB;
  double logBeta;

  if (fA == 1.0) {
    // result b*(1-x)^(b-1)
    if (fB == 1.0)
      return 1.0;
    if (fB == 2.0)
      return -2.0*fX + 2.0;
    return (fX <= 0.01)
      ? fB + fB * expm1 ((fB-1.0) * log1p (-fX))
      : fB * pow (0.5-fX+0.5, fB-1.0);
  }
  if (fB == 1.0) {
    // result a*x^(a-1)
    return (fA == 2.0) ? fA * fX : fA * pow(fX, fA-1);
  }
  if (fX <= 0.0 || fX >= 1.0)
    return 0.0;

  // normal cases; result x^(a-1)*(1-x)^(b-1)/Beta(a,b)
  logY = (fX < 0.1) ? log1p (-fX) : log (0.5-fX+0.5);
  logX = log (fX);
  expA = fA-1.0;
  expB = fB-1.0;
  logBeta = GetLogBeta (fA,fB);

  // check whether parts over- or underflow
  if (expA * logX < fLogDblMax && expA * logX > fLogDblMin
      && expB * logY < fLogDblMax && expB * logY > fLogDblMin
      && logBeta < fLogDblMax && logBeta > fLogDblMin)
    return pow (fX,fA-1.0) * pow (0.5-fX+0.5,fB-1.0) / GetBeta (fA,fB);

  // need logarithm;
  // might overflow as a whole, but seldom, not worth to pre-detect it
  return exp((fA-1.0)*logX + (fB-1.0)* logY - logBeta);
}

/* Cumulative beta distribution with parameters fAlpha and fBeta at fXin.
   Returns 0 for fXin <= 0 and 1 for fXin >= 1.  fBeta == 1 and fAlpha == 1
   have closed forms; all other cases use the continued fraction, evaluated
   on the reflected problem when fXin lies above fAlpha/(fAlpha+fBeta).  The
   result is clamped to [0,1].  */
static double
GetBetaDist (double fXin, double fAlpha, double fBeta)
{
  // special cases
  if (fXin <= 0.0)  // values are valid, see spec
    return 0.0;
  if (fXin >= 1.0)  // values are valid, see spec
    return 1.0;
  if (fBeta == 1.0)
    return pow(fXin, fAlpha);
  if (fAlpha == 1.0)
    // 1.0 - pow(1.0-fX,fBeta) is not accurate enough; the same value is
    // -expm1 (fBeta * log1p (-fX)).  The minus sign is required: expm1()
    // alone is (1-fX)^fBeta - 1, which is negative.
    return -expm1 (fBeta * log1p (-fXin));
  //FIXME: need special algorithm for fX near fP for large fA,fB
  double fResult;
  // The continued fraction is always used; power series are neither
  // faster nor more accurate.
  double fY = (0.5-fXin)+0.5;
  double flnY = log1p (-fXin);
  double fX = fXin;
  double flnX = log (fXin);
  double fA = fAlpha;
  double fB = fBeta;
  int bReflect = fXin > fAlpha/(fAlpha+fBeta);
  if (bReflect) {
    /* Swap the roles of x / 1-x and of both parameters */
    fA = fBeta;
    fB = fAlpha;
    fX = fY;
    fY = fXin;
    flnX = flnY;
    flnY = log(fXin);
  }
  fResult = lcl_GetBetaHelperContFrac (fX,fA,fB);
  fResult = fResult/fA;
  double fP = fA/(fA+fB);
  double fQ = fB/(fA+fB);
  double fTemp;
  if (fA > 1.0 && fB > 1.0 && fP < 0.97 && fQ < 0.97) //found experimental
    fTemp = GetBetaDistPDF (fX,fA,fB)*fX*fY;
  else
    fTemp = exp (fA*flnX + fB*flnY - GetLogBeta (fA,fB));
  fResult *= fTemp;
  if (bReflect)
    fResult = 0.5 - fResult + 0.5;
  if (fResult > 1.0) // ensure valid range
    fResult = 1.0;
  if (fResult < 0.0)
    fResult = 0.0;
  return fResult;
}

/* Tail value of the t distribution for statistic fT with fDF degrees of
   freedom: half of the beta distribution at fDF/(fDF+fT^2) with parameters
   fDF/2 and 0.5.  */
static double
GetTDist(double fT, double fDF)
{
    const double x = fDF/(fDF+fT*fT);
    const double tail = GetBetaDist (x, fDF/2.0, 0.5);

    return 0.5 * tail;
}

static int
CalculateTest (double *pMat1, double *pMat2, int n, double *fT, double *fDF)
{
  double fCount1  = 0.0;
  double fCount2  = 0.0;
  double fSum1    = 0.0;
  double fSumSqr1 = 0.0;
  double fSum2    = 0.0;
  double fSumSqr2 = 0.0;
  double fVal;
  int i;

  for (i = 0; i < n; i++) {
    fVal = pMat1[i];
    fSum1    += fVal;
    fSumSqr1 += fVal * fVal;
    fCount1++;
  }
  for (i = 0; i < n; i++) {
    fVal = pMat2[i];
    fSum2    += fVal;
    fSumSqr2 += fVal * fVal;
    fCount2++;
  }
  //  laut Bronstein-Semendjajew
  double fS1 = (fSumSqr1 - fSum1*fSum1/fCount1) / (fCount1 - 1.0);    // Varianz
  double fS2 = (fSumSqr2 - fSum2*fSum2/fCount2) / (fCount2 - 1.0);
  *fT = fabs (fSum1/fCount1 - fSum2/fCount2) /
    sqrt ((fCount1-1.0)*fS1 + (fCount2-1.0)*fS2) *
    sqrt (fCount1*fCount2*(fCount1+fCount2-2)/(fCount1+fCount2));
  *fDF = fCount1 + fCount2 - 2;

  return true;
}

/* Arithmetic mean of the N values of A.  */
static double
average (double *A, int n)
{
  double total = 0.0;
  const double *p = A;
  const double *end = A + n;

  while (p < end)
    total += *p++;
  return total / n;
}

/* Sample standard deviation (divisor N - 1) of the N values of A.  */
static double
stdev (double *A, int n)
{
  const double mean = average (A, n);
  double sumSq = 0.0;
  int k;

  for (k = 0; k < n; k++) {
    const double diff = mean - A[k];

    sumSq += diff * diff;
  }
  return sqrt (sumSq / (n - 1));
}

/* Signed t statistic for the difference of the means of A and B, two
   samples of N values each, using the average of both variances.  */
static double
tvalue (double *A, double *B, int n)
{
  const double meanA = average (A, n);
  const double meanB = average (B, n);
  const double sdA = stdev (A, n);
  const double sdB = stdev (B, n);
  const double pooled = sqrt ((sdA*sdA + sdB*sdB) / 2.0);

  return (meanA - meanB) / (pooled * sqrt (2.0 / n));
}

/* Degrees of freedom for comparing A and B (N values each), computed from
   both sample variances and rounded to the nearest integer:
   (vA/N + vB/N)^2 / ((vA/N)^2/(N-1) + (vB/N)^2/(N-1)).
   The sample means are not part of the formula, so they are not
   computed.  */
static int
degrees_freedom (double *A, double *B, int n)
{
  double dA, dB;
  double fA, fB;
  double df;

  dA = stdev (A, n);
  dB = stdev (B, n);

  fA = dA * dA / n;
  fB = dB * dB / n;
  df = ((fA + fB) * (fA + fB)) / ((fA*fA)/(n - 1) + (fB*fB)/(n - 1));
  return round(df);
}

/* Split S at blanks, tabs and newlines, convert every word with atof() and
   store the values in VEC in order.  S is modified by strtok().  Returns
   the number of values stored; VEC must be large enough to hold them.  */
static int
scan_arg (char *s, double *vec)
{
  int count = 0;
  char *word;

  for (word = strtok (s, " \t\n");
       word != NULL;
       word = strtok (NULL, " \t\n"))
    vec[count++] = atof (word);

  return count;
}

/* Return true if we have a better result.
   T is the candidate and TBEST the result to beat.  Two methods are used to
   compare them:
     - thres == 0: a t-test on both samples; T wins when the tail value is
       at most 0.1 and its r[0] beats TBEST's r[0];
     - otherwise: T wins when its r[0] beats TBEST's r[0] by more than
       'thres' percent.
   "Beats" means lower when 'minimize' is set and higher otherwise.  */
static int
is_better (try_t *t, try_t *tbest)
{
  int r;
  double fT, fDF;
  double p_value;

  r = 0;
  if (thres == 0.0) {
    CalculateTest (&(t->r[0]), &(tbest->r[0]), t->n, &fT, &fDF);
    p_value = GetTDist (fT, fDF);
    /* A literal percent sign has to be written as %% in a format string */
    if (verbose > 0) printf ("  [is_better] t-test: %.0f%%\n", p_value * 100.0);
    if (p_value <= 0.1) {
      if (minimize) {
	if (t->r[0] < tbest->r[0])
	  r = 1;
      } else {
	if (t->r[0] > tbest->r[0])
	  r = 1;
      }
    }
  } else {
    if (minimize) {
      if (t->r[0] < (tbest->r[0] * (1.0 - thres/100.0)))
	r = 1;
    } else {
      if (t->r[0] > (tbest->r[0] * (1.0 + thres/100.0)))
	r = 1;
    }
  }

  return r;
}

/* qsort() comparison for doubles that puts the best value first: ascending
   order when 'minimize' is set, descending order otherwise.  */
int
try_sort (const void *d1, const void *d2)
{
  const double a = *((double *)d1);
  const double b = *((double *)d2);
  /* -1, 0 or 1 for ascending order */
  const int ascending = (a > b) - (a < b);

  return minimize ? ascending : -ascending;
}

/* Build / run and return result of a set of flags */
static try_t *
try (char *flags)
{
  int ret;
  char *s, *args1, *args2, *args3, *arg2a;
  FILE *fres;
  char buffer[1024], *b;
  try_t *tr;
  int i;
  double x;

  tr = (try_t *) malloc (sizeof (try_t));
  if (thres == 0.0)
    tr->n = 5;
  else
    tr->n = rep;
  if (verbose > 1) printf ("  [try] flags: %s\n", flags);

  args1 = cat4 (ml, bd, Htools, Htools);
  args2 = cat4 (args1, tools, prefix, "fm");
  arg2a = cat2 (args2, "all");
  args3 = cat4 (arg2a, "\"", flags, "\"");
  s = cat3 (build_sh, args3, "> /dev/null 2>&1");
  if (verbose > 2) printf ("system: %s\n", s);
  ret = system (s);
  if (ret == -1) {
    printf ("build_sh returned -1\n%s\n", s);
    exit (1);
  }
  free (args1);
  free (args2);
  free (args3);
  free (s);

  sprintf (buffer, "%d", tr->n);
  args1 = cat3 (tn, tu, buffer);
  s = cat3 (result_sh, bd, args1);
  if (verbose > 2) printf ("popen: %s\n", s);
  fres = popen (s, "r");
  if (fres == NULL) {
    printf ("result_sh returned NULL\n%s\n", s);
    exit (1);
  }
  if (bmk_type == B_NET2_IP) {
    for (i = 0; i < 11*(tr->n); i++) {
      b = fgets (buffer, 1024, fres);
      if (b == NULL)
	printf ("Unexpected error reading buffer\n");
      tr->r[i] = atof (buffer);
      if (verbose > 2) printf ("Independent results: %f\n", tr->r[i]);
    }
    /* Sort times */
    qsort (&(tr->r[0]), tr->n, sizeof (double), try_sort);
    qsort (&(tr->r[tr->n]), tr->n, sizeof (double), try_sort);
    qsort (&(tr->r[2*(tr->n)]), tr->n, sizeof (double), try_sort);
    qsort (&(tr->r[3*(tr->n)]), tr->n, sizeof (double), try_sort);
    qsort (&(tr->r[4*(tr->n)]), tr->n, sizeof (double), try_sort);
    qsort (&(tr->r[5*(tr->n)]), tr->n, sizeof (double), try_sort);
    qsort (&(tr->r[6*(tr->n)]), tr->n, sizeof (double), try_sort);
    qsort (&(tr->r[7*(tr->n)]), tr->n, sizeof (double), try_sort);
    qsort (&(tr->r[8*(tr->n)]), tr->n, sizeof (double), try_sort);
    qsort (&(tr->r[9*(tr->n)]), tr->n, sizeof (double), try_sort);
    qsort (&(tr->r[10*(tr->n)]), tr->n, sizeof (double), try_sort);
    if (verbose > 2) {
      for (i = 0; i < 11*(tr->n); i++) {
	printf ("Sorted results: %f\n", tr->r[i]);
      }
    }
    /* Calculate IPMark */
    for (i = 0; i < tr->n; i++) {
      /*First we have to fix number of iterations for ip_reassembly(7,8) and nat(9,10) */
      tr->r[i + 7*(tr->n)] = 1.0 / (1.0/tr->r[i + 7*(tr->n)] - 1.0/tr->r[i + 8*(tr->n)]);
      tr->r[i + 9*(tr->n)] = 1.0 / (1.0/tr->r[i + 9*(tr->n)] - 1.0/tr->r[i + 10*(tr->n)]);
      /* Do the geomean of pktcheck results */
      x = tr->r[i] * tr->r[i + tr->n] * tr->r[i + 2*(tr->n)] * tr->r[i + 3*(tr->n)];
      x = pow (x, 1.0/4.0);
      /* Do the geomean with rest of results */
      x = x * tr->r[i + 4*(tr->n)] * tr->r[i + 5*(tr->n)] * tr->r[i + 6*(tr->n)] * tr->r[i + 7*(tr->n)] * tr->r[i + 9*(tr->n)];
      tr->r[i] = pow (x, 1.0/6.0) / 10.0;
    }
    /* After calculating the indice, sort them */
    qsort (&(tr->r[0]), tr->n, sizeof (double), try_sort);
  } else if (bmk_type == B_NET2_TCP) {
    for (i = 0; i < 3*(tr->n); i++) {
      b = fgets (buffer, 1024, fres);
      if (b == NULL)
	printf ("Unexpected error reading buffer\n");
      tr->r[i] = atof (buffer);
      if (verbose > 2) printf ("Independent results: %f\n", tr->r[i]);
    }
    /* Sort times */
    qsort (&(tr->r[0]), tr->n, sizeof (double), try_sort);
    qsort (&(tr->r[tr->n]), tr->n, sizeof (double), try_sort);
    qsort (&(tr->r[2*(tr->n)]), tr->n, sizeof (double), try_sort);
    if (verbose > 2) {
      for (i = 0; i < 3*(tr->n); i++) {
	printf ("Sorted results: %f\n", tr->r[i]);
      }
    }
    /* Calculate TCPMark */
    for (i = 0; i < tr->n; i++) {
      x = tr->r[i] * tr->r[i + tr->n] * tr->r[i + 2*(tr->n)];
      tr->r[i] = pow (x, 1.0/3.0) / 100.0;
    }    
    /* After calculating the indice, sort them */
    qsort (&(tr->r[0]), tr->n, sizeof (double), try_sort);
  } else {
    /* It must be B_SIMPLE */
    for (i = 0; i < tr->n; i++) {
      fgets (buffer, 1024, fres);
      tr->r[i] = atof (buffer);
    }
    /* Sort times */
    qsort (&(tr->r[0]), tr->n, sizeof (double), try_sort);
  }
  fclose (fres);
  free (args1);
  free (s);

  if (verbose > 1) {
    printf ("  [try] result: ");
    for (i = 0; i < tr->n; i++)
      printf (" %f", tr->r[i]);
    printf ("\n");
  }
  return tr;
}

/* "list" algorithm: starting from FBASE, try the entries of D one after the
   other and keep each one that improves the best result so far.  A plain
   flag is appended to the best flags; a nested descriptor is run with the
   best flags as its base.  Flags whose tags are not selected are skipped.
   Returns the best flags found together with their result.  */
static result_t
exec_list (desc_t *d, char *fbase)
{
  result_t best, cand;
  flag_t *entry;
  int k;

  best.f = fbase;
  best.t = try (fbase);
  printf ("[list] Base flags:  %s\n", best.f);
  printf ("[list] Base result: %f\n", best.t->r[0]);
  fflush (NULL);

  for (k = 0; k < d->flags_cnt; k++) {
    entry = d->flags[k];
    if (entry->is_flag) {
      if ((entry->tag & tag_set) == 0)
	continue;
      cand.f = cat2 (best.f, entry->str);
      cand.t = try (cand.f);
    } else {
      cand = exec_algs (entry->desc, best.f);
    }

    if (!is_better (cand.t, best.t))
      continue;
    best = cand;
    printf ("  Better flags:  %s\n", best.f);
    printf ("  Better result: %f\n", best.t->r[0]);
    fflush (NULL);
  }

  return best;
}

/* Return "BASE FLAG" in a new string and release BASE, which must have been
   allocated with malloc().  */
static char *
ilist_append (char *base, char *flag)
{
  char *joined = cat2 (base, flag);

  free (base);
  return joined;
}

/* "i-list" algorithm: start with every plain flag of D added to FBASE and
   try removing the flags one after the other, keeping each removal that
   improves the result.  Only flags whose tags are selected are candidates
   for removal.  A nested descriptor is run on top of the flags still in
   use; when it wins, its flags become the new base and the remaining flags
   of D are dropped.  Returns the best flags found with their result.  */
static result_t
exec_ilist (desc_t *d, char *fbase)
{
  result_t best, r;
  int i, j;
  flag_t *f;
  int ilist[1024];

  memset (ilist, 0, sizeof(ilist));
  best.f = strdup (fbase);
  for (j = 0; j < d->flags_cnt; j++) {
    if (d->flags[j]->is_flag) {
      best.f = ilist_append (best.f, d->flags[j]->str);
      ilist[j] = 1;
    }
  }
  /* The reference measurement has to use the flags reported as the base,
     that is FBASE plus every flag of the list, not FBASE alone.  */
  best.t = try (best.f);
  printf ("[i-list] Base flags:  %s\n", best.f);
  printf ("[i-list] Base result: %f\n", best.t->r[0]);
  fflush (NULL);

  for (i = 0; i < d->flags_cnt; i++) {
    f = d->flags[i];
    if (f->is_flag) {
      if (ilist[i]) {
	if ((f->tag & tag_set) == 0)
	  continue;
	/* Will make a try with all flags except the current one */
	r.f = strdup (fbase);
	for (j = 0; j < d->flags_cnt; j++) {
	  if (j != i && ilist[j])
	    r.f = ilist_append (r.f, d->flags[j]->str);
	}
	r.t = try (r.f);
	if (is_better (r.t, best.t)) {
	  ilist[i] = 0;
	  best = r;
	  printf ("  Better flags:  %s\n", best.f);
	  printf ("  Better result: %f\n", best.t->r[0]);
	  fflush (NULL);
	}
      }
    } else {
      r.f = strdup (fbase);
      for (j = 0; j < d->flags_cnt; j++) {
	if (ilist[j])
	  r.f = ilist_append (r.f, d->flags[j]->str);
      }
      r = exec_algs (f->desc, r.f);
      if (is_better (r.t, best.t)) {
	best = r;
	/* The winning flags already contain the flags in use */
	memset (ilist, 0, sizeof(ilist));
	fbase = best.f;
	printf ("  Better flags:  %s\n", best.f);
	printf ("  Better result: %f\n", best.t->r[0]);
	fflush (NULL);
      }
    }
  }

  return best;
}

/* "greed" algorithm: in every round each entry of D not chosen yet is tried
   on top of the best flags of the previous round, and the last entry that
   improved on the round leader is kept for good.  Rounds are repeated until
   one of them brings no improvement.  Flags whose tags are not selected are
   skipped.  Returns the best flags found with their result.  */
static result_t
exec_greed (desc_t *d, char *fbase)
{
  result_t best, cand, leader;
  flag_t *entry;
  int taken[1024];
  int k, winner;

  memset (taken, 0, sizeof(taken));

  best.f = fbase;
  best.t = try (fbase);
  printf ("[greed] Base flags:  %s\n", best.f);
  printf ("[greed] Base result: %f\n", best.t->r[0]);
  fflush (NULL);

  leader = best;
  for (;;) {
    best = leader;
    winner = -1;
    for (k = 0; k < d->flags_cnt; k++) {
      if (taken[k] == 1)
	continue;
      entry = d->flags[k];
      if (entry->is_flag) {
	if ((entry->tag & tag_set) == 0)
	  continue;
	cand.f = cat2 (best.f, entry->str);
	cand.t = try (cand.f);
      } else {
	cand = exec_algs (entry->desc, best.f);
      }

      if (!is_better (cand.t, leader.t))
	continue;
      leader = cand;
      winner = k;
      printf ("  Better flags:  %s\n", leader.f);
      printf ("  Better result: %f\n", leader.t->r[0]);
      fflush (NULL);
    }
    if (winner == -1)
      break;			/* nothing improved in this round */
    taken[winner] = 1;
  }

  return best;
  //TODO
}

/* Return a newly allocated string with the flags of D whose index bit is
   set in mask M, each followed by one blank.  The string is sized from the
   selected flags, so long flag lists cannot overflow a fixed buffer.  Only
   the first 32 entries can be selected by M.  The caller owns the result;
   the program exits when memory runs out.  */
static char *
build_list (desc_t *d, int m)
{
  const unsigned int mask = (unsigned int) m;
  size_t total = 0;
  size_t len;
  char *list;
  char *w;
  int i;

  for (i = 0; i < d->flags_cnt && i < 32; i++)
    if (mask & (1u << i))
      total += strlen (d->flags[i]->str) + 1;

  list = malloc (total + 1);
  if (list == NULL) {
    printf ("Out of memory building flag list\n");
    exit (1);
  }

  w = list;
  for (i = 0; i < d->flags_cnt && i < 32; i++) {
    if (mask & (1u << i)) {
      len = strlen (d->flags[i]->str);
      memcpy (w, d->flags[i]->str, len);
      w += len;
      *w++ = ' ';
    }
  }
  *w = '\0';
  return list;
}

/* "all" algorithm: try every combination of the entries of D on top of
   FBASE and return the best one.  Combination 0 (no extra flag) is the
   reference.  The number of combinations is 2^flags_cnt, so the program
   exits when D has too many entries for an int combination mask.  */
static result_t
exec_all (desc_t *d, char *fbase)
{
  result_t r, best;
  int i, tot_combs;
  char *list;

  if (d->flags_cnt >= 31) {
    printf ("Too many flags for \"all\": %d\n", d->flags_cnt);
    exit (1);
  }
  tot_combs = 1 << d->flags_cnt;
  for (i = 0; i < tot_combs; i++) {
    list = build_list (d, i);
    r.f = cat2 (fbase, list);
    /* cat2() made its own copy */
    free (list);
    r.t = try (r.f);
    if (i == 0 || is_better (r.t, best.t)) {
      best = r;
      if (i == 0) {
	printf ("[all] Base flags:  %s\n", best.f);
	printf ("[all] Base result: %f\n", best.t->r[0]);
	fflush (NULL);
      } else {
	printf ("  Better flags:  %s\n", best.f);
	printf ("  Better result: %f\n", best.t->r[0]);
	fflush (NULL);
      }
    }
  }
  return best;
}

/* Run every algorithm selected in D on base flags FBASE, in the order list,
   i-list, greed, all, and return the best of their results.  The first
   algorithm that runs sets the reference; a later one replaces it only when
   is_better() says so.  With no algorithm selected both fields of the
   result are NULL.  */
static result_t
exec_algs (desc_t *d, char *fbase)
{
  static const struct {
    int kind;
    result_t (*run) (desc_t *, char *);
  } step[] = {
    { T_LIST,  exec_list },
    { T_ILIST, exec_ilist },
    { T_GREED, exec_greed },
    { T_ALL,   exec_all }
  };
  const int selected = d->algs;
  result_t winner, cand;
  int k;

  winner.f = NULL;
  winner.t = NULL;

  for (k = 0; k < 4; k++) {
    if (!(selected & (1 << step[k].kind)))
      continue;
    cand = step[k].run (d, fbase);
    if (winner.f == NULL || is_better (cand.t, winner.t))
      winner = cand;
  }

  return winner;
}

/* Select the build/result scripts and run parameters for benchmark BMK.

   On a known benchmark name this sets the globals build_sh and result_sh
   (paths under <fmdir>/f-test/ built with glue3), rep, minimize and
   bmk_type.  On an unknown name it prints an error and the usage text,
   then terminates the program with exit status 1.  */
static void
setup_bmk (char *bmk)
{
  /* One row per supported benchmark; add new benchmarks here.  */
  static const struct {
    char *name;         /* value accepted on the command line */
    char *build;        /* build script file name */
    char *result;       /* result script file name */
    int rep;            /* value stored in the global rep */
    int minimize;       /* value stored in the global minimize */
    int type;           /* one of the B_* constants */
  } known[] = {
    { "dhrystone",  "dhrystone_build.sh", "dhrystone_fm.sh", 20, 0, B_SIMPLE   },
    { "coremark",   "coremark_build.sh",  "coremark_fm.sh",  10, 0, B_SIMPLE   },
    { "eembc2-IP",  "net-2_build.sh",     "net-2-IP_fm.sh",   3, 0, B_NET2_IP  },
    { "eembc2-TCP", "net-2_build.sh",     "net-2-TCP_fm.sh",  3, 0, B_NET2_TCP },
    { "random",     "random_build.sh",    "random_fm.sh",     5, 1, B_SIMPLE   }
  };
  size_t i;

  bmk_type = B_SIMPLE;
  for (i = 0; i < sizeof known / sizeof known[0]; i++) {
    if (strcmp (bmk, known[i].name) != 0)
      continue;

    build_sh = glue3 (fmdir, "/f-test/", known[i].build);
    result_sh = glue3 (fmdir, "/f-test/", known[i].result);
    rep = known[i].rep;
    minimize = known[i].minimize;
    bmk_type = known[i].type;
    return;
  }

  //TODO add more benchmarks and individual tests
  printf ("Unknown benchmark: %s\n\n\n", bmk);
  print_usage ();
  exit (1);
}

/* Program entry point.

   Locates the installation directory (the directory of the running
   executable, resolved through the shell), parses the key=value command
   line arguments, loads the strategy file, configures the selected
   benchmark and runs the enabled search algorithms, printing the best
   flags found.

   Returns 0 on success.  Exits with status 1 on a bad or missing argument,
   when the installation directory cannot be determined, when the strategy
   file cannot be loaded, or when no algorithm produced a result.  */
int
main (int argc, char *argv[])
{
  int i;
  char *strategy;
  char *bmk;
  char *tags;
  char *self;
  result_t best;
  char *s;
  FILE *fdir;

  /* Initialize stacks and global variables */
  tag_cnt = 0;
  /* Set only the top bit; done in unsigned arithmetic because 1 << 31 on a
     signed int is undefined behaviour.  */
  tag_set = (int) (1u << 31);
  desc_top = 0;
  token = NULL;
  algs = 0;
  rep = 1;
  fLogDblMax = log (DBL_MAX);
  fLogDblMin = log (DBL_MIN);

  /* Find the directory holding this executable.  The path of the program
     is read from the "_" environment variable, which may be missing.  */
  self = getenv ("_");
  if (self == NULL) {
    fprintf (stderr, "Cannot locate the executable: environment variable \"_\" is not set\n");
    exit (1);
  }
  s = cat3 ("dirname `readlink -e ", self, "`");
  fdir = popen (s, "r");
  if (fdir == NULL) {
    perror ("popen");
    exit (1);
  }
  if (fgets (fmdir, sizeof fmdir, fdir) == NULL)
    fmdir[0] = '\0';
  /* Streams created by popen must be closed with pclose, not fclose.  */
  pclose (fdir);
  /* Drop the trailing newline, if any, without ever indexing before the
     start of the buffer.  */
  fmdir[strcspn (fmdir, "\n")] = '\0';
  if (fmdir[0] == '\0') {
    fprintf (stderr, "Cannot determine the installation directory\n");
    exit (1);
  }

  /* Parse the command arguments */
  verbose = 0;
  strategy = NULL;
  flgs = strdup ("");
  tn = "localhost";
  tu = getenv("USER");
  if (tu == NULL)
    tu = "";                    /* USER may be unset; keep the default bd well-formed */
  bd = glue3 ("/local/", tu, "/tools_test");
  Htools = NULL;
  tools = NULL;
  prefix = "";
  ml = NULL;
  build_sh = NULL;
  result_sh = NULL;
  bmk = NULL;
  minimize = 1;
  thres = 0.1;
  tags = NULL;
  for (i = 1; i < argc; i++) {
    if (strcmp (argv[i], "-h") == 0) {
      print_usage ();
      exit (0);
    } else if (strcmp (argv[i], "-v") == 0) {
      verbose++;
    } else if (strncmp (argv[i], "s=", 2) == 0) {
      strategy = glue3(fmdir, "/f-test/", argv[i]+2);
    } else if (strncmp (argv[i], "flgs=", 5) == 0) {
      flgs = strdup (argv[i]+5);
    } else if (strncmp (argv[i], "bd=", 3) == 0) {
      bd = argv[i]+3;
    } else if (strncmp (argv[i], "Htools=", 7) == 0) {
      Htools = argv[i]+7;
    } else if (strncmp (argv[i], "tools=", 6) == 0) {
      tools = argv[i]+6;
    } else if (strncmp (argv[i], "prefix=", 7) == 0) {
      prefix = argv[i]+7;
    } else if (strncmp (argv[i], "ml=", 3) == 0) {
      ml = argv[i]+3;
    } else if (strncmp (argv[i], "tn=", 3) == 0) {
      tn = argv[i]+3;
    } else if (strncmp (argv[i], "tu=", 3) == 0) {
      tu = argv[i]+3;
    } else if (strncmp (argv[i], "fm=", 3) == 0) {
      bmk = argv[i] + 3;
    } else if (strncmp (argv[i], "th=", 3) == 0) {
      thres = atof (argv[i]+3);
    } else if (strncmp (argv[i], "tag=", 4) == 0) {
      tags = argv[i]+4;
    } else {
      printf ("Unknown argument: %s\n", argv[i]);
      print_usage ();
      exit (1);
    }
  }

  /* Mandatory arguments */
  if (strategy == NULL) {
    printf ("Need s=<file> argument\n\n\n");
    print_usage ();
    exit (1);
  }
  if (bmk == NULL) {
    /* The benchmark is selected with "fm=", as parsed above.  */
    printf ("Need fm=<bmk> argument\n\n\n");
    print_usage ();
    exit (1);
  }
  if (Htools == NULL) {
    printf ("Need Htools=<path> argument\n\n\n");
    print_usage ();
    exit (1);
  }
  if (tools == NULL) {
    printf ("Need tools=<path> argument\n\n\n");
    print_usage ();
    exit (1);
  }
  if (ml == NULL) {
    printf ("Need ml=<type> argument\n\n\n");
    print_usage ();
    exit (1);
  }
  printf ("\\/  \\/  \\/  \\/  \\/  \\/  \\/  \\/  \\/  \\/  \\/  \\/  \\/  \\/  \\/  \\/  \\/  \\/  \\/  \\/\n");
  printf ("Using Htools:     %s\n", Htools);
  printf ("Using tools:      %s\n", tools);
  printf ("Using strategy:   %s\n", strategy);
  printf ("Using flgs:       %s\n", flgs);
  printf ("Target bord:      %s\n", tn);
  printf ("Target multilib:  %s\n", ml);
  printf ("Target benchmark: %s\n\n", bmk);
  if (verbose > 0) printf ("Threshold to be used: %4.1f\n", thres);
  fflush (NULL);

  /* Load flags file */
  if (load_file (strategy, tags)) {
    exit (1);
  }
  if (verbose > 1) dump_flags (desc_stack[0], 0);

  /* Prepare benchmark scripts and parameters */
  setup_bmk (bmk);

  /* Execute flag mining */
  best = exec_algs (desc_stack[0], flgs);
  /* exec_algs leaves both members NULL when the top-level descriptor
     enables no algorithm; do not dereference them in that case.  */
  if (best.f == NULL || best.t == NULL) {
    fprintf (stderr, "No result: the strategy enables no algorithm at top level\n");
    exit (1);
  }
  printf ("Best flags:  %s\n", best.f);
  printf ("Best result: %f\n", best.t->r[0]);
  printf ("/\\  /\\  /\\  /\\  /\\  /\\  /\\  /\\  /\\  /\\  /\\  /\\  /\\  /\\  /\\  /\\  /\\  /\\  /\\  /\n");

  return 0;
}
