/* Pipelining */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * Fill x[0] .. x[n-1] with a deterministic ramp: element k receives
 * 123.45 * (k + 1).  Despite the name, no random numbers are involved.
 * Nothing is written when n <= 0.
 */
void create_randam_array(int n, double *x){
  int pos = 0;

  while(pos < n){
    x[pos] = 123.45*(pos+1);
    pos++;
  }
}

/* Unrolling */
/*
 * Reference kernel: returns the sum of the squares of x[0] .. x[n-1],
 * handling one element per loop iteration.  No square root is taken, so
 * the result is the squared Euclidean norm.  Returns 0.0 when n <= 0.
 */
double norm(int n, double *x){
  double total = 0.0;
  int idx = 0;

  while(idx < n){
    total += x[idx]*x[idx];
    idx++;
  }

  return total;
}

/*
 * Sum of squares of x[0] .. x[n-1], unrolled by 2: each pass of the main
 * loop adds the squares of two neighbouring elements to a single running
 * total.  A tail loop picks up the element left over when n is odd.
 * Returns 0.0 when n <= 0.
 */
double norm2(int n, double *x){
  double total = 0.0;
  int idx = 0;

  /* Main loop: two elements per pass. */
  while(idx + 1 < n){
    const double *p = x + idx;

    total += p[0]*p[0] + p[1]*p[1];
    idx += 2;
  }

  /* Tail: whatever the main loop could not cover. */
  while(idx < n){
    total += x[idx]*x[idx];
    idx++;
  }

  return total;
}

/*
 * Sum of squares of x[0] .. x[n-1], unrolled by 3: each pass of the main
 * loop adds three squared elements to a single running total.  A tail
 * loop handles the up-to-two elements that remain.
 * Returns 0.0 when n <= 0.
 */
double norm3(int n, double *x){
  double total = 0.0;
  int idx = 0;

  /* Main loop: three elements per pass. */
  while(idx + 2 < n){
    const double *p = x + idx;

    total += p[0]*p[0] + p[1]*p[1] + p[2]*p[2];
    idx += 3;
  }

  /* Tail: whatever the main loop could not cover. */
  while(idx < n){
    total += x[idx]*x[idx];
    idx++;
  }

  return total;
}

/*
 * Sum of squares of x[0] .. x[n-1], unrolled by 4: each pass of the main
 * loop adds four squared elements to a single running total.  A tail
 * loop handles the up-to-three elements that remain.
 * Returns 0.0 when n <= 0.
 */
double norm4(int n, double *x){
  double total = 0.0;
  int idx = 0;

  /* Main loop: four elements per pass. */
  while(idx + 3 < n){
    const double *p = x + idx;

    total += p[0]*p[0] + p[1]*p[1] +
             p[2]*p[2] + p[3]*p[3];
    idx += 4;
  }

  /* Tail: whatever the main loop could not cover. */
  while(idx < n){
    total += x[idx]*x[idx];
    idx++;
  }

  return total;
}

/*
 * Sum of squares of x[0] .. x[n-1], unrolled by 5: each pass of the main
 * loop adds five squared elements to a single running total.  A tail
 * loop handles the up-to-four elements that remain.
 * Returns 0.0 when n <= 0.
 */
double norm5(int n, double *x){
  double total = 0.0;
  int idx = 0;

  /* Main loop: five elements per pass. */
  while(idx + 4 < n){
    const double *p = x + idx;

    total += p[0]*p[0] + p[1]*p[1] +
             p[2]*p[2] + p[3]*p[3] + p[4]*p[4];
    idx += 5;
  }

  /* Tail: whatever the main loop could not cover. */
  while(idx < n){
    total += x[idx]*x[idx];
    idx++;
  }

  return total;
}

/*
 * Sum of squares of x[0] .. x[n-1], unrolled by 6: each pass of the main
 * loop adds six squared elements to a single running total.  A tail
 * loop handles the up-to-five elements that remain.
 * Returns 0.0 when n <= 0.
 */
double norm6(int n, double *x){
  double total = 0.0;
  int idx = 0;

  /* Main loop: six elements per pass. */
  while(idx + 5 < n){
    const double *p = x + idx;

    total += p[0]*p[0] + p[1]*p[1] + p[2]*p[2] +
             p[3]*p[3] + p[4]*p[4] + p[5]*p[5];
    idx += 6;
  }

  /* Tail: whatever the main loop could not cover. */
  while(idx < n){
    total += x[idx]*x[idx];
    idx++;
  }

  return total;
}

/*
 * Sum of squares of x[0] .. x[n-1], unrolled by 7: each pass of the main
 * loop adds seven squared elements to a single running total.  A tail
 * loop handles the up-to-six elements that remain.
 * Returns 0.0 when n <= 0.
 */
double norm7(int n, double *x){
  double total = 0.0;
  int idx = 0;

  /* Main loop: seven elements per pass. */
  while(idx + 6 < n){
    const double *p = x + idx;

    total += p[0]*p[0] + p[1]*p[1] + p[2]*p[2] +
             p[3]*p[3] + p[4]*p[4] + p[5]*p[5] +
             p[6]*p[6];
    idx += 7;
  }

  /* Tail: whatever the main loop could not cover. */
  while(idx < n){
    total += x[idx]*x[idx];
    idx++;
  }

  return total;
}

/*
 * Sum of squares of x[0] .. x[n-1], unrolled by 8: each pass of the main
 * loop adds eight squared elements to a single running total.  A tail
 * loop handles the up-to-seven elements that remain.
 * Returns 0.0 when n <= 0.
 */
double norm8(int n, double *x){
  double total = 0.0;
  int idx = 0;

  /* Main loop: eight elements per pass. */
  while(idx + 7 < n){
    const double *p = x + idx;

    total += p[0]*p[0] + p[1]*p[1] + p[2]*p[2] +
             p[3]*p[3] + p[4]*p[4] + p[5]*p[5] +
             p[6]*p[6] + p[7]*p[7];
    idx += 8;
  }

  /* Tail: whatever the main loop could not cover. */
  while(idx < n){
    total += x[idx]*x[idx];
    idx++;
  }

  return total;
}

/*
 * Time one of the single-accumulator kernels and report its throughput.
 *
 *   n  number of elements of x passed to the kernel on every call
 *   i  kernel selector: 1 = norm, 2 .. 8 = norm2 .. norm8
 *   x  input array with at least n readable elements
 *
 * The selected kernel is called 1000000000/n times, so the total work is
 * about 1e9 element updates regardless of n.  Each element costs one
 * multiply and one add, hence 2*n floating-point operations per call.
 *
 * Returns millions of floating-point operations per second of processor
 * time as measured by clock().  Returns 0.0 when nothing can be measured:
 * n <= 0, i outside 1 .. 8, clock() unavailable, or an elapsed time of
 * zero ticks.
 */
double perf(int n, int i, double *x){
  /* Volatile so that the store of every kernel result must be performed;
     otherwise the result is dead and the timed calls could be removed. */
  volatile double sink = 0.0;
  clock_t start, stop;
  double seconds;
  int reps, k;

  /* Reject inputs that would divide by zero or select no kernel. */
  if(n <= 0 || i < 1 || i > 8)
    return 0.0;

  reps = 1000000000/n;

  start = clock();

  switch(i){
  case 1: for(k = 0; k < reps; k++) sink = norm(n, x); break;
  case 2: for(k = 0; k < reps; k++) sink = norm2(n, x); break;
  case 3: for(k = 0; k < reps; k++) sink = norm3(n, x); break;
  case 4: for(k = 0; k < reps; k++) sink = norm4(n, x); break;
  case 5: for(k = 0; k < reps; k++) sink = norm5(n, x); break;
  case 6: for(k = 0; k < reps; k++) sink = norm6(n, x); break;
  case 7: for(k = 0; k < reps; k++) sink = norm7(n, x); break;
  case 8: for(k = 0; k < reps; k++) sink = norm8(n, x); break;
  default: break;
  }

  stop = clock();
  (void)sink;

  /* clock() reports (clock_t)-1 when processor time is not available. */
  if(start == (clock_t)-1 || stop == (clock_t)-1)
    return 0.0;

  /* Convert ticks to seconds instead of assuming one tick per microsecond. */
  seconds = (double)(stop - start) / CLOCKS_PER_SEC;
  if(seconds <= 0.0)
    return 0.0;

  return 2.0 * n * reps / seconds / 1.0e6;

}

/* Reassociation (associativity transformation) */
/*
 * Sum of squares of x[0] .. x[n-1], unrolled by 2 with two independent
 * partial sums: within each pair, the first element goes to one sum and
 * the second to the other.  Leftover elements are added to the first
 * partial sum, and the partial sums are combined at the end.
 * Returns 0.0 when n <= 0.
 */
double norm_2(int n, double *x){
  double acc0 = 0.0;
  double acc1 = 0.0;
  int idx = 0;

  /* Main loop: one element per partial sum per pass. */
  while(idx + 1 < n){
    const double *p = x + idx;

    acc0 += p[0]*p[0];
    acc1 += p[1]*p[1];
    idx += 2;
  }

  /* Tail: remaining elements all go to the first partial sum. */
  while(idx < n){
    acc0 += x[idx]*x[idx];
    idx++;
  }

  return acc0+acc1;
}

/*
 * Sum of squares of x[0] .. x[n-1], unrolled by 3 with three independent
 * partial sums, one per position inside each group of three elements.
 * Leftover elements are added to the first partial sum, and the partial
 * sums are combined at the end.  Returns 0.0 when n <= 0.
 */
double norm_3(int n, double *x){
  double acc0 = 0.0;
  double acc1 = 0.0;
  double acc2 = 0.0;
  int idx = 0;

  /* Main loop: one element per partial sum per pass. */
  while(idx + 2 < n){
    const double *p = x + idx;

    acc0 += p[0]*p[0];
    acc1 += p[1]*p[1];
    acc2 += p[2]*p[2];
    idx += 3;
  }

  /* Tail: remaining elements all go to the first partial sum. */
  while(idx < n){
    acc0 += x[idx]*x[idx];
    idx++;
  }

  return acc0+acc1+acc2;
}

/*
 * Sum of squares of x[0] .. x[n-1], unrolled by 4 with four independent
 * partial sums, one per position inside each group of four elements.
 * Leftover elements are added to the first partial sum, and the partial
 * sums are combined at the end.  Returns 0.0 when n <= 0.
 */
double norm_4(int n, double *x){
  double acc0 = 0.0;
  double acc1 = 0.0;
  double acc2 = 0.0;
  double acc3 = 0.0;
  int idx = 0;

  /* Main loop: one element per partial sum per pass. */
  while(idx + 3 < n){
    const double *p = x + idx;

    acc0 += p[0]*p[0];
    acc1 += p[1]*p[1];
    acc2 += p[2]*p[2];
    acc3 += p[3]*p[3];
    idx += 4;
  }

  /* Tail: remaining elements all go to the first partial sum. */
  while(idx < n){
    acc0 += x[idx]*x[idx];
    idx++;
  }

  return acc0+acc1+acc2+acc3;
}

/*
 * Sum of squares of x[0] .. x[n-1], unrolled by 5 with five independent
 * partial sums, one per position inside each group of five elements.
 * Leftover elements are added to the first partial sum, and the partial
 * sums are combined at the end.  Returns 0.0 when n <= 0.
 */
double norm_5(int n, double *x){
  double acc0 = 0.0;
  double acc1 = 0.0;
  double acc2 = 0.0;
  double acc3 = 0.0;
  double acc4 = 0.0;
  int idx = 0;

  /* Main loop: one element per partial sum per pass. */
  while(idx + 4 < n){
    const double *p = x + idx;

    acc0 += p[0]*p[0];
    acc1 += p[1]*p[1];
    acc2 += p[2]*p[2];
    acc3 += p[3]*p[3];
    acc4 += p[4]*p[4];
    idx += 5;
  }

  /* Tail: remaining elements all go to the first partial sum. */
  while(idx < n){
    acc0 += x[idx]*x[idx];
    idx++;
  }

  return acc0+acc1+acc2+acc3+acc4;
}

/*
 * Sum of squares of x[0] .. x[n-1], unrolled by 6 with six independent
 * partial sums, one per position inside each group of six elements.
 * Leftover elements are added to the first partial sum, and the partial
 * sums are combined at the end.  Returns 0.0 when n <= 0.
 */
double norm_6(int n, double *x){
  double acc0 = 0.0;
  double acc1 = 0.0;
  double acc2 = 0.0;
  double acc3 = 0.0;
  double acc4 = 0.0;
  double acc5 = 0.0;
  int idx = 0;

  /* Main loop: one element per partial sum per pass. */
  while(idx + 5 < n){
    const double *p = x + idx;

    acc0 += p[0]*p[0];
    acc1 += p[1]*p[1];
    acc2 += p[2]*p[2];
    acc3 += p[3]*p[3];
    acc4 += p[4]*p[4];
    acc5 += p[5]*p[5];
    idx += 6;
  }

  /* Tail: remaining elements all go to the first partial sum. */
  while(idx < n){
    acc0 += x[idx]*x[idx];
    idx++;
  }

  return acc0+acc1+acc2+acc3+acc4+acc5;
}

/*
 * Sum of squares of x[0] .. x[n-1], unrolled by 7 with seven independent
 * partial sums, one per position inside each group of seven elements.
 * Leftover elements are added to the first partial sum, and the partial
 * sums are combined at the end.  Returns 0.0 when n <= 0.
 */
double norm_7(int n, double *x){
  double acc0 = 0.0;
  double acc1 = 0.0;
  double acc2 = 0.0;
  double acc3 = 0.0;
  double acc4 = 0.0;
  double acc5 = 0.0;
  double acc6 = 0.0;
  int idx = 0;

  /* Main loop: one element per partial sum per pass. */
  while(idx + 6 < n){
    const double *p = x + idx;

    acc0 += p[0]*p[0];
    acc1 += p[1]*p[1];
    acc2 += p[2]*p[2];
    acc3 += p[3]*p[3];
    acc4 += p[4]*p[4];
    acc5 += p[5]*p[5];
    acc6 += p[6]*p[6];
    idx += 7;
  }

  /* Tail: remaining elements all go to the first partial sum. */
  while(idx < n){
    acc0 += x[idx]*x[idx];
    idx++;
  }

  return acc0+acc1+acc2+acc3+acc4+acc5+acc6;
}

/*
 * Sum of squares of x[0] .. x[n-1], unrolled by 8 with eight independent
 * partial sums, one per position inside each group of eight elements.
 * Leftover elements are added to the first partial sum, and the partial
 * sums are combined at the end.  Returns 0.0 when n <= 0.
 */
double norm_8(int n, double *x){
  double acc0 = 0.0;
  double acc1 = 0.0;
  double acc2 = 0.0;
  double acc3 = 0.0;
  double acc4 = 0.0;
  double acc5 = 0.0;
  double acc6 = 0.0;
  double acc7 = 0.0;
  int idx = 0;

  /* Main loop: one element per partial sum per pass. */
  while(idx + 7 < n){
    const double *p = x + idx;

    acc0 += p[0]*p[0];
    acc1 += p[1]*p[1];
    acc2 += p[2]*p[2];
    acc3 += p[3]*p[3];
    acc4 += p[4]*p[4];
    acc5 += p[5]*p[5];
    acc6 += p[6]*p[6];
    acc7 += p[7]*p[7];
    idx += 8;
  }

  /* Tail: remaining elements all go to the first partial sum. */
  while(idx < n){
    acc0 += x[idx]*x[idx];
    idx++;
  }

  return acc0+acc1+acc2+acc3+acc4+acc5+acc6+acc7;
}

/*
 * Time one of the multi-accumulator kernels and report its throughput.
 *
 *   n  number of elements of x passed to the kernel on every call
 *   i  kernel selector: 1 = norm, 2 .. 8 = norm_2 .. norm_8
 *   x  input array with at least n readable elements
 *
 * The selected kernel is called 1000000000/n times, so the total work is
 * about 1e9 element updates regardless of n.  Each element costs one
 * multiply and one add, hence 2*n floating-point operations per call.
 *
 * Returns millions of floating-point operations per second of processor
 * time as measured by clock().  Returns 0.0 when nothing can be measured:
 * n <= 0, i outside 1 .. 8, clock() unavailable, or an elapsed time of
 * zero ticks.
 */
double perf2(int n, int i, double *x){
  /* Volatile so that the store of every kernel result must be performed;
     otherwise the result is dead and the timed calls could be removed. */
  volatile double sink = 0.0;
  clock_t start, stop;
  double seconds;
  int reps, k;

  /* Reject inputs that would divide by zero or select no kernel. */
  if(n <= 0 || i < 1 || i > 8)
    return 0.0;

  reps = 1000000000/n;

  start = clock();

  switch(i){
  case 1: for(k = 0; k < reps; k++) sink = norm(n, x); break;
  case 2: for(k = 0; k < reps; k++) sink = norm_2(n, x); break;
  case 3: for(k = 0; k < reps; k++) sink = norm_3(n, x); break;
  case 4: for(k = 0; k < reps; k++) sink = norm_4(n, x); break;
  case 5: for(k = 0; k < reps; k++) sink = norm_5(n, x); break;
  case 6: for(k = 0; k < reps; k++) sink = norm_6(n, x); break;
  case 7: for(k = 0; k < reps; k++) sink = norm_7(n, x); break;
  case 8: for(k = 0; k < reps; k++) sink = norm_8(n, x); break;
  default: break;
  }

  stop = clock();
  (void)sink;

  /* clock() reports (clock_t)-1 when processor time is not available. */
  if(start == (clock_t)-1 || stop == (clock_t)-1)
    return 0.0;

  /* Convert ticks to seconds instead of assuming one tick per microsecond. */
  seconds = (double)(stop - start) / CLOCKS_PER_SEC;
  if(seconds <= 0.0)
    return 0.0;

  return 2.0 * n * reps / seconds / 1.0e6;

}

/*
 * Benchmark driver.  For every unroll factor 1 .. 8 it measures perf()
 * (single accumulator) and perf2() (multiple accumulators) on the same
 * 1024-element array, prints both figures to stdout and appends them to
 * "unroll-1.dat" and "unroll-2.dat" respectively.
 *
 * Returns 0 on success.  Exits with status 1 if an output file cannot be
 * opened or the input array cannot be allocated; returns 1 if closing an
 * output file fails.
 */
int main(void) {
  FILE *fp, *fp2;
  int num, i;
  int status;
  double *data;
  double result, result2;

  if((fp = fopen("unroll-1.dat","w")) == NULL){
    printf("file open error\n");
    exit(1);
  }

  if((fp2 = fopen("unroll-2.dat","w")) == NULL){
    printf("file open error\n");
    exit(1);
  }

  printf("i\tMflops\n");
  fprintf(fp, "unroll\tMflops\n");
  fprintf(fp2, "unroll\tMflops\n");

  /* n is fixed at 1024 */
  num=1024;

  /* The input is the same for every unroll factor and the kernels only
     read it, so it is allocated and filled once, outside the loop. */
  data = malloc((size_t)num * sizeof *data);
  if(data == NULL){
    printf("memory allocation error\n");
    exit(1);
  }
  create_randam_array(num, data);

  for(i=1; i<=8; i++){
    result = perf(num, i, data);
    result2 = perf2(num, i, data);

    printf("%d\t%f\n", i, result);
    printf("%d\t%f\n", i, result2);
    fprintf(fp, "%d\t%f\n", i, result);
    fprintf(fp2, "%d\t%f\n", i, result2);
  }

  free(data);

  /* fclose() flushes buffered output, so a failure here means the data
     file may be incomplete. */
  status = 0;
  if(fclose(fp) != 0)
    status = 1;
  if(fclose(fp2) != 0)
    status = 1;
  if(status != 0)
    printf("file close error\n");

  return status;
}
