Simple C example of add/sub/mul/div operations in double-precision floating-points using a single-precision Floating-point system -


i working on algorithm requires calculations in large numbers, upto e+30. using 32 bit system compiler support of 32 bits long/float/double. far, searching online, i've learned single-precision floating points (fps) can used double-precision fps.

from question asked earlier (emulate “double” using 2 “float”s) found paper has algorithm work double-precision fps in gpus. confusing me implement in c. need 4 basic mathematical operations. there way find example me understand better?

thanks in advance.

here code working on. might have errors can not see, suggestions appreciated rectify error preety trying implement. in algorithm, polynomial_order should able go forth order (can settle @ third order if standard deviation smaller). few things not sure 1) procedures make_float() , make_float() correct or not, 2) use of make_float() in program.

#define polynomial_order    (3) #define tc_table_size   (14)  typedef struct vector_float2{ float x; float y; }float2;  typedef struct {     float tc0;     float tc1;     float tc2;     float tc3; }polynomials;  typedef struct  {     int16_t temp;     int16_t comp;    } tempcomppair;  volatile tempcomppair tctable[tc_table_size] = {{22452,1651},                                                 {25318,1444},                                                 {28268,1133},                                                 {31120,822},                                                 {34027,511},                                                 {36932,185},                                                 {39770,-81},                                                 {42685,-288},                                                 {45531,-407},                                                 {48425,-632},                                                 {51401,-703},                                                 {54460,-1143},                                                 {57202,-1420},                                                 {60027,-1652}};  polynomials polynomials; float matrix[tc_table_size][tc_table_size] = {0}; float average[tc_table_size] = {0};  float make_float(float x, float y) {     return x+y; }  float2 make_float2(float a, float b) {     float2 f2 = {a,b};     return f2; }  float2 quicktwosum(float a, float b) {     float s = a+b;     float e = b - (s - a);      float2 result = {s, e};     return result; }  float2 twosum(float a, float b) {     volatile float s = + b;     float v = s - a;     float e = (a - (s - v)) + (b - v);     float2 result = {s , e};     return result; }  float2 df64_add(float2 a, float2 b) {     float2 s,t;     s = twosum(a.x, b.x);     t = twosum(a.y, b.y);     s.y += t.x;     s = quicktwosum(s.x, s.y);     s.y += t.y;     s = quicktwosum(s.x, s.y);     return s; }  float2 split(float a) {     const float split = 4097;       //(1<<12) + 1     float t = *split;     float a_hi = t - (t - a);     float a_lo = - a_hi;     float2 result = {a_hi, a_lo};     return result; }  float2 twoprod(float a, float b) {     float p = a*b;     float2 = split(a);     float2 bs = split(b);     float err = ((as.x * bs.x - p)                 + as.x * bs.y + as.y * bs.x)                 + as.y * bs.y;      float2 result = {p, err};     return result; }  float2 df64_mult(float2 a, float2 b) {     float2 p;      p = twoprod(a.x,b.x);     p.y += a.x * b.y;     p.y += a.y * b.x;     p = quicktwosum(p.x,p.y);      return p; }  float2 calculate_power(float base, int pow) {     int = 0;      float2 base_f2 = make_float2(base,0);     float2 result_f2 = {1,0};      if(pow == 0)     {         return result_f2;     }      if(pow > 0)     {         if(pow == 1)         {             return base_f2;         }         else         {             for(i = 0; < pow; i++)             {                 result_f2 = df64_mult(result_f2,base_f2);             }             return result_f2;         }     }     else     {         return result_f2;         //mechanism negative powers     }  }  void tcomp_polynomial() {     int i;     int j;     int k;     int size;     float temp;     float2 sum = {0,0};     float2 result0 = {0,0};     float2 result1 = {0,0};      float x[tc_table_size];     float y[tc_table_size];      for(i = 0; < tc_table_size; i++)     {         x[i] = (float) tctable[i].temp;         y[i] = (float) tctable[i].comp;     }      size = i;      for(i = 0; <= polynomial_order; i++)     {         for(j = 0; j <= polynomial_order; j++)         {             sum.x = 0;             sum.y = 0;              for(k = 0; k < size; k++)             {                 // expression simplified below:  **sum += pow(x[k],i+j)**                  result0 = calculate_power(x[k], i+j);                 sum = df64_add(result0,sum);             }              matrix[i][j] = make_float(sum.x,sum.y);         }     }      for(i = 0; <= polynomial_order; i++)     {         sum.x = 0;         sum.y = 0;          for(j = 0; j < size; j++)         {             // expression simplified below: **sum += y[j] * pow(x[j],i)**             result0 = calculate_power(x[j], i);             result1 = df64_mult( result0 , make_float2(y[j],0) );             sum = df64_add(result1,sum);         }          average[i] = make_float(sum.x,sum.y);     }      for(i = 0; <= polynomial_order; i++)     {         for(j = 0; j <= polynomial_order; j++)         {             if(j != i)             {                 if(matrix[i][i]!= 0)                 {                     temp = matrix[j][i]/matrix[i][i];                 }                  for(k = i; k < polynomial_order; k++)                 {                     matrix[j][k] -= temp*matrix[i][k];                 }                 average[j] -= temp*average[i];              }         }     }      if(matrix[0][0] != 0)     {         polynomials.tc0 = average[0]/matrix[0][0];     }     if(matrix[1][1] != 0)     {         polynomials.tc1 = average[1]/matrix[1][1];     }      if(matrix[2][2] != 0)     {         polynomials.tc2 = average[2]/matrix[2][2];     }      if(matrix[3][3] != 0)     {         polynomials.tc3 = average[3]/matrix[3][3];     } } 

and use struct polynomials.tc0/1/2/3 in below expression

// y = t^3 * x3 + t^2 * x2 + t^1 * x1 + x0 ;  double calculate_equation(uint16_t temp) {     double y;      if(polynomial_order == 1)     {         y = polynomials.tc1*(double)temp + polynomials.tc0;      }     else if(polynomial_order == 2)     {         y = (polynomials.tc2 * (double)temp + polynomials.tc1)*(double)temp + polynomials.tc0;       }     else if(polynomial_order == 3)     {         y = ((polynomials.tc3 * (double)temp + polynomials.tc2)*(double)temp + polynomials.tc1)*(double)temp + polynomials.tc0;      }     else if(polynomial_order == 4)     {         y = (((polynomials.tc4 * (double)temp + polynomials.tc3)*(double)temp + polynomials.tc2)*(double)temp + polynomials.tc1)*(double)temp + polynomials.tc0;         }      return y; } 

and standard deviation calculated follows:

//sqrt(sigma(error^2)) for(i = 0; < tc_table_size; i++)     {         actual_comp[i] =(int) calculate_equation(tctable[i].temp);         error[i] = tctable[i].comp - actual_comp[i] ;         error_sqr += error[i]*error[i];          printf("%u\t%d\t\t%e\n", tctable[i].temp, tctable[i].comp, actual_comp[i] );     }     error_sqrt = sqrt(error_sqr); 

reference: http://hal.archives-ouvertes.fr/docs/00/06/33/56/pdf/float-float.pdf guillaume da graça, david defour implementation of float-float operators on graphics hardware, 7th conference on real numbers , computers, rnc7.

i able implement code without using double precision calculations in range of float. here's implementation, let me know if can optimize better.

typedef struct {   int64_t tc0;     int64_t tc1;     int64_t tc2;     int64_t tc3;     int64_t tc4; }polynomials;  polynomials polynomials = {0,0,0,0,0}; int16_t tempcompindex; int64_t x[tc_table_size]; int64_t y[tc_table_size];  float matrix[polynomial_order+1][polynomial_order+1] = {0}; float average[polynomial_order+1] = {0};  void tcomp_polynomial() {     int i;     int j;     int k;     int size;     float temp;     float sum = 0;     float powr = 0;     float prod;      int64_t x[tc_table_size];     int64_t y[tc_table_size];      for(i = 0; < tc_table_size; i++)     {         x[i] = (int64_t) tctable[i].temp;         y[i] = (int64_t) tctable[i].comp<<precision;         printf("x: %lld, y:%lld\n",x[i],y[i]);     }      size = i;      for(i = 0; <= polynomial_order; i++)     {         for(j = 0; j <= polynomial_order; j++)         {             sum = 0;             powr = 0;             for(k = 0; k < size; k++)             {                        //printf("x[%d]: %ld, i: %d ,j: %d ", k, x[k],i,j);                 powr = pow(x[k],i+j);                 //printf("power: %f, sum: %f\n ",powr,sum);                 sum +=  powr;                 //printf("%f\r\n",powr);                 //printf("sum: %lf\n",sum );             }              matrix[i][j] = sum;             printf("sum: %g\n",sum);         }     }      for(i = 0; <= polynomial_order; i++)     {         sum = 0;         powr = 0;          for(j = 0; j < size; j++)         {             //sum += y[j] * pow(x[j],i)             //printf("sum: %lf, y[%d]: %lf, x[%d]: %lf^%d  ",sum,j,y[j], i, x[j],i);             //printf("x[%d]:%lld ^ %d\t",j,x[j],i);             powr = (float) pow(x[j],i);             printf("powr: %f\t",powr);              prod = (float) y[j] * powr;             printf("prod:%f \t %lld \t", prod,y[j]);              sum += (float) prod;             printf("sum: %f \n",sum);         }          average[i] = sum;         //printf("#avg: %f\n",average[i]);     }     printf("\n\n");      for(i = 0; <= polynomial_order; i++)     {         for(j = 0; j <= polynomial_order; j++)         {             if(j != i)             {                    if(matrix[i][i]!= 0)                 {                     //printf("matrix%d%d: %g / matrix%d%d: %g =\t ",j,i,matrix[j][i],i,i,matrix[i][i]);                     temp = matrix[j][i]/matrix[i][i];                     //printf("temp: %g\n",temp);                 }                     for(k = i; k < polynomial_order; k++)                 {                        matrix[j][k] -= temp*matrix[i][k];                     //printf("matrix[%d][%d]:%g, %g, matrix[%d][%d]:%g\n",j,k,matrix[j][k], temp,i,k,matrix[i][k]);                 }                 //printf("\n\n");                 //print_matrix();                 printf("\n\n");                  //printf("avg%d: %g\ttemp: %g\tavg%d: %g\n\n",j,average[j],temp,i,average[i]);                 average[j] -= temp*average[i];                 printf("#avg%d:%g\n",j,average[j]);                 //print_average();             }         }     }      print_matrix();     print_average();     /* calculate polynomial coefficients (n+1) based on polynomial_order (n) */ #ifndef polynomial_order  #elif polynomial_order == 0     if(matrix[0][0] != 0)     {         polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);     } #elif polynomial_order == 1     if(matrix[1][1] != 0)     {         polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);         polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);     } #elif polynomial_order == 2     if(matrix[2][2] != 0)     {         polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);         polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);         polynomials.tc2 = (int64_t) (average[2]/matrix[2][2]);     } #elif polynomial_order == 3     if(matrix[3][3] != 0)     {         polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);         polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);         polynomials.tc2 = (int64_t) (average[2]/matrix[2][2]);         polynomials.tc3 = (int64_t) (average[3]/matrix[3][3]);     } #elif polynomial_order == 4     if(matrix[4][4] != 0)     {         polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);         polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);         polynomials.tc2 = (int64_t) (average[2]/matrix[2][2]);         polynomials.tc3 = (int64_t) (average[3]/matrix[3][3]);         polynomials.tc4 = (int64_t) (average[4]/matrix[4][4]);     } #endif      }     int16_t calculate_equation(uint16_t temp) {     int64_t y = 0;     int16_t tempcomp = 0;  #ifndef polynomial_order #elif polynomial_order == 0         y = polynomials.tc0; #elif polynomial_order == 1         y = polynomials.tc1* ((int64_t)temp) + polynomials.tc0; #elif polynomial_order == 2         y = (polynomials.tc2 * ((int64_t)temp) + polynomials.tc1)*(int64_t)temp + polynomials.tc0; #elif polynomial_order == 3         y = ((polynomials.tc3 * ((int64_t)temp) + polynomials.tc2)*((int64_t)temp) + polynomials.tc1)*((int64_t)temp) + polynomials.tc0; #elif polynomial_order == 4         y = (((polynomials.tc4 * (int64_t)temp + polynomials.tc3)*(int64_t)temp + polynomials.tc2)*(int64_t)temp + polynomials.tc1)*(int64_t)temp + polynomials.tc0; #endif     tempcomp = (int16_t) (y>>precision_bits);      return tempcomp; }  void main(){ int16_t tempcomp = 0; tempcompvalue = (int16_t) calculate_equation(mon_temp); } 

note: calculate_equation() being called once second , required not use float in order avoid floating point arithmetic, hence using non-float variables in function.

it working right me , haven't discovered bug after initial testing. every 1 taking interest in post, if not answer, got learn new techniques. , @chux.


Comments

Popular posts from this blog

networking - Vagrant-provisioned VirtualBox VM is not reachable from Ubuntu host -

c# - ASP.NET Core - There is already an object named 'AspNetRoles' in the database -

ruby on rails - ArgumentError: Missing host to link to! Please provide the :host parameter, set default_url_options[:host], or set :only_path to true -