public class Regression {
   static double rsquared = 0;

   // do some simple tests with the regression techniques
   public static void main(String argv[]) {

      // do a simple equation: Y = 1 + 2X
      double rawData[][] = {{1, 0}, {3, 1}, {5, 2}};
      double coef[] = linear_equation(rawData, 1);
      print_equation(coef);

      rawData = new double[100][2];

      // Second order equation: Y = 1 + 2X + 3X^2
      for (int i = 0; i < rawData.length; i++) {
         rawData[i][0] = 1 + 2*i + 3*i*i;
         rawData[i][1] = i;
      }
      print_equation(linear_equation(rawData, 2));

      // Third order equation: Y = 4 + 3X + 2X^2 + 1X^3
      for (int i = 0; i < rawData.length; i++) {
         rawData[i][0] = 4 + 3*i + 2*i*i + 1*i*i*i;
         rawData[i][1] = i;
      }
      print_equation(linear_equation(rawData, 3));

   }

   // Apply least squares to raw data to determine the coefficients for
   // an n-order equation: y = a0*X^0 + a1*X^1 + ... + an*X^n.
   // Returns the coefficients for the solved equation, given a number
   // of y and x data points. The rawData input is given in the form of
   // {{y0, x0}, {y1, x1},...,{yn, xn}}.   The coefficients returned by
   // the regression are {a0, a1,...,an} which corresponds to
   // {X^0, X^1,...,X^n}. The number of coefficients returned is the
   // requested equation order (norder) plus 1.
   static double[] linear_equation(double rawData[][], int norder) {
      double a[][] = new double[norder+1][norder+1];
      double b[] = new double[norder+1];
      double term[] = new double[norder+1];
      double ysquare = 0;

      // step through each raw data entries
      for (int i = 0; i < rawData.length; i++) {

         // sum the y values
         b[0] += rawData[i][0];
         ysquare += rawData[i][0] * rawData[i][0];

         // sum the x power values
         double xpower = 1;
         for (int j = 0; j < norder+1; j++) {
            term[j] = xpower;
            a[0][j] += xpower;
            xpower = xpower * rawData[i][1];
         }

         // now set up the rest of rows in the matrix - multiplying each row by each term
         for (int j = 1; j < norder+1; j++) {
            b[j] += rawData[i][0] * term[j];
            for (int k = 0; k < b.length; k++) {
               a[j][k] += term[j] * term[k];
            }
         }
      }

      // solve for the coefficients
      double coef[] = gauss(a, b);

      // calculate the r-squared statistic
      double ss = 0;
      double yaverage = b[0] / rawData.length;
      for (int i = 0; i < norder+1; i++) {
         double xaverage = a[0][i] / rawData.length;
         ss += coef[i] * (b[i] - (rawData.length * xaverage * yaverage));
      }
      rsquared = ss / (ysquare - (rawData.length * yaverage * yaverage));

      // solve the simultaneous equations via gauss
      return coef;
   }

   // it's been so long since I wrote this, that I don't recall the math
   // logic behind it. IIRC, it's just a standard gaussian technique for
   // solving simultaneous equations of the form: |A| = |B| * |C| where we
   // know the values of |A| and |B|, and we are solving for the coefficients
   // in |C|
   static double[] gauss(double ax[][], double bx[]) {
      double a[][] = new double[ax.length][ax[0].length];
      double b[] = new double[bx.length];
      double pivot;
      double mult;
      double top;
      int n = b.length;
      double coef[] = new double[n];

      // copy over the array values - inplace solution changes values
      for (int i = 0; i < ax.length; i++) {
         for (int j = 0; j < ax[i].length; j++) {
            a[i][j] = ax[i][j];
         }
         b[i] = bx[i];
      }

      for (int j = 0; j < (n-1); j++) {
         pivot = a[j][j];
         for (int i = j+1; i < n; i++) {
            mult = a[i][j] / pivot;
            for (int k = j+1; k < n; k++) {
               a[i][k] = a[i][k] - mult * a[j][k];
            }
            b[i] = b[i] - mult * b[j];
         }
      }

      coef[n-1] = b[n-1] / a[n-1][n-1];
      for (int i = n-2; i >= 0; i--) {
         top = b[i];
         for (int k = i+1; k < n; k ++) {
            top = top - a[i][k] * coef[k];
         }
         coef[i] = top / a[i][i];
      }
      return coef;
   }

   // simple routine to print the equation for inspection
   static void print_equation(double coef[]) {
      for (int i = 0; i < coef.length; i++) {
         if (i == 0) {
            System.out.print("Y = ");
         } else {
            System.out.print(" + ");
         }
         System.out.print(coef[i] + "*X^" + i);
      }
      System.out.println("      [r^2 = " + rsquared + "]");
   }
}

Chris Rathman / Chris.Rathman@tx.rr.com