User:NorwegianBlue/cpp program for calculating d3

From Wikipedia, the free encyclopedia

[edit] Calculating d3 - supplementary info to refdesk question

The program below is an attempt at calculating the control chart constants d2 and d3. It works for d2, but fails miserably for d3. The approximation of the cumulative normal distribution is something I wrote 15 years ago, and I checked it thoroughly then, but haven't done so now. However, the fact that it produces highly accurate results for d2, suggests that an error in the implementation of cumulative_normal_distribution() is unlikely to be the reason why d3() does not work correctly.

#include <iostream>
#include <string>
#include <math.h>

const double Pi = 3.14159265358979323846;
const double ALMOST_ZERO = 9.0e-307;
const double ALMOST_ONE  = 0.9999999999999997;


std::string progname(const char* fullname)
{
    const char* p;
    p = strrchr(fullname, '\\');
    if (p != 0)
    {
        p = p+1;
    }
    else
    {
        p = strrchr(fullname, '/');
        if (p != 0)
        {
            p = p+1;
        }
        else
        {
            p = fullname;
        }
    }
    std::string retval(p);
    return retval;
}



//--------------------------------------------------------------------
//
//  Name: FastNorm - Approximation for the area under the normal distribution
//
//  Syntax:         double FastNorm(double z);
//  Description:    Area under the normal distribution to the left of z.
//                  Adapted from Statistical computation,
//                  J.H. Maindonald, 1984 Wiley & Sons, New York 1984.
//                  (ISBN 0-471-86452-8)
// Note:            This function should usually NOT be called by user
//                  programs, because it is not very accurate for the central
//                  part of the normal distribution (i.e. +/- ~4). Therefore,
//                  user programs should normally use cumulative_normal_distribution()
//                  unless speed is essential. However, in the tails of the distribution,
//                  cumulative_normal_distribution() calls FastNorm().
// ---------------------------------------------------------------------------------

static double FastNorm(double Z)
{   double X, M0, M1, M2, M3, M4, P;

    //  Tail calculation: Peizer & Pratt 1968
    //  Maindonald fig. 9.4 p. 292

    X = fabs(Z);
    if (X < 1.9)
    {   M0 = 1.0/(1+0.33267*X);
        M1 = 0.4361836;
        M2 = -0.1201676;
        M3 = 0.937298;
        M4 = (X*X)/2.0;
        if (M4 <= ALMOST_ZERO) P = 0.5; // Avoid log(zero)
        else
        {   M4 = exp(M4) * sqrt(2.0*Pi);
            P  = M0*(M1+M2*M0+M3*sqrt(M0))/M4;
        }
    }
    else
    {   M1 = X*X;
        M2 = 1.0-1.0/(M1+3-1.0/(0.22*(M1+3.2)));
        M3 = log(M2) - log(X) - M1/2.0 - 0.9189386848;
        P  = exp(M3);
    }
    return (Z < 0.0) ? (P) : (1.0-P);
}




// ---------------------------------------------------------------------------------
//
//  Name:           cumulative_normal_distribution
//  Purpose:        Approximation for the area under the normal distribution
//  Syntax:         double cumulative_normal_distribution(double z);
//  Description:    Calculates the area under the normal distribution to the left of z.
//                  Adapted from Statistical computation, J.H. Maindonald,
//                  1984 Wiley & Sons, New York 1984, ISBN 0-471-86452-8
// ---------------------------------------------------------------------------------

double cumulative_normal_distribution(double X)
{   // Calculates area to the left of X, i.e. from (-infinity) to X.
    int Negative_Argument;
    double P,S,C,I2;
    int I;

    Negative_Argument         = (X < 0.0);
    if (Negative_Argument)  X = -X; // calculate for positive argument

    if (fabs(X) < 4.1)
    {   // Morans approximation; fig 9.3 p 292, Maindonald
        S      = 0.0;
        C      = X * sqrt(2.0)/3.0;
        for (I = 0; I <= 12; I++)
        {   I2 = I+0.5;
            S  = S+sin(I2*C)*exp(-I2*I2/9)/I2;
        }
        P = 0.5 - S/Pi;
    }
    else P = FastNorm(-X);
    return (Negative_Argument) ? (P) : (1.0-P);
}



double d2(int n)
{
    double x, dx;
    dx = 0.01;
    double sum = 0.0;
    for (x = -12; x <= 12; x = x + dx)
    {
        double alpha = cumulative_normal_distribution(x);
        sum = sum + (1 - pow(1-alpha,n) - pow(alpha,n))*dx;
    }
    return sum;
}


double d3(int n)
{
    double x_1, x_n, dx_1, dx_n;
    dx_1 = dx_n = 0.01;
    double sum = 0.0;
    for (x_n = -12; x_n <= 12; x_n = x_n + dx_n)
    {
        double alpha_n = cumulative_normal_distribution(x_n);
        for (x_1 = -12; x_1 <= x_n; x_1 = x_1 + dx_1)
        {
            double alpha_1 = cumulative_normal_distribution(x_1);
            // ERROR - mistake in original paper! sum = sum + (1 - pow(alpha_1,n) - pow(1 - alpha_n,n) - pow(alpha_1 - alpha_n, n))*dx_1*dx_n;
            sum = sum + (1 - pow(alpha_1,n) - pow(1 - alpha_n,n) + pow(alpha_1 - alpha_n, n))*dx_1*dx_n;
        }
    }
    double variance = 2*sum - pow(d2(n),2);
    return sqrt(variance);
}



main(int argc, char *argv[])
{
    if (argc != 3)
    {
        std::cerr << "Syntax: " << progname(argv[0]) << " <option> " << "<integer>" << '\n';
        std::cerr << "where <option> is either -d2 or -d3, and <integer> is an integer greater than 2";
        exit(2);
    }
    std::string option(argv[1]);
    int integer = atoi(argv[2]);

    if (integer < 2)
    {
        std::cerr << "Error: the second argument should be an integer greater than 2." << '\n';
        exit(2);
    }

    double result(-1);
    if (option == "-d2")
    {
        result = d2(integer);
    }
    else if (option == "-d3")
    {
        result = d3(integer);
    }
    else
    {
        std::cerr << "Error: invalid option: " << option << '\n';
        exit(2);
    }
    std::cout << result;
    return 0;
}