JKQtPlotter/jkqtpstatregression_8h_source.html

/*

    Copyright (c) 2008-2024 Jan W. Krieger (<jan@jkrieger.de>)


    last modification: $LastChangedDate$  (revision $Rev$)


    This software is free software: you can redistribute it and/or modify

    it under the terms of the GNU Lesser General Public License (LGPL) as published by

    the Free Software Foundation, either version 2.1 of the License, or

    (at your option) any later version.


    This program is distributed in the hope that it will be useful,

    but WITHOUT ANY WARRANTY; without even the implied warranty of

    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

    GNU Lesser General Public License (LGPL) for more details.


    You should have received a copy of the GNU Lesser General Public License (LGPL)

    along with this program.  If not, see <http://www.gnu.org/licenses/>.

*/


#ifndef JKQTPSTATREGRESSION_H_INCLUDED

#define JKQTPSTATREGRESSION_H_INCLUDED


#include <stdint.h>

#include <cmath>

#include <stdlib.h>

#include <string.h>

#include <iostream>

#include <stdio.h>

#include <limits>

#include <vector>

#include <utility>

#include <cfloat>

#include <ostream>

#include <iomanip>

#include <sstream>

#include "jkqtmath/jkqtmath_imexport.h"

#include "jkqtmath/jkqtplinalgtools.h"

#include "jkqtmath/jkqtparraytools.h"

#include "jkqtcommon/jkqtpdebuggingtools.h"

#include "jkqtmath/jkqtpstatbasics.h"

#include "jkqtmath/jkqtpstatpoly.h"


/*! \brief calculate the linear regression coefficients for a given data range \a firstX / \a firstY ... \a lastX / \a lastY where the model is \f$ f(x)=a+b\cdot x \f$
           So this function solves the least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i\left(y_i-(a+b\cdot x_i)\right)^2 \f]
    \ingroup jkqtptools_math_statistics_regression

    \tparam InputItX standard iterator type of \a firstX and \a lastX.
    \tparam InputItY standard iterator type of \a firstY and \a lastY.
    \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
    \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
    \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
    \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
    \param[in,out] coeffA returns the offset of the linear model (its input value is used as the fixed offset if \a fixA is \c true)
    \param[in,out] coeffB returns the slope of the linear model (its input value is used as the fixed slope if \a fixB is \c true)
    \param fixA if \c true, the offset coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
    \param fixB if \c true, the slope coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used

    If both parameters are free, this function computes internally:
        \f[ a=\overline{y}-b\cdot\overline{x} \f]
        \f[ b=\frac{\sum x_iy_i-N\cdot\overline{x}\cdot\overline{y}}{\sum x_i^2-N\cdot(\overline{x})^2} \f]

    If only the offset \f$ a \f$ is fixed, the least-squares slope is:
        \f[ b=\frac{\sum x_iy_i-a\cdot\sum x_i}{\sum x_i^2} \f]

    If only the slope \f$ b \f$ is fixed, the least-squares offset is:
        \f[ a=\overline{y}-b\cdot\overline{x} \f]

    If both parameters are fixed, the function returns immediately and does not touch \a coeffA or \a coeffB.

    \note The x- and y-range are traversed in lockstep until the shorter one ends. Pairs for which
          JKQTPIsOKFloat() rejects the x- or the y-value are skipped and do not count towards \f$ N \f$.

    \image html datastore_regression_lin.png
*/
template <class InputItX, class InputItY>
inline void jkqtpstatLinearRegression(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false) {
    // nothing to estimate if both parameters are fixed
    if (fixA&&fixB) return;

    // keep the iterator difference type, so large ranges are not narrowed to int
    const auto Nx=std::distance(firstX,lastX);
    const auto Ny=std::distance(firstY,lastY);

    JKQTPASSERT(Nx>1 && Ny>1);

    // accumulate the sums needed for the closed-form solution over all usable (x,y)-pairs
    double sumx=0, sumy=0, sumxy=0, sumx2=0;
    size_t N=0;
    auto itX=firstX;
    auto itY=firstY;
    for (; itX!=lastX && itY!=lastY; ++itX, ++itY) {
        const double fit_x=jkqtp_todouble(*itX);
        const double fit_y=jkqtp_todouble(*itY);
        if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y)) {
            sumx=sumx+fit_x;
            sumy=sumy+fit_y;
            sumxy=sumxy+fit_x*fit_y;
            sumx2=sumx2+fit_x*fit_x;
            N++;
        }
    }
    const double NN=static_cast<double>(N);
    JKQTPASSERT_M(NN>1, "too few datapoints");

    if (!fixA && !fixB) {
        // both parameters free: classic simple linear regression
        coeffB=(sumxy-sumx*sumy/NN)/(sumx2-sumx*sumx/NN);
        coeffA=sumy/NN-coeffB*sumx/NN;
    } else if (fixA && !fixB) {
        // offset fixed: d/db sum (y_i-a-b*x_i)^2 = 0  =>  b = (sum x_i*y_i - a*sum x_i) / sum x_i^2
        // (this stays well-defined for data with mean(x)==0, unlike (mean(y)-a)/mean(x))
        coeffB=(sumxy-coeffA*sumx)/sumx2;
    } else if (!fixA && fixB) {
        // slope fixed: d/da sum (y_i-a-b*x_i)^2 = 0  =>  a = mean(y) - b*mean(x)
        coeffA=sumy/NN-coeffB*sumx/NN;
    }
}


/*! \brief calculate the weighted linear regression coefficients for a given data range \a firstX / \a firstY / \a firstW ... \a lastX / \a lastY / \a lastW  where the model is \f$ f(x)=a+b\cdot x \f$
           So this function solves the least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_iw_i^2\cdot\left(y_i-(a+b\cdot x_i)\right)^2 \f]
    \ingroup jkqtptools_math_statistics_regression

    \tparam InputItX standard iterator type of \a firstX and \a lastX.
    \tparam InputItY standard iterator type of \a firstY and \a lastY.
    \tparam InputItW standard iterator type of \a firstW and \a lastW.
    \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
    \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
    \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
    \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
    \param firstW iterator pointing to the first item in the weight-dataset to use \f$ w_1 \f$
    \param lastW iterator pointing behind the last item in the weight-dataset to use \f$ w_N \f$
    \param[in,out] coeffA returns the offset of the linear model (its input value is used as the fixed offset if \a fixA is \c true)
    \param[in,out] coeffB returns the slope of the linear model (its input value is used as the fixed slope if \a fixB is \c true)
    \param fixA if \c true, the offset coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
    \param fixB if \c true, the slope coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
    \param fWeightDataToWi an optional function, which is applied to the data from \a firstW ... \a lastW to convert them to weights, i.e. \c wi=fWeightDataToWi(*itW)
                           e.g. if you use data used to draw error bars, you can use jkqtp_inversePropSaveDefault(). The default is jkqtp_identity(), which just returns the values.
                           In the case of jkqtp_inversePropSaveDefault(), a datapoint x,y has a large weight if its error is small, and in the case of jkqtp_identity() its weight
                           is directly proportional to the given value.

    If both parameters are free, this function internally computes:
        \f[ a=\frac{\overline{y}-b\cdot\overline{x}}{\overline{w^2}} \f]
        \f[ b=\frac{\overline{w^2}\cdot\overline{x\cdot y}-\overline{x}\cdot\overline{y}}{\overline{x^2}\cdot\overline{w^2}-\overline{x}^2} \f]

    If only the offset \f$ a \f$ is fixed, the weighted least-squares slope is:
        \f[ b=\frac{\overline{x\cdot y}-a\cdot\overline{x}}{\overline{x^2}} \f]

    If only the slope \f$ b \f$ is fixed, the weighted least-squares offset is:
        \f[ a=\frac{\overline{y}-b\cdot\overline{x}}{\overline{w^2}} \f]

    If both parameters are fixed, the function returns immediately and does not touch \a coeffA or \a coeffB.

    Here the "averages" are weighted sums, defined in terms of a weight vector \f$ w_i\f$:
        \f[ \overline{x}=\sum\limits_iw_i^2\cdot x_i \f]
        \f[ \overline{y}=\sum\limits_iw_i^2\cdot y_i \f]
        \f[ \overline{x\cdot y}=\sum\limits_iw_i^2\cdot x_i\cdot y_i \f]
        \f[ \overline{x^2}=\sum\limits_iw_i^2\cdot x_i^2 \f]
        \f[ \overline{w^2}=\sum\limits_iw_i^2 \f]

    \note The x-, y- and weight-range are traversed in lockstep until the shortest one ends. Triples for which
          JKQTPIsOKFloat() rejects the x-value, the y-value or the squared weight are skipped.

    \image html datastore_regression_linweight.png

*/
template <class InputItX, class InputItY, class InputItW>
inline void jkqtpstatLinearWeightedRegression(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false, std::function<double(double)> fWeightDataToWi=&jkqtp_identity<double>) {
    // nothing to estimate if both parameters are fixed
    if (fixA&&fixB) return;

    // keep the iterator difference type, so large ranges are not narrowed to int
    const auto Nx=std::distance(firstX,lastX);
    const auto Ny=std::distance(firstY,lastY);
    const auto Nw=std::distance(firstW,lastW);

    JKQTPASSERT(Nx>1 && Ny>1 && Nw>1);

    // accumulate the w^2-weighted sums needed for the closed-form solution
    double sumx=0, sumy=0, sumxy=0, sumx2=0, sumw2=0;
    size_t N=0;
    auto itX=firstX;
    auto itY=firstY;
    auto itW=firstW;
    for (; itX!=lastX && itY!=lastY && itW!=lastW; ++itX, ++itY, ++itW) {
        const double fit_x=jkqtp_todouble(*itX);
        const double fit_y=jkqtp_todouble(*itY);
        // the weight data is first mapped through fWeightDataToWi and then squared
        const double fit_w2=jkqtp_sqr(fWeightDataToWi(jkqtp_todouble(*itW)));
        if (JKQTPIsOKFloat(fit_x)&&JKQTPIsOKFloat(fit_y)&&JKQTPIsOKFloat(fit_w2)) {
            sumx=sumx+fit_w2*fit_x;
            sumy=sumy+fit_w2*fit_y;
            sumxy=sumxy+fit_w2*fit_x*fit_y;
            sumx2=sumx2+fit_w2*fit_x*fit_x;
            sumw2=sumw2+fit_w2;
            N++;
        }
    }
    const double NN=static_cast<double>(N);
    JKQTPASSERT_M(NN>1, "too few datapoints");

    if (!fixA && !fixB) {
        // both parameters free: solve the 2x2 weighted normal equations
        coeffB=(sumxy*sumw2-sumx*sumy)/(sumx2*sumw2-sumx*sumx);
        coeffA=(sumy-coeffB*sumx)/sumw2;
    } else if (fixA && !fixB) {
        // offset fixed: d/db sum w_i^2*(y_i-a-b*x_i)^2 = 0  =>  b = (sum w^2*x*y - a*sum w^2*x) / sum w^2*x^2
        // (this stays well-defined when the weighted sum of x vanishes)
        coeffB=(sumxy-coeffA*sumx)/sumx2;
    } else if (!fixA && fixB) {
        // slope fixed: d/da sum w_i^2*(y_i-a-b*x_i)^2 = 0  =>  a = (sum w^2*y - b*sum w^2*x) / sum w^2
        coeffA=(sumy-coeffB*sumx)/sumw2;
    }
}


/*! \brief calculate the (robust) iteratively reweighted least-squares (IRLS) estimate for the parameters of the model \f$ f(x)=a+b\cdot x \f$
           for a given data range \a firstX / \a firstY ... \a lastX / \a lastY
           So this function finds an outlier-robust solution to the optimization problem:
           \f[ (a^\ast,b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i|a+b\cdot x_i-y_i|^p \f]
    \ingroup jkqtptools_math_statistics_regression

    \tparam InputItX standard iterator type of \a firstX and \a lastX.
    \tparam InputItY standard iterator type of \a firstY and \a lastY.
    \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
    \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
    \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
    \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
    \param[in,out] coeffA returns the offset of the linear model
    \param[in,out] coeffB returns the slope of the linear model
    \param fixA if \c true, the offset coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
    \param fixB if \c true, the slope coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
    \param p regularization parameter, the optimization problem is formulated in the \f$ L_p \f$ norm, using this \a p (see image below for an example)
    \param iterations the number of iterations the IRLS algorithm performs (the initial fit counts as the first one)

    This is a simple form of the IRLS algorithm to estimate the parameters a and b in a linear model \f$ f(x)=a+b\cdot x \f$.
    This algorithm solves the optimization problem for a \f$ L_p\f$-norm:
      \f[ (a^\ast,b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i|a+b\cdot x_i-y_i|^p \f]
    by iteratively optimizing weights \f$ \vec{w} \f$ and solving a weighted least squares problem in each iteration:
      \f[ (a_n,b_n)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i|a+b\cdot x_i-y_i|^{(p-2)}\cdot|a+b\cdot x_i-y_i|^2 \f]

    The IRLS-algorithm works as follows:
      - calculate initial \f$ a_0\f$ and \f$ b_0\f$ with a regression from x and y in which all weights are 1
      - perform a number of iterations (parameter \a iterations ). In each iteration \f$ n\f$:
          - calculate the error vector \f$\vec{e}\f$: \f[ e_i = a+b\cdot x_i -y_i \f]
          - estimate new weights \f$\vec{w}\f$: \f[ w_i=|e_i|^{(p-2)/2} \f]
            (here \f$ |e_i| \f$ is bounded from below by \c JKQTP_EPSILON*100 )
          - calculate new estimates \f$ a_n\f$ and \f$ b_n\f$ with weighted regression from \f$ \vec{x}\f$ and \f$ \vec{y}\f$ and \f$ \vec{w}\f$
        .
      - return the last estimates \f$ a_n\f$ and \f$ b_n\f$
    .

    \image html irls.png

    \image html datastore_regression_linrobust_p.png

    \see https://en.wikipedia.org/wiki/Iteratively_reweighted_least_squares, C. Sidney Burrus: "Iterative Reweighted Least Squares", <a href="http://cnx.org/content/m45285/latest/">http://cnx.org/content/m45285/latest/</a>
*/
template <class InputItX, class InputItY>
inline void jkqtpstatRobustIRLSLinearRegression(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false, double p=1.1, int iterations=100) {
    // with both parameters fixed there is nothing left to estimate
    if (fixA&&fixB) return;

    const int Nx=std::distance(firstX,lastX);
    const int Ny=std::distance(firstY,lastY);
    JKQTPASSERT(Nx>1 && Ny>1);

    // one weight per (x,y)-pair, all starting at 1
    const int pairCount=std::min(Nx,Ny);
    std::vector<double> weights;
    if (pairCount>0) weights.assign(static_cast<size_t>(pairCount), 1.0);

    // initial estimate: weighted fit in which every weight is 1
    double aEst=coeffA, bEst=coeffB;
    jkqtpstatLinearWeightedRegression(firstX, lastX, firstY, lastY, weights.begin(), weights.end(), aEst, bEst, fixA, fixB, &jkqtp_identity<double>);

    // loop-invariant pieces of the weight update w_i=|e_i|^((p-2)/2)
    const double weightExponent=(p-2.0)/2.0;
    const double minAbsResidual=JKQTP_EPSILON*100.0;

    // the initial fit above counts as the first iteration, so iterations-1 refinement rounds remain
    for (int round=1; round<iterations; ++round) {
        // derive new weights from the residuals of the current estimate
        auto xIt=firstX;
        auto yIt=firstY;
        for (size_t i=0; i<weights.size(); ++i, ++xIt, ++yIt) {
            const double xi=*xIt;
            const double yi=*yIt;
            const double residual=aEst+bEst*xi-yi;
            // the residual magnitude is bounded from below before it is raised to the weight exponent
            weights[i]=pow(std::max<double>(minAbsResidual, fabs(residual)), weightExponent);
        }
        // re-solve the weighted linear least-squares problem with the updated weights
        jkqtpstatLinearWeightedRegression(firstX, lastX, firstY, lastY, weights.begin(), weights.end(), aEst, bEst, fixA, fixB, &jkqtp_identity<double>);
    }

    coeffA=aEst;
    coeffB=bEst;
}


/*! \brief when performing linear regression, different target functions can be fitted, if the input data is transformed accordingly. This library provides the options in this enum by default.

    \ingroup jkqtptools_math_statistics_regression

    Each enumerator names one two-parameter model \f$ f(x) \f$ with the parameters \f$ a \f$ and \f$ b \f$.

    */


enum class JKQTPStatRegressionModelType {

    Linear,       /*!< \brief linear model \f$ f(x)=a+b\cdot x \f$ */

    PowerLaw,     /*!< \brief power law model \f$ f(x)=a\cdot x^b \f$ */

    Exponential,  /*!< \brief exponential model \f$ f(x)=a\cdot \exp(b\cdot x) \f$ */

    Logarithm,   /*!< \brief logarithmic model \f$ f(x)=a+b\cdot \ln(x) \f$ */

};


/*! \brief Generates functors \c f(x,a,b) for the models from JKQTPStatRegressionModelType in \a type

    \ingroup jkqtptools_math_statistics_regression

    \param type the regression model for which a functor is requested
    \return a functor taking three \c double arguments (documented above as \c x, \c a, \c b ) and returning a \c double

    \note Only the declaration is visible in this header; the definition is provided elsewhere.

    */

jkqtmath_LIB_EXPORT std::function<double(double, double, double)> jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType type);


/*! \brief Generates a LaTeX string for the models from JKQTPStatRegressionModelType in \a type

    \ingroup jkqtptools_math_statistics_regression

    \param type the regression model to describe
    \param a value of the model parameter \f$ a \f$ (presumably inserted into the generated formula - confirm against the implementation)
    \param b value of the model parameter \f$ b \f$ (presumably inserted into the generated formula - confirm against the implementation)
    \return the LaTeX string as a QString

    */

jkqtmath_LIB_EXPORT QString jkqtpstatRegressionModel2Latex(JKQTPStatRegressionModelType type, double a, double b);


/*! \brief Generates functors \c f(x) for the models from JKQTPStatRegressionModelType in \a type and binds the parameter values \a a and \a b to the returned function

    \ingroup jkqtptools_math_statistics_regression

    \param type the regression model for which a functor is requested
    \param a value of the model parameter \f$ a \f$ to bind
    \param b value of the model parameter \f$ b \f$ to bind
    \return a functor taking a single \c double argument (documented above as \c x ) and returning a \c double

    */

jkqtmath_LIB_EXPORT std::function<double(double)> jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType type, double a, double b);


/*! \brief Generates the transformation function for x-data (\c result.first ) and y-data (\c result.second ) for each regression model in JKQTPStatRegressionModelType in \a type

    \ingroup jkqtptools_math_statistics_regression

    \internal

    \param type the regression model whose data transformations are requested
    \return a pair of functors: \c first is applied to every x-value and \c second to every y-value
            (this is how jkqtpstatRegression() and its siblings in this header use the result)

    */

jkqtmath_LIB_EXPORT std::pair<std::function<double(double)>,std::function<double(double)> > jkqtpStatGenerateTransformation(JKQTPStatRegressionModelType type);


/*! \brief Generates the transformation function for a-parameter (offset, \c result.first : transform, \c result.second : back-transform) for each regression model in JKQTPStatRegressionModelType in \a type

    \ingroup jkqtptools_math_statistics_regression

    \internal

    \param type the regression model whose a-parameter transformations are requested
    \return a pair of functors: \c first maps the model parameter \f$ a \f$ to the value handed to the linear fit,
            \c second maps the fit result back (this is how jkqtpstatRegression() and its siblings in this header use the result)

    */

jkqtmath_LIB_EXPORT std::pair<std::function<double(double)>,std::function<double(double)> > jkqtpStatGenerateParameterATransformation(JKQTPStatRegressionModelType type);


/*! \brief Generates the transformation function for b-parameter (slope, \c result.first : transform, \c result.second : back-transform) for each regression model in JKQTPStatRegressionModelType in \a type

    \ingroup jkqtptools_math_statistics_regression

    \internal

    \param type the regression model whose b-parameter transformations are requested
    \return a pair of functors: \c first maps the model parameter \f$ b \f$ to the value handed to the linear fit,
            \c second maps the fit result back (this is how jkqtpstatRegression() and its siblings in this header use the result)

    */

jkqtmath_LIB_EXPORT std::pair<std::function<double(double)>,std::function<double(double)> > jkqtpStatGenerateParameterBTransformation(JKQTPStatRegressionModelType type);


/*! \brief calculate the linear regression coefficients for a given data range \a firstX / \a firstY ... \a lastX / \a lastY where the model is defined by \a type
           So this function solves the least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i\left(y_i-f_{\text{type}}(x_i,a,b)\right)^2 \f]
           by reducing it to a linear fit by transforming x- and/or y-data
    \ingroup jkqtptools_math_statistics_regression

    \tparam InputItX standard iterator type of \a firstX and \a lastX.
    \tparam InputItY standard iterator type of \a firstY and \a lastY.
    \param type model to be fitted
    \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
    \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
    \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
    \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
    \param[in,out] coeffA returns the parameter \f$ a \f$ of the model
    \param[in,out] coeffB returns the parameter \f$ b \f$ of the model
    \param fixA if \c true, the coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
    \param fixB if \c true, the coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used

    This function first transforms the data, as appropriate to fit the model defined by \a type, and then calls jkqtpstatLinearRegression()
    to obtain the parameters. The output parameters are transformed back, so they can be used with jkqtpStatGenerateRegressionModel() to generate a functor
    that evaluates the model

    \see JKQTPStatRegressionModelType, jkqtpStatGenerateRegressionModel(), jkqtpstatLinearRegression(), jkqtpStatGenerateTransformation()
*/
template <class InputItX, class InputItY>
inline void jkqtpstatRegression(JKQTPStatRegressionModelType type, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false) {
    // model-specific mappings: data -> linearized data, and parameters -> linear-fit parameters (and back)
    const auto dataTrafo=jkqtpStatGenerateTransformation(type);
    const auto offsetTrafo=jkqtpStatGenerateParameterATransformation(type);
    const auto slopeTrafo=jkqtpStatGenerateParameterBTransformation(type);

    // linearize the input data
    std::vector<double> xLin;
    for (auto it=firstX; it!=lastX; ++it) {
        xLin.push_back(dataTrafo.first(*it));
    }
    std::vector<double> yLin;
    for (auto it=firstY; it!=lastY; ++it) {
        yLin.push_back(dataTrafo.second(*it));
    }

    // the caller-supplied parameters (relevant when fixed) have to live in the linearized space, too
    double aLin=offsetTrafo.first(coeffA);
    double bLin=slopeTrafo.first(coeffB);

    jkqtpstatLinearRegression(xLin.begin(), xLin.end(), yLin.begin(), yLin.end(), aLin, bLin, fixA, fixB);

    // map the fit result back to the parameters of the requested model
    coeffA=offsetTrafo.second(aLin);
    coeffB=slopeTrafo.second(bLin);
}


/*! \brief calculate the robust linear regression coefficients for a given data range \a firstX / \a firstY ... \a lastX / \a lastY where the model is defined by \a type
           So this function solves the Lp-norm optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i|y_i-f_{\text{type}}(x_i,a,b)|^p \f]
           by reducing it to a linear fit by transforming x- and/or y-data
    \ingroup jkqtptools_math_statistics_regression

    \tparam InputItX standard iterator type of \a firstX and \a lastX.
    \tparam InputItY standard iterator type of \a firstY and \a lastY.
    \param type model to be fitted
    \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
    \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
    \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
    \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
    \param[in,out] coeffA returns the parameter \f$ a \f$ of the model
    \param[in,out] coeffB returns the parameter \f$ b \f$ of the model
    \param fixA if \c true, the coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
    \param fixB if \c true, the coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
    \param p regularization parameter, the optimization problem is formulated in the \f$ L_p \f$ norm, using this \a p
    \param iterations the number of iterations the IRLS algorithm performs

    This function first transforms the data, as appropriate to fit the model defined by \a type, and then calls jkqtpstatRobustIRLSLinearRegression()
    to obtain the parameters. The output parameters are transformed back, so they can be used with jkqtpStatGenerateRegressionModel() to generate a functor
    that evaluates the model

    \see JKQTPStatRegressionModelType, jkqtpStatGenerateRegressionModel(), jkqtpstatRobustIRLSLinearRegression(), jkqtpStatGenerateTransformation()
*/
template <class InputItX, class InputItY>
inline void jkqtpstatRobustIRLSRegression(JKQTPStatRegressionModelType type, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false, double p=1.1, int iterations=100) {
    // fetch the transformations that turn the requested model into a straight line
    const auto dataTrafo=jkqtpStatGenerateTransformation(type);
    const auto offsetTrafo=jkqtpStatGenerateParameterATransformation(type);
    const auto slopeTrafo=jkqtpStatGenerateParameterBTransformation(type);

    // build linearized copies of the x- and y-data
    std::vector<double> xLin;
    for (auto it=firstX; it!=lastX; ++it) {
        xLin.push_back(dataTrafo.first(*it));
    }
    std::vector<double> yLin;
    for (auto it=firstY; it!=lastY; ++it) {
        yLin.push_back(dataTrafo.second(*it));
    }

    // start values / fixed values are given in model space and must be moved to the linearized space
    double aLin=offsetTrafo.first(coeffA);
    double bLin=slopeTrafo.first(coeffB);

    // robust straight-line fit on the linearized data
    jkqtpstatRobustIRLSLinearRegression(xLin.begin(), xLin.end(), yLin.begin(), yLin.end(), aLin, bLin, fixA, fixB, p, iterations);

    // back-transform the result into model parameters
    coeffA=offsetTrafo.second(aLin);
    coeffB=slopeTrafo.second(bLin);
}


/*! \brief calculate the weighted linear regression coefficients for a given data range \a firstX / \a firstY / \a firstW ... \a lastX / \a lastY / \a lastW where the model is defined by \a type
           So this function solves the weighted least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_iw_i^2\left(y_i-f_{\text{type}}(x_i,a,b)\right)^2 \f]
           by reducing it to a linear fit by transforming x- and/or y-data
    \ingroup jkqtptools_math_statistics_regression

    \tparam InputItX standard iterator type of \a firstX and \a lastX.
    \tparam InputItY standard iterator type of \a firstY and \a lastY.
    \tparam InputItW standard iterator type of \a firstW and \a lastW.
    \param type model to be fitted
    \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
    \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
    \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
    \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
    \param firstW iterator pointing to the first item in the weight-dataset to use \f$ w_1 \f$
    \param lastW iterator pointing behind the last item in the weight-dataset to use \f$ w_N \f$
    \param[in,out] coeffA returns the parameter \f$ a \f$ of the model
    \param[in,out] coeffB returns the parameter \f$ b \f$ of the model
    \param fixA if \c true, the coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
    \param fixB if \c true, the coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
    \param fWeightDataToWi an optional function, which is applied to the data from \a firstW ... \a lastW to convert them to weights, i.e. \c wi=fWeightDataToWi(*itW)
                           e.g. if you use data used to draw error bars, you can use jkqtp_inversePropSaveDefault(). The default is jkqtp_identity(), which just returns the values.
                           In the case of jkqtp_inversePropSaveDefault(), a datapoint x,y has a large weight if its error is small, and in the case of jkqtp_identity() its weight
                           is directly proportional to the given value.

    This function first transforms the x- and y-data, as appropriate to fit the model defined by \a type, and then calls jkqtpstatLinearWeightedRegression()
    to obtain the parameters (the weight data is passed on untransformed). The output parameters are transformed back, so they can be used with
    jkqtpStatGenerateRegressionModel() to generate a functor that evaluates the model

    \see JKQTPStatRegressionModelType, jkqtpStatGenerateRegressionModel(), jkqtpstatLinearWeightedRegression(), jkqtpStatGenerateTransformation()
*/
template <class InputItX, class InputItY, class InputItW>
inline void jkqtpstatWeightedRegression(JKQTPStatRegressionModelType type, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false, std::function<double(double)> fWeightDataToWi=&jkqtp_identity<double>) {
    // transformations that linearize the requested model and its two parameters
    const auto dataTrafo=jkqtpStatGenerateTransformation(type);
    const auto offsetTrafo=jkqtpStatGenerateParameterATransformation(type);
    const auto slopeTrafo=jkqtpStatGenerateParameterBTransformation(type);

    // linearized copies of the x- and y-data (the weights are used as given)
    std::vector<double> xLin;
    for (auto it=firstX; it!=lastX; ++it) {
        xLin.push_back(dataTrafo.first(*it));
    }
    std::vector<double> yLin;
    for (auto it=firstY; it!=lastY; ++it) {
        yLin.push_back(dataTrafo.second(*it));
    }

    // move the incoming parameter values into the linearized space
    double aLin=offsetTrafo.first(coeffA);
    double bLin=slopeTrafo.first(coeffB);

    // weighted straight-line fit on the linearized data
    jkqtpstatLinearWeightedRegression(xLin.begin(), xLin.end(), yLin.begin(), yLin.end(), firstW, lastW, aLin, bLin, fixA, fixB, fWeightDataToWi);

    // back-transform into the parameters of the requested model
    coeffA=offsetTrafo.second(aLin);
    coeffB=slopeTrafo.second(bLin);
}


/*! \brief calculates the coefficient of determination \f$ R^2 \f$ for a set of measurements \f$ (x_i,y_i) \f$ with a fit function \f$ f(x) \f$
    \ingroup jkqtptools_math_statistics_regression

    \tparam InputItX standard iterator type of \a firstX and \a lastX.
    \tparam InputItY standard iterator type of \a firstY and \a lastY.
    \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
    \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
    \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
    \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
    \param f function \f$ f(x) \f$, result of a fit to the data
    \return coefficient of determination \f[ R^2=1-\frac{\sum_i\bigl[y_i-f(x_i)\bigr]^2}{\sum_i\bigl[y_i-\overline{y}\bigr]^2} \f] where \f[ \overline{y}=\frac{1}{N}\cdot\sum_iy_i \f]

    \note \f$ \overline{y} \f$ is obtained from jkqtpstatAverage() over the complete y-range \a firstY ... \a lastY.
          The two sums run over the x- and y-range in lockstep until the shorter one ends and skip pairs for which
          JKQTPIsOKFloat() rejects the x- or the y-value.

    \see https://en.wikipedia.org/wiki/Coefficient_of_determination
*/
template <class InputItX, class InputItY>
inline double jkqtpstatCoefficientOfDetermination(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, std::function<double(double)> f) {
    // the reference level of SStot is the mean of the measured y-values (not of the x-values)
    const double yMean=jkqtpstatAverage(firstY,lastY);

    double SSres=0; // residual sum of squares: sum (y_i-f(x_i))^2
    double SStot=0; // total sum of squares:    sum (y_i-mean(y))^2
    auto itX=firstX;
    auto itY=firstY;
    for (; itX!=lastX && itY!=lastY; ++itX, ++itY) {
        const double fit_x=jkqtp_todouble(*itX);
        const double fit_y=jkqtp_todouble(*itY);
        if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y)) {
            SStot+=jkqtp_sqr(fit_y-yMean);
            SSres+=jkqtp_sqr(fit_y-f(fit_x));
        }
    }

    return 1.0-SSres/SStot;
}


/*! \brief calculates the weighted coefficient of determination \f$ R^2 \f$ for a set of measurements \f$ (x_i,y_i,w_i) \f$ with a fit function \f$ f(x) \f$
    \ingroup jkqtptools_math_statistics_regression

    \tparam InputItX standard iterator type of \a firstX and \a lastX.
    \tparam InputItY standard iterator type of \a firstY and \a lastY.
    \tparam InputItW standard iterator type of \a firstW and \a lastW.
    \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
    \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
    \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
    \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
    \param firstW iterator pointing to the first item in the weight-dataset to use \f$ w_1 \f$
    \param lastW iterator pointing behind the last item in the weight-dataset to use \f$ w_N \f$
    \param f function \f$ f(x) \f$, result of a fit to the data
    \param fWeightDataToWi an optional function, which is applied to the data from \a firstW ... \a lastW to convert them to weights, i.e. \c wi=fWeightDataToWi(*itW)
                           e.g. if you use data used to draw error bars, you can use jkqtp_inversePropSaveDefault(). The default is jkqtp_identity(), which just returns the values.
                           In the case of jkqtp_inversePropSaveDefault(), a datapoint x,y has a large weight if its error is small, and in the case of jkqtp_identity() its weight
                           is directly proportional to the given value.
    \return weighted coefficient of determination \f[ R^2=1-\frac{\sum_iw_i^2\bigl[y_i-f(x_i)\bigr]^2}{\sum_iw_i^2\bigl[y_i-\overline{y}\bigr]^2} \f] where \f[ \overline{y}=\frac{1}{N}\cdot\sum_iw_iy_i \f]
            with \f[ \sum_iw_i=1 \f]

    \note \f$ \overline{y} \f$ is obtained from jkqtpstatWeightedAverage() over the y-range \a firstY ... \a lastY with the
          weight data starting at \a firstW ; \a fWeightDataToWi is not applied for this average, only for the two sums.
          The sums run over the three ranges in lockstep until the shortest one ends and skip triples for which
          JKQTPIsOKFloat() rejects the x-value, the y-value or the squared weight.

    \see https://en.wikipedia.org/wiki/Coefficient_of_determination
*/
template <class InputItX, class InputItY, class InputItW>
inline double jkqtpstatWeightedCoefficientOfDetermination(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, std::function<double(double)> f, std::function<double(double)> fWeightDataToWi=&jkqtp_identity<double>) {
    // the reference level of SStot is the weighted mean of the measured y-values (not of the x-values)
    const double yMean=jkqtpstatWeightedAverage(firstY,lastY,firstW);

    double SSres=0; // weighted residual sum of squares: sum w_i^2*(y_i-f(x_i))^2
    double SStot=0; // weighted total sum of squares:    sum w_i^2*(y_i-mean(y))^2
    auto itX=firstX;
    auto itY=firstY;
    auto itW=firstW;
    for (; itX!=lastX && itY!=lastY && itW!=lastW; ++itX, ++itY, ++itW) {
        const double fit_x=jkqtp_todouble(*itX);
        const double fit_y=jkqtp_todouble(*itY);
        // the weight data is first mapped through fWeightDataToWi and then squared
        const double fit_w2=jkqtp_sqr(fWeightDataToWi(jkqtp_todouble(*itW)));
        if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y) && JKQTPIsOKFloat(fit_w2)) {
            SSres+=(fit_w2*jkqtp_sqr(fit_y-f(fit_x)));
            SStot+=(fit_w2*jkqtp_sqr(fit_y-yMean));
        }
    }

    return 1.0-SSres/SStot;
}


/*! \brief calculates the sum of deviations \f$ \chi^2 \f$ for a set of measurements \f$ (x_i,y_i) \f$ with a fit function \f$ f(x) \f$
    \ingroup jkqtptools_math_statistics_regression

    \tparam InputItX standard iterator type of \a firstX and \a lastX.
    \tparam InputItY standard iterator type of \a firstY and \a lastY.
    \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
    \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
    \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
    \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
    \param f function \f$ f(x) \f$, result of a fit to the data
    \return sum of deviations  \f[ \chi^2=\sum_i\bigl[y_i-f(x_i)\bigr]^2 \f]

    \note The x- and y-range are traversed in lockstep until the shorter one ends; pairs for which
          JKQTPIsOKFloat() rejects the x- or the y-value do not contribute to the sum.

    \see https://en.wikipedia.org/wiki/Coefficient_of_determination
*/
template <class InputItX, class InputItY>
inline double jkqtpstatSumOfDeviations(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, std::function<double(double)> f) {
    double chi2=0;
    auto xIt=firstX;
    auto yIt=firstY;
    for (; xIt!=lastX && yIt!=lastY; ++xIt, ++yIt) {
        const double xi=jkqtp_todouble(*xIt);
        const double yi=jkqtp_todouble(*yIt);
        // unusable pairs are ignored
        if (!JKQTPIsOKFloat(xi) || !JKQTPIsOKFloat(yi)) continue;
        // add the squared deviation of the measurement from the fit function
        chi2+=jkqtp_sqr(yi-f(xi));
    }
    return chi2;
}


/*! \brief sums up the weighted squared residuals \f$ \chi^2 \f$ between measured datapoints \f$ (x_i,y_i,w_i) \f$ and a fit function \f$ f(x) \f$
    \ingroup jkqtptools_math_statistics_regression

    The x-, y- and weight-ranges are traversed in parallel; the traversal ends as soon as any of the three ranges is exhausted.
    Datapoints for which JKQTPIsOKFloat() rejects \f$ x_i \f$, \f$ y_i \f$ or the squared weight \f$ w_i^2 \f$ do not
    contribute to the sum (and \a f is not evaluated for them).

    \tparam InputItX standard iterator type of \a firstX and \a lastX.
    \tparam InputItY standard iterator type of \a firstY and \a lastY.
    \tparam InputItW standard iterator type of \a firstW and \a lastW.
    \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
    \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
    \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
    \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
    \param firstW iterator pointing to the first item in the weight-dataset to use \f$ w_1 \f$
    \param lastW iterator pointing behind the last item in the weight-dataset to use \f$ w_N \f$
    \param f function \f$ f(x) \f$, result of a fit to the data
    \param fWeightDataToWi an optional function, which is applied to the data from \a firstW ... \a lastW to convert them to weights, i.e. \c wi=fWeightDataToWi(*itW)
                           e.g. if you use data used to draw error bars, you can use jkqtp_inversePropSaveDefault(). The default is jkqtp_identity(), which just returns the values.
                           In the case of jkqtp_inversePropSaveDefault(), a datapoint x,y has a large weight if its error is small, and in the case of jkqtp_identity() its weight
                           is directly proportional to the given value.
    \return weighted sum of squared deviations \f[ \chi^2=\sum_iw_i^2\cdot\bigl[y_i-f(x_i)\bigr]^2 \f] (0 if no datapoint was usable)

    \see https://en.wikipedia.org/wiki/Reduced_chi-squared_statistic
*/
template <class InputItX, class InputItY, class InputItW>
inline double jkqtpstatWeightedSumOfDeviations(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, std::function<double(double)> f, std::function<double(double)> fWeightDataToWi=&jkqtp_identity<double>) {
    double chi2=0;

    auto xIt=firstX;
    auto yIt=firstY;
    auto wIt=firstW;
    for (; xIt!=lastX && yIt!=lastY && wIt!=lastW; ++xIt, ++yIt, ++wIt) {
        const double xi=jkqtp_todouble(*xIt);
        const double yi=jkqtp_todouble(*yIt);
        // squared weight w_i^2 with w_i=fWeightDataToWi(*wIt)
        const double wi2=jkqtp_sqr(fWeightDataToWi(jkqtp_todouble(*wIt)));

        // ignore datapoints with a non-usable coordinate or weight
        if (!(JKQTPIsOKFloat(xi) && JKQTPIsOKFloat(yi) && JKQTPIsOKFloat(wi2))) continue;

        chi2+=wi2*jkqtp_sqr(yi-f(xi));
    }

    return chi2;
}


#endif // JKQTPSTATREGRESSION_H_INCLUDED


jkqtmath_LIB_EXPORT
#define jkqtmath_LIB_EXPORT
Definition jkqtmath_imexport.h:87

JKQTPASSERT_M
#define JKQTPASSERT_M(condition, message)
dynamic assertion, throws an exception with the given message, when the given condition condition eva...
Definition jkqtpdebuggingtools.h:73

JKQTPASSERT
#define JKQTPASSERT(condition)
dynamic assertion, throws an exception with the given message, when the given condition condition eva...
Definition jkqtpdebuggingtools.h:77

JKQTP_EPSILON
#define JKQTP_EPSILON
double-value NotANumber
Definition jkqtpmathtools.h:102

jkqtp_todouble
constexpr double jkqtp_todouble(const T &d)
converts a boolean to a double, is used to convert boolean to double by JKQTPDatastore
Definition jkqtpmathtools.h:113

jkqtp_sqr
T jkqtp_sqr(const T &v)
returns the square of the value v, i.e. v*v
Definition jkqtpmathtools.h:327

JKQTPIsOKFloat
bool JKQTPIsOKFloat(T v)
check whether the floating point number is OK (i.e. non-inf, non-NAN)
Definition jkqtpmathtools.h:496

jkqtpstatWeightedAverage
double jkqtpstatWeightedAverage(InputIt first, InputIt last, InputWeightIt firstWeight, size_t *Noutput=nullptr)
calculates the weighted average of a given data range first ... last
Definition jkqtpstatbasics.h:101

jkqtpstatAverage
double jkqtpstatAverage(InputIt first, InputIt last, size_t *Noutput=nullptr)
calculates the average of a given data range first ... last
Definition jkqtpstatbasics.h:62

jkqtpstatWeightedCoefficientOfDetermination
double jkqtpstatWeightedCoefficientOfDetermination(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, std::function< double(double)> f, std::function< double(double)> fWeightDataToWi=&jkqtp_identity< double >)
calculates the weighted coefficient of determination R^2 for a set of measurements (x_i, y_i, w_i) with a fit function f(x)
Definition jkqtpstatregression.h:520

jkqtpstatRegressionModel2Latex
jkqtmath_LIB_EXPORT QString jkqtpstatRegressionModel2Latex(JKQTPStatRegressionModelType type, double a, double b)
Generates a LaTeX string for the models from JKQTPStatRegressionModelType in type.

jkqtpStatGenerateParameterBTransformation
jkqtmath_LIB_EXPORT std::pair< std::function< double(double)>, std::function< double(double)> > jkqtpStatGenerateParameterBTransformation(JKQTPStatRegressionModelType type)
Generates the transformation function for b-parameter (slope, result.first : transform,...

jkqtpStatGenerateTransformation
jkqtmath_LIB_EXPORT std::pair< std::function< double(double)>, std::function< double(double)> > jkqtpStatGenerateTransformation(JKQTPStatRegressionModelType type)
Generates the transformation function for x-data (result.first ) and y-data (result....

jkqtpstatRobustIRLSRegression
void jkqtpstatRobustIRLSRegression(JKQTPStatRegressionModelType type, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double &coeffA, double &coeffB, bool fixA=false, bool fixB=false, double p=1.1, int iterations=100)
calculate the robust linear regression coefficients for a given data range firstX / firstY ....
Definition jkqtpstatregression.h:384

jkqtpstatRegression
void jkqtpstatRegression(JKQTPStatRegressionModelType type, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double &coeffA, double &coeffB, bool fixA=false, bool fixB=false)
calculate the linear regression coefficients for a given data range firstX / firstY ....
Definition jkqtpstatregression.h:338

jkqtpstatWeightedSumOfDeviations
double jkqtpstatWeightedSumOfDeviations(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, std::function< double(double)> f, std::function< double(double)> fWeightDataToWi=&jkqtp_identity< double >)
calculates the weighted sum of deviations chi^2 for a set of measurements (x_i, y_i, w_i) with a fit function f(x)
Definition jkqtpstatregression.h:605

jkqtpstatRobustIRLSLinearRegression
void jkqtpstatRobustIRLSLinearRegression(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double &coeffA, double &coeffB, bool fixA=false, bool fixB=false, double p=1.1, int iterations=100)
calculate the (robust) iteratively reweighted least-squares (IRLS) estimate for the parameters of the...
Definition jkqtpstatregression.h:229

jkqtpStatGenerateParameterATransformation
jkqtmath_LIB_EXPORT std::pair< std::function< double(double)>, std::function< double(double)> > jkqtpStatGenerateParameterATransformation(JKQTPStatRegressionModelType type)
Generates the transformation function for a-parameter (offset, result.first : transform,...

jkqtpstatCoefficientOfDetermination
double jkqtpstatCoefficientOfDetermination(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, std::function< double(double)> f)
calculates the coefficient of determination R^2 for a set of measurements (x_i, y_i) with a fit function f(x)
Definition jkqtpstatregression.h:474

jkqtpstatWeightedRegression
void jkqtpstatWeightedRegression(JKQTPStatRegressionModelType type, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, double &coeffA, double &coeffB, bool fixA=false, bool fixB=false, std::function< double(double)> fWeightDataToWi=&jkqtp_identity< double >)
calculate the robust linear regression coefficients for a given data range firstX / firstY ....
Definition jkqtpstatregression.h:436

JKQTPStatRegressionModelType
JKQTPStatRegressionModelType
when performing linear regression, different target functions can be fitted, if the input data is tra...
Definition jkqtpstatregression.h:270

jkqtpstatLinearWeightedRegression
void jkqtpstatLinearWeightedRegression(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, double &coeffA, double &coeffB, bool fixA=false, bool fixB=false, std::function< double(double)> fWeightDataToWi=&jkqtp_identity< double >)
calculate the weighted linear regression coefficients for a given for a given data range firstX / fir...
Definition jkqtpstatregression.h:144

jkqtpStatGenerateRegressionModel
jkqtmath_LIB_EXPORT std::function< double(double, double, double)> jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType type)
Generates functors f(x,a,b) for the models from JKQTPStatRegressionModelType in type.

jkqtpstatSumOfDeviations
double jkqtpstatSumOfDeviations(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, std::function< double(double)> f)
calculates the sum of deviations chi^2 for a set of measurements (x_i, y_i) with a fit function f(x)
Definition jkqtpstatregression.h:562

jkqtpstatLinearRegression
void jkqtpstatLinearRegression(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double &coeffA, double &coeffB, bool fixA=false, bool fixB=false)
calculate the linear regression coefficients for a given data range firstX / firstY ....
Definition jkqtpstatregression.h:71

JKQTPStatRegressionModelType::PowerLaw
@ PowerLaw
power law model

JKQTPStatRegressionModelType::Logarithm
@ Logarithm
logarithmic model

JKQTPStatRegressionModelType::Linear
@ Linear
linear model

JKQTPStatRegressionModelType::Exponential
@ Exponential
exponential model