JKQTPlotter trunk/v5.0.0
an extensive Qt5+Qt6 Plotter framework (including a feature-richt plotter widget, a speed-optimized, but limited variant and a LaTeX equation renderer!), written fully in C/C++ and without external dependencies
Loading...
Searching...
No Matches
jkqtpstatregression.h
1/*
2 Copyright (c) 2008-2024 Jan W. Krieger (<jan@jkrieger.de>)
3
4 last modification: $LastChangedDate$ (revision $Rev$)
5
6 This software is free software: you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License (LGPL) as published by
8 the Free Software Foundation, either version 2.1 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU Lesser General Public License (LGPL) for more details.
15
16 You should have received a copy of the GNU Lesser General Public License (LGPL)
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
18*/
19
20
21#ifndef JKQTPSTATREGRESSION_H_INCLUDED
22#define JKQTPSTATREGRESSION_H_INCLUDED
23
24#include <stdint.h>
25#include <cmath>
26#include <stdlib.h>
27#include <string.h>
28#include <iostream>
29#include <stdio.h>
30#include <limits>
31#include <vector>
32#include <utility>
33#include <cfloat>
34#include <ostream>
35#include <iomanip>
36#include <sstream>
37#include "jkqtmath/jkqtmath_imexport.h"
38#include "jkqtmath/jkqtplinalgtools.h"
39#include "jkqtmath/jkqtparraytools.h"
40#include "jkqtcommon/jkqtpdebuggingtools.h"
41#include "jkqtmath/jkqtpstatbasics.h"
42#include "jkqtmath/jkqtpstatpoly.h"
43
44
45
46
47
48
49/*! \brief calculate the linear regression coefficients for a given data range \a firstX / \a firstY ... \a lastX / \a lastY where the model is \f$ f(x)=a+b\cdot x \f$
50 So this function solves the least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i\left(y_i-(a+b\cdot x_i)\right)^2 \f]
51 \ingroup jkqtptools_math_statistics_regression
52
53 \tparam InputItX standard iterator type of \a firstX and \a lastX.
54 \tparam InputItY standard iterator type of \a firstY and \a lastY.
55 \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
56 \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
57 \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
58 \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
59 \param[in,out] coeffA returns the offset of the linear model
60 \param[in,out] coeffB returns the slope of the linear model
61 \param fixA if \c true, the offset coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
62 \param fixB if \c true, the slope coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
63
64 This function computes internally:
65 \f[ a=\overline{y}-b\cdot\overline{x} \f]
66 \f[ b=\frac{\sum x_iy_i-N\cdot\overline{x}\cdot\overline{y}}{\sum x_i^2-N\cdot(\overline{x})^2} \f]
67
68 \image html datastore_regression_lin.png
69*/
70template <class InputItX, class InputItY>
71inline void jkqtpstatLinearRegression(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false) {
72 if (fixA&&fixB) return;
73 const int Nx=std::distance(firstX,lastX);
74 const int Ny=std::distance(firstY,lastY);
75
76 JKQTPASSERT(Nx>1 && Ny>1);
77
78 double sumx=0, sumy=0, sumxy=0, sumx2=0;
79 size_t N=0;
80 auto itX=firstX;
81 auto itY=firstY;
82 for (; itX!=lastX && itY!=lastY; ++itX, ++itY) {
83 const double fit_x=jkqtp_todouble(*itX);
84 const double fit_y=jkqtp_todouble(*itY);
85 if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y)) {
86 sumx=sumx+fit_x;
87 sumy=sumy+fit_y;
88 sumxy=sumxy+fit_x*fit_y;
89 sumx2=sumx2+fit_x*fit_x;
90 N++;
91 }
92 }
93 const double NN=static_cast<double>(N);
94 JKQTPASSERT_M(NN>1, "too few datapoints");
95 if (!fixA && !fixB) {
96 coeffB=(double(sumxy)-double(sumx)*double(sumy)/NN)/(double(sumx2)-double(sumx)*double(sumx)/NN);;
97 coeffA=double(sumy)/NN-coeffB*double(sumx)/NN;
98 } else if (fixA && !fixB) {
99 coeffB=(double(sumy)/NN-coeffA)/(double(sumx)/NN);
100 } else if (!fixA && fixB) {
101 coeffA=double(sumy)/NN-coeffB*double(sumx)/NN;
102 }
103}
104
105
106/*! \brief calculate the weighted linear regression coefficients for a given for a given data range \a firstX / \a firstY / \a firstW ... \a lastX / \a lastY / \a lastW where the model is \f$ f(x)=a+b\cdot x \f$
107 So this function solves the least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_iw_i^2\cdot\left(y_i-(a+b\cdot x_i)\right)^2 \f]
108 \ingroup jkqtptools_math_statistics_regression
109
110 \tparam InputItX standard iterator type of \a firstX and \a lastX.
111 \tparam InputItY standard iterator type of \a firstY and \a lastY.
112 \tparam InputItW standard iterator type of \a firstW and \a lastW.
113 \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
114 \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
115 \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
116 \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
117 \param firstW iterator pointing to the first item in the weight-dataset to use \f$ w_1 \f$
118 \param lastW iterator pointing behind the last item in the weight-dataset to use \f$ w_N \f$
119 \param[in,out] coeffA returns the offset of the linear model
120 \param[in,out] coeffB returns the slope of the linear model
121 \param fixA if \c true, the offset coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
122 \param fixB if \c true, the slope coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
123 \param fWeightDataToWi an optional function, which is applied to the data from \a firstW ... \a lastW to convert them to weight, i.e. \c wi=fWeightDataToWi(*itW)
124 e.g. if you use data used to draw error bars, you can use jkqtp_inversePropSaveDefault(). The default is jkqtp_identity(), which just returns the values.
125 In the case of jkqtp_inversePropSaveDefault(), a datapoint x,y, has a large weight, if it's error is small and in the case if jkqtp_identity() it's weight
126 is directly proportional to the given value.
127
128
129 This function internally computes:
130 \f[ a=\frac{\overline{y}-b\cdot\overline{x}}{\overline{w^2}} \f]
131 \f[ b=\frac{\overline{w^2}\cdot\overline{x\cdot y}-\overline{x}\cdot\overline{y}}{\overline{x^2}\cdot\overline{w^2}-\overline{x}^2} \f]
132
133 Here the averages are defined in terms of a weight vector \f$ w_i\f$:
134 \f[ \overline{x}=\sum\limits_iw_i^2\cdot x_i \f]
135 \f[ \overline{y}=\sum\limits_iw_i^2\cdot y_i \f]
136 \f[ \overline{x\cdot y}=\sum\limits_iw_i^2\cdot x_i\cdot y_i \f]
137 \f[ \overline{x^2}=\sum\limits_iw_i^2\cdot x_i^2 \f]
138 \f[ \overline{w^2}=\sum\limits_iw_i^2 \f]
139
140 \image html datastore_regression_linweight.png
141
142*/
143template <class InputItX, class InputItY, class InputItW>
144inline void jkqtpstatLinearWeightedRegression(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false, std::function<double(double)> fWeightDataToWi=&jkqtp_identity<double>) {
145 if (fixA&&fixB) return;
146 const int Nx=std::distance(firstX,lastX);
147 const int Ny=std::distance(firstY,lastY);
148 const int Nw=std::distance(firstW,lastW);
149
150 JKQTPASSERT(Nx>1 && Ny>1 && Nw>1);
151
152 double sumx=0, sumy=0, sumxy=0, sumx2=0, sumw2=0;
153 size_t N=0;
154 auto itX=firstX;
155 auto itY=firstY;
156 auto itW=firstW;
157 for (; itX!=lastX && itY!=lastY && itW!=lastW; ++itX, ++itY, ++itW) {
158 const double fit_x=jkqtp_todouble(*itX);
159 const double fit_y=jkqtp_todouble(*itY);
160 const double fit_w2=jkqtp_sqr(fWeightDataToWi(jkqtp_todouble(*itW)));
161 if (JKQTPIsOKFloat(fit_x)&&JKQTPIsOKFloat(fit_y)&&JKQTPIsOKFloat(fit_w2)) {
162 sumx=sumx+fit_w2*fit_x;
163 sumy=sumy+fit_w2*fit_y;
164 sumxy=sumxy+fit_w2*fit_x*fit_y;
165 sumx2=sumx2+fit_w2*fit_x*fit_x;
166 sumw2=sumw2+fit_w2;
167 N++;
168 }
169 }
170 const double NN=static_cast<double>(N);
171 JKQTPASSERT_M(NN>1, "too few datapoints");
172 if (!fixA && !fixB) {
173 coeffB=(double(sumxy)*double(sumw2)-double(sumx)*double(sumy))/(double(sumx2)*double(sumw2)-double(sumx)*double(sumx));
174 coeffA=(double(sumy)-coeffB*double(sumx))/double(sumw2);
175 } else if (fixA && !fixB) {
176 coeffB=(double(sumy)-coeffA*double(sumw2))/double(sumx);
177 } else if (!fixA && fixB) {
178 coeffA=(double(sumy)-coeffB*double(sumx))/double(sumw2);
179 }
180}
181
182
183
184/*! \brief calculate the (robust) iteratively reweighted least-squares (IRLS) estimate for the parameters of the model \f$ f(x)=a+b\cdot x \f$
185 for a given data range \a firstX / \a firstY ... \a lastX / \a lastY
186 So this function finds an outlier-robust solution to the optimization problem:
187 \f[ (a^\ast,b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i|a+b\cdot x_i-y_i|^p \f]
188 \ingroup jkqtptools_math_statistics_regression
189
190 \ingroup jkqtptools_math_statistics_regression
191
192 \tparam InputItX standard iterator type of \a firstX and \a lastX.
193 \tparam InputItY standard iterator type of \a firstY and \a lastY.
194 \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
195 \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
196 \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
197 \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
198 \param[in,out] coeffA returns the offset of the linear model
199 \param[in,out] coeffB returns the slope of the linear model
200 \param fixA if \c true, the offset coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
201 \param fixB if \c true, the slope coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
202 \param p regularization parameter, the optimization problem is formulated in the \f$ L_p \f$ norm, using this \a p (see image below for an example)
203 \param iterations the number of iterations the IRLS algorithm performs
204
205 This is a simple form of the IRLS algorithm to estimate the parameters a and b in a linear model \f$ f(x)=a+b\cdot x \f$.
206 This algorithm solves the optimization problem for a \f$ L_p\f$-norm:
207 \f[ (a^\ast,b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i|a+b\cdot x_i-y_i|^p \f]
208 by iteratively optimization weights \f$ \vec{w} \f$ and solving a weighted least squares problem in each iteration:
209 \f[ (a_n,b_n)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i|a+b\cdot x_i-y_i|^{(p-2)}\cdot|a+b\cdot x_i-y_i|^2 \f]
210
211
212 The IRLS-algorithm works as follows:
213 - calculate initial \f$ a_0\f$ and \f$ b_0\f$ with unweighted regression from x and y
214 - perform a number of iterations (parameter \a iterations ). In each iteration \f$ n\f$:
215 - calculate the error vector \f$\vec{e}\f$: \f[ e_i = a+b\cdot x_i -y_i \f]
216 - estimate new weights \f$\vec{w}\f$: \f[ w_i=|e_i|^{(p-2)/2} \f]
217 - calculate new estimates \f$ a_n\f$ and \f$ b_n\f$ with weighted regression from \f$ \vec{x}\f$ and \f$ \vec{y}\f$ and \f$ \vec{w}\f$
218 .
219 - return the last estimates \f$ a_n\f$ and \f$ b_n\f$
220 .
221
222 \image html irls.png
223
224 \image html datastore_regression_linrobust_p.png
225
226 \see https://en.wikipedia.org/wiki/Iteratively_reweighted_least_squares, C. Sidney Burrus: "Iterative Reweighted Least Squares", <a href="http://cnx.org/content/m45285/latest/">http://cnx.org/content/m45285/latest/</a>
227*/
228template <class InputItX, class InputItY>
229inline void jkqtpstatRobustIRLSLinearRegression(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false, double p=1.1, int iterations=100) {
230 if (fixA&&fixB) return;
231 const int Nx=std::distance(firstX,lastX);
232 const int Ny=std::distance(firstY,lastY);
233 const int N=std::min(Nx,Ny);
234
235 JKQTPASSERT(Nx>1 && Ny>1);
236
237 std::vector<double> weights;
238 std::fill_n(std::back_inserter(weights), N, 1.0);
239
240 double alast=coeffA, blast=coeffB;
241 jkqtpstatLinearWeightedRegression(firstX, lastX, firstY, lastY, weights.begin(), weights.end(), alast, blast, fixA, fixB, &jkqtp_identity<double>);
242 for (int it=0; it<iterations-1; it++) {
243 // calculate weights
244 auto itX=firstX;
245 auto itY=firstY;
246 for (double& w: weights) {
247 const double fit_x=*itX;
248 const double fit_y=*itY;
249 const double e=alast+blast*fit_x-fit_y;
250 w=pow(std::max<double>(JKQTP_EPSILON*100.0, fabs(e)), (p-2.0)/2.0);
251 ++itX;
252 ++itY;
253 }
254 // solve weighted linear least squares
255 jkqtpstatLinearWeightedRegression(firstX, lastX, firstY, lastY, weights.begin(), weights.end(), alast, blast, fixA, fixB, &jkqtp_identity<double>);
256 }
257 coeffA=alast;
258 coeffB=blast;
259}
260
261
262
263
264
265
266
267/*! \brief when performing linear regression, different target functions can be fitted, if the input data is transformed accordingly. This library provides the options in this enum by default.
268 \ingroup jkqtptools_math_statistics_regression
269 */
271 Linear, /*!< \brief linear model \f$ f(x)=a+b\cdot x \f$ */
272 PowerLaw, /*!< \brief power law model \f$ f(x)=a\cdot x^b \f$ */
273 Exponential, /*!< \brief exponential model \f$ f(x)=a\cdot \exp(b\cdot x) \f$ */
274 Logarithm, /*!< \brief exponential model \f$ f(x)=a+b\cdot \ln(x) \f$ */
275};
276
277
278/*! \brief Generates functors \c f(x,a,b) for the models from JKQTPStatRegressionModelType in \a type
279 \ingroup jkqtptools_math_statistics_regression
280 */
282
283/*! \brief Generates a LaTeX string for the models from JKQTPStatRegressionModelType in \a type
284 \ingroup jkqtptools_math_statistics_regression
285 */
287
288/*! \brief Generates functors \c f(x) for the models from JKQTPStatRegressionModelType in \a type and binds the parameter values \a and \a b to the returned function
289 \ingroup jkqtptools_math_statistics_regression
290 */
291jkqtmath_LIB_EXPORT std::function<double(double)> jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType type, double a, double b);
292
293/*! \brief Generates the transformation function for x-data (\c result.first ) and y-data (\c result.second ) for each regression model in JKQTPStatRegressionModelType in \a type
294 \ingroup jkqtptools_math_statistics_regression
295 \internal
296 */
297jkqtmath_LIB_EXPORT std::pair<std::function<double(double)>,std::function<double(double)> > jkqtpStatGenerateTransformation(JKQTPStatRegressionModelType type);
298
299
300/*! \brief Generates the transformation function for a-parameter (offset, \c result.first : transform, \c result.second : back-transform) for each regression model in JKQTPStatRegressionModelType in \a type
301 \ingroup jkqtptools_math_statistics_regression
302 \internal
303 */
304jkqtmath_LIB_EXPORT std::pair<std::function<double(double)>,std::function<double(double)> > jkqtpStatGenerateParameterATransformation(JKQTPStatRegressionModelType type);
305
306
307/*! \brief Generates the transformation function for b-parameter (slope, \c result.first : transform, \c result.second : back-transform) for each regression model in JKQTPStatRegressionModelType in \a type
308 \ingroup jkqtptools_math_statistics_regression
309 \internal
310 */
311jkqtmath_LIB_EXPORT std::pair<std::function<double(double)>,std::function<double(double)> > jkqtpStatGenerateParameterBTransformation(JKQTPStatRegressionModelType type);
312
313
314/*! \brief calculate the linear regression coefficients for a given data range \a firstX / \a firstY ... \a lastX / \a lastY where the model is defined by \a type
315 So this function solves the least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i\left(y_i-f_{\text{type}}(x_i,a,b)\right)^2 \f]
316 by reducing it to a linear fit by transforming x- and/or y-data
317 \ingroup jkqtptools_math_statistics_regression
318
319 \tparam InputItX standard iterator type of \a firstX and \a lastX.
320 \tparam InputItY standard iterator type of \a firstY and \a lastY.
321 \param type model to be fitted
322 \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
323 \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
324 \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
325 \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
326 \param[in,out] coeffA returns the offset of the linear model
327 \param[in,out] coeffB returns the slope of the linear model
328 \param fixA if \c true, the offset coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
329 \param fixB if \c true, the slope coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
330
331 This function computes internally first transforms the data, as appropriate to fit the model defined by \a type and then calls jkqtpstatLinearRegression()
332 to obtain the parameters. The output parameters are transformed, so they can be used with jkqtpStatGenerateRegressionModel() to generate a functor
333 that evaluates the model
334
335 \see JKQTPStatRegressionModelType, jkqtpStatGenerateRegressionModel(), jkqtpstatLinearRegression(), jkqtpStatGenerateTransformation()
336*/
337template <class InputItX, class InputItY>
338inline void jkqtpstatRegression(JKQTPStatRegressionModelType type, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false) {
339 std::vector<double> x, y;
340 auto trafo=jkqtpStatGenerateTransformation(type);
343
344 std::transform(firstX, lastX, std::back_inserter(x), trafo.first);
345 std::transform(firstY, lastY, std::back_inserter(y), trafo.second);
346
347 double a=aTrafo.first(coeffA);
348 double b=bTrafo.first(coeffB);
349
350 jkqtpstatLinearRegression(x.begin(), x.end(), y.begin(), y.end(), a, b, fixA, fixB);
351
352 coeffA=aTrafo.second(a);
353 coeffB=bTrafo.second(b);
354}
355
356
357
358/*! \brief calculate the robust linear regression coefficients for a given data range \a firstX / \a firstY ... \a lastX / \a lastY where the model is defined by \a type
359 So this function solves the Lp-norm optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i|y_i-f_{\text{type}}(x_i,a,b)|^p \f]
360 by reducing it to a linear fit by transforming x- and/or y-data
361 \ingroup jkqtptools_math_statistics_regression
362
363 \tparam InputItX standard iterator type of \a firstX and \a lastX.
364 \tparam InputItY standard iterator type of \a firstY and \a lastY.
365 \param type model to be fitted
366 \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
367 \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
368 \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
369 \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
370 \param[in,out] coeffA returns the offset of the linear model
371 \param[in,out] coeffB returns the slope of the linear model
372 \param fixA if \c true, the offset coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
373 \param fixB if \c true, the slope coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
374 \param p regularization parameter, the optimization problem is formulated in the \f$ L_p \f$ norm, using this \a p (see image below for an example)
375 \param iterations the number of iterations the IRLS algorithm performs
376
377 This function computes internally first transforms the data, as appropriate to fit the model defined by \a type and then calls jkqtpstatRobustIRLSLinearRegression()
378 to obtain the parameters. The output parameters are transformed, so they can be used with jkqtpStatGenerateRegressionModel() to generate a functor
379 that evaluates the model
380
381 \see JKQTPStatRegressionModelType, jkqtpStatGenerateRegressionModel(), jkqtpstatRobustIRLSLinearRegression(), jkqtpStatGenerateTransformation()
382*/
383template <class InputItX, class InputItY>
384inline void jkqtpstatRobustIRLSRegression(JKQTPStatRegressionModelType type, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false, double p=1.1, int iterations=100) {
385 std::vector<double> x, y;
386 auto trafo=jkqtpStatGenerateTransformation(type);
389
390 std::transform(firstX, lastX, std::back_inserter(x), trafo.first);
391 std::transform(firstY, lastY, std::back_inserter(y), trafo.second);
392
393 double a=aTrafo.first(coeffA);
394 double b=bTrafo.first(coeffB);
395
396 jkqtpstatRobustIRLSLinearRegression(x.begin(), x.end(), y.begin(), y.end(), a, b, fixA, fixB, p, iterations);
397
398 coeffA=aTrafo.second(a);
399 coeffB=bTrafo.second(b);
400}
401
402
403
404
405/*! \brief calculate the robust linear regression coefficients for a given data range \a firstX / \a firstY ... \a lastX / \a lastY where the model is defined by \a type
406 So this function solves the Lp-norm optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_iw_i^2\left(y_i-f_{\text{type}}(x_i,a,b)\right)^2 \f]
407 by reducing it to a linear fit by transforming x- and/or y-data
408 \ingroup jkqtptools_math_statistics_regression
409
410 \tparam InputItX standard iterator type of \a firstX and \a lastX.
411 \tparam InputItY standard iterator type of \a firstY and \a lastY.
412 \tparam InputItW standard iterator type of \a firstW and \a lastW.
413 \param type model to be fitted
414 \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
415 \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
416 \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
417 \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
418 \param firstW iterator pointing to the first item in the weight-dataset to use \f$ w_1 \f$
419 \param lastW iterator pointing behind the last item in the weight-dataset to use \f$ w_N \f$
420 \param[in,out] coeffA returns the offset of the linear model
421 \param[in,out] coeffB returns the slope of the linear model
422 \param fixA if \c true, the offset coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
423 \param fixB if \c true, the slope coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
424 \param fWeightDataToWi an optional function, which is applied to the data from \a firstW ... \a lastW to convert them to weight, i.e. \c wi=fWeightDataToWi(*itW)
425 e.g. if you use data used to draw error bars, you can use jkqtp_inversePropSaveDefault(). The default is jkqtp_identity(), which just returns the values.
426 In the case of jkqtp_inversePropSaveDefault(), a datapoint x,y, has a large weight, if it's error is small and in the case if jkqtp_identity() it's weight
427 is directly proportional to the given value.
428
429 This function computes internally first transforms the data, as appropriate to fit the model defined by \a type and then calls jkqtpstatLinearWeightedRegression()
430 to obtain the parameters. The output parameters are transformed, so they can be used with jkqtpStatGenerateRegressionModel() to generate a functor
431 that evaluates the model
432
433 \see JKQTPStatRegressionModelType, jkqtpStatGenerateRegressionModel(), jkqtpstatLinearWeightedRegression(), jkqtpStatGenerateTransformation()
434*/
435template <class InputItX, class InputItY, class InputItW>
436inline void jkqtpstatWeightedRegression(JKQTPStatRegressionModelType type, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false, std::function<double(double)> fWeightDataToWi=&jkqtp_identity<double>) {
437 std::vector<double> x, y;
438 auto trafo=jkqtpStatGenerateTransformation(type);
441
442 std::transform(firstX, lastX, std::back_inserter(x), trafo.first);
443 std::transform(firstY, lastY, std::back_inserter(y), trafo.second);
444
445 double a=aTrafo.first(coeffA);
446 double b=bTrafo.first(coeffB);
447
448 jkqtpstatLinearWeightedRegression(x.begin(), x.end(), y.begin(), y.end(), firstW, lastW, a, b, fixA, fixB, fWeightDataToWi);
449
450 coeffA=aTrafo.second(a);
451 coeffB=bTrafo.second(b);
452}
453
454
455
456
457/*! \brief calculates the coefficient of determination \f$ R^2 \f$ for a set of measurements \f$ (x_i,y_i) \f$ with a fit function \f$ f(x) \f$
458 \ingroup jkqtptools_math_statistics_regression
459
460 \tparam InputItX standard iterator type of \a firstX and \a lastX.
461 \tparam InputItY standard iterator type of \a firstY and \a lastY.
462 \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
463 \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
464 \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
465 \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
466 \param f function \f$ f(x) \f$, result of a fit to the data
467 \return coeffcicient of determination \f[ R^2=1-\frac{\sum_i\bigl[y_i-f(x_i)\bigr]^2}{\sum_i\bigl[y_i-\overline{y}\bigr]^2} \f] where \f[ \overline{y}=\frac{1}{N}\cdot\sum_iy_i \f]
468
469
470
471 \see https://en.wikipedia.org/wiki/Coefficient_of_determination
472*/
473template <class InputItX, class InputItY>
474inline double jkqtpstatCoefficientOfDetermination(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, std::function<double(double)> f) {
475
476 auto itX=firstX;
477 auto itY=firstY;
478
479 const double yMean=jkqtpstatAverage(firstX,lastX);
480 double SSres=0;
481 double SStot=0;
482 for (; itX!=lastX && itY!=lastY; ++itX, ++itY) {
483 const double fit_x=jkqtp_todouble(*itX);
484 const double fit_y=jkqtp_todouble(*itY);
485 if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y)) {
486 SStot+=jkqtp_sqr(fit_y-yMean);
487 SSres+=jkqtp_sqr(fit_y-f(fit_x));
488 }
489 }
490
491 return 1.0-SSres/SStot;
492}
493
494
495/*! \brief calculates the weightedcoefficient of determination \f$ R^2 \f$ for a set of measurements \f$ (x_i,y_i,w_i) \f$ with a fit function \f$ f(x) \f$
496 \ingroup jkqtptools_math_statistics_regression
497
498 \tparam InputItX standard iterator type of \a firstX and \a lastX.
499 \tparam InputItY standard iterator type of \a firstY and \a lastY.
500 \tparam InputItW standard iterator type of \a firstW and \a lastW.
501 \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
502 \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
503 \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
504 \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
505 \param firstW iterator pointing to the first item in the weight-dataset to use \f$ w_1 \f$
506 \param lastW iterator pointing behind the last item in the weight-dataset to use \f$ w_N \f$
507 \param f function \f$ f(x) \f$, result of a fit to the data
508 \param fWeightDataToWi an optional function, which is applied to the data from \a firstW ... \a lastW to convert them to weight, i.e. \c wi=fWeightDataToWi(*itW)
509 e.g. if you use data used to draw error bars, you can use jkqtp_inversePropSaveDefault(). The default is jkqtp_identity(), which just returns the values.
510 In the case of jkqtp_inversePropSaveDefault(), a datapoint x,y, has a large weight, if it's error is small and in the case if jkqtp_identity() it's weight
511 is directly proportional to the given value.
512 \return weighted coeffcicient of determination \f[ R^2=1-\frac{\sum_iw_i^2\bigl[y_i-f(x_i)\bigr]^2}{\sum_iw_i^2\bigl[y_i-\overline{y}\bigr]^2} \f] where \f[ \overline{y}=\frac{1}{N}\cdot\sum_iw_iy_i \f]
513 with \f[ \sum_iw_i=1 \f]
514
515
516
517 \see https://en.wikipedia.org/wiki/Coefficient_of_determination
518*/
519template <class InputItX, class InputItY, class InputItW>
520inline double jkqtpstatWeightedCoefficientOfDetermination(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, std::function<double(double)> f, std::function<double(double)> fWeightDataToWi=&jkqtp_identity<double>) {
521
522 auto itX=firstX;
523 auto itY=firstY;
524 auto itW=firstW;
525
526 const double yMean=jkqtpstatWeightedAverage(firstX,lastX,firstW);
527 double SSres=0;
528 double SStot=0;
529 for (; itX!=lastX && itY!=lastY && itW!=lastW; ++itX, ++itY, ++itW) {
530 const double fit_x=jkqtp_todouble(*itX);
531 const double fit_y=jkqtp_todouble(*itY);
532 const double fit_w2=jkqtp_sqr(fWeightDataToWi(jkqtp_todouble(*itW)));
533 if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y) && JKQTPIsOKFloat(fit_w2)) {
534 SSres+=(fit_w2*jkqtp_sqr(fit_y-f(fit_x)));
535 SStot+=(fit_w2*jkqtp_sqr(fit_y-yMean));
536 }
537 }
538
539 return 1.0-SSres/SStot;
540}
541
542
543
544
545/*! \brief calculates the sum of deviations \f$ \chi^2 \f$ for a set of measurements \f$ (x_i,y_i) \f$ with a fit function \f$ f(x) \f$
546 \ingroup jkqtptools_math_statistics_regression
547
548 \tparam InputItX standard iterator type of \a firstX and \a lastX.
549 \tparam InputItY standard iterator type of \a firstY and \a lastY.
550 \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
551 \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
552 \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
553 \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
554 \param f function \f$ f(x) \f$, result of a fit to the data
555 \return sum of deviations \f[ \chi^2=\sum_i\bigl[y_i-f(x_i)\bigr]^2 \f]
556
557
558
559 \see https://en.wikipedia.org/wiki/Coefficient_of_determination
560*/
561template <class InputItX, class InputItY>
562inline double jkqtpstatSumOfDeviations(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, std::function<double(double)> f) {
563
564 auto itX=firstX;
565 auto itY=firstY;
566
567 double SSres=0;
568 for (; itX!=lastX && itY!=lastY; ++itX, ++itY) {
569 const double fit_x=jkqtp_todouble(*itX);
570 const double fit_y=jkqtp_todouble(*itY);
571 if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y)) {
572 SSres+=jkqtp_sqr(fit_y-f(fit_x));
573 }
574 }
575
576 return SSres;
577}
578
579
580
581
582/*! \brief calculates the weighted sum of deviations \f$ \chi^2 \f$ for a set of measurements \f$ (x_i,y_i,w_i) \f$ with a fit function \f$ f(x) \f$
583 \ingroup jkqtptools_math_statistics_regression
584
585 \tparam InputItX standard iterator type of \a firstX and \a lastX.
586 \tparam InputItY standard iterator type of \a firstY and \a lastY.
587 \tparam InputItW standard iterator type of \a firstW and \a lastW.
588 \param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
589 \param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
590 \param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
591 \param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
592 \param firstW iterator pointing to the first item in the weight-dataset to use \f$ w_1 \f$
593 \param lastW iterator pointing behind the last item in the weight-dataset to use \f$ w_N \f$
594 \param f function \f$ f(x) \f$, result of a fit to the data
595 \param fWeightDataToWi an optional function, which is applied to the data from \a firstW ... \a lastW to convert them to weight, i.e. \c wi=fWeightDataToWi(*itW)
596 e.g. if you use data used to draw error bars, you can use jkqtp_inversePropSaveDefault(). The default is jkqtp_identity(), which just returns the values.
597 In the case of jkqtp_inversePropSaveDefault(), a datapoint x,y, has a large weight, if it's error is small and in the case if jkqtp_identity() it's weight
598 is directly proportional to the given value.
599 \return weighted sum of deviations \f[ \chi^2=\sum_iw_i^2\cdot\bigl[y_i-f(x_i)\bigr]^2 \f]
600
601
602 \see https://en.wikipedia.org/wiki/Reduced_chi-squared_statistic
603*/
604template <class InputItX, class InputItY, class InputItW>
605inline double jkqtpstatWeightedSumOfDeviations(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, std::function<double(double)> f, std::function<double(double)> fWeightDataToWi=&jkqtp_identity<double>) {
606
607 auto itX=firstX;
608 auto itY=firstY;
609 auto itW=firstW;
610
611 double SSres=0;
612 for (; itX!=lastX && itY!=lastY && itW!=lastW; ++itX, ++itY, ++itW) {
613 const double fit_x=jkqtp_todouble(*itX);
614 const double fit_y=jkqtp_todouble(*itY);
615 const double fit_w2=jkqtp_sqr(fWeightDataToWi(jkqtp_todouble(*itW)));
616 if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y) && JKQTPIsOKFloat(fit_w2)) {
617 SSres+=fit_w2*jkqtp_sqr(fit_y-f(fit_x));
618 }
619 }
620
621 return SSres;
622}
623
624
625
626
627
628
629
630#endif // JKQTPSTATREGRESSION_H_INCLUDED
631
632
#define jkqtmath_LIB_EXPORT
Definition jkqtmath_imexport.h:87
#define JKQTPASSERT_M(condition, message)
dynamic assertion, throws an exception with the given message, when the given condition condition eva...
Definition jkqtpdebuggingtools.h:73
#define JKQTPASSERT(condition)
dynamic assertion, throws an exception with the given message, when the given condition condition eva...
Definition jkqtpdebuggingtools.h:77
#define JKQTP_EPSILON
double-value NotANumber
Definition jkqtpmathtools.h:102
constexpr double jkqtp_todouble(const T &d)
converts a boolean to a double, is used to convert boolean to double by JKQTPDatastore
Definition jkqtpmathtools.h:113
T jkqtp_sqr(const T &v)
returns the quare of the value v, i.e. v*v
Definition jkqtpmathtools.h:327
bool JKQTPIsOKFloat(T v)
check whether the dlotaing point number is OK (i.e. non-inf, non-NAN)
Definition jkqtpmathtools.h:496
double jkqtpstatWeightedAverage(InputIt first, InputIt last, InputWeightIt firstWeight, size_t *Noutput=nullptr)
calculates the weighted average of a given data range first ... last
Definition jkqtpstatbasics.h:101
double jkqtpstatAverage(InputIt first, InputIt last, size_t *Noutput=nullptr)
calculates the average of a given data range first ... last
Definition jkqtpstatbasics.h:62
double jkqtpstatWeightedCoefficientOfDetermination(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, std::function< double(double)> f, std::function< double(double)> fWeightDataToWi=&jkqtp_identity< double >)
calculates the weightedcoefficient of determination for a set of measurements with a fit function
Definition jkqtpstatregression.h:520
jkqtmath_LIB_EXPORT QString jkqtpstatRegressionModel2Latex(JKQTPStatRegressionModelType type, double a, double b)
Generates a LaTeX string for the models from JKQTPStatRegressionModelType in type.
jkqtmath_LIB_EXPORT std::pair< std::function< double(double)>, std::function< double(double)> > jkqtpStatGenerateParameterBTransformation(JKQTPStatRegressionModelType type)
Generates the transformation function for b-parameter (slope, result.first : transform,...
jkqtmath_LIB_EXPORT std::pair< std::function< double(double)>, std::function< double(double)> > jkqtpStatGenerateTransformation(JKQTPStatRegressionModelType type)
Generates the transformation function for x-data (result.first ) and y-data (result....
void jkqtpstatRobustIRLSRegression(JKQTPStatRegressionModelType type, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double &coeffA, double &coeffB, bool fixA=false, bool fixB=false, double p=1.1, int iterations=100)
calculate the robust linear regression coefficients for a given data range firstX / firstY ....
Definition jkqtpstatregression.h:384
void jkqtpstatRegression(JKQTPStatRegressionModelType type, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double &coeffA, double &coeffB, bool fixA=false, bool fixB=false)
calculate the linear regression coefficients for a given data range firstX / firstY ....
Definition jkqtpstatregression.h:338
double jkqtpstatWeightedSumOfDeviations(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, std::function< double(double)> f, std::function< double(double)> fWeightDataToWi=&jkqtp_identity< double >)
calculates the weighted sum of deviations for a set of measurements with a fit function
Definition jkqtpstatregression.h:605
void jkqtpstatRobustIRLSLinearRegression(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double &coeffA, double &coeffB, bool fixA=false, bool fixB=false, double p=1.1, int iterations=100)
calculate the (robust) iteratively reweighted least-squares (IRLS) estimate for the parameters of the...
Definition jkqtpstatregression.h:229
jkqtmath_LIB_EXPORT std::pair< std::function< double(double)>, std::function< double(double)> > jkqtpStatGenerateParameterATransformation(JKQTPStatRegressionModelType type)
Generates the transformation function for a-parameter (offset, result.first : transform,...
double jkqtpstatCoefficientOfDetermination(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, std::function< double(double)> f)
calculates the coefficient of determination for a set of measurements with a fit function
Definition jkqtpstatregression.h:474
void jkqtpstatWeightedRegression(JKQTPStatRegressionModelType type, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, double &coeffA, double &coeffB, bool fixA=false, bool fixB=false, std::function< double(double)> fWeightDataToWi=&jkqtp_identity< double >)
calculate the robust linear regression coefficients for a given data range firstX / firstY ....
Definition jkqtpstatregression.h:436
JKQTPStatRegressionModelType
when performing linear regression, different target functions can be fitted, if the input data is tra...
Definition jkqtpstatregression.h:270
void jkqtpstatLinearWeightedRegression(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, double &coeffA, double &coeffB, bool fixA=false, bool fixB=false, std::function< double(double)> fWeightDataToWi=&jkqtp_identity< double >)
calculate the weighted linear regression coefficients for a given for a given data range firstX / fir...
Definition jkqtpstatregression.h:144
jkqtmath_LIB_EXPORT std::function< double(double, double, double)> jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType type)
Generates functors f(x,a,b) for the models from JKQTPStatRegressionModelType in type.
double jkqtpstatSumOfDeviations(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, std::function< double(double)> f)
calculates the sum of deviations for a set of measurements with a fit function
Definition jkqtpstatregression.h:562
void jkqtpstatLinearRegression(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double &coeffA, double &coeffB, bool fixA=false, bool fixB=false)
calculate the linear regression coefficients for a given data range firstX / firstY ....
Definition jkqtpstatregression.h:71
@ Logarithm
exponential model
@ Exponential
exponential model