JKQTPlotter trunk/v5.0.0
an extensive Qt5+Qt6 Plotter framework (including a feature-richt plotter widget, a speed-optimized, but limited variant and a LaTeX equation renderer!), written fully in C/C++ and without external dependencies
Loading...
Searching...
No Matches
jkqtpstatkde.h
1/*
2 Copyright (c) 2008-2024 Jan W. Krieger (<jan@jkrieger.de>)
3
4 last modification: $LastChangedDate$ (revision $Rev$)
5
6 This software is free software: you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License (LGPL) as published by
8 the Free Software Foundation, either version 2.1 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU Lesser General Public License (LGPL) for more details.
15
16 You should have received a copy of the GNU Lesser General Public License (LGPL)
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
18*/
19
20
21#ifndef JKQTPSTATKDE_H_INCLUDED
22#define JKQTPSTATKDE_H_INCLUDED
23
24#include <stdint.h>
25#include <cmath>
26#include <stdlib.h>
27#include <string.h>
28#include <iostream>
29#include <stdio.h>
30#include <limits>
31#include <vector>
32#include <utility>
33#include <cfloat>
34#include <ostream>
35#include <iomanip>
36#include <sstream>
37#include "jkqtmath/jkqtmath_imexport.h"
38#include "jkqtmath/jkqtplinalgtools.h"
39#include "jkqtmath/jkqtparraytools.h"
40#include "jkqtcommon/jkqtpdebuggingtools.h"
41#include "jkqtmath/jkqtpstatbasics.h"
42
43
44
45
46
47/*! \brief a 1D Gaussian kernel function, e.g. for Kernel Density Estimation
48 \ingroup jkqtptools_math_statistics_1dkde_kernels
49
50 \f[ k(t):=\frac{1}{\sqrt{2\pi}}\exp \left(-\frac{1}{2}t^2\right) \f]
51*/
52inline double jkqtpstatKernel1DGaussian(double t) {
53 return exp(-0.5*t*t)/JKQTPSTATISTICS_SQRT_2PI;
54}
55/*! \brief a 1D Cauchy kernel function, e.g. for Kernel Density Estimation
56 \ingroup jkqtptools_math_statistics_1dkde_kernels
57
58 \f[ k(t):=\frac{1}{\pi(1+t^2)} \f]
59*/
60inline double jkqtpstatKernel1DCauchy(double t) {
61 return 1.0/(JKQTPSTATISTICS_PI*(1.0+t*t));
62}
63
64/*! \brief a 1D Picard kernel function, e.g. for Kernel Density Estimation
65 \ingroup jkqtptools_math_statistics_1dkde_kernels
66
67 \f[ k(t):=\frac{1}{2}\exp(-|t|) \f]
68*/
69inline double jkqtpstatKernel1DPicard(double t) {
70 return exp(-0.5*fabs(t))/2.0;
71}
72/*! \brief a 1D Epanechnikov kernel function, e.g. for Kernel Density Estimation
73 \ingroup jkqtptools_math_statistics_1dkde_kernels
74
75 \f[ k(t) :=\begin{cases}\frac{3}{4} ( 1- t^2 ), & \text{if }t\in [-1;1]\\0, & \text{else}\end{cases} \f]
76*/
77inline double jkqtpstatKernel1DEpanechnikov(double t) {
78 return (fabs(t)<1.0)?(0.75*(1.0-t*t)):0.0;
79}
80/*! \brief a 1D uniform kernel function, e.g. for Kernel Density Estimation
81 \ingroup jkqtptools_math_statistics_1dkde_kernels
82
83 \f[ k(t) :=\begin{cases}0.5, & \text{if }t\in [-1;1]\\0, & \text{else}\end{cases} \f]
84*/
85inline double jkqtpstatKernel1DUniform(double t) {
86 return (fabs(t)<=1.0)?0.5:0.0;
87}
88/*! \brief a 1D Epanechnikov kernel function, e.g. for Kernel Density Estimation
89 \ingroup jkqtptools_math_statistics_1dkde_kernels
90
91 \f[ k(t) :=\begin{cases}1-|t|, & \text{if }t\in [-1;1]\\0, & \text{else}\end{cases} \f]
92*/
93inline double jkqtpstatKernel1DTriangle(double t) {
94 return (fabs(t)<=1.0)?(1.0-fabs(t)):0.0;
95}
96
97/*! \brief a 1D quartic kernel function, e.g. for Kernel Density Estimation
98 \ingroup jkqtptools_math_statistics_1dkde_kernels
99
100 \f[ k(t) :=\begin{cases}\frac{15}{16}(1-t^2)^2, & \text{if }t\in [-1;1]\\0, & \text{else}\end{cases} \f]
101*/
102inline double jkqtpstatKernel1DQuartic(double t) {
103 return (fabs(t)<=1.0)?(15.0/16.0*jkqtp_sqr(1.0-t*t)):0.0;
104}
105/*! \brief a 1D triweight kernel function, e.g. for Kernel Density Estimation
106 \ingroup jkqtptools_math_statistics_1dkde_kernels
107
108 \f[ k(t) :=\begin{cases}\frac{35}{32}(1-t^2)^3, & \text{if }t\in [-1;1]\\0, & \text{else}\end{cases} \f]
109*/
110inline double jkqtpstatKernel1DTriweight(double t) {
111 return (fabs(t)<1.0)?(35.0/32.0*jkqtp_cube(1.0-t*t)):0.0;
112}
113
114/*! \brief a 1D tricube kernel function, e.g. for Kernel Density Estimation
115 \ingroup jkqtptools_math_statistics_1dkde_kernels
116
117 \f[ k(t) :=\begin{cases}\frac{70}{81}(1-|t|^3)^3, & \text{if }t\in [-1;1]\\0, & \text{else}\end{cases} \f]
118*/
119inline double jkqtpstatKernel1DTricube(double t) {
120 return (fabs(t)<1.0)?(70.0/81.0*jkqtp_cube(1.0-jkqtp_cube(fabs(t)))):0.0;
121}
122/*! \brief a 1D cosine kernel function, e.g. for Kernel Density Estimation
123 \ingroup jkqtptools_math_statistics_1dkde_kernels
124
125 \f[ k(t) :=\begin{cases}\frac{\pi}{4}\cos\left(\frac{\pi}{2}t\right), & \text{if }t\in [-1;1]\\0, & \text{else}\end{cases} \f]
126*/
127inline double jkqtpstatKernel1DCosine(double t) {
128 return (fabs(t)<1.0)?(JKQTPSTATISTICS_PI/4.0*cos(t*JKQTPSTATISTICS_PI/2.0)):0.0;
129}
130
131
132
133
134
135
136
137
138
139
140
141
142
143/*! \brief a 2D Gaussian kernel function, e.g. for Kernel Density Estimation
144 \ingroup jkqtptools_math_statistics_2dkde_kernels
145
146 \f[ k(t_x, t_y):=\frac{1}{2\pi}\exp \left(-\frac{t_x^2+t_y^2}{2}\right) \f]
147*/
148inline double jkqtpstatKernel2DGaussian(double tx, double ty)
149{
150 return exp(-0.5*(tx*tx+ty*ty))/(2.0*JKQTPSTATISTICS_PI);
151}
152
153/*! \brief a 2D Uniform kernel function, e.g. for Kernel Density Estimation
154 \ingroup jkqtptools_math_statistics_2dkde_kernels
155
156 \f[ k(t_x, t_y):=\begin{cases}\frac{1}{4}, & \text{if }t_x,t_y\in [-1;1]\\0, & \text{else}\end{cases} \f]
157*/
158inline double jkqtpstatKernel2DUniform(double tx, double ty) {
159 return (fabs(tx)<1.0 && fabs(ty)<=1.0)?0.25:0.0;
160}
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180/*! \brief estimates a bandwidth for a Kernel Density Estimator (KDE) of the given data \a first ... \a last
181 \ingroup jkqtptools_math_statistics_1dkde
182
183 evaluates \f[ h = \left(\frac{4\hat{\sigma}^5}{3n}\right)^{\frac{1}{5}} \approx 1.06 \hat{\sigma} n^{-1/5} \f]
184
185 \tparam InputIt standard iterator type of \a first and \a last.
186 \param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
187 \param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
188 \return the estimated bandwidth
189
190*/
191template <class InputIt>
192inline double jkqtpstatEstimateKDEBandwidth(InputIt first, InputIt last) {
193 size_t N=0;
194 const double sigma=jkqtpstatStdDev(first, last, nullptr, &N);
195 return 1.06*sigma/pow(static_cast<double>(N), 1.0/5.0);
196}
197
198
199
200
201/*! \brief evaluates the Kernel Density Estimator (KDE) at a given position
202 \ingroup jkqtptools_math_statistics_1dkde
203
204 evaluates \f[ \tilde{f}(t):=\frac{1}{N\cdot\text{bandwidth}}\cdot\sum\limits_{i=0}^{N-1}K\left(\frac{t-x_i}{\text{bandwidth}}\right) \f]
205
206 \tparam InputIt standard iterator type of \a first and \a last.
207 \param t where to evaluate the kernel sum
208 \param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
209 \param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
210 \param kernel the kernel function to use (e.g. jkqtpstatKernel1DGaussian() )
211 \param bandwidth bandwidth used for the KDE
212
213*/
214template <class InputIt>
215inline double jkqtpstatEvaluateKernelSum(double t, InputIt first, InputIt last, const std::function<double(double)>& kernel, double bandwidth) {
216 double res=0;
217 size_t cnt=0;
218 for (auto it=first; it!=last; ++it) {
219 const double v=jkqtp_todouble(*it);
220 if (JKQTPIsOKFloat(v)) {
221 const double vx=(t-v)/bandwidth;
222 res+=kernel(vx);
223 cnt++;
224 }
225 }
226 if (cnt==0) return 0.0;
227 return res/static_cast<double>(cnt)/bandwidth;
228}
229
230
231
232
233/*! \brief calculate an autoranged 1-dimensional Kernel Density Estimation (KDE) from the given data range \a first ... \a last, bins defined by their number
234 \ingroup jkqtptools_math_statistics_1dkde
235
236 \tparam InputIt standard iterator type of \a first and \a last.
237 \tparam OutputIt standard output iterator type used for the outliers output \a KDEXOut and \a KDEYOut, use e.g. std::back_inserter
238 \param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
239 \param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
240 \param kernel the kernel function to use (e.g. jkqtpstatKernel1DGaussian() )
241 \param bandwidth bandwidth used for the KDE
242 \param[out] KDEXOut output iterator that receives x-positions of the KDE bins. Location of this value inside the bin range is defined by \a binXMode
243 \param[out] KDEYOut output iterator that receives counts/frequencies of the KDE bins
244 \param Nout number datapoints in the output KDE
245 \param cummulative if \c true, a cummulative KDE is calculated
246
247 This function performs <a href="https://en.wikipedia.org/wiki/Kernel_density_estimation">Kernel Density Estimation</a> for a given data array.
248 Then the resulting density is evaluated on a regular grid spanning [min(X)...max(X)] with bins datapoints in between.
249
250 \warning this functions is getting very slow for large dataset, as for each point in the resulting histogram N kernel functions have to be evaluated.
251
252 \see https://en.wikipedia.org/wiki/Kernel_density_estimation, \ref JKQTPlotterBasicJKQTPDatastoreStatistics
253*/
254template <class InputIt, class OutputIt>
255inline void jkqtpstatKDE1DAutoranged(InputIt first, InputIt last, OutputIt KDEXOut, OutputIt KDEYOut, int Nout=100, const std::function<double(double)>& kernel=std::function<double(double)>(&jkqtpstatKernel1DGaussian), double bandwidth=1.0, bool cummulative=false) {
256 double minV=0, maxV=0;
257 size_t N=0;
258 jkqtpstatMinMax<InputIt>(first, last, minV, maxV, nullptr, nullptr, &N);
259
260 std::vector<double> histX;
261 std::vector<double> histY;
262
263 const double range=maxV-minV;
264 const double binw=range/static_cast<double>(Nout);
265
266 // calculate the KDE
267 for (double xi=minV; xi<=maxV; xi+=binw) {
268 histX.push_back(xi);
269 histY.push_back(jkqtpstatEvaluateKernelSum(xi, first, last, kernel, bandwidth));
270 }
271 if (histX.size()>0 && histX[histX.size()-1]<maxV) {
272 histX.push_back(maxV);
273 histY.push_back(jkqtpstatEvaluateKernelSum(maxV, first, last, kernel, bandwidth));
274 }
275
276
277
278 // output the KDE
279 double h=0;
280 for (size_t i=0; i<histX.size(); i++) {
281 *KDEXOut=histX[i];
282 if (cummulative) h+=histY[i];
283 else h=histY[i];
284 *KDEYOut=h;
285 ++KDEXOut;
286 ++KDEYOut;
287 }
288}
289
290
291/*! \brief calculate an autoranged 1-dimensional Kernel Density Estimation (KDE) from the given data range \a first ... \a last, bins defined by their number
292 \ingroup jkqtptools_math_statistics_1dkde
293
294 \tparam InputIt standard iterator type of \a first and \a last.
295 \tparam OutputIt standard output iterator type used for the outliers output \a KDEXOut and \a KDEYOut, use e.g. std::back_inserter
296 \param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
297 \param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
298 \param kernel the kernel function to use (e.g. jkqtpstatKernel1DGaussian() )
299 \param bandwidth bandwidth used for the KDE
300 \param[out] KDEXOut output iterator that receives x-positions of the KDE bins. Location of this value inside the bin range is defined by \a binXMode
301 \param[out] KDEYOut output iterator that receives counts/frequencies of the KDE bins
302 \param binWidth width of the bins
303 \param cummulative if \c true, a cummulative KDE is calculated
304
305 This function performs <a href="https://en.wikipedia.org/wiki/Kernel_density_estimation">Kernel Density Estimation</a> for a given data array.
306 Then the resulting density is evaluated on a regular grid spanning [min(X)...max(X)] with bins datapoints in between.
307
308 \warning this functions is getting very slow for large dataset, as for each point in the resulting histogram N kernel functions have to be evaluated.
309
310 \see https://en.wikipedia.org/wiki/Kernel_density_estimation, \ref JKQTPlotterBasicJKQTPDatastoreStatistics
311*/
312template <class InputIt, class OutputIt>
313inline void jkqtpstatKDE1DAutoranged(InputIt first, InputIt last, OutputIt KDEXOut, OutputIt KDEYOut, double binWidth, const std::function<double(double)>& kernel=std::function<double(double)>(&jkqtpstatKernel1DGaussian), double bandwidth=1.0, bool cummulative=false) {
314 double minV=0, maxV=0;
315 size_t N=0;
316 jkqtpstatMinMax<InputIt>(first, last, minV, maxV, nullptr, nullptr, &N);
317
318 std::vector<double> histX;
319 std::vector<double> histY;
320
321 const double binw=binWidth;
322
323 // calculate the KDE
324 for (double xi=minV; xi<=maxV; xi+=binw) {
325 histX.push_back(xi);
326 histY.push_back(jkqtpstatEvaluateKernelSum(xi, first, last, kernel, bandwidth));
327 }
328 if (histX.size()>0 && histX[histX.size()-1]<maxV) {
329 histX.push_back(maxV);
330 histY.push_back(jkqtpstatEvaluateKernelSum(maxV, first, last, kernel, bandwidth));
331 }
332
333
334 // output the KDE
335 double h=0;
336 for (size_t i=0; i<histX.size(); i++) {
337 *KDEXOut=histX[i];
338 if (cummulative) h+=histY[i];
339 else h=histY[i];
340 *KDEYOut=h;
341 ++KDEXOut;
342 ++KDEYOut;
343 }
344
345}
346
347
348
349/*! \brief calculate an autoranged 1-dimensional Kernel Density Estimation (KDE) from the given data range \a first ... \a last, bins defined the range \a binsFirst ... \a binsLast
350 \ingroup jkqtptools_math_statistics_1dkde
351
352 \tparam InputIt standard iterator type of \a first and \a last.
353 \tparam BinsInputIt standard iterator type of \a binsFirst and \a binsLast.
354 \tparam OutputIt standard output iterator type used for the outliers output \a KDEXOut and \a KDEYOut, use e.g. std::back_inserter
355 \param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
356 \param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
357 \param binsFirst iterator pointing to the first item in the set of KDE bins
358 \param binsLast iterator pointing behind the last item in the set of KDE bins
359 \param[out] KDEXOut output iterator that receives x-positions of the KDE bins. Location of this value inside the bin range is defined by \a binXMode
360 \param[out] KDEYOut output iterator that receives counts/frequencies of the KDE bins
361 \param kernel the kernel function to use (e.g. jkqtpstatKernel1DGaussian() )
362 \param bandwidth bandwidth used for the KDE
363 \param cummulative if \c true, a cummulative KDE is calculated
364
365 \see https://en.wikipedia.org/wiki/Kernel_density_estimation, \ref JKQTPlotterBasicJKQTPDatastoreStatistics
366*/
367template <class InputIt, class BinsInputIt, class OutputIt>
368inline void jkqtpstatKDE1D(InputIt first, InputIt last, BinsInputIt binsFirst, BinsInputIt binsLast, OutputIt KDEXOut, OutputIt KDEYOut, const std::function<double(double)>& kernel=std::function<double(double)>(&jkqtpstatKernel1DGaussian), double bandwidth=1.0, bool cummulative=false) {
369 double minV=0, maxV=0;
370 size_t N=0;
371 jkqtpstatMinMax<InputIt>(first, last, minV, maxV, nullptr, nullptr, &N);
372
373 std::vector<double> histX;
374 std::vector<double> histY;
375
376
377 // initialize the KDE
378 for (auto it=binsFirst; it!=binsLast; ++it) {
379 histX.push_back(jkqtp_todouble(*it));
380 }
381 std::sort(histX.begin(), histX.end());
382
383 // calculate the KDE
384 for (auto it=histX.begin(); it!=histX.end(); ++it) {
385 histY.push_back(jkqtpstatEvaluateKernelSum(*it, first, last, kernel, bandwidth));
386 }
387
388
389 // output the KDE
390 double h=0;
391 for (size_t i=0; i<histX.size(); i++) {
392 *KDEXOut=histX[i];
393 if (cummulative) h+=histY[i];
394 else h=histY[i];
395 *KDEYOut=h;
396 ++KDEXOut;
397 ++KDEYOut;
398 }
399
400}
401
402
403
404
405/*! \brief calculate an autoranged 1-dimensional Kernel Density Estimation (KDE) from the given data range \a first ... \a last, evaluation positions are given by the range \a binXLeft ... \a binXRight (in steps of \a binxDelta )
406 \ingroup jkqtptools_math_statistics_1dkde
407
408 \tparam InputIt standard iterator type of \a first and \a last.
409 \tparam OutputIt standard output iterator type used for the outliers output \a KDEXOut and \a KDEYOut, use e.g. std::back_inserter
410 \param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
411 \param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
412 \param binXLeft first x-position, where to evaluate the KDE
413 \param binXDelta distance between two x-positions at which the KDE is evaluated
414 \param binXRight last x-position, where to evaluate the KDE
415 \param[out] KDEXOut output iterator that receives x-positions of the KDE bins. Location of this value inside the bin range is defined by \a binXMode
416 \param[out] KDEYOut output iterator that receives counts/frequencies of the KDE bins
417 \param kernel the kernel function to use (e.g. jkqtpstatKernel1DGaussian() )
418 \param bandwidth bandwidth used for the KDE
419 \param cummulative if \c true, a cummulative KDE is calculated
420
421 \see https://en.wikipedia.org/wiki/Kernel_density_estimation, \ref JKQTPlotterBasicJKQTPDatastoreStatistics
422*/
423template <class InputIt, class OutputIt>
424inline void jkqtpstatKDE1D(InputIt first, InputIt last, double binXLeft, double binXDelta, double binXRight, OutputIt KDEXOut, OutputIt KDEYOut, const std::function<double(double)>& kernel=std::function<double(double)>(&jkqtpstatKernel1DGaussian), double bandwidth=1.0, bool cummulative=false) {
425 double minV=0, maxV=0;
426 size_t N=0;
427 jkqtpstatMinMax<InputIt>(first, last, minV, maxV, nullptr, nullptr, &N);
428
429 std::vector<double> histX;
430 std::vector<double> histY;
431
432
433 // calculate the KDE
434 for (double x=binXLeft; x<=binXRight; x+=binXDelta) {
435 histX.push_back(x);
436 histY.push_back(jkqtpstatEvaluateKernelSum(x, first, last, kernel, bandwidth));
437 }
438
439
440 // output the KDE
441 double h=0;
442 for (size_t i=0; i<histX.size(); i++) {
443 *KDEXOut=histX[i];
444 if (cummulative) h+=histY[i];
445 else h=histY[i];
446 *KDEYOut=h;
447 ++KDEXOut;
448 ++KDEYOut;
449 }
450
451}
452
453
454
455
456
457
458
459
460
461/*! \brief evaluates the Kernel Density Estimator (KDE) at a given position
462 \ingroup jkqtptools_math_statistics_2dkde
463
464 evaluates \f[ \tilde{f}(x,y):=\frac{1}{N\cdot\sqrt{\text{bandwidthx}}\cdot\sqrt{\text{bandwidthy}}}\cdot\sum\limits_{i=0}^{N-1}K\left(\frac{x-x_i}{\text{bandwidthx}},\frac{y-y_i}{\text{bandwidthy}}\right) \f]
465
466 \tparam InputItX standard iterator type of \a firstX and \a lastX.
467 \tparam InputItY standard iterator type of \a firstY and \a lastY.
468 \param x where to evaluate the kernel sum, x-coordinate
469 \param y where to evaluate the kernel sum, y-coordinate
470 \param firstX iterator pointing to the first x-position item in the dataset to use \f$ X_1 \f$
471 \param lastX iterator pointing behind the last x-position item in the dataset to use \f$ X_N \f$
472 \param firstY iterator pointing to the first y-position item in the dataset to use \f$ Y_1 \f$
473 \param lastY iterator pointing behind the last y-position item in the dataset to use \f$ Y_N \f$
474 \param kernel the kernel function to use (e.g. jkqtpstatKernel1DGaussian() )
475 \param bandwidthX x-bandwidth used for the KDE
476 \param bandwidthY y-bandwidth used for the KDE
477
478*/
479template <class InputItX, class InputItY>
480inline double jkqtpstatEvaluateKernelSum2D(double x, double y, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, const std::function<double(double,double)>& kernel, double bandwidthX, double bandwidthY) {
481 double res=0;
482 size_t cnt=0;
483 auto itX=firstX;
484 auto itY=firstY;
485 for (; (itX!=lastX)&&(itY!=lastY); ++itX, ++itY) {
486 const double vx=jkqtp_todouble(*itX);
487 const double vy=jkqtp_todouble(*itY);
488 if (JKQTPIsOKFloat(vx) && JKQTPIsOKFloat(vy)) {
489 const double vvx=(x-vx)/bandwidthX;
490 const double vvy=(y-vy)/bandwidthY;
491 res+=kernel(vvx,vvy);
492 cnt++;
493 }
494 }
495 if (cnt==0) return 0.0;
496 return res/static_cast<double>(cnt)/sqrt(bandwidthX*bandwidthY);
497}
498
499
500
501/*! \brief stores a part-result of the Kernel Density Estimator (KDE) at a given position
502 \ingroup jkqtptools_math_statistics_2dkde
503 \internal
504
505 Iterates over all datapoints in firstX..lastX and firstY..lastY, for each one determines whether the floating-point values are valid
506 and the calls fStore, which is a functor that is supposed to iterate over the KDE 2D array and over calls sum up the KDE.
507
508 This is internally used by jkqtpstatKDE2D(). The construction is not as straight-forward, as iterating over all positions in the 2D-KJDE
509 and the calling jkqtpstatEvaluateKernelSum2D(), bt by reordering the loops it is significatyl faster (~20%)
510
511 \tparam InputItX standard iterator type of \a firstX and \a lastX.
512 \tparam InputItY standard iterator type of \a firstY and \a lastY.
513 \tparam FF functor
514 \param x where to evaluate the kernel sum, x-coordinate
515 \param y where to evaluate the kernel sum, y-coordinate
516 \param firstX iterator pointing to the first x-position item in the dataset to use \f$ X_1 \f$
517 \param lastX iterator pointing behind the last x-position item in the dataset to use \f$ X_N \f$
518 \param firstY iterator pointing to the first y-position item in the dataset to use \f$ Y_1 \f$
519 \param lastY iterator pointing behind the last y-position item in the dataset to use \f$ Y_N \f$
520 \param kernel the kernel function to use (e.g. jkqtpstatKernel1DGaussian() )
521 \param bandwidthX x-bandwidth used for the KDE
522 \param bandwidthY y-bandwidth used for the KDE
523
524*/
525template <class InputItX, class InputItY, class FF>
526inline int jkqtpstatStoreKernelSum2D(FF&& fStore, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, const std::function<double(double,double)>& kernel, double bandwidthX, double bandwidthY) {
527 auto itX=firstX;
528 auto itY=firstY;
529 int cnt=0;
530 for (; (itX!=lastX)&&(itY!=lastY); ++itX, ++itY) {
531 const double vx=jkqtp_todouble(*itX);
532 const double vy=jkqtp_todouble(*itY);
533 if (JKQTPIsOKFloat(vx) && JKQTPIsOKFloat(vy)) {
534 fStore(vx,vy, kernel,bandwidthX, bandwidthY);
535 cnt++;
536 }
537 }
538 return cnt;
539}
540
541/*! \brief calculate an autoranged 2-dimensional Kernel Density Estimation (KDE) from the given data range \a firstX / \a firstY ... \a lastY / \a lastY
542 \ingroup jkqtptools_math_statistics_2dkde
543
544 \tparam InputItX standard iterator type of \a firstX and \a lastX.
545 \tparam InputItY standard iterator type of \a firstY and \a lastY.
546 \tparam OutputIt standard output iterator type used for the outliers output \a histogramXOut and \a histogramYOut, use e.g. std::back_inserter
547 \param firstX iterator pointing to the first x-position item in the dataset to use \f$ X_1 \f$
548 \param lastX iterator pointing behind the last x-position item in the dataset to use \f$ X_N \f$
549 \param firstY iterator pointing to the first y-position item in the dataset to use \f$ Y_1 \f$
550 \param lastY iterator pointing behind the last y-position item in the dataset to use \f$ Y_N \f$
551 \param[out] histogramImgOut output iterator that receives counts of the histogram bins in row-major ordering
552 \param xmin position of the first histogram bin in x-direction
553 \param xmax position of the last histogram bin in x-direction
554 \param ymin position of the first histogram bin in y-direction
555 \param ymax position of the last histogram bin in y-direction
556 \param xbins number of bins in x-direction (i.e. width of the output histogram \a histogramImgOut )
557 \param ybins number of bins in y-direction (i.e. height of the output histogram \a histogramImgOut )
558 \param kernel the kernel function to use (e.g. jkqtpstatKernel2DGaussian() )
559 \param bandwidthX x-bandwidth used for the KDE
560 \param bandwidthY y-bandwidth used for the KDE
561
562 \see https://en.wikipedia.org/wiki/Multivariate_kernel_density_estimation, \ref JKQTPlotterBasicJKQTPDatastoreStatistics
563*/
564
565template <class InputItX, class InputItY, class OutputIt>
566inline void jkqtpstatKDE2D(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, OutputIt histogramImgOut, double xmin, double xmax, double ymin, double ymax, size_t xbins, size_t ybins, const std::function<double(double,double)>& kernel=std::function<double(double,double)>(&jkqtpstatKernel2DGaussian), double bandwidthX=1.0, double bandwidthY=1.0) {
567
568 const double binwx=fabs(xmax-xmin)/static_cast<double>(xbins);
569 const double binwy=fabs(ymax-ymin)/static_cast<double>(ybins);
570
571 {
572 auto itOut=histogramImgOut;
573 for (size_t by=0; by<ybins; by++) {
574 for (size_t bx=0; bx<xbins; bx++) {
575 *itOut=0;
576 ++itOut;
577 }
578 }
579 }
580
581 const double cnt=jkqtpstatStoreKernelSum2D([&](double vx, double vy, const std::function<double(double,double)>& kernel, double bandwidthX, double bandwidthY){
582 double y=ymin;
583 auto itOut=histogramImgOut;
584
585
586 for (size_t by=0; by<ybins; by++) {
587 double x=xmin;
588 for (size_t bx=0; bx<xbins; bx++) {
589 const double vvx=(x-vx)/bandwidthX;
590 const double vvy=(y-vy)/bandwidthY;
591 *itOut+=kernel(vvx,vvy)/sqrt(bandwidthX*bandwidthY);
592 //std::cout<<x<<","<<y<<","<<vv<<*itOut<<std::endl;
593 x+=binwx;
594 ++itOut;
595 }
596 y+=binwy;
597 }
598 }, firstX, lastX, firstY, lastY, kernel, bandwidthX, bandwidthY);
599
600
601 {
602 auto itOut=histogramImgOut;
603 for (size_t by=0; by<ybins; by++) {
604 for (size_t bx=0; bx<xbins; bx++) {
605 *itOut/=cnt;
606 ++itOut;
607 }
608 }
609 }
610}
611
612
613
614
615/*! \brief estimates a bandwidth for a 2-dimensional Kernel Density Estimator (KDE) of the given data \a first ... \a last using Scott's rule
616 \ingroup jkqtptools_math_statistics_2dkde
617
618 evaluates \f[ h = \hat{\sigma} n^{-1/(d+4)},\ \ \ \ \ d=2 \f]
619
620 \tparam InputIt standard iterator type of \a first and \a last.
621 \param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
622 \param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
623 \return the estimated bandwidth
624
625 \see https://en.wikipedia.org/wiki/Multivariate_kernel_density_estimation#Rule_of_thumb
626
627*/
628template <class InputIt>
629inline double jkqtpstatEstimateKDEBandwidth2D(InputIt first, InputIt last) {
630 size_t N=0;
631 const double sigma=jkqtpstatStdDev(first, last, nullptr, &N);
632 return sigma/pow(static_cast<double>(N), 1.0/(2.0+4.0));
633}
634
635
636
637#endif // JKQTPSTATKDE_H_INCLUDED
638
639
#define JKQTPSTATISTICS_SQRT_2PI
Definition jkqtpmathtools.h:60
constexpr double jkqtp_todouble(const T &d)
converts a boolean to a double, is used to convert boolean to double by JKQTPDatastore
Definition jkqtpmathtools.h:113
T jkqtp_sqr(const T &v)
returns the quare of the value v, i.e. v*v
Definition jkqtpmathtools.h:327
T jkqtp_cube(T x)
cube of a number
Definition jkqtpmathtools.h:357
#define JKQTPSTATISTICS_PI
Definition jkqtpmathtools.h:52
bool JKQTPIsOKFloat(T v)
check whether the dlotaing point number is OK (i.e. non-inf, non-NAN)
Definition jkqtpmathtools.h:496
double jkqtpstatKernel1DUniform(double t)
a 1D uniform kernel function, e.g. for Kernel Density Estimation
Definition jkqtpstatkde.h:85
double jkqtpstatKernel1DCosine(double t)
a 1D cosine kernel function, e.g. for Kernel Density Estimation
Definition jkqtpstatkde.h:127
double jkqtpstatKernel1DQuartic(double t)
a 1D quartic kernel function, e.g. for Kernel Density Estimation
Definition jkqtpstatkde.h:102
double jkqtpstatKernel1DTricube(double t)
a 1D tricube kernel function, e.g. for Kernel Density Estimation
Definition jkqtpstatkde.h:119
double jkqtpstatKernel1DTriweight(double t)
a 1D triweight kernel function, e.g. for Kernel Density Estimation
Definition jkqtpstatkde.h:110
double jkqtpstatKernel1DCauchy(double t)
a 1D Cauchy kernel function, e.g. for Kernel Density Estimation
Definition jkqtpstatkde.h:60
double jkqtpstatKernel1DEpanechnikov(double t)
a 1D Epanechnikov kernel function, e.g. for Kernel Density Estimation
Definition jkqtpstatkde.h:77
double jkqtpstatKernel1DGaussian(double t)
a 1D Gaussian kernel function, e.g. for Kernel Density Estimation
Definition jkqtpstatkde.h:52
double jkqtpstatKernel1DTriangle(double t)
a 1D Epanechnikov kernel function, e.g. for Kernel Density Estimation
Definition jkqtpstatkde.h:93
double jkqtpstatKernel1DPicard(double t)
a 1D Picard kernel function, e.g. for Kernel Density Estimation
Definition jkqtpstatkde.h:69
void jkqtpstatKDE1DAutoranged(InputIt first, InputIt last, OutputIt KDEXOut, OutputIt KDEYOut, int Nout=100, const std::function< double(double)> &kernel=std::function< double(double)>(&jkqtpstatKernel1DGaussian), double bandwidth=1.0, bool cummulative=false)
calculate an autoranged 1-dimensional Kernel Density Estimation (KDE) from the given data range first...
Definition jkqtpstatkde.h:255
double jkqtpstatEstimateKDEBandwidth(InputIt first, InputIt last)
estimates a bandwidth for a Kernel Density Estimator (KDE) of the given data first ....
Definition jkqtpstatkde.h:192
double jkqtpstatEvaluateKernelSum(double t, InputIt first, InputIt last, const std::function< double(double)> &kernel, double bandwidth)
evaluates the Kernel Density Estimator (KDE) at a given position
Definition jkqtpstatkde.h:215
void jkqtpstatKDE1D(InputIt first, InputIt last, BinsInputIt binsFirst, BinsInputIt binsLast, OutputIt KDEXOut, OutputIt KDEYOut, const std::function< double(double)> &kernel=std::function< double(double)>(&jkqtpstatKernel1DGaussian), double bandwidth=1.0, bool cummulative=false)
calculate an autoranged 1-dimensional Kernel Density Estimation (KDE) from the given data range first...
Definition jkqtpstatkde.h:368
double jkqtpstatKernel2DUniform(double tx, double ty)
a 2D Uniform kernel function, e.g. for Kernel Density Estimation
Definition jkqtpstatkde.h:158
double jkqtpstatKernel2DGaussian(double tx, double ty)
a 2D Gaussian kernel function, e.g. for Kernel Density Estimation
Definition jkqtpstatkde.h:148
double jkqtpstatEvaluateKernelSum2D(double x, double y, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, const std::function< double(double, double)> &kernel, double bandwidthX, double bandwidthY)
evaluates the Kernel Density Estimator (KDE) at a given position
Definition jkqtpstatkde.h:480
int jkqtpstatStoreKernelSum2D(FF &&fStore, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, const std::function< double(double, double)> &kernel, double bandwidthX, double bandwidthY)
stores a part-result of the Kernel Density Estimator (KDE) at a given position
Definition jkqtpstatkde.h:526
void jkqtpstatKDE2D(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, OutputIt histogramImgOut, double xmin, double xmax, double ymin, double ymax, size_t xbins, size_t ybins, const std::function< double(double, double)> &kernel=std::function< double(double, double)>(&jkqtpstatKernel2DGaussian), double bandwidthX=1.0, double bandwidthY=1.0)
calculate an autoranged 2-dimensional Kernel Density Estimation (KDE) from the given data range first...
Definition jkqtpstatkde.h:566
double jkqtpstatEstimateKDEBandwidth2D(InputIt first, InputIt last)
estimates a bandwidth for a 2-dimensional Kernel Density Estimator (KDE) of the given data first ....
Definition jkqtpstatkde.h:629
double jkqtpstatStdDev(InputIt first, InputIt last, double *averageOut=nullptr, size_t *Noutput=nullptr)
calculates the standard deviation of a given data range first ... last
Definition jkqtpstatbasics.h:515