mirror of
https://github.com/subsurface/subsurface.git
synced 2025-02-19 22:16:15 +00:00
Plot proper confidence regions
I was coninced that that rather than doing an order of magnitude estimate of the confidence region it's better to have the correct concave shapes that indicate the 95% confidence level for the regression line. It also turned out that the previous expression was missing a factor of 1/sqrt(n). Signed-off-by: Robert C. Helling <helling@atdotde.de>
This commit is contained in:
parent
5775bd7b27
commit
d6712bc5ac
2 changed files with 38 additions and 36 deletions
|
@ -723,10 +723,10 @@ void StatsView::QuartileMarker::updatePosition()
|
||||||
x + quartileMarkerSize / 2.0, y);
|
x + quartileMarkerSize / 2.0, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
StatsView::RegressionLine::RegressionLine(double a, double b, double width, QBrush brush, QGraphicsScene *scene, StatsAxis *xAxis, StatsAxis *yAxis) :
|
StatsView::RegressionLine::RegressionLine(const struct regression_data reg, QBrush brush, QGraphicsScene *scene, StatsAxis *xAxis, StatsAxis *yAxis) :
|
||||||
item(createItemPtr<QGraphicsPolygonItem>(scene)),
|
item(createItemPtr<QGraphicsPolygonItem>(scene)),
|
||||||
xAxis(xAxis), yAxis(yAxis),
|
xAxis(xAxis), yAxis(yAxis),
|
||||||
a(a), b(b), width(width)
|
reg(reg)
|
||||||
{
|
{
|
||||||
item->setZValue(ZValues::chartFeatures);
|
item->setZValue(ZValues::chartFeatures);
|
||||||
item->setPen(Qt::NoPen);
|
item->setPen(Qt::NoPen);
|
||||||
|
@ -740,12 +740,14 @@ void StatsView::RegressionLine::updatePosition()
|
||||||
auto [minX, maxX] = xAxis->minMax();
|
auto [minX, maxX] = xAxis->minMax();
|
||||||
auto [minY, maxY] = yAxis->minMax();
|
auto [minY, maxY] = yAxis->minMax();
|
||||||
|
|
||||||
|
// Draw the confidence interval according to http://www2.stat.duke.edu/~tjl13/s101/slides/unit6lec3H.pdf p.5 with t*=2 for 95% confidence
|
||||||
QPolygonF poly;
|
QPolygonF poly;
|
||||||
poly << QPointF(xAxis->toScreen(minX), yAxis->toScreen(a * minX + b + width))
|
for (double x = minX; x <= maxX + 1; x += (maxX - minX) / 100)
|
||||||
<< QPointF(xAxis->toScreen(maxX), yAxis->toScreen(a * maxX + b + width))
|
poly << QPointF(xAxis->toScreen(x),
|
||||||
<< QPointF(xAxis->toScreen(maxX), yAxis->toScreen(a * maxX + b - width))
|
yAxis->toScreen(reg.a * x + reg.b + 2.0 * sqrt(reg.res2 / (reg.n - 2) * (1.0 / reg.n + (x - reg.xavg) * (x - reg.xavg) / (reg.n - 1) * (reg.n -2) / reg.sx2))));
|
||||||
<< QPointF(xAxis->toScreen(minX), yAxis->toScreen(a * minX + b - width))
|
for (double x = maxX; x >= minX - 1; x -= (maxX - minX) / 100)
|
||||||
<< QPointF(xAxis->toScreen(minX), yAxis->toScreen(a * minX + b + width));
|
poly << QPointF(xAxis->toScreen(x),
|
||||||
|
yAxis->toScreen(reg.a * x + reg.b - 2.0 * sqrt(reg.res2 / (reg.n - 2) * (1.0 / reg.n + (x - reg.xavg) * (x - reg.xavg) / (reg.n - 1) * (reg.n -2) / reg.sx2))));
|
||||||
QRectF box(QPoint(xAxis->toScreen(minX), yAxis->toScreen(minY)), QPoint(xAxis->toScreen(maxX), yAxis->toScreen(maxY)));
|
QRectF box(QPoint(xAxis->toScreen(minX), yAxis->toScreen(minY)), QPoint(xAxis->toScreen(maxX), yAxis->toScreen(maxY)));
|
||||||
|
|
||||||
item->setPolygon(poly.intersected(box));
|
item->setPolygon(poly.intersected(box));
|
||||||
|
@ -780,15 +782,15 @@ void StatsView::addHistogramMarker(double pos, const QPen &pen, bool isHorizonta
|
||||||
histogramMarkers.emplace_back(pos, isHorizontal, pen, &scene, xAxis, yAxis);
|
histogramMarkers.emplace_back(pos, isHorizontal, pen, &scene, xAxis, yAxis);
|
||||||
}
|
}
|
||||||
|
|
||||||
void StatsView::addLinearRegression(double a, double b, double res2, double r2, double minX, double maxX, double minY, double maxY, StatsAxis *xAxis, StatsAxis *yAxis)
|
void StatsView::addLinearRegression(const struct regression_data reg, StatsAxis *xAxis, StatsAxis *yAxis)
|
||||||
{
|
{
|
||||||
QColor red = QColor(Qt::red);
|
QColor red = QColor(Qt::red);
|
||||||
red.setAlphaF(r2);
|
red.setAlphaF(reg.r2);
|
||||||
QPen pen(red);
|
QPen pen(red);
|
||||||
QBrush brush(red);
|
QBrush brush(red);
|
||||||
brush.setStyle(Qt::SolidPattern);
|
brush.setStyle(Qt::SolidPattern);
|
||||||
|
|
||||||
regressionLines.emplace_back(a, b, sqrt(res2), brush, &scene, xAxis, yAxis);
|
regressionLines.emplace_back(reg, brush, &scene, xAxis, yAxis);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Yikes, we get our data in different kinds of (bin, value) pairs.
|
// Yikes, we get our data in different kinds of (bin, value) pairs.
|
||||||
|
@ -1027,11 +1029,6 @@ static bool is_linear_regression(int sample_size, double cov, double sx2, double
|
||||||
return true; // can't happen, as we tested for sample_size above.
|
return true; // can't happen, as we tested for sample_size above.
|
||||||
}
|
}
|
||||||
|
|
||||||
struct regression_data {
|
|
||||||
double a,b;
|
|
||||||
double res2, r2;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Returns the coefficients a,b of the line y = ax + b
|
// Returns the coefficients a,b of the line y = ax + b
|
||||||
// as well as the variance of the residuals (averaged residual squared) as res2
|
// as well as the variance of the residuals (averaged residual squared) as res2
|
||||||
// and r^2 = 1.0 - variance of data / res2 which is the fraction of the variance of
|
// and r^2 = 1.0 - variance of data / res2 which is the fraction of the variance of
|
||||||
|
@ -1040,17 +1037,18 @@ struct regression_data {
|
||||||
|
|
||||||
static struct regression_data linear_regression(const std::vector<StatsScatterItem> &v)
|
static struct regression_data linear_regression(const std::vector<StatsScatterItem> &v)
|
||||||
{
|
{
|
||||||
if (v.size() < 2)
|
struct regression_data ret = { .a = NaN, .b = NaN, .res2 = 0.0, .r2 = 0.0, .sx2 = 0.0, .xavg = 0.0};
|
||||||
return { .a = NaN, .b = NaN, .res2 = 0.0, .r2 = 0.0};
|
ret.n = v.size();
|
||||||
|
if (ret.n < 2)
|
||||||
|
return ret;
|
||||||
// First, calculate the x and y average
|
// First, calculate the x and y average
|
||||||
double avg_x = 0.0, avg_y = 0.0;
|
double avg_x = 0.0, avg_y = 0.0;
|
||||||
for (auto [x, y, d]: v) {
|
for (auto [x, y, d]: v) {
|
||||||
avg_x += x;
|
avg_x += x;
|
||||||
avg_y += y;
|
avg_y += y;
|
||||||
}
|
}
|
||||||
avg_x /= (double)v.size();
|
avg_x /= ret.n;
|
||||||
avg_y /= (double)v.size();
|
avg_y /= ret.n;
|
||||||
|
|
||||||
double cov = 0.0, sx2 = 0.0, sy2 = 0.0;
|
double cov = 0.0, sx2 = 0.0, sy2 = 0.0;
|
||||||
for (auto [x, y, d]: v) {
|
for (auto [x, y, d]: v) {
|
||||||
|
@ -1062,15 +1060,16 @@ static struct regression_data linear_regression(const std::vector<StatsScatterIt
|
||||||
bool is_linear = is_linear_regression((int)v.size(), cov, sx2, sy2);
|
bool is_linear = is_linear_regression((int)v.size(), cov, sx2, sy2);
|
||||||
|
|
||||||
if (fabs(sx2) < 1e-10 || !is_linear) // If t is not statistically significant, do not plot the regression line.
|
if (fabs(sx2) < 1e-10 || !is_linear) // If t is not statistically significant, do not plot the regression line.
|
||||||
return { .a = NaN, .b = NaN, .res2 = 0.0, .r2 = 0.0};
|
return ret;
|
||||||
double a = cov / sx2;
|
ret.xavg = avg_x;
|
||||||
double b = avg_y - a * avg_x;
|
ret.sx2 = sx2;
|
||||||
|
ret.a = cov / sx2;
|
||||||
|
ret.b = avg_y - ret.a * avg_x;
|
||||||
|
|
||||||
double res2 = 0.0;
|
|
||||||
for (auto [x, y, d]: v)
|
for (auto [x, y, d]: v)
|
||||||
res2 += (y - a * x - b) * (y - a * x - b);
|
ret.res2 += (y - ret.a * x - ret.b) * (y - ret.a * x - ret.b);
|
||||||
double r2 = sy2 > 0.0 ? 1.0 - res2 / sy2 : 1.0;
|
ret.r2 = sy2 > 0.0 ? 1.0 - ret.res2 / sy2 : 1.0;
|
||||||
return { .a = a, .b = b, .res2 = res2 / v.size(), .r2 = r2 };
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
void StatsView::plotScatter(const std::vector<dive *> &dives, const StatsVariable *categoryVariable, const StatsVariable *valueVariable)
|
void StatsView::plotScatter(const std::vector<dive *> &dives, const StatsVariable *categoryVariable, const StatsVariable *valueVariable)
|
||||||
|
@ -1101,9 +1100,6 @@ void StatsView::plotScatter(const std::vector<dive *> &dives, const StatsVariabl
|
||||||
|
|
||||||
// y = ax + b
|
// y = ax + b
|
||||||
struct regression_data reg = linear_regression(points);
|
struct regression_data reg = linear_regression(points);
|
||||||
if (!std::isnan(reg.a)) {
|
if (!std::isnan(reg.a))
|
||||||
auto [minx, maxx] = axisX->minMax();
|
addLinearRegression(reg, xAxis, yAxis);
|
||||||
auto [miny, maxy] = axisY->minMax();
|
|
||||||
addLinearRegression(reg.a, reg.b, reg.res2, reg.r2, minx, maxx, miny, maxy, xAxis, yAxis);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,6 +31,13 @@ class QSGTexture;
|
||||||
enum class ChartSubType : int;
|
enum class ChartSubType : int;
|
||||||
enum class StatsOperation : int;
|
enum class StatsOperation : int;
|
||||||
|
|
||||||
|
struct regression_data {
|
||||||
|
double a,b;
|
||||||
|
double res2, r2, sx2, xavg;
|
||||||
|
int n;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
class StatsView : public QQuickItem {
|
class StatsView : public QQuickItem {
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
public:
|
public:
|
||||||
|
@ -120,10 +127,9 @@ private:
|
||||||
struct RegressionLine {
|
struct RegressionLine {
|
||||||
std::unique_ptr<QGraphicsPolygonItem> item;
|
std::unique_ptr<QGraphicsPolygonItem> item;
|
||||||
StatsAxis *xAxis, *yAxis;
|
StatsAxis *xAxis, *yAxis;
|
||||||
double a, b; // y = ax + b
|
const struct regression_data reg;
|
||||||
double width;
|
|
||||||
void updatePosition();
|
void updatePosition();
|
||||||
RegressionLine(double a, double b, double width, QBrush brush, QGraphicsScene *scene, StatsAxis *xAxis, StatsAxis *yAxis);
|
RegressionLine(const struct regression_data reg, QBrush brush, QGraphicsScene *scene, StatsAxis *xAxis, StatsAxis *yAxis);
|
||||||
};
|
};
|
||||||
|
|
||||||
// A line marking median or mean in histograms
|
// A line marking median or mean in histograms
|
||||||
|
@ -136,7 +142,7 @@ private:
|
||||||
HistogramMarker(double val, bool horizontal, QPen pen, QGraphicsScene *scene, StatsAxis *xAxis, StatsAxis *yAxis);
|
HistogramMarker(double val, bool horizontal, QPen pen, QGraphicsScene *scene, StatsAxis *xAxis, StatsAxis *yAxis);
|
||||||
};
|
};
|
||||||
|
|
||||||
void addLinearRegression(double a, double b, double res2, double r2, double minX, double maxX, double minY, double maxY, StatsAxis *xAxis, StatsAxis *yAxis);
|
void addLinearRegression(const struct regression_data reg, StatsAxis *xAxis, StatsAxis *yAxis);
|
||||||
void addHistogramMarker(double pos, const QPen &pen, bool isHorizontal, StatsAxis *xAxis, StatsAxis *yAxis);
|
void addHistogramMarker(double pos, const QPen &pen, bool isHorizontal, StatsAxis *xAxis, StatsAxis *yAxis);
|
||||||
|
|
||||||
StatsState state;
|
StatsState state;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue