statistics: implement StatsView

The StatsView shows the chart described by the StatsState structure.
It is based on a QML ChartView. This should make it possible to
easily port to mobile. It does not include any of the UI around
the chart, viz. the variable and chart selection, etc.

The code checking for the statistical significance of the regression
line was written by Willem.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>
Signed-off-by: willemferguson <willemferguson@zoology.up.ac.za>
This commit is contained in:
Berthold Stoeger 2021-01-01 22:43:21 +01:00 committed by Dirk Hohndel
parent a034014a6a
commit 995100a540
6 changed files with 1139 additions and 1 deletions

View file

@ -301,7 +301,7 @@ endif()
#set up the subsurface_link_libraries variable
set(SUBSURFACE_LINK_LIBRARIES ${SUBSURFACE_LINK_LIBRARIES} ${LIBDIVECOMPUTER_LIBRARIES} ${LIBGIT2_LIBRARIES} ${LIBUSB_LIBRARIES} ${LIBMTP_LIBRARIES})
if (NOT SUBSURFACE_TARGET_EXECUTABLE MATCHES "DownloaderExecutable")
qt5_add_resources(SUBSURFACE_RESOURCES subsurface.qrc map-widget/qml/map-widget.qrc)
qt5_add_resources(SUBSURFACE_RESOURCES subsurface.qrc map-widget/qml/map-widget.qrc stats/qml/statsview.qrc)
endif()
# hack to build successfully on LGTM
@ -391,6 +391,7 @@ elseif (SUBSURFACE_TARGET_EXECUTABLE MATCHES "DesktopExecutable")
subsurface_models_desktop
subsurface_commands
subsurface_corelib
subsurface_stats
${SUBSURFACE_LINK_LIBRARIES}
)
add_dependencies(subsurface_desktop_preferences subsurface_generated_ui)

View file

@ -27,6 +27,8 @@ set(SUBSURFACE_STATS_SRCS
statsstate.cpp
statsvariables.h
statsvariables.cpp
statsview.h
statsview.cpp
zvalues.h
)

8
stats/qml/statsview.qml Normal file
View file

@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
import QtQuick 2.0
import QtCharts 2.0
// Root item of the statistics view: an otherwise empty QtCharts ChartView.
// The file is registered under the "/qml" resource prefix (see statsview.qrc)
// and loaded by statsview.cpp, which fills the chart from C++.
ChartView {
antialiasing: true
localizeNumbers: true
}

5
stats/qml/statsview.qrc Normal file
View file

@ -0,0 +1,5 @@
<RCC>
<qresource prefix="/qml">
<file>statsview.qml</file>
</qresource>
</RCC>

984
stats/statsview.cpp Normal file
View file

@ -0,0 +1,984 @@
// SPDX-License-Identifier: GPL-2.0
#include "statsview.h"
#include "barseries.h"
#include "boxseries.h"
#include "legend.h"
#include "pieseries.h"
#include "scatterseries.h"
#include "statsaxis.h"
#include "statsstate.h"
#include "statstranslations.h"
#include "statsvariables.h"
#include "zvalues.h"
#include "core/divefilter.h"
#include "core/subsurface-qt/divelistnotifier.h"
#include <cmath>
#include <QQuickItem>
#include <QAbstractSeries>
#include <QChart>
#include <QGraphicsSceneHoverEvent>
#include <QLocale>
// Constants that control the graph layouts
// Pen color used for the quartile marker lines.
static const QColor quartileMarkerColor(Qt::red);
// Total length of a quartile marker line, in the coordinates returned by
// QChart::mapToPosition(); half of it extends to either side of the mapped point.
static const double quartileMarkerSize = 15;
// Resource URL of the QML file that instantiates the ChartView.
static const QUrl urlStatsView = QUrl(QStringLiteral("qrc:/qml/statsview.qml"));
// We use QtQuick's ChartView so that we can show the statistics on mobile.
// However, accessing the ChartView from C++ is maliciously cumbersome and
// the full QChart interface is not exported. Fortunately, the interface
// leaks the QChart object: We can create a dummy-series and access the chart
// object via the chart() accessor function. By creating a "PieSeries", the
// ChartView does not automatically add axes.
//
// Returns nullptr if no item was passed in, if createSeries() could not be
// invoked, if it did not hand back a series, or if that series has no chart.
static QtCharts::QChart *getChart(QQuickItem *item)
{
	if (!item)
		return nullptr;

	// Start out with nullptr, so that a call that "succeeds" without
	// delivering a series can be told apart from a valid result.
	QtCharts::QAbstractSeries *abstract_series = nullptr;
	if (!QMetaObject::invokeMethod(item, "createSeries", Qt::AutoConnection,
				       Q_RETURN_ARG(QtCharts::QAbstractSeries *, abstract_series),
				       Q_ARG(int, QtCharts::QAbstractSeries::SeriesTypePie),
				       Q_ARG(QString, QString()))) {
		qWarning("Couldn't call createSeries()");
		return nullptr;
	}
	if (!abstract_series) {
		qWarning("createSeries() didn't return a series");
		return nullptr;
	}

	// The dummy series only served to get at the chart: detach and destroy it.
	QtCharts::QChart *res = abstract_series->chart();
	if (res)
		res->removeSeries(abstract_series);
	delete abstract_series;
	return res;
}
// Intercepts hover-move events of the watched object and reports the hover
// position to the view. Such events are consumed (true is returned); all
// other events are passed on to the default QObject implementation.
bool StatsView::EventFilter::eventFilter(QObject *o, QEvent *event)
{
	if (event->type() != QEvent::GraphicsSceneHoverMove)
		return QObject::eventFilter(o, event);

	const auto *hoverEvent = static_cast<QGraphicsSceneHoverEvent *>(event);
	view->hover(hoverEvent->pos());
	return true;
}
// Loads the QML ChartView, extracts the underlying QChart and hooks up the
// signals needed to keep the plot up to date.
// If the chart cannot be obtained, the view stays empty: reset() and plot()
// check for a null chart and do nothing in that case.
StatsView::StatsView(QWidget *parent) : QQuickWidget(parent),
	highlightedSeries(nullptr),
	eventFilter(this)
{
	setResizeMode(QQuickWidget::SizeRootObjectToView);
	setSource(urlStatsView);
	chart = getChart(rootObject());

	// Replot when the number of shown dives changes.
	connect(&diveListNotifier, &DiveListNotifier::numShownChanged, this, &StatsView::replotIfVisible);

	// getChart() returns nullptr on failure - don't dereference it.
	if (!chart) {
		qWarning("StatsView: couldn't access the chart of the QML ChartView");
		return;
	}

	connect(chart, &QtCharts::QChart::plotAreaChanged, this, &StatsView::plotAreaChanged);

	// Hover events are needed to highlight the item under the mouse.
	chart->installEventFilter(&eventFilter);
	chart->setAcceptHoverEvents(true);

	// We paint our own legend.
	chart->legend()->setVisible(false);
}
// Nothing to clean up by hand; defined out of line in this translation unit.
StatsView::~StatsView() = default;
void StatsView::plotAreaChanged(const QRectF &)
{
for (auto &axis: axes)
axis->updateLabels(chart);
for (auto &series: series)
series->updatePositions();
for (QuartileMarker &marker: quartileMarkers)
marker.updatePosition();
for (LineMarker &marker: lineMarkers)
marker.updatePosition();
if (legend)
legend->resize();
}
// Redraws the chart with the last used state, but only while the widget is visible.
void StatsView::replotIfVisible()
{
	if (!isVisible())
		return;
	plot(state);
}
// Handles a hover at position pos: the first series that reports a hit
// becomes the highlighted series. If the highlight moves to a different
// series, or no series is hit at all, the previously highlighted series
// is unhighlighted.
void StatsView::hover(QPointF pos)
{
	for (auto &candidate: series) {
		if (!candidate->hover(pos))
			continue;

		// Hit. Switch the highlight if this is a different series than before.
		if (candidate.get() != highlightedSeries) {
			if (highlightedSeries)
				highlightedSeries->unhighlight();
			highlightedSeries = candidate.get();
		}
		return;
	}

	// No series was hit -> remove a stale highlight, if any.
	if (!highlightedSeries)
		return;
	highlightedSeries->unhighlight();
	highlightedSeries = nullptr;
}
// Creates a series of type T, stores it in the list of series and returns a
// plain pointer to it. The series constructor receives the chart, the first two
// registered axes (or null pointers if fewer than two axes exist) and then
// the forwarded arguments.
template <typename T, class... Args>
T *StatsView::createSeries(Args&&... args)
{
	StatsAxis *xAxis = nullptr;
	StatsAxis *yAxis = nullptr;
	if (axes.size() >= 2) {
		xAxis = axes[0].get();
		yAxis = axes[1].get();
	}

	T *res = new T(chart, xAxis, yAxis, std::forward<Args>(args)...);
	series.emplace_back(res);
	series.back()->updatePositions();
	return res;
}
// Sets the title of the chart. Expects a valid chart.
void StatsView::setTitle(const QString &s)
{
	chart->setTitle(s);
}
// Creates an axis of type T from the forwarded arguments, stores it in the
// list of axes, lets it calculate its labels and sets its title.
// Returns a plain pointer to the new axis.
template <typename T, class... Args>
T *StatsView::createAxis(const QString &title, Args&&... args)
{
	T *res = new T(std::forward<Args>(args)...);
	axes.emplace_back(res);

	auto &stored = axes.back();
	stored->updateLabels(chart);
	stored->qaxis()->setTitleText(title);
	return res;
}
// Attaches the two axes to the chart: x at the bottom, y on the left.
void StatsView::addAxes(StatsAxis *x, StatsAxis *y)
{
	auto bottomAxis = x->qaxis();
	chart->addAxis(bottomAxis, Qt::AlignBottom);

	auto leftAxis = y->qaxis();
	chart->addAxis(leftAxis, Qt::AlignLeft);
}
// Removes everything from the chart: highlight, legend, series, markers
// and axes. Does nothing if there is no chart.
void StatsView::reset()
{
	if (chart) {
		highlightedSeries = nullptr;	// points into "series", which is cleared below
		legend.reset();
		series.clear();
		quartileMarkers.clear();
		lineMarkers.clear();
		chart->removeAllSeries();
		axes.clear();
	}
}
// Main entry point: remembers the passed-in state and plots the currently
// visible dives with the chart type selected therein.
// Without a chart or without a first variable nothing is done
// (the state is saved nevertheless).
void StatsView::plot(const StatsState &stateIn)
{
	state = stateIn;
	if (!(chart && state.var1))
		return;

	reset();

	const std::vector<dive *> dives = DiveFilter::instance()->visibleDives();

	// Dispatch to the plotting function of the respective chart type.
	switch (state.type) {
	case ChartType::DiscreteBar:
		plotBarChart(dives, state.subtype, state.var1, state.var1Binner, state.var2,
			     state.var2Binner, state.labels, state.legend);
		break;
	case ChartType::DiscreteValue:
		plotValueChart(dives, state.subtype, state.var1, state.var1Binner, state.var2,
			       state.var2Operation, state.labels);
		break;
	case ChartType::DiscreteCount:
		plotDiscreteCountChart(dives, state.subtype, state.var1, state.var1Binner, state.labels);
		break;
	case ChartType::Pie:
		plotPieChart(dives, state.var1, state.var1Binner, state.labels, state.legend);
		break;
	case ChartType::DiscreteBox:
		plotDiscreteBoxChart(dives, state.var1, state.var1Binner, state.var2);
		break;
	case ChartType::DiscreteScatter:
		plotDiscreteScatter(dives, state.var1, state.var1Binner, state.var2, state.quartiles);
		break;
	case ChartType::HistogramCount:
		plotHistogramCountChart(dives, state.subtype, state.var1, state.var1Binner,
					state.labels, state.median, state.mean);
		break;
	case ChartType::HistogramValue:
		plotHistogramValueChart(dives, state.subtype, state.var1, state.var1Binner, state.var2,
					state.var2Operation, state.labels);
		break;
	case ChartType::HistogramStacked:
		plotHistogramStackedChart(dives, state.subtype, state.var1, state.var1Binner,
					  state.var2, state.var2Binner, state.labels, state.legend);
		break;
	case ChartType::HistogramBox:
		plotHistogramBoxChart(dives, state.var1, state.var1Binner, state.var2);
		break;
	case ChartType::ScatterPlot:
		plotScatter(dives, state.var1, state.var2);
		break;
	default:
		qWarning("Unknown chart type: %d", (int)state.type);
		break;
	}
}
// Creates a category axis with one label per bin. The bins are passed as
// (bin, something) entries of which only the bin is used; the labels are
// formatted by the binner.
template<typename T>
CategoryAxis *StatsView::createCategoryAxis(const QString &name, const StatsBinner &binner,
					    const std::vector<T> &bins, bool isHorizontal)
{
	std::vector<QString> binLabels;
	binLabels.reserve(bins.size());
	for (const auto &[binPtr, unused]: bins)
		binLabels.push_back(binner.format(*binPtr));

	return createAxis<CategoryAxis>(name, binLabels, isHorizontal);
}
// Creates an axis for dive counts ranging up to maxVal, titled "No. dives".
CountAxis *StatsView::createCountAxis(int maxVal, bool isHorizontal)
{
	const QString title = StatsTranslations::tr("No. dives");
	return createAxis<CountAxis>(title, maxVal, isHorizontal);
}
// For "two-dimensionally" binned plots (eg. stacked bar or grouped bar):
// Counts for each bin on the independent variable, including the total counts for that bin.
struct BinCounts {
// The bin of the independent (category) variable.
StatsBinPtr bin;
// One count per value bin; BarPlotData keeps the positions in sync with its vbins list.
std::vector<int> counts;
// Sum of all entries in counts.
int total;
};
// The problem with bar plots is that for different category
// bins, we might get different value bins. So we have to keep track
// of our counts and adjust accordingly. That's a bit annoying.
// Perhaps we should determine the bins of all dives first and then
// query the counts for precisely those bins?
struct BarPlotData {
std::vector<BinCounts> hbin_counts; // For each category bin the counts for all value bins
std::vector<StatsBinPtr> vbins; // All value bins encountered, kept sorted
std::vector<QString> vbinNames; // Formatted names (with unit) of the value bins, same order as vbins
int maxCount; // Highest count of any bin-combination
int maxCategoryCount; // Highest count of any category bin
// Attention: the bins of the categoryBins argument will be consumed (moved from)!
BarPlotData(std::vector<StatsBinDives> &categoryBins, const StatsBinner &valuebinner);
};
// Builds the two-dimensional table of counts: the dives of every category bin
// are counted per value bin by valueBinner. The value bins of all categories are
// merged into one sorted list (vbins); every row of hbin_counts uses the
// same column order as that list.
// The bins inside categoryBins are moved into hbin_counts, i.e. the bin
// pointers of the argument must not be used after construction.
BarPlotData::BarPlotData(std::vector<StatsBinDives> &categoryBins, const StatsBinner &valueBinner) :
maxCount(0), maxCategoryCount(0)
{
for (auto &[bin, dives]: categoryBins) {
// This moves the bin - the original pointer is invalidated
// The new row starts with a zero for every value bin known so far.
hbin_counts.push_back({ std::move(bin), std::vector<int>(vbins.size(), 0), 0 });
for (auto &[vbin, count]: valueBinner.count_dives(dives, false)) {
// Note: we assume that the bins are sorted!
// Find the column of this value bin (or the position where it has to be inserted).
auto it = std::lower_bound(vbins.begin(), vbins.end(), vbin,
[] (const StatsBinPtr &p, const StatsBinPtr &bin)
{ return *p < *bin; });
// The position is saved as an index, because the insert below invalidates the iterator.
// NOTE(review): ssize_t is a POSIX type, not standard C++ - std::ptrdiff_t would be the portable choice.
ssize_t pos = it - vbins.begin();
if (it == vbins.end() || **it != *vbin) {
// Add a new value bin.
// Attn: this invalidates "vbin", which must not be used henceforth!
vbins.insert(it, std::move(vbin));
// Fix the old arrays
// Every row (including the one just added) gets a zero-count column at the same position.
for (auto &[bin, v, total]: hbin_counts)
v.insert(v.begin() + pos, 0);
}
hbin_counts.back().counts[pos] = count;
hbin_counts.back().total += count;
if (count > maxCount)
maxCount = count;
}
// The row of this category is complete: update the maximum of the row totals.
maxCategoryCount = std::max(maxCategoryCount, hbin_counts.back().total);
}
// Format the names of the value bins once the list is final.
vbinNames.reserve(vbins.size());
for (const auto &vbin: vbins)
vbinNames.push_back(valueBinner.formatWithUnit(*vbin));
}
// Formats "x (y%)" as either a single or two strings for horizontal and non-horizontal cases, respectively.
// count is the number to be shown, total the number corresponding to 100%.
// For a total of zero, the percentage is reported as 0.0% instead of
// dividing by zero (which would give a "nan%" or "inf%" label).
static std::vector<QString> makePercentageLabels(int count, int total, bool isHorizontal)
{
	double percentage = total != 0 ? count * 100.0 / total : 0.0;
	// "%L1" formats the numbers according to the current locale.
	QString countString = QString("%L1").arg(count);
	QString percentageString = QString("%L1%").arg(percentage, 0, 'f', 1);
	if (isHorizontal)
		return { QString("%1 %2").arg(countString, percentageString) };
	else
		return { countString, percentageString };
}
// From a list of counts, make (count, label) pairs, where the label
// formats the total number and the percentage of dives.
static std::vector<std::pair<int, std::vector<QString>>> makeCountLabels(const std::vector<int> &counts, int total,
bool labels, bool isHorizontal)
{
std::vector<std::pair<int, std::vector<QString>>> count_labels;
count_labels.reserve(counts.size());
for (int count: counts) {
std::vector<QString> label = labels ? makePercentageLabels(count, total, isHorizontal)
: std::vector<QString>();
count_labels.push_back(std::make_pair(count, label));
}
return count_labels;
}
// Plots a grouped or stacked bar chart: the dives are binned by the category
// variable and, inside each category bin, counted per bin of the value variable.
// subType selects vertical/horizontal and grouped/stacked layout; labels and
// showLegend turn the count labels and the legend on or off.
// Nothing is plotted if a binner is missing or if there are no bins.
void StatsView::plotBarChart(const std::vector<dive *> &dives,
			     ChartSubType subType,
			     const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
			     const StatsVariable *valueVariable, const StatsBinner *valueBinner, bool labels, bool showLegend)
{
	if (!categoryBinner || !valueBinner)
		return;

	setTitle(valueVariable->nameWithBinnerUnit(*valueBinner));

	std::vector<StatsBinDives> categoryBins = categoryBinner->bin_dives(dives, false);

	// If there is nothing to display, quit (as the other plot functions do)
	if (categoryBins.empty())
		return;

	bool isStacked = subType == ChartSubType::VerticalStacked || subType == ChartSubType::HorizontalStacked;
	bool isHorizontal = subType == ChartSubType::HorizontalGrouped || subType == ChartSubType::HorizontalStacked;

	// Construct the histogram axis now, because the pointers to the bins
	// will be moved away when constructing BarPlotData below.
	CategoryAxis *catAxis = createCategoryAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner),
						   *categoryBinner, categoryBins, !isHorizontal);

	BarPlotData data(categoryBins, *valueBinner);

	// Stacked bars reach up to the total of a category, grouped bars only
	// up to the highest single count.
	int maxVal = isStacked ? data.maxCategoryCount : data.maxCount;
	CountAxis *valAxis = createCountAxis(maxVal, isHorizontal);

	if (isHorizontal)
		addAxes(valAxis, catAxis);
	else
		addAxes(catAxis, valAxis);

	// Paint legend first, because the bin-names will be moved away from.
	if (showLegend)
		legend = std::make_unique<Legend>(chart, data.vbinNames);

	// Every category bin occupies the range [pos - 0.5, pos + 0.5] on the category axis.
	std::vector<BarSeries::MultiItem> items;
	items.reserve(data.hbin_counts.size());
	double pos = 0.0;
	for (auto &[hbin, counts, total]: data.hbin_counts) {
		items.push_back({ pos - 0.5, pos + 0.5, makeCountLabels(counts, total, labels, isHorizontal),
				  categoryBinner->formatWithUnit(*hbin) });
		pos += 1.0;
	}

	createSeries<BarSeries>(isHorizontal, isStacked, categoryVariable->name(), valueVariable, std::move(data.vbinNames), items);
}
// Quiet not-a-number; used below to mark values that could not be determined.
const double NaN = std::numeric_limits<double>::quiet_NaN();
// The getMinMaxValueBase() overloads extract the minimum and maximum y-value
// of the various kinds of items that are plotted.
// A bit too convoluted for my tastes - can we make that simpler?
//
// List of values: first and last value, or (NaN, NaN) for an empty list.
static std::pair<double, double> getMinMaxValueBase(const std::vector<StatsValue> &values)
{
	if (values.empty())
		return std::make_pair(NaN, NaN);

	// Attention: this supposes that the list is sorted!
	return std::make_pair(values.front().v, values.back().v);
}
// Single value: minimum and maximum coincide.
static std::pair<double, double> getMinMaxValueBase(double v)
{
	return std::make_pair(v, v);
}
// Quartiles: use the min and max members.
static std::pair<double, double> getMinMaxValueBase(const StatsQuartiles &q)
{
	return std::make_pair(q.min, q.max);
}
// Scatter item: only the y-coordinate is of interest.
static std::pair<double, double> getMinMaxValueBase(const StatsScatterItem &s)
{
	return std::make_pair(s.y, s.y);
}
// Pair: the value is the second member, the first member is ignored.
template <typename T1, typename T2>
static std::pair<double, double> getMinMaxValueBase(const std::pair<T1, T2> &p)
{
	const T2 &payload = p.second;
	return getMinMaxValueBase(payload);
}
// (bin, value) entry: the value member counts, the bin is ignored.
template <typename T>
static std::pair<double, double> getMinMaxValueBase(const StatsBinValue<T> &v)
{
	const T &payload = v.value;
	return getMinMaxValueBase(payload);
}
// Widens the range [min, max] such that it includes the range of item v.
// NaN bounds of the item are ignored. found is set to true as soon as the
// item has at least one non-NaN bound; it is never reset to false.
template <typename T>
static void updateMinMax(double &min, double &max, bool &found, const T &v)
{
	const std::pair<double, double> range = getMinMaxValueBase(v);
	const bool haveLower = !std::isnan(range.first);
	const bool haveUpper = !std::isnan(range.second);

	if (haveLower && range.first < min)
		min = range.first;
	if (haveUpper && range.second > max)
		max = range.second;
	if (haveLower || haveUpper)
		found = true;
}
// Determines the (minimum, maximum) over all items of a list.
// Returns (0.0, 0.0) if no item with a valid value was found.
// NOTE(review): the search starts at min = 1e14 and max = 0.0, so the
// reported maximum is never below 0.0 and the minimum never above 1e14.
// Presumably fine for the plotted quantities - confirm for negative values.
template <typename T>
static std::pair<double, double> getMinMaxValue(const std::vector<T> &values)
{
	double lowest = 1e14;
	double highest = 0.0;
	bool any = false;
	for (const T &item: values)
		updateMinMax(lowest, highest, any, item);

	if (!any)
		return std::make_pair(0.0, 0.0);
	return std::make_pair(lowest, highest);
}
// Determines the (minimum, maximum) of the results of operation op over all
// bins. Bins with an invalid result are skipped.
// Returns (0.0, 0.0) if no valid value was found.
// NOTE(review): as the search starts at min = 1e14 and max = 0.0, the
// reported maximum is never below 0.0 - confirm that this is intended.
static std::pair<double, double> getMinMaxValue(const std::vector<StatsBinOp> &bins, StatsOperation op)
{
	double lowest = 1e14;
	double highest = 0.0;
	bool any = false;
	for (auto &[bin, res]: bins) {
		if (res.isValid())
			updateMinMax(lowest, highest, any, res.get(op));
	}

	if (!any)
		return std::make_pair(0.0, 0.0);
	return std::make_pair(lowest, highest);
}
// Plots a bar chart with one bar per category bin. The height of a bar is the
// result of valueAxisOperation applied to the value variable of the dives
// in that bin. Bins without valid result get no bar, but keep their place on
// the category axis.
// Nothing is plotted without a category binner or if there are no bins.
void StatsView::plotValueChart(const std::vector<dive *> &dives,
			       ChartSubType subType,
			       const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
			       const StatsVariable *valueVariable, StatsOperation valueAxisOperation,
			       bool labels)
{
	if (!categoryBinner)
		return;

	setTitle(QStringLiteral("%1 (%2)").arg(valueVariable->name(), StatsVariable::operationName(valueAxisOperation)));

	std::vector<StatsBinOp> categoryBins = valueVariable->bin_operations(*categoryBinner, dives, false);

	// If there is nothing to display, quit
	if (categoryBins.empty())
		return;

	bool isHorizontal = subType == ChartSubType::Horizontal;

	// The value axis starts at zero, therefore only the maximum is of interest.
	const double maxValue = getMinMaxValue(categoryBins, valueAxisOperation).second;
	int decimals = valueVariable->decimals();

	CategoryAxis *catAxis = createCategoryAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner),
						   *categoryBinner, categoryBins, !isHorizontal);
	ValueAxis *valAxis = createAxis<ValueAxis>(valueVariable->nameWithUnit(),
						   0.0, maxValue, decimals, isHorizontal);

	if (isHorizontal)
		addAxes(valAxis, catAxis);
	else
		addAxes(catAxis, valAxis);

	// Every category bin occupies the range [pos - 0.5, pos + 0.5] on the category axis.
	std::vector<BarSeries::ValueItem> items;
	items.reserve(categoryBins.size());
	double pos = 0.0;
	for (auto &[bin, res]: categoryBins) {
		if (res.isValid()) {
			double height = res.get(valueAxisOperation);
			QString value = QString("%L1").arg(height, 0, 'f', decimals);
			std::vector<QString> label = labels ? std::vector<QString> { value }
							    : std::vector<QString>();
			items.push_back({ pos - 0.5, pos + 0.5, height, label,
					  categoryBinner->formatWithUnit(*bin), res });
		}
		// Advance also for invalid bins, so that bars and axis labels stay aligned.
		pos += 1.0;
	}

	createSeries<BarSeries>(isHorizontal, categoryVariable->name(), valueVariable, items);
}
// Sums up the counts of all bins.
static int getTotalCount(const std::vector<StatsBinCount> &bins)
{
	int sum = 0;
	for (const auto &[unused, binCount]: bins)
		sum += binCount;
	return sum;
}
// Returns the highest count found in a list of (something, count) entries.
// The result is never below zero; an empty list gives zero.
template<typename T>
static int getMaxCount(const std::vector<T> &bins)
{
	int highest = 0;
	for (const auto &[unused, value]: bins)
		highest = value > highest ? value : highest;
	return highest;
}
// Plots a bar chart showing the number of dives in each bin of the
// category variable. If labels are requested, every bar is labeled with
// its count and its share of the total number of counted dives.
// Nothing is plotted without a binner or if there are no bins.
void StatsView::plotDiscreteCountChart(const std::vector<dive *> &dives,
				       ChartSubType subType,
				       const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
				       bool labels)
{
	if (!categoryBinner)
		return;

	setTitle(categoryVariable->nameWithBinnerUnit(*categoryBinner));

	std::vector<StatsBinCount> binCounts = categoryBinner->count_dives(dives, false);
	if (binCounts.empty())
		return;		// nothing to display

	int totalCount = getTotalCount(binCounts);

	// Everything that is not explicitly vertical is drawn horizontally.
	bool horizontal = subType != ChartSubType::Vertical;

	CategoryAxis *categoryAxis = createCategoryAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner),
							*categoryBinner, binCounts, !horizontal);
	int highestCount = getMaxCount(binCounts);
	CountAxis *countAxis = createCountAxis(highestCount, horizontal);

	if (horizontal)
		addAxes(countAxis, categoryAxis);
	else
		addAxes(categoryAxis, countAxis);

	// Bar number i covers the range [i - 0.5, i + 0.5] on the category axis.
	std::vector<BarSeries::CountItem> bars;
	bars.reserve(binCounts.size());
	double center = 0.0;
	for (auto const &[bin, count]: binCounts) {
		std::vector<QString> label;
		if (labels)
			label = makePercentageLabels(count, totalCount, horizontal);
		bars.push_back({ center - 0.5, center + 0.5, count, label,
				 categoryBinner->formatWithUnit(*bin), totalCount });
		center += 1.0;
	}

	createSeries<BarSeries>(horizontal, categoryVariable->name(), bars);
}
// Plots a pie chart of the number of dives per bin of the category variable.
// For non-discrete variables the series is asked to keep the order of the bins.
// The legend, if requested, shows the bin names as reported by the series.
// Nothing is plotted without a binner or if there are no bins.
void StatsView::plotPieChart(const std::vector<dive *> &dives,
			     const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
			     bool labels, bool showLegend)
{
	if (!categoryBinner)
		return;

	setTitle(categoryVariable->nameWithBinnerUnit(*categoryBinner));

	std::vector<StatsBinCount> binCounts = categoryBinner->count_dives(dives, false);
	if (binCounts.empty())
		return;		// nothing to display

	// Translate the bins into (name, count) pairs for the series.
	std::vector<std::pair<QString, int>> slices;
	slices.reserve(binCounts.size());
	for (auto const &[bin, count]: binCounts)
		slices.emplace_back(categoryBinner->formatWithUnit(*bin), count);

	bool keepOrder = categoryVariable->type() != StatsVariable::Type::Discrete;
	PieSeries *pie = createSeries<PieSeries>(categoryVariable->name(), slices, keepOrder, labels);

	if (showLegend)
		legend = std::make_unique<Legend>(chart, pie->binNames());
}
// Plots a box-and-whiskers chart: one box per bin of the category variable,
// showing the quartiles of the value variable for the dives in that bin.
// Bins without valid quartiles get no box, but keep their place on the axis.
// Nothing is plotted without a binner or if there are no bins.
void StatsView::plotDiscreteBoxChart(const std::vector<dive *> &dives,
				     const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
				     const StatsVariable *valueVariable)
{
	if (!categoryBinner)
		return;

	setTitle(valueVariable->name());

	std::vector<StatsBinQuartiles> binQuartiles = valueVariable->bin_quartiles(*categoryBinner, dives, false);
	if (binQuartiles.empty())
		return;		// nothing to display

	CategoryAxis *categoryAxis = createCategoryAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner),
							*categoryBinner, binQuartiles, true);

	auto [lowest, highest] = getMinMaxValue(binQuartiles);
	ValueAxis *valAxis = createAxis<ValueAxis>(valueVariable->nameWithUnit(),
						   lowest, highest, valueVariable->decimals(), false);

	addAxes(categoryAxis, valAxis);

	BoxSeries *boxes = createSeries<BoxSeries>(valueVariable->name(), valueVariable->unitSymbol(), valueVariable->decimals());

	// Box number i covers the range [i - 0.5, i + 0.5] on the category axis.
	double center = 0.0;
	for (auto &[bin, q]: binQuartiles) {
		if (q.isValid())
			boxes->append(center - 0.5, center + 0.5, q, categoryBinner->formatWithUnit(*bin));
		center += 1.0;
	}
}
// Plots the values of the value variable as individual points, one column of
// points per bin of the category variable. If quartiles are requested,
// markers for the three quartiles are added to every column with valid quartiles.
// Nothing is plotted without a binner or if there are no bins.
void StatsView::plotDiscreteScatter(const std::vector<dive *> &dives,
				    const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
				    const StatsVariable *valueVariable, bool quartiles)
{
	if (!categoryBinner)
		return;

	setTitle(valueVariable->name());

	std::vector<StatsBinValues> binnedValues = valueVariable->bin_values(*categoryBinner, dives, false);
	if (binnedValues.empty())
		return;		// nothing to display

	CategoryAxis *categoryAxis = createCategoryAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner),
							*categoryBinner, binnedValues, true);

	auto [lowest, highest] = getMinMaxValue(binnedValues);
	ValueAxis *valueAxis = createAxis<ValueAxis>(valueVariable->nameWithUnit(),
						     lowest, highest, valueVariable->decimals(), false);

	addAxes(categoryAxis, valueAxis);

	ScatterSeries *scatter = createSeries<ScatterSeries>(*categoryVariable, *valueVariable);

	// Column number i is located at x = i.
	double column = 0.0;
	for (const auto &[bin, values]: binnedValues) {
		for (auto [v, d]: values)
			scatter->append(d, column, v);

		if (quartiles) {
			StatsQuartiles columnQuartiles = StatsVariable::quartiles(values);
			if (columnQuartiles.isValid()) {
				quartileMarkers.emplace_back(column, columnQuartiles.q1, scatter);
				quartileMarkers.emplace_back(column, columnQuartiles.q2, scatter);
				quartileMarkers.emplace_back(column, columnQuartiles.q3, scatter);
			}
		}
		column += 1.0;
	}
}
// Creates a quartile marker: a short horizontal line at the point (pos, value)
// given in coordinates of the series. The line item is created with the chart
// of the series as parent item.
StatsView::QuartileMarker::QuartileMarker(double pos, double value, QtCharts::QAbstractSeries *series) :
	item(new QGraphicsLineItem(series->chart())),
	series(series),
	pos(pos),
	value(value)
{
	const QPen markerPen(quartileMarkerColor, 2.0);
	item->setPen(markerPen);
	item->setZValue(ZValues::chartFeatures);
	updatePosition();
}
void StatsView::QuartileMarker::updatePosition()
{
QtCharts::QChart *chart = series->chart();
QPointF center = chart->mapToPosition(QPointF(pos, value), series);
item->setLine(center.x() - quartileMarkerSize / 2.0, center.y(),
center.x() + quartileMarkerSize / 2.0, center.y());
}
// Creates a line marker: a line between the points from and to, both given in
// coordinates of the series, drawn with the given pen. The line item is
// created with the chart of the series as parent item.
StatsView::LineMarker::LineMarker(QPointF from, QPointF to, QPen pen, QtCharts::QAbstractSeries *series) :
	item(new QGraphicsLineItem(series->chart())),
	series(series),
	from(from),
	to(to)
{
	item->setPen(pen);
	item->setZValue(ZValues::chartFeatures);
	updatePosition();
}
void StatsView::LineMarker::updatePosition()
{
QtCharts::QChart *chart = series->chart();
item->setLine(QLineF(chart->mapToPosition(from, series),
chart->mapToPosition(to, series)));
}
void StatsView::addLinearRegression(double a, double b, double minX, double maxX, QtCharts::QAbstractSeries *series)
{
lineMarkers.emplace_back(QPointF(minX, a * minX + b), QPointF(maxX, a * maxX + b), QPen(Qt::red), series);
}
// Adds a line marker to a histogram: a line at position pos of the category axis,
// spanning from low to high on the value axis. For horizontal charts, x and y
// are swapped.
void StatsView::addHistogramMarker(double pos, double low, double high, const QPen &pen, bool isHorizontal, QtCharts::QAbstractSeries *series)
{
	QPointF from;
	QPointF to;
	if (isHorizontal) {
		from = QPointF(low, pos);
		to = QPointF(high, pos);
	} else {
		from = QPointF(pos, low);
		to = QPointF(pos, high);
	}
	lineMarkers.emplace_back(from, to, pen, series);
}
// Yikes, we get our data in different kinds of (bin, value) pairs.
// To create a category axis from this, we have to templatify the function.
//
// Creates a histogram axis with one entry at the lower bound of every bin plus
// a final entry at the upper bound of the last bin. Only the bin part of the
// (bin, value) entries is used.
// For an empty list of bins no entry is generated (the last bin is only
// accessed if there is one).
// NOTE(review): whether HistogramAxis copes with an empty list of entries
// can't be told from here - callers should still bail out on empty bins.
template<typename T>
HistogramAxis *StatsView::createHistogramAxis(const QString &name, const StatsBinner &binner,
					      const std::vector<T> &bins, bool isHorizontal)
{
	std::vector<HistogramAxisEntry> labels;
	labels.reserve(bins.size() + 1);	// one entry per bin plus the closing upper bound
	for (auto const &[bin, dummy]: bins) {
		QString label = binner.formatLowerBound(*bin);
		double lowerBound = binner.lowerBoundToFloat(*bin);
		bool prefer = binner.preferBin(*bin);
		labels.push_back({ label, lowerBound, prefer });
	}

	// Close the axis with the upper bound of the last bin.
	// Calling back() on an empty vector is undefined behavior, hence the check.
	if (!bins.empty()) {
		const StatsBin &lastBin = *bins.back().bin;
		QString lastLabel = binner.formatUpperBound(lastBin);
		double upperBound = binner.upperBoundToFloat(lastBin);
		labels.push_back({ lastLabel, upperBound, false });
	}

	return createAxis<HistogramAxis>(name, std::move(labels), isHorizontal);
}
// Plots a histogram of the number of dives per bin of the category variable.
// The bars extend from the lower to the upper bound of their bin. For numeric
// variables, lines marking the mean (green) and the median (red) can be added.
// Nothing is plotted without a binner or if there are no bins.
void StatsView::plotHistogramCountChart(const std::vector<dive *> &dives,
					ChartSubType subType,
					const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
					bool labels, bool showMedian, bool showMean)
{
	if (!categoryBinner)
		return;

	setTitle(categoryVariable->name());

	std::vector<StatsBinCount> binCounts = categoryBinner->count_dives(dives, true);
	if (binCounts.empty())
		return;		// nothing to display

	bool horizontal = subType == ChartSubType::Horizontal;
	HistogramAxis *categoryAxis = createHistogramAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner),
							  *categoryBinner, binCounts, !horizontal);

	int highestCount = getMaxCount(binCounts);
	int totalCount = getTotalCount(binCounts);

	StatsAxis *countAxis = createCountAxis(highestCount, horizontal);
	// The markers span the full range of the count axis.
	double markerEnd = countAxis->minMax().second;

	if (horizontal)
		addAxes(countAxis, categoryAxis);
	else
		addAxes(categoryAxis, countAxis);

	std::vector<BarSeries::CountItem> bars;
	bars.reserve(binCounts.size());
	for (auto const &[bin, count]: binCounts) {
		double from = categoryBinner->lowerBoundToFloat(*bin);
		double to = categoryBinner->upperBoundToFloat(*bin);
		std::vector<QString> label;
		if (labels)
			label = makePercentageLabels(count, totalCount, horizontal);
		bars.push_back({ from, to, count, label,
				 categoryBinner->formatWithUnit(*bin), totalCount });
	}

	BarSeries *bar = createSeries<BarSeries>(horizontal, categoryVariable->name(), bars);

	// Mean and median are only shown for numeric variables.
	if (categoryVariable->type() != StatsVariable::Type::Numeric)
		return;

	if (showMean) {
		double mean = categoryVariable->mean(dives);
		QPen meanPen(Qt::green);
		meanPen.setWidth(2);
		if (!std::isnan(mean))
			addHistogramMarker(mean, 0.0, markerEnd, meanPen, horizontal, bar);
	}
	if (showMedian) {
		double median = categoryVariable->quartiles(dives).q2;
		QPen medianPen(Qt::red);
		medianPen.setWidth(2);
		if (!std::isnan(median))
			addHistogramMarker(median, 0.0, markerEnd, medianPen, horizontal, bar);
	}
}
// Plots a histogram with one bar per bin of the category variable. The bars
// extend from the lower to the upper bound of their bin; their height is the
// result of valueAxisOperation applied to the value variable of the dives in
// that bin. Bins without valid result are skipped.
// Nothing is plotted without a binner or if there are no bins.
void StatsView::plotHistogramValueChart(const std::vector<dive *> &dives,
					ChartSubType subType,
					const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
					const StatsVariable *valueVariable, StatsOperation valueAxisOperation,
					bool labels)
{
	if (!categoryBinner)
		return;

	setTitle(QStringLiteral("%1 (%2)").arg(valueVariable->name(), StatsVariable::operationName(valueAxisOperation)));

	std::vector<StatsBinOp> categoryBins = valueVariable->bin_operations(*categoryBinner, dives, true);

	// If there is nothing to display, quit
	if (categoryBins.empty())
		return;

	bool isHorizontal = subType == ChartSubType::Horizontal;
	HistogramAxis *catAxis = createHistogramAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner),
						     *categoryBinner, categoryBins, !isHorizontal);

	// The value axis starts at zero, therefore only the maximum is of interest.
	const double maxValue = getMinMaxValue(categoryBins, valueAxisOperation).second;

	int decimals = valueVariable->decimals();
	ValueAxis *valAxis = createAxis<ValueAxis>(valueVariable->nameWithUnit(),
						   0.0, maxValue, decimals, isHorizontal);

	if (isHorizontal)
		addAxes(valAxis, catAxis);
	else
		addAxes(catAxis, valAxis);

	std::vector<BarSeries::ValueItem> items;
	items.reserve(categoryBins.size());

	for (auto const &[bin, res]: categoryBins) {
		if (!res.isValid())
			continue;
		double height = res.get(valueAxisOperation);
		double lowerBound = categoryBinner->lowerBoundToFloat(*bin);
		double upperBound = categoryBinner->upperBoundToFloat(*bin);
		QString value = QString("%L1").arg(height, 0, 'f', decimals);
		std::vector<QString> label = labels ? std::vector<QString> { value }
						    : std::vector<QString>();
		items.push_back({ lowerBound, upperBound, height, label,
				  categoryBinner->formatWithUnit(*bin), res });
	}

	createSeries<BarSeries>(isHorizontal, categoryVariable->name(), valueVariable, items);
}
// Plots a stacked histogram: the dives are binned by the category variable and,
// inside each category bin, counted per bin of the value variable. The bars
// extend from the lower to the upper bound of their category bin.
// Nothing is plotted if a binner is missing or if there are no bins.
void StatsView::plotHistogramStackedChart(const std::vector<dive *> &dives,
					  ChartSubType subType,
					  const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
					  const StatsVariable *valueVariable, const StatsBinner *valueBinner, bool labels, bool showLegend)
{
	if (!categoryBinner || !valueBinner)
		return;

	setTitle(valueVariable->nameWithBinnerUnit(*valueBinner));

	std::vector<StatsBinDives> categoryBins = categoryBinner->bin_dives(dives, true);

	// If there is nothing to display, quit. This is not only cosmetic:
	// createHistogramAxis() accesses the last bin of the list.
	if (categoryBins.empty())
		return;

	// Construct the histogram axis now, because the pointers to the bins
	// will be moved away when constructing BarPlotData below.
	bool isHorizontal = subType == ChartSubType::HorizontalStacked;
	HistogramAxis *catAxis = createHistogramAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner),
						     *categoryBinner, categoryBins, !isHorizontal);

	BarPlotData data(categoryBins, *valueBinner);

	// Paint the legend before the bin-names are moved into the series below.
	if (showLegend)
		legend = std::make_unique<Legend>(chart, data.vbinNames);

	// Stacked bars reach up to the total count of their category.
	CountAxis *valAxis = createCountAxis(data.maxCategoryCount, isHorizontal);

	if (isHorizontal)
		addAxes(valAxis, catAxis);
	else
		addAxes(catAxis, valAxis);

	std::vector<BarSeries::MultiItem> items;
	items.reserve(data.hbin_counts.size());

	for (auto &[hbin, counts, total]: data.hbin_counts) {
		double lowerBound = categoryBinner->lowerBoundToFloat(*hbin);
		double upperBound = categoryBinner->upperBoundToFloat(*hbin);
		items.push_back({ lowerBound, upperBound, makeCountLabels(counts, total, labels, isHorizontal),
				  categoryBinner->formatWithUnit(*hbin) });
	}

	createSeries<BarSeries>(isHorizontal, true, categoryVariable->name(), valueVariable, std::move(data.vbinNames), items);
}
// Plots a box-and-whiskers chart on a histogram axis: one box per bin of the
// category variable, extending from the lower to the upper bound of the bin
// and showing the quartiles of the value variable for the dives in that bin.
// Bins without valid quartiles are skipped.
// Nothing is plotted without a binner or if there are no bins.
void StatsView::plotHistogramBoxChart(const std::vector<dive *> &dives,
				      const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
				      const StatsVariable *valueVariable)
{
	if (!categoryBinner)
		return;

	setTitle(valueVariable->name());

	std::vector<StatsBinQuartiles> binQuartiles = valueVariable->bin_quartiles(*categoryBinner, dives, true);
	if (binQuartiles.empty())
		return;		// nothing to display

	HistogramAxis *categoryAxis = createHistogramAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner),
							  *categoryBinner, binQuartiles, true);

	auto [lowest, highest] = getMinMaxValue(binQuartiles);
	ValueAxis *valAxis = createAxis<ValueAxis>(valueVariable->nameWithUnit(),
						   lowest, highest, valueVariable->decimals(), false);

	addAxes(categoryAxis, valAxis);

	BoxSeries *boxes = createSeries<BoxSeries>(valueVariable->name(), valueVariable->unitSymbol(), valueVariable->decimals());

	for (auto &[bin, q]: binQuartiles) {
		if (q.isValid()) {
			double from = categoryBinner->lowerBoundToFloat(*bin);
			double to = categoryBinner->upperBoundToFloat(*bin);
			boxes->append(from, to, q, categoryBinner->formatWithUnit(*bin));
		}
	}
}
// Tests whether the slope of a regression line is statistically significant
// at the p < 0.05 level, using a t-test with [sample_size - 2] degrees of freedom.
//   sample_size: number of data points
//   cov: sum of (x - avg_x) * (y - avg_y) over all points
//   sx2: sum of (x - avg_x)^2 over all points
//   sy2: sum of (y - avg_y)^2 over all points
// Returns true if the regression line should be considered meaningful.
// NOTE(review): the tabulated numbers look like the customary two-sided 5%
// critical values of Student's t-distribution (the original comment called
// them one-tailed) - please confirm which test is intended.
static bool is_linear_regression(int sample_size, double cov, double sx2, double sy2)
{
	// One point never, two points always form a line
	if (sample_size < 2)
		return false;
	if (sample_size <= 2)
		return true;

	// Without any spread in x there is no slope to test (and we would divide by zero).
	if (!(sx2 > 0.0))
		return false;

	// Reference t-values at p < 0.05 for the degrees of freedom listed in t_df.
	const double tval[] = { 12.709, 4.303, 3.182, 2.776, 2.571, 2.447, 2.201, 2.120, 2.080, 2.056, 2.021, 1.960, 1.960 };
	const int t_df[] = { 1, 2, 3, 4, 5, 6, 11, 16, 21, 26, 40, 100, 100000 };
	const int df = sample_size - 2;

	// t = slope / standard error of the slope.
	const double slope = cov / sx2;
	double residual = sy2 - cov * cov / sx2;
	if (residual < 0.0)
		residual = 0.0;		// can only be negative owing to rounding errors
	const double std_err = std::sqrt((residual / (double)df) / sx2);

	// All points lie exactly on a line: significant, unless the line is flat.
	if (std_err == 0.0)
		return slope != 0.0;
	const double t = slope / std_err;

	// We do linear interpolation rather than having a large lookup table.
	for (int i = (int)std::size(tval) - 2; i >= 0; i--) {
		if (df >= t_df[i]) {
			// Look up the appropriate reference t-value at p < 0.05 and df degrees of freedom
			double t_lookup = tval[i] - (tval[i] - tval[i+1]) * (df - t_df[i]) / (t_df[i+1] - t_df[i]);
			// Attention: std::fabs(), not abs(). The latter may resolve to the
			// integer version of the C library and truncate t.
			return std::fabs(t) >= t_lookup;
		}
	}
	return true; // can't happen, as we tested for sample_size above.
}
// Least-squares fit of a line y = ax + b through the given scatter items.
// Returns the coefficients [a,b].
// In case of an undetermined regression, one with infinite slope, or one whose
// slope is not statistically significant, returns [nan, nan].
static std::pair<double, double> linear_regression(const std::vector<StatsScatterItem> &v)
{
	if (v.size() < 2)
		return { NaN, NaN };

	// First, calculate the x and y average
	const double count = static_cast<double>(v.size());
	double avg_x = 0.0, avg_y = 0.0;
	for (const auto &[x, y, d]: v) {
		avg_x += x;
		avg_y += y;
	}
	avg_x /= count;
	avg_y /= count;

	// Then, the sums of products of the deviations from the averages
	double cov = 0.0, sx2 = 0.0, sy2 = 0.0;
	for (const auto &[x, y, d]: v) {
		const double dx = x - avg_x;
		const double dy = y - avg_y;
		cov += dx * dy;
		sx2 += dx * dx;
		sy2 += dy * dy;
	}

	// No spread in x: the slope is undefined. Test this before the significance
	// check, because the latter divides by sx2.
	if (std::fabs(sx2) < 1e-10)
		return { NaN, NaN };

	// If t is not statistically significant, do not plot the regression line.
	if (!is_linear_regression(static_cast<int>(v.size()), cov, sx2, sy2))
		return { NaN, NaN };

	const double a = cov / sx2;
	const double b = avg_y - a * avg_x;
	return { a, b };
}
// Plot a scatter chart of valueVariable (vertical axis) against categoryVariable
// (horizontal axis). If linear_regression() returns a line, it is added on top.
void StatsView::plotScatter(const std::vector<dive *> &dives, const StatsVariable *categoryVariable, const StatsVariable *valueVariable)
{
	setTitle(StatsTranslations::tr("%1 vs. %2").arg(valueVariable->name(), categoryVariable->name()));

	std::vector<StatsScatterItem> items = categoryVariable->scatter(*valueVariable, dives);
	if (items.empty())
		return;

	// The horizontal range is taken from the first and the last item.
	// NOTE(review): this presumably relies on scatter() returning items sorted by x - confirm.
	double firstX = items.front().x;
	double lastX = items.back().x;
	auto [lowY, highY] = getMinMaxValue(items);

	// Variables of type Continuous get a date axis, all others a plain value axis
	StatsAxis *xAxis;
	if (categoryVariable->type() == StatsVariable::Type::Continuous)
		xAxis = createAxis<DateAxis>(categoryVariable->nameWithUnit(), firstX, lastX, true);
	else
		xAxis = createAxis<ValueAxis>(categoryVariable->nameWithUnit(), firstX, lastX,
					      categoryVariable->decimals(), true);
	StatsAxis *yAxis = createAxis<ValueAxis>(valueVariable->nameWithUnit(), lowY, highY, valueVariable->decimals(), false);
	addAxes(xAxis, yAxis);

	ScatterSeries *dots = createSeries<ScatterSeries>(*categoryVariable, *valueVariable);
	for (auto [px, py, d]: items)
		dots->append(d, px, py);

	// Regression line y = ax + b; a NaN slope means that there is no line to draw
	const std::pair<double, double> line = linear_regression(items);
	if (std::isnan(line.first))
		return;
	auto [fromX, toX] = xAxis->minMax();
	addLinearRegression(line.first, line.second, fromX, toX, dots);
}

138
stats/statsview.h Normal file
View file

@ -0,0 +1,138 @@
// SPDX-License-Identifier: GPL-2.0
#ifndef STATS_VIEW_H
#define STATS_VIEW_H
#include "statsstate.h"
#include <memory>
#include <QQuickWidget>
// Forward declarations, so that this header does not have to pull in the
// respective definitions.
struct dive;
struct StatsBinner;
struct StatsBin;
struct StatsState;
struct StatsVariable;
// Qt Charts classes, declared inside their QtCharts namespace
namespace QtCharts {
class QAbstractSeries;
class QChart;
}
// Graphics item held by the marker structs of StatsView
class QGraphicsLineItem;
// Series, axis and legend classes used by StatsView
class StatsSeries;
class CategoryAxis;
class CountAxis;
class HistogramAxis;
class StatsAxis;
class Legend;
// Opaque declarations of scoped enums with underlying type int
enum class ChartSubType : int;
enum class StatsOperation : int;
// Widget that shows the chart described by a StatsState.
// It derives from QQuickWidget; the chart object itself is allocated by QML
// (see the note at EventFilter below) and accessed via the "chart" member.
class StatsView : public QQuickWidget {
	Q_OBJECT
public:
	StatsView(QWidget *parent = nullptr);
	~StatsView();

	// Plot the chart described by state
	void plot(const StatsState &state);
private slots:
	void plotAreaChanged(const QRectF &plotArea);
	void replotIfVisible();
private:
	void reset(); // clears all series and axes
	void addAxes(StatsAxis *x, StatsAxis *y); // Add new x- and y-axis

	// One plot function per chart type. All of them take the dives to be plotted,
	// the category variable (with its binner where binning applies) and, depending
	// on the chart type, a value variable and display options.
	void plotBarChart(const std::vector<dive *> &dives,
			  ChartSubType subType,
			  const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
			  const StatsVariable *valueVariable, const StatsBinner *valueBinner, bool labels, bool legend);
	void plotValueChart(const std::vector<dive *> &dives,
			    ChartSubType subType,
			    const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
			    const StatsVariable *valueVariable, StatsOperation valueAxisOperation, bool labels);
	void plotDiscreteCountChart(const std::vector<dive *> &dives,
				    ChartSubType subType,
				    const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, bool labels);
	void plotPieChart(const std::vector<dive *> &dives,
			  const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, bool labels, bool legend);
	void plotDiscreteBoxChart(const std::vector<dive *> &dives,
				  const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, const StatsVariable *valueVariable);
	void plotDiscreteScatter(const std::vector<dive *> &dives,
				 const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
				 const StatsVariable *valueVariable, bool quartiles);
	void plotHistogramCountChart(const std::vector<dive *> &dives,
				     ChartSubType subType,
				     const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
				     bool labels, bool showMedian, bool showMean);
	void plotHistogramValueChart(const std::vector<dive *> &dives,
				     ChartSubType subType,
				     const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
				     const StatsVariable *valueVariable, StatsOperation valueAxisOperation, bool labels);
	void plotHistogramStackedChart(const std::vector<dive *> &dives,
				       ChartSubType subType,
				       const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
				       const StatsVariable *valueVariable, const StatsBinner *valueBinner, bool labels, bool legend);
	void plotHistogramBoxChart(const std::vector<dive *> &dives,
				   const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, const StatsVariable *valueVariable);
	void plotScatter(const std::vector<dive *> &dives, const StatsVariable *categoryVariable, const StatsVariable *valueVariable);
	void setTitle(const QString &);

	// Factory helpers for series and axes of type T; the arguments are forwarded.
	// NOTE(review): presumably the created objects are stored in "series" / "axes" below - confirm in statsview.cpp.
	template <typename T, class... Args>
	T *createSeries(Args&&... args);
	template <typename T, class... Args>
	T *createAxis(const QString &title, Args&&... args);
	template<typename T>
	CategoryAxis *createCategoryAxis(const QString &title, const StatsBinner &binner,
					 const std::vector<T> &bins, bool isHorizontal);
	template<typename T>
	HistogramAxis *createHistogramAxis(const QString &title, const StatsBinner &binner,
					   const std::vector<T> &bins, bool isHorizontal);
	CountAxis *createCountAxis(int maxVal, bool isHorizontal);

	// Helper functions to add feature to the chart
	void addLineMarker(double pos, double low, double high, const QPen &pen, bool isHorizontal);

	// A short line used to mark quartiles
	struct QuartileMarker {
		std::unique_ptr<QGraphicsLineItem> item;
		QtCharts::QAbstractSeries *series; // In case we ever support charts with multiple axes
		double pos, value;
		QuartileMarker(double pos, double value, QtCharts::QAbstractSeries *series);
		void updatePosition();
	};

	// A general line marker
	struct LineMarker {
		std::unique_ptr<QGraphicsLineItem> item;
		QtCharts::QAbstractSeries *series; // In case we ever support charts with multiple axes
		QPointF from, to; // In local coordinates
		void updatePosition();
		LineMarker(QPointF from, QPointF to, QPen pen, QtCharts::QAbstractSeries *series);
	};

	void addLinearRegression(double a, double b, double minX, double maxX, QtCharts::QAbstractSeries *series);
	void addHistogramMarker(double pos, double low, double high, const QPen &pen, bool isHorizontal, QtCharts::QAbstractSeries *series);

	StatsState state;
	QtCharts::QChart *chart; // Allocated by QML (see note below), hence a plain pointer
	std::vector<std::unique_ptr<StatsAxis>> axes;
	std::vector<std::unique_ptr<StatsSeries>> series;
	std::unique_ptr<Legend> legend;
	std::vector<QuartileMarker> quartileMarkers;
	std::vector<LineMarker> lineMarkers;
	StatsSeries *highlightedSeries;

	// This is unfortunate: we can't derive from QChart, because the chart is allocated by QML.
	// Therefore, we have to listen to hover events using an events-filter.
	// Probably we should try to get rid of the QML ChartView.
	struct EventFilter : public QObject {
		StatsView *view;
		EventFilter(StatsView *view) : view(view) {}
	private:
		// Overrides the virtual QObject::eventFilter()
		bool eventFilter(QObject *o, QEvent *event) override;
	} eventFilter;
	friend EventFilter;
	void hover(QPointF pos);
};
#endif