statistics: implement axes

Implement five kinds of axes:
 - ValueAxis: a standard axis for plotting numerical linear data.
 - CountAxis: a ValueAxis for plotting counts of dives.
 - CategoryAxis: an axis for plotting discrete variables without
   any notion of distance.
 - HistogramAxis: an axis for plotting bins with a numeric value.
 - DateAxis: a HistogramAxis that formats dates.

The axes derive from a common virtual base class that defines
a small interface, notably, returning the minimum and maximum
displayed value and redrawing the axis.

The mapping and painting is performed by QtCharts' axes. On
the one hand, using QtCharts turned out to be too inflexible.
On the other hand it allowed us to quickly prototype the charts.
Ultimately, we should do our own drawing of the axis.

As a testament to the inflexibility, QtCharts' axes do not
allow for repeated labels, which are needed for quarter-based date
charts (year, Q2, Q3, Q4, year, Q2, Q3, ...). Therefore the
code disambiguates labels by adding unicode zero-width spaces.
Wonderful.

When omitting labels due to space reasons, the histogram
axis attempts to show "preferred" labels. In the quarter
example above, it tries to show full years.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>
This commit is contained in:
Berthold Stoeger 2021-01-01 21:35:56 +01:00 committed by Dirk Hohndel
parent 3b26f6a767
commit 31da037701
3 changed files with 481 additions and 0 deletions

View file

@ -7,6 +7,8 @@ include_directories(.
set(SUBSURFACE_STATS_SRCS
legend.h
legend.cpp
statsaxis.h
statsaxis.cpp
statscolors.h
statscolors.cpp
statsvariables.h

394
stats/statsaxis.cpp Normal file
View file

@ -0,0 +1,394 @@
// SPDX-License-Identifier: GPL-2.0
#include "statsaxis.h"
#include "statstranslations.h"
#include "statsvariables.h"
#include "core/pref.h"
#include "core/subsurface-time.h"
#include <math.h> // for lrint
#include <numeric>
#include <QChart>
#include <QFontMetrics>
#include <QLocale>
StatsAxis::StatsAxis(bool horizontal) : horizontal(horizontal)
{
}
// Out-of-line virtual destructor; there is nothing to clean up in the base.
StatsAxis::~StatsAxis() = default;
// Fallback displayed range [0, 1], used by axes that do not override minMax().
std::pair<double, double> StatsAxis::minMax() const
{
	return std::make_pair(0.0, 1.0);
}
// Estimate how many tick marks fit on the axis, based on sample label strings.
// Callers pass e.g. the minimum and maximum values, which are not necessarily
// the widest strings (proportional fonts, categorical data). Therefore generous
// margins are added to err on the safe side.
// Returns the estimated number of ticks, but never less than two.
int StatsAxis::guessNumTicks(const QtCharts::QChart *chart, const QtCharts::QAbstractAxis *axis, const std::vector<QString> &strings) const
{
	const QFontMetrics metrics(axis->labelsFont());

	// Extent of the largest sample label along the axis direction,
	// starting from the height of one line of text as lower bound.
	int labelExtent = metrics.height();
	for (const QString &label: strings) {
		const QSize textSize = metrics.size(Qt::TextSingleLine, label);
		const int extent = horizontal ? textSize.width() : textSize.height();
		labelExtent = std::max(labelExtent, extent);
	}

	// Add space between labels: 50% for horizontal axes, 100% for vertical ones.
	labelExtent = horizontal ? labelExtent * 3 / 2 : labelExtent * 2;

	const QRectF plotArea = chart->plotArea();
	const double available = horizontal ? plotArea.width() : plotArea.height();
	const int fitting = lrint(available / labelExtent);
	return std::max(fitting, 2);
}
// Construct a linear numerical axis for the data range [min, max].
// "decimals" is the number of decimals used when formatting labels.
// The displayed range and ticks are only computed in updateLabels().
ValueAxis::ValueAxis(double min, double max, int decimals, bool horizontal) :
	StatsAxisTemplate(horizontal),
	min(min),
	max(max),
	decimals(decimals)
{
}
// Report the range currently shown by the underlying QValueAxis
// (not the data range stored in the min/max members).
std::pair<double, double> ValueAxis::minMax() const
{
	const double shownMin = QValueAxis::min();
	const double shownMax = QValueAxis::max();
	return { shownMin, shownMax };
}
// Build a printf-style floating point format with the given number of
// decimals, e.g. "%.2f". Negative values are treated as zero decimals.
static QString makeFormatString(int decimals)
{
	const int precision = decimals < 0 ? 0 : decimals;
	return QStringLiteral("%.%1f").arg(precision);
}
// Choose a "nice" tick increment for the stored [min, max] range and set
// range, tick count and label format of the underlying QValueAxis.
// Note: may widen the stored min/max (degenerate ranges) and may raise the
// stored number of decimals if the increment needs more digits.
void ValueAxis::updateLabels(const QtCharts::QChart *chart)
{
	// Avoid degenerate cases: widen a (nearly) empty range to one unit.
	if (max - min < 0.0001) {
		max += 0.5;
		min -= 0.5;
	}

	// Use the formatted range limits as sample labels for the tick estimate.
	const QLocale locale;
	const QString lowerLabel = locale.toString(min, 'f', decimals);
	const QString upperLabel = locale.toString(max, 'f', decimals);
	const int maxTicks = guessNumTicks(chart, this, { lowerLabel, upperLabel });

	// Raw increment and its decimal order of magnitude.
	const double rawIncrement = (max - min) / maxTicks;
	const double magnitude = floor(log10(rawIncrement));
	const int magnitudeInt = lrint(magnitude);
	const double unit = pow(10.0, magnitude);

	// Do "nice" increments of the leading digit: 1, 2, 4, 5 (or the next decade).
	int leading = static_cast<int>(ceil(rawIncrement / unit));
	if (leading < 1)
		leading = 1;
	if (leading > 5)
		leading = 10;
	else if (leading == 3)
		leading = 4;
	const double increment = leading * unit;

	// Make sure that sub-unit increments are visible in the labels.
	if (decimals < -magnitudeInt)
		decimals = -magnitudeInt;
	setLabelFormat(makeFormatString(decimals));

	// Extend the shown range to full multiples of the increment.
	const double shownMin = floor(min / increment) * increment;
	const double shownMax = ceil(max / increment) * increment;
	const int steps = lrint((shownMax - shownMin) / increment);
	setRange(shownMin, shownMax);
	setTickCount(steps + 1); // There is one more tick than steps
}
// Construct an axis for counts in the range [0, count], shown without decimals.
CountAxis::CountAxis(int count, bool horizontal) :
	ValueAxis(0.0, static_cast<double>(count), 0, horizontal),
	count(count)
{
}
// Choose an integer step size with "nice" leading digits and set range,
// tick count and label format so that the axis spans [0, n * step] with
// n * step >= count. Note: clamps the stored count to at least 1.
void CountAxis::updateLabels(const QtCharts::QChart *chart)
{
	// The formatted count serves as sample label for the tick estimate.
	const QString sample = QLocale().toString(count);
	const int maxTicks = guessNumTicks(chart, this, { sample });

	if (count <= 0)
		count = 1;

	// Get estimate of step size. Round up, so that we do not
	// end up with more steps than estimated.
	int step = (count + maxTicks - 1) / maxTicks;
	if (step < 1)
		step = 1;

	// Split the step into its significant first or first two digits
	// (at most 25) and a power-of-ten scale.
	int scale = 1;
	int significant = step;
	for (; significant > 25; significant /= 10)
		scale *= 10;

	// Round the significant digits up to the next "nice" value.
	for (int candidate: { 1, 2, 4, 5, 10, 15, 20, 25 }) {
		if (candidate < significant)
			continue;
		significant = candidate;
		break;
	}
	step = significant * scale;

	// Make maximum an integer number of steps, equal or greater than the needed counts
	const int numSteps = (count - 1) / step + 1;
	setLabelFormat("%.0f");
	setRange(0, numSteps * step);
	setTickCount(numSteps + 1); // There is one more tick than steps
}
// Construct a categorical axis and register the given labels, in order,
// as categories of the underlying QBarCategoryAxis.
CategoryAxis::CategoryAxis(const std::vector<QString> &labels, bool horizontal) :
	StatsAxisTemplate(horizontal)
{
	for (const QString &category: labels) {
		append(category);
	}
}
// Categories are set once in the constructor and never thinned out,
// therefore there is nothing to recalculate here.
void CategoryAxis::updateLabels(const QtCharts::QChart *)
{
	// Intentionally empty.
}
// A small helper class that makes strings unique. We need this,
// because QCategoryAxis can only handle unique category names.
// Disambiguate strings by adding unicode zero-width spaces.
// Keep track of a list of strings and how many spaces have to
// be added.
class LabelDisambiguator {
using Pair = std::pair<QString, int>;
std::vector<Pair> entries;
public:
QString transmogrify(const QString &s);
};
QString LabelDisambiguator::transmogrify(const QString &s)
{
auto it = std::find_if(entries.begin(), entries.end(),
[&s](const Pair &p) { return p.first == s; });
if (it == entries.end()) {
entries.emplace_back(s, 0);
return s;
} else {
++(it->second);
return s + QString(it->second, QChar(0x200b));
}
}
// Construct a histogram axis from a list of (name, value, recommended) bins.
// The bin names are made unique, since QCategoryAxis cannot handle repeated
// labels. The actual labels are only added in updateLabels().
// With fewer than two bins the axis is left empty.
HistogramAxis::HistogramAxis(std::vector<HistogramAxisEntry> bins, bool horizontal) : StatsAxisTemplate(horizontal),
	bin_values(std::move(bins)),
	preferred_step(1) // Defined value even if we bail out early below
{
	if (bin_values.size() < 2) // Less than two makes no sense -> there must be at least one category
		return;

	LabelDisambiguator labeler;
	for (HistogramAxisEntry &entry: bin_values)
		entry.name = labeler.transmogrify(entry.name);

	// The caller can declare some bin labels as preferred, when there are
	// too many labels to show all. Try to infer the preferred step size
	// by finding two consecutive preferred labels. This supposes that
	// the preferred labels are equi-distant and that the caller does not
	// use large prime (or nearly prime) steps.
	auto is_recommended = [](const HistogramAxisEntry &e) { return e.recommended; };
	auto it1 = std::find_if(bin_values.begin(), bin_values.end(), is_recommended);
	auto next_it = it1 == bin_values.end() ? it1 : std::next(it1);
	auto it2 = std::find_if(next_it, bin_values.end(), is_recommended);
	// Without two recommended labels, keep the default step of 1.
	if (it2 != bin_values.end())
		preferred_step = static_cast<int>(it2 - it1);

	setMin(bin_values.front().value);
	setMax(bin_values.back().value);
	setStartValue(bin_values.front().value);
	setLabelsPosition(QCategoryAxis::AxisLabelsPositionOnValue);
}
std::pair<double, double> HistogramAxis::minMax() const
{
if (bin_values.size() < 2) // Less than two makes no sense -> there must be at least one category
return { 0.0, 1.0 };
return { QValueAxis::min(), QValueAxis::max() };
}
// (Re)create the labels of the histogram axis from the stored bins.
// If not all labels fit, only every n-th label is shown. The step n is
// chosen such that recommended labels are hit, if possible.
// The one example where this is relevant is the quarterly bins, which are
// formatted as (2019, q1, q2, q3, 2020, ...). There, we obviously want to
// show the years and not the quarters.
void HistogramAxis::updateLabels(const QtCharts::QChart *chart)
{
	if (bin_values.size() < 2) // Less than two makes no sense -> there must be at least one category
		return;
	const int numBins = static_cast<int>(bin_values.size());

	// QCategoryAxis has no "clear all labels" function,
	// so the old labels have to be removed one by one.
	const auto staleLabels = categoriesLabels();
	for (const QString &stale: staleLabels)
		remove(stale);
	if (count() > 0)
		qWarning("HistogramAxis::updateLabels(): labels left after clearing!?");

	// Estimate how many labels fit, using all bin names as samples.
	std::vector<QString> names;
	names.reserve(bin_values.size());
	for (const HistogramAxisEntry &bin: bin_values)
		names.push_back(bin.name);
	const int maxLabels = guessNumTicks(chart, this, names);

	// Minimum step that does not exceed the estimated number of labels.
	int step = (numBins - 1) / maxLabels + 1;

	if (step > preferred_step) {
		// Round up to the next multiple of the preferred step.
		const int remainder = step % preferred_step;
		if (remainder != 0)
			step += preferred_step - remainder;
	} else if (step < preferred_step) {
		if (2 * step > preferred_step) {
			// No divisor of the preferred step can lie in between.
			step = preferred_step;
		} else {
			// Increase the step until it divides the preferred step.
			const int stride = std::gcd(step, preferred_step);
			while (preferred_step % step != 0)
				step += stride;
		}
	}

	// When skipping labels, start such that the first recommended label is shown.
	int first = 0;
	if (step > 1) {
		int idx = 0;
		while (idx < numBins && !bin_values[idx].recommended)
			++idx;
		if (idx < numBins)
			first = idx % step;
	}

	for (int i = first; i < numBins; i += step) {
		const HistogramAxisEntry &bin = bin_values[i];
		append(bin.name, bin.value);
	}
}
// Helper to turn a (fractional) number of days since the "Unix epoch"
// into a timestamp_t, rounding to the nearest second.
static const double seconds_in_day = 86400.0;
static timestamp_t double_to_timestamp(double d)
{
	const double seconds = d * seconds_in_day;
	return timestamp_t{ lrint(seconds) };
}
// Turn days since the "Unix epoch" into a (year, month) pair,
// as reported by utc_mkdate() in tm_year and tm_mon.
static std::pair<int, int> double_to_month(double d)
{
	struct tm date;
	utc_mkdate(double_to_timestamp(d), &date);
	return std::make_pair(date.tm_year, date.tm_mon);
}
// Advance a (year, month) pair by one month. Months run from 0 to 11;
// after month 11 the year is increased and the month wraps around to 0.
static void inc(std::pair<int, int> &ym)
{
	int &year = ym.first;
	int &month = ym.second;

	month += 1;
	if (month < 12)
		return;
	month = 0;
	year += 1;
}
// Turn days since the "Unix epoch" into a (year, month, day) triple,
// as reported by utc_mkdate() in tm_year, tm_mon and tm_mday.
static std::array<int, 3> double_to_day(double d)
{
	struct tm date;
	utc_mkdate(double_to_timestamp(d), &date);
	std::array<int, 3> ymd = { date.tm_year, date.tm_mon, date.tm_mday };
	return ymd;
}
// This is trashy: to increase a day, turn into timestamp and back.
// This surely can be done better.
static void inc(std::array<int, 3> &ymd)
{
struct tm tm = { 0 };
tm.tm_year = ymd[0];
tm.tm_mon = ymd[1];
tm.tm_mday = ymd[2] + 1;
timestamp_t t = utc_mktime(&tm);
utc_mkdate(t, &tm);
ymd = { tm.tm_year, tm.tm_mon, tm.tm_mday };
}
// Use heuristics to determine the preferred day/month format from the
// date format string in the preferences: check whether the first 'd'/'D'
// comes before the first 'm'/'M' and take the first character following
// the leading run of day/month letters as separator.
// Returns a (day_first, separator) pair; the separator defaults to '.'
// if the format string ends before a separator is found.
static std::pair<bool, char> day_format()
{
	const auto is_day = [](char c) { return c == 'd' || c == 'D'; };
	const auto is_month = [](char c) { return c == 'm' || c == 'M'; };

	const char *fmt = prefs.date_format;

	// Locate the first day and month placeholders (or the end of the string).
	const char *day = fmt;
	while (*day && !is_day(*day))
		++day;
	const char *month = fmt;
	while (*month && !is_month(*month))
		++month;

	// Skip the placeholder letters starting at whichever comes first.
	const char *sep = std::min(month, day);
	while (is_month(*sep) || is_day(*sep))
		++sep;

	const char separator = *sep ? *sep : '.';
	return { day < month, separator };
}
// For now, misuse the histogram axis for creating a time axis. Depending on the range,
// create year, month or day-based bins. This is certainly not efficient and may need
// some tuning. However, it should ensure that no crazy number of bins is generated.
// Ultimately, this should be replaced by a better and dynamic scheme.
// From and to are given in days since the "Unix epoch" (see double_to_timestamp()).
// Returns the bins as (label, value, recommended) entries in ascending order of time.
static std::vector<HistogramAxisEntry> timeRangeToBins(double from, double to)
{
	// The lowest precision we do is two days: widen narrower ranges
	// symmetrically around their center. Take care to keep from < to,
	// otherwise the loops below would not generate any bins.
	if (to - from < 2.0) {
		double center = (from + to) / 2.0;
		from = center - 1.0;
		to = center + 1.0;
	}

	std::vector<HistogramAxisEntry> res;
	if (to - from > 2.0 * 365.0) {
		// For two years or more, do year based bins
		int year_from = utc_year(double_to_timestamp(from));
		int year_to = utc_year(double_to_timestamp(to)) + 1;
		for (int year = year_from; year <= year_to; ++year)
			res.push_back({ QString::number(year), date_to_double(year, 0, 0), true });
	} else if (to - from > 2.0 * 30.0) {
		// For two months or more, do month based bins
		auto year_month_from = double_to_month(from);
		auto year_month_to = double_to_month(to);
		inc(year_month_to);
		for (auto act = year_month_from; act <= year_month_to; inc(act)) {
			double val = date_to_double(act.first, act.second, 0);
			// Label the first month of a year with the year and recommend
			// it, so that years are shown if labels have to be skipped.
			if (act.second == 0)
				res.push_back({ QString::number(act.first), val, true });
			else
				res.push_back({ monthname(act.second), val, false });
		}
	} else {
		// For less than two months, do date based bins
		auto day_from = double_to_day(from);
		auto day_to = double_to_day(to);
		inc(day_to);
		auto [day_before_month, separator] = day_format();
		QString format = day_before_month ? QStringLiteral("%1%2%3")
						  : QStringLiteral("%3%2%1");
		QString sep = QString(separator);
		for (auto act = day_from; act < day_to; inc(act)) {
			double val = date_to_double(act[0], act[1], act[2]);
			// NOTE(review): the first condition holds for every day in month 0
			// and the second one tests for day 0. This looks like it was meant
			// to single out the first day of a year / month - confirm the
			// day-of-month base used by utc_mkdate() and date_to_double().
			if (act[1] == 0) {
				res.push_back({ QString::number(act[0]), val, true });
			} else if (act[2] == 0) {
				res.push_back({ monthname(act[1]), val, true });
			} else {
				// Months are 0-based here (see the 12 -> 0 wrap-around
				// in inc()), so add one for the numeric label.
				QString s = format.arg(QString::number(act[2]), sep, QString::number(act[1] + 1));
				res.push_back({s, val, true });
			}
		}
	}
	return res;
}
// Construct a date axis: a histogram axis whose bins are generated by
// timeRangeToBins() for the time range from "from" to "to".
DateAxis::DateAxis(double from, double to, bool horizontal)
	: HistogramAxis(timeRangeToBins(from, to), horizontal)
{
}

85
stats/statsaxis.h Normal file
View file

@ -0,0 +1,85 @@
// SPDX-License-Identifier: GPL-2.0
// Supported chart axes
#ifndef STATS_AXIS_H
#define STATS_AXIS_H
#include <vector>
#include <QBarCategoryAxis>
#include <QCategoryAxis>
#include <QValueAxis>
namespace QtCharts {
class QChart;
}
// Common interface of all chart axes of the statistics module.
// Concrete axes derive from this class and from a QtCharts axis
// (see StatsAxisTemplate).
class StatsAxis {
public:
virtual ~StatsAxis();
// Recalculate the axis labels / ticks for the given chart.
virtual void updateLabels(const QtCharts::QChart *chart) = 0;
// Access the underlying QtCharts axis object.
virtual QtCharts::QAbstractAxis *qaxis() = 0;
// Returns minimum and maximum of shown range, not of data points.
// The default implementation returns (0.0, 1.0).
virtual std::pair<double, double> minMax() const;
protected:
// Only to be constructed by derived classes; horizontal gives the orientation.
StatsAxis(bool horizontal);
// Estimate how many ticks fit into the chart's plot area, given the label
// font of the axis and example label strings. Returns at least 2.
int guessNumTicks(const QtCharts::QChart *chart, const QtCharts::QAbstractAxis *axis, const std::vector<QString> &strings) const;
// True for horizontal axes: label widths are measured instead of heights.
bool horizontal;
};
// Small template that derives from StatsAxis and a QtCharts axis type
// (QAxis) and defines the corresponding virtual qaxis() accessor.
template<typename QAxis>
class StatsAxisTemplate : public StatsAxis, public QAxis
{
// Inherit the StatsAxis(bool horizontal) constructor.
using StatsAxis::StatsAxis;
// The object is itself the QtCharts axis, so simply return this.
QtCharts::QAbstractAxis *qaxis() override final {
return this;
}
};
// A standard axis for plotting numerical linear data, based on QValueAxis.
class ValueAxis : public StatsAxisTemplate<QtCharts::QValueAxis> {
public:
// min and max give the data range, decimals the number of decimals
// used for formatting the labels.
ValueAxis(double min, double max, int decimals, bool horizontal);
private:
// Data range as passed to the constructor; updateLabels() widens
// it if it is (nearly) empty.
double min, max;
// Number of decimals of the labels; updateLabels() may increase it
// if the chosen tick increment needs more digits.
int decimals;
// Calculates tick increment, shown range, tick count and label format.
void updateLabels(const QtCharts::QChart *chart) override;
// Returns the range currently shown by the QValueAxis.
std::pair<double, double> minMax() const override;
};
// A ValueAxis for plotting counts: the range starts at 0 and the
// ticks are placed at integer steps.
class CountAxis : public ValueAxis {
public:
// count is the maximum count that has to fit on the axis.
CountAxis(int count, bool horizontal);
private:
// Maximum count to be shown; updateLabels() clamps it to at least 1.
int count;
// Calculates an integer step size and sets range, tick count and label format.
void updateLabels(const QtCharts::QChart *chart) override;
};
// An axis for plotting discrete variables without any notion of distance,
// based on QBarCategoryAxis.
class CategoryAxis : public StatsAxisTemplate<QtCharts::QBarCategoryAxis> {
public:
	// labels are the category names, in the order they are shown.
	CategoryAxis(const std::vector<QString> &labels, bool horizontal);
private:
	// Implements StatsAxis::updateLabels(). Marked override like in the
	// other axis classes, so that signature mismatches are caught at compile time.
	void updateLabels(const QtCharts::QChart *chart) override;
};
// A bin boundary of a HistogramAxis.
struct HistogramAxisEntry {
// Label shown at this position.
QString name;
// Position of the label on the axis.
double value;
// If labels have to be skipped for space reasons, labels
// with this flag set are preferably shown.
bool recommended;
};
// An axis for plotting bins with a numeric value, based on QCategoryAxis.
// If there is not enough space for all labels, only a subset is shown.
class HistogramAxis : public StatsAxisTemplate<QtCharts::QCategoryAxis> {
public:
// bin_values are the bin boundaries in the order they are shown. With fewer
// than two entries the axis stays empty.
HistogramAxis(std::vector<HistogramAxisEntry> bin_values, bool horizontal);
private:
// Recreates the labels, skipping labels if there is not enough space.
void updateLabels(const QtCharts::QChart *chart) override;
// Returns the range shown by the axis, or (0.0, 1.0) with fewer than two bins.
std::pair<double, double> minMax() const override;
// The bins; the constructor makes their names unique.
std::vector<HistogramAxisEntry> bin_values;
// Distance between the first two recommended bins, as determined by the
// constructor; 1 if fewer than two bins are recommended.
int preferred_step;
};
// A HistogramAxis that formats dates: depending on the length of the
// time range, the bins are years, months or days.
class DateAxis : public HistogramAxis {
public:
// from and to are given in days since the "Unix epoch".
DateAxis(double from, double to, bool horizontal);
};
#endif