statistics: implement a structure representing the chart state

The StatsState structure fully describes the current state of
the chart: the selected axes, operations and additional chart
features, such as legend or labels.

The code implements sanity checks and reacts accordingly,
if an invalid combination of variables and charts is chosen.

The chart and variable lists to be displayed can be queried
and are encapsulated in the StatsState::UIState structure.

Some variable / chart combinations are possible, but not
recommended, which is represented by a warning flag.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>
This commit is contained in:
Berthold Stoeger 2021-01-01 22:37:55 +01:00 committed by Dirk Hohndel
parent cccc0abc0c
commit a034014a6a
3 changed files with 618 additions and 0 deletions

View file

@ -23,6 +23,8 @@ set(SUBSURFACE_STATS_SRCS
statscolors.cpp statscolors.cpp
statsseries.h statsseries.h
statsseries.cpp statsseries.cpp
statsstate.h
statsstate.cpp
statsvariables.h statsvariables.h
statsvariables.cpp statsvariables.cpp
zvalues.h zvalues.h

496
stats/statsstate.cpp Normal file
View file

@ -0,0 +1,496 @@
// SPDX-License-Identifier: GPL-2.0
#include "statsstate.h"
#include "statstranslations.h"
#include "statsvariables.h"
// Untranslated display names of the chart subtypes, marked for translation
// with QT_TRANSLATE_NOOP and translated at the point of use.
// Attn: The table is indexed by the integer value of ChartSubType, so the
// order must correspond to the ChartSubType enum declared in statsstate.h
// (one entry per enumerator, excluding the trailing "Count").
static const char *chart_subtype_names[] = {
QT_TRANSLATE_NOOP("StatsTranslations", "vertical"),
QT_TRANSLATE_NOOP("StatsTranslations", "grouped vertical"),
QT_TRANSLATE_NOOP("StatsTranslations", "stacked vertical"),
QT_TRANSLATE_NOOP("StatsTranslations", "horizontal"),
QT_TRANSLATE_NOOP("StatsTranslations", "grouped horizontal"),
QT_TRANSLATE_NOOP("StatsTranslations", "stacked horizontal"),
QT_TRANSLATE_NOOP("StatsTranslations", "data points"),
QT_TRANSLATE_NOOP("StatsTranslations", "box-whisker"),
QT_TRANSLATE_NOOP("StatsTranslations", "piechart"),
};
// The kind of variable a chart type accepts on one of its two axes.
// A chart description names one SupportedVariable per axis; the actual
// variable type is matched against it in variableValidity().
enum class SupportedVariable {
Count, // The special "count" pseudo-variable (no real variable selected)
Categorical, // Implies that the variable is binned
Continuous, // Implies that the variable is binned
Numeric // Only accepted for variables of numeric type
};
// Optional chart features. Each feature occupies its own bit, so that the
// set of features supported by a chart can be stored in a single integer.
static constexpr int ChartFeatureLabels = 0x01;
static constexpr int ChartFeatureLegend = 0x02;
static constexpr int ChartFeatureMedian = 0x04;
static constexpr int ChartFeatureMean = 0x08;
static constexpr int ChartFeatureQuartiles = 0x10;
// Static description of every chart type: which kinds of variables it
// accepts, which subtypes it offers and which optional features it supports.
// The table is scanned front to back when collecting the valid charts, and
// the first recommended entry is preferred, so the order of the entries
// determines which chart is picked by default.
static const struct ChartTypeDesc {
ChartType id; // Chart type described by this entry
const char *name; // Untranslated name (marked with QT_TRANSLATE_NOOP)
SupportedVariable var1; // Kind of variable accepted as first variable
SupportedVariable var2; // Kind of variable accepted as second variable
bool var2HasOperations; // True if an operation can be chosen for the second variable
const std::vector<ChartSubType> subtypes; // Offered subtypes; the first one is used as fallback
int features; // Bitwise OR of the supported ChartFeature* flags
} chart_types[] = {
{
ChartType::ScatterPlot,
QT_TRANSLATE_NOOP("StatsTranslations", "Scattergraph"),
SupportedVariable::Continuous,
SupportedVariable::Numeric,
false,
{ ChartSubType::Dots },
0
},
{
ChartType::HistogramCount,
QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
SupportedVariable::Continuous,
SupportedVariable::Count,
false,
{ ChartSubType::Vertical, ChartSubType::Horizontal },
ChartFeatureLabels | ChartFeatureMedian | ChartFeatureMean
},
{
ChartType::HistogramValue,
QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
SupportedVariable::Continuous,
SupportedVariable::Numeric,
true,
{ ChartSubType::Vertical, ChartSubType::Horizontal },
ChartFeatureLabels
},
{
ChartType::HistogramBox,
QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
SupportedVariable::Continuous,
SupportedVariable::Numeric,
false,
{ ChartSubType::Box },
0
},
{
ChartType::HistogramStacked,
QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
SupportedVariable::Continuous,
SupportedVariable::Categorical,
false,
{ ChartSubType::VerticalStacked, ChartSubType::HorizontalStacked },
ChartFeatureLabels | ChartFeatureLegend
},
{
ChartType::DiscreteScatter,
QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
SupportedVariable::Categorical,
SupportedVariable::Numeric,
false,
{ ChartSubType::Dots },
ChartFeatureQuartiles
},
{
ChartType::DiscreteValue,
QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
SupportedVariable::Categorical,
SupportedVariable::Numeric,
true,
{ ChartSubType::Vertical, ChartSubType::Horizontal },
ChartFeatureLabels
},
{
ChartType::DiscreteCount,
QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
SupportedVariable::Categorical,
SupportedVariable::Count,
false,
{ ChartSubType::Vertical, ChartSubType::Horizontal },
ChartFeatureLabels
},
{
ChartType::DiscreteBox,
QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
SupportedVariable::Categorical,
SupportedVariable::Numeric,
false,
{ ChartSubType::Box },
0
},
{
ChartType::Pie,
QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
SupportedVariable::Categorical,
SupportedVariable::Count,
false,
{ ChartSubType::Pie },
ChartFeatureLabels | ChartFeatureLegend
},
{
ChartType::DiscreteBar,
QT_TRANSLATE_NOOP("StatsTranslations", "Barchart"),
SupportedVariable::Categorical,
SupportedVariable::Categorical,
false,
{ ChartSubType::VerticalGrouped, ChartSubType::VerticalStacked, ChartSubType::HorizontalGrouped, ChartSubType::HorizontalStacked },
ChartFeatureLabels | ChartFeatureLegend
}
};
// Some charts are valid, but not preferable. For example a numeric variable
// is better plotted in a histogram than in a categorical bar chart. To
// describe this use an enum: good, undesired, invalid. Default to "good"
// charts first, but ultimately let the user decide.
// This is a scoped enum so that the generic enumerator names (notably
// "Invalid") do not leak into the enclosing namespace; all users refer to
// the values as ChartValidity::<name>.
enum class ChartValidity {
	Good,		// Recommended for the given combination of variables
	Undesired,	// Possible, but shown with a warning
	Invalid		// Not possible for the given combination of variables
};
// Variable id that stands for the "count" pseudo-variable. Real variables
// are identified by their non-negative index in stats_variables.
static constexpr int count_idx = -1;
StatsState::StatsState() :
var1(stats_variables[0]),
var2(nullptr),
type(ChartType::DiscreteBar),
subtype(ChartSubType::Vertical),
labels(true),
legend(true),
median(false),
mean(false),
quartiles(true),
var1Binner(nullptr),
var2Binner(nullptr),
var2Operation(StatsOperation::Invalid),
var1Binned(false),
var2Binned(false),
var2HasOperations(false)
{
validate(true);
}
// Builds the list of variables offered to the user for one axis.
//   selected: the currently chosen variable (nullptr stands for "count")
//   addCount: if true, a "Count" entry with id count_idx is put in front
//   omit:     a variable that must not be offered (may be nullptr)
// The id of every real variable is its index in stats_variables. The
// "selected" field of the result is the position of the chosen entry in
// the returned list, or -1 if it is not part of the list.
static StatsState::VariableList createVariableList(const StatsVariable *selected, bool addCount, const StatsVariable *omit)
{
	StatsState::VariableList list;
	list.selected = -1;
	list.variables.reserve(stats_variables.size() + addCount);

	if (addCount) {
		// No selected variable means that "count" is selected
		if (!selected)
			list.selected = static_cast<int>(list.variables.size());
		list.variables.push_back({ StatsTranslations::tr("Count"), count_idx });
	}

	const int numVariables = static_cast<int>(stats_variables.size());
	for (int id = 0; id < numVariables; ++id) {
		const StatsVariable *candidate = stats_variables[id];
		if (candidate == omit)
			continue;
		const int position = static_cast<int>(list.variables.size());
		if (candidate == selected)
			list.selected = position;
		list.variables.push_back({ candidate->name(), id });
	}
	return list;
}
// Chart type and subtype are handed to the UI as one integer: the chart
// type lives in the upper 16 bits, the subtype in the lower 16 bits.
// Not pretty, but simple.
static int toInt(ChartType type, ChartSubType subtype)
{
	const int typeBits = static_cast<int>(type) << 16;
	const int subtypeBits = static_cast<int>(subtype);
	return typeBits | subtypeBits;
}
static std::pair<ChartType, ChartSubType> fromInt(int id)
{
return { (ChartType)(id >> 16), (ChartSubType)(id & 0xff) };
}
// Rates how well a variable of the given type fits an axis that expects the
// given kind of variable:
//   - a categorical axis accepts everything, but continuous and numeric
//     variables are better shown otherwise (undesired),
//   - a continuous axis accepts everything except discrete variables,
//   - a numeric axis accepts only numeric variables.
// "Count" is handled by the caller and is therefore reported as invalid.
static ChartValidity variableValidity(StatsVariable::Type type, SupportedVariable var)
{
	const bool isNumeric = type == StatsVariable::Type::Numeric;
	const bool isContinuous = type == StatsVariable::Type::Continuous;
	const bool isDiscrete = type == StatsVariable::Type::Discrete;

	switch (var) {
	case SupportedVariable::Categorical:
		if (isContinuous || isNumeric)
			return ChartValidity::Undesired;
		return ChartValidity::Good;
	case SupportedVariable::Continuous:
		if (isDiscrete)
			return ChartValidity::Invalid;
		return ChartValidity::Good;
	case SupportedVariable::Numeric:
		if (isNumeric)
			return ChartValidity::Good;
		return ChartValidity::Invalid;
	case SupportedVariable::Count: // Count has been special cased outside of this function
	default:
		return ChartValidity::Invalid;
	}
}
// Rates a chart type for the given pair of variables. The chart is invalid
// if either variable does not fit, undesired if at least one variable is
// only an undesired fit, and good otherwise. A second variable of nullptr
// stands for "count" and only fits charts that expect a count.
static ChartValidity chartValidity(const ChartTypeDesc &desc, const StatsVariable *var1, const StatsVariable *var2)
{
	// Count is not supported as independent variable
	if (var1 == nullptr)
		return ChartValidity::Invalid;

	const ChartValidity first = variableValidity(var1->type(), desc.var1);
	if (first == ChartValidity::Invalid)
		return ChartValidity::Invalid;

	// "Count" as second variable: the rating of the first variable decides
	if (var2 == nullptr) {
		if (desc.var2 != SupportedVariable::Count)
			return ChartValidity::Invalid;
		return first;
	}

	const ChartValidity second = variableValidity(var2->type(), desc.var2);
	if (second == ChartValidity::Invalid)
		return ChartValidity::Invalid;

	const bool anyUndesired = first == ChartValidity::Undesired || second == ChartValidity::Undesired;
	if (anyUndesired)
		return ChartValidity::Undesired;
	return ChartValidity::Good;
}
// Collects all chart types that can display the given pair of variables.
// Every entry is a (chart description, warning) pair, where the warning
// flag is set for charts that are possible, but not recommended. The
// entries keep the order of the chart_types table.
const std::vector<std::pair<const ChartTypeDesc &, bool>> validCharts(const StatsVariable *var1, const StatsVariable *var2)
{
	std::vector<std::pair<const ChartTypeDesc &, bool>> accepted;
	accepted.reserve(std::size(chart_types));
	for (const ChartTypeDesc &candidate: chart_types) {
		const ChartValidity verdict = chartValidity(candidate, var1, var2);
		if (verdict != ChartValidity::Invalid)
			accepted.emplace_back(candidate, verdict == ChartValidity::Undesired);
	}
	return accepted;
}
// Builds the list of charts (one entry per valid chart type / subtype
// combination) that is offered to the user for the given variables.
// "selected" is set to the integer id of the currently chosen chart type
// and subtype, or to -1 if that combination is not in the list.
static StatsState::ChartList createChartList(const StatsVariable *var1, const StatsVariable *var2, ChartType selectedType, ChartSubType selectedSubType)
{
	StatsState::ChartList list;
	list.selected = -1;
	for (const auto &entry: validCharts(var1, var2)) {
		const ChartTypeDesc &desc = entry.first;
		const bool warn = entry.second;
		const QString name = StatsTranslations::tr(desc.name);
		for (ChartSubType subtype: desc.subtypes) {
			const int id = toInt(desc.id, subtype);
			const bool isSelected = desc.id == selectedType && subtype == selectedSubType;
			if (isSelected)
				list.selected = id;
			const QString subtypeName = StatsTranslations::tr(chart_subtype_names[static_cast<int>(subtype)]);
			list.charts.push_back({ name, subtypeName, subtype, id, warn });
		}
	}

	// If none of the charts is recommended, a warning is of no use to the
	// user - remove the warning flag from all of them.
	// This can happen if the first variable is numerical, but the second is categorical.
	const bool anyRecommended = std::any_of(list.charts.begin(), list.charts.end(),
						[] (const StatsState::Chart &chart) { return !chart.warning; });
	if (!anyRecommended) {
		for (StatsState::Chart &chart: list.charts)
			chart.warning = false;
	}
	return list;
}
// Builds the list of binner names offered for a variable. The list stays
// empty if the variable is not binned, if there is no variable, or if the
// variable has at most one binner (there is nothing to choose from then).
// "selected" is the position of the current binner in the list, or -1.
static StatsState::BinnerList createBinnerList(bool binned, const StatsVariable *var, const StatsBinner *binner)
{
	StatsState::BinnerList list;
	list.selected = -1;
	if (!(binned && var))
		return list;

	const std::vector<const StatsBinner *> available = var->binners();
	const size_t count = available.size();
	if (count < 2)
		return list; // Don't show combo boxes for single binners

	list.binners.reserve(count);
	for (size_t pos = 0; pos < count; ++pos) {
		const StatsBinner *candidate = available[pos];
		if (candidate == binner)
			list.selected = static_cast<int>(pos);
		list.binners.push_back(candidate->name());
	}
	return list;
}
// Builds the list of operations offered for a variable, reusing the
// VariableList structure: every entry carries the name of the operation and
// the operation converted to an integer as id. The list stays empty if the
// chart does not support operations or if there is no variable.
// "selected" is the position of the current operation in the list, or -1.
static StatsState::VariableList createOperationsList(bool hasOperations, const StatsVariable *var, StatsOperation operation)
{
	StatsState::VariableList list;
	list.selected = -1;
	if (!(hasOperations && var))
		return list;

	const std::vector<StatsOperation> supported = var->supportedOperations();
	const size_t count = supported.size();
	list.variables.reserve(count);
	for (size_t pos = 0; pos < count; ++pos) {
		const StatsOperation candidate = supported[pos];
		if (candidate == operation)
			list.selected = static_cast<int>(pos);
		list.variables.push_back({ StatsVariable::operationName(candidate), static_cast<int>(candidate) });
	}
	return list;
}
static std::vector<StatsState::Feature> createFeaturesList(int chartFeatures, bool labels, bool legend, bool median, bool mean, bool quartiles)
{
std::vector<StatsState::Feature> res;
if (chartFeatures & ChartFeatureLabels)
res.push_back({ StatsTranslations::tr("labels"), ChartFeatureLabels, labels });
if (chartFeatures & ChartFeatureLegend)
res.push_back({ StatsTranslations::tr("legend"), ChartFeatureLegend, legend });
if (chartFeatures & ChartFeatureMedian)
res.push_back({ StatsTranslations::tr("median"), ChartFeatureMedian, median });
if (chartFeatures & ChartFeatureMean)
res.push_back({ StatsTranslations::tr("mean"), ChartFeatureMean, mean });
if (chartFeatures & ChartFeatureQuartiles)
res.push_back({ StatsTranslations::tr("quartiles"), ChartFeatureQuartiles, quartiles });
return res;
}
// Assembles everything the UI needs to display the current state: the
// selectable variables, charts, binners, operations and features, each
// with the current selection.
StatsState::UIState StatsState::getUIState() const
{
	UIState ui;

	// First axis: all variables. Second axis: "count" plus all variables
	// except the one already used on the first axis.
	ui.var1 = createVariableList(var1, false, nullptr);
	ui.var2 = createVariableList(var2, true, var1);

	// The names stay empty if no variable is set
	if (var1)
		ui.var1Name = var1->name();
	if (var2)
		ui.var2Name = var2->name();

	ui.charts = createChartList(var1, var2, type, subtype);
	ui.binners1 = createBinnerList(var1Binned, var1, var1Binner);
	ui.binners2 = createBinnerList(var2Binned, var2, var2Binner);
	ui.operations2 = createOperationsList(var2HasOperations, var2, var2Operation);
	ui.features = createFeaturesList(chartFeatures, labels, legend, median, mean, quartiles);
	return ui;
}
// Returns the binner at position idx of the variable's binner list, or
// nullptr if there is no variable or the index is out of range.
static const StatsBinner *idxToBinner(const StatsVariable *v, int idx)
{
	if (v == nullptr)
		return nullptr;

	const auto available = v->binners();
	const bool inRange = idx >= 0 && idx < static_cast<int>(available.size());
	if (!inRange)
		return nullptr;
	return available[idx];
}
// Called when the user selects a different first (independent) variable.
// id is the index of the variable in stats_variables; out-of-range ids are
// clamped to the nearest valid index.
void StatsState::var1Changed(int id)
{
	// The last valid index is size - 1. Clamping to size itself would
	// allow reading one element past the end of the list.
	const int last = static_cast<int>(stats_variables.size()) - 1;
	if (last >= 0) // std::clamp requires lo <= hi, i.e. a non-empty list
		var1 = stats_variables[std::clamp(id, 0, last)];
	validate(true);
}
void StatsState::binner1Changed(int idx)
{
var1Binner = idxToBinner(var1, idx);
validate(false);
}
// Called when the user selects a different second (dependent) variable.
// id is either count_idx for the "count" pseudo-variable or the index of
// the variable in stats_variables; other out-of-range ids are clamped to
// the nearest valid index.
void StatsState::var2Changed(int id)
{
	// The last valid index is size - 1. Clamping to size itself would
	// allow reading one element past the end of the list.
	const int last = static_cast<int>(stats_variables.size()) - 1;

	// The "count" variable is represented by a nullptr. An empty variable
	// list leaves nothing but "count" to choose (and std::clamp requires
	// lo <= hi).
	if (id == count_idx || last < 0)
		var2 = nullptr;
	else
		var2 = stats_variables[std::clamp(id, 0, last)];
	validate(true);
}
void StatsState::binner2Changed(int idx)
{
var2Binner = idxToBinner(var2, idx);
validate(false);
}
void StatsState::var2OperationChanged(int id)
{
var2Operation = (StatsOperation)id;
validate(false);
}
void StatsState::chartChanged(int id)
{
std::tie(type, subtype) = fromInt(id); // use std::tie to assign two values at once
validate(false);
}
// Called when the user switches a chart feature on or off. id is the flag
// value of the feature; unknown ids are ignored.
void StatsState::featureChanged(int id, bool state)
{
	switch (id) {
	case ChartFeatureLabels:
		labels = state;
		break;
	case ChartFeatureLegend:
		legend = state;
		break;
	case ChartFeatureMedian:
		median = state;
		break;
	case ChartFeatureMean:
		mean = state;
		break;
	case ChartFeatureQuartiles:
		quartiles = state;
		break;
	default:
		break;
	}
}
// Determines the chart type to use, given the current chart type and the
// list of possible (chart type, warning) pairs:
//   1. Keep the current type if it is in the list. However, if a variable
//      was changed (varChanged) and the current type is not recommended,
//      look for a better one.
//   2. Otherwise take the first recommended chart of the list.
//   3. If no chart is recommended, take the first chart of the list, or
//      the first known chart type if the list is empty.
const ChartTypeDesc &newChartType(ChartType type, std::vector<std::pair<const ChartTypeDesc &, bool>> charts,
				  bool varChanged)
{
	using Entry = std::pair<const ChartTypeDesc &, bool>;

	const auto current = std::find_if(charts.begin(), charts.end(),
					  [type] (const Entry &entry) { return entry.first.id == type; });
	if (current != charts.end()) {
		const bool keep = !varChanged || !current->second;
		if (keep)
			return current->first;
	}

	const auto recommended = std::find_if(charts.begin(), charts.end(),
					      [] (const Entry &entry) { return !entry.second; });
	if (recommended != charts.end())
		return recommended->first;

	if (charts.empty())
		return chart_types[0];
	return charts.front().first;
}
// Makes sure that binner is usable for the given variable: it is reset to
// nullptr if there is no variable or the variable is not binned, kept if it
// is one of the variable's binners, and otherwise replaced by the
// variable's first binner (or nullptr if the variable has none).
static void validateBinner(const StatsBinner *&binner, const StatsVariable *var, bool isBinned)
{
	if (var == nullptr || !isBinned) {
		binner = nullptr;
		return;
	}

	const auto candidates = var->binners();
	const bool stillValid = std::find(candidates.begin(), candidates.end(), binner) != candidates.end();
	if (stillValid)
		return;

	// For now choose the first binner. However, we might try to be smarter here
	// and adapt to the given screen size and the estimated number of bins.
	if (candidates.empty())
		binner = nullptr;
	else
		binner = candidates.front();
}
// Makes sure that operation is usable for the given variable: it is reset
// to StatsOperation::Invalid if the chart has no operations or there is no
// variable, kept if the variable supports it, and otherwise replaced by the
// variable's first supported operation (or Invalid if there is none).
static void validateOperation(StatsOperation &operation, const StatsVariable *var, bool hasOperation)
{
	// Guard against a missing variable ("count") before dereferencing it,
	// just like the other helpers that take an optional variable.
	if (!hasOperation || !var) {
		operation = StatsOperation::Invalid;
		return;
	}

	const std::vector<StatsOperation> ops = var->supportedOperations();
	if (std::find(ops.begin(), ops.end(), operation) != ops.end())
		return;
	operation = ops.empty() ? StatsOperation::Invalid : ops.front();
}
// The var changed variable indicates whether this function is called
// after a variable change or a change of the chart type. In the
// former case, the chart type is switched, if it is not recommended.
// In the latter case, the user explicitly chose a non-recommended type,
// so let's use that.
void StatsState::validate(bool varChanged)
{
// Take care that we don't plot a variable against itself.
// By default plot the count of the first variable. Is that sensible?
if (var1 == var2)
var2 = nullptr;
// Let's see if the currently selected chart is one of the valid charts
auto charts = validCharts(var1, var2);
const ChartTypeDesc &desc = newChartType(type, charts, varChanged);
type = desc.id;
// Check if the current subtype is supported by the chart
if (std::find(desc.subtypes.begin(), desc.subtypes.end(), subtype) == desc.subtypes.end())
subtype = desc.subtypes.empty() ? ChartSubType::Horizontal : desc.subtypes[0];
var1Binned = type != ChartType::ScatterPlot;
var2Binned = desc.var2 == SupportedVariable::Categorical || desc.var2 == SupportedVariable::Continuous;
var2HasOperations = desc.var2HasOperations;
chartFeatures = desc.features;
// Median and mean currently only if first variable is numeric
if (!var1 || var1->type() != StatsVariable::Type::Numeric)
chartFeatures &= ~(ChartFeatureMedian | ChartFeatureMean);
// Check that the binners and operation are valid
validateBinner(var1Binner, var1, var1Binned);
validateBinner(var2Binner, var2, var2Binned);
validateOperation(var2Operation, var2, var2HasOperations);
}

120
stats/statsstate.h Normal file
View file

@ -0,0 +1,120 @@
// SPDX-License-Identifier: GPL-2.0
// Describes the current state of the statistics widget
// (selected variables, chart type, etc.) and is the
// interface between UI and plotting code.
#ifndef STATS_STATE_H
#define STATS_STATE_H
#include <vector>
#include <QString>
// The kinds of charts that can be plotted. The value is also packed into
// the integer chart id that is exchanged with the UI.
enum class ChartType {
DiscreteBar,
DiscreteValue,
DiscreteCount,
DiscreteBox,
DiscreteScatter,
Pie,
HistogramCount,
HistogramValue,
HistogramBox,
HistogramStacked,
ScatterPlot
};
// The variants in which a chart can be drawn. Which subtypes are available
// depends on the chart type. The values start at 0 and are consecutive,
// because they are used as index into the table of subtype names in
// statsstate.cpp - keep both in sync when adding a subtype.
enum class ChartSubType {
Vertical = 0,
VerticalGrouped,
VerticalStacked,
Horizontal,
HorizontalGrouped,
HorizontalStacked,
Dots,
Box,
Pie,
Count // Not a subtype: equals the number of subtypes listed above
};
// Forward declarations: this header only uses pointers to variables and
// binners and stores the operation by value, so the full definitions are
// not needed here.
struct StatsVariable;
struct StatsBinner;
enum class StatsOperation : int;
// The complete state of the statistics chart: the selected variables,
// binners and operation, the chart type and subtype and the optional chart
// features. The "...Changed" functions modify the state and bring it back
// into a consistent form; getUIState() returns what the UI has to display.
struct StatsState {
public:
StatsState();
// NOTE(review): no definition of the following two functions is visible
// in statsstate.cpp - confirm whether they are still needed.
int setFirstAxis();
int setSecondAxis();
// An entry of a variable (or operation) list
struct Variable {
QString name;
int id; // Value to be passed back to the corresponding "...Changed" function
};
struct VariableList {
std::vector<Variable> variables;
int selected; // Position of the selected entry in "variables", -1 if none
};
// An entry of the chart list: one chart type / subtype combination
struct Chart {
QString name; // Translated name of the chart type
QString subtypeName; // Translated name of the chart subtype
ChartSubType subtype;
int id; // Combined chart type / subtype, to be passed to chartChanged()
bool warning; // Not recommended for that combination
};
struct ChartList {
std::vector<Chart> charts;
int selected; // Id (not position) of the selected chart, -1 if none
};
struct BinnerList {
std::vector<QString> binners; // Names of the binners
int selected; // Position of the selected entry in "binners", -1 if none
};
// An optional chart feature that can be switched on or off
struct Feature {
QString name;
int id; // Value to be passed to featureChanged()
bool selected; // Current on/off state
};
// Everything the UI needs to display the current state
struct UIState {
VariableList var1;
VariableList var2;
QString var1Name;
QString var2Name;
ChartList charts;
std::vector<Feature> features;
BinnerList binners1;
BinnerList binners2;
// Currently, operations are only supported on the second variable
// This reuses the variable list - not very nice.
VariableList operations2;
};
UIState getUIState() const;
// State changers
// Each of these updates the state and then revalidates it, except for
// featureChanged(), which only stores the new on/off state.
void var1Changed(int id);
void var2Changed(int id);
void chartChanged(int id);
void binner1Changed(int id);
void binner2Changed(int id);
void var2OperationChanged(int id);
void featureChanged(int id, bool state);
const StatsVariable *var1; // Independent variable
const StatsVariable *var2; // Dependent variable (nullptr: count)
ChartType type;
ChartSubType subtype;
// On/off state of the optional chart features
bool labels;
bool legend;
bool median;
bool mean;
bool quartiles;
const StatsBinner *var1Binner; // nullptr: undefined
const StatsBinner *var2Binner; // nullptr: undefined
StatsOperation var2Operation;
private:
// Brings the state into a consistent form. varChanged: true if a
// variable was changed, false for any other change.
void validate(bool varChanged);
// The following are derived from the chart type by validate()
bool var1Binned;
bool var2Binned;
bool var2HasOperations;
int chartFeatures; // Bitmask of the features supported by the current chart
};
#endif