subsurface/stats/statsstate.cpp

497 lines
16 KiB
C++
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
#include "statsstate.h"
#include "statstranslations.h"
#include "statsvariables.h"
// Untranslated display names of the chart subtypes; they are translated
// with StatsTranslations::tr() when the chart list is built.
// Attn: createChartList() indexes this table with the integer value of a
// ChartSubType, so the order of the entries must correspond to the order
// of the ChartSubType enumerators.
static const char *chart_subtype_names[] = {
	QT_TRANSLATE_NOOP("StatsTranslations", "vertical"),
	QT_TRANSLATE_NOOP("StatsTranslations", "grouped vertical"),
	QT_TRANSLATE_NOOP("StatsTranslations", "stacked vertical"),
	QT_TRANSLATE_NOOP("StatsTranslations", "horizontal"),
	QT_TRANSLATE_NOOP("StatsTranslations", "grouped horizontal"),
	QT_TRANSLATE_NOOP("StatsTranslations", "stacked horizontal"),
	QT_TRANSLATE_NOOP("StatsTranslations", "data points"),
	QT_TRANSLATE_NOOP("StatsTranslations", "box-whisker"),
	QT_TRANSLATE_NOOP("StatsTranslations", "piechart"),
};
// The kind of variable a chart type accepts on one of its two axes.
// variableValidity() matches these against the actual type of a variable.
enum class SupportedVariable {
	// The special "count" pseudo-variable (no real variable selected)
	Count,
	// Implies that the variable is binned
	Categorical,
	// Implies that the variable is binned
	Continuous,
	// A variable that must be of numeric type
	Numeric
};
// Bit flags describing the optional features a chart type supports.
// They are OR-ed together in ChartTypeDesc::features and double as the
// ids passed to StatsState::featureChanged().
static constexpr int ChartFeatureLabels = 1 << 0;	// value labels
static constexpr int ChartFeatureLegend = 1 << 1;	// legend
static constexpr int ChartFeatureMedian = 1 << 2;	// median marker
static constexpr int ChartFeatureMean = 1 << 3;		// mean marker
static constexpr int ChartFeatureQuartiles = 1 << 4;	// quartile markers
// Static description of every chart type known to the statistics module.
// The order of the table matters: when the current chart has to be replaced,
// newChartType() picks the first entry that is valid and not "undesired".
static const struct ChartTypeDesc {
	// The chart type described by this entry
	ChartType id;
	// Untranslated name; translated with StatsTranslations::tr() on use
	const char *name;
	// Kind of variable accepted as first variable
	SupportedVariable var1;
	// Kind of variable accepted as second variable
	SupportedVariable var2;
	// Copied to StatsState::var2HasOperations by validate()
	bool var2HasOperations;
	// Subtypes offered for this chart; validate() falls back to the first one
	const std::vector<ChartSubType> subtypes;
	// Bitwise OR of the ChartFeature* flags supported by this chart
	int features;
} chart_types[] = {
	{
		ChartType::ScatterPlot,
		QT_TRANSLATE_NOOP("StatsTranslations", "Scattergraph"),
		SupportedVariable::Continuous,
		SupportedVariable::Numeric,
		false,
		{ ChartSubType::Dots },
		0
	},
	{
		ChartType::HistogramCount,
		QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
		SupportedVariable::Continuous,
		SupportedVariable::Count,
		false,
		{ ChartSubType::Vertical, ChartSubType::Horizontal },
		ChartFeatureLabels | ChartFeatureMedian | ChartFeatureMean
	},
	{
		ChartType::HistogramValue,
		QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
		SupportedVariable::Continuous,
		SupportedVariable::Numeric,
		true,
		{ ChartSubType::Vertical, ChartSubType::Horizontal },
		ChartFeatureLabels
	},
	{
		ChartType::HistogramBox,
		QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
		SupportedVariable::Continuous,
		SupportedVariable::Numeric,
		false,
		{ ChartSubType::Box },
		0
	},
	{
		ChartType::HistogramStacked,
		QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
		SupportedVariable::Continuous,
		SupportedVariable::Categorical,
		false,
		{ ChartSubType::VerticalStacked, ChartSubType::HorizontalStacked },
		ChartFeatureLabels | ChartFeatureLegend
	},
	{
		ChartType::DiscreteScatter,
		QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
		SupportedVariable::Categorical,
		SupportedVariable::Numeric,
		false,
		{ ChartSubType::Dots },
		ChartFeatureQuartiles
	},
	{
		ChartType::DiscreteValue,
		QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
		SupportedVariable::Categorical,
		SupportedVariable::Numeric,
		true,
		{ ChartSubType::Vertical, ChartSubType::Horizontal },
		ChartFeatureLabels
	},
	{
		ChartType::DiscreteCount,
		QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
		SupportedVariable::Categorical,
		SupportedVariable::Count,
		false,
		{ ChartSubType::Vertical, ChartSubType::Horizontal },
		ChartFeatureLabels
	},
	{
		ChartType::DiscreteBox,
		QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
		SupportedVariable::Categorical,
		SupportedVariable::Numeric,
		false,
		{ ChartSubType::Box },
		0
	},
	{
		ChartType::Pie,
		QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
		SupportedVariable::Categorical,
		SupportedVariable::Count,
		false,
		{ ChartSubType::Pie },
		ChartFeatureLabels | ChartFeatureLegend
	},
	{
		ChartType::DiscreteBar,
		QT_TRANSLATE_NOOP("StatsTranslations", "Barchart"),
		SupportedVariable::Categorical,
		SupportedVariable::Categorical,
		false,
		{ ChartSubType::VerticalGrouped, ChartSubType::VerticalStacked, ChartSubType::HorizontalGrouped, ChartSubType::HorizontalStacked },
		ChartFeatureLabels | ChartFeatureLegend
	}
};
// Some charts are valid, but not preferable. For example, a numeric variable
// is better plotted in a histogram than in a categorical bar chart. This is
// expressed with a three-valued rating: good, undesired, invalid. "Good"
// charts are chosen by default, but ultimately the user decides.
enum ChartValidity {
	Good,		// valid and recommended
	Undesired,	// valid, but listed with a warning
	Invalid		// not possible for the given variables
};
// Special id used in variable lists for the "count" pseudo-variable.
// It is negative so that it cannot collide with an index into stats_variables.
static constexpr int count_idx = -1;
StatsState::StatsState() :
var1(stats_variables[0]),
var2(nullptr),
type(ChartType::DiscreteBar),
subtype(ChartSubType::Vertical),
labels(true),
legend(true),
median(false),
mean(false),
quartiles(true),
var1Binner(nullptr),
var2Binner(nullptr),
var2Operation(StatsOperation::Invalid),
var1Binned(false),
var2Binned(false),
var2HasOperations(false)
{
validate(true);
}
// Builds the list of variables shown in a variable selector.
//   selected: the currently chosen variable; nullptr stands for "count"
//   addCount: if true, a "Count" entry (id count_idx) is put first
//   omit:     a variable that must not appear in the list (may be nullptr)
// Every entry carries the index of its variable in stats_variables as id.
// The "selected" field of the result is the position of the selected entry
// in the list, or -1 if it is not contained.
static StatsState::VariableList createVariableList(const StatsVariable *selected, bool addCount, const StatsVariable *omit)
{
	StatsState::VariableList list;
	list.selected = -1;
	list.variables.reserve(stats_variables.size() + addCount);

	if (addCount) {
		if (!selected)
			list.selected = (int)list.variables.size();
		list.variables.push_back({ StatsTranslations::tr("Count"), count_idx });
	}

	const int numVariables = (int)stats_variables.size();
	for (int idx = 0; idx < numVariables; ++idx) {
		const StatsVariable *candidate = stats_variables[idx];
		if (candidate != omit) {
			if (candidate == selected)
				list.selected = (int)list.variables.size();
			list.variables.push_back({ candidate->name(), idx });
		}
	}
	return list;
}
// This is a bit lame: chart type and subtype are handed to the UI as a single
// integer. The type is placed in the upper and the subtype in the lower
// 16 bits of a 32 bit integer.
static int toInt(ChartType type, ChartSubType subtype)
{
	const int upper = static_cast<int>(type) << 16;
	const int lower = static_cast<int>(subtype);
	return upper | lower;
}
// Inverse of toInt(): splits a combined chart id into its chart type
// (upper 16 bits) and chart subtype (lower 16 bits).
static std::pair<ChartType, ChartSubType> fromInt(int id)
{
	// Mask the full lower 16 bits, so that decoding matches the layout
	// that toInt() produces (the former 0xff mask kept only 8 bits).
	return { (ChartType)(id >> 16), (ChartSubType)(id & 0xffff) };
}
// Rates how well a variable of the given type fits an axis that expects
// the given kind of variable.
//   type: the actual type of the variable
//   var:  what the chart expects on that axis
// "Count" is always rated invalid here, because the count pseudo-variable
// is special-cased by the caller.
static ChartValidity variableValidity(StatsVariable::Type type, SupportedVariable var)
{
	const bool isNumeric = type == StatsVariable::Type::Numeric;

	switch (var) {
	case SupportedVariable::Categorical:
		// Continuous and numeric variables can be shown as categories,
		// but that is not the recommended presentation.
		if (isNumeric || type == StatsVariable::Type::Continuous)
			return ChartValidity::Undesired;
		return ChartValidity::Good;
	case SupportedVariable::Continuous:
		if (type == StatsVariable::Type::Discrete)
			return ChartValidity::Invalid;
		return ChartValidity::Good;
	case SupportedVariable::Numeric:
		return isNumeric ? ChartValidity::Good : ChartValidity::Invalid;
	case SupportedVariable::Count:
	default:
		return ChartValidity::Invalid; // Count has been special cased outside of this function
	}
}
// Rates a chart type for the given pair of variables.
//   desc: description of the chart type
//   var1: first variable; nullptr is not supported and yields Invalid
//   var2: second variable; nullptr is the special marker for "count"
// The chart is Invalid if either axis is invalid, Undesired if at least
// one axis is undesired and Good otherwise.
static ChartValidity chartValidity(const ChartTypeDesc &desc, const StatsVariable *var1, const StatsVariable *var2)
{
	// Huh? We don't support count as independent variable
	if (var1 == nullptr)
		return ChartValidity::Invalid;

	const ChartValidity firstAxis = variableValidity(var1->type(), desc.var1);
	if (firstAxis == ChartValidity::Invalid)
		return ChartValidity::Invalid;

	// Count as second variable: the chart must expect a count there
	// and the rating is that of the first variable.
	if (!var2) {
		if (desc.var2 != SupportedVariable::Count)
			return ChartValidity::Invalid;
		return firstAxis;
	}

	const ChartValidity secondAxis = variableValidity(var2->type(), desc.var2);
	if (secondAxis == ChartValidity::Invalid)
		return ChartValidity::Invalid;

	const bool anyUndesired = firstAxis == ChartValidity::Undesired ||
				  secondAxis == ChartValidity::Undesired;
	return anyUndesired ? ChartValidity::Undesired : ChartValidity::Good;
}
// Collects all chart types that can display the given pair of variables.
// Returns a list of (chart-type, warning) pairs in the order of the
// chart_types table. The warning flag is set for charts that are valid,
// but undesired. The references point into the static chart_types table.
const std::vector<std::pair<const ChartTypeDesc &, bool>> validCharts(const StatsVariable *var1, const StatsVariable *var2)
{
	std::vector<std::pair<const ChartTypeDesc &, bool>> usable;
	usable.reserve(std::size(chart_types));
	for (const ChartTypeDesc &candidate: chart_types) {
		const ChartValidity verdict = chartValidity(candidate, var1, var2);
		if (verdict != ChartValidity::Invalid) {
			bool discouraged = verdict == ChartValidity::Undesired;
			usable.emplace_back(candidate, discouraged);
		}
	}
	return usable;
}
// Builds the list of charts offered to the user for the given variables.
// Every valid chart type contributes one entry per subtype. The id of an
// entry is the combined type/subtype integer made by toInt(). The "selected"
// field of the result is the id of the currently selected chart, or -1 if
// that chart is not in the list.
static StatsState::ChartList createChartList(const StatsVariable *var1, const StatsVariable *var2, ChartType selectedType, ChartSubType selectedSubType)
{
	StatsState::ChartList list;
	list.selected = -1;
	for (const auto &entry: validCharts(var1, var2)) {
		const ChartTypeDesc &desc = entry.first;
		const bool warn = entry.second;
		const QString chartName = StatsTranslations::tr(desc.name);
		for (ChartSubType subtype: desc.subtypes) {
			const int id = toInt(desc.id, subtype);
			if (desc.id == selectedType && subtype == selectedSubType)
				list.selected = id;
			const QString subtypeName = StatsTranslations::tr(chart_subtype_names[(int)subtype]);
			list.charts.push_back({ chartName, subtypeName, subtype, id, warn });
		}
	}

	// If none of the charts are recommended - remove the warning flag.
	// This can happen if the first variable is numerical, but the second is categorical.
	const bool anyRecommended = std::any_of(list.charts.begin(), list.charts.end(),
						[] (const StatsState::Chart &c) { return !c.warning; });
	if (!anyRecommended) {
		for (StatsState::Chart &c: list.charts)
			c.warning = false;
	}
	return list;
}
// Builds the list of binner names offered for a variable.
//   binned: whether the variable is binned in the current chart
//   var:    the variable (may be nullptr)
//   binner: the currently selected binner
// The list stays empty if the variable is not binned, is nullptr or has
// at most one binner. "selected" is the position of the current binner
// in the list, or -1 if it is not contained.
static StatsState::BinnerList createBinnerList(bool binned, const StatsVariable *var, const StatsBinner *binner)
{
	StatsState::BinnerList list;
	list.selected = -1;
	if (binned && var) {
		const std::vector<const StatsBinner *> available = var->binners();
		// Don't show combo boxes for single binners
		if (available.size() > 1) {
			list.binners.reserve(available.size());
			for (const StatsBinner *candidate: available) {
				if (candidate == binner)
					list.selected = (int)list.binners.size();
				list.binners.push_back(candidate->name());
			}
		}
	}
	return list;
}
// Builds the list of operations offered for the second variable.
//   hasOperations: whether the current chart supports operations
//   var:           the variable (may be nullptr)
//   operation:     the currently selected operation
// Each entry carries the name of the operation and its integer value as id.
// The list stays empty if the chart has no operations or var is nullptr.
// "selected" is the position of the current operation, or -1 if not found.
static StatsState::VariableList createOperationsList(bool hasOperations, const StatsVariable *var, StatsOperation operation)
{
	StatsState::VariableList list;
	list.selected = -1;
	if (hasOperations && var) {
		const std::vector<StatsOperation> supported = var->supportedOperations();
		list.variables.reserve(supported.size());
		for (StatsOperation candidate: supported) {
			if (candidate == operation)
				list.selected = (int)list.variables.size();
			list.variables.push_back({ StatsVariable::operationName(candidate), (int)candidate });
		}
	}
	return list;
}
static std::vector<StatsState::Feature> createFeaturesList(int chartFeatures, bool labels, bool legend, bool median, bool mean, bool quartiles)
{
std::vector<StatsState::Feature> res;
if (chartFeatures & ChartFeatureLabels)
res.push_back({ StatsTranslations::tr("labels"), ChartFeatureLabels, labels });
if (chartFeatures & ChartFeatureLegend)
res.push_back({ StatsTranslations::tr("legend"), ChartFeatureLegend, legend });
if (chartFeatures & ChartFeatureMedian)
res.push_back({ StatsTranslations::tr("median"), ChartFeatureMedian, median });
if (chartFeatures & ChartFeatureMean)
res.push_back({ StatsTranslations::tr("mean"), ChartFeatureMean, mean });
if (chartFeatures & ChartFeatureQuartiles)
res.push_back({ StatsTranslations::tr("quartiles"), ChartFeatureQuartiles, quartiles });
return res;
}
// Assembles everything the UI needs to display the current state:
// the variable lists, the chart list, the binner and operation lists
// and the feature toggles.
StatsState::UIState StatsState::getUIState() const
{
	UIState state;

	// First variable: all variables, no "count" entry
	state.var1 = createVariableList(var1, false, nullptr);
	state.var1Name = var1 ? var1->name() : QString();
	state.binners1 = createBinnerList(var1Binned, var1, var1Binner);

	// Second variable: "count" plus all variables except the first one
	state.var2 = createVariableList(var2, true, var1);
	state.var2Name = var2 ? var2->name() : QString();
	state.binners2 = createBinnerList(var2Binned, var2, var2Binner);
	state.operations2 = createOperationsList(var2HasOperations, var2, var2Operation);

	state.charts = createChartList(var1, var2, type, subtype);
	state.features = createFeaturesList(chartFeatures, labels, legend, median, mean, quartiles);
	return state;
}
// Translates a binner index, as reported by the UI, into the corresponding
// binner of the given variable. Returns nullptr if there is no variable or
// the index is out of range.
static const StatsBinner *idxToBinner(const StatsVariable *v, int idx)
{
	if (v == nullptr)
		return nullptr;
	const auto candidates = v->binners();
	if (idx < 0 || idx >= (int)candidates.size())
		return nullptr;
	return candidates[idx];
}
// Called when the user selects a different first variable.
//   id: index of the variable in stats_variables, as stored in the
//       variable list by createVariableList().
// Out-of-range ids are clamped to the nearest valid index.
void StatsState::var1Changed(int id)
{
	// The last valid index is size() - 1. Clamping to size() would allow a
	// read one past the end of the list.
	// Assumes that stats_variables is not empty, as the constructor does.
	const int lastIdx = (int)stats_variables.size() - 1;
	var1 = stats_variables[std::clamp(id, 0, lastIdx)];
	validate(true);
}
void StatsState::binner1Changed(int idx)
{
var1Binner = idxToBinner(var1, idx);
validate(false);
}
// Called when the user selects a different second variable.
//   id: count_idx for the "count" pseudo-variable, otherwise the index of
//       the variable in stats_variables, as stored in the variable list
//       by createVariableList().
// Out-of-range ids are clamped to the nearest valid index.
void StatsState::var2Changed(int id)
{
	if (id == count_idx) {
		// The "count" variable is represented by a nullptr
		var2 = nullptr;
	} else {
		// The last valid index is size() - 1. Clamping to size() would
		// allow a read one past the end of the list.
		// Assumes that stats_variables is not empty, as the constructor does.
		const int lastIdx = (int)stats_variables.size() - 1;
		var2 = stats_variables[std::clamp(id, 0, lastIdx)];
	}
	validate(true);
}
void StatsState::binner2Changed(int idx)
{
var2Binner = idxToBinner(var2, idx);
validate(false);
}
void StatsState::var2OperationChanged(int id)
{
var2Operation = (StatsOperation)id;
validate(false);
}
void StatsState::chartChanged(int id)
{
std::tie(type, subtype) = fromInt(id); // use std::tie to assign two values at once
validate(false);
}
// Called when the user toggles one of the chart features.
//   id:    one of the ChartFeature* flags
//   state: the new state of the toggle
// Unknown ids are ignored.
void StatsState::featureChanged(int id, bool state)
{
	switch (id) {
	case ChartFeatureLabels:
		labels = state;
		break;
	case ChartFeatureLegend:
		legend = state;
		break;
	case ChartFeatureMedian:
		median = state;
		break;
	case ChartFeatureMean:
		mean = state;
		break;
	case ChartFeatureQuartiles:
		quartiles = state;
		break;
	default:
		break;
	}
}
// Determines the new chart-type from the current chart-type and the list of
// possible chart-types.
//   type:       the currently selected chart-type
//   charts:     (chart-type, warning) pairs, as returned by validCharts()
//   varChanged: true if a variable was changed; then a current chart-type
//               that is "undesired" is not kept
// The current chart-type is kept if it is in the list (and acceptable, see
// varChanged). Otherwise the first chart without warning is chosen, then the
// first chart of the list and, if the list is empty, the first entry of
// the chart_types table.
const ChartTypeDesc &newChartType(ChartType type, std::vector<std::pair<const ChartTypeDesc &, bool>> charts,
				  bool varChanged)
{
	using Entry = std::pair<const ChartTypeDesc &, bool>;

	// Found it? If the axis was changed, we change anyway if the chart is "undesired"
	const auto current = std::find_if(charts.begin(), charts.end(),
					  [type] (const Entry &entry) { return entry.first.id == type; });
	if (current != charts.end() && (!varChanged || !current->second))
		return current->first;

	// Find the first non-undesired chart
	const auto recommended = std::find_if(charts.begin(), charts.end(),
					      [] (const Entry &entry) { return !entry.second; });
	if (recommended != charts.end())
		return recommended->first;

	if (charts.empty())
		return chart_types[0];
	return charts.front().first;
}
// Makes sure that "binner" is a valid binner for the given variable.
//   binner:   in/out; the currently selected binner
//   var:      the variable (may be nullptr)
//   isBinned: whether the variable is binned in the current chart
// The binner is reset to nullptr if there is nothing to bin. A binner that
// does not belong to the variable is replaced by the variable's first binner.
static void validateBinner(const StatsBinner *&binner, const StatsVariable *var, bool isBinned)
{
	if (var == nullptr || !isBinned) {
		binner = nullptr;
		return;
	}

	const auto available = var->binners();
	const bool stillValid = std::find(available.begin(), available.end(), binner) != available.end();
	if (!stillValid) {
		// For now choose the first binner. However, we might try to be smarter here
		// and adapt to the given screen size and the estimated number of bins.
		binner = available.empty() ? nullptr : available.front();
	}
}
// Makes sure that "operation" is supported by the given variable.
//   operation:    in/out; the currently selected operation
//   var:          the variable (may be nullptr)
//   hasOperation: whether the current chart supports operations
// The operation is reset to Invalid if the chart has no operations or
// there is no variable. An operation that the variable does not support
// is replaced by the first supported one.
static void validateOperation(StatsOperation &operation, const StatsVariable *var, bool hasOperation)
{
	// Guard against a null variable as validateBinner() and
	// createOperationsList() do, instead of dereferencing it.
	if (!hasOperation || !var) {
		operation = StatsOperation::Invalid;
		return;
	}
	const std::vector<StatsOperation> ops = var->supportedOperations();
	if (std::find(ops.begin(), ops.end(), operation) != ops.end())
		return;
	operation = ops.empty() ? StatsOperation::Invalid : ops[0];
}
// The var changed variable indicates whether this function is called
// after a variable change or a change of the chart type. In the
// former case, the chart type is switched, if it is not recommended.
// In the latter case, the user explicitly chose a non-recommended type,
// so let's use that.
void StatsState::validate(bool varChanged)
{
// Take care that we don't plot a variable against itself.
// By default plot the count of the first variable. Is that sensible?
if (var1 == var2)
var2 = nullptr;
// Let's see if the currently selected chart is one of the valid charts
auto charts = validCharts(var1, var2);
const ChartTypeDesc &desc = newChartType(type, charts, varChanged);
type = desc.id;
// Check if the current subtype is supported by the chart
if (std::find(desc.subtypes.begin(), desc.subtypes.end(), subtype) == desc.subtypes.end())
subtype = desc.subtypes.empty() ? ChartSubType::Horizontal : desc.subtypes[0];
var1Binned = type != ChartType::ScatterPlot;
var2Binned = desc.var2 == SupportedVariable::Categorical || desc.var2 == SupportedVariable::Continuous;
var2HasOperations = desc.var2HasOperations;
chartFeatures = desc.features;
// Median and mean currently only if first variable is numeric
if (!var1 || var1->type() != StatsVariable::Type::Numeric)
chartFeatures &= ~(ChartFeatureMedian | ChartFeatureMean);
// Check that the binners and operation are valid
validateBinner(var1Binner, var1, var1Binned);
validateBinner(var2Binner, var2, var2Binned);
validateOperation(var2Operation, var2, var2HasOperations);
}