subsurface/core/fulltext.cpp

// SPDX-License-Identifier: GPL-2.0

#include "fulltext.h"
#include "dive.h"
#include "divelog.h"
#include "divesite.h"
#include "tag.h"
#include "trip.h"
#include "qthelper.h"
#include <QLocale>
#include <algorithm>
#include <map>

// Per-dive cache of the words that were entered into the full text index for
// that dive. Keeping the exact list of registered words lets us unregister the
// dive later without having to tokenize its text again.
struct full_text_cache {
	std::vector<QString> words; // Normalized, upper-cased words of the dive, without duplicates
};

// The FullText-search class: an index that maps each word to the dives
// in whose text the word appears.
class FullText {
	std::map<QString, std::vector<dive *>> words; // Dives that belong to each word
public:
	void populate(); // Register every dive of the current dive table
	void registerDive(struct dive *d); // (Re-)index a dive. Note: can be called repeatedly
	void unregisterDive(struct dive *d); // Remove a dive from the index. Note: can be called repeatedly
	void unregisterAll(); // Unregister all dives in the dive table
	FullTextResult find(const FullTextQuery &q, StringFilterMode mode) const; // Find dives matching all words.
private:
	void registerWords(struct dive *d, const std::vector<QString> &w); // Add d to the index entry of every word in w
	void unregisterWords(struct dive *d, const std::vector<QString> &w); // Remove d from the index entry of every word in w
	std::vector<dive *> findDives(const QString &s, StringFilterMode mode) const; // Find dives matching a given word.
};

// The single, file-local index instance that all interface functions below operate on.
// This class doesn't depend on any other objects, we might just initialize it at startup.
static FullText self;

// C-interface functions

// Add a dive to the index. If the dive was already indexed, its previously
// registered words are replaced by the words of its current text.
void fulltext_register(struct dive *d)
{
	self.registerDive(d);
}

// Remove a dive from the index and free its word cache.
// Does nothing if the dive is not indexed.
void fulltext_unregister(struct dive *d)
{
	self.unregisterDive(d);
}

// Free the word caches of all dives in the dive table and empty the index.
void fulltext_unregister_all()
{
	self.unregisterAll();
}

// Index every dive of the dive table, emitting UI notifications before and after.
void fulltext_populate()
{
	self.populate();
}

// C++-only interface functions
// Search the index for the dives that match all words of the query under the
// given string mode. A query without words yields an empty result.
FullTextResult fulltext_find_dives(const FullTextQuery &q, StringFilterMode mode)
{
	return self.find(q, mode);
}

// Check whether a single dive matches the fulltext criterion
bool fulltext_dive_matches(const struct dive *d, const FullTextQuery &q, StringFilterMode mode)
{
	if (!q.doit())
		return true;
	if (!d->full_text)
		return false;
	auto matchFunc =
		mode == StringFilterMode::EXACT ? [](const QString &s1, const QString &s2) { return s1 == s2; } :
		mode == StringFilterMode::STARTSWITH ? [](const QString &s1, const QString &s2) { return s1.startsWith(s2); } :
		/* mode == StringFilterMode::SUBSTRING ? */ [](const QString &s1, const QString &s2) { return s1.contains(s2); };
	const std::vector<QString> &words = d->full_text->words;
	for (const QString &search: q.words) {
		if (std::any_of(words.begin(), words.end(), [&search,matchFunc](const QString &w) { return matchFunc(w, search); }))
			return true;
	}
	return false;
}

// Class implementation

// Split a text into words and append them to the given list.
// Words are maximal runs of characters that are neither whitespace nor
// punctuation. Each word is normalized to its compatibility decomposition
// (e.g. 'ℓ' becomes 'l') and then upper-cased using the default locale.
// A word is appended only if the list does not contain it yet.
// We might think about limiting the lower size of words we store.
// Note: we convert to QString before tokenization because we rely on
// Qt's isPunct() function.
static void tokenize(QString s, std::vector<QString> &res)
{
	if (s.isEmpty())
		return;

	const QLocale locale;
	const int len = s.size();
	const auto isSeparator = [](QChar c) { return c.isSpace() || c.isPunct(); };

	int start = 0;
	for (;;) {
		// Advance to the first character of the next word; stop if there is none
		while (start < len && isSeparator(s.at(start)))
			++start;
		if (start >= len)
			return;

		// The character at "start" belongs to the word, so look for the end after it
		int stop = start + 1;
		while (stop < len && !isSeparator(s.at(stop)))
			++stop;

		const QString raw = s.mid(start, stop - start);
		const QString word = locale.toUpper(raw.normalized(QString::NormalizationForm_KD));
		start = stop;

		const bool known = std::find(res.begin(), res.end(), word) != res.end();
		if (!known)
			res.push_back(word);
	}
}

// Get all words of a dive.
// Collects the tokens of the dive's notes, dive guide, buddy, suit, tag names,
// cylinder and weight system descriptions, dive site name and country, and
// trip location - in that order. The result contains each word only once.
static std::vector<QString> getWords(const dive *d)
{
	std::vector<QString> res;
	// Every text field goes through the same explicit std::string -> QString
	// conversion, so that no field depends on the implicit const char * ->
	// QString constructor.
	const auto addText = [&res](const std::string &text) {
		tokenize(QString::fromStdString(text), res);
	};

	addText(d->notes);
	addText(d->diveguide);
	addText(d->buddy);
	addText(d->suit);
	for (const divetag *tag: d->tags)
		addText(tag->name);
	for (auto &cyl: d->cylinders)
		addText(cyl.type.description);
	for (auto &ws: d->weightsystems)
		addText(ws.description);
	// TODO: We should tokenize all dive-sites and trips first and then
	// take the tokens from a cache.
	if (d->dive_site) {
		addText(d->dive_site->name);
		// An empty country adds no words, because tokenize() ignores empty texts.
		addText(taxonomy_get_country(d->dive_site->taxonomy));
	}
	// TODO: We should index trips separately!
	if (d->divetrip)
		addText(d->divetrip->location);
	return res;
}

void FullText::populate()
{
	// we want this to be two calls as the second text is overwritten below by the lines starting with "\r"
	uiNotification(QObject::tr("Create full text index"));
	uiNotification(QObject::tr("start processing"));
	int i;
	dive *d;
	for_each_dive(i, d)
		registerDive(d);
	uiNotification(QObject::tr("%1 dives processed").arg(divelog.dives->nr));
}

// Put a dive into the index. May be called for a dive that is already
// indexed: its old words are then removed before the new ones are added.
void FullText::registerDive(struct dive *d)
{
	if (!d->full_text) {
		// First registration: create the per-dive word cache
		d->full_text = new full_text_cache;
	} else {
		// Re-registration: take the stale words out of the index first
		unregisterWords(d, d->full_text->words);
	}

	auto &cache = *d->full_text;
	cache.words = getWords(d);
	registerWords(d, cache.words);
}

void FullText::unregisterDive(struct dive *d)
{
	if (!d->full_text)
		return;
	unregisterWords(d, d->full_text->words);
	delete d->full_text;
	d->full_text = nullptr;
}

void FullText::unregisterAll()
{
	int i;
	dive *d;
	for_each_dive(i, d) {
		delete d->full_text;
		d->full_text = nullptr;
	}
	words.clear();
}

// Register words of a dive: add the dive to the index entry of each word,
// creating the entry if the word is new. A dive is listed at most once per word.
void FullText::registerWords(struct dive *d, const std::vector<QString> &w)
{
	for (const QString &token: w) {
		std::vector<dive *> &bucket = words[token]; // creates an empty entry for unknown words
		const bool listed = std::find(bucket.begin(), bucket.end(), d) != bucket.end();
		if (!listed)
			bucket.push_back(d);
	}
}

// Unregister words of a dive: remove the dive from the index entry of each
// word and delete entries that become empty. Words missing from the index
// are reported with a warning and skipped.
void FullText::unregisterWords(struct dive *d, const std::vector<QString> &w)
{
	for (const QString &word: w) {
		auto it = words.find(word);
		if (it == words.end()) {
			qWarning("FullText::unregisterWords: didn't find word '%s' in index!?", qPrintable(word));
			continue;
		}
		std::vector<dive *> &entry = it->second;
		// Use the two-iterator form of erase(): if the dive is not listed under
		// this word, std::remove() returns end() and the erase is a no-op.
		// The single-iterator form would be undefined behavior in that case.
		entry.erase(std::remove(entry.begin(), entry.end(), d), entry.end());
		if (entry.empty())
			words.erase(it);
	}
}

// Append to "to" every dive of "from" that "to" does not contain yet.
// The order of the dives already in "to" is kept.
void combineDives(std::vector<dive *> &to, const std::vector<dive *> &from)
{
	for (auto it = from.begin(); it != from.end(); ++it) {
		const bool present = std::find(to.begin(), to.end(), *it) != to.end();
		if (!present)
			to.push_back(*it);
	}
}

// Find the dives that are registered under a word matching "s".
// STARTSWITH matches index words beginning with "s", SUBSTRING matches index
// words containing "s", EXACT (and any other mode) matches only the word "s"
// itself. Each dive appears at most once in the result.
std::vector<dive *> FullText::findDives(const QString &s, StringFilterMode mode) const
{
	if (mode == StringFilterMode::STARTSWITH) {
		// The index is ordered lexicographically, hence all words with a
		// common prefix form one contiguous block that starts at lower_bound().
		auto it = words.lower_bound(s);
		if (it == words.end() || !it->first.startsWith(s))
			return {};
		std::vector<dive *> matches = it->second;
		for (++it; it != words.end() && it->first.startsWith(s); ++it)
			combineDives(matches, it->second);
		return matches;
	}

	if (mode == StringFilterMode::SUBSTRING) {
		// No shortcut here: every word of the index has to be inspected.
		std::vector<dive *> matches;
		for (const auto &entry: words) {
			if (entry.first.contains(s))
				combineDives(matches, entry.second);
		}
		return matches;
	}

	// EXACT and every unknown mode: plain lookup of a single word
	const auto hit = words.find(s);
	if (hit != words.end())
		return hit->second;
	return {};
}

// Find the dives that match all words of a query: the dives found for the
// first word are successively reduced to those also found for each further
// word. A query without words gives an empty result.
FullTextResult FullText::find(const FullTextQuery &q, StringFilterMode mode) const
{
	auto term = q.words.begin();
	if (term == q.words.end())
		return FullTextResult();

	std::vector<dive *> hits = findDives(*term, mode);
	for (++term; term != q.words.end(); ++term) {
		const std::vector<dive *> other = findDives(*term, mode);
		// Keep only the dives that were also found for this word
		const auto missing = [&other] (dive *d) {
			return std::find(other.begin(), other.end(), d) == other.end();
		};
		hits.erase(std::remove_if(hits.begin(), hits.end(), missing), hits.end());
	}

	return { std::move(hits) };
}

// Initialize the query from a string: remember the string as entered and
// replace the word list by the tokens of the string.
FullTextQuery &FullTextQuery::operator=(const QString &s)
{
	std::vector<QString> tokens;
	tokenize(s, tokens);

	originalQuery = s;
	words = std::move(tokens);
	return *this;
}

// A query has to be executed only if it contains at least one word.
bool FullTextQuery::doit() const
{
	const bool hasWords = !words.empty();
	return hasWords;
}

// Check whether the given dive is part of this search result.
bool FullTextResult::dive_matches(const struct dive *d) const
{
	for (const dive *candidate: dives) {
		if (candidate == d)
			return true;
	}
	return false;
}
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, choose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+								// SPDX-License-Identifier: GPL-2.0
 								#include "fulltext.h"
 								#include "dive.h"
-												core: introduce divelog structure

The parser API was very annoying, as a number of tables
to-be-filled were passed in as pointers. The goal of this
commit is to collect all these tables in a single struct.
This should make it (more or less) clear what is actually
written into the divelog files.

Moreover, it should now be rather easy to search for
instances, where the global logfile is accessed (and it
turns out that there are many!).

The divelog struct does not contain the tables as substructs,
but only collects pointers. The idea is that the "divelog.h"
file can be included without all the other files describing
the numerous tables.

To make it easier to use from C++ parts of the code, the
struct implements a constructor and a destructor. Sadly,
we can't use smart pointers, since the pointers are accessed
from C code. Therefore the constructor and destructor are
quite complex.

The whole commit is large, but was mostly an automatic
conversion.

One oddity of note: the divelog structure also contains
the "autogroup" flag, since that is saved in the divelog.
This actually fixes a bug: Before, when importing dives
from a different log, the autogroup flag was overwritten.
This was probably not intended and does not happen anymore.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2022-11-08 21:31:08 +01:00
+								#include "divelog.h"
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, chose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+								#include "divesite.h"
-												filter: include tags in fulltext search

The tags have been forgotten when implementing the fulltext search.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-05-16 20:10:53 +02:00
+								#include "tag.h"
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, chose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+								#include "trip.h"
-												core/fulltext: give progress update while populating index

Especially with large dive logs this will prevent the user from thinking
that the app is hung.

Signed-off-by: Dirk Hohndel <dirk@hohndel.org>

											
										
										
											2020-04-01 07:36:31 -07:00
+								#include "qthelper.h"
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, chose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+								#include <QLocale>
 								#include <map>
 								// This class caches each dives words, so that we can unregister a dive from the full text search
 								struct full_text_cache {
 									std::vector<QString> words;
 								};
 								// The FullText-search class
 								class FullText {
 									std::map<QString, std::vector<dive *>> words; // Dives that belong to each word
 								public:
-												fulltext: rename fulltext_reload() to fulltext_populate()

This function was named improperly: it was only used on freshly
loaded data. Indeed, attempts to use it to actually reload lead
to crashes.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-04-08 09:05:28 +02:00
+									void populate(); // Rebuild from current dive_table
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, chose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+									void registerDive(struct dive *d); // Note: can be called repeatedly
 									void unregisterDive(struct dive *d); // Note: can be called repeatedly
 									void unregisterAll(); // Unregister all dives in the dive table
 									FullTextResult find(const FullTextQuery &q, StringFilterMode mode) const; // Find dives matchin all words.
 								private:
 									void registerWords(struct dive *d, const std::vector<QString> &w);
 									void unregisterWords(struct dive *d, const std::vector<QString> &w);
 									std::vector<dive *> findDives(const QString &s, StringFilterMode mode) const; // Find dives matching a given word.
 								};
 								// This class doesn't depend on any other objects, we might just initialize it at startup.
 								static FullText self;
 								// C-interface functions
 								void fulltext_register(struct dive *d)
 								{
 									self.registerDive(d);
 								}
 								void fulltext_unregister(struct dive *d)
 								{
 									self.unregisterDive(d);
 								}
 								void fulltext_unregister_all()
 								{
 									self.unregisterAll();
 								}
-												fulltext: rename fulltext_reload() to fulltext_populate()

This function was named improperly: it was only used on freshly
loaded data. Indeed, attempts to use it to actually reload lead
to crashes.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-04-08 09:05:28 +02:00
+								void fulltext_populate()
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, chose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+								{
-												fulltext: rename fulltext_reload() to fulltext_populate()

This function was named improperly: it was only used on freshly
loaded data. Indeed, attempts to use it to actually reload lead
to crashes.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-04-08 09:05:28 +02:00
+									self.populate();
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, chose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+								}
 								// C++-only interface functions
 								FullTextResult fulltext_find_dives(const FullTextQuery &q, StringFilterMode mode)
 								{
 									return self.find(q, mode);
 								}
 								// Check whether a single dive matches the fulltext criterion
 								bool fulltext_dive_matches(const struct dive *d, const FullTextQuery &q, StringFilterMode mode)
 								{
 									if (!q.doit())
 										return true;
 									if (!d->full_text)
 										return false;
 									auto matchFunc =
 										mode == StringFilterMode::EXACT ? [](const QString &s1, const QString &s2) { return s1 == s2; } :
 										mode == StringFilterMode::STARTSWITH ? [](const QString &s1, const QString &s2) { return s1.startsWith(s2); } :
 										/* mode == StringFilterMode::SUBSTRING ? */ [](const QString &s1, const QString &s2) { return s1.contains(s2); };
 									const std::vector<QString> &words = d->full_text->words;
 									for (const QString &search: q.words) {
 										if (std::any_of(words.begin(), words.end(), [&search,matchFunc](const QString &w) { return matchFunc(w, search); }))
 											return true;
 									}
 									return false;
 								}
 								// Class implementation
-												filter: normalize text of fulltext search to base letters

The liter symbol is written as 'ℓ'. To allow searching for
that, normalize unicode strings to their base letter. This
corresponds to the 'compatibility' mode.

We might also think about stripping diacritics.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2022-06-18 15:09:45 -04:00
+								// Take a text and tokenize it into words. Normalize the words to the base
 								// upper case base character (e.g. 'ℓ' to 'L') and add to a given list,
 								// if not already in list.
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, chose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+								// We might think about limiting the lower size of words we store.
 								// Note: we convert to QString before tokenization because we rely in
 								// Qt's isPunct() function.
 								static void tokenize(QString s, std::vector<QString> &res)
 								{
 									if (s.isEmpty())
 										return;
 									QLocale loc;
 									int size = s.size();
 									int pos = 0;
 									while (pos < size) {
 										// Skip whitespace and punctuation
 										while (s[pos].isSpace() || s[pos].isPunct()) {
 											if (++pos >= size)
 												return;
 										}
 										int end = pos;
 										while (end < size && !s[end].isSpace() && !s[end].isPunct())
 											++end;
-												filter: normalize text of fulltext search to base letters

The liter symbol is written as 'ℓ'. To allow searching for
that, normalize unicode strings to their base letter. This
corresponds to the 'compatibility' mode.

We might also think about stripping diacritics.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2022-06-18 15:09:45 -04:00
+										QString word = s.mid(pos, end - pos);
 										word = word.normalized(QString::NormalizationForm_KD);
 										word = loc.toUpper(word);
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, chose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+										pos = end;
 										if (find(res.begin(), res.end(), word) == res.end())
 											res.push_back(word);
 									}
 								}
 								// Get all words of a dive
 								static std::vector<QString> getWords(const dive *d)
 								{
 									std::vector<QString> res;
-												core: turn struct dive string data into std::string

Much easier memory management!

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2024-05-29 20:40:18 +02:00
+									tokenize(QString::fromStdString(d->notes), res);
 									tokenize(QString::fromStdString(d->diveguide), res);
 									tokenize(QString::fromStdString(d->buddy), res);
 									tokenize(QString::fromStdString(d->suit), res);
-												core: port tag-list to C++

Also adds a new test, which tests merging of two tag-lists.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2024-05-29 17:57:48 +02:00
+									for (const divetag *tag: d->tags)
 										tokenize(QString::fromStdString(tag->name), res);
-												core: convert cylinder_t and cylinder_table to C++

This had to be done simultaneously, because the table macros
do not work properly with C++ objects.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2024-05-28 21:31:11 +02:00
+									for (auto &cyl: d->cylinders)
 										tokenize(QString::fromStdString(cyl.type.description), res);
-												core: convert weightsystem_t and weightsystem_table to C++

As for cylinders, this had to be done simultaneously,

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2024-05-29 07:03:03 +02:00
+									for (auto &ws: d->weightsystems)
 										tokenize(QString::fromStdString(ws.description), res);
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, chose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+									// TODO: We should tokenize all dive-sites and trips first and then
 									// take the tokens from a cache.
-												Desktop: Add Country to the Fields Indexed for Fulltext Search.

Add 'Country' to the fields that are indexed for fulltext search - this
seems to be a quite intuitive choice as 'Country' is also a field that
is available in the dive list view.

Fixes #4134.

Signed-off-by: Michael Keller <mikeller@042.ch>

											
										
										
											2024-03-15 12:36:56 +13:00
+									if (d->dive_site) {
-												core: convert divesite strings to std::string

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2024-05-04 17:18:08 +02:00
+										tokenize(QString::fromStdString(d->dive_site->name), res);
-												core: convert taxonomy.c to C++

Since the taxonomy is now a real C++ struct with constructor
and destructor, dive_site has to be converted to C++ as well.

A bit hairy for now, but will ultimately be distinctly simpler.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2024-05-04 13:39:04 +02:00
+										std::string country = taxonomy_get_country(d->dive_site->taxonomy);
 										if (!country.empty())
 											tokenize(country.c_str(), res);
-												Desktop: Add Country to the Fields Indexed for Fulltext Search.

Add 'Country' to the fields that are indexed for fulltext search - this
seems to be a quite intuitive choice as 'Country' is also a field that
is available in the dive list view.

Fixes #4134.

Signed-off-by: Michael Keller <mikeller@042.ch>

											
										
										
											2024-03-15 12:36:56 +13:00
+									}
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, choose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+									// TODO: We should index trips separately!
 									if (d->divetrip)
-												core: turn dive-trip location and notes into std::string

Simpler memory management.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2024-05-31 17:15:47 +02:00
+										tokenize(QString::fromStdString(d->divetrip->location), res);
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, choose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+									return res;
 								}
-												fulltext: rename fulltext_reload() to fulltext_populate()

This function was named improperly: it was only used on freshly
loaded data. Indeed, attempts to use it to actually reload lead
to crashes.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-04-08 09:05:28 +02:00
+								void FullText::populate()
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, choose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+								{
-												core/fulltext: give progress update while populating index

Especially with large dive logs this will prevent the user from thinking
that the app is hung.

Signed-off-by: Dirk Hohndel <dirk@hohndel.org>

											
										
										
											2020-04-01 07:36:31 -07:00
+									// we want this to be two calls as the second text is overwritten below by the lines starting with "\r"
 									uiNotification(QObject::tr("Create full text index"));
 									uiNotification(QObject::tr("start processing"));
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, choose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+									int i;
 									dive *d;
-												mobile: remove fine-grained notification

When initializing the fulltext-cache and the dive-list, every
100 dives a notification was shown. I had a feeling that this
made startup significantly slower, but that could have been
purely psychological.

Therefore I measured and indeed, removing the fine-grained
notification, it becomes *significantly* faster. For a 3500
dives test log with mobile-on-desktop:

Initialization of the fulltext: 1350 ms -> 730 ms (-46%)
Initialization of the divelistmodel: 689 ms -> 113 ms (-83%)

Let's remove the fine-grained notification. There *is* a visual
indication of work-in-progress anyway.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-04-13 10:42:42 +02:00
+									for_each_dive(i, d)
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, choose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+										registerDive(d);
-												core: introduce divelog structure

The parser API was very annoying, as a number of tables
to-be-filled were passed in as pointers. The goal of this
commit is to collect all these tables in a single struct.
This should make it (more or less) clear what is actually
written into the divelog files.

Moreover, it should now be rather easy to search for
instances, where the global logfile is accessed (and it
turns out that there are many!).

The divelog struct does not contain the tables as substructs,
but only collects pointers. The idea is that the "divelog.h"
file can be included without all the other files describing
the numerous tables.

To make it easier to use from C++ parts of the code, the
struct implements a constructor and a destructor. Sadly,
we can't use smart pointers, since the pointers are accessed
from C code. Therefore the constructor and destructor are
quite complex.

The whole commit is large, but was mostly an automatic
conversion.

One oddity of note: the divelog structure also contains
the "autogroup" flag, since that is saved in the divelog.
This actually fixes a bug: Before, when importing dives
from a different log, the autogroup flag was overwritten.
This was probably not intended and does not happen anymore.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2022-11-08 21:31:08 +01:00
+									uiNotification(QObject::tr("%1 dives processed").arg(divelog.dives->nr));
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, choose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+								}
 								void FullText::registerDive(struct dive *d)
 								{
-												filter: normalize text of fulltext search to base letters

The liter symbol is written as 'ℓ'. To allow searching for
that, normalize unicode strings to their base letter. This
corresponds to the 'compatibility' mode.

We might also think about stripping diacritics.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2022-06-18 15:09:45 -04:00
+									if (d->full_text)
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, choose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+										unregisterWords(d, d->full_text->words);
-												filter: normalize text of fulltext search to base letters

The liter symbol is written as 'ℓ'. To allow searching for
that, normalize unicode strings to their base letter. This
corresponds to the 'compatibility' mode.

We might also think about stripping diacritics.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2022-06-18 15:09:45 -04:00
+									else
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, choose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+										d->full_text = new full_text_cache;
 									d->full_text->words = getWords(d);
 									registerWords(d, d->full_text->words);
 								}
 								void FullText::unregisterDive(struct dive *d)
 								{
 									if (!d->full_text)
 										return;
 									unregisterWords(d, d->full_text->words);
 									delete d->full_text;
 									d->full_text = nullptr;
 								}
 								void FullText::unregisterAll()
 								{
 									int i;
 									dive *d;
 									for_each_dive(i, d) {
 										delete d->full_text;
 										d->full_text = nullptr;
 									}
 									words.clear();
 								}
 								// Register words of a dive.
 								void FullText::registerWords(struct dive *d, const std::vector<QString> &w)
 								{
 									for (const QString &word: w) {
 										std::vector<dive *> &entry = words[word];
 										if (std::find(entry.begin(), entry.end(), d) == entry.end())
 											entry.push_back(d);
 									}
 								}
 								// Unregister words of a dive: remove the dive from the index entry of
 								// every given word and drop index entries that become empty.
 								// Words that are not in the index are reported with a warning and skipped.
 								void FullText::unregisterWords(struct dive *d, const std::vector<QString> &w)
 								{
 									for (const QString &word: w) {
 										auto it = words.find(word);
 										if (it == words.end()) {
 											qWarning("FullText::unregisterWords: didn't find word '%s' in index!?", qPrintable(word));
 											continue;
 										}
 										std::vector<dive *> &entry = it->second;
 										// Use the two-iterator erase: if the dive is not listed, std::remove
 										// returns end() and the single-iterator erase(end()) would be
 										// undefined behavior. The range form is then simply a no-op.
 										entry.erase(std::remove(entry.begin(), entry.end(), d), entry.end());
 										if (entry.empty())
 											words.erase(it);
 									}
 								}
 								// Append to 'to' every dive of 'from' that 'to' does not contain yet.
 								// The order of 'from' is kept for the appended dives.
 								void combineDives(std::vector<dive *> &to, const std::vector<dive *> &from)
 								{
 									for (auto src = from.begin(); src != from.end(); ++src) {
 										const bool present = std::find(to.begin(), to.end(), *src) != to.end();
 										if (!present)
 											to.push_back(*src);
 									}
 								}
 								// Collect the dives indexed under words that match 's' according to 'mode':
 								// STARTSWITH matches words beginning with 's', SUBSTRING matches words
 								// containing 's', and EXACT (as well as any other mode) matches the word 's' itself.
 								std::vector<dive *> FullText::findDives(const QString &s, StringFilterMode mode) const
 								{
 									if (mode == StringFilterMode::STARTSWITH) {
 										// The map is ordered, so all words sharing the prefix form one
 										// contiguous run beginning at lower_bound(s).
 										auto pos = words.lower_bound(s);
 										if (pos == words.end() || !pos->first.startsWith(s))
 											return {};
 										std::vector<dive *> matches = pos->second;
 										for (++pos; pos != words.end() && pos->first.startsWith(s); ++pos)
 											combineDives(matches, pos->second);
 										return matches;
 									}

 									if (mode == StringFilterMode::SUBSTRING) {
 										// No ordering trick applies here: every indexed word has to be inspected.
 										std::vector<dive *> matches;
 										for (const auto &item: words) {
 											if (item.first.contains(s))
 												combineDives(matches, item.second);
 										}
 										return matches;
 									}

 									// EXACT and every remaining mode: look up the single word.
 									auto pos = words.find(s);
 									if (pos != words.end())
 										return pos->second;
 									return {};
 								}
 								FullTextResult FullText::find(const FullTextQuery &q, StringFilterMode mode) const
 								{
 									if (q.words.empty())
 										return FullTextResult();
 									std::vector<dive *> res = findDives(q.words[0], mode);
 									for (size_t i = 1; i < q.words.size(); ++i) {
 										std::vector<dive *> res2 = findDives(q.words[i], mode);
 										// Remove dives from res that are not in res2
 										res.erase(std::remove_if(res.begin(), res.end(),
 												[&res2] (dive *d) { return std::find(res2.begin(), res2.end(), d) == res2.end(); }), res.end());
 									}
-												cleanup: more Coverity silencing

Mostly irrelevant std::move() stuff of copy-on-write Qt objects,
a few real bugs, a timestamp_t downconversion and some coding style
adaptation.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2024-01-16 17:39:19 +01:00
+									return { std::move(res) };
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, choose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+								}
 								FullTextQuery &FullTextQuery::operator=(const QString &s)
 								{
-												fulltext: remember original query

So far, the fulltext-query structure only saves a canonicalized
upper-cased version of the query. However, if we want to save the
fulltext query to the log (filter presets) or want to restore an old
fulltext query, we have to store the original query. We don't want
to confront the user with the mangled upper-cased version.
Therefore, also save the original version.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-05-29 08:26:38 +02:00
+									originalQuery = s;
-												filter: add fulltext filtering code

Add code that indexes all words of a dive and provides searching
for words.

A query is represented by the FullTextQuery class, which can be
initialized by assigning a string to it. It is basically a list
of words.

The result of a search is stored in the FullTextResult class,
which is a list of dives.

The actual indexing and searching is implemented in the FullText
class. However, this class is not exported because the interface
is partially accessible to C. Notably, the reloading of the
fulltext index is done from the C core.

Currently, the indexing and searching is totally unoptimized.
In a ~4000 dives test-log searches typically took single-digit
ms times. There is ample room for optimization (e.g. when
searching for multiple words, choose the words with few dives
first and when down to a few dives, check them individually).

The words of each dive are tokenized and uppercased and
cached with the dive. A pointer to these words is stashed
in the dive structure.

For now, compile only on desktop.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>

											
										
										
											2020-02-16 22:19:44 +01:00
+									words.clear();
 									tokenize(s, words);
 									return *this;
 								}
 								// A query is only worth executing if it holds at least one word.
 								bool FullTextQuery::doit() const
 								{
 									return words.size() > 0;
 								}
 								// Report whether the given dive is part of this result.
 								bool FullTextResult::dive_matches(const struct dive *d) const
 								{
 									for (const dive *candidate: dives) {
 										if (candidate == d)
 											return true;
 									}
 									return false;
 								}