mirror of
https://github.com/subsurface/subsurface.git
synced 2024-12-11 03:21:29 +00:00
filter: normalize text of fulltext search to base letters
The liter symbol is written as 'ℓ'. To allow searching for it, normalize Unicode strings to their base letters. This corresponds to Unicode's 'compatibility' decomposition (NFKD). We might also think about stripping diacritics. Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>
This commit is contained in:
parent
4bac5dbb66
commit
50ff94eb8f
2 changed files with 9 additions and 6 deletions
|
@ -1,3 +1,4 @@
|
||||||
|
- filter: normalize unicode code points to base letter (for searching ℓ, etc.)
|
||||||
- core: when modifying cylinders across multiple dives, match cylinder number before comparing type
|
- core: when modifying cylinders across multiple dives, match cylinder number before comparing type
|
||||||
- core: merge all properties in a dive, including current, waveheight, etc
|
- core: merge all properties in a dive, including current, waveheight, etc
|
||||||
- core: prevent crash when merging dives without cylinders (as we might get when importing from divelogs.de)
|
- core: prevent crash when merging dives without cylinders (as we might get when importing from divelogs.de)
|
||||||
|
|
|
@ -85,8 +85,9 @@ bool fulltext_dive_matches(const struct dive *d, const FullTextQuery &q, StringF
|
||||||
|
|
||||||
// Class implementation
|
// Class implementation
|
||||||
|
|
||||||
// Take a text and tokenize it into words. Normalize the words to upper case
|
// Take a text and tokenize it into words. Normalize the words to the
|
||||||
// and add to a given list, if not already in list.
|
// upper case base character (e.g. 'ℓ' to 'L') and add to a given list,
|
||||||
|
// if not already in list.
|
||||||
// We might think about limiting the lower size of words we store.
|
// We might think about limiting the lower size of words we store.
|
||||||
// Note: we convert to QString before tokenization because we rely on
|
// Note: we convert to QString before tokenization because we rely on
|
||||||
// Qt's isPunct() function.
|
// Qt's isPunct() function.
|
||||||
|
@ -107,7 +108,9 @@ static void tokenize(QString s, std::vector<QString> &res)
|
||||||
int end = pos;
|
int end = pos;
|
||||||
while (end < size && !s[end].isSpace() && !s[end].isPunct())
|
while (end < size && !s[end].isSpace() && !s[end].isPunct())
|
||||||
++end;
|
++end;
|
||||||
QString word = loc.toUpper(s.mid(pos, end - pos)); // Sad: Locale::toUpper can't use QStringRef - we have to copy the substring!
|
QString word = s.mid(pos, end - pos);
|
||||||
|
word = word.normalized(QString::NormalizationForm_KD);
|
||||||
|
word = loc.toUpper(word);
|
||||||
pos = end;
|
pos = end;
|
||||||
|
|
||||||
if (find(res.begin(), res.end(), word) == res.end())
|
if (find(res.begin(), res.end(), word) == res.end())
|
||||||
|
@ -157,11 +160,10 @@ void FullText::populate()
|
||||||
|
|
||||||
void FullText::registerDive(struct dive *d)
|
void FullText::registerDive(struct dive *d)
|
||||||
{
|
{
|
||||||
if (d->full_text) {
|
if (d->full_text)
|
||||||
unregisterWords(d, d->full_text->words);
|
unregisterWords(d, d->full_text->words);
|
||||||
} else {
|
else
|
||||||
d->full_text = new full_text_cache;
|
d->full_text = new full_text_cache;
|
||||||
}
|
|
||||||
d->full_text->words = getWords(d);
|
d->full_text->words = getWords(d);
|
||||||
registerWords(d, d->full_text->words);
|
registerWords(d, d->full_text->words);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue