subsurface/core/load-git.cpp

1909 lines
52 KiB
C++
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <git2.h>
#include <array>
#include <memory>
#include <libdivecomputer/parser.h>
#include "gettext.h"
#include "device.h"
#include "dive.h"
#include "divelog.h"
#include "divesite.h"
#include "errorhelper.h"
#include "event.h"
#include "format.h"
#include "git-access.h"
#include "picture.h"
#include "qthelper.h"
#include "sample.h"
#include "subsurface-string.h"
#include "subsurface-time.h"
#include "tag.h"
#include "trip.h"
#include "version.h"
// TODO: Should probably be moved to struct divelog to allow for multi-document
// NOTE(review): neither read nor written in this part of the file; presumably
// the id of the git commit the data was last loaded from or saved to - confirm.
std::string saved_git_id;
/*
 * Mutable state shared by all line handlers while a git repository is parsed.
 * Every handler receives a pointer to one instance of this struct.
 */
struct git_parser_state {
git_repository *repo = nullptr;
struct divecomputer *active_dc = nullptr;
// Dive that the parse_dive_*() handlers fill in.
std::unique_ptr<dive> active_dive;
std::unique_ptr<dive_trip> active_trip;
// NOTE(review): the fulltext_* / filter_constraint_* members are not used in
// this part of the file; presumably scratch values for filter presets - confirm.
std::string fulltext_mode;
std::string fulltext_query;
std::string filter_constraint_type;
std::string filter_constraint_string_mode;
std::string filter_constraint_range_mode;
bool filter_constraint_negate = false;
std::string filter_constraint_data;
struct picture active_pic;
// Dive site that the parse_site_*() handlers fill in.
struct dive_site *active_site = nullptr;
std::unique_ptr<filter_preset> active_filter;
// Log being populated; its "sites" table is searched and extended by the
// dive location / GPS / site-id handlers.
struct divelog *log = nullptr;
// Set by parse_dive_cylinder() to the index of the cylinder whose use is OXYGEN.
int o2pressure_sensor = 0;
// Strings made available to the handlers of the current line: read by
// get_first_converted_string(), parse_dive_tags() and pop_cstring().
std::vector<std::string> converted_strings;
// Index of the next entry of converted_strings that pop_cstring() hands out.
size_t act_converted_string = 0;
};
/*
 * One entry of a keyword dispatch table as consumed by match_action():
 * when a line starts with "keyword", fn is called with the remainder of
 * the line and the parser state.
 */
struct keyword_action {
// Keyword compared with strcmp() against the first word of a line.
const char *keyword;
// Handler invoked with the text following the keyword and the parser state.
void (*fn)(char *, struct git_parser_state *);
};
static git_blob *git_tree_entry_blob(git_repository *repo, const git_tree_entry *entry);
/* Parse a decimal number and convert it with C_to_mkelvin() into a temperature. */
static temperature_t get_temperature(const char *line)
{
	const auto degrees = ascii_strtod(line, nullptr);
	temperature_t result;
	result.mkelvin = C_to_mkelvin(degrees);
	return result;
}
/* Parse a decimal number; the depth stores it times 1000, rounded, as mm. */
static depth_t get_depth(const char *line)
{
	const auto parsed = ascii_strtod(line, nullptr);
	depth_t result;
	result.mm = lrint(1000 * parsed);
	return result;
}
/* Parse a decimal number; the volume stores it times 1000, rounded, as mliter. */
static volume_t get_volume(const char *line)
{
	const auto parsed = ascii_strtod(line, nullptr);
	volume_t result;
	result.mliter = lrint(1000 * parsed);
	return result;
}
/* Parse a decimal number; the weight stores it times 1000, rounded, as grams. */
static weight_t get_weight(const char *line)
{
	const auto parsed = ascii_strtod(line, nullptr);
	weight_t result;
	result.grams = lrint(1000 * parsed);
	return result;
}
/*
 * Parse a decimal number and store it, rounded but unscaled, as mbar.
 * Unlike get_pressure() no factor of 1000 is applied.
 */
static pressure_t get_airpressure(const char *line)
{
	const auto parsed = ascii_strtod(line, nullptr);
	pressure_t result;
	result.mbar = lrint(parsed);
	return result;
}
/* Parse a decimal number; the pressure stores it times 1000, rounded, as mbar. */
static pressure_t get_pressure(const char *line)
{
	const auto parsed = ascii_strtod(line, nullptr);
	pressure_t result;
	result.mbar = lrint(1000 * parsed);
	return result;
}
/* Parse a decimal number and return ten times its value, rounded. */
static int get_salinity(const char *line)
{
	const auto parsed = ascii_strtod(line, nullptr);
	return lrint(10 * parsed);
}
/* Parse a decimal number; the fraction stores it times 10, rounded, as permille. */
static fraction_t get_fraction(const char *line)
{
	const auto parsed = ascii_strtod(line, nullptr);
	fraction_t result;
	result.permille = lrint(10 * parsed);
	return result;
}
/*
 * Replace the calendar date of *when with the "YYYY-MM-DD" date found in
 * line, keeping the remaining broken-down fields as utc_mkdate() produced
 * them. *when is left untouched unless all three numbers can be read.
 */
static void update_date(timestamp_t *when, const char *line)
{
	unsigned year, month, day;

	if (sscanf(line, "%04u-%02u-%02u", &year, &month, &day) == 3) {
		struct tm parts;
		utc_mkdate(*when, &parts);
		parts.tm_year = year;
		parts.tm_mon = month - 1;	/* tm_mon counts from 0 */
		parts.tm_mday = day;
		*when = utc_mktime(&parts);
	}
}
/*
 * Replace the time of day of *when with the "HH:MM[:SS]" time found in
 * line; the seconds are optional and default to 0. *when is left untouched
 * unless at least hours and minutes can be read.
 */
static void update_time(timestamp_t *when, const char *line)
{
	unsigned hours, minutes, seconds = 0;

	if (sscanf(line, "%02u:%02u:%02u", &hours, &minutes, &seconds) >= 2) {
		struct tm parts;
		utc_mkdate(*when, &parts);
		parts.tm_hour = hours;
		parts.tm_min = minutes;
		parts.tm_sec = seconds;
		*when = utc_mktime(&parts);
	}
}
/*
 * Parse a "minutes:seconds" duration. Parts that cannot be read count as 0,
 * so a bare number is taken as minutes.
 */
static duration_t get_duration(const char *line)
{
	int minutes = 0, seconds = 0;
	sscanf(line, "%d:%d", &minutes, &seconds);

	duration_t result;
	result.seconds = minutes * 60 + seconds;
	return result;
}
static enum divemode_t get_dctype(const char *line)
{
for (int i = 0; i < NUM_DIVEMODE; i++) {
if (strcmp(line, divemode_text[i]) == 0)
return (divemode_t)i;
}
return (divemode_t)0;
}
/* Parse a base-10 integer from the start of line; 0 if there is none. */
static int get_index(const char *line)
{
	return static_cast<int>(strtol(line, nullptr, 10));
}
/* Parse a hexadecimal number (an optional "0x" prefix is accepted) from the start of line. */
static int get_hex(const char *line)
{
	const unsigned long parsed = strtoul(line, nullptr, 16);
	return static_cast<int>(parsed);
}
static void parse_dive_gps(char *line, struct git_parser_state *state)
{
location_t location;
struct dive_site *ds = state->active_dive->dive_site;
parse_location(line, &location);
if (!ds) {
ds = state->log->sites.get_by_gps(&location);
if (!ds)
ds = state->log->sites.create(std::string(), location);
ds->add_dive(state->active_dive.get());
} else {
if (ds->has_gps_location() && ds->location != location) {
std::string coords = printGPSCoordsC(&location);
// we have a dive site that already has GPS coordinates
// note 1: there will be much less copying once the core
// structures are converted to std::string.
// note 2: we could include the first newline in the
// translation string, but that would be weird and cause
// a new string.
ds->notes += '\n';
ds->notes += format_string_std(translate("gettextFromC", "multiple GPS locations for this dive site; also %s\n"), coords.c_str());
}
ds->location = location;
}
}
// Returns the first converted string of the current line, or an empty
// string if there is none.
// The string is moved out, but its (now moved-from) slot stays in the
// vector. Intended for handlers that expect exactly one string.
static std::string get_first_converted_string(struct git_parser_state *state)
{
	auto &strings = state->converted_strings;
	if (!strings.empty())
		return std::move(strings.front());
	return std::string();
}
/*
 * Handle the location name of the active dive.
 *
 * If the dive has no dive site yet, it is attached to the site of that
 * name, which is created if it does not exist. If the dive already has a
 * site, a nameless site simply takes the name; if the site already has a
 * *different* name, the new one is recorded in the site's notes.
 */
static void parse_dive_location(char *, struct git_parser_state *state)
{
	std::string name = get_first_converted_string(state);
	struct dive_site *ds = state->active_dive->dive_site;

	if (!ds) {
		ds = state->log->sites.get_by_name(name);
		if (!ds)
			ds = state->log->sites.create(name);
		ds->add_dive(state->active_dive.get());
		return;
	}

	// we already had a dive site linked to the dive
	if (ds->name.empty()) {
		ds->name = name;
		return;
	}

	// and that dive site had a name. that's weird - if our name is
	// different, add it to the notes. (This used to test for equality,
	// which noted identical names and silently dropped different ones.)
	if (ds->name != name) {
		ds->notes += '\n';
		ds->notes += format_string_std(translate("gettextFromC", "additional name for site: %s\n"), name.c_str());
	}
}
/* Store the line's first converted string as the dive guide of the active dive. */
static void parse_dive_diveguide(char *, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.diveguide = get_first_converted_string(state);
}
/* Store the line's first converted string as the buddy of the active dive. */
static void parse_dive_buddy(char *, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.buddy = get_first_converted_string(state);
}
/* Store the line's first converted string as the suit of the active dive. */
static void parse_dive_suit(char *, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.suit = get_first_converted_string(state);
}
/* Store the line's first converted string as the notes of the active dive. */
static void parse_dive_notes(char *, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.notes = get_first_converted_string(state);
}
static void parse_dive_divesiteid(char *line, struct git_parser_state *state)
{ state->log->sites.get_by_uuid(get_hex(line))->add_dive(state->active_dive.get()); }
/*
 * A tag line can carry several tags: every non-empty converted string of
 * the line is added to the active dive's tag list.
 */
static void parse_dive_tags(char *, struct git_parser_state *state)
{
	auto &tags = state->active_dive->tags;
	for (const std::string &name : state->converted_strings) {
		if (name.empty())
			continue;
		taglist_add_tag(tags, name.c_str());
	}
}
/* Store the temperature parsed from line as the air temperature of the active dive. */
static void parse_dive_airtemp(char *line, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.airtemp = get_temperature(line);
}
/* Store the temperature parsed from line as the water temperature of the active dive. */
static void parse_dive_watertemp(char *line, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.watertemp = get_temperature(line);
}
/* Store the value parsed by get_airpressure() as the surface pressure of the active dive. */
static void parse_dive_airpressure(char *line, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.surface_pressure = get_airpressure(line);
}
/* Store the "minutes:seconds" value parsed from line as the duration of the active dive. */
static void parse_dive_duration(char *line, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.duration = get_duration(line);
}
/* Store the integer parsed from line as the rating of the active dive. */
static void parse_dive_rating(char *line, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.rating = get_index(line);
}
/* Store the integer parsed from line as the visibility of the active dive. */
static void parse_dive_visibility(char *line, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.visibility = get_index(line);
}
/* Store the integer parsed from line as the wave size of the active dive. */
static void parse_dive_wavesize(char *line, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.wavesize = get_index(line);
}
/* Store the integer parsed from line in the "current" field of the active dive. */
static void parse_dive_current(char *line, struct git_parser_state *state)
{
	auto &active = *state->active_dive;
	active.current = get_index(line);
}
/* Store the integer parsed from line as the surge of the active dive. */
static void parse_dive_surge(char *line, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.surge = get_index(line);
}
/* Store the integer parsed from line as the chill of the active dive. */
static void parse_dive_chill(char *line, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.chill = get_index(line);
}
/* Store the value parsed by get_salinity() as the user salinity of the active dive. */
static void parse_dive_watersalinity(char *line, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.user_salinity = get_salinity(line);
}
/* Set the "notrip" flag of the active dive; the line content is ignored. */
static void parse_dive_notrip(char *, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.notrip = true;
}
/* Set the "invalid" flag of the active dive; the line content is ignored. */
static void parse_dive_invalid(char *, struct git_parser_state *state)
{
	auto &current = *state->active_dive;
	current.invalid = true;
}
static void parse_site_description(char *, struct git_parser_state *state)
{ state->active_site->description = get_first_converted_string(state); }
static void parse_site_name(char *, struct git_parser_state *state)
{ state->active_site->name = get_first_converted_string(state); }
static void parse_site_notes(char *, struct git_parser_state *state)
{ state->active_site->notes = get_first_converted_string(state); }
static void parse_site_gps(char *line, struct git_parser_state *state)
{
parse_location(line, &state->active_site->location);
}
static void parse_site_geo(char *line, struct git_parser_state *state)
{
int origin;
int category;
sscanf(line, "cat %d origin %d \"", &category, &origin);
taxonomy_set_category(state->active_site->taxonomy, (taxonomy_category)category,
get_first_converted_string(state), (taxonomy_origin)origin);
}
static std::string pop_cstring(struct git_parser_state *state, const char *err)
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
{
if (state->act_converted_string >= state->converted_strings.size()) {
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
report_error("git-load: string marker without any strings ('%s')", err);
return std::string();
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
}
size_t idx = state->act_converted_string++;
return std::move(state->converted_strings[idx]);
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
}
/* Parse key=val parts of samples and cylinders etc */
static char *parse_keyvalue_entry(void (*fn)(void *, const char *, const std::string &), void *fndata, char *line, struct git_parser_state *state)
{
char *key = line, c;
while ((c = *line) != 0) {
if (isspace(c) || c == '=')
break;
line++;
}
if (c != 0)
*line++ = 0;
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
std::string val;
if (c == '=') {
const char *start_val = line;
while ((c = *line) != 0) {
if (isspace(c))
break;
line++;
}
/* Did we get a string? Take it from the list of strings */
val = start_val[0] == '"' ? pop_cstring(state, key)
: std::string(start_val, line - start_val);
if (c)
line++;
}
fn(fndata, key, val);
return line;
}
static void parse_cylinder_keyvalue(void *_cylinder, const char *key, const std::string &value)
{
cylinder_t *cylinder = (cylinder_t *)_cylinder;
if (!strcmp(key, "vol")) {
cylinder->type.size = get_volume(value.c_str());
return;
}
if (!strcmp(key, "workpressure")) {
cylinder->type.workingpressure = get_pressure(value.c_str());
return;
}
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
if (!strcmp(key, "description")) {
cylinder->type.description = value;
return;
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
}
if (!strcmp(key, "o2")) {
cylinder->gasmix.o2 = get_fraction(value.c_str());
return;
}
if (!strcmp(key, "he")) {
cylinder->gasmix.he = get_fraction(value.c_str());
return;
}
if (!strcmp(key, "start")) {
cylinder->start = get_pressure(value.c_str());
return;
}
if (!strcmp(key, "end")) {
cylinder->end = get_pressure(value.c_str());
return;
}
if (!strcmp(key, "use")) {
cylinder->cylinder_use = cylinderuse_from_text(value.c_str());
return;
}
if (!strcmp(key, "depth")) {
cylinder->depth = get_depth(value.c_str());
return;
}
if ((*key == 'm') && value.empty()) {
/* found a bogus key/value pair in the cylinder, consisting
* of a lonely "m" or m<single quote> without value. This
* is caused by commit 46004c39e26 and fixed in 48d9c8eb6eb0 and
* b984fb98c38e4. See referenced commits for more info.
*
* Just ignore this key/value pair. No processing is broken
* due to this, as the git storage stores only metric SI type data.
* In fact, the m unit is superfluous anyway.
*/
return;
}
report_error("Unknown cylinder key/value pair (%s/%s)", key, value.c_str());
}
/*
 * Parse a cylinder line, a whitespace-separated list of key/value entries,
 * and append the resulting cylinder to the active dive.
 *
 * If the cylinder's use is OXYGEN, its index in the dive's cylinder list is
 * remembered in state->o2pressure_sensor.
 */
static void parse_dive_cylinder(char *line, struct git_parser_state *state)
{
	cylinder_t cylinder;

	for (;;) {
		/* isspace() is undefined for negative values: go through unsigned char */
		while (isspace(static_cast<unsigned char>(*line)))
			line++;
		if (!*line)
			break;
		line = parse_keyvalue_entry(parse_cylinder_keyvalue, &cylinder, line, state);
	}

	/* size() before the push_back is the index the new cylinder will get */
	if (cylinder.cylinder_use == OXYGEN)
		state->o2pressure_sensor = static_cast<int>(state->active_dive->cylinders.size());
	state->active_dive->cylinders.push_back(std::move(cylinder));
}
static void parse_weightsystem_keyvalue(void *_ws, const char *key, const std::string &value)
{
weightsystem_t *ws = (weightsystem_t *)_ws;
if (!strcmp(key, "weight")) {
ws->weight = get_weight(value.c_str());
return;
}
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
if (!strcmp(key, "description")) {
ws->description = value;
return;
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
}
report_error("Unknown weightsystem key/value pair (%s/%s)", key, value.c_str());
}
/*
 * Parse a weightsystem line, a whitespace-separated list of key/value
 * entries, and append the resulting weightsystem to the active dive.
 */
static void parse_dive_weightsystem(char *line, struct git_parser_state *state)
{
	weightsystem_t ws;

	for (;;) {
		/* isspace() is undefined for negative values: go through unsigned char */
		while (isspace(static_cast<unsigned char>(*line)))
			line++;
		if (!*line)
			break;
		line = parse_keyvalue_entry(parse_weightsystem_keyvalue, &ws, line, state);
	}

	state->active_dive->weightsystems.push_back(std::move(ws));
}
static int match_action(char *line, void *data,
const struct keyword_action *action, unsigned nr_action)
{
char *p = line, c;
unsigned low, high;
while ((c = *p) >= 'a' && c <= 'z') // skip over 1st word
p++; // Extract the second word from the line:
if (p == line)
return -1;
switch (c) {
case 0: // if 2nd word is C-terminated
break;
case ' ': // =end of 2nd word?
*p++ = 0; // then C-terminate that word
break;
default:
return -1;
}
/* Standard binary search in a table */
low = 0;
high = nr_action;
while (low < high) {
unsigned mid = (low + high)/2;
const struct keyword_action *a = action + mid;
int cmp = strcmp(line, a->keyword);
if (!cmp) { // attribute found:
a->fn(p, (git_parser_state *)data); // Execute appropriate function,
return 0; // .. passing 2n word from above
} // (p) as a function argument.
if (cmp < 0)
high = mid;
else
low = mid + 1;
}
report_error("Unmatched action '%s'", line);
return -1;
}
/*
 * Convenience overload taking the dispatch table as a std::array; forwards
 * to the pointer/length version of match_action().
 */
template<size_t N>
static int match_action(char *line, void *data, const std::array<keyword_action, N> &action)
{
	const struct keyword_action *table = &action[0];
	return match_action(line, data, table, N);
}
/* FIXME! We should do the array thing here too. */
static void parse_sample_keyvalue(void *_sample, const char *key, const std::string &value)
{
struct sample *sample = (struct sample *)_sample;
if (!strcmp(key, "sensor")) {
sample->sensor[0] = atoi(value.c_str());
return;
}
if (!strcmp(key, "ndl")) {
sample->ndl = get_duration(value.c_str());
return;
}
if (!strcmp(key, "tts")) {
sample->tts = get_duration(value.c_str());
return;
}
if (!strcmp(key, "in_deco")) {
sample->in_deco = atoi(value.c_str());
return;
}
if (!strcmp(key, "stoptime")) {
sample->stoptime = get_duration(value.c_str());
return;
}
if (!strcmp(key, "stopdepth")) {
sample->stopdepth = get_depth(value.c_str());
return;
}
if (!strcmp(key, "cns")) {
sample->cns = atoi(value.c_str());
return;
}
if (!strcmp(key, "rbt")) {
sample->rbt = get_duration(value.c_str());
return;
}
if (!strcmp(key, "po2")) {
pressure_t p = get_pressure(value.c_str());
sample->setpoint.mbar = p.mbar;
return;
}
if (!strcmp(key, "sensor1")) {
pressure_t p = get_pressure(value.c_str());
sample->o2sensor[0].mbar = p.mbar;
return;
}
if (!strcmp(key, "sensor2")) {
pressure_t p = get_pressure(value.c_str());
sample->o2sensor[1].mbar = p.mbar;
return;
}
if (!strcmp(key, "sensor3")) {
pressure_t p = get_pressure(value.c_str());
sample->o2sensor[2].mbar = p.mbar;
return;
}
if (!strcmp(key, "sensor4")) {
pressure_t p = get_pressure(value.c_str());
sample->o2sensor[3].mbar = p.mbar;
return;
}
if (!strcmp(key, "sensor5")) {
pressure_t p = get_pressure(value.c_str());
sample->o2sensor[4].mbar = p.mbar;
return;
}
if (!strcmp(key, "sensor6")) {
pressure_t p = get_pressure(value.c_str());
sample->o2sensor[5].mbar = p.mbar;
return;
}
if (!strcmp(key, "o2pressure")) {
pressure_t p = get_pressure(value.c_str());
sample->pressure[1].mbar = p.mbar;
return;
}
if (!strcmp(key, "heartbeat")) {
sample->heartbeat = atoi(value.c_str());
return;
}
if (!strcmp(key, "bearing")) {
sample->bearing.degrees = atoi(value.c_str());
return;
}
report_error("Unexpected sample key/value pair (%s/%s)", key, value.c_str());
}
static char *parse_sample_unit(struct sample *sample, double val, char *unit)
{
Add support for loading and saving multiple pressure samples This does both the XML and the git save format, because the changes really are the same, even if the actual format differs in some details. See how the two "save_samples()" routines both do the same basic setup, for example. This is fairly straightforward, with the possible exception of the odd sensor = sample->sensor[0]; default in the git pressure loading code. That line just means that if we do *not* have an explicit cylinder index for the pressure reading, we will always end up filling in the new pressure as the first pressure (because the cylinder index will match the first sensor slot). So that makes the "add_sample_pressure()" case always do the same thing it used to do for the legacy case: fill in the first slot. The actual sensor index may later change, since the legacy format has a "sensor=X" key value pair that sets the sensor, but it will also use the first sensor slot, making it all do exactly what it used to do. And on the other hand, if we're loading new-style data with cylinder pressure and sensor index together, we just end up using the new semantics for add_sample_pressure(), which tries to keep the same slot for the same sensor, but does the right thing if we already have other pressure values. The XML code has no such issues at all, since it can't share the cases anyway, and we need to have different node names for the different sensor values and cannot just have multiple "pressure" entries. Have I mentioned how much I despise XML lately? Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org>
2017-07-25 19:10:03 -07:00
unsigned int sensor;
char *end = unit, c;
/* Skip over the unit */
while ((c = *end) != 0) {
if (isspace(c)) {
*end++ = 0;
break;
}
end++;
}
/* The units are "°C", "m" or "bar", so let's just look at the first character */
Add support for loading and saving multiple pressure samples This does both the XML and the git save format, because the changes really are the same, even if the actual format differs in some details. See how the two "save_samples()" routines both do the same basic setup, for example. This is fairly straightforward, with the possible exception of the odd sensor = sample->sensor[0]; default in the git pressure loading code. That line just means that if we do *not* have an explicit cylinder index for the pressure reading, we will always end up filling in the new pressure as the first pressure (because the cylinder index will match the first sensor slot). So that makes the "add_sample_pressure()" case always do the same thing it used to do for the legacy case: fill in the first slot. The actual sensor index may later change, since the legacy format has a "sensor=X" key value pair that sets the sensor, but it will also use the first sensor slot, making it all do exactly what it used to do. And on the other hand, if we're loading new-style data with cylinder pressure and sensor index together, we just end up using the new semantics for add_sample_pressure(), which tries to keep the same slot for the same sensor, but does the right thing if we already have other pressure values. The XML code has no such issues at all, since it can't share the cases anyway, and we need to have different node names for the different sensor values and cannot just have multiple "pressure" entries. Have I mentioned how much I despise XML lately? Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org>
2017-07-25 19:10:03 -07:00
/* The cylinder pressure may also be of the form '123.0bar:4' to indicate sensor */
switch (*unit) {
case 'm':
sample->depth.mm = lrint(1000 * val);
break;
case 'b':
Add support for loading and saving multiple pressure samples This does both the XML and the git save format, because the changes really are the same, even if the actual format differs in some details. See how the two "save_samples()" routines both do the same basic setup, for example. This is fairly straightforward, with the possible exception of the odd sensor = sample->sensor[0]; default in the git pressure loading code. That line just means that if we do *not* have an explicit cylinder index for the pressure reading, we will always end up filling in the new pressure as the first pressure (because the cylinder index will match the first sensor slot). So that makes the "add_sample_pressure()" case always do the same thing it used to do for the legacy case: fill in the first slot. The actual sensor index may later change, since the legacy format has a "sensor=X" key value pair that sets the sensor, but it will also use the first sensor slot, making it all do exactly what it used to do. And on the other hand, if we're loading new-style data with cylinder pressure and sensor index together, we just end up using the new semantics for add_sample_pressure(), which tries to keep the same slot for the same sensor, but does the right thing if we already have other pressure values. The XML code has no such issues at all, since it can't share the cases anyway, and we need to have different node names for the different sensor values and cannot just have multiple "pressure" entries. Have I mentioned how much I despise XML lately? Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org>
2017-07-25 19:10:03 -07:00
sensor = sample->sensor[0];
if (end > unit + 4 && unit[3] == ':')
sensor = atoi(unit + 4);
add_sample_pressure(sample, sensor, lrint(1000 * val));
break;
default:
sample->temperature.mkelvin = C_to_mkelvin(val);
break;
}
return end;
}
/*
 * Map a cylinder index to itself if the dive has such a cylinder,
 * and to NO_SENSOR otherwise (including when there is no dive at all
 * or the index is negative).
 */
static int sanitize_sensor_id(const struct dive *d, int nr)
{
	if (!d || nr < 0)
		return NO_SENSOR;
	if (static_cast<size_t>(nr) >= d->cylinders.size())
		return NO_SENSOR;
	return nr;
}
/*
* By default the sample data does not change unless the
* save-file gives an explicit new value. So we copy the
* data from the previous sample if one exists, and then
* the parsing will update it as necessary.
*
* There are a few exceptions, like the sample pressure:
* missing sample pressure doesn't mean "same as last
* time", but "interpolate". We clear those ones
* explicitly.
*
* NOTE! We default sensor use to 0, 1 respetively for
* the two sensors, but for CCR dives with explicit
* OXYGEN bottles we set the secondary sensor to that.
* Then the primary sensor will be either the first
* or the second cylinder depending on what isn't an
* oxygen cylinder.
*/
static struct sample *new_sample(struct git_parser_state *state)
{
struct sample *sample = prepare_sample(state->active_dc);
size_t num_samples = state->active_dc->samples.size();
if (num_samples >= 2) {
*sample = state->active_dc->samples[num_samples - 2];
sample->pressure[0].mbar = 0;
sample->pressure[1].mbar = 0;
} else {
sample->sensor[0] = sanitize_sensor_id(state->active_dive.get(), !state->o2pressure_sensor);
sample->sensor[1] = sanitize_sensor_id(state->active_dive.get(), state->o2pressure_sensor);
}
return sample;
}
/*
 * Parse one sample line: a "MM[:SS]" time stamp followed by
 * whitespace-separated entries.
 *
 * Entries starting with a lower-case letter are key/value pairs handled
 * by parse_sample_keyvalue(); everything else must be a number followed
 * by a unit, which parse_sample_unit() stores in the sample.
 */
static void sample_parser(char *line, struct git_parser_state *state)
{
	int m, s = 0;
	struct sample *sample = new_sample(state);

	m = strtol(line, &line, 10);
	if (*line == ':')
		s = strtol(line + 1, &line, 10);
	sample->time.seconds = m * 60 + s;

	for (;;) {
		char c;

		/*
		 * isspace() is only defined for values representable as
		 * unsigned char (or EOF), so never hand it a plain char.
		 */
		while (isspace(static_cast<unsigned char>(c = *line)))
			line++;
		if (!c)
			break;
		/* Less common sample entries have a name */
		if (c >= 'a' && c <= 'z') {
			line = parse_keyvalue_entry(parse_sample_keyvalue, sample, line, state);
		} else {
			const char *end;
			double val = ascii_strtod(line, &end);
			if (end == line) {
				report_error("Odd sample data: %s", line);
				break;
			}
			/* 'end' points into the writable 'line' buffer, so dropping const is safe */
			line = parse_sample_unit(sample, val, const_cast<char *>(end));
		}
	}
}
static void parse_dc_airtemp(char *line, struct git_parser_state *state)
{ state->active_dc->airtemp = get_temperature(line); }
/* Let update_date() apply the date in 'line' to the active dive computer's 'when' field. */
static void parse_dc_date(char *line, struct git_parser_state *state)
{
	auto &when = state->active_dc->when;
	update_date(&when, line);
}
/* Legacy field: the hex value is still parsed, but the result is not stored anywhere. */
static void parse_dc_deviceid(char *line, struct git_parser_state *state)
{
	(void)get_hex(line); // legacy
}
/* Parse a hex value from 'line' and store it as the active dive computer's dive id. */
static void parse_dc_diveid(char *line, struct git_parser_state *state)
{
	const auto id = get_hex(line);
	state->active_dc->diveid = id;
}
/* Parse a duration from 'line' and store it as the active dive computer's duration. */
static void parse_dc_duration(char *line, struct git_parser_state *state)
{
	const auto length = get_duration(line);
	state->active_dc->duration = length;
}
/* Translate 'line' with get_dctype() and store the result as the active dive computer's dive mode. */
static void parse_dc_dctype(char *line, struct git_parser_state *state)
{
	const auto mode = get_dctype(line);
	state->active_dc->divemode = mode;
}
/* Parse a duration from 'line' and store it as the active dive computer's last manual time. */
static void parse_dc_lastmanualtime(char *line, struct git_parser_state *state)
{
	const auto manual_time = get_duration(line);
	state->active_dc->last_manual_time = manual_time;
}
Improve profile display in planner This patch allows the planner to save the last manually-entered dive planner point of a dive plan. When the plan has been saved and re-opened for edit, the time of the last-entered dive planner point is used to ensure that dive planning continues from the same point in the profile as was when the original dive plan was saved. Mechanism: 1) In dive.h, create a new dc attribute dc->last_manual_time with data type of duration_t. 2) In diveplanner.c, ensure that the last manually-entered dive planner point is saved in dc->last_manual_time. 3) In save-xml.c, create a new XML attribute for the <divecomputer> element, named last-manual-time. For dive plans, the element would now look like: <divecomputer model='planned dive' last-manual-time='31:17 min'> 4) In parse-xml.c, insert code that recognises the last-manual-time XML attribute, reads the time value and assigns this time to dc->last_manual_time. 5) In diveplannermodel.cpp, method DiveplannerPointModel::loadfromdive, insert code that sets the appropriate boolean value to dp->entered by comparing newtime (i.e. time of dp) with dc->last_manual_time. 6) Diveplannermodel.cpp also accepts profile data from normal dives in the dive log, whether hand-entered or loaded from dive computer. It looks like the reduction of dive points for dives with >100 points continues to work ok. The result is that when a dive plan is saved with manually entered points up to e.g. 10 minutes into the dive, it can be re-opened for edit in the dive planner and the planner re-creates the plan with manually entered points up to 10 minutes. The rest of the points are "soft" points, shaped by the deco calculations of the planner. Improvements: Improve code for profile display in dive planner This responds to #1052. Change load-git.c and save-git.c so that the last-manual-time is also saved in the git-format dive log. Several stylistic changes in text for consistent C source code. 
Improvement of dive planner profile display: Do some simplification of my alterations to diveplannermodel.cpp Two small style changes in planner.c and diveplannermodel.cpp as requested ny @neolit123 Signed-off-by: Willem Ferguson <willemferguson@zoology.up.ac.za>
2018-01-15 14:51:47 +02:00
static void parse_dc_maxdepth(char *line, struct git_parser_state *state)
{ state->active_dc->maxdepth = get_depth(line); }
static void parse_dc_meandepth(char *line, struct git_parser_state *state)
{ state->active_dc->meandepth = get_depth(line); }
/*
 * The model is taken from the converted strings of the parser state
 * rather than from the raw line, which is why the line is ignored.
 */
static void parse_dc_model(char *, struct git_parser_state *state)
{
	state->active_dc->model = get_first_converted_string(state);
}
/* Parse an index from 'line' and store it as the active dive computer's number of O2 sensors. */
static void parse_dc_numberofoxygensensors(char *line, struct git_parser_state *state)
{
	const auto sensor_count = get_index(line);
	state->active_dc->no_o2sensors = sensor_count;
}
static void parse_dc_surfacepressure(char *line, struct git_parser_state *state)
{ state->active_dc->surface_pressure = get_pressure(line); }
/* Parse a salinity value from 'line' and store it in the active dive computer. */
static void parse_dc_salinity(char *line, struct git_parser_state *state)
{
	const auto salt = get_salinity(line);
	state->active_dc->salinity = salt;
}
/* Parse a duration from 'line' and store it as the active dive computer's surface time. */
static void parse_dc_surfacetime(char *line, struct git_parser_state *state)
{
	const auto interval = get_duration(line);
	state->active_dc->surfacetime = interval;
}
/* Let update_time() apply the time of day in 'line' to the active dive computer's 'when' field. */
static void parse_dc_time(char *line, struct git_parser_state *state)
{
	auto &when = state->active_dc->when;
	update_time(&when, line);
}
static void parse_dc_watertemp(char *line, struct git_parser_state *state)
{ state->active_dc->watertemp = get_temperature(line); }
/*
 * Look up a dive mode name in divemode_text[] and return its index.
 * Unknown names map to index 0.
 */
static int get_divemode(const char *divemodestring) {
	int mode = 0;

	while (mode < NUM_DIVEMODE) {
		if (strcmp(divemodestring, divemode_text[mode]) == 0)
			return mode;
		++mode;
	}
	return 0;
}
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
/*
* A 'struct event' has a variable-sized name allocation at the
* end. So when we parse the event data, we can't fill in the
* event directly, because we don't know how to allocate one
* before we have the size of the name.
*
* Thus this initial 'parse_event' with a separate name pointer.
*/
struct parse_event {
std::string name;
int has_divemode = false;
struct event ev;
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
};
static void parse_event_keyvalue(void *_parse, const char *key, const std::string &value)
{
struct parse_event *parse = (parse_event *)_parse;
int val = atoi(value.c_str());
if (!strcmp(key, "type")) {
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
parse->ev.type = val;
} else if (!strcmp(key, "flags")) {
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
parse->ev.flags = val;
} else if (!strcmp(key, "value")) {
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
parse->ev.value = val;
} else if (!strcmp(key, "name")) {
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
parse->name = value;
} else if (!strcmp(key,"divemode")) {
parse->ev.value = get_divemode(value.c_str());
parse->has_divemode = 1;
} else if (!strcmp(key, "cylinder")) {
/* NOTE! We add one here as a marker that "yes, we got a cylinder index" */
parse->ev.gas.index = 1 + get_index(value.c_str());
} else if (!strcmp(key, "o2")) {
parse->ev.gas.mix.o2 = get_fraction(value.c_str());
} else if (!strcmp(key, "he")) {
parse->ev.gas.mix.he = get_fraction(value.c_str());
} else
report_error("Unexpected event key/value pair (%s/%s)", key, value.c_str());
}
/*
 * keyvalue "key" "value"
 *
 * Both strings (possibly empty) come from the converted strings of the
 * parser state. A line that did not yield exactly two strings is
 * silently ignored.
 */
static void parse_dc_keyvalue(char *line, struct git_parser_state *state)
{
	const std::vector<std::string> &strings = state->converted_strings;

	if (strings.size() == 2)
		add_extra_data(state->active_dc, strings[0], strings[1]);
}
static void parse_dc_event(char *line, struct git_parser_state *state)
{
int m, s = 0;
struct parse_event p;
m = strtol(line, &line, 10);
if (*line == ':')
s = strtol(line + 1, &line, 10);
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
p.ev.time.seconds = m * 60 + s;
for (;;) {
char c;
while (isspace(c = *line))
line++;
if (!c)
break;
line = parse_keyvalue_entry(parse_event_keyvalue, &p, line, state);
}
/* Only modechange events should have a divemode - fix up any corrupted names */
if (p.has_divemode && p.name != "modechange")
p.name = "modechange";
struct event *ev = add_event(state->active_dc, p.ev.time.seconds, p.ev.type, p.ev.flags, p.ev.value, p.name.c_str());
/*
* Older logs might mark the dive to be CCR by having an "SP change" event at time 0:00.
* Better to mark them being CCR on import so no need for special treatments elsewhere on
* the code.
*/
if (p.ev.time.seconds == 0 && p.ev.type == SAMPLE_EVENT_PO2 && p.ev.value && state->active_dc->divemode==OC)
state->active_dc->divemode = CCR;
if (ev->is_gaschange()) {
/*
* We subtract one here because "0" is "no index",
* and the parsing will add one for actual cylinder
* index data (see parse_event_keyvalue)
*/
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
ev->gas.index = p.ev.gas.index-1;
if (p.ev.gas.mix.o2.permille || p.ev.gas.mix.he.permille)
ev->gas.mix = p.ev.gas.mix;
}
}
/*
 * Intentionally a no-op: the trip date is calculated implicitly from the
 * first dive, so the stored value is not needed anymore.
 */
static void parse_trip_date(char *, struct git_parser_state *)
{
}
/*
 * Intentionally a no-op: the trip date is calculated implicitly from the
 * first dive, so the stored time is not needed anymore.
 */
static void parse_trip_time(char *, struct git_parser_state *)
{
}
/* The trip location comes from the converted strings of the parser state; the raw line is ignored. */
static void parse_trip_location(char *, struct git_parser_state *state)
{
	state->active_trip->location = get_first_converted_string(state);
}
/* The trip notes come from the converted strings of the parser state; the raw line is ignored. */
static void parse_trip_notes(char *, struct git_parser_state *state)
{
	state->active_trip->notes = get_first_converted_string(state);
}
static void parse_settings_autogroup(char *, struct git_parser_state *state)
{
state->log->autogroup = true;
}
/* Pass the units setting on to set_informational_units(); a null line is ignored. */
static void parse_settings_units(char *line, struct git_parser_state *)
{
	if (!line)
		return;
	set_informational_units(line);
}
/*
 * Intentionally a no-op. Keep this despite removal of the webservice, as
 * there are legacy logbooks around that still have this defined.
 */
static void parse_settings_userid(char *, struct git_parser_state *)
{
}
/* Pass the preferences setting on to set_git_prefs(); a null line is ignored. */
static void parse_settings_prefs(char *line, struct git_parser_state *)
{
	if (!line)
		return;
	set_git_prefs(line);
}
/*
 * Read the data format version of the save file, report it, and complain
 * if it is newer than the version this build knows about.
 *
 * Our versioning is a joke right now, but this is more of an example of
 * what we *can* do some day. And if we do change the version, the error
 * will show if you read with a version of subsurface that doesn't know
 * about it.
 *
 * We MUST keep this in sync with the XML version (so we can report a
 * consistent minimum datafile version).
 */
static void parse_settings_version(char *line, struct git_parser_state *)
{
	const int version = atoi(line);

	report_datafile_version(version);
	if (version <= dataformat_version)
		return;
	report_error("Git save file version %d is newer than version %d I know about", version, dataformat_version);
}
/*
 * Intentionally a no-op: the argument string is the version string of the
 * subsurface that saved things, just FYI.
 */
static void parse_settings_subsurface(char *, struct git_parser_state *)
{
}
/*
 * Values collected for one dive computer id entry; filled in through
 * parse_divecomputerid_keyvalue(). Strings start out empty and the
 * device id starts out as 0.
 */
struct divecomputerid {
	std::string model;
	std::string nickname;
	std::string serial;
	unsigned int deviceid{0};
};
static void parse_divecomputerid_keyvalue(void *_cid, const char *key, const std::string &value)
{
struct divecomputerid *cid = (divecomputerid *)_cid;
Clean up divecomputer 'device' handling We have this odd legacy notion of a divecomputer 'device', that was originally just basically the libdivecomputer 'EVENT_DEVINFO' report that was associated with each dive. So it had firmware version, deviceid, and serial number. It had also gotten extended to do 'nickname' handling, and it was all confusing, ugly and bad. It was particularly bad because it wasn't actually a 'per device' thing at all: due to the firmware field, a dive computer that got a firmware update forced a new 'device'. To make matters worse, the 'deviceid' was also almost random, because we've calculated it a couple of different ways, and libdivecomputer itself has changed how the legacy 32-bit 'serial number' is expressed. Finally, because of all these issues, we didn't even try to make the thing unique, so it really ended up being a random snapshot of the state of the dive computer at the time of a dive, and sometimes we'd pick one, and sometimes another, since they weren't really well-defined. So get rid of all this confusion. The new rules: - the actual random dive computer state at the time of a dive is kept in the dive data. So if you want to know the firmware version, it should be in the 'extra data' - the only serial number that matters is the string one in the extra data, because that's the one that actually matches what the dive computer reports, and isn't some random 32-bit integer with ambiguous formatting. - the 'device id' - the thing we match with (together with the model name, eg "Suunto EON Steel") is purely a hash of the real serial number. The device ID that libdivecomputer reports in EVENT_DEVINFO is ignored, as is the device ID we've saved in the XML or git files. If we have a serial number, the device ID will be uniquely associated with that serial number, and if we don't have one, the device ID will be zero (for 'match anything'). 
So now 'deviceid' is literally just a shorthand for the serial number string, and the two are joined at the hip. - the 'device' managament is _only_ used to track devices that have serial numbers _and_ nicknames. So no more different device structures just because one had a nickname and the other didn't etc. Without a serial number, the device is 'anonymous' and fundamentally cannot be distinguished from other devices of the same model, so a nickname is meaningless. And without a nickname, there is no point in creating a device data structure, since all the data is in the dive itself and the device structure wouldn't add any value.. These rules mean that we no longer have ambiguous 'device' structures, and we can never have duplicates that can confuse us. This does mean that you can't give a nickname to a device that cannot be uniquely identified with a serial number, but those are happily fairly rare (and mostly older ones). Dirk said he'd look at what it takes to give more dive computers proper serial numbers, and I already did it for the Garmin Descent family yesterday. (Honesty in advertizing: right now you can't add a nickname to a dive computer that doesn't already have one, because such a dive computer will not have a device structure. But that's a UI issue, and I'll sort that out separately) Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2021-08-16 12:50:11 -10:00
// Ignored legacy fields
if (!strcmp(key, "firmware"))
return;
Clean up divecomputer 'device' handling We have this odd legacy notion of a divecomputer 'device', that was originally just basically the libdivecomputer 'EVENT_DEVINFO' report that was associated with each dive. So it had firmware version, deviceid, and serial number. It had also gotten extended to do 'nickname' handling, and it was all confusing, ugly and bad. It was particularly bad because it wasn't actually a 'per device' thing at all: due to the firmware field, a dive computer that got a firmware update forced a new 'device'. To make matters worse, the 'deviceid' was also almost random, because we've calculated it a couple of different ways, and libdivecomputer itself has changed how the legacy 32-bit 'serial number' is expressed. Finally, because of all these issues, we didn't even try to make the thing unique, so it really ended up being a random snapshot of the state of the dive computer at the time of a dive, and sometimes we'd pick one, and sometimes another, since they weren't really well-defined. So get rid of all this confusion. The new rules: - the actual random dive computer state at the time of a dive is kept in the dive data. So if you want to know the firmware version, it should be in the 'extra data' - the only serial number that matters is the string one in the extra data, because that's the one that actually matches what the dive computer reports, and isn't some random 32-bit integer with ambiguous formatting. - the 'device id' - the thing we match with (together with the model name, eg "Suunto EON Steel") is purely a hash of the real serial number. The device ID that libdivecomputer reports in EVENT_DEVINFO is ignored, as is the device ID we've saved in the XML or git files. If we have a serial number, the device ID will be uniquely associated with that serial number, and if we don't have one, the device ID will be zero (for 'match anything'). 
So now 'deviceid' is literally just a shorthand for the serial number string, and the two are joined at the hip. - the 'device' managament is _only_ used to track devices that have serial numbers _and_ nicknames. So no more different device structures just because one had a nickname and the other didn't etc. Without a serial number, the device is 'anonymous' and fundamentally cannot be distinguished from other devices of the same model, so a nickname is meaningless. And without a nickname, there is no point in creating a device data structure, since all the data is in the dive itself and the device structure wouldn't add any value.. These rules mean that we no longer have ambiguous 'device' structures, and we can never have duplicates that can confuse us. This does mean that you can't give a nickname to a device that cannot be uniquely identified with a serial number, but those are happily fairly rare (and mostly older ones). Dirk said he'd look at what it takes to give more dive computers proper serial numbers, and I already did it for the Garmin Descent family yesterday. (Honesty in advertizing: right now you can't add a nickname to a dive computer that doesn't already have one, because such a dive computer will not have a device structure. But that's a UI issue, and I'll sort that out separately) Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2021-08-16 12:50:11 -10:00
if (!strcmp(key, "deviceid"))
return;
// Serial number and nickname matter
if (!strcmp(key, "serial")) {
cid->serial = value;
cid->deviceid = calculate_string_hash(value.c_str());
return;
}
if (!strcmp(key, "nickname")) {
cid->nickname = value;
return;
}
report_error("Unknown divecomputerid key/value pair (%s/%s)", key, value.c_str());
}
/*
* The 'divecomputerid' is a bit harder to parse than some other things, because
* it can have multiple strings (but see the tag parsing for another example of
* that) in addition to the non-string entries.
*/
static void parse_settings_divecomputerid(char *line, struct git_parser_state *state)
{
struct divecomputerid id;
id.model = pop_cstring(state, line);
/* Skip the '"' that stood for the model string */
line++;
load-git: clean up string handling during parsing We had some fairly obscure rules for how strings were parsed, and it actually caused bugs when the same line had multiple strings in it. That normally doesn't happen, and the cases where it was _supposed_ to happen had special cases for it (divecomputer ID lines, and tag lines). But by mistake, we had introduced a case of that for the event line handling in commit b9174332d ("Read and write divemode changes (xml and git)"), and nobody realized that the divemode string addition meant that "oops, now it's corrupting the event name". An event line could look like this: event 40:00 type=8 divemode="OC" name="modechange" where we now had both that "OC" and "modechange" strings, and the code to pick the name just picked the first string. So we'd end up effectively mis-parsing the above line as event 40:00 type=8 divemode="OC" name="OC" which is obviously wrong. The dive mode didn't really need to be a string in the first place (there is nothing to quote, and no spaces in it), but hey, here we are. We can't just magially fix the existing broken saves. So make it more straightforward to handle strings in the git format line parser. We still stash the different decoded strings together in one special memory buffer, but now the parser helpers automatically untangle it as they traverse the key value pairs. This is still overly subtle code, and it doesn't fix the cases where we've saved the wrong data back. That comes later. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-02-14 11:49:28 -08:00
/* Parse the rest of the entries */
for (;;) {
char c;
while (isspace(c = *line))
line++;
if (!c)
break;
line = parse_keyvalue_entry(parse_divecomputerid_keyvalue, &id, line, state);
}
create_device_node(state->log->devices, id.model.c_str(), id.serial.c_str(), id.nickname.c_str());
}
/*
 * Scratch data collected while parsing one 'fingerprint' settings line.
 * Every field starts out zeroed / empty and is filled in by
 * parse_fingerprint_keyvalue() as the matching key is seen.
 */
struct fingerprint_helper {
	uint32_t model{0};	// "model" key, parsed as hex
	uint32_t serial{0};	// "serial" key, parsed as hex
	uint32_t fdeviceid{0};	// "deviceid" key, parsed as hex
	uint32_t fdiveid{0};	// "diveid" key, parsed as hex
	std::string hex_data;	// "data" key, kept as text
};
static void parse_fingerprint_keyvalue(void *_fph, const char *key, const std::string &value)
{
struct fingerprint_helper *fph = (fingerprint_helper *)_fph;
if (!strcmp(key, "model")) {
fph->model = get_hex(value.c_str());
return;
}
if (!strcmp(key, "serial")) {
fph->serial = get_hex(value.c_str());
return;
}
if (!strcmp(key, "deviceid")) {
fph->fdeviceid = get_hex(value.c_str());
return;
}
if (!strcmp(key, "diveid")) {
fph->fdiveid = get_hex(value.c_str());
return;
}
if (!strcmp(key, "data")) {
fph->hex_data = value.c_str();
return;
}
report_error("Unknown fingerprint key/value pair (%s/%s)", key, value.c_str());
}
/*
 * Parse a 'fingerprint' settings line: a sequence of key/value entries
 * that are collected in a fingerprint_helper and then turned into a
 * fingerprint node via create_fingerprint_node_from_hex().
 */
static void parse_settings_fingerprint(char *line, struct git_parser_state *state)
{
	struct fingerprint_helper fph;

	for (;;) {
		/* isspace() is undefined for negative values, so go through unsigned char */
		while (isspace(static_cast<unsigned char>(*line)))
			line++;
		if (!*line)
			break;
		line = parse_keyvalue_entry(parse_fingerprint_keyvalue, &fph, line, state);
	}
	if (verbose > 1)
		report_info("fingerprint %08x %08x %08x %08x %s\n", fph.model, fph.serial, fph.fdeviceid, fph.fdiveid, fph.hex_data.c_str());
	create_fingerprint_node_from_hex(fingerprints, fph.model, fph.serial,
					 fph.hex_data, fph.fdeviceid, fph.fdiveid);
}
static void parse_picture_filename(char *, struct git_parser_state *state)
{
state->active_pic.filename = get_first_converted_string(state);
}
static void parse_picture_gps(char *line, struct git_parser_state *state)
{
parse_location(line, &state->active_pic.location);
}
static void parse_picture_hash(char *, struct git_parser_state *)
{
// we no longer use hashes to identify pictures, but we shouldn't
// remove this parser lest users get an ugly red warning when
// opening old git repos
}
/* These need to be sorted! */
static const std::array dc_action {
#undef D
#define D(x) keyword_action { #x, parse_dc_ ## x }
D(airtemp), D(date), D(dctype), D(deviceid), D(diveid), D(duration),
Improve profile display in planner This patch allows the planner to save the last manually-entered dive planner point of a dive plan. When the plan has been saved and re-opened for edit, the time of the last-entered dive planner point is used to ensure that dive planning continues from the same point in the profile as was when the original dive plan was saved. Mechanism: 1) In dive.h, create a new dc attribute dc->last_manual_time with data type of duration_t. 2) In diveplanner.c, ensure that the last manually-entered dive planner point is saved in dc->last_manual_time. 3) In save-xml.c, create a new XML attribute for the <divecomputer> element, named last-manual-time. For dive plans, the element would now look like: <divecomputer model='planned dive' last-manual-time='31:17 min'> 4) In parse-xml.c, insert code that recognises the last-manual-time XML attribute, reads the time value and assigns this time to dc->last_manual_time. 5) In diveplannermodel.cpp, method DiveplannerPointModel::loadfromdive, insert code that sets the appropriate boolean value to dp->entered by comparing newtime (i.e. time of dp) with dc->last_manual_time. 6) Diveplannermodel.cpp also accepts profile data from normal dives in the dive log, whether hand-entered or loaded from dive computer. It looks like the reduction of dive points for dives with >100 points continues to work ok. The result is that when a dive plan is saved with manually entered points up to e.g. 10 minutes into the dive, it can be re-opened for edit in the dive planner and the planner re-creates the plan with manually entered points up to 10 minutes. The rest of the points are "soft" points, shaped by the deco calculations of the planner. Improvements: Improve code for profile display in dive planner This responds to #1052. Change load-git.c and save-git.c so that the last-manual-time is also saved in the git-format dive log. Several stylistic changes in text for consistent C source code. 
Improvement of dive planner profile display: Do some simplification of my alterations to diveplannermodel.cpp Two small style changes in planner.c and diveplannermodel.cpp as requested ny @neolit123 Signed-off-by: Willem Ferguson <willemferguson@zoology.up.ac.za>
2018-01-15 14:51:47 +02:00
D(event), D(keyvalue), D(lastmanualtime), D(maxdepth), D(meandepth), D(model), D(numberofoxygensensors),
D(salinity), D(surfacepressure), D(surfacetime), D(time), D(watertemp)
};
/* Sample lines start with a space or a number */
static void divecomputer_parser(char *line, struct git_parser_state *state)
{
char c = *line;
if (c < 'a' || c > 'z')
sample_parser(line, state);
match_action(line, state, dc_action);
}
/* These need to be sorted! */
static const std::array dive_action {
#undef D
#define D(x) keyword_action { #x, parse_dive_ ## x }
/* For historical reasons, we accept divemaster and diveguide */
D(airpressure), D(airtemp), D(buddy), D(chill), D(current), D(cylinder), D(diveguide),
keyword_action { "divemaster", parse_dive_diveguide },
D(divesiteid), D(duration), D(gps), D(invalid), D(location), D(notes), D(notrip), D(rating), D(suit), D(surge),
D(tags), D(visibility), D(watersalinity), D(watertemp), D(wavesize), D(weightsystem)
};
static void dive_parser(char *line, struct git_parser_state *state)
{
match_action(line, state, dive_action);
}
/*
 * Keyword table for the lines of a dive site file.
 * The entries must stay sorted by keyword!
 */
#undef D
#define D(x) keyword_action { #x, parse_site_ ## x }
static const std::array site_action {
	D(description),
	D(geo),
	D(gps),
	D(name),
	D(notes)
};

/* Dispatch one line of a dive site file to the handler for its keyword */
static void site_parser(char *line, struct git_parser_state *state)
{
	match_action(line, state, site_action);
}
/*
 * Keyword table for the lines of a trip file.
 * The entries must stay sorted by keyword!
 */
#undef D
#define D(x) keyword_action { #x, parse_trip_ ## x }
static const std::array trip_action {
	D(date),
	D(location),
	D(notes),
	D(time),
};

/* Dispatch one line of a trip file to the handler for its keyword */
static void trip_parser(char *line, struct git_parser_state *state)
{
	match_action(line, state, trip_action);
}
/*
 * Keyword table for the lines of the settings file.
 * The entries must stay sorted by keyword!
 */
#undef D
#define D(x) keyword_action { #x, parse_settings_ ## x }
static const std::array settings_action {
	D(autogroup),
	D(divecomputerid),
	D(fingerprint),
	D(prefs),
	D(subsurface),
	D(units),
	D(userid),
	D(version)
};

/* Dispatch one line of the settings file to the handler for its keyword */
static void settings_parser(char *line, struct git_parser_state *state)
{
	match_action(line, state, settings_action);
}
/*
 * Keyword table for the lines of a picture file.
 * The entries must stay sorted by keyword!
 */
#undef D
#define D(x) keyword_action { #x, parse_picture_ ## x }
static const std::array picture_action {
	D(filename),
	D(gps),
	D(hash)
};

/* Dispatch one line of a picture file to the handler for its keyword */
static void picture_parser(char *line, struct git_parser_state *state)
{
	match_action(line, state, picture_action);
}
static void parse_filter_preset_constraint_keyvalue(void *_state, const char *key, const std::string &value)
{
struct git_parser_state *state = (git_parser_state *)_state;
if (!strcmp(key, "type")) {
state->filter_constraint_type = value;
return;
}
if (!strcmp(key, "rangemode")) {
state->filter_constraint_range_mode = value;
return;
}
if (!strcmp(key, "stringmode")) {
state->filter_constraint_string_mode = value;
return;
}
if (!strcmp(key, "negate")) {
state->filter_constraint_negate = true;
return;
}
if (!strcmp(key, "data")) {
state->filter_constraint_data = value;
return;
}
report_error("Unknown filter preset constraint key/value pair (%s/%s)", key, value.c_str());
}
/*
 * Parse a 'constraint' line of a filter preset.
 *
 * The key/value entries are collected in the filter_constraint_*
 * members of the parser state, added as one constraint to the active
 * filter preset, and the scratch members are then reset for the next
 * constraint line.
 */
static void parse_filter_preset_constraint(char *line, struct git_parser_state *state)
{
	for (;;) {
		/* isspace() is undefined for negative values, so go through unsigned char */
		while (isspace(static_cast<unsigned char>(*line)))
			line++;
		if (!*line)
			break;
		line = parse_keyvalue_entry(parse_filter_preset_constraint_keyvalue, state, line, state);
	}

	state->active_filter->add_constraint(state->filter_constraint_type,
					     state->filter_constraint_string_mode,
					     state->filter_constraint_range_mode,
					     state->filter_constraint_negate, state->filter_constraint_data);

	/* Reset the scratch state so nothing leaks into the next constraint */
	state->filter_constraint_type.clear();
	state->filter_constraint_string_mode.clear();
	state->filter_constraint_range_mode.clear();
	state->filter_constraint_negate = false;
	state->filter_constraint_data.clear();
}
static void parse_filter_preset_fulltext_keyvalue(void *_state, const char *key, const std::string &value)
{
struct git_parser_state *state = (git_parser_state *)_state;
if (!strcmp(key, "mode")) {
state->fulltext_mode = value;
return;
}
if (!strcmp(key, "query")) {
state->fulltext_query = value;
return;
}
report_error("Unknown filter preset fulltext key/value pair (%s/%s)", key, value.c_str());
}
/*
 * Parse a 'fulltext' line of a filter preset.
 *
 * The "mode" and "query" entries are collected in the parser state,
 * handed to the active filter preset, and the scratch members are then
 * reset.
 */
static void parse_filter_preset_fulltext(char *line, struct git_parser_state *state)
{
	for (;;) {
		/* isspace() is undefined for negative values, so go through unsigned char */
		while (isspace(static_cast<unsigned char>(*line)))
			line++;
		if (!*line)
			break;
		line = parse_keyvalue_entry(parse_filter_preset_fulltext_keyvalue, state, line, state);
	}

	state->active_filter->set_fulltext(std::move(state->fulltext_query), state->fulltext_mode);

	/* The query was moved from; clear both so they are in a known state */
	state->fulltext_mode.clear();
	state->fulltext_query.clear();
}
/* 'name' line of a filter preset: the name is the first string on the line */
static void parse_filter_preset_name(char *, struct git_parser_state *state)
{
	auto preset_name = get_first_converted_string(state);
	state->active_filter->name = std::move(preset_name);
}

/*
 * Keyword table for the lines of a filter preset file.
 * The entries must stay sorted by keyword!
 */
#undef D
#define D(x) keyword_action { #x, parse_filter_preset_ ## x }
static const std::array filter_preset_action {
	D(constraint),
	D(fulltext),
	D(name)
};

/* Dispatch one line of a filter preset file to the handler for its keyword */
static void filter_preset_parser(char *line, struct git_parser_state *state)
{
	match_action(line, state, filter_preset_action);
}
/*
* We have a very simple line-based interface, with the small
* complication that lines can have strings in the middle, and
* a string can be multiple lines.
*
* The UTF-8 string escaping is *very* simple, though:
*
* - a string starts and ends with double quotes (")
*
* - inside the string we escape:
* (a) double quotes with '\"'
* (b) backslash (\) with '\\'
*
* - additionally, for human readability, we escape
* newlines with '\n\t', with the exception that
* consecutive newlines are left unescaped (so an
* empty line doesn't become a line with just a tab
* on it).
*
* Also, while the UTF-8 string can have arbitrarily
* long lines, the non-string parts of the lines are
* never long, so we can use a small temporary buffer
* on stack for that part.
*
* Also, note that if a line has one or more strings
* in it:
*
* - each string will be represented as a single '"'
* character in the output.
*
* - all strings will be stored in converted_strings.
*/
/*
 * Decode one quoted string.
 *
 * 'buf' points just past the opening double quote, 'end' is the end of
 * the data. The decoded text is appended as a new element to
 * converted_strings (so multiple strings on one line - think dive
 * tags - end up as consecutive elements), and the position just past
 * the closing quote is returned.
 *
 * Decoding rules:
 *  - a backslash makes the following character literal
 *  - a newline followed by a TAB stands for a plain newline
 *  - an unescaped double quote ends the string
 *
 * Text is only copied when one of these special sequences is hit, so
 * if the data runs out before a closing quote, the literal text after
 * the last special sequence is not part of the result.
 */
static const char *parse_one_string(const char *buf, const char *end, std::vector<std::string> &converted_strings)
{
	std::string decoded;
	const char *chunk = buf;	/* start of the literal run not copied yet */
	const char *cur = buf;

	while (cur < end) {
		const char ch = *cur++;
		char substitute;

		if (ch == '"')
			substitute = 0;		/* end of string */
		else if (ch == '\\' && cur < end)
			substitute = *cur;	/* escaped character, taken literally */
		else if (ch == '\n' && cur < end && *cur == '\t')
			substitute = '\n';	/* newline + TAB is a plain newline */
		else
			continue;

		/* Flush the literal run in front of the special character */
		decoded.append(chunk, cur - chunk - 1);
		if (!substitute)
			break;
		decoded += substitute;
		/* Skip the second character of the two-character sequence */
		chunk = ++cur;
	}
	converted_strings.push_back(std::move(decoded));
	return cur;
}
typedef void (line_fn_t)(char *, struct git_parser_state *);
#define MAXLINE 500
static unsigned parse_one_line(const char *buf, unsigned size, line_fn_t *fn, struct git_parser_state *state)
{
const char *end = buf + size;
const char *p = buf;
char line[MAXLINE + 1];
int off = 0;
git parser: handle left-over multi-line quoted strings better The git save format is designed to be entirely line-based, where all the dive data is on individual lines that are independent. That is very much by design, so that you can merge these files automatically, and not worry about what it does to the context (contrast this to structured files like JSON or XML, where you have multiple levels of indentation, and the context of a line matters). So the parser can just ignore any conflict markers, and parse everything one line at a time. Well, almost. We do have *one* special form of multi-line context, where flowed text (think things like dive notes) will have one "header line" that starts the note, and then it can continue for several lines until the final line that ends the quote. In such a situation, the dive merging can result in a partially merged string note, which has the ending line from one dive, and then continues with more string data from the other dive. That will confuse our parser mightily, because it will have seen the end of the string, and parsed the rest of those string comments as garbage lines. That part in itself is fine - the garbage lines won't pass as any real data (because they don't start with a proper keyword), but while parsing that garbage the *next* end of the string will be seen as a start of a new string. And *that* then confuses the git parser to think that the line after that is now part of the string, and so it won't correctly parse the non-string line that follows. To give a more concrete example, the git dive data (here indented and abbreviated) might look like this: suit "5mm long + 3mm hooded vest" notes "First boat dive. Giant-stride entry." Saw a turtle." cylinder vol=10.0l description="10.0ℓ" depth=66.019m where the two notes from the two dives were notes "First boat dive. Giant-stride entry" and notes "First boat dive. Saw a turtle." respectively, and the merged result contained parts of both. 
When we parse this, we will parse the 'notes' line as having the string First boat dive. Giant-stride entry which is fine. But then the next line will be that Saw a turtle." and now the ending double quote character on that line will be seen as the beginning of a new string, and the cylinder information on the next line will then be mixed up. The resulting mess will be ignored, but in the process the data on the "cylinder" line will basically have been lost. There are several ways to deal with this, but this particular fix depends on the fact that we can recognize stale string continuation lines: they are either empty (for an empty line), or they start with a TAB character. So to solve the problem with the mis-identified end quote, this recognizes that we're in such a "stale left-over comment line" context, and will just skip such lines entirely. That does mean that when you have conflicts in dive note sections due to having edited the dive concurrently on different machines, you may just lose some of the edits. But this way at least you shouldn't lose any other data due to the merge conflict. NOTE! We could try to improve on this by instead noticing that a "end of multi-line string has a continuation entry on the next line", and just say "ok, that wasn't a real end after all". But that would be an independent thing anyway - this "ignore stale text comment lines" logic would be required anyway, in case those stale text comments ended up somewhere *else* than right after another text line. So do this more important fix first. Reported-by: Michael Werle Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-07-30 20:47:43 -07:00
// Check the first character of a line: an empty line
// or a line starting with a TAB is invalid, and likely
// due to an early string end quote due to a merge
// conflict. Ignore such a line.
switch (*p) {
case '\n': case '\t':
do {
if (*p++ == '\n')
break;
} while (p < end);
report_info("git storage: Ignoring line '%.*s'", (int)(p-buf-1), buf);
git parser: handle left-over multi-line quoted strings better The git save format is designed to be entirely line-based, where all the dive data is on individual lines that are independent. That is very much by design, so that you can merge these files automatically, and not worry about what it does to the context (contrast this to structured files like JSON or XML, where you have multiple levels of indentation, and the context of a line matters). So the parser can just ignore any conflict markers, and parse everything one line at a time. Well, almost. We do have *one* special form of multi-line context, where flowed text (think things like dive notes) will have one "header line" that starts the note, and then it can continue for several lines until the final line that ends the quote. In such a situation, the dive merging can result in a partially merged string note, which has the ending line from one dive, and then continues with more string data from the other dive. That will confuse our parser mightily, because it will have seen the end of the string, and parsed the rest of those string comments as garbage lines. That part in itself is fine - the garbage lines won't pass as any real data (because they don't start with a proper keyword), but while parsing that garbage the *next* end of the string will be seen as a start of a new string. And *that* then confuses the git parser to think that the line after that is now part of the string, and so it won't correctly parse the non-string line that follows. To give a more concrete example, the git dive data (here indented and abbreviated) might look like this: suit "5mm long + 3mm hooded vest" notes "First boat dive. Giant-stride entry." Saw a turtle." cylinder vol=10.0l description="10.0ℓ" depth=66.019m where the two notes from the two dives were notes "First boat dive. Giant-stride entry" and notes "First boat dive. Saw a turtle." respectively, and the merged result contained parts of both. 
When we parse this, we will parse the 'notes' line as having the string First boat dive. Giant-stride entry which is fine. But then the next line will be that Saw a turtle." and now the ending double quote character on that line will be seen as the beginning of a new string, and the cylinder information on the next line will then be mixed up. The resulting mess will be ignored, but in the process the data on the "cylinder" line will basically have been lost. There are several ways to deal with this, but this particular fix depends on the fact that we can recognize stale string continuation lines: they are either empty (for an empty line), or they start with a TAB character. So to solve the problem with the mis-identified end quote, this recognizes that we're in such a "stale left-over comment line" context, and will just skip such lines entirely. That does mean that when you have conflicts in dive note sections due to having edited the dive concurrently on different machines, you may just lose some of the edits. But this way at least you shouldn't lose any other data due to the merge conflict. NOTE! We could try to improve on this by instead noticing that a "end of multi-line string has a continuation entry on the next line", and just say "ok, that wasn't a real end after all". But that would be an independent thing anyway - this "ignore stale text comment lines" logic would be required anyway, in case those stale text comments ended up somewhere *else* than right after another text line. So do this more important fix first. Reported-by: Michael Werle Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-07-30 20:47:43 -07:00
return p - buf;
default:
break;
}
while (p < end) {
char c = *p++;
if (c == '\n')
break;
line[off] = c;
off++;
if (off > MAXLINE)
off = MAXLINE;
if (c == '"')
p = parse_one_string(p, end, state->converted_strings);
}
line[off] = 0;
fn(line, state);
return p - buf;
}
/*
 * Run 'fn' on every line of a blob.
 *
 * The vector of converted strings is re-used from line to line: it is
 * emptied (and the read position reset) before each line is parsed,
 * and the callback may consume the strings of its line.
 */
static void for_each_line(git_blob *blob, line_fn_t *fn, struct git_parser_state *state)
{
	const char *cursor = static_cast<const char *>(git_blob_rawcontent(blob));
	unsigned int remaining = git_blob_rawsize(blob);

	while (remaining != 0) {
		state->converted_strings.clear();
		state->act_converted_string = 0;

		const unsigned int consumed = parse_one_line(cursor, remaining, fn, state);
		cursor += consumed;
		remaining -= consumed;
	}
}
/*
 * Return values of the directory handlers below.
 * NOTE(review): presumably these follow the git_tree_walk() callback
 * convention (0 = go on, positive = skip this entry) - confirm at the
 * call site.
 */
#define GIT_WALK_OK 0
#define GIT_WALK_SKIP 1
/* Hand the trip that is being parsed (if any) over to the log's trip table */
static void finish_active_trip(struct git_parser_state *state)
{
	if (!state->active_trip)
		return;
	state->log->trips.put(std::move(state->active_trip));
}
/* Hand the dive that is being parsed (if any) over to the log's dive table */
static void finish_active_dive(struct git_parser_state *state)
{
	if (!state->active_dive)
		return;
	state->log->dives.record_dive(std::move(state->active_dive));
}
/*
 * Start a new active dive with the given timestamp. Everything else is
 * filled in later from the dive file. If a trip is active, the dive is
 * added to it.
 */
static void create_new_dive(timestamp_t when, struct git_parser_state *state)
{
	auto fresh = std::make_unique<dive>();
	fresh->when = when;
	state->active_dive = std::move(fresh);

	if (!state->active_trip)
		return;
	state->active_trip->add_dive(state->active_dive.get());
}
/*
 * Rough plausibility check of a date: year strictly between 1930 and
 * 3000, month 1..12, day 1..31. The day is not checked against the
 * length of the month.
 */
static bool validate_date(int yyyy, int mm, int dd)
{
	if (yyyy <= 1930 || yyyy >= 3000)
		return false;
	if (mm < 1 || mm > 12)
		return false;
	return dd >= 1 && dd <= 31;
}
/*
 * Plausibility check of a time of day: hour 0..23, minute 0..59 and
 * second 0..60 (a value of 60 is accepted for the seconds).
 */
static bool validate_time(int h, int m, int s)
{
	if (h < 0 || h > 23)
		return false;
	if (m < 0 || m > 59)
		return false;
	return s >= 0 && s <= 60;
}
/*
 * Dive trip directory, name is 'nn-alphabetic[~hex]'
 *
 * The day of the month is the leading number of the name, year and
 * month come from the path leading up to it. If these form a plausible
 * date, the previous trip is finished and a new, empty trip becomes
 * the active one; otherwise the directory is skipped.
 */
static int dive_trip_directory(const char *root, const char *name, struct git_parser_state *state)
{
	int year = -1, month = -1;

	const bool have_year_month = sscanf(root, "%d/%d", &year, &month) == 2;
	if (!have_year_month)
		return GIT_WALK_SKIP;

	const int day = atoi(name);
	if (!validate_date(year, month, day))
		return GIT_WALK_SKIP;

	finish_active_trip(state);
	state->active_trip = std::make_unique<dive_trip>();
	return GIT_WALK_OK;
}
/*
* Dive directory, name is [[yyyy-]mm-]nn-ddd-hh:mm:ss[~hex] in older git repositories
* but [[yyyy-]mm-]nn-ddd-hh=mm=ss[~hex] in newer repos as ':' is an illegal character for Windows files
* and 'timeoff' points to what should be the time part of
* the name (the first digit of the hour).
*
* The root path will be of the form yyyy/mm[/tripdir],
*/
static int dive_directory(const char *root, const git_tree_entry *entry, const char *name, int timeoff, struct git_parser_state *state)
{
int yyyy = -1, mm = -1, dd = -1;
int h, m, s;
int mday_off, month_off, year_off;
struct tm tm;
/* Skip the '-' before the time */
mday_off = timeoff;
if (!mday_off || name[--mday_off] != '-')
return GIT_WALK_SKIP;
/* Skip the day name */
while (mday_off > 0 && name[--mday_off] != '-')
/* nothing */;
mday_off = mday_off - 2;
month_off = mday_off - 3;
year_off = month_off - 5;
if (mday_off < 0)
return GIT_WALK_SKIP;
/* Get the time of day -- parse both time formats so we can read old repos when not on Windows */
if (sscanf(name + timeoff, "%d:%d:%d", &h, &m, &s) != 3 && sscanf(name + timeoff, "%d=%d=%d", &h, &m, &s) != 3)
return GIT_WALK_SKIP;
if (!validate_time(h, m, s))
return GIT_WALK_SKIP;
git dive loading: actually insert the dives into the dive table The biggest part of this commit is the comment about the woeful state of the "git_tree_walk()" interface - the interface is not really very good for seeing any recursive state, since it just walks the tree pretty much linearly. But the only real recursive state we care about is the trip, and in all normal situations the "trip this dive is in" is the same thing as "what was the last trip directory we traversed", so a linear walk works fine. The one exception is if a dive isn't in a trip at all, in which case "last trip directory" obviously isn't what we want. But rather than do our own tree walking by hand (and just passing the trip information in the natural recursive manner when traversing the tree), we hack around it by just looking at the path to the dive. That one-liner trivial hack has now generated about 20 lines of explanation of it. ANYWAY. With this, we parse the dive and trip hierarchy properly, and instead of just printing out the data, we might as well insert the dives and trips into the subsurface data structures. Note: the only data we have about the dive and trip right now is what is visible in the directory structure, since we don't look at the actual dive file at all (not even the name of it, which contains the dive number). So the end result will be just a sea of empty dives and the trips they are contained in. The dives have a date and time, and the trip has a date, though. So this is *not* useful for actually saving and loading data, but the data we do load is easily visualized inside subsurface, so as I'm starting to add real dive data parsing code, it will all be much more visually satisfying. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org>
2014-03-08 08:35:23 -08:00
/*
* Using the "git_tree_walk()" interface is simple, but
* it kind of sucks as an interface because there is
* no sane way to pass the hierarchy to the callbacks.
* The "payload" is a fixed one-time thing: we'd like
* the "current trip" to be passed down to the dives
* that get parsed under that trip, but we can't.
*
* So "active_trip" is not the trip that is in the hierarchy
* _above_ us, it's just the trip that was _before_ us. But
* if a dive is not in a trip at all, we can't tell.
*
* We could just do a better walker that passes the
* return value around, but we hack around this by
* instead looking at the one hierarchical piece of
* data we have: the pathname to the current entry.
*
* This is pretty hacky. The magic '8' is the length
* of a pathname of the form 'yyyy/mm/'.
*/
if (strlen(root) == 8)
finish_active_trip(state);
git dive loading: actually insert the dives into the dive table The biggest part of this commit is the comment about the woeful state of the "git_tree_walk()" interface - the interface is not really very good for seeing any recursive state, since it just walks the tree pretty much linearly. But the only real recursive state we care about is the trip, and in all normal situations the "trip this dive is in" is the same thing as "what was the last trip directory we traversed", so a linear walk works fine. The one exception is if a dive isn't in a trip at all, in which case "last trip directory" obviously isn't what we want. But rather than do our own tree walking by hand (and just passing the trip information in the natural recursive manner when traversing the tree), we hack around it by just looking at the path to the dive. That one-liner trivial hack has now generated about 20 lines of explanation of it. ANYWAY. With this, we parse the dive and trip hierarchy properly, and instead of just printing out the data, we might as well insert the dives and trips into the subsurface data structures. Note: the only data we have about the dive and trip right now is what is visible in the directory structure, since we don't look at the actual dive file at all (not even the name of it, which contains the dive number). So the end result will be just a sea of empty dives and the trips they are contained in. The dives have a date and time, and the trip has a date, though. So this is *not* useful for actually saving and loading data, but the data we do load is easily visualized inside subsurface, so as I'm starting to add real dive data parsing code, it will all be much more visually satisfying. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org>
2014-03-08 08:35:23 -08:00
/*
* Get the date. The day of the month is in the dive directory
* name, the year and month might be in the path leading up
* to it.
*/
dd = atoi(name + mday_off);
if (year_off < 0) {
if (sscanf(root, "%d/%d", &yyyy, &mm) != 2)
return GIT_WALK_SKIP;
} else
yyyy = atoi(name + year_off);
if (month_off >= 0)
mm = atoi(name + month_off);
if (!validate_date(yyyy, mm, dd))
return GIT_WALK_SKIP;
/* Ok, close enough. We've gotten sufficient information */
memset(&tm, 0, sizeof(tm));
tm.tm_hour = h;
tm.tm_min = m;
tm.tm_sec = s;
tm.tm_year = yyyy;
tm.tm_mon = mm-1;
tm.tm_mday = dd;
finish_active_dive(state);
create_new_dive(utc_mktime(&tm), state);
memcpy(state->active_dive->git_id.data(), git_tree_entry_id(entry)->id, 20);
return GIT_WALK_OK;
}
/*
 * Handler for a "Pictures" directory: descend into it only while a dive
 * is being parsed, otherwise skip the whole subtree.
 */
static int picture_directory(const char *, const char *, struct git_parser_state *state)
{
	return state->active_dive ? GIT_WALK_OK : GIT_WALK_SKIP;
}
/*
 * Length of the name up to, but not including, the '~' that introduces
 * the unique part. If there is no '~' this is the full string length.
 */
static int nonunique_length(const char *str)
{
	const char *end = str;

	while (*end != '\0' && *end != '~')
		++end;
	return static_cast<int>(end - str);
}
/*
* When hitting a directory node, we have a couple of cases:
*
* - It's just a date entry - all numeric (either year or month):
*
* [yyyy|mm]
*
* We don't do anything with these, we just traverse into them.
* The numeric data will show up as part of the full path when
* we hit more interesting entries.
*
* - It's a trip directory. The name will be of the form
*
* nn-alphabetic[~hex]
*
* where 'nn' is the day of the month (year and month will be
* encoded in the path leading up to this).
*
* - It's a dive directory. The name will be of the form
*
* [[yyyy-]mm-]nn-ddd-hh=mm=ss[~hex]
*
* (older versions had this as [[yyyy-]mm-]nn-ddd-hh:mm:ss[~hex]
* but that failed on Windows)
*
* which describes the date and time of a dive (yyyy and mm
* are optional, and may be encoded in the path leading up to
* the dive).
*
* - It is a per-dive picture directory ("Pictures")
*
* - It's some random non-dive-data directory.
*
* If it doesn't match the above patterns, we'll ignore them
* for dive loading purposes, and not even recurse into them.
*/
static int walk_tree_directory(const char *root, const git_tree_entry *entry, struct git_parser_state *state)
{
	const char *name = git_tree_entry_name(entry);
	int digits = 0, len;
	char c;

	/* Directories that are recognized purely by their name */
	if (!strcmp(name, "Pictures"))
		return picture_directory(root, name, state);
	if (!strcmp(name, "01-Divesites"))
		return GIT_WALK_OK;
	if (!strcmp(name, "02-Filterpresets"))
		return GIT_WALK_OK;

	/*
	 * Count the leading digits. The value handed to isdigit() must be
	 * representable as unsigned char: a plain (possibly negative) char
	 * from a non-ASCII directory name would be undefined behavior.
	 */
	while (isdigit(static_cast<unsigned char>(c = name[digits])))
		digits++;

	/* Doesn't start with two or four digits? Skip */
	if (digits != 4 && digits != 2)
		return GIT_WALK_SKIP;

	/* Only digits? Do nothing, but recurse into it */
	if (!c)
		return GIT_WALK_OK;

	/* All valid cases need to have a dash following */
	if (c != '-')
		return GIT_WALK_SKIP;

	/* Do a quick check for a common dive case */
	len = nonunique_length(name);

	/*
	 * We know the len is at least 3, because we had at least
	 * two digits and a dash
	 */
	if (name[len-3] == ':' || name[len-3] == '=') {
		/*
		 * The time field "hh=mm=ss" takes eight characters. A shorter
		 * name cannot be a dive directory, and would make the offset
		 * passed on below negative.
		 */
		if (len < 8)
			return GIT_WALK_SKIP;
		return dive_directory(root, entry, name, len-8, state);
	}

	/* What is left can only be a trip directory: nn-alphabetic[~hex] */
	if (digits != 2)
		return GIT_WALK_SKIP;

	return dive_trip_directory(root, name, state);
}
/*
 * Look up the blob a tree entry refers to.
 * Returns the blob, or a null pointer if git_blob_lookup() reports failure.
 */
static git_blob *git_tree_entry_blob(git_repository *repo, const git_tree_entry *entry)
{
	git_blob *result;

	if (git_blob_lookup(&result, repo, git_tree_entry_id(entry)) != 0)
		return nullptr;
	return result;
}
static struct divecomputer *create_new_dc(struct dive *dive)
{
struct divecomputer *dc = &dive->dcs.back();
/* Did we already fill that in? */
if (!dc->samples.empty() || !dc->model.empty() || dc->when) {
dive->dcs.emplace_back();
dc = &dive->dcs.back();
}
dc->when = dive->when;
dc->duration = dive->duration;
return dc;
}
/*
* We should *really* try to delay the dive computer data parsing
* until necessary, in order to reduce load-time. The parsing is
* cheap, but the loading of the git blob into memory can be pretty
* costly.
*/
static int parse_divecomputer_entry(struct git_parser_state *state, const git_tree_entry *entry, const char *)
{
git_blob *blob = git_tree_entry_blob(state->repo, entry);
if (!blob)
return report_error("Unable to read divecomputer file");
state->active_dc = create_new_dc(state->active_dive.get());
for_each_line(blob, divecomputer_parser, state);
git_blob_free(blob);
state->active_dc = NULL;
return 0;
}
/*
* NOTE! The "git_id" for the dive is the hash for the whole dive directory.
* As such, it covers not just the dive, but the divecomputers and the
* pictures too. So if any of the dive computers change, the dive cache
* has to be invalidated too.
*/
static int parse_dive_entry(struct git_parser_state *state, const git_tree_entry *entry, const char *suffix)
{
git_blob *blob = git_tree_entry_blob(state->repo, entry);
if (!blob)
return report_error("Unable to read dive file");
if (*suffix)
state->active_dive->number = atoi(suffix + 1);
state->active_dive->weightsystems.clear();
state->o2pressure_sensor = 1;
for_each_line(blob, dive_parser, state);
git_blob_free(blob);
return 0;
}
/*
 * Parse a dive site file.
 *
 * 'suffix' is the hexadecimal uuid taken from the file name; an empty
 * suffix is reported as an error. The site with that uuid is fetched from
 * (or added to) the log's site table and filled by site_parser.
 * state->active_site is only set while the file is being parsed and is
 * reset on every exit path after it has been set, including the one
 * where the blob can't be read.
 * Returns 0 on success, otherwise the result of report_error().
 */
static int parse_site_entry(struct git_parser_state *state, const git_tree_entry *entry, const char *suffix)
{
	if (*suffix == '\0')
		return report_error("Dive site without uuid");

	uint32_t uuid = strtoul(suffix, nullptr, 16);
	state->active_site = state->log->sites.alloc_or_get(uuid);

	git_blob *blob = git_tree_entry_blob(state->repo, entry);
	if (!blob) {
		/* Don't leave a stale active site behind for later entries */
		state->active_site = nullptr;
		return report_error("Unable to read dive site file");
	}

	for_each_line(blob, site_parser, state);
	state->active_site = nullptr;
	git_blob_free(blob);
	return 0;
}
static int parse_trip_entry(struct git_parser_state *state, const git_tree_entry *entry)
{
git_blob *blob = git_tree_entry_blob(state->repo, entry);
if (!blob)
return report_error("Unable to read trip file");
for_each_line(blob, trip_parser, state);
git_blob_free(blob);
return 0;
}
static int parse_settings_entry(struct git_parser_state *state, const git_tree_entry *entry)
{
git_blob *blob = git_tree_entry_blob(state->repo, entry);
if (!blob)
return report_error("Unable to read settings file");
for_each_line(blob, settings_parser, state);
git_blob_free(blob);
return 0;
}
static int parse_picture_entry(struct git_parser_state *state, const git_tree_entry *entry, const char *name)
{
git_blob *blob;
int hh, mm, ss, offset;
char sign;
/*
* The format of the picture name files is just the offset within
* the dive in form [[+-]hh=mm=ss (previously [[+-]hh:mm:ss, but
* that didn't work on Windows), possibly followed by a hash to
* make the filename unique (which we can just ignore).
*/
if (sscanf(name, "%c%d:%d:%d", &sign, &hh, &mm, &ss) != 4 &&
sscanf(name, "%c%d=%d=%d", &sign, &hh, &mm, &ss) != 4)
return report_error("Unknown file name %s", name);
offset = ss + 60 * (mm + 60 * hh);
if (sign == '-')
offset = -offset;
blob = git_tree_entry_blob(state->repo, entry);
if (!blob)
return report_error("Unable to read picture file");
state->active_pic.offset.seconds = offset;
for_each_line(blob, picture_parser, state);
add_picture(state->active_dive->pictures, std::move(state->active_pic));
git_blob_free(blob);
/* add_picture took ownership of the data -
* clear out our copy just to be sure. */
state->active_pic = picture();
return 0;
}
static int parse_filter_preset(struct git_parser_state *state, const git_tree_entry *entry)
{
git_blob *blob = git_tree_entry_blob(state->repo, entry);
if (!blob)
return report_error("Unable to read filter preset file");
state->active_filter = std::make_unique<filter_preset>();
for_each_line(blob, filter_preset_parser, state);
git_blob_free(blob);
state->log->filter_presets.add(*state->active_filter);
state->active_filter.reset();
return 0;
}
/*
 * Dispatch a file (non-directory) tree entry to the matching parser,
 * based on its name:
 *
 *   [+-]...        picture of the active dive
 *   Divecomputer*  dive computer of the active dive
 *   Dive*          the active dive itself
 *   Preset-*       filter preset
 *   Site*          dive site
 *   00-Trip        data of the active trip
 *   00-Subsurface  settings
 *
 * Dive, picture and trip files are only handled while a dive or a trip
 * is active. Anything else is reported as unknown.
 * Returns the result of the parser that was called, or GIT_WALK_SKIP for
 * an unknown file.
 */
static int walk_tree_file(const char *root, const git_tree_entry *entry, struct git_parser_state *state)
{
	auto &dive = state->active_dive;
	auto &trip = state->active_trip;
	const char *name = git_tree_entry_name(entry);

	if (verbose > 1)
		report_info("git load handling file %s\n", name);

	switch (*name) {
	case '-': case '+':
		if (dive)
			return parse_picture_entry(state, entry, name);
		break;
	case 'D':
		/* "Divecomputer" has to be tested first, it also starts with "Dive" */
		if (dive && !strncmp(name, "Divecomputer", 12))
			return parse_divecomputer_entry(state, entry, name + 12);
		if (dive && !strncmp(name, "Dive", 4))
			return parse_dive_entry(state, entry, name + 4);
		break;
	case 'P':
		if (!strncmp(name, "Preset-", 7))
			return parse_filter_preset(state, entry);
		break;
	case 'S':
		if (!strncmp(name, "Site", 4)) {
			/*
			 * The uuid follows a one-character separator. For a bare
			 * "Site" there is nothing behind the terminator, so pass
			 * the empty remainder instead of a pointer past the end
			 * of the string.
			 */
			const char *suffix = name[4] ? name + 5 : name + 4;
			return parse_site_entry(state, entry, suffix);
		}
		break;
	case '0':
		if (trip && !strcmp(name, "00-Trip"))
			return parse_trip_entry(state, entry);
		if (!strcmp(name, "00-Subsurface"))
			return parse_settings_entry(state, entry);
		break;
	}
	report_error("Unknown file %s%s (%p %p)", root, name, dive.get(), trip.get());
	return GIT_WALK_SKIP;
}
static int walk_tree_cb(const char *root, const git_tree_entry *entry, void *payload)
{
struct git_parser_state *state = (git_parser_state *)payload;
git_filemode_t mode = git_tree_entry_filemode(entry);
if (mode == GIT_FILEMODE_TREE)
return walk_tree_directory(root, entry, state);
walk_tree_file(root, entry, state);
/* Ignore failed blob loads */
return GIT_WALK_OK;
}
/*
 * Walk the tree in pre-order, handing every entry to walk_tree_cb().
 * The result of the walk is not evaluated: this always returns 0.
 * The 'repo' parameter is not used here.
 */
static int load_dives_from_tree(git_repository *repo, git_tree *tree, struct git_parser_state *state)
{
	(void)repo;
	(void)git_tree_walk(tree, GIT_TREEWALK_PRE, walk_tree_cb, state);
	return 0;
}
/* Forget the saved git id, leaving it as an empty string. */
void clear_git_id()
{
	saved_git_id.erase();
}
void set_git_id(const struct git_oid *id)
{
char git_id_buffer[GIT_OID_HEXSZ + 1];
git_oid_tostr(git_id_buffer, sizeof(git_id_buffer), id);
saved_git_id = git_id_buffer;
}
/*
 * Resolve the revision 'branch' to a commit.
 *
 * On success the commit is stored in *commit_p and 0 is returned.
 * Otherwise the error is reported and the result of report_error() is
 * returned.
 */
static int find_commit(git_repository *repo, const char *branch, git_commit **commit_p)
{
	git_object *object = nullptr;

	if (git_revparse_single(&object, repo, branch))
		return report_error("Unable to look up revision '%s'", branch);

	/*
	 * The peeled commit is an object in its own right, so the object
	 * returned by the revision lookup has to be released here whether
	 * or not peeling worked - it was leaked before.
	 */
	const int peel_failed = git_object_peel(reinterpret_cast<git_object **>(commit_p), object, GIT_OBJ_COMMIT);
	git_object_free(object);
	if (peel_failed)
		return report_error("Revision '%s' is not a valid commit", branch);
	return 0;
}
/*
 * Load the dive data stored in the commit that 'branch' resolves to.
 *
 * On success the id of that commit is remembered via set_git_id().
 * The commit and its tree are released on every path after they have
 * been obtained.
 * Returns 0 on success, otherwise the result of the failing step.
 */
static int do_git_load(git_repository *repo, const char *branch, struct git_parser_state *state)
{
	int ret;
	git_commit *commit;
	git_tree *tree;

	ret = find_commit(repo, branch, &commit);
	if (ret)
		return ret;

	if (git_commit_tree(&tree, commit)) {
		/* The commit was looked up successfully and has to be released */
		git_commit_free(commit);
		return report_error("Could not look up tree of commit in branch '%s'", branch);
	}

	git_storage_update_progress(translate("gettextFromC", "Load dives from local cache"));
	ret = load_dives_from_tree(repo, tree, state);
	if (!ret) {
		set_git_id(git_commit_id(commit));
		git_storage_update_progress(translate("gettextFromC", "Successfully opened dive data"));
	}

	git_tree_free(tree);
	git_commit_free(commit);
	return ret;
}
/*
 * Return the hexadecimal id of the commit that 'branch' resolves to in
 * 'repo', or an empty string if no such commit can be found.
 */
std::string get_sha(git_repository *repo, const std::string &branch)
{
	char git_id_buffer[GIT_OID_HEXSZ + 1];
	git_commit *commit;

	if (find_commit(repo, branch.c_str(), &commit))
		return std::string();

	/*
	 * Ask libgit2 for the commit's id instead of casting the commit
	 * pointer to a git_oid pointer: the layout of git_commit is private
	 * to the library.
	 */
	git_oid_tostr(git_id_buffer, sizeof(git_id_buffer), git_commit_id(commit));
	git_commit_free(commit);
	return std::string(git_id_buffer);
}
/*
 * Load the dives of the branch described by 'info' into 'log'.
 *
 * If 'info' carries no open repository, an error is reported and the
 * result of report_error() is returned, so that the caller can try to
 * load the data some other way.
 *
 * Otherwise the result of do_git_load() is returned, which is zero on
 * success. A dive or trip that is still being assembled when the load
 * ends is finished in either case.
 */
int git_load_dives(struct git_info *info, struct divelog *log)
{
	if (info->repo == nullptr)
		return report_error("Unable to open git repository '%s[%s]'", info->url.c_str(), info->branch.c_str());

	struct git_parser_state state;
	state.repo = info->repo;
	state.log = log;

	const int result = do_git_load(info->repo, info->branch.c_str(), &state);
	finish_active_dive(&state);
	finish_active_trip(&state);
	return result;
}