From 1d2a430b807191e6b43fb3b44efb5b8afe747ccd Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Sat, 2 Nov 2024 18:23:59 +0100 Subject: [PATCH 1/2] Slight refactor of DAN parse code Mostly NFC; this commit is mainly to get familiar with the codebase and to meet the people who will review these changes. I hope to make some changes to the DAN parsing code to eventually extract more metainfo from my Aqualung dive computer's `.zxu`-formatted logs. To do so, and to be able to work on this efficiently, I've refactored the DAN parsing code in a somewhat more modern C++ style, and made it more true-to-spec with respect to the (...ancient) DAN file format documentation that I could dig up. Hopefully that's an acceptable tradeoff for the project. This more true-to-spec parsing also fixes a bug where the dive number was parsed from the incorrect index in the ZDH vector (or, at least, I consider it a bug: the "Export sequence" number was being used as the dive number instead of the "Internal Dive Sequence" number. The latter is described in the spec as: `The sequence number assigned to the dive by the recording computer`). Also contains some unrelated formatting changes; I tried to keep these minimal (I presume these files haven't been touched in a while by `clang-format`). 
Signed-off-by: Morten Borup Petersen --- core/import-csv.cpp | 348 ++++++++++++++++++++++++++++---------------- core/xmlparams.cpp | 9 +- core/xmlparams.h | 3 +- dives/DL7.zxu | 6 +- tests/testparse.cpp | 9 +- 5 files changed, 238 insertions(+), 137 deletions(-) diff --git a/core/import-csv.cpp b/core/import-csv.cpp index 281653d6d..dcb43baad 100644 --- a/core/import-csv.cpp +++ b/core/import-csv.cpp @@ -1,21 +1,21 @@ -#include -#include #include #include +#include +#include +#include #include "dive.h" -#include "errorhelper.h" -#include "subsurface-string.h" #include "divelist.h" #include "divelog.h" +#include "errorhelper.h" #include "file.h" #include "format.h" -#include "parse.h" -#include "sample.h" -#include "divelist.h" #include "gettext.h" #include "import-csv.h" +#include "parse.h" #include "qthelper.h" +#include "sample.h" +#include "subsurface-string.h" #include "xmlparams.h" #define MATCH(buffer, pattern) \ @@ -107,18 +107,205 @@ static char *parse_dan_new_line(char *buf, const char *NL) } static int try_to_xslt_open_csv(const char *filename, std::string &mem, const char *tag); + +static int parse_csv_line(char *&ptr, const char *NL, char delim, std::vector &fields) +{ + char *line_end = strstr(ptr, NL); // Find the end of the line using the newline string + bool withNL = line_end; + + if (!line_end) { + // EOF - set line_end to end of 'ptr' + line_end = ptr + strlen(ptr); + } + + // Create a temporary pointer to traverse the line + char *field_start = ptr; + char *field_end = nullptr; + + // Skip leading delimiter + if (*field_start == delim) { + field_start++; + } else { + return report_error("DEBUG: No leading delimiter found"); + } + + while (field_start < line_end) { + // Find the next delimiter or end of line + field_end = static_cast(memchr(field_start, delim, line_end - field_start)); + + if (field_end) { + // If we found a delimiter, extract the field + fields.emplace_back(field_start, field_end - field_start); + // Move to the next character 
after the delimiter + field_start = field_end + 1; + } else { + // If no more delimiters, add the last field + fields.emplace_back(field_start, line_end - field_start); + break; + } + } + + // Update the pointer to point to the next line + ptr = line_end; + if (withNL) + ptr += strlen(NL); + return 0; +} + + +// Parses a line of DAN data fields (| separated). The provided 'fields' mapping +// will get filled with as many fields as are found in the line. +static int parse_dan_fields( + const char *NL, + std::map &fields, + char *&ptr) +{ + std::vector csv_fields; + if (parse_csv_line(ptr, NL, '|', csv_fields) < 0) + return -1; + + if (csv_fields.size() > fields.size()) { + report_info("DEBUG: More DAN fields than expected"); + return -1; + } + + for (size_t i = 0; i < csv_fields.size(); i++) { + fields[i] = csv_fields[i]; + } + + return 0; +} + + +// Parses the DAN ZDH dive header. +static int parse_dan_zdh(const char *NL, struct xml_params *params, char *&ptr) +{ + // Skip the leading 'ZDH' + ptr += 3; + + std::string temp; + + // Parse all fields - we only use a subset of them, but parse all for code maintain- and debugability. + enum ZDH_FIELD { + EXPORT_SEQUENCE, + INTERNAL_DIVE_SEQUENCE, + RECORD_TYPE, + RECORDING_INTERVAL, + LEAVE_SURFACE, + AIR_TEMPERATURE, + TANK_VOLUME, + O2_MODE, + REBREATHER_DILUENT_GAS, + ALTITUDE, + }; + std::map fields = { + {EXPORT_SEQUENCE, ""}, + {INTERNAL_DIVE_SEQUENCE, ""}, + {RECORD_TYPE, ""}, + {RECORDING_INTERVAL, ""}, + {LEAVE_SURFACE, ""}, + {AIR_TEMPERATURE, ""}, + {TANK_VOLUME, ""}, + {O2_MODE, ""}, + {REBREATHER_DILUENT_GAS, ""}, + {ALTITUDE, ""}, + }; + + if (parse_dan_fields(NL, fields, ptr) < 0) + return -1; + + // Add relevant fields to the XML parameters. + + // Parse date. 'leaveSurface' should (per the spec) be provided in + // the format "YYYYMMDDHHMMSS", but old code used to allow for just parsing + // the date... so we'll do that here as well. 
+ auto &leaveSurface = fields[LEAVE_SURFACE]; + if (leaveSurface.length() >= 8) { + xml_params_add(params, "date", leaveSurface.substr(0, 8)); + } + + // Parse time with "1" prefix + if (leaveSurface.length() >= 14) { + std::string time_str = "1" + leaveSurface.substr(8, 6); + xml_params_add(params, "time", time_str); + } + + xml_params_add(params, "airTemp", fields[AIR_TEMPERATURE]); + xml_params_add(params, "diveNro", fields[INTERNAL_DIVE_SEQUENCE]); + + return 0; +} + +// Parse the DAN ZDT dive trailer. +static int parse_dan_zdt(const char *NL, struct xml_params *params, char *&ptr) +{ + // Skip the leading 'ZDT' + ptr += 3; + + enum ZDT_FIELD { + EXPORT_SEQUENCE, + INTERNAL_DIVE_SEQUENCE, + MAX_DEPTH, + REACH_SURFACE, + MIN_WATER_TEMP, + PRESSURE_DROP, + }; + + std::map fields = { + {EXPORT_SEQUENCE, ""}, + {INTERNAL_DIVE_SEQUENCE, ""}, + {MAX_DEPTH, ""}, + {REACH_SURFACE, ""}, + {MIN_WATER_TEMP, ""}, + {PRESSURE_DROP, ""}, + }; + + if (parse_dan_fields(NL, fields, ptr) < 0) + return -1; + + // Add relevant fields to the XML parameters. + xml_params_add(params, "waterTemp", fields[MIN_WATER_TEMP]); + + return 0; +} + +static int parse_dan_zdp(const char *NL, const char *filename, struct xml_params *params, char *&ptr, std::string &mem_csv) +{ + if (strncmp(ptr, "ZDP{", 4) != 0) + return report_error("DEBUG: Failed to find start of ZDP"); + + if (ptr && ptr[4] == '}') + return report_error(translate("gettextFromC", "No dive profile found from '%s'"), filename); + + ptr = parse_dan_new_line(ptr, NL); + if (!ptr) + return -1; + + // We're now in the ZDP segment. Look for the end of it. + char *end_ptr = strstr(ptr, "ZDP}"); + if (!end_ptr) { + return report_error("DEBUG: failed to find end of ZDP"); + } + + /* Copy the current dive data to start of mem_csv buffer */ + mem_csv = std::string(ptr, end_ptr - ptr); + + // Skip the trailing 'ZDP}' line. 
+ ptr = end_ptr; + ptr = parse_dan_new_line(end_ptr, NL); + return 0; +} + static int parse_dan_format(const char *filename, struct xml_params *params, struct divelog *log) { - int ret = 0, i; - size_t end_ptr = 0; - char tmpbuf[MAXCOLDIGITS]; + int ret = 0; int params_orig_size = xml_params_count(params); char *ptr = NULL; const char *NL = NULL; - char *iter = NULL; auto [mem, err] = readfile(filename); + const char *end = mem.data() + mem.size(); if (err < 0) return report_error(translate("gettextFromC", "Failed to read '%s'"), filename); @@ -132,133 +319,36 @@ static int parse_dan_format(const char *filename, struct xml_params *params, str return -1; } - while ((end_ptr < mem.size()) && (ptr = strstr(mem.data() + end_ptr, "ZDH"))) { - xml_params_resize(params, params_orig_size); // restart with original parameter block - char *iter_end = NULL; - iter = ptr + 4; - iter = strchr(iter, '|'); - if (iter) { - memcpy(tmpbuf, ptr + 4, iter - ptr - 4); - tmpbuf[iter - ptr - 4] = 0; - xml_params_add(params, "diveNro", tmpbuf); - } + // Iteratively parse ZDH, ZDP and ZDT fields, which together comprise a list of dives. 
+ while (ptr < end) { + xml_params_resize(params, params_orig_size); // Restart with original parameter block - //report_info("DEBUG: BEGIN end_ptr %d round %d <%s>", end_ptr, j++, ptr); - iter = ptr + 1; - for (i = 0; i <= 4 && iter; ++i) { - iter = strchr(iter, '|'); - if (iter) - ++iter; - } - - if (!iter) { - report_info("DEBUG: Data corrupt"); - return -1; - } - - /* Setting date */ - memcpy(tmpbuf, iter, 8); - tmpbuf[8] = 0; - xml_params_add(params, "date", tmpbuf); - - /* Setting time, gotta prepend it with 1 to - * avoid octal parsing (this is stripped out in - * XSLT */ - tmpbuf[0] = '1'; - memcpy(tmpbuf + 1, iter + 8, 6); - tmpbuf[7] = 0; - xml_params_add(params, "time", tmpbuf); - - /* Air temperature */ - memset(tmpbuf, 0, sizeof(tmpbuf)); - iter = strchr(iter, '|'); - - if (iter) { - iter = iter + 1; - iter_end = strchr(iter, '|'); - - if (iter_end) { - memcpy(tmpbuf, iter, iter_end - iter); - xml_params_add(params, "airTemp", tmpbuf); - } - } - - /* Search for the next line */ - if (iter) - iter = parse_dan_new_line(iter, NL); - if (!iter) - return -1; - - /* We got a trailer, no samples on this dive */ - if (strncmp(iter, "ZDT", 3) == 0) { - end_ptr = iter - mem.data(); - - /* Water temperature */ - memset(tmpbuf, 0, sizeof(tmpbuf)); - for (i = 0; i < 5 && iter; ++i) - iter = strchr(iter + 1, '|'); - - if (iter) { - iter = iter + 1; - iter_end = strchr(iter, '|'); - - if (iter_end) { - memcpy(tmpbuf, iter, iter_end - iter); - xml_params_add(params, "waterTemp", tmpbuf); - } - } - ret |= parse_xml_buffer(filename, "", 11, log, params); - continue; - } - - /* After ZDH we should get either ZDT (above) or ZDP */ - if (strncmp(iter, "ZDP{", 4) != 0) { - report_info("DEBUG: Input appears to violate DL7 specification"); - end_ptr = iter - mem.data(); - continue; - } - - if (ptr && ptr[4] == '}') - return report_error(translate("gettextFromC", "No dive profile found from '%s'"), filename); - - if (ptr) + // Locate the ZDH header. 
+ while (strncmp(ptr, "ZDH", 3) != 0) { ptr = parse_dan_new_line(ptr, NL); - if (!ptr) - return -1; + if (!ptr) + return report_error("Expected ZDH header not found"); + } - end_ptr = ptr - mem.data(); + if (int ret = parse_dan_zdh(NL, params, ptr); ret < 0) + return ret; - /* Copy the current dive data to start of mem_csv buffer */ - std::string mem_csv(ptr, mem.size() - (ptr - mem.data())); + // Attempt to parse the ZDP field (optional) + std::string mem_csv; + if (strncmp(ptr, "ZDP", 3) == 0) { + if (int ret = parse_dan_zdp(NL, filename, params, ptr, mem_csv); ret < 0) + return ret; + } - ptr = strstr(mem_csv.data(), "ZDP}"); - if (ptr) { - *ptr = 0; + // Parse the mandatorty ZDT field + if (strncmp(ptr, "ZDT", 3) == 0) { + if (int ret = parse_dan_zdt(NL, params, ptr); ret < 0) + return ret; } else { - report_info("DEBUG: failed to find end ZDP"); - return -1; + return report_error("Expected ZDT trailer not found"); } - mem_csv.resize(ptr - mem_csv.data()); - end_ptr += ptr - mem_csv.data(); - iter = parse_dan_new_line(ptr + 1, NL); - if (iter && strncmp(iter, "ZDT", 3) == 0) { - /* Water temperature */ - memset(tmpbuf, 0, sizeof(tmpbuf)); - for (i = 0; i < 5 && iter; ++i) - iter = strchr(iter + 1, '|'); - - if (iter) { - iter = iter + 1; - iter_end = strchr(iter, '|'); - - if (iter_end) { - memcpy(tmpbuf, iter, iter_end - iter); - xml_params_add(params, "waterTemp", tmpbuf); - } - } - } if (try_to_xslt_open_csv(filename, mem_csv, "csv")) return -1; diff --git a/core/xmlparams.cpp b/core/xmlparams.cpp index 3ab6f1c4c..ca9c0fb52 100644 --- a/core/xmlparams.cpp +++ b/core/xmlparams.cpp @@ -18,12 +18,17 @@ void xml_params_resize(struct xml_params *params, int count) void xml_params_add(struct xml_params *params, const char *key, const char *value) { - params->items.push_back({ std::string(key), std::string(value) }); + xml_params_add(params, std::string(key), std::string(value)); +} + +void xml_params_add(struct xml_params *params, const std::string &key, const 
std::string &value) +{ + params->items.push_back({key, value}); } void xml_params_add_int(struct xml_params *params, const char *key, int value) { - params->items.push_back({ std::string(key), std::to_string(value) }); + params->items.push_back({std::string(key), std::to_string(value)}); } int xml_params_count(const struct xml_params *params) diff --git a/core/xmlparams.h b/core/xmlparams.h index e91a30ce0..146f9ca3e 100644 --- a/core/xmlparams.h +++ b/core/xmlparams.h @@ -18,9 +18,10 @@ extern struct xml_params *alloc_xml_params(); extern void free_xml_params(struct xml_params *params); extern void xml_params_resize(struct xml_params *params, int count); extern void xml_params_add(struct xml_params *params, const char *key, const char *value); +extern void xml_params_add(struct xml_params *params, const std::string &key, const std::string &value); extern void xml_params_add_int(struct xml_params *params, const char *key, int value); extern int xml_params_count(const struct xml_params *params); -extern const char *xml_params_get_key(const struct xml_params *params, int idx); // not stable +extern const char *xml_params_get_key(const struct xml_params *params, int idx); // not stable extern const char *xml_params_get_value(const struct xml_params *params, int idx); // not stable extern void xml_params_set_value(struct xml_params *params, int idx, const char *value); extern const char **xml_params_get(const struct xml_params *params); // not stable diff --git a/dives/DL7.zxu b/dives/DL7.zxu index 49700fcc7..4f6499651 100644 --- a/dives/DL7.zxu +++ b/dives/DL7.zxu @@ -9,6 +9,6 @@ ZDP{ |3300|10||||| |3600|0||||| ZDP} -ZDT|2|2|10.0|20180102110000|25|| -ZDH|3|3|I|QS|20180103101000|28|11|FO2||| -ZDT|3|3|10.0|20180103102000|26|| +ZDT|1|2|10.0|20180102110000|25|| +ZDH|1|3|I|QS|20180103101000|28|11|FO2||| +ZDT|1|3|10.0|20180103102000|26|| diff --git a/tests/testparse.cpp b/tests/testparse.cpp index c75a8bb49..4afa0b42c 100644 --- a/tests/testparse.cpp +++ 
b/tests/testparse.cpp @@ -5,12 +5,12 @@ #include "core/divelog.h" #include "core/divesite.h" #include "core/errorhelper.h" -#include "core/trip.h" #include "core/file.h" #include "core/import-csv.h" #include "core/parse.h" #include "core/qthelper.h" #include "core/subsurface-string.h" +#include "core/trip.h" #include "core/xmlparams.h" #include @@ -224,7 +224,8 @@ void TestParse::testParseNewFormat() "/dives/") .append(files.at(i)) .toLatin1() - .data(), &divelog), + .data(), + &divelog), 0); QCOMPARE(divelog.dives.size(), i + 1); } @@ -452,7 +453,11 @@ void TestParse::parseDL7() QCOMPARE(parse_csv_file(SUBSURFACE_TEST_DATA "/dives/DL7.zxu", ¶ms, "DL7", &divelog), 0); + QCOMPARE(divelog.dives.size(), 3); + QCOMPARE(divelog.dives[0]->number, 1); + QCOMPARE(divelog.dives[1]->number, 2); + QCOMPARE(divelog.dives[2]->number, 3); QCOMPARE(save_dives("./testdl7out.ssrf"), 0); FILE_COMPARE("./testdl7out.ssrf", From 407beefad6cd8668c3891947e6225b8c152312e1 Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Mon, 11 Nov 2024 16:58:34 +0100 Subject: [PATCH 2/2] fix default mem_csv value Signed-off-by: Morten Borup Petersen --- core/import-csv.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/import-csv.cpp b/core/import-csv.cpp index dcb43baad..36d7e5d94 100644 --- a/core/import-csv.cpp +++ b/core/import-csv.cpp @@ -350,8 +350,12 @@ static int parse_dan_format(const char *filename, struct xml_params *params, str } + if (mem_csv.empty()) { + mem_csv = ""; + } else { if (try_to_xslt_open_csv(filename, mem_csv, "csv")) return -1; + } ret |= parse_xml_buffer(filename, mem_csv.data(), mem_csv.size(), log, params); }