subsurface/parse-xml.c
Linus Torvalds 0eb53fab52 Flesh out the UDDF xml parsing a bit more
Commit 28aba5a206 ("Flesh out the UDDF xml parsing a bit more")
improved on parsing UDDF files by teaching "percent()" to also handle
pure fractions like UDDF uses. So in a UDDF file, an o2 value of "1.0"
means "100%".

But it turns out that I have a few dives with "1% He", and the "Turn
fractions into percent" logic also turns that into 100%.

So this makes the 'percent()' function a bit smarter. If it actually
finds a percentage-sign after the number, it knows it is already
percent, not a fraction. That disambiguates the two cases: "1.0" is
100%, but "1.0%" (note the explicit percentage sign) is 1%.

So now our native format cannot get confused, because it generally
tries to avoid naked numbers. Good choice.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Dirk Hohndel <dirk@hohndel.org>
2013-02-22 20:17:39 -08:00

1625 lines
37 KiB
C

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <assert.h>
#define __USE_XOPEN
#include <time.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#ifdef XSLT
#include <libxslt/transform.h>
#endif
#include <glib/gi18n.h>
#include "dive.h"
#include "device.h"
/* Verbosity level: the match helpers in this file print failed matches
 * when it is above 1 and every match attempt when it is above 2 */
int verbose;
/* Forward declaration; the definition is not in this part of the file */
static xmlDoc *test_xslt_transforms(xmlDoc *doc, GError **error);
/* the dive table holds the overall dive list; target table points at
 * the table we are currently filling */
struct dive_table dive_table;
struct dive_table *target_table = NULL;
/*
 * Build a parse error (DIVE_ERROR_PARSE in the "subsurface" quark domain)
 * from a printf-style format and store it in *error.
 * Does nothing when the caller passed no error slot.
 */
static void parser_error(GError **error, const char *fmt, ...)
{
	if (error) {
		va_list ap;

		va_start(ap, fmt);
		*error = g_error_new_valist(g_quark_from_string("subsurface"), DIVE_ERROR_PARSE, fmt, ap);
		va_end(ap);
	}
}
/*
 * Append a dive (after running it through fixup_dive()) to a dive table.
 *
 * The pointer array is grown to (nr + 32) * 3 / 2 entries whenever it is
 * full; if that reallocation fails the process exits with status 1.
 */
static void record_dive_to_table(struct dive *dive, struct dive_table *table)
{
	assert(table != NULL);
	int count = table->nr;
	int capacity = table->allocated;
	struct dive **slots = table->dives;

	if (count >= capacity) {
		capacity = (count + 32) * 3 / 2;
		slots = realloc(slots, capacity * sizeof(struct dive *));
		if (!slots)
			exit(1);
		table->dives = slots;
		table->allocated = capacity;
	}
	slots[count] = fixup_dive(dive);
	table->nr = count + 1;
}
/* Public entry point: append a dive to the global dive_table */
void record_dive(struct dive *dive)
{
record_dive_to_table(dive, &dive_table);
}
/* Trace a match attempt; only printed when verbose is above 2 */
static void start_match(const char *type, const char *name, char *buffer)
{
	if (verbose <= 2)
		return;
	printf("Matching %s '%s' (%s)\n", type, name, buffer);
}
/* Report a node that no matcher accepted; only printed when verbose is above 1 */
static void nonmatch(const char *type, const char *name, char *buffer)
{
	if (verbose <= 1)
		return;
	printf("Unable to match %s '%s' (%s)\n", type, name, buffer);
}
/* Callback invoked with the node's text and the destination to fill in */
typedef void (*matchfn_t)(char *buffer, void *);

/*
 * Suffix match: if the last plen bytes of name equal pattern, call
 * fn(buf, data) and return 1. Otherwise return 0 without calling fn.
 */
static int match(const char *pattern, int plen,
		 const char *name, int nlen,
		 matchfn_t fn, char *buf, void *data)
{
	const char *tail;

	if (plen > nlen)
		return 0;
	tail = name + nlen - plen;
	if (memcmp(pattern, tail, plen) != 0)
		return 0;
	fn(buf, data);
	return 1;
}
/* Units assumed for the values of the file currently being parsed;
 * the importer hooks in this file overwrite it per format */
struct units xml_parsing_units;
/* Constant unit sets built from the SI_UNITS / IMPERIAL_UNITS initializers */
const struct units SI_units = SI_UNITS;
const struct units IMPERIAL_units = IMPERIAL_UNITS;
/*
 * Dive info as it is being built up..
 *
 * These are the parser's "current object" pointers; entry() dispatches
 * node text to whichever of them is active.
 */
static struct divecomputer *cur_dc;
static struct dive *cur_dive;
static dive_trip_t *cur_trip = NULL;
static struct sample *cur_sample;
/* Event being collected between event_start() and event_end() */
static struct {
int active;
duration_t time;
int type, flags, value;
const char *name;
} cur_event;
/* Dive computer settings collected from a "divecomputerid" element */
static struct {
struct {
const char *model;
uint32_t deviceid;
const char *nickname, *serial_nr, *firmware;
} dc;
} cur_settings;
/* TRUE while inside a "settings" element */
static gboolean in_settings = FALSE;
/* Broken-down time shared by the date/time matchers, so that separate
 * date and time nodes can be combined into one timestamp */
static struct tm cur_tm;
/* Index of the cylinder / weightsystem currently being filled in */
static int cur_cylinder_index, cur_ws_index;
/* Values carried over from the previous sample into the next one */
static int lastndl, laststoptime, laststopdepth, lastcns, lastpo2, lastindeco;
static int lastcylinderindex, lastsensor;
/*
 * Return the dive computer that parsed data should go to: the explicit
 * one if a divecomputer element is open, otherwise the one embedded in
 * the current dive.
 */
static struct divecomputer *get_dc(void)
{
	if (cur_dc)
		return cur_dc;
	return &cur_dive->dc;
}
/* Which file format is being imported; selects the format-specific
 * matchers in try_to_fill_sample() and try_to_fill_dive() */
static enum import_source {
UNKNOWN,
LIBDIVECOMPUTER,
DIVINGLOG,
UDDF,
} import_source;
/*
 * Parse a date with an optional time of day, either "D.M.Y h:m:s" or
 * "Y-M-D h:m:s". At least the three date fields must be present; missing
 * time fields default to 0. The result is stored in the shared cur_tm and
 * written to *_when as a timestamp. Unparseable input is reported on
 * stderr and leaves everything untouched.
 */
static void divedate(char *buffer, void *_when)
{
	timestamp_t *when = _when;
	int day, month, year;
	int hour = 0, minute = 0, second = 0;
	int fields;

	fields = sscanf(buffer, "%d.%d.%d %d:%d:%d", &day, &month, &year, &hour, &minute, &second);
	if (fields < 3)
		fields = sscanf(buffer, "%d-%d-%d %d:%d:%d", &year, &month, &day, &hour, &minute, &second);
	if (fields < 3) {
		fprintf(stderr, "Unable to parse date '%s'\n", buffer);
		return;
	}
	cur_tm.tm_year = year;
	cur_tm.tm_mon = month - 1;
	cur_tm.tm_mday = day;
	cur_tm.tm_hour = hour;
	cur_tm.tm_min = minute;
	cur_tm.tm_sec = second;
	*when = utc_mktime(&cur_tm);
}
/*
 * Parse "h:m" or "h:m:s" (seconds default to 0), merge it into the shared
 * cur_tm and write the resulting timestamp to *_when.
 * Input with fewer than two fields is silently ignored.
 */
static void divetime(char *buffer, void *_when)
{
	timestamp_t *when = _when;
	int hour, minute, second = 0;

	if (sscanf(buffer, "%d:%d:%d", &hour, &minute, &second) < 2)
		return;
	cur_tm.tm_hour = hour;
	cur_tm.tm_min = minute;
	cur_tm.tm_sec = second;
	*when = utc_mktime(&cur_tm);
}
/*
 * Libdivecomputer style timestamp: "2011-03-20 10:22:38".
 * All six fields are required; anything else is silently ignored.
 */
static void divedatetime(char *buffer, void *_when)
{
	timestamp_t *when = _when;
	int year, month, day;
	int hour, minute, second;
	int fields;

	fields = sscanf(buffer, "%d-%d-%d %d:%d:%d",
			&year, &month, &day, &hour, &minute, &second);
	if (fields != 6)
		return;
	cur_tm.tm_year = year;
	cur_tm.tm_mon = month - 1;
	cur_tm.tm_mday = day;
	cur_tm.tm_hour = hour;
	cur_tm.tm_min = minute;
	cur_tm.tm_sec = second;
	*when = utc_mktime(&cur_tm);
}
/* Result of parse_float(): FLOAT when a number was parsed, NEITHER otherwise */
enum number_type {
NEITHER,
FLOAT
};
/*
 * Parse a floating point number with g_ascii_strtod().
 *
 * On success the value is stored in *res, *endp points just past the
 * number and FLOAT is returned. If errno was set or no characters were
 * consumed, NEITHER is returned and *res is left alone.
 */
static enum number_type parse_float(char *buffer, double *res, char **endp)
{
	double parsed;

	errno = 0;
	parsed = g_ascii_strtod(buffer, endp);
	if (!errno && *endp != buffer) {
		*res = parsed;
		return FLOAT;
	}
	return NEITHER;
}
/* Parsed numeric value; despite the name it only has a floating point member */
union int_or_float {
double fp;
};
/* Parse buffer as a number into res->fp via parse_float(); the end
 * pointer is discarded, so trailing text is not examined */
static enum number_type integer_or_float(char *buffer, union int_or_float *res)
{
char *end;
return parse_float(buffer, &res->fp, &end);
}
/*
 * Parse a pressure reading and store it, rounded, as mbar in *_press.
 *
 * The value is interpreted according to xml_parsing_units.pressure:
 *  - PASCAL: divided by 100
 *  - BAR:    taken as mbar, unless it is below 5000, in which case it is
 *            assumed to really be bar
 *  - PSI:    multiplied by 68.95
 *
 * A value of exactly zero is ignored without complaint. Anything that is
 * not a number, or that does not end up strictly between 5 and 500000 mbar,
 * is reported as strange and not stored.
 */
static void pressure(char *buffer, void *_press)
{
	double mbar;
	pressure_t *pressure = _press;
	union int_or_float val;

	switch (integer_or_float(buffer, &val)) {
	case FLOAT:
		/* Just ignore zero values */
		if (!val.fp)
			break;
		switch (xml_parsing_units.pressure) {
		case PASCAL:
			mbar = val.fp / 100;
			break;
		case BAR:
			/* Assume mbar, but if it's really small, it's bar */
			mbar = val.fp;
			if (mbar < 5000)
				mbar = mbar * 1000;
			break;
		case PSI:
			mbar = val.fp * 68.95;
			break;
		default:
			/*
			 * Unknown unit setting: never read mbar uninitialized,
			 * force the value out of range so it gets reported below.
			 */
			mbar = 0;
			break;
		}
		if (mbar > 5 && mbar < 500000) {
			pressure->mbar = mbar + 0.5;
			break;
		}
		/* fallthrough */
	default:
		printf("Strange pressure reading %s\n", buffer);
	}
}
/*
 * Parse a salinity value and store ten times that value, rounded, in the
 * int that _salinity points to. Non-numeric input is reported on stdout.
 */
static void salinity(char *buffer, void *_salinity)
{
	union int_or_float val;
	int *target = _salinity;

	if (integer_or_float(buffer, &val) != FLOAT) {
		printf("Strange salinity reading %s\n", buffer);
		return;
	}
	*target = val.fp * 10.0 + 0.5;
}
/*
 * Parse a depth in the file's length unit (meters or feet, per
 * xml_parsing_units.length) and store it, rounded, as millimeters.
 * Non-numeric input is reported on stdout.
 */
static void depth(char *buffer, void *_depth)
{
	union int_or_float val;
	depth_t *target = _depth;

	if (integer_or_float(buffer, &val) != FLOAT) {
		printf("Strange depth reading %s\n", buffer);
		return;
	}
	if (xml_parsing_units.length == METERS)
		target->mm = val.fp * 1000 + 0.5;
	else if (xml_parsing_units.length == FEET)
		target->mm = val.fp * 304.8 + 0.5;
}
/*
 * Parse a weight in the file's weight unit (kg or lbs, per
 * xml_parsing_units.weight) and store it, rounded, as grams.
 * Non-numeric input is reported on stdout.
 */
static void weight(char *buffer, void *_weight)
{
	union int_or_float val;
	weight_t *target = _weight;

	if (integer_or_float(buffer, &val) != FLOAT) {
		printf("Strange weight reading %s\n", buffer);
		return;
	}
	if (xml_parsing_units.weight == KG)
		target->grams = val.fp * 1000 + 0.5;
	else if (xml_parsing_units.weight == LBS)
		target->grams = val.fp * 453.6 + 0.5;
}
/*
 * Parse a temperature in the file's temperature unit (per
 * xml_parsing_units.temperature) and store it as milli-Kelvin.
 * Only the Celsius conversion adds 0.5 for rounding; the Kelvin and
 * Fahrenheit conversions truncate. Non-numeric input is reported on stdout.
 */
static void temperature(char *buffer, void *_temperature)
{
	union int_or_float val;
	temperature_t *target = _temperature;

	if (integer_or_float(buffer, &val) != FLOAT) {
		printf("Strange temperature reading %s\n", buffer);
		return;
	}
	if (xml_parsing_units.temperature == KELVIN)
		target->mkelvin = val.fp * 1000;
	else if (xml_parsing_units.temperature == CELSIUS)
		target->mkelvin = val.fp * 1000 + ZERO_C_IN_MKELVIN + 0.5;
	else if (xml_parsing_units.temperature == FAHRENHEIT)
		target->mkelvin = (val.fp + 459.67) * 5000/9;
}
/*
 * Parse a time given either as plain seconds ("95") or as "min:sec"
 * ("1:35") and store the total number of seconds.
 * Anything else is reported on stdout and not stored.
 */
static void sampletime(char *buffer, void *_time)
{
	duration_t *time = _time;
	int min, sec;
	int fields = sscanf(buffer, "%d:%d", &min, &sec);

	if (fields == 1) {
		/* A single number is a count of seconds */
		sec = min;
		min = 0;
	} else if (fields != 2) {
		printf("Strange sample time reading %s\n", buffer);
		return;
	}
	time->seconds = sec + min*60;
}
/* Durations use exactly the same syntax as sample times */
static void duration(char *buffer, void *_time)
{
sampletime(buffer, _time);
}
/*
 * Parse a gas percentage and store it as integer permille.
 *
 * A value of at most 1.0 is taken to be a fraction and scaled to percent,
 * unless it is followed (after optional whitespace) by an explicit '%'
 * sign: "1.0" means 100%, "1.0%" means 1%.
 * The result must lie within 0..100 percent; anything else, including
 * non-numeric input, is reported on stdout and not stored.
 */
static void percent(char *buffer, void *_fraction)
{
	fraction_t *fraction = _fraction;
	double val;
	char *end;

	if (parse_float(buffer, &val, &end) == FLOAT) {
		/* Turn fractions into percent unless explicit.. */
		if (val <= 1.0) {
			for (; isspace(*end); end++)
				;
			if (*end != '%')
				val *= 100;
		}
		/* Then turn percent into our integer permille format */
		if (val >= 0 && val <= 100.0) {
			fraction->permille = val * 10 + 0.5;
			return;
		}
	}
	printf("Strange percentage reading %s\n", buffer);
}
/*
 * Parse a gas mix percentage for the current cylinder.
 * Values starting with '-' are skipped (libdivecomputer does negative
 * percentages), and nothing is parsed once cur_cylinder_index has reached
 * MAX_CYLINDERS.
 */
static void gasmix(char *buffer, void *_fraction)
{
	if (*buffer != '-' && cur_cylinder_index < MAX_CYLINDERS)
		percent(buffer, _fraction);
}
/* Matcher that accepts a nitrogen percentage and deliberately drops it */
static void gasmix_nitrogen(char *buffer, void *_gasmix)
{
/* Ignore n2 percentages. There's no value in them. */
}
/*
 * Parse a cylinder size and store 1000 times the value, rounded, as
 * milliliters. Non-numeric input is reported on stdout.
 */
static void cylindersize(char *buffer, void *_volume)
{
	union int_or_float val;
	volume_t *target = _volume;

	if (integer_or_float(buffer, &val) != FLOAT) {
		printf("Strange volume reading %s\n", buffer);
		return;
	}
	target->mliter = val.fp * 1000 + 0.5;
}
/*
 * Store a freshly malloc'ed copy of buffer, with leading and trailing
 * whitespace removed, in the char pointer that _res points to.
 *
 * If nothing but whitespace is left, or if the allocation fails, *_res is
 * left untouched. Any previous value of *_res is overwritten, not freed.
 */
static void utf8_string(char *buffer, void *_res)
{
	size_t size;
	char *res;

	/*
	 * The text is UTF-8, so bytes may have the high bit set. The <ctype.h>
	 * functions need such bytes passed as unsigned char values.
	 */
	while (isspace((unsigned char)*buffer))
		buffer++;
	size = strlen(buffer);
	while (size && isspace((unsigned char)buffer[size-1]))
		size--;
	if (!size)
		return;
	res = malloc(size + 1);
	if (!res)
		return;
	memcpy(res, buffer, size);
	res[size] = 0;
	*(char **)_res = res;
}
/*
 * Shorthand for match(): relies on the caller having "name", "len" and
 * "buf" in scope for the node name, its length and the node text.
 */
#define MATCH(pattern, fn, dest) \
match(pattern, strlen(pattern), name, len, fn, buf, dest)
/* Store the atoi() value of buffer in the int that _i points to */
static void get_index(char *buffer, void *_i)
{
	int parsed = atoi(buffer);

	*(int *)_i = parsed;
}
/*
 * Store a 0..5 rating in the int that _i points to.
 * Values outside that range leave the destination unchanged.
 */
static void get_rating(char *buffer, void *_i)
{
	int rating = atoi(buffer);

	if (rating < 0 || rating > 5)
		return;
	*(int *)_i = rating;
}
/* Parse a floating point value and store 1000 times it, rounded, as an
 * int. No error checking: unparseable input is whatever g_ascii_strtod()
 * returns for it. */
static void double_to_permil(char *buffer, void *_i)
{
int *i = _i;
*i = g_ascii_strtod(buffer, NULL) * 1000.0 + 0.5;
}
/*
 * Parse a hexadecimal number into the uint32_t that _i points to.
 *
 * strtoul() is used rather than strtol() so that values with the top bit
 * set (0x80000000 and up) are not clamped to LONG_MAX on platforms where
 * long is 32 bits wide. Unparseable input yields 0.
 */
static void hex_value(char *buffer, void *_i)
{
	uint32_t *i = _i;

	*i = strtoul(buffer, NULL, 16);
}
/* Map the literal text "NOTRIP" to NO_TRIP; everything else is TF_NONE */
static void get_tripflag(char *buffer, void *_tf)
{
	tripflag_t *tf = _tf;

	if (strcmp(buffer, "NOTRIP") == 0)
		*tf = NO_TRIP;
	else
		*tf = TF_NONE;
}
/*
 * Diving Log sample temperatures.
 *
 * Diving Log normally stores temperatures in Celsius, except for the
 * sample temperatures, which are in Fahrenheit. Entries such as
 *
 *	<Temp>32.0</Temp>
 *
 * are most likely a "no temperature" zero that was converted from
 * Celsius to Fahrenheit, so they are thrown away.
 *
 * To make things worse, the sample temperatures are sometimes in Celsius
 * after all (apparently in SI locales). So the rule used here is:
 *
 *  - below 32.0:   Celsius
 *  - exactly 32.0: garbage, a missing temperature
 *  - above 32.0:   Fahrenheit
 */
static void fahrenheit(char *buffer, void *_temperature)
{
	union int_or_float val;
	temperature_t *target = _temperature;

	if (integer_or_float(buffer, &val) != FLOAT) {
		fprintf(stderr, "Crazy Diving Log temperature reading %s\n", buffer);
		return;
	}
	/* Floating point equality is evil, but works for small integers */
	if (val.fp == 32.0)
		return;
	if (val.fp < 32.0)
		target->mkelvin = C_to_mkelvin(val.fp);
	else
		target->mkelvin = F_to_mkelvin(val.fp);
}
/*
 * Diving Log sample pressures.
 *
 * Diving Log has the sample pressures in PSI, while the tank working
 * pressure is in bar. Unit confusion like this is why subsurface keeps
 * everything in typed structures where you have to spell out
 * "pressure->mbar" to get at the value.
 *
 * Worse, the sample pressures are apparently sometimes in bar and
 * sometimes in psi. Dirk suspects a bug in the Diving Log Uemis importer:
 * they may always be meant to be bar, with the importer getting the
 * samples wrong.
 *
 * There is no real way to tell, so an arbitrary cut-off is used: values
 * above 400 are taken as psi, everything else as bar.
 */
static void psi_or_bar(char *buffer, void *_pressure)
{
	union int_or_float val;
	pressure_t *target = _pressure;

	if (integer_or_float(buffer, &val) != FLOAT) {
		fprintf(stderr, "Crazy Diving Log PSI reading %s\n", buffer);
		return;
	}
	if (val.fp > 400)
		target->mbar = psi_to_mbar(val.fp);
	else
		target->mbar = val.fp * 1000 + 0.5;
}
/*
 * Diving Log specific sample fields.
 * Returns 1 if the node was recognised and consumed, 0 otherwise.
 */
static int divinglog_fill_sample(struct sample *sample, const char *name, int len, char *buf)
{
	if (MATCH(".p.time", sampletime, &sample->time))
		return 1;
	if (MATCH(".p.depth", depth, &sample->depth))
		return 1;
	if (MATCH(".p.temp", fahrenheit, &sample->temperature))
		return 1;
	if (MATCH(".p.press1", psi_or_bar, &sample->cylinderpressure))
		return 1;
	return 0;
}
/*
 * UDDF gas switch: the node text is read with atoi() and used as a
 * cylinder index; a gas switch event is added at the time of the sample
 * passed in.
 */
static void uddf_gasswitch(char *buffer, void *_sample)
{
	struct sample *sample = _sample;
	int cylinder = atoi(buffer);

	add_gas_switch_event(cur_dive, get_dc(), sample->time.seconds, cylinder);
}
/*
 * UDDF specific sample fields.
 * Returns 1 if the node was recognised and consumed, 0 otherwise.
 */
static int uddf_fill_sample(struct sample *sample, const char *name, int len, char *buf)
{
	if (MATCH(".divetime", sampletime, &sample->time))
		return 1;
	if (MATCH(".depth", depth, &sample->depth))
		return 1;
	if (MATCH(".temperature", temperature, &sample->temperature))
		return 1;
	if (MATCH(".tankpressure", pressure, &sample->cylinderpressure))
		return 1;
	if (MATCH(".switchmix.ref", uddf_gasswitch, sample))
		return 1;
	return 0;
}
/* Parse an event time like a sample time. When a sample is currently
 * open, the parsed value is treated as an offset and the sample's own
 * time is added to it. */
static void eventtime(char *buffer, void *_duration)
{
duration_t *duration = _duration;
sampletime(buffer, duration);
if (cur_sample)
duration->seconds += cur_sample->time.seconds;
}
/* Settings: pick up the ".autogroup.state" value and hand it to set_autogroup() */
static void try_to_match_autogroup(const char *name, char *buf)
{
	int len = strlen(name);
	int state;

	start_match("autogroup", name, buf);
	if (!MATCH(".autogroup.state", get_index, &state)) {
		nonmatch("autogroup", name, buf);
		return;
	}
	set_autogroup(state);
}
/* Settings: collect the fields of a "divecomputerid" element into cur_settings.dc */
static void try_to_fill_dc_settings(const char *name, char *buf)
{
	int len = strlen(name);

	start_match("divecomputerid", name, buf);
	if (MATCH("divecomputerid.model", utf8_string, &cur_settings.dc.model) ||
	    MATCH("divecomputerid.deviceid", hex_value, &cur_settings.dc.deviceid) ||
	    MATCH("divecomputerid.nickname", utf8_string, &cur_settings.dc.nickname) ||
	    MATCH("divecomputerid.serial", utf8_string, &cur_settings.dc.serial_nr) ||
	    MATCH("divecomputerid.firmware", utf8_string, &cur_settings.dc.firmware))
		return;
	nonmatch("divecomputerid", name, buf);
}
/* We're inside an event - collect its name, time, type, flags and value into cur_event */
static void try_to_fill_event(const char *name, char *buf)
{
	int len = strlen(name);

	start_match("event", name, buf);
	if (MATCH(".event", utf8_string, &cur_event.name) ||
	    MATCH(".name", utf8_string, &cur_event.name) ||
	    MATCH(".time", eventtime, &cur_event.time) ||
	    MATCH(".type", get_index, &cur_event.type) ||
	    MATCH(".flags", get_index, &cur_event.flags) ||
	    MATCH(".value", get_index, &cur_event.value))
		return;
	nonmatch("event", name, buf);
}
/*
 * Per-divecomputer data fields (depths, durations, temperatures, surface
 * pressure, salinity) under their various spellings.
 * Returns 1 if the node was recognised and consumed, 0 otherwise.
 */
static int match_dc_data_fields(struct divecomputer *dc, const char *name, int len, char *buf)
{
	return MATCH(".maxdepth", depth, &dc->maxdepth) ||
	       MATCH(".meandepth", depth, &dc->meandepth) ||
	       MATCH(".depth.max", depth, &dc->maxdepth) ||
	       MATCH(".depth.mean", depth, &dc->meandepth) ||
	       MATCH(".duration", duration, &dc->duration) ||
	       MATCH(".divetime", duration, &dc->duration) ||
	       MATCH(".divetimesec", duration, &dc->duration) ||
	       MATCH(".surfacetime", duration, &dc->surfacetime) ||
	       MATCH(".airtemp", temperature, &dc->airtemp) ||
	       MATCH(".watertemp", temperature, &dc->watertemp) ||
	       MATCH(".temperature.air", temperature, &dc->airtemp) ||
	       MATCH(".temperature.water", temperature, &dc->watertemp) ||
	       MATCH(".surface.pressure", pressure, &dc->surface_pressure) ||
	       MATCH(".water.salinity", salinity, &dc->salinity);
}
/* We're inside a divecomputer element. Try to convert whatever value to a divecomputer value */
static void try_to_fill_dc(struct divecomputer *dc, const char *name, char *buf)
{
	int len = strlen(name);

	start_match("divecomputer", name, buf);
	if (MATCH(".date", divedate, &dc->when) ||
	    MATCH(".time", divetime, &dc->when) ||
	    MATCH(".model", utf8_string, &dc->model) ||
	    MATCH(".deviceid", hex_value, &dc->deviceid) ||
	    MATCH(".diveid", hex_value, &dc->diveid) ||
	    match_dc_data_fields(dc, name, len, buf))
		return;
	nonmatch("divecomputer", name, buf);
}
void add_gas_switch_event(struct dive *dive, struct divecomputer *dc, int seconds, int idx)
{
/* The gas switch event format is insane. It will be fixed, I think */
int o2 = dive->cylinder[idx].gasmix.o2.permille;
int he = dive->cylinder[idx].gasmix.he.permille;
int value;
if (!o2)
o2 = O2_IN_AIR;
o2 = (o2+5) / 10;
he = (he+5) / 10;
value = o2 + (he << 16);
add_event(dc, seconds, 11, 0, value, "gaschange");
}
/*
 * Store the cylinder index of the current sample. When it differs from
 * the index seen last, also record a gas switch event at the sample time.
 */
static void get_cylinderindex(char *buffer, void *_i)
{
	int *index = _i;

	*index = atoi(buffer);
	if (*index == lastcylinderindex)
		return;
	add_gas_switch_event(cur_dive, get_dc(), cur_sample->time.seconds, *index);
	lastcylinderindex = *index;
}
static void get_sensor(char *buffer, void *_i)
{
int *i = _i;
*i = atoi(buffer);
lastsensor = *i;
}
/*
 * We're in samples - try to convert the random xml value to something useful.
 * The generic ".sample.*" names are tried first, then the matchers that
 * are specific to the import source.
 */
static void try_to_fill_sample(struct sample *sample, const char *name, char *buf)
{
	int len = strlen(name);
	int in_deco;

	start_match("sample", name, buf);
	if (MATCH(".sample.pressure", pressure, &sample->cylinderpressure) ||
	    MATCH(".sample.cylpress", pressure, &sample->cylinderpressure) ||
	    MATCH(".sample.cylinderindex", get_cylinderindex, &sample->sensor) ||
	    MATCH(".sample.sensor", get_sensor, &sample->sensor) ||
	    MATCH(".sample.depth", depth, &sample->depth) ||
	    MATCH(".sample.temp", temperature, &sample->temperature) ||
	    MATCH(".sample.temperature", temperature, &sample->temperature) ||
	    MATCH(".sample.sampletime", sampletime, &sample->time) ||
	    MATCH(".sample.time", sampletime, &sample->time) ||
	    MATCH(".sample.ndl", sampletime, &sample->ndl))
		return;
	if (MATCH(".sample.in_deco", get_index, &in_deco)) {
		/* Only the exact value 1 counts as "in deco" */
		sample->in_deco = (in_deco == 1);
		return;
	}
	if (MATCH(".sample.stoptime", sampletime, &sample->stoptime) ||
	    MATCH(".sample.stopdepth", depth, &sample->stopdepth) ||
	    MATCH(".sample.cns", get_index, &sample->cns) ||
	    MATCH(".sample.po2", double_to_permil, &sample->po2))
		return;
	if (import_source == DIVINGLOG) {
		if (divinglog_fill_sample(sample, name, len, buf))
			return;
	} else if (import_source == UDDF) {
		if (uddf_fill_sample(sample, name, len, buf))
			return;
	}
	nonmatch("sample", name, buf);
}
/* Diving Log city and country names, remembered until the place name shows up */
static const char *country, *city;

/*
 * Build the dive location as "place[, city][, country]" and store a
 * malloc'ed copy of it in the char pointer that _location points to.
 *
 * The string is sized with a measuring snprintf() call first, so place
 * names of any length are handled; a fixed-size staging buffer would get
 * truncated while snprintf() still reports the full length.
 * If formatting or allocation fails, *_location is left untouched.
 *
 * city and country are reset to NULL afterwards in every case.
 * NOTE(review): the strings they pointed to are not freed here - confirm
 * whether anything else still owns them.
 */
static void divinglog_place(char *place, void *_location)
{
	char **location = _location;
	const char *city_sep = city ? ", " : "";
	const char *city_name = city ? city : "";
	const char *country_sep = country ? ", " : "";
	const char *country_name = country ? country : "";
	char *p = NULL;
	int len;

	len = snprintf(NULL, 0, "%s%s%s%s%s",
		       place, city_sep, city_name, country_sep, country_name);
	if (len >= 0)
		p = malloc((size_t)len + 1);
	if (p) {
		snprintf(p, (size_t)len + 1, "%s%s%s%s%s",
			 place, city_sep, city_name, country_sep, country_name);
		*location = p;
	}
	city = NULL;
	country = NULL;
}
/*
 * Diving Log specific dive fields. Cylinder data always goes to cylinder 0.
 * Returns 1 if the node was recognised and consumed, 0 otherwise.
 */
static int divinglog_dive_match(struct dive *dive, const char *name, int len, char *buf)
{
	if (MATCH(".divedate", divedate, &dive->when))
		return 1;
	if (MATCH(".entrytime", divetime, &dive->when))
		return 1;
	if (MATCH(".depth", depth, &dive->dc.maxdepth))
		return 1;
	if (MATCH(".tanktype", utf8_string, &dive->cylinder[0].type.description))
		return 1;
	if (MATCH(".tanksize", cylindersize, &dive->cylinder[0].type.size))
		return 1;
	if (MATCH(".presw", pressure, &dive->cylinder[0].type.workingpressure))
		return 1;
	if (MATCH(".press", pressure, &dive->cylinder[0].start))
		return 1;
	if (MATCH(".prese", pressure, &dive->cylinder[0].end))
		return 1;
	if (MATCH(".comments", utf8_string, &dive->notes))
		return 1;
	if (MATCH(".buddy.names", utf8_string, &dive->buddy))
		return 1;
	if (MATCH(".country.name", utf8_string, &country))
		return 1;
	if (MATCH(".city.name", utf8_string, &city))
		return 1;
	if (MATCH(".place.name", divinglog_place, &dive->location))
		return 1;
	return 0;
}
/*
 * Uddf specifies ISO 8601 time format.
 *
 * There are many variations on that. This handles the useful cases:
 * the extended form "YYYY-MM-DDThh:mm[:ss]" and the basic form
 * "YYYYMMDDThhmm[ss]", with either 'T' or a space between date and time.
 * Missing seconds default to 0. Anything else is reported on stdout.
 */
static void uddf_datetime(char *buffer, void *_when)
{
	timestamp_t *when = _when;
	struct tm tm = { 0 };
	char sep;
	int y, m, d, hh, mm, ss;
	int fields;

	fields = sscanf(buffer, "%d-%d-%d%c%d:%d:%d", &y, &m, &d, &sep, &hh, &mm, &ss);
	if (fields != 6 && fields != 7)
		fields = sscanf(buffer, "%04d%02d%02d%c%02d%02d%02d", &y, &m, &d, &sep, &hh, &mm, &ss);
	/* sep is only looked at once enough fields were read for it to be set */
	if ((fields != 6 && fields != 7) || (sep != 'T' && sep != ' ')) {
		printf("Bad date time %s\n", buffer);
		return;
	}
	if (fields == 6)
		ss = 0;
	tm.tm_year = y;
	tm.tm_mon = m - 1;
	tm.tm_mday = d;
	tm.tm_hour = hh;
	tm.tm_min = mm;
	tm.tm_sec = ss;
	*when = utc_mktime(&tm);
}
/*
 * Generate uddf_<name>() matchers for dates that come as separate fields:
 * each one stores atoi(buffer) + offset into cur_tm.tm_<name> and then
 * recomputes the timestamp from the whole of cur_tm.
 * The month uses an offset of -1 because tm_mon counts from zero.
 */
#define uddf_datedata(name, offset) \
static void uddf_##name(char *buffer, void *_when) \
{ timestamp_t *when = _when; \
cur_tm.tm_##name = atoi(buffer) + offset; \
*when = utc_mktime(&cur_tm); }
uddf_datedata(year, 0)
uddf_datedata(mon, -1)
uddf_datedata(mday, 0)
uddf_datedata(hour, 0)
uddf_datedata(min, 0)
/*
 * UDDF specific dive fields.
 * Returns 1 if the node was recognised and consumed, 0 otherwise.
 */
static int uddf_dive_match(struct dive *dive, const char *name, int len, char *buf)
{
	if (MATCH(".datetime", uddf_datetime, &dive->when))
		return 1;
	if (MATCH(".diveduration", duration, &dive->dc.duration))
		return 1;
	if (MATCH(".greatestdepth", depth, &dive->dc.maxdepth))
		return 1;
	if (MATCH(".date.year", uddf_year, &dive->when))
		return 1;
	if (MATCH(".date.month", uddf_mon, &dive->when))
		return 1;
	if (MATCH(".date.day", uddf_mday, &dive->when))
		return 1;
	if (MATCH(".time.hour", uddf_hour, &dive->when))
		return 1;
	if (MATCH(".time.minute", uddf_min, &dive->when))
		return 1;
	return 0;
}
/*
 * This parses "floating point" into micro-degrees.
 * We don't do exponentials etc, if somebody does
 * gps locations in that format, they are insane.
 *
 * Leading whitespace and an optional sign are accepted. Six decimals are
 * kept, the seventh is used for rounding and any further digits are
 * skipped. *end is set to the first character after the number.
 */
static degrees_t parse_degrees(char *buf, char **end)
{
	int negative = 0, magnitude = 0;
	int remaining;
	degrees_t result;

	while (isspace(*buf))
		buf++;
	if (*buf == '-') {
		negative = 1;
		buf++;
	} else if (*buf == '+') {
		buf++;
	}
	/* Integer part */
	for (; isdigit(*buf); buf++)
		magnitude = 10*magnitude + *buf - '0';
	if (*buf == '.')
		buf++;
	/* Always scale by 10^6, consuming up to six decimals on the way */
	for (remaining = 6; remaining > 0; remaining--) {
		magnitude *= 10;
		if (isdigit(*buf)) {
			magnitude += *buf - '0';
			buf++;
		}
	}
	/* Rounding */
	if (*buf >= '5' && *buf <= '9')
		magnitude++;
	while (isdigit(*buf))
		buf++;
	*end = buf;
	result.udeg = negative ? -magnitude : magnitude;
	return result;
}
/* Parse a single coordinate as the dive's latitude; trailing text is ignored */
static void gps_lat(char *buffer, void *_dive)
{
char *end;
struct dive *dive = _dive;
dive->latitude = parse_degrees(buffer, &end);
}
/* Parse a single coordinate as the dive's longitude; trailing text is ignored */
static void gps_long(char *buffer, void *_dive)
{
char *end;
struct dive *dive = _dive;
dive->longitude = parse_degrees(buffer, &end);
}
/* Parse "latitude longitude": the second number is read starting where
 * the first one ended (parse_degrees() skips the whitespace in between) */
static void gps_location(char *buffer, void *_dive)
{
char *end;
struct dive *dive = _dive;
dive->latitude = parse_degrees(buffer, &end);
dive->longitude = parse_degrees(end, &end);
}
/*
 * We're in the top-level dive xml. Try to convert whatever value to a dive value
 *
 * Matching is by name suffix and the first match wins, so the order of
 * the tests below matters. Format-specific matchers are tried before the
 * generic ones.
 */
static void try_to_fill_dive(struct dive *dive, const char *name, char *buf)
{
int len = strlen(name);
start_match("dive", name, buf);
/* Import-source specific names first */
switch (import_source) {
case DIVINGLOG:
if (divinglog_dive_match(dive, name, len, buf))
return;
break;
case UDDF:
if (uddf_dive_match(dive, name, len, buf))
return;
break;
default:
break;
}
if (MATCH(".number", get_index, &dive->number))
return;
if (MATCH(".tripflag", get_tripflag, &dive->tripflag))
return;
if (MATCH(".date", divedate, &dive->when))
return;
if (MATCH(".time", divetime, &dive->when))
return;
if (MATCH(".datetime", divedatetime, &dive->when))
return;
/*
 * Legacy format note: per-dive depths and duration get saved
 * in the first dive computer entry
 */
if (match_dc_data_fields(&dive->dc, name, len, buf))
return;
if (MATCH(".cylinderstartpressure", pressure, &dive->cylinder[0].start))
return;
if (MATCH(".cylinderendpressure", pressure, &dive->cylinder[0].end))
return;
/* Position, either as one "lat long" pair or as separate coordinates */
if (MATCH(".gps", gps_location, dive))
return;
if (MATCH(".latitude", gps_lat, dive))
return;
if (MATCH(".sitelat", gps_lat, dive))
return;
if (MATCH(".longitude", gps_long, dive))
return;
if (MATCH(".sitelon", gps_long, dive))
return;
/* Free-text fields */
if (MATCH(".location", utf8_string, &dive->location))
return;
if (MATCH("dive.name", utf8_string, &dive->location))
return;
if (MATCH(".suit", utf8_string, &dive->suit))
return;
if (MATCH(".divesuit", utf8_string, &dive->suit))
return;
if (MATCH(".notes", utf8_string, &dive->notes))
return;
if (MATCH(".divemaster", utf8_string, &dive->divemaster))
return;
if (MATCH(".buddy", utf8_string, &dive->buddy))
return;
if (MATCH("dive.rating", get_rating, &dive->rating))
return;
if (MATCH("dive.visibility", get_rating, &dive->visibility))
return;
/*
 * Equipment of the cylinder / weightsystem currently being parsed.
 * NOTE(review): cur_cylinder_index and cur_ws_index are not range-checked
 * here (only gasmix() checks against MAX_CYLINDERS) - confirm that a file
 * with many cylinder or weightsystem elements cannot index past the arrays.
 */
if (MATCH(".cylinder.size", cylindersize, &dive->cylinder[cur_cylinder_index].type.size))
return;
if (MATCH(".cylinder.workpressure", pressure, &dive->cylinder[cur_cylinder_index].type.workingpressure))
return;
if (MATCH(".cylinder.description", utf8_string, &dive->cylinder[cur_cylinder_index].type.description))
return;
if (MATCH(".cylinder.start", pressure, &dive->cylinder[cur_cylinder_index].start))
return;
if (MATCH(".cylinder.end", pressure, &dive->cylinder[cur_cylinder_index].end))
return;
if (MATCH(".weightsystem.description", utf8_string, &dive->weightsystem[cur_ws_index].description))
return;
if (MATCH(".weightsystem.weight", weight, &dive->weightsystem[cur_ws_index].weight))
return;
/* No leading dot: this accepts any name that merely ends in "weight" */
if (MATCH("weight", weight, &dive->weightsystem[cur_ws_index].weight))
return;
/* Gas mix of the current cylinder */
if (MATCH(".o2", gasmix, &dive->cylinder[cur_cylinder_index].gasmix.o2))
return;
if (MATCH(".o2percent", gasmix, &dive->cylinder[cur_cylinder_index].gasmix.o2))
return;
if (MATCH(".n2", gasmix_nitrogen, &dive->cylinder[cur_cylinder_index].gasmix))
return;
if (MATCH(".he", gasmix, &dive->cylinder[cur_cylinder_index].gasmix.he))
return;
if (MATCH(".divetemperature.air", temperature, &dive->airtemp))
return;
nonmatch("dive", name, buf);
}
/* We're in the top-level trip xml. Try to convert whatever value to a trip value */
static void try_to_fill_trip(dive_trip_t **dive_trip_p, const char *name, char *buf)
{
	int len = strlen(name);
	dive_trip_t *trip;

	start_match("trip", name, buf);
	trip = *dive_trip_p;
	if (MATCH(".date", divedate, &trip->when) ||
	    MATCH(".time", divetime, &trip->when) ||
	    MATCH(".location", utf8_string, &trip->location) ||
	    MATCH(".notes", utf8_string, &trip->notes))
		return;
	nonmatch("trip", name, buf);
}
/*
* While in some formats file boundaries are dive boundaries, in many
* others (as for example in our native format) there are
* multiple dives per file, so there can be other events too that
* trigger a "new dive" marker and you may get some nesting due
* to that. Just ignore nesting levels.
* On the flipside it is possible that we start an XML file that ends
* up having no dives in it at all - don't create a bogus empty dive
* for those. It's not entirely clear what is the minimum set of data
* to make a dive valid, but if it has no location, no date and no
* samples I'm pretty sure it's useless.
*/
static gboolean is_dive(void)
{
return (cur_dive &&
(cur_dive->location || cur_dive->when || cur_dive->dc.samples));
}
/*
 * Forget the values carried over from sample to sample.
 * The dc argument is not used.
 */
static void reset_dc_info(struct divecomputer *dc)
{
	lastindeco = 0;
	laststopdepth = 0;
	laststoptime = 0;
	lastndl = 0;
	lastpo2 = 0;
	lastcns = 0;
	lastcylinderindex = 0;
	lastsensor = 0;
}
/* Free the strings collected in cur_settings.dc and clear all of its fields */
static void reset_dc_settings(void)
{
	free((void *)cur_settings.dc.model);
	cur_settings.dc.model = NULL;

	free((void *)cur_settings.dc.nickname);
	cur_settings.dc.nickname = NULL;

	free((void *)cur_settings.dc.serial_nr);
	cur_settings.dc.serial_nr = NULL;

	free((void *)cur_settings.dc.firmware);
	cur_settings.dc.firmware = NULL;

	cur_settings.dc.deviceid = 0;
}
/* Entering a "settings" element: entry() routes node text to the settings matchers */
static void settings_start(void)
{
in_settings = TRUE;
}
/* Leaving the "settings" element */
static void settings_end(void)
{
in_settings = FALSE;
}
static void dc_settings_start(void)
{
reset_dc_settings();
}
/*
 * End of a "divecomputerid" element: create the device info entry for the
 * collected model and device id, and copy over serial number, firmware
 * and nickname where the entry does not have them yet. The collected
 * settings are released afterwards.
 */
static void dc_settings_end(void)
{
	struct device_info *info = create_device_info(cur_settings.dc.model, cur_settings.dc.deviceid);

	if (info) {
		/* Only fill in what is missing; existing values win */
		if (cur_settings.dc.serial_nr && !info->serial_nr)
			info->serial_nr = strdup(cur_settings.dc.serial_nr);
		if (cur_settings.dc.firmware && !info->firmware)
			info->firmware = strdup(cur_settings.dc.firmware);
		if (cur_settings.dc.nickname && !info->nickname)
			info->nickname = strdup(cur_settings.dc.nickname);
	}
	reset_dc_settings();
}
/*
 * Start of a dive element. A nested start while a dive is already open is
 * ignored. Otherwise a new dive is allocated, the per-sample carry-over
 * state and cur_tm are cleared, and the dive is added to the trip that is
 * currently open, if any.
 */
static void dive_start(void)
{
	if (!cur_dive) {
		cur_dive = alloc_dive();
		reset_dc_info(&cur_dive->dc);
		memset(&cur_tm, 0, sizeof(cur_tm));
		if (cur_trip) {
			add_dive_to_trip(cur_dive, cur_trip);
			cur_dive->tripflag = IN_TRIP;
		}
	}
}
/*
 * End of a dive element: record the dive in the target table if it holds
 * any useful data (see is_dive()), otherwise throw it away. The per-dive
 * parser state is reset either way.
 *
 * NOTE(review): a discarded dive is released with a plain free() even
 * though dive_start() may have added it to cur_trip - confirm that the
 * trip cannot be left holding a stale pointer.
 */
static void dive_end(void)
{
	if (cur_dive) {
		if (is_dive())
			record_dive_to_table(cur_dive, target_table);
		else
			free(cur_dive);
		cur_dive = NULL;
		cur_dc = NULL;
		cur_cylinder_index = 0;
		cur_ws_index = 0;
	}
}
/* Start of a trip element. Ignored when a trip is already open; otherwise
 * any dive still open is finished first, then a zeroed trip is allocated
 * and cur_tm is cleared. */
static void trip_start(void)
{
if (cur_trip)
return;
dive_end();
/* NOTE(review): the calloc() result is not checked before later use */
cur_trip = calloc(sizeof(dive_trip_t),1);
memset(&cur_tm, 0, sizeof(cur_tm));
}
/* End of a trip element: hand the open trip to insert_trip() and forget it */
static void trip_end(void)
{
	if (cur_trip) {
		insert_trip(&cur_trip);
		cur_trip = NULL;
	}
}
/* Start of an event element: clear cur_event and mark it active so that
 * entry() routes node text to try_to_fill_event() */
static void event_start(void)
{
memset(&cur_event, 0, sizeof(cur_event));
cur_event.active = 1;
}
/* End of an event element: add the collected event to the current dive
 * computer, unless it has no name or is named "surface", then release the
 * name and mark the event inactive. */
static void event_end(void)
{
struct divecomputer *dc = get_dc();
if (cur_event.name) {
/* Events named "surface" are dropped */
if (strcmp(cur_event.name, "surface") != 0)
add_event(dc, cur_event.time.seconds,
cur_event.type, cur_event.flags,
cur_event.value, cur_event.name);
/* The name was allocated by utf8_string() */
free((void *)cur_event.name);
}
cur_event.active = 0;
}
/* Start of a cylinder / gas mix element: nothing to set up */
static void cylinder_start(void)
{
}
/* End of a cylinder / gas mix element: later data goes to the next cylinder.
 * The index is not capped here. */
static void cylinder_end(void)
{
cur_cylinder_index++;
}
/* Start of a weightsystem element: nothing to set up */
static void ws_start(void)
{
}
/* End of a weightsystem element: later data goes to the next weightsystem.
 * The index is not capped here. */
static void ws_end(void)
{
cur_ws_index++;
}
/* Start of a sample element: get a new sample from the current dive
 * computer and pre-fill it with the values of the previous sample, so
 * that fields the file leaves out keep their last known value. */
static void sample_start(void)
{
cur_sample = prepare_sample(get_dc());
cur_sample->ndl.seconds = lastndl;
cur_sample->in_deco = lastindeco;
cur_sample->stoptime.seconds = laststoptime;
cur_sample->stopdepth.mm = laststopdepth;
cur_sample->cns = lastcns;
cur_sample->po2 = lastpo2;
cur_sample->sensor = lastsensor;
}
/* End of a sample element: commit the sample to the current dive computer
 * and remember its values as defaults for the next sample. Does nothing
 * when no dive is open. */
static void sample_end(void)
{
if (!cur_dive)
return;
finish_sample(get_dc());
lastndl = cur_sample->ndl.seconds;
lastindeco = cur_sample->in_deco;
laststoptime = cur_sample->stoptime.seconds;
laststopdepth = cur_sample->stopdepth.mm;
lastcns = cur_sample->cns;
lastpo2 = cur_sample->po2;
cur_sample = NULL;
}
/*
 * Start of a divecomputer element: select the dive computer that the
 * following data goes to. This is the last one in the dive's list, or a
 * newly allocated one appended to the list if the last one already holds
 * data (samples, a model or a date). If that allocation fails, the last
 * one is reused.
 */
static void divecomputer_start(void)
{
	struct divecomputer *dc;

	/* Walk to the end of the dive computer list */
	for (dc = &cur_dive->dc; dc->next; dc = dc->next)
		;
	/* Did we already fill that in? */
	if (dc->samples || dc->model || dc->when) {
		struct divecomputer *fresh = calloc(1, sizeof(*fresh));

		if (fresh) {
			dc->next = fresh;
			dc = fresh;
		}
	}
	/* .. this is the one we'll use */
	cur_dc = dc;
	reset_dc_info(dc);
}
/* End of a divecomputer element: a dive computer without a date of its
 * own inherits the dive's date. Data then goes to the dive's implicit
 * dive computer again. */
static void divecomputer_end(void)
{
if (!cur_dc->when)
cur_dc->when = cur_dive->when;
cur_dc = NULL;
}
/*
 * Dispatch the text of one node to the matcher for the innermost object
 * that is currently being built: settings, event, sample, dive computer,
 * dive or trip, in that order of priority. Text outside all of those is
 * dropped.
 */
static void entry(const char *name, char *buf)
{
	if (in_settings) {
		/* Settings nodes are offered to both settings matchers */
		try_to_fill_dc_settings(name, buf);
		try_to_match_autogroup(name, buf);
	} else if (cur_event.active) {
		try_to_fill_event(name, buf);
	} else if (cur_sample) {
		try_to_fill_sample(cur_sample, name, buf);
	} else if (cur_dc) {
		try_to_fill_dc(cur_dc, name, buf);
	} else if (cur_dive) {
		try_to_fill_dive(cur_dive, name, buf);
	} else if (cur_trip) {
		try_to_fill_trip(&cur_trip, name, buf);
	}
}
/*
 * Build the lower-cased, dot-separated path of a node ("root.parent.node")
 * inside buf and return a pointer to where it starts.
 *
 * The string is written backwards from the end of buf, starting with the
 * node's own name and then walking up the parents. When the buffer runs
 * out, the front of the path is cut off, so the returned string always
 * keeps the innermost names - which is what the suffix matching needs.
 * A NULL node or a node without a name yields the literal "root".
 */
static const char *nodename(xmlNode *node, char *buf, int len)
{
if (!node || !node->name)
return "root";
/* Start at the end of the buffer and put the terminator there */
buf += len;
*--buf = 0;
len--;
for(;;) {
const char *name = node->name;
int i = strlen(name);
/* Copy this name back to front, lower-cased */
while (--i >= 0) {
unsigned char c = name[i];
*--buf = tolower(c);
if (!--len)
return buf;
}
node = node->parent;
if (!node || !node->name)
return buf;
*--buf = '.';
if (!--len)
return buf;
}
}
/* Size of the buffer that holds a node's dotted path name */
#define MAXNAME 64
/* Hand the text content of one node, if it has any that is not blank,
 * to entry() together with the dotted path name of the node. */
static void visit_one_node(xmlNode *node)
{
char *content;
char buffer[MAXNAME];
const char *name;
content = node->content;
if (!content || xmlIsBlankNode(node))
return;
/* Don't print out the node name if it is "text" */
/* i.e. name the content after the nearest ancestor that has a real name */
while (!node->name || !strcmp(node->name, "text"))
node = node->parent;
name = nodename(node, buffer, sizeof(buffer));
entry(name, content);
}
static void traverse(xmlNode *root);

/* Walk the attributes of a node; each attribute's children are traversed like a node list */
static void traverse_properties(xmlNode *node)
{
	xmlAttr *attr = node->properties;

	while (attr) {
		traverse(attr->children);
		attr = attr->next;
	}
}
/*
 * Process one node completely: first its own content, then its
 * attributes, and finally its children.
 */
static void visit(xmlNode *n)
{
	xmlNode *kids;

	visit_one_node(n);
	traverse_properties(n);

	kids = n->children;
	traverse(kids);
}
/*
 * Called when a "Divinglog" element is seen: mark the file as a Diving
 * Log import and start from SI units.
 *
 * Diving Log units are really strange:
 *  - temperatures are in C, except in samples, where they are in
 *    Fahrenheit;
 *  - depths are in meters;
 *  - pressure is in PSI in the samples, but in bar when it comes to
 *    working pressure.
 */
static void DivingLog_importer(void)
{
	xml_parsing_units = SI_units;
	import_source = DIVINGLOG;
}
/*
 * Called when a "uddf" element is seen: mark the file as a UDDF import
 * and parse with SI units, except that pressure is taken as Pascal and
 * temperature as Kelvin.
 */
static void uddf_importer(void)
{
	xml_parsing_units = SI_units;
	xml_parsing_units.temperature = KELVIN;
	xml_parsing_units.pressure = PASCAL;
	import_source = UDDF;
}
/*
 * Elements that open and close a parsing context.
 *
 * traverse() compares each element name against this table (exact,
 * case-sensitive match), calls 'start' before descending into the
 * element and 'end' afterwards; either hook may be absent. The list ends
 * with an entry whose name is NULL.
 *
 * I'm sure this could be done as some fancy DTD rules.
 * It's just not worth the headache.
 */
static struct nesting {
	const char *name;
	void (*start)(void);
	void (*end)(void);
} nesting[] = {
	{ .name = "divecomputerid", .start = dc_settings_start, .end = dc_settings_end },
	{ .name = "settings", .start = settings_start, .end = settings_end },
	{ .name = "dive", .start = dive_start, .end = dive_end },
	{ .name = "Dive", .start = dive_start, .end = dive_end },
	{ .name = "trip", .start = trip_start, .end = trip_end },
	{ .name = "sample", .start = sample_start, .end = sample_end },
	{ .name = "waypoint", .start = sample_start, .end = sample_end },
	{ .name = "SAMPLE", .start = sample_start, .end = sample_end },
	{ .name = "reading", .start = sample_start, .end = sample_end },
	{ .name = "event", .start = event_start, .end = event_end },
	{ .name = "mix", .start = cylinder_start, .end = cylinder_end },
	{ .name = "gasmix", .start = cylinder_start, .end = cylinder_end },
	{ .name = "cylinder", .start = cylinder_start, .end = cylinder_end },
	{ .name = "weightsystem", .start = ws_start, .end = ws_end },
	{ .name = "divecomputer", .start = divecomputer_start, .end = divecomputer_end },
	{ .name = "P", .start = sample_start, .end = sample_end },

	/* Import type recognition: these only need a start hook */
	{ .name = "Divinglog", .start = DivingLog_importer },
	{ .name = "uddf", .start = uddf_importer },

	{ .name = NULL }
};
/*
 * Visit 'root' and all of its following siblings (and, through visit(),
 * everything below them).
 *
 * For a named node the nesting table is searched for its name; the
 * matching rule's start hook runs before the visit and its end hook
 * after it. Nodes without a name, and names not in the table, are simply
 * visited.
 */
static void traverse(xmlNode *root)
{
	xmlNode *n;

	for (n = root; n != NULL; n = n->next) {
		struct nesting *rule = NULL;

		if (n->name) {
			/* Ends on the match, or on the NULL-named terminator */
			for (rule = nesting; rule->name; rule++) {
				if (!strcmp(rule->name, n->name))
					break;
			}
		}

		if (rule && rule->start)
			rule->start();
		visit(n);
		if (rule && rule->end)
			rule->end();
	}
}
/*
 * Per-file reset: forget the detected import source and go back to SI
 * units.
 *
 * The units are reset for each file. You'd think they were a per-dive
 * property, but people can't be trusted to do per-dive setup. If some
 * format does carry per-dive unit data within one file, the reset may
 * have to move to per-dive for that format.
 */
static void reset_all(void)
{
	import_source = UNKNOWN;
	xml_parsing_units = SI_units;
}
/*
 * Parse an in-memory XML document and record what it contains.
 *
 * url:    name of the document, passed to libxml2 and used in messages
 * buffer: the XML text, 'size' bytes long
 * table:  becomes target_table (this happens even if parsing fails)
 * error:  receives a parse error via parser_error() if the buffer cannot
 *         be read as XML; the failure is also printed to stderr
 *
 * After a successful read the per-file state is reset and the document
 * is traversed between a dive_start()/dive_end() pair. When built with
 * XSLT the document is first offered to test_xslt_transforms().
 */
void parse_xml_buffer(const char *url, const char *buffer, int size,
			struct dive_table *table, GError **error)
{
	xmlDoc *doc;

	target_table = table;
	doc = xmlReadMemory(buffer, size, url, NULL, 0);

	if (doc != NULL) {
		reset_all();
		dive_start();
#ifdef XSLT
		doc = test_xslt_transforms(doc, error);
#endif
		traverse(xmlDocGetRootElement(doc));
		dive_end();
		xmlFreeDoc(doc);
		return;
	}

	fprintf(stderr, _("Failed to parse '%s'.\n"), url);
	parser_error(error, _("Failed to parse '%s'"), url);
}
/*
 * XML parser setup. The only step is LIBXML_TEST_VERSION, libxml2's own
 * macro; per the libxml2 documentation it checks that the library in use
 * at run time is compatible with the version this file was compiled
 * against.
 */
void parse_xml_init(void)
{
LIBXML_TEST_VERSION
}
/*
 * XML parser teardown: hands control to libxml2's xmlCleanupParser(),
 * the library's global cleanup routine.
 */
void parse_xml_exit(void)
{
xmlCleanupParser();
}
#ifdef XSLT
/*
 * Default colon-separated list of directories searched for stylesheets:
 * the build-time XSLT directory, then "xslt", then ".". get_stylesheet()
 * uses it only when the SUBSURFACE_XSLT_PATH environment variable is not
 * set.
 */
static const char *xslt_path = XSLT ":xslt:.";
/*
 * Try to load the stylesheet 'name' from one directory.
 *
 * path/len: the directory; only the first 'len' bytes of 'path' are used,
 *           so it does not have to be NUL-terminated at that point
 * name:     file name of the stylesheet
 *
 * Returns the parsed stylesheet, or NULL if memory could not be
 * allocated, the file is not readable, or libxslt could not parse it.
 */
static xsltStylesheetPtr try_get_stylesheet(const char *path, int len, const char *name)
{
	xsltStylesheetPtr sheet = NULL;
	int namelen = strlen(name);
	char *filename, *tail;

	/* Room for: directory, separator, name, terminating NUL */
	filename = malloc(len + 1 + namelen + 1);
	if (filename == NULL)
		return NULL;

	memcpy(filename, path, len);
	tail = filename + len;
	*tail++ = G_DIR_SEPARATOR;
	/* namelen + 1 also copies the name's terminating NUL */
	memcpy(tail, name, namelen + 1);

	if (access(filename, R_OK) == 0)
		sheet = xsltParseStylesheetFile(filename);

	free(filename);
	return sheet;
}
/*
 * Find and load the stylesheet 'name'.
 *
 * The directories to search come from the SUBSURFACE_XSLT_PATH
 * environment variable or, if that is unset, from xslt_path. Both are
 * colon-separated lists that are tried left to right; the first
 * directory that yields a stylesheet wins. Returns NULL if none does.
 */
static xsltStylesheetPtr get_stylesheet(const char *name)
{
	const char *dir = getenv("SUBSURFACE_XSLT_PATH");

	if (!dir)
		dir = xslt_path;

	while (dir) {
		const char *colon = strchr(dir, ':');
		xsltStylesheetPtr sheet;
		int dirlen;

		/* This component ends at the next colon or at the end of the list */
		if (colon)
			dirlen = colon - dir;
		else
			dirlen = strlen(dir);

		sheet = try_get_stylesheet(dir, dirlen, name);
		if (sheet)
			return sheet;

		dir = colon ? colon + 1 : NULL;
	}
	return NULL;
}
/*
 * Maps the name of a document's root element to the file name of the
 * stylesheet used to convert that document. The list ends with an entry
 * whose root is NULL.
 */
static struct xslt_files {
	const char *root;	/* root element name */
	const char *file;	/* stylesheet file name */
} xslt_files[] = {
	{ .root = "SUUNTO", .file = "SuuntoSDM.xslt" },
	{ .root = "JDiveLog", .file = "jdivelog2subsurface.xslt" },
	{ .root = "dives", .file = "MacDive.xslt" },
	{ .root = "DIVELOGSDATA", .file = "divelogs.xslt" },
	{ .root = NULL }
};
/*
 * Convert a foreign XML dialect with the matching XSLT stylesheet.
 *
 * The root element name of 'doc' is looked up (case-insensitively) in
 * xslt_files. The document is returned untouched when
 *  - it has no root element or the name is not in the table,
 *  - the first child element of the root has a name="subsurface"
 *    attribute, or
 *  - the stylesheet cannot be loaded (reported through 'error').
 *
 * Otherwise the stylesheet is applied, the original document is freed
 * and the result is returned. If the transformation fails the result is
 * NULL and the failure is reported through 'error'.
 */
static xmlDoc *test_xslt_transforms(xmlDoc *doc, GError **error)
{
	struct xslt_files *info = xslt_files;
	xmlDoc *transformed;
	xsltStylesheetPtr xslt = NULL;
	xmlNode *root_element = xmlDocGetRootElement(doc);
	char *attribute;

	/* Nothing to recognise in a document without a root element */
	if (!root_element)
		return doc;

	while ((info->root) && (strcasecmp(root_element->name, info->root) != 0)) {
		info++;
	}
	if (!info->root)
		return doc;

	attribute = xmlGetProp(xmlFirstElementChild(root_element), "name");
	if (attribute) {
		int is_subsurface = (strcasecmp(attribute, "subsurface") == 0);

		/* Strings handed out by xmlGetProp() belong to libxml2's allocator */
		xmlFree(attribute);
		if (is_subsurface)
			return doc;
	}

	xmlSubstituteEntitiesDefault(1);
	xslt = get_stylesheet(info->file);
	if (xslt == NULL) {
		parser_error(error, "Can't open stylesheet (%s)/%s", xslt_path, info->file);
		return doc;
	}

	transformed = xsltApplyStylesheet(xslt, doc, NULL);
	/* Don't let a failed transformation pass silently as an empty file */
	if (!transformed)
		parser_error(error, _("Failed to apply stylesheet '%s'"), info->file);

	xmlFreeDoc(doc);
	xsltFreeStylesheet(xslt);
	return transformed;
}
#endif