mirror of
https://github.com/subsurface/subsurface.git
synced 2025-02-19 22:16:15 +00:00
Support divelogs.de exports that include Cyrillic characters
divelogs.de sends us XML files that explicitly state that they are in ISO-8859-1 encoding (which is true). These files contain HTML-encoded Cyrillic characters. Once we decode those characters, the resulting data is actually UTF-8 encoded (Unicode covers every ISO-8859-1 character, but the UTF-8 byte sequences differ from ISO-8859-1 for anything outside ASCII). That seriously confuses libxml when it tries to parse things. So instead we recognize divelogs.de files and skip the encoding declaration for them before decoding the HTML-encoded non-ISO-8859-1 characters. This does show, however, that divelogs.de incorrectly truncates the encoded strings (at least in some sample data that I created, the parsing throws errors because of that). Reported-by: Sergey Starosek <sergey.starosek@gmail.com> Based-on-code-by: Miika Turkia <miika.turkia@gmail.com> Signed-off-by: Dirk Hohndel <dirk@hohndel.org>
This commit is contained in:
parent
98d769a02f
commit
757791335f
2 changed files with 24 additions and 2 deletions
24
parse-xml.c
24
parse-xml.c
|
@ -8,6 +8,7 @@
|
||||||
#define __USE_XOPEN
|
#define __USE_XOPEN
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <libxml/parser.h>
|
#include <libxml/parser.h>
|
||||||
|
#include <libxml/parserInternals.h>
|
||||||
#include <libxml/tree.h>
|
#include <libxml/tree.h>
|
||||||
#ifdef XSLT
|
#ifdef XSLT
|
||||||
#include <libxslt/transform.h>
|
#include <libxslt/transform.h>
|
||||||
|
@ -1533,13 +1534,34 @@ static void reset_all(void)
|
||||||
import_source = UNKNOWN;
|
import_source = UNKNOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* divelog.de sends us xml files that claim to be iso-8859-1
|
||||||
|
* but once we decode the HTML encoded characters they turn
|
||||||
|
* into UTF-8 instead. So skip the incorrect encoding
|
||||||
|
* declaration and decode the HTML encoded characters */
|
||||||
|
const char *preprocess_divelog_de(const char *buffer)
|
||||||
|
{
|
||||||
|
char *ret = strstr(buffer, "<DIVELOGSDATA>");
|
||||||
|
|
||||||
|
if (ret) {
|
||||||
|
xmlParserCtxtPtr ctx;
|
||||||
|
char buf[] = "";
|
||||||
|
|
||||||
|
ctx = xmlCreateMemoryParserCtxt(buf, sizeof(buf));
|
||||||
|
ret = xmlStringLenDecodeEntities(ctx, ret, strlen(ret), XML_SUBSTITUTE_REF, 0, 0, 0);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
|
||||||
void parse_xml_buffer(const char *url, const char *buffer, int size,
|
void parse_xml_buffer(const char *url, const char *buffer, int size,
|
||||||
struct dive_table *table, GError **error)
|
struct dive_table *table, GError **error)
|
||||||
{
|
{
|
||||||
xmlDoc *doc;
|
xmlDoc *doc;
|
||||||
|
const char *res = preprocess_divelog_de(buffer);
|
||||||
|
|
||||||
target_table = table;
|
target_table = table;
|
||||||
doc = xmlReadMemory(buffer, size, url, NULL, 0);
|
doc = xmlReadMemory(res, strlen(res), url, NULL, 0);
|
||||||
if (!doc) {
|
if (!doc) {
|
||||||
fprintf(stderr, _("Failed to parse '%s'.\n"), url);
|
fprintf(stderr, _("Failed to parse '%s'.\n"), url);
|
||||||
parser_error(error, _("Failed to parse '%s'"), url);
|
parser_error(error, _("Failed to parse '%s'"), url);
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
<?xml version="1.0"?>
|
<?xml version="1.0"?>
|
||||||
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
|
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
|
||||||
<xsl:strip-space elements="*"/>
|
<xsl:strip-space elements="*"/>
|
||||||
<xsl:output method="xml" indent="yes"/>
|
<xsl:output method="xml" indent="no" encoding="UTF-8" omit-xml-declaration="yes"/>
|
||||||
|
|
||||||
<xsl:template match="/">
|
<xsl:template match="/">
|
||||||
<divelog program='subsurface-import' version='2'>
|
<divelog program='subsurface-import' version='2'>
|
||||||
|
|
Loading…
Add table
Reference in a new issue