subsurface/core/strtod.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Sane helper for 'strtod()'.
 *
 * Sad that we even need this, but the C library version has
 * insane locale behavior, and while the Qt "toDouble()" routines
 * are better in that regard, they don't have an end pointer
 * (having replaced it with the completely idiotic "ok" boolean
 * pointer instead).
 *
 * I wonder what drugs people are on sometimes.
 *
 * Right now we support the following flags to limit the
 * parsing in some ways:
 *
 *   STRTOD_NO_SIGN	- don't accept signs
 *   STRTOD_NO_DOT	- no decimal dots, I'm European
 *   STRTOD_NO_COMMA	- no comma, please, I'm C locale
 *   STRTOD_NO_EXPONENT	- no exponent parsing, I'm human
 *
 * The "negative" flags are so that the common case can just
 * use a flag value of 0, and only if you have some special
 * requirements do you need to state those with explicit flags.
 *
 * So if you want the C locale kind of parsing, you'd use the
 * STRTOD_NO_COMMA flag to disallow a decimal comma. But if you
 * want a more relaxed "Hey, Europeans are people too, even if
 * they have locales with commas", just pass in a zero flag.
 */
#include <ctype.h>
#include "dive.h"

/*
 * Parse a decimal floating point number from 'str'.
 *
 * Accepted syntax, in order:
 *   - leading whitespace (as classified by isspace()), which is skipped
 *   - an optional '+' or '-' sign, unless STRTOD_NO_SIGN is set
 *   - a mantissa of decimal digits with at most one decimal separator;
 *     '.' counts as a separator unless STRTOD_NO_DOT is set, and ','
 *     counts as one unless STRTOD_NO_COMMA is set
 *   - an optional exponent ('e' or 'E', optional sign, decimal digits),
 *     unless STRTOD_NO_EXPONENT is set
 *
 * str:   NUL-terminated string to parse.
 * ptr:   if non-NULL, receives a pointer to the first character that was
 *        not consumed. If no digit was found in the mantissa, it receives
 *        'str' itself.
 * flags: bitwise OR of the STRTOD_NO_* flags; 0 accepts everything.
 *
 * Returns the parsed value, or 0.0 if no conversion was performed.
 *
 * An 'e'/'E' that is not followed by at least one digit (after the
 * optional exponent sign) is not consumed: '*ptr' then points at it.
 * The exponent magnitude is capped at 308.
 */
double strtod_flags(const char *str, const char **ptr, unsigned int flags)
{
	/* Largest exponent magnitude we bother applying */
	const int max_exponent = 308;
	char c;
	const char *p = str, *ep;
	double val = 0.0;
	double decimal = 1.0;
	int sign = 0, esign = 0;
	int numbers = 0, dot = 0;

	/*
	 * skip spaces
	 *
	 * The <ctype.h> classifiers need a value representable as
	 * 'unsigned char' (or EOF). Plain 'char' may be signed, so cast
	 * to avoid handing them a negative value for bytes >= 0x80.
	 */
	while (isspace((unsigned char)(c = *p++)))
		/* */;

	/* optional sign */
	if (!(flags & STRTOD_NO_SIGN)) {
		switch (c) {
		case '-':
			sign = 1;
		/* fallthrough */
		case '+':
			c = *p++;
		}
	}

	/*
	 * Mantissa
	 *
	 * 'val' collects all digits as if there were no separator, while
	 * 'decimal' collects the power of ten to divide by at the very end.
	 * 'p' always points one past the character held in 'c'.
	 */
	for (;; c = *p++) {
		if ((c == '.' && !(flags & STRTOD_NO_DOT)) ||
		    (c == ',' && !(flags & STRTOD_NO_COMMA))) {
			/* a second separator terminates the number */
			if (dot)
				goto done;
			dot = 1;
			continue;
		}
		if (c >= '0' && c <= '9') {
			numbers++;
			val = (val * 10) + (c - '0');
			if (dot)
				decimal *= 10;
			continue;
		}
		if (c != 'e' && c != 'E')
			goto done;
		if (flags & STRTOD_NO_EXPONENT)
			goto done;
		break;
	}

	if (!numbers)
		goto done;

	/*
	 * Exponent
	 *
	 * Look ahead with 'ep' and only commit to 'p' once we have seen
	 * a digit, so that a dangling 'e' or 'e+' is left unconsumed.
	 */
	ep = p;
	c = *ep++;
	switch (c) {
	case '-':
		esign = 1;
	/* fallthrough */
	case '+':
		c = *ep++;
	}

	if (c >= '0' && c <= '9') {
		p = ep;
		int exponent = c - '0';

		for (;;) {
			c = *p++;
			if (c < '0' || c > '9')
				break;
			/*
			 * Keep consuming digits, but stop accumulating once
			 * we are past the cap: an unbounded digit string
			 * would otherwise overflow the signed int.
			 */
			if (exponent <= max_exponent)
				exponent = exponent * 10 + (c - '0');
		}

		/* We're not going to bother playing games */
		if (exponent > max_exponent)
			exponent = max_exponent;

		/*
		 * The result is divided by 'decimal', so a negative
		 * exponent grows it and a positive exponent shrinks it.
		 */
		while (exponent-- > 0) {
			if (esign)
				decimal *= 10;
			else
				decimal /= 10;
		}
	}

done:
	if (!numbers)
		goto no_conversion;
	/* 'p' is one past the first unconsumed character */
	if (ptr)
		*ptr = p - 1;
	return (sign ? -val : val) / decimal;

no_conversion:
	if (ptr)
		*ptr = str;
	return 0.0;
}
Add SPDX header to core C files Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2017-04-27 18:18:03 +00:00			`// SPDX-License-Identifier: GPL-2.0`
Make our 'ascii_strtod()' helper more generic We'll want to do sane parsing of strings, but the C library makes it hard to handle user input sanely and the Qt toDouble() function interface was designed by a retarded chipmunk. So just extend our existing hacky "ascii_strtod()" to allow a more generic interface. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-01-03 04:35:35 +00:00			`/*`
			`* Sane helper for 'strtod()'.`
			`*`
			`* Sad that we even need this, but the C library version has`
			`* insane locale behavior, and while the Qt "doDouble()" routines`
			`* are better in that regard, they don't have an end pointer`
			`* (having replaced it with the completely idiotic "ok" boolean`
			`* pointer instead).`
			`*`
			`* I wonder what drugs people are on sometimes.`
			`*`
			`* Right now we support the following flags to limit the`
			`* parsing some ways:`
			`*`
			`* STRTOD_NO_SIGN - don't accept signs`
			`* STRTOD_NO_DOT - no decimal dots, I'm European`
			`* STRTOD_NO_COMMA - no comma, please, I'm C locale`
			`* STRTOD_NO_EXPONENT - no exponent parsing, I'm human`
			`*`
			`* The "negative" flags are so that the common case can just`
			`* use a flag value of 0, and only if you have some special`
			`* requirements do you need to state those with explicit flags.`
			`*`
			`* So if you want the C locale kind of parsing, you'd use the`
			`* STRTOD_NO_COMMA flag to disallow a decimal comma. But if you`
			`* want a more relaxed "Hey, Europeans are people too, even if`
			`* they have locales with commas", just pass in a zero flag.`
			`*/`
			`#include <ctype.h>`
			`#include "dive.h"`

const'ify our strtod() helper functions The C library doesn't use const char pointers for legacy reasons (and because you can modify the string the end pointer points to), but let's do it in our internal implementation just because it's a nice guarantee to have. We actually used to have a non-const end pointer and replace a decimal comma with a decimal dot, but that was because we didn't have the fancy "allow commas" flags. So by using our own strtod_flags() function, we can now keep all the strings we parse read-only rather than modify them as we parse them. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-01-08 06:51:22 +00:00			`double strtod_flags(const char str, const char *ptr, unsigned int flags)`
Make our 'ascii_strtod()' helper more generic We'll want to do sane parsing of strings, but the C library makes it hard to handle user input sanely and the Qt toDouble() function interface was designed by a retarded chipmunk. So just extend our existing hacky "ascii_strtod()" to allow a more generic interface. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-01-03 04:35:35 +00:00			`{`
const'ify our strtod() helper functions The C library doesn't use const char pointers for legacy reasons (and because you can modify the string the end pointer points to), but let's do it in our internal implementation just because it's a nice guarantee to have. We actually used to have a non-const end pointer and replace a decimal comma with a decimal dot, but that was because we didn't have the fancy "allow commas" flags. So by using our own strtod_flags() function, we can now keep all the strings we parse read-only rather than modify them as we parse them. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-01-08 06:51:22 +00:00			`char c;`
			`const char p = str, ep;`
Make our 'ascii_strtod()' helper more generic We'll want to do sane parsing of strings, but the C library makes it hard to handle user input sanely and the Qt toDouble() function interface was designed by a retarded chipmunk. So just extend our existing hacky "ascii_strtod()" to allow a more generic interface. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-01-03 04:35:35 +00:00			`double val = 0.0;`
			`double decimal = 1.0;`
			`int sign = 0, esign = 0;`
			`int numbers = 0, dot = 0;`

			`/* skip spaces */`
			`while (isspace(c = *p++))`
			`/* */;`

			`/* optional sign */`
			`if (!(flags & STRTOD_NO_SIGN)) {`
			`switch (c) {`
			`case '-':`
			`sign = 1;`
Massive automated whitespace cleanup I know everyone will hate it. Go ahead. Complain. Call me names. At least now things are consistent and reproducible. If you want changes, have your complaint come with a patch to scripts/whitespace.pl so that we can automate it. Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-02-28 04:09:57 +00:00			`/* fallthrough */`
Make our 'ascii_strtod()' helper more generic We'll want to do sane parsing of strings, but the C library makes it hard to handle user input sanely and the Qt toDouble() function interface was designed by a retarded chipmunk. So just extend our existing hacky "ascii_strtod()" to allow a more generic interface. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-01-03 04:35:35 +00:00			`case '+':`
			`c = *p++;`
			`}`
			`}`

			`/* Mantissa */`
Massive automated whitespace cleanup I know everyone will hate it. Go ahead. Complain. Call me names. At least now things are consistent and reproducible. If you want changes, have your complaint come with a patch to scripts/whitespace.pl so that we can automate it. Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-02-28 04:09:57 +00:00			`for (;; c = *p++) {`
Make our 'ascii_strtod()' helper more generic We'll want to do sane parsing of strings, but the C library makes it hard to handle user input sanely and the Qt toDouble() function interface was designed by a retarded chipmunk. So just extend our existing hacky "ascii_strtod()" to allow a more generic interface. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-01-03 04:35:35 +00:00			`if ((c == '.' && !(flags & STRTOD_NO_DOT)) \|\|`
			`(c == ',' && !(flags & STRTOD_NO_COMMA))) {`
			`if (dot)`
			`goto done;`
			`dot = 1;`
			`continue;`
			`}`
			`if (c >= '0' && c <= '9') {`
			`numbers++;`
micro-optimisation: avoid division in main strtod() loop Division is expensive, so replace it with multiplication instead. But don't multiply by 0.1 (inexact in floating point), multiply by 10 and then do one division at the end. Make sure the final division is at the very end, so that the result isn't immediately used. That allow the division to overlap with the function return overhead, hiding it further. This is silly, but while thinking about different file formats and doing profiling of loading big files, it turned out that "strtod_flags()" actually showed up in profiles. Not very high, but at more than 1%. This makes the common case (no exponent) use only addition and multiplication until the very end, and makes the division be the very last thing it does, which minimizes the data dependencies on the division. For my stupid test-case, it cut the cost of strtod_flags() in half according to the profile. The half a percent speedup on loading time isn't really noticeable or even measurable outside of profiling startup costs, but rather than carry this along in my tree or just throw it away, I'm sending it out to see if anybody cares. Note that we could avoid the final division by instead multiplying "decimal" with 0.1 rather than multiplying by 10 (and switching the sign test over), but that's a fundamentally inexact operation in binary floatig point, so doing the "multiply by tens for decimals" ends up keeping everything exact as long as possible. For our use, we probably really don't care, but whatever. End result: this should not only speed things up immeasurably, it might also make things more precise at a level that we really don't care about :^p I'm really selling this piece of crap, aren't I? 
[Dirk Hohndel: sorry - had to pull the full email into the commit message this is so good, you couldn't make it up] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-02-10 21:41:00 +00:00			`val = (val * 10) + (c - '0');`
			`if (dot)`
			`decimal *= 10;`
Make our 'ascii_strtod()' helper more generic We'll want to do sane parsing of strings, but the C library makes it hard to handle user input sanely and the Qt toDouble() function interface was designed by a retarded chipmunk. So just extend our existing hacky "ascii_strtod()" to allow a more generic interface. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-01-03 04:35:35 +00:00			`continue;`
			`}`
			`if (c != 'e' && c != 'E')`
			`goto done;`
			`if (flags & STRTOD_NO_EXPONENT)`
			`goto done;`
			`break;`
			`}`

			`if (!numbers)`
			`goto done;`

			`/* Exponent */`
			`ep = p;`
			`c = *ep++;`
			`switch (c) {`
			`case '-':`
			`esign = 1;`
Massive automated whitespace cleanup I know everyone will hate it. Go ahead. Complain. Call me names. At least now things are consistent and reproducible. If you want changes, have your complaint come with a patch to scripts/whitespace.pl so that we can automate it. Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-02-28 04:09:57 +00:00			`/* fallthrough */`
Make our 'ascii_strtod()' helper more generic We'll want to do sane parsing of strings, but the C library makes it hard to handle user input sanely and the Qt toDouble() function interface was designed by a retarded chipmunk. So just extend our existing hacky "ascii_strtod()" to allow a more generic interface. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-01-03 04:35:35 +00:00			`case '+':`
			`c = *ep++;`
			`}`

			`if (c >= '0' && c <= '9') {`
			`p = ep;`
			`int exponent = c - '0';`

			`for (;;) {`
			`c = *p++;`
			`if (c < '0' \|\| c > '9')`
			`break;`
			`exponent *= 10;`
			`exponent += c - '0';`
			`}`

			`/* We're not going to bother playing games */`
			`if (exponent > 308)`
			`exponent = 308;`

			`while (exponent-- > 0) {`
			`if (esign)`
micro-optimisation: avoid division in main strtod() loop Division is expensive, so replace it with multiplication instead. But don't multiply by 0.1 (inexact in floating point), multiply by 10 and then do one division at the end. Make sure the final division is at the very end, so that the result isn't immediately used. That allow the division to overlap with the function return overhead, hiding it further. This is silly, but while thinking about different file formats and doing profiling of loading big files, it turned out that "strtod_flags()" actually showed up in profiles. Not very high, but at more than 1%. This makes the common case (no exponent) use only addition and multiplication until the very end, and makes the division be the very last thing it does, which minimizes the data dependencies on the division. For my stupid test-case, it cut the cost of strtod_flags() in half according to the profile. The half a percent speedup on loading time isn't really noticeable or even measurable outside of profiling startup costs, but rather than carry this along in my tree or just throw it away, I'm sending it out to see if anybody cares. Note that we could avoid the final division by instead multiplying "decimal" with 0.1 rather than multiplying by 10 (and switching the sign test over), but that's a fundamentally inexact operation in binary floatig point, so doing the "multiply by tens for decimals" ends up keeping everything exact as long as possible. For our use, we probably really don't care, but whatever. End result: this should not only speed things up immeasurably, it might also make things more precise at a level that we really don't care about :^p I'm really selling this piece of crap, aren't I? 
[Dirk Hohndel: sorry - had to pull the full email into the commit message this is so good, you couldn't make it up] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-02-10 21:41:00 +00:00			`decimal *= 10;`
Make our 'ascii_strtod()' helper more generic We'll want to do sane parsing of strings, but the C library makes it hard to handle user input sanely and the Qt toDouble() function interface was designed by a retarded chipmunk. So just extend our existing hacky "ascii_strtod()" to allow a more generic interface. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-01-03 04:35:35 +00:00			`else`
micro-optimisation: avoid division in main strtod() loop Division is expensive, so replace it with multiplication instead. But don't multiply by 0.1 (inexact in floating point), multiply by 10 and then do one division at the end. Make sure the final division is at the very end, so that the result isn't immediately used. That allow the division to overlap with the function return overhead, hiding it further. This is silly, but while thinking about different file formats and doing profiling of loading big files, it turned out that "strtod_flags()" actually showed up in profiles. Not very high, but at more than 1%. This makes the common case (no exponent) use only addition and multiplication until the very end, and makes the division be the very last thing it does, which minimizes the data dependencies on the division. For my stupid test-case, it cut the cost of strtod_flags() in half according to the profile. The half a percent speedup on loading time isn't really noticeable or even measurable outside of profiling startup costs, but rather than carry this along in my tree or just throw it away, I'm sending it out to see if anybody cares. Note that we could avoid the final division by instead multiplying "decimal" with 0.1 rather than multiplying by 10 (and switching the sign test over), but that's a fundamentally inexact operation in binary floatig point, so doing the "multiply by tens for decimals" ends up keeping everything exact as long as possible. For our use, we probably really don't care, but whatever. End result: this should not only speed things up immeasurably, it might also make things more precise at a level that we really don't care about :^p I'm really selling this piece of crap, aren't I? 
[Dirk Hohndel: sorry - had to pull the full email into the commit message this is so good, you couldn't make it up] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-02-10 21:41:00 +00:00			`decimal /= 10;`
Make our 'ascii_strtod()' helper more generic We'll want to do sane parsing of strings, but the C library makes it hard to handle user input sanely and the Qt toDouble() function interface was designed by a retarded chipmunk. So just extend our existing hacky "ascii_strtod()" to allow a more generic interface. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-01-03 04:35:35 +00:00			`}`
			`}`

			`done:`
			`if (!numbers)`
			`goto no_conversion;`
			`if (ptr)`
Massive automated whitespace cleanup I know everyone will hate it. Go ahead. Complain. Call me names. At least now things are consistent and reproducible. If you want changes, have your complaint come with a patch to scripts/whitespace.pl so that we can automate it. Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-02-28 04:09:57 +00:00			`*ptr = p - 1;`
micro-optimisation: avoid division in main strtod() loop Division is expensive, so replace it with multiplication instead. But don't multiply by 0.1 (inexact in floating point), multiply by 10 and then do one division at the end. Make sure the final division is at the very end, so that the result isn't immediately used. That allow the division to overlap with the function return overhead, hiding it further. This is silly, but while thinking about different file formats and doing profiling of loading big files, it turned out that "strtod_flags()" actually showed up in profiles. Not very high, but at more than 1%. This makes the common case (no exponent) use only addition and multiplication until the very end, and makes the division be the very last thing it does, which minimizes the data dependencies on the division. For my stupid test-case, it cut the cost of strtod_flags() in half according to the profile. The half a percent speedup on loading time isn't really noticeable or even measurable outside of profiling startup costs, but rather than carry this along in my tree or just throw it away, I'm sending it out to see if anybody cares. Note that we could avoid the final division by instead multiplying "decimal" with 0.1 rather than multiplying by 10 (and switching the sign test over), but that's a fundamentally inexact operation in binary floatig point, so doing the "multiply by tens for decimals" ends up keeping everything exact as long as possible. For our use, we probably really don't care, but whatever. End result: this should not only speed things up immeasurably, it might also make things more precise at a level that we really don't care about :^p I'm really selling this piece of crap, aren't I? 
[Dirk Hohndel: sorry - had to pull the full email into the commit message this is so good, you couldn't make it up] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-02-10 21:41:00 +00:00			`return (sign ? -val : val) / decimal;`
Make our 'ascii_strtod()' helper more generic We'll want to do sane parsing of strings, but the C library makes it hard to handle user input sanely and the Qt toDouble() function interface was designed by a retarded chipmunk. So just extend our existing hacky "ascii_strtod()" to allow a more generic interface. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Dirk Hohndel <dirk@hohndel.org> 2014-01-03 04:35:35 +00:00
			`no_conversion:`
			`if (ptr)`
			`*ptr = str;`
			`return 0.0;`
			`}`