subsurface/scripts/whitespace.pl
Berthold Stoeger b95ac3f79c core: turn C dive-table into an owning table
This is a humongous commit, because it touches all parts of the
code. It removes the last user of our horrible TABLE macros, which
simulate std::vector<> in a very clumsy way.

Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>
2024-08-13 19:28:30 +02:00

90 lines
3.3 KiB
Perl
Executable file

#!/usr/bin/perl
my $input = $ARGV[0];
my $source = `clang-format $input`;
# for_each (...) and friends...
$source =~ s/(?:\G|^)(.*each.*\(.*) \* (\S.*\))$/$1 *$2/img; # if a variable is declared in the argument, '*' is an indicator for a pointer, not arithmatic
$source =~ s/(?:\G|^)(.*each.*\(.*) \& (\S.*\))$/$1 &$2/img; # if a variable is declared in the argument, '&' is an indicator for a reference, not bit logic
$source =~ s/(?:\G|^)(.*each[^\s(]*)\s*(\(.*)$/$1 $2/img; # we want exactly one space between keyword and opening parenthesis '('
$source =~ s/(?:\G|^)(.*each.*\(.*\).*)\n\s*{\s*$/$1 {/img; # we want the opening curly brace on the same line, separated by a space
$source =~ s/(?:\G|^)(\s+[^#\/*].*each.*\(.*\))\n(\s*)([^{}\s])/$1\n\t$2$3/img;
# don't have '{' in the next line when declaring data types
$source =~ s/^(\s*struct[^()\n]*)\n\s*{\s*$/$1 {/img;
$source =~ s/^(\s*static\s+struct[^()\n]*)\n\s*{\s*$/$1 {/img;
$source =~ s/^(\s*union[^()\n]*)\n\s*{\s*$/$1 {/img;
$source =~ s/^(\s*static\s+union[^()\n]*)\n\s*{\s*$/$1 {/img;
$source =~ s/^(\s*class.*)\n\s*{\s*$/$1 {/img;
# a namespace shouldn't look like a function
$source =~ s/(?:\G|^)(namespace.*)\n\{/$1 {/img;
# colon goes at the end of a line
$source =~ s/^(\S*::\S*.*)\n\s*: /$1 : /img;
# odd indentations from clang-format:
# six spaces or four spaces after tabs (for continuation strings)
$source =~ s/(?:\G|^)[ ]{6}/\t/mg;
$source =~ s/(?:\G|^)(\t*)[ ]{4}"/$1\t"/mg;
# the next ones are rather awkward
# they capture multi line #define and #if definded statements
# that clang-format messes up (where does that 4 space indentation come
# from?
# I couldn't figure out how to make it apply to an arbitrary number of
# intermediate lines, so I hardcoded 0 through 5 lines between the #define
# or #if defined statements and the end of the multi line statement
$source =~ s/^(#(?:if |)define.*)\n +([^*].*)$/$1\n\t$2/mg;
$source =~ s/^(#(?:if |)define.*)((?:\\\n.*){1})\n +([^*].*)$/$1$2\n\t$3/mg;
$source =~ s/^(#(?:if |)define.*)((?:\\\n.*){2})\n +([^*].*)$/$1$2\n\t$3/mg;
$source =~ s/^(#(?:if |)define.*)((?:\\\n.*){3})\n +([^*].*)$/$1$2\n\t$3/mg;
$source =~ s/^(#(?:if |)define.*)((?:\\\n.*){4})\n +([^*].*)$/$1$2\n\t$3/mg;
$source =~ s/^(#(?:if |)define.*)((?:\\\n.*){5})\n +([^*].*)$/$1$2\n\t$3/mg;
# don't put line break before the last single term argument of a calculation
$source =~ s/(?:\G|^)(.*[+-])\n\s*(\S*\;)$/$1 $2/mg;
sub indent_ctor_init_lists {
my($content) = @_;
# all caps/_ with just an argument list is a macro invocation and shouldn't be moved
my @not_ctor_words = qw(
\bdo\b
\belse\b
\bfor\b
\bif\b
\bsizeof\b
\bswitch\b
\bwhile\b
\btr\b
\bconnect\b
^[A-Z_]+\(.*\)$
);
my $regexStr = "(" . join("|", @not_ctor_words) . ")";
my $not_ctor_regex = qr{$regexStr};
my $result = "";
for ( split(/\n/, $content) ) {
if ($_ =~ $not_ctor_regex) {
# probably not a ctor line. leave it be.
$result .= $_ . "\n";
}
else {
$_ =~ s/^\s*(\w*\(.*\),?)$/\t$1/mg;
$result .= $_ . "\n";
}
}
return $result;
}
$source = indent_ctor_init_lists($source);
$quotedinput = $input;
$quotedinput =~ s|/|\\/|g;
open (DIFF, "| diff -u $input - | sed -e 's/--- $quotedinput/--- $quotedinput.old/' | sed -e 's/+++ -/+++ $quotedinput/'");
print DIFF $source ;