diff --git a/test/lint/lint-format-strings.py b/test/lint/lint-format-strings.py new file mode 100755 index 0000000000..dcdd1dcf7e --- /dev/null +++ b/test/lint/lint-format-strings.py @@ -0,0 +1,252 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2018 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +# +# Lint format strings: This program checks that the number of arguments passed +# to a variadic format string function matches the number of format specifiers +# in the format string. + +import argparse +import re +import sys + +FALSE_POSITIVES = [ + ("src/dbwrapper.cpp", "vsnprintf(p, limit - p, format, backup_ap)"), + ("src/index/base.cpp", "FatalError(const char* fmt, const Args&... args)"), + ("src/netbase.cpp", "LogConnectFailure(bool manual_connection, const char* fmt, const Args&... args)"), + ("src/util.cpp", "strprintf(_(COPYRIGHT_HOLDERS), _(COPYRIGHT_HOLDERS_SUBSTITUTION))"), + ("src/util.cpp", "strprintf(COPYRIGHT_HOLDERS, COPYRIGHT_HOLDERS_SUBSTITUTION)"), +] + + +def parse_function_calls(function_name, source_code): + """Return an array with all calls to function function_name in string source_code. + Preprocessor directives and C++ style comments ("//") in source_code are removed. + + >>> len(parse_function_calls("foo", "foo();bar();foo();bar();")) + 2 + >>> parse_function_calls("foo", "foo(1);bar(1);foo(2);bar(2);")[0].startswith("foo(1);") + True + >>> parse_function_calls("foo", "foo(1);bar(1);foo(2);bar(2);")[1].startswith("foo(2);") + True + >>> len(parse_function_calls("foo", "foo();bar();// foo();bar();")) + 1 + >>> len(parse_function_calls("foo", "#define FOO foo();")) + 0 + """ + assert(type(function_name) is str and type(source_code) is str and function_name) + lines = [re.sub("// .*", " ", line).strip() + for line in source_code.split("\n") + if not line.strip().startswith("#")] + return re.findall(r"[^a-zA-Z_](?=({}\(.*).*)".format(function_name), " " + " ".join(lines)) + + +def normalize(s): + """Return a normalized version of string s with newlines, tabs and C style comments ("/* ... */") + replaced with spaces. Multiple spaces are replaced with a single space. + + >>> normalize(" /* nothing */ foo\tfoo /* bar */ foo ") + 'foo foo foo' + """ + assert(type(s) is str) + s = s.replace("\n", " ") + s = s.replace("\t", " ") + s = re.sub("/\*.*?\*/", " ", s) + s = re.sub(" {2,}", " ", s) + return s.strip() + + +ESCAPE_MAP = { + r"\n": "[escaped-newline]", + r"\t": "[escaped-tab]", + r'\"': "[escaped-quote]", +} + + +def escape(s): + """Return the escaped version of string s with "\\\"", "\\n" and "\\t" escaped as + "[escaped-backslash]", "[escaped-newline]" and "[escaped-tab]". + + >>> unescape(escape("foo")) == "foo" + True + >>> escape(r'foo \\t foo \\n foo \\\\ foo \\ foo \\"bar\\"') + 'foo [escaped-tab] foo [escaped-newline] foo \\\\\\\\ foo \\\\ foo [escaped-quote]bar[escaped-quote]' + """ + assert(type(s) is str) + for raw_value, escaped_value in ESCAPE_MAP.items(): + s = s.replace(raw_value, escaped_value) + return s + + +def unescape(s): + """Return the unescaped version of escaped string s. + Reverses the replacements made in function escape(s). + + >>> unescape(escape("bar")) + 'bar' + >>> unescape("foo [escaped-tab] foo [escaped-newline] foo \\\\\\\\ foo \\\\ foo [escaped-quote]bar[escaped-quote]") + 'foo \\\\t foo \\\\n foo \\\\\\\\ foo \\\\ foo \\\\"bar\\\\"' + """ + assert(type(s) is str) + for raw_value, escaped_value in ESCAPE_MAP.items(): + s = s.replace(escaped_value, raw_value) + return s + + +def parse_function_call_and_arguments(function_name, function_call): + """Split string function_call into an array of strings consisting of: + * the string function_call followed by "(" + * the function call argument #1 + * ... + * the function call argument #n + * a trailing ");" + + The strings returned are in escaped form. See escape(...). + + >>> parse_function_call_and_arguments("foo", 'foo("%s", "foo");') + ['foo(', '"%s",', ' "foo"', ')'] + >>> parse_function_call_and_arguments("foo", 'foo("%s", "foo");') + ['foo(', '"%s",', ' "foo"', ')'] + >>> parse_function_call_and_arguments("foo", 'foo("%s %s", "foo", "bar");') + ['foo(', '"%s %s",', ' "foo",', ' "bar"', ')'] + >>> parse_function_call_and_arguments("fooprintf", 'fooprintf("%050d", i);') + ['fooprintf(', '"%050d",', ' i', ')'] + >>> parse_function_call_and_arguments("foo", 'foo(bar(foobar(barfoo("foo"))), foobar); barfoo') + ['foo(', 'bar(foobar(barfoo("foo"))),', ' foobar', ')'] + >>> parse_function_call_and_arguments("foo", "foo()") + ['foo(', '', ')'] + >>> parse_function_call_and_arguments("foo", "foo(123)") + ['foo(', '123', ')'] + >>> parse_function_call_and_arguments("foo", 'foo("foo")') + ['foo(', '"foo"', ')'] + """ + assert(type(function_name) is str and type(function_call) is str and function_name) + remaining = normalize(escape(function_call)) + expected_function_call = "{}(".format(function_name) + assert(remaining.startswith(expected_function_call)) + parts = [expected_function_call] + remaining = remaining[len(expected_function_call):] + open_parentheses = 1 + in_string = False + parts.append("") + for char in remaining: + parts.append(parts.pop() + char) + if char == "\"": + in_string = not in_string + continue + if in_string: + continue + if char == "(": + open_parentheses += 1 + continue + if char == ")": + open_parentheses -= 1 + if open_parentheses > 1: + continue + if open_parentheses == 0: + parts.append(parts.pop()[:-1]) + parts.append(char) + break + if char == ",": + parts.append("") + return parts + + +def parse_string_content(argument): + """Return the text within quotes in string argument. + + >>> parse_string_content('1 "foo %d bar" 2') + 'foo %d bar' + >>> parse_string_content('1 foobar 2') + '' + >>> parse_string_content('1 "bar" 2') + 'bar' + >>> parse_string_content('1 "foo" 2 "bar" 3') + 'foobar' + >>> parse_string_content('1 "foo" 2 " " "bar" 3') + 'foo bar' + >>> parse_string_content('""') + '' + >>> parse_string_content('') + '' + >>> parse_string_content('1 2 3') + '' + """ + assert(type(argument) is str) + string_content = "" + in_string = False + for char in normalize(escape(argument)): + if char == "\"": + in_string = not in_string + elif in_string: + string_content += char + return string_content + + +def count_format_specifiers(format_string): + """Return the number of format specifiers in string format_string. + + >>> count_format_specifiers("foo bar foo") + 0 + >>> count_format_specifiers("foo %d bar foo") + 1 + >>> count_format_specifiers("foo %d bar %i foo") + 2 + >>> count_format_specifiers("foo %d bar %i foo %% foo") + 2 + >>> count_format_specifiers("foo %d bar %i foo %% foo %d foo") + 3 + >>> count_format_specifiers("foo %d bar %i foo %% foo %*d foo") + 4 + """ + assert(type(format_string) is str) + n = 0 + in_specifier = False + for i, char in enumerate(format_string): + if format_string[i - 1:i + 1] == "%%" or format_string[i:i + 2] == "%%": + pass + elif char == "%": + in_specifier = True + n += 1 + elif char in "aAcdeEfFgGinopsuxX": + in_specifier = False + elif in_specifier and char == "*": + n += 1 + return n + + +def main(): + parser = argparse.ArgumentParser(description="This program checks that the number of arguments passed " + "to a variadic format string function matches the number of format " + "specifiers in the format string.") + parser.add_argument("--skip-arguments", type=int, help="number of arguments before the format string " + "argument (e.g. 1 in the case of fprintf)", default=0) + parser.add_argument("function_name", help="function name (e.g. fprintf)", default=None) + parser.add_argument("file", type=argparse.FileType("r", encoding="utf-8"), nargs="*", help="C++ source code file (e.g. foo.cpp)") + args = parser.parse_args() + + exit_code = 0 + for f in args.file: + for function_call_str in parse_function_calls(args.function_name, f.read()): + parts = parse_function_call_and_arguments(args.function_name, function_call_str) + relevant_function_call_str = unescape("".join(parts))[:512] + if (f.name, relevant_function_call_str) in FALSE_POSITIVES: + continue + if len(parts) < 3 + args.skip_arguments: + exit_code = 1 + print("{}: Could not parse function call string \"{}(...)\": {}".format(f.name, args.function_name, relevant_function_call_str)) + continue + argument_count = len(parts) - 3 - args.skip_arguments + format_str = parse_string_content(parts[1 + args.skip_arguments]) + format_specifier_count = count_format_specifiers(format_str) + if format_specifier_count != argument_count: + exit_code = 1 + print("{}: Expected {} argument(s) after format string but found {} argument(s): {}".format(f.name, format_specifier_count, argument_count, relevant_function_call_str)) + continue + sys.exit(exit_code) + + +if __name__ == "__main__": + main() diff --git a/test/lint/lint-format-strings.sh b/test/lint/lint-format-strings.sh new file mode 100755 index 0000000000..c4041d4b3d --- /dev/null +++ b/test/lint/lint-format-strings.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2018 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +# +# Lint format strings: This program checks that the number of arguments passed +# to a variadic format string function matches the number of format specifiers +# in the format string. + +export LC_ALL=C + +FUNCTION_NAMES_AND_NUMBER_OF_LEADING_ARGUMENTS=( + FatalError,0 + fprintf,1 + LogConnectFailure,1 + LogPrint,1 + LogPrintf,0 + printf,0 + snprintf,2 + sprintf,1 + strprintf,0 + vfprintf,1 + vprintf,1 + vsnprintf,1 + vsprintf,1 +) + +EXIT_CODE=0 +if ! python3 -m doctest test/lint/lint-format-strings.py; then + EXIT_CODE=1 +fi +for S in "${FUNCTION_NAMES_AND_NUMBER_OF_LEADING_ARGUMENTS[@]}"; do + IFS="," read -r FUNCTION_NAME SKIP_ARGUMENTS <<< "${S}" + mapfile -t MATCHING_FILES < <(git grep --full-name -l "${FUNCTION_NAME}" -- "*.c" "*.cpp" "*.h" | sort | grep -vE "^src/(leveldb|secp256k1|tinyformat|univalue)") + if ! test/lint/lint-format-strings.py --skip-arguments "${SKIP_ARGUMENTS}" "${FUNCTION_NAME}" "${MATCHING_FILES[@]}"; then + EXIT_CODE=1 + fi +done +exit ${EXIT_CODE}