#!/usr/bin/env python3
# Copyright (c) 2018-2022 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
# whereas no such call is made in bitcoind.
#
# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
# specified by the user's LC_ALL (or LC_*) environment variable as the new
# C locale.
#
# In contrast, bitcoind does not opt in to localization -- no call to
# setlocale(LC_ALL, "") is made and the environment variables LC_* are
# thus ignored.
#
# This results in situations where bitcoind is guaranteed to be running
# with the classic locale ("C") whereas the locale of bitcoin-qt will vary
# depending on the user's environment variables.
#
# An example: Assuming the environment variable LC_ALL=de_DE then the
# call std::to_string(1.23) will return "1.230000" in bitcoind but
# "1,230000" in bitcoin-qt.
#
# From the Qt documentation:
# "On Unix/Linux Qt is configured to use the system locale settings by default.
# This can cause a conflict when using POSIX functions, for instance, when
# converting between data types such as floats and strings, since the notation
# may differ between locales. To get around this problem, call the POSIX function
# setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
# or QCoreApplication to reset the locale that is used for number formatting to
# "C"-locale."
#
# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
# https://stackoverflow.com/a/34878283 for more details.
import re
import sys
from subprocess import check_output , CalledProcessError
# Regular expressions describing accepted uses of locale dependent functions.
# main() joins them with "|" and drops every `git grep` output line that
# matches, so each entry must be written exactly as it should match (no
# surrounding whitespace).
KNOWN_VIOLATIONS = [
    "src/dbwrapper.cpp:.*vsnprintf",
    "src/test/fuzz/locale.cpp:.*setlocale",
    "src/test/fuzz/string.cpp:.*strtol",
    "src/test/fuzz/string.cpp:.*strtoul",
    "src/test/util_tests.cpp:.*strtoll",
    "src/wallet/bdb.cpp:.*DbEnv::strerror",  # False positive
    "src/util/syserror.cpp:.*strerror",  # Outside this function use `SysErrorString`
]
# Paths that are left out of the search. Each entry is prefixed with
# ":(exclude)" and handed to `git grep` as a pathspec, so it must not carry
# any surrounding whitespace.
REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
    "src/crypto/ctaes/",
    "src/leveldb/",
    "src/secp256k1/",
    "src/minisketch/",
    "src/tinyformat.h",
]
# Names of the functions the linter looks for. The names are joined with "|"
# into a single regular expression for `git grep` and are also searched for
# one by one in main(), so each entry must be the bare name without any
# surrounding whitespace. The trailing comments record which locale category
# (or which underlying function) makes the function locale dependent.
LOCALE_DEPENDENT_FUNCTIONS = [
    "alphasort",    # LC_COLLATE (via strcoll)
    "asctime",      # LC_TIME (directly)
    "asprintf",     # (via vasprintf)
    "atof",         # LC_NUMERIC (via strtod)
    "atoi",         # LC_NUMERIC (via strtol)
    "atol",         # LC_NUMERIC (via strtol)
    "atoll",        # (via strtoll)
    "atoq",
    "btowc",        # LC_CTYPE (directly)
    "ctime",        # (via asctime or localtime)
    "dprintf",      # (via vdprintf)
    "fgetwc",
    "fgetws",
    "fold_case",    # boost::locale::fold_case
    "fprintf",      # (via vfprintf)
    "fputwc",
    "fputws",
    "fscanf",       # (via __vfscanf)
    "fwprintf",     # (via __vfwprintf)
    "getdate",      # via __getdate_r => isspace // __localtime_r
    "getwc",
    "getwchar",
    "is_digit",     # boost::algorithm::is_digit
    "is_space",     # boost::algorithm::is_space
    "isalnum",      # LC_CTYPE
    "isalpha",      # LC_CTYPE
    "isblank",      # LC_CTYPE
    "iscntrl",      # LC_CTYPE
    "isctype",      # LC_CTYPE
    "isdigit",      # LC_CTYPE
    "isgraph",      # LC_CTYPE
    "islower",      # LC_CTYPE
    "isprint",      # LC_CTYPE
    "ispunct",      # LC_CTYPE
    "isspace",      # LC_CTYPE
    "isupper",      # LC_CTYPE
    "iswalnum",     # LC_CTYPE
    "iswalpha",     # LC_CTYPE
    "iswblank",     # LC_CTYPE
    "iswcntrl",     # LC_CTYPE
    "iswctype",     # LC_CTYPE
    "iswdigit",     # LC_CTYPE
    "iswgraph",     # LC_CTYPE
    "iswlower",     # LC_CTYPE
    "iswprint",     # LC_CTYPE
    "iswpunct",     # LC_CTYPE
    "iswspace",     # LC_CTYPE
    "iswupper",     # LC_CTYPE
    "iswxdigit",    # LC_CTYPE
    "isxdigit",     # LC_CTYPE
    "localeconv",   # LC_NUMERIC + LC_MONETARY
    "mblen",        # LC_CTYPE
    "mbrlen",
    "mbrtowc",
    "mbsinit",
    "mbsnrtowcs",
    "mbsrtowcs",
    "mbstowcs",     # LC_CTYPE
    "mbtowc",       # LC_CTYPE
    "mktime",
    "normalize",    # boost::locale::normalize
    "printf",       # LC_NUMERIC
    "putwc",
    "putwchar",
    "scanf",        # LC_NUMERIC
    "setlocale",
    "snprintf",
    "sprintf",
    "sscanf",
    "std::locale::global",
    "std::to_string",
    "stod",
    "stof",
    "stoi",
    "stol",
    "stold",
    "stoll",
    "stoul",
    "stoull",
    "strcasecmp",
    "strcasestr",
    "strcoll",      # LC_COLLATE
    "strerror",
    "strfmon",
    "strftime",     # LC_TIME
    "strncasecmp",
    "strptime",
    "strtod",       # LC_NUMERIC
    "strtof",
    "strtoimax",
    "strtol",       # LC_NUMERIC
    "strtold",
    "strtoll",
    "strtoq",
    "strtoul",      # LC_NUMERIC
    "strtoull",
    "strtoumax",
    "strtouq",
    "strxfrm",      # LC_COLLATE
    "swprintf",
    "to_lower",     # boost::locale::to_lower
    "to_title",     # boost::locale::to_title
    "to_upper",     # boost::locale::to_upper
    "tolower",      # LC_CTYPE
    "toupper",      # LC_CTYPE
    "towctrans",
    "towlower",     # LC_CTYPE
    "towupper",     # LC_CTYPE
    "trim",         # boost::algorithm::trim
    "trim_left",    # boost::algorithm::trim_left
    "trim_right",   # boost::algorithm::trim_right
    "ungetwc",
    "vasprintf",
    "vdprintf",
    "versionsort",
    "vfprintf",
    "vfscanf",
    "vfwprintf",
    "vprintf",
    "vscanf",
    "vsnprintf",
    "vsprintf",
    "vsscanf",
    "vswprintf",
    "vwprintf",
    "wcrtomb",
    "wcscasecmp",
    "wcscoll",      # LC_COLLATE
    "wcsftime",     # LC_TIME
    "wcsncasecmp",
    "wcsnrtombs",
    "wcsrtombs",
    "wcstod",       # LC_NUMERIC
    "wcstof",
    "wcstoimax",
    "wcstol",       # LC_NUMERIC
    "wcstold",
    "wcstoll",
    "wcstombs",     # LC_CTYPE
    "wcstoul",      # LC_NUMERIC
    "wcstoull",
    "wcstoumax",
    "wcswidth",
    "wcsxfrm",      # LC_COLLATE
    "wctob",
    "wctomb",       # LC_CTYPE
    "wctrans",
    "wctype",
    "wcwidth",
    "wprintf",
]
def find_locale_dependent_function_uses():
    """Return the `git grep` output lines that mention a locale dependent function.

    Runs `git grep -E` over the files matching *.cpp and *.h, leaving out the
    paths listed in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS. A line is
    selected when it contains any name from LOCALE_DEPENDENT_FUNCTIONS
    (optionally followed by "_r" or "_s") with a character on either side
    that is not an identifier character, backslash, backtick, quote or angle
    bracket.

    Returns:
        The selected output lines as a list of strings. The list is empty
        when git grep exits with status 1.

    Raises:
        CalledProcessError: if git grep exits with a status greater than 1.
    """
    regexp_locale_dependent_functions = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
    exclude_args = [":(exclude)" + excl for excl in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS]
    git_grep_command = [
        "git", "grep", "-E",
        "[^a-zA-Z0-9_\\`'\"<>](" + regexp_locale_dependent_functions + ")(_r|_s)?[^a-zA-Z0-9_\\`'\"<>]",
        "--", "*.cpp", "*.h",
    ] + exclude_args

    try:
        return check_output(git_grep_command, text=True, encoding="utf8").splitlines()
    except CalledProcessError as e:
        # Status 1 is not treated as a failure: it yields an empty result.
        # Anything above that is propagated to the caller unchanged.
        if e.returncode > 1:
            raise
        return []
def main():
    """Report uses of locale dependent functions and exit with a status code.

    For every name in LOCALE_DEPENDENT_FUNCTIONS the `git grep` output lines
    are filtered down to those that
      * contain the name (optionally followed by "_r" or "_s") delimited the
        same way as in the grep expression,
      * do not have `//`, `*`, `/*` or `"` as the first non-blank text after
        the "<file>.c:", "<file>.cpp:" or "<file>.h:" prefix with the name
        appearing later on the line, and
      * do not match any pattern in KNOWN_VIOLATIONS.
    Remaining lines are printed grouped by function name.

    Raises:
        SystemExit: always; the exit code is 1 if any use was reported and
            0 otherwise.
    """
    exit_code = 0

    # Joining an empty list would give the pattern "", which matches every
    # line and would silently suppress all findings. Only build the ignore
    # pattern when there is something to ignore.
    ignore_known_violations = re.compile("|".join(KNOWN_VIOLATIONS)) if KNOWN_VIOLATIONS else None
    git_grep_output = find_locale_dependent_function_uses()

    for locale_dependent_function in LOCALE_DEPENDENT_FUNCTIONS:
        # Compile once per function instead of once per output line.
        use_pattern = re.compile(
            "[^a-zA-Z0-9_\\`'\"<>]" + locale_dependent_function + "(_r|_s)?[^a-zA-Z0-9_\\`'\"<>]")
        comment_or_string_pattern = re.compile(
            "\\.(c|cpp|h):\\s*(//|\\*|/\\*|\").*" + locale_dependent_function)

        matches = [line for line in git_grep_output
                   if use_pattern.search(line)
                   and not comment_or_string_pattern.search(line)
                   and not (ignore_known_violations and ignore_known_violations.search(line))]
        if matches:
            print(f"The locale dependent function {locale_dependent_function}(...) appears to be used:")
            for match in matches:
                print(match)
            print("")
            exit_code = 1

    if exit_code == 1:
        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale-dependent functions if possible.\n")
        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")

    sys.exit(exit_code)
# Run the linter only when the file is executed as a script.
if __name__ == "__main__":
    main()