#!/usr/bin/env python3
# Copyright (c) 2018-2022 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
# whereas no such call is made in bitcoind.
#
# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
# specified by the user's LC_ALL (or LC_*) environment variable as the new
# C locale.
#
# In contrast, bitcoind does not opt in to localization -- no call to
# setlocale(LC_ALL, "") is made and the environment variables LC_* are
# thus ignored.
#
# This results in situations where bitcoind is guaranteed to be running
# with the classic locale ("C") whereas the locale of bitcoin-qt will vary
# depending on the user's environment variables.
#
# An example: Assuming the environment variable LC_ALL=de_DE then the
# call std::to_string(1.23) will return "1.230000" in bitcoind but
# "1,230000" in bitcoin-qt.
#
# From the Qt documentation:
# "On Unix/Linux Qt is configured to use the system locale settings by default.
#  This can cause a conflict when using POSIX functions, for instance, when
#  converting between data types such as floats and strings, since the notation
#  may differ between locales. To get around this problem, call the POSIX function
#  setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
#  or QCoreApplication to reset the locale that is used for number formatting to
#  "C"-locale."
#
# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
# https://stackoverflow.com/a/34878283 for more details.

import re
import sys

from subprocess import check_output, CalledProcessError


KNOWN_VIOLATIONS = [
    "src/dbwrapper.cpp:.*vsnprintf",
    "src/test/fuzz/locale.cpp:.*setlocale",
    "src/test/util_tests.cpp:.*strtoll",
    "src/wallet/bdb.cpp:.*DbEnv::strerror",  # False positive
    "src/util/syserror.cpp:.*strerror",      # Outside this function use `SysErrorString`
]

REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
    "src/crypto/ctaes/",
    "src/leveldb/",
    "src/secp256k1/",
    "src/minisketch/",
    "src/tinyformat.h",
]

LOCALE_DEPENDENT_FUNCTIONS = [
    "alphasort",    # LC_COLLATE (via strcoll)
    "asctime",      # LC_TIME (directly)
    "asprintf",     # (via vasprintf)
    "atof",         # LC_NUMERIC (via strtod)
    "atoi",         # LC_NUMERIC (via strtol)
    "atol",         # LC_NUMERIC (via strtol)
    "atoll",        # (via strtoll)
    "atoq",
    "btowc",        # LC_CTYPE (directly)
    "ctime",        # (via asctime or localtime)
    "dprintf",      # (via vdprintf)
    "fgetwc",
    "fgetws",
    "fold_case",    # boost::locale::fold_case
    "fprintf",      # (via vfprintf)
    "fputwc",
    "fputws",
    "fscanf",       # (via __vfscanf)
    "fwprintf",     # (via __vfwprintf)
    "getdate",      # via __getdate_r => isspace // __localtime_r
    "getwc",
    "getwchar",
    "is_digit",     # boost::algorithm::is_digit
    "is_space",     # boost::algorithm::is_space
    "isalnum",      # LC_CTYPE
    "isalpha",      # LC_CTYPE
    "isblank",      # LC_CTYPE
    "iscntrl",      # LC_CTYPE
    "isctype",      # LC_CTYPE
    "isdigit",      # LC_CTYPE
    "isgraph",      # LC_CTYPE
    "islower",      # LC_CTYPE
    "isprint",      # LC_CTYPE
    "ispunct",      # LC_CTYPE
    "isspace",      # LC_CTYPE
    "isupper",      # LC_CTYPE
    "iswalnum",     # LC_CTYPE
    "iswalpha",     # LC_CTYPE
    "iswblank",     # LC_CTYPE
    "iswcntrl",     # LC_CTYPE
    "iswctype",     # LC_CTYPE
    "iswdigit",     # LC_CTYPE
    "iswgraph",     # LC_CTYPE
    "iswlower",     # LC_CTYPE
    "iswprint",     # LC_CTYPE
    "iswpunct",     # LC_CTYPE
    "iswspace",     # LC_CTYPE
    "iswupper",     # LC_CTYPE
    "iswxdigit",    # LC_CTYPE
    "isxdigit",     # LC_CTYPE
    "localeconv",   # LC_NUMERIC + LC_MONETARY
    "mblen",        # LC_CTYPE
    "mbrlen",
    "mbrtowc",
    "mbsinit",
    "mbsnrtowcs",
    "mbsrtowcs",
    "mbstowcs",     # LC_CTYPE
    "mbtowc",       # LC_CTYPE
    "mktime",
    "normalize",    # boost::locale::normalize
    "printf",       # LC_NUMERIC
    "putwc",
    "putwchar",
    "scanf",        # LC_NUMERIC
    "setlocale",
    "snprintf",
    "sprintf",
    "sscanf",
    "std::locale::global",
    "std::to_string",
    "stod",
    "stof",
    "stoi",
    "stol",
    "stold",
    "stoll",
    "stoul",
    "stoull",
    "strcasecmp",
    "strcasestr",
    "strcoll",      # LC_COLLATE
    "strerror",
    "strfmon",
    "strftime",     # LC_TIME
    "strncasecmp",
    "strptime",
    "strtod",       # LC_NUMERIC
    "strtof",
    "strtoimax",
    "strtol",       # LC_NUMERIC
    "strtold",
    "strtoll",
    "strtoq",
    "strtoul",      # LC_NUMERIC
    "strtoull",
    "strtoumax",
    "strtouq",
    "strxfrm",      # LC_COLLATE
    "swprintf",
    "to_lower",     # boost::locale::to_lower
    "to_title",     # boost::locale::to_title
    "to_upper",     # boost::locale::to_upper
    "tolower",      # LC_CTYPE
    "toupper",      # LC_CTYPE
    "towctrans",
    "towlower",     # LC_CTYPE
    "towupper",     # LC_CTYPE
    "trim",         # boost::algorithm::trim
    "trim_left",    # boost::algorithm::trim_left
    "trim_right",   # boost::algorithm::trim_right
    "ungetwc",
    "vasprintf",
    "vdprintf",
    "versionsort",
    "vfprintf",
    "vfscanf",
    "vfwprintf",
    "vprintf",
    "vscanf",
    "vsnprintf",
    "vsprintf",
    "vsscanf",
    "vswprintf",
    "vwprintf",
    "wcrtomb",
    "wcscasecmp",
    "wcscoll",      # LC_COLLATE
    "wcsftime",     # LC_TIME
    "wcsncasecmp",
    "wcsnrtombs",
    "wcsrtombs",
    "wcstod",       # LC_NUMERIC
    "wcstof",
    "wcstoimax",
    "wcstol",       # LC_NUMERIC
    "wcstold",
    "wcstoll",
    "wcstombs",     # LC_CTYPE
    "wcstoul",      # LC_NUMERIC
    "wcstoull",
    "wcstoumax",
    "wcswidth",
    "wcsxfrm",      # LC_COLLATE
    "wctob",
    "wctomb",       # LC_CTYPE
    "wctrans",
    "wctype",
    "wcwidth",
    "wprintf"
]


def find_locale_dependent_function_uses():
    regexp_locale_dependent_functions = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
    exclude_args = [":(exclude)" + excl for excl in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS]
    git_grep_command = ["git", "grep", "-E", "[^a-zA-Z0-9_\\`'\"<>](" +  regexp_locale_dependent_functions + ")(_r|_s)?[^a-zA-Z0-9_\\`'\"<>]", "--", "*.cpp", "*.h"] + exclude_args
    git_grep_output = list()

    try:
        git_grep_output = check_output(git_grep_command, text=True, encoding="utf8").splitlines()
    except CalledProcessError as e:
        if e.returncode > 1:
            raise e

    return git_grep_output


def main():
    exit_code = 0

    regexp_ignore_known_violations = "|".join(KNOWN_VIOLATIONS)
    git_grep_output = find_locale_dependent_function_uses()

    for locale_dependent_function in LOCALE_DEPENDENT_FUNCTIONS:
        matches =  [line for line in git_grep_output
                    if re.search("[^a-zA-Z0-9_\\`'\"<>]" + locale_dependent_function + "(_r|_s)?[^a-zA-Z0-9_\\`'\"<>]", line)
                    and not re.search("\\.(c|cpp|h):\\s*(//|\\*|/\\*|\").*" + locale_dependent_function, line)
                    and not re.search(regexp_ignore_known_violations, line)]
        if matches:
            print(f"The locale dependent function {locale_dependent_function}(...) appears to be used:")
            for match in matches:
                print(match)
            print("")
            exit_code = 1

    if exit_code == 1:
        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale-dependent functions if possible.\n")
        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")

    sys.exit(exit_code)


if __name__ == "__main__":
    main()