2019-01-16 13:49:01 -03:00
#!/usr/bin/env python3
2023-10-12 11:46:55 -03:00
# Copyright (c) 2019-present The Bitcoin Core developers
2019-01-16 13:49:01 -03:00
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
""" Run fuzz test targets.
"""
2020-04-17 15:44:29 -04:00
from concurrent . futures import ThreadPoolExecutor , as_completed
2023-06-20 09:58:21 -04:00
from pathlib import Path
2019-01-16 13:49:01 -03:00
import argparse
import configparser
2020-04-17 15:44:29 -04:00
import logging
2019-01-16 13:49:01 -03:00
import os
2024-04-18 05:27:46 -04:00
import platform
2023-07-28 04:44:44 -04:00
import random
2019-01-16 13:49:01 -03:00
import subprocess
2020-04-17 15:44:29 -04:00
import sys
2019-01-16 13:49:01 -03:00
2020-02-19 11:10:22 -03:00
2021-01-26 06:44:32 -03:00
def get_fuzz_env(*, target, source_dir):
    """Build the environment mapping handed to a fuzz binary invocation.

    Args:
        target: Name of the fuzz target; exported as ``FUZZ``.
        source_dir: Root of the source tree; used to build the path of the
            UBSan suppressions file.

    Returns:
        A new dict of environment variables. Callers in this file pass it as
        the ``env`` argument of ``subprocess.run``, so every value must be a
        string.
    """
    # All three sanitizers share one symbolizer; LLVM_SYMBOLIZER_PATH overrides the default.
    symbolizer = os.environ.get('LLVM_SYMBOLIZER_PATH', "/usr/bin/llvm-symbolizer")
    fuzz_env = {
        'FUZZ': target,
        'UBSAN_OPTIONS':
            f'suppressions={source_dir}/test/sanitizer_suppressions/ubsan:print_stacktrace=1:halt_on_error=1:report_error_type=1',
        'UBSAN_SYMBOLIZER_PATH': symbolizer,
        "ASAN_OPTIONS": "detect_leaks=1:detect_stack_use_after_return=1:check_initialization_order=1:strict_init_order=1",
        'ASAN_SYMBOLIZER_PATH': symbolizer,
        'MSAN_SYMBOLIZER_PATH': symbolizer,
    }
    if platform.system() == "Windows":
        # On Windows, `env` option must include valid `SystemRoot`.
        # Only forward it when it is actually set: a None value is not a
        # valid environment entry and would make subprocess raise.
        system_root = os.environ.get('SystemRoot')
        if system_root is not None:
            fuzz_env['SystemRoot'] = system_root
    return fuzz_env
2021-01-20 12:48:39 -03:00
2019-01-16 13:49:01 -03:00
def main():
    """Command-line entry point: run, merge or generate fuzz inputs.

    Reads ``../config.ini`` relative to this file, asks the fuzz binary for
    its list of targets, narrows that list by the positional ``target``
    arguments and ``--exclude``, and then dispatches to exactly one of:

    * ``generate_corpus`` when ``--generate`` is given,
    * ``merge_inputs`` when ``--m_dir`` is given,
    * ``run_once`` otherwise.

    Exits with status 1 when the fuzz binary was not built, when no targets
    are found or none remain selected, when libFuzzer is required but not
    detected, or when the ``-help=1`` probe of the fuzz binary times out.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='''Run the fuzz targets with all inputs from the corpus_dir once.''',
    )
    parser.add_argument(
        "-l",
        "--loglevel",
        dest="loglevel",
        default="INFO",
        help="log events at this level and higher to the console. Can be set to DEBUG, INFO, WARNING, ERROR or CRITICAL. Passing --loglevel DEBUG will output all logs to console.",
    )
    parser.add_argument(
        '--valgrind',
        action='store_true',
        help='If true, run fuzzing binaries under the valgrind memory error detector',
    )
    parser.add_argument(
        "--empty_min_time",
        type=int,
        help="If set, run at least this long, if the existing fuzz inputs directory is empty.",
    )
    parser.add_argument(
        '-x',
        '--exclude',
        help="A comma-separated list of targets to exclude",
    )
    parser.add_argument(
        '--par',
        '-j',
        type=int,
        default=4,
        help='How many targets to merge or execute in parallel.',
    )
    parser.add_argument(
        'corpus_dir',
        help='The corpus to run on (must contain subfolders for each fuzz target).',
    )
    parser.add_argument(
        'target',
        nargs='*',
        help='The target(s) to run. Default is to run all targets.',
    )
    parser.add_argument(
        '--m_dir',
        action="append",
        help="Merge inputs from these directories into the corpus_dir.",
    )
    parser.add_argument(
        '-g',
        '--generate',
        action='store_true',
        help='Create new corpus (or extend the existing ones) by running'
             ' the given targets for a finite number of times. Outputs them to'
             ' the passed corpus_dir.'
    )

    args = parser.parse_args()
    args.corpus_dir = Path(args.corpus_dir)

    # Set up logging. A purely numeric --loglevel is used as a numeric level,
    # anything else is upper-cased and passed on as a level name.
    logging.basicConfig(
        format='%(message)s',
        level=int(args.loglevel) if args.loglevel.isdigit() else args.loglevel.upper(),
    )

    # Read config generated by configure.
    config = configparser.ConfigParser()
    configfile = os.path.abspath(os.path.dirname(__file__)) + "/../config.ini"
    # Use a context manager so the file handle is closed deterministically.
    with open(configfile, encoding="utf8") as config_fp:
        config.read_file(config_fp)

    if not config["components"].getboolean("ENABLE_FUZZ_BINARY"):
        logging.error("Must have fuzz executable built")
        sys.exit(1)

    src_dir = config['environment']['SRCDIR']
    # BITCOINFUZZ overrides the default location of the fuzz binary in the build directory.
    fuzz_bin = os.getenv("BITCOINFUZZ", default=os.path.join(config["environment"]["BUILDDIR"], 'src', 'test', 'fuzz', 'fuzz'))

    # Build list of tests
    test_list_all = parse_test_list(
        fuzz_bin=fuzz_bin,
        source_dir=src_dir,
    )

    if not test_list_all:
        logging.error("No fuzz targets found")
        sys.exit(1)

    logging.debug("{} fuzz target(s) found: {}".format(len(test_list_all), " ".join(sorted(test_list_all))))

    args.target = args.target or test_list_all  # By default run all
    test_list_error = list(set(args.target).difference(set(test_list_all)))
    if test_list_error:
        # Unknown names are reported but do not abort the run of the known ones.
        logging.error("Unknown fuzz targets selected: {}".format(test_list_error))
    test_list_selection = list(set(test_list_all).intersection(set(args.target)))
    if not test_list_selection:
        logging.error("No fuzz targets selected")
        # Nothing to do; continuing would only fail later on test_list_selection[0].
        sys.exit(1)
    if args.exclude:
        for excluded_target in args.exclude.split(","):
            if excluded_target not in test_list_selection:
                logging.error("Target \"{}\" not found in current target list.".format(excluded_target))
                continue
            test_list_selection.remove(excluded_target)
        if not test_list_selection:
            # Every selected target was excluded again.
            logging.error("No fuzz targets selected")
            sys.exit(1)
    test_list_selection.sort()

    logging.info("{} of {} detected fuzz target(s) selected: {}".format(len(test_list_selection), len(test_list_all), " ".join(test_list_selection)))

    if not args.generate:
        # A target lacks a corpus when its folder is missing or empty.
        test_list_missing_corpus = []
        for t in test_list_selection:
            corpus_path = os.path.join(args.corpus_dir, t)
            if not os.path.exists(corpus_path) or not os.listdir(corpus_path):
                test_list_missing_corpus.append(t)
        test_list_missing_corpus.sort()
        if test_list_missing_corpus:
            logging.info(
                "Fuzzing harnesses lacking a corpus: {}".format(
                    " ".join(test_list_missing_corpus)
                )
            )
            logging.info("Please consider adding a fuzz corpus at https://github.com/bitcoin-core/qa-assets")

    # Probe the binary's help text (written to stderr) to detect libFuzzer.
    # Only the probe itself can time out, so only it sits inside the try.
    try:
        help_output = subprocess.run(
            args=[
                fuzz_bin,
                '-help=1',
            ],
            env=get_fuzz_env(target=test_list_selection[0], source_dir=src_dir),
            timeout=20,
            check=False,
            stderr=subprocess.PIPE,
            text=True,
        ).stderr
    except subprocess.TimeoutExpired:
        logging.error("subprocess timed out: Currently only libFuzzer is supported")
        sys.exit(1)

    using_libfuzzer = "libFuzzer" in help_output
    if (args.generate or args.m_dir) and not using_libfuzzer:
        logging.error("Must be built with libFuzzer")
        sys.exit(1)

    with ThreadPoolExecutor(max_workers=args.par) as fuzz_pool:
        if args.generate:
            return generate_corpus(
                fuzz_pool=fuzz_pool,
                src_dir=src_dir,
                fuzz_bin=fuzz_bin,
                corpus_dir=args.corpus_dir,
                targets=test_list_selection,
            )

        if args.m_dir:
            merge_inputs(
                fuzz_pool=fuzz_pool,
                corpus=args.corpus_dir,
                test_list=test_list_selection,
                src_dir=src_dir,
                fuzz_bin=fuzz_bin,
                merge_dirs=[Path(m_dir) for m_dir in args.m_dir],
            )
            return

        run_once(
            fuzz_pool=fuzz_pool,
            corpus=args.corpus_dir,
            test_list=test_list_selection,
            src_dir=src_dir,
            fuzz_bin=fuzz_bin,
            using_libfuzzer=using_libfuzzer,
            use_valgrind=args.valgrind,
            empty_min_time=args.empty_min_time,
        )
2019-01-16 13:49:01 -03:00
2023-07-11 09:48:42 -04:00
def transform_process_message_target(targets, src_dir):
    """Add a target per process message, and also keep ("process_message", {}) to allow for
    cross-pollination, or unlimited search.

    Args:
        targets: List of ``(target_name, env_dict)`` tuples. It is extended in
            place when it contains ``("process_message", {})``.
        src_dir: ``pathlib.Path`` of the source tree root; ``src/protocol.h``
            below it is searched with ``git grep``.

    Returns:
        The same ``targets`` list object, possibly extended with one
        ``("process_message", {"LIMIT_TO_MESSAGE_TYPE": <name>})`` entry per
        message type found.

    Raises:
        subprocess.CalledProcessError: if ``git grep`` exits non-zero.
        AssertionError: if no message type could be parsed from its output.
    """
    p2p_msg_target = "process_message"
    if (p2p_msg_target, {}) in targets:
        lines = subprocess.run(
            ["git", "grep", "--function-context", "ALL_NET_MESSAGE_TYPES{", src_dir / "src" / "protocol.h"],
            check=True,
            stdout=subprocess.PIPE,
            text=True,
        ).stdout.splitlines()
        # Context lines are reported as "<path>-<source line>". Strip the
        # indentation of the source line before matching so that the parser
        # does not depend on the exact indentation width used in protocol.h.
        # NOTE(review): the fixed "src/protocol.h-" prefix presumably relies on
        # git printing paths relative to the repository root - confirm.
        context_prefix = "src/protocol.h-"
        message_types = []
        for line in lines:
            if not line.startswith(context_prefix):
                continue
            content = line[len(context_prefix):].lstrip()
            if content.startswith("NetMsgType::"):
                # "NetMsgType::VERSION," -> "version"
                message_types.append(content.split("::", 1)[1].split(",")[0].lower())
        assert message_types, "no NetMsgType entries found in git grep output"
        targets += [(p2p_msg_target, {"LIMIT_TO_MESSAGE_TYPE": m}) for m in message_types]
    return targets
def transform_rpc_target(targets, src_dir):
    """Add a target per RPC command, and also keep ("rpc", {}) to allow for cross-pollination,
    or unlimited search.

    Args:
        targets: List of ``(target_name, env_dict)`` tuples. It is extended in
            place when it contains ``("rpc", {})``.
        src_dir: ``pathlib.Path`` of the source tree root;
            ``src/test/fuzz/rpc.cpp`` below it is searched with ``git grep``.

    Returns:
        The same ``targets`` list object, possibly extended with one
        ``("rpc", {"LIMIT_TO_RPC_COMMAND": <name>})`` entry per command found.

    Raises:
        subprocess.CalledProcessError: if ``git grep`` exits non-zero.
        AssertionError: if no RPC command could be parsed from its output.
    """
    rpc_target = "rpc"
    if (rpc_target, {}) in targets:
        lines = subprocess.run(
            ["git", "grep", "--function-context", "RPC_COMMANDS_SAFE_FOR_FUZZING{", src_dir / "src" / "test" / "fuzz" / "rpc.cpp"],
            check=True,
            stdout=subprocess.PIPE,
            text=True,
        ).stdout.splitlines()
        # Context lines are reported as "<path>-<source line>". Strip the
        # indentation of the source line before matching so that the parser
        # does not depend on the exact indentation width used in rpc.cpp.
        # NOTE(review): the fixed "src/test/fuzz/rpc.cpp-" prefix presumably
        # relies on git printing paths relative to the repository root - confirm.
        context_prefix = "src/test/fuzz/rpc.cpp-"
        rpc_commands = []
        for line in lines:
            if not line.startswith(context_prefix):
                continue
            content = line[len(context_prefix):].lstrip()
            if content.startswith("\""):
                # '"getblock",' -> 'getblock': text between the first two quotes.
                rpc_commands.append(content.split("\"", 1)[1].split("\"")[0])
        assert rpc_commands, "no RPC command entries found in git grep output"
        targets += [(rpc_target, {"LIMIT_TO_RPC_COMMAND": r}) for r in rpc_commands]
    return targets
2024-04-06 12:03:39 -03:00
def generate_corpus(*, fuzz_pool, src_dir, fuzz_bin, corpus_dir, targets):
    """Generates new corpus.

    Run {targets} without input, and outputs the generated corpus to
    {corpus_dir}.

    Each target is paired with a dict of target-specific environment
    variables; the "process_message" and "rpc" targets are additionally
    expanded by the two transform helpers. One fuzz process per resulting
    entry is submitted to ``fuzz_pool`` and all of them are awaited, so the
    first failing process re-raises its exception here.
    """
    logging.info(f"Generating corpus to {corpus_dir}")
    source_root = Path(src_dir)
    # expand to add dictionary for target-specific env variables
    expanded = [(name, {}) for name in targets]
    expanded = transform_process_message_target(expanded, source_root)
    expanded = transform_rpc_target(expanded, source_root)

    def job(command, t, t_env):
        logging.debug(f"Running '{command}'")
        # The shared fuzz environment is applied last, so its keys win over t_env.
        run_env = dict(t_env)
        run_env.update(get_fuzz_env(target=t, source_dir=src_dir))
        completed = subprocess.run(
            command,
            env=run_env,
            check=True,
            stderr=subprocess.PIPE,
            text=True,
        )
        logging.debug(f"Command '{command}' output:\n'{completed.stderr}'\n")

    pending = []
    for target, t_env in expanded:
        target_corpus_dir = corpus_dir / target
        os.makedirs(target_corpus_dir, exist_ok=True)
        # Enable value profiling for roughly 30% of the runs.
        use_value_profile = 1 if random.random() < .3 else 0
        fuzz_command = [
            fuzz_bin,
            "-rss_limit_mb=8000",
            "-max_total_time=6000",
            "-reload=0",
            f"-use_value_profile={use_value_profile}",
            target_corpus_dir,
        ]
        pending.append(fuzz_pool.submit(job, fuzz_command, target, t_env))

    # Wait for every run; result() re-raises any exception from the worker.
    for finished in as_completed(pending):
        finished.result()
2024-04-06 12:03:39 -03:00
def merge_inputs(*, fuzz_pool, corpus, test_list, src_dir, fuzz_bin, merge_dirs):
    """Merge the inputs found in ``merge_dirs`` into ``corpus``, per target.

    For every target in ``test_list`` the per-target folder is created below
    ``corpus`` and below each merge directory, and one merge run of
    ``fuzz_bin`` is submitted to ``fuzz_pool``. All runs are awaited, so a
    failing run re-raises its exception here.
    """
    logging.info(f"Merge the inputs from the passed dir into the corpus_dir. Passed dirs {merge_dirs}")

    def job(t, args):
        header = 'Run {} with args {}\n'.format(t, " ".join(args))
        completed = subprocess.run(
            args,
            env=get_fuzz_env(target=t, source_dir=src_dir),
            check=True,
            stderr=subprocess.PIPE,
            text=True,
        )
        logging.debug(header + completed.stderr)

    merge_flags = [
        '-rss_limit_mb=8000',
        '-set_cover_merge=1',
        # set_cover_merge is used instead of -merge=1 to reduce the overall
        # size of the qa-assets git repository a bit, but more importantly,
        # to cut the runtime to iterate over all fuzz inputs [0].
        # [0] https://github.com/bitcoin-core/qa-assets/issues/130#issuecomment-1761760866
        '-shuffle=0',
        '-prefer_small=1',
        '-use_value_profile=0',
        # use_value_profile is enabled by oss-fuzz [0], but disabled for
        # now to avoid bloating the qa-assets git repository [1].
        # [0] https://github.com/google/oss-fuzz/issues/1406#issuecomment-387790487
        # [1] https://github.com/bitcoin-core/qa-assets/issues/130#issuecomment-1749075891
    ]

    jobs = []
    for t in test_list:
        target_corpus = os.path.join(corpus, t)
        # The target's corpus folder comes first, followed by the folders to merge from.
        sources = [str(m_dir / t) for m_dir in merge_dirs]
        args = [fuzz_bin] + merge_flags + [target_corpus] + sources
        os.makedirs(target_corpus, exist_ok=True)
        for m_dir in merge_dirs:
            (m_dir / t).mkdir(exist_ok=True)
        jobs.append(fuzz_pool.submit(job, t, args))

    # Wait for every merge; result() re-raises any exception from the worker.
    for finished in as_completed(jobs):
        finished.result()
2019-02-19 17:46:29 -03:00
2020-04-17 15:44:29 -04:00
2024-04-06 12:03:39 -03:00
def run_once(*, fuzz_pool, corpus, test_list, src_dir, fuzz_bin, using_libfuzzer, use_valgrind, empty_min_time):
    """Run every target in ``test_list`` once over its corpus folder.

    Args:
        fuzz_pool: Executor the per-target runs are submitted to.
        corpus: ``pathlib.Path`` holding one sub-folder per target; missing
            sub-folders are created.
        test_list: Names of the targets to run.
        src_dir: Source tree root, forwarded to ``get_fuzz_env``.
        fuzz_bin: Path of the fuzz binary.
        using_libfuzzer: Whether the binary was detected as a libFuzzer build.
            Only then are ``-runs=1`` / ``-max_total_time`` passed and a
            summary of the "DONE" lines printed.
        use_valgrind: Prefix every command with valgrind.
        empty_min_time: If truthy, value passed as ``-max_total_time`` for
            targets whose corpus folder is empty (libFuzzer only).

    Exits the process with status 1 as soon as a completed run reports a
    non-zero exit code.
    """
    def job(t, args):
        # Keep the header on its own line so it is not glued to the first
        # line of the process output in the debug log.
        output = 'Run {} with args {}\n'.format(t, args)
        result = subprocess.run(
            args,
            env=get_fuzz_env(target=t, source_dir=src_dir),
            stderr=subprocess.PIPE,
            text=True,
        )
        output += result.stderr
        return output, result, t

    jobs = []
    for t in test_list:
        corpus_path = corpus / t
        os.makedirs(corpus_path, exist_ok=True)
        args = [
            fuzz_bin,
        ]
        empty_dir = not any(corpus_path.iterdir())
        if using_libfuzzer:
            if empty_min_time and empty_dir:
                # No inputs to replay: fuzz for a while instead, without a corpus folder.
                args += [f"-max_total_time={empty_min_time}"]
            else:
                args += [
                    "-runs=1",
                    corpus_path,
                ]
        else:
            args += [corpus_path]
        if use_valgrind:
            args = ['valgrind', '--quiet', '--error-exitcode=1'] + args
        jobs.append(fuzz_pool.submit(job, t, args))

    stats = []
    for future in as_completed(jobs):
        output, result, target = future.result()
        logging.debug(output)
        try:
            result.check_returncode()
        except subprocess.CalledProcessError as e:
            if e.stdout:
                logging.info(e.stdout)
            if e.stderr:
                logging.info(e.stderr)
            logging.info(f"⚠️ Failure generated from target with exit code {e.returncode}: {result.args}")
            sys.exit(1)
        if using_libfuzzer:
            # Search only the process output, not the header line, which
            # contains the target name and paths and could itself hold "DONE".
            done_stat = [l for l in result.stderr.splitlines() if "DONE" in l]
            assert len(done_stat) == 1, f"expected exactly one DONE line for {target}, got {len(done_stat)}"
            stats.append((target, done_stat[0]))

    if using_libfuzzer:
        print("Summary:")
        # default=0 keeps an empty test_list from raising ValueError in max().
        max_len = max((len(name) for name, _ in stats), default=0)
        for t, s in sorted(stats):
            t = t.ljust(max_len + 1)
            print(f"{t}{s}")
2019-01-16 13:49:01 -03:00
2024-03-07 07:18:32 -03:00
def parse_test_list(*, fuzz_bin, source_dir):
    """Return the lines the fuzz binary prints to stdout when asked to list its targets.

    The binary is started with ``PRINT_ALL_FUZZ_TARGETS_AND_ABORT`` set on top
    of the regular fuzz environment (built with an empty target name).
    Callers in this file treat every returned line as one target name.

    Raises:
        subprocess.CalledProcessError: if the binary exits non-zero.
    """
    listing_env = {'PRINT_ALL_FUZZ_TARGETS_AND_ABORT': ''}
    # Keys from the regular fuzz environment are applied last and therefore win.
    listing_env.update(get_fuzz_env(target="", source_dir=source_dir))
    completed = subprocess.run(
        fuzz_bin,
        env=listing_env,
        stdout=subprocess.PIPE,
        text=True,
        check=True,
    )
    return completed.stdout.splitlines()
# Run the fuzz test runner only when executed as a script, not on import.
if __name__ == "__main__":
    main()