diff --git a/doc/developer-notes.md b/doc/developer-notes.md index f10ad8e877..f4513dca1f 100644 --- a/doc/developer-notes.md +++ b/doc/developer-notes.md @@ -543,7 +543,10 @@ its upstream repository. Current subtrees include: - src/leveldb - - Upstream at https://github.com/google/leveldb ; Maintained by Google, but open important PRs to Core to avoid delay + - Upstream at https://github.com/google/leveldb ; Maintained by Google, but + open important PRs to Core to avoid delay. + - **Note**: Follow the instructions in [Upgrading LevelDB](#upgrading-leveldb) when + merging upstream changes to the leveldb subtree. - src/libsecp256k1 - Upstream at https://github.com/bitcoin-core/secp256k1/ ; actively maintaned by Core contributors. @@ -554,6 +557,52 @@ Current subtrees include: - src/univalue - Upstream at https://github.com/jgarzik/univalue ; report important PRs to Core to avoid delay. +Upgrading LevelDB +--------------------- + +Extra care must be taken when upgrading LevelDB. This section explains issues +you must be aware of. + +### File Descriptor Counts + +In most configurations we use the default LevelDB value for `max_open_files`, +which is 1000 at the time of this writing. If LevelDB actually uses this many +file descriptors it will cause problems with Bitcoin's `select()` loop, because +it may cause new sockets to be created where the fd value is >= 1024. For this +reason, on 64-bit Unix systems we rely on an internal LevelDB optimization that +uses `mmap()` + `close()` to open table files without actually retaining +references to the table file descriptors. If you are upgrading LevelDB, you must +sanity check the changes to make sure that this assumption remains valid. + +In addition to reviewing the upstream changes in `env_posix.cc`, you can use `lsof` to +check this. For example, on Linux this command will show open `.ldb` file counts: + +```bash +$ lsof -p $(pidof bitcoind) |\ + awk 'BEGIN { fd=0; mem=0; } /ldb$/ { if ($4 == "mem") mem++; else fd++ } END { printf "mem = %s, fd = %s\n", mem, fd}' +mem = 119, fd = 0 +``` + +The `mem` value shows how many files are mmap'ed, and the `fd` value shows you +many file descriptors these files are using. You should check that `fd` is a +small number (usually 0 on 64-bit hosts). + +See the notes in the `SetMaxOpenFiles()` function in `dbwrapper.cc` for more +details. + +### Consensus Compatibility + +It is possible for LevelDB changes to inadvertently change consensus +compatibility between nodes. This happened in Bitcoin 0.8 (when LevelDB was +first introduced). When upgrading LevelDB you should review the upstream changes +to check for issues affecting consensus compatibility. + +For example, if LevelDB had a bug that accidentally prevented a key from being +returned in an edge case, and that bug was fixed upstream, the bug "fix" would +be an incompatible consensus change. In this situation the correct behavior +would be to revert the upstream fix before applying the updates to Bitcoin's +copy of LevelDB. In general you should be wary of any upstream changes affecting +what data is returned from LevelDB queries. Git and GitHub tips --------------------- diff --git a/src/dbwrapper.cpp b/src/dbwrapper.cpp index fb0d4215a2..fef8eedd14 100644 --- a/src/dbwrapper.cpp +++ b/src/dbwrapper.cpp @@ -71,6 +71,31 @@ public: } }; +static void SetMaxOpenFiles(leveldb::Options *options) { + // On most platforms the default setting of max_open_files (which is 1000) + // is optimal. On Windows using a large file count is OK because the handles + // do not interfere with select() loops. On 64-bit Unix hosts this value is + // also OK, because up to that amount LevelDB will use an mmap + // implementation that does not use extra file descriptors (the fds are + // closed after being mmaped). + // + // Increasing the value beyond the default is dangerous because LevelDB will + // fall back to a non-mmap implementation when the file count is too large. + // On 32-bit Unix host we should decrease the value because the handles use + // up real fds, and we want to avoid fd exhaustion issues. + // + // See PR #12495 for further discussion. + + int default_open_files = options->max_open_files; +#ifndef WIN32 + if (sizeof(void*) < 8) { + options->max_open_files = 64; + } +#endif + LogPrint(BCLog::LEVELDB, "LevelDB using max_open_files=%d (default=%d)\n", + options->max_open_files, default_open_files); +} + static leveldb::Options GetOptions(size_t nCacheSize) { leveldb::Options options; @@ -78,13 +103,13 @@ static leveldb::Options GetOptions(size_t nCacheSize) options.write_buffer_size = nCacheSize / 4; // up to two write buffers may be held in memory simultaneously options.filter_policy = leveldb::NewBloomFilterPolicy(10); options.compression = leveldb::kNoCompression; - options.max_open_files = 64; options.info_log = new CBitcoinLevelDBLogger(); if (leveldb::kMajorVersion > 1 || (leveldb::kMajorVersion == 1 && leveldb::kMinorVersion >= 16)) { // LevelDB versions before 1.16 consider short writes to be corruption. Only trigger error // on corruption in later versions. options.paranoid_checks = true; } + SetMaxOpenFiles(&options); return options; }