Merge bitcoin/bitcoin#30884: streams: cache file position within AutoFile
Some checks are pending
CI / Win64 native, VS 2022 (push) Waiting to run
CI / ASan + LSan + UBSan + integer, no depends, USDT (push) Waiting to run
CI / test each commit (push) Waiting to run
CI / macOS 13 native, x86_64, no depends, sqlite only, gui (push) Waiting to run

a240e150e8 streams: remove AutoFile::Get() entirely (Pieter Wuille)
e624a9bef1 streams: cache file position within AutoFile (Pieter Wuille)

Pull request description:

  Fixes #30833.

  Instead of relying on frequent `ftell` calls (which appear to cause a significant slowdown on some systems) in XOR-enabled `AutoFile`s, cache the file position within `AutoFile` itself.

ACKs for top commit:
  achow101:
    ACK a240e150e8
  davidgumberg:
    untested reACK a240e150e8
  theStack:
    Code-review ACK a240e150e8

Tree-SHA512: fd3681edc018afaf955dc7a41a0c953ca80d46c1129e3c5b306c87c95aae93b2fe7b900794eb8b6f10491f9211645e7939918a28838295e6873eb226fca7006f
This commit is contained in:
Ava Chow 2024-09-16 23:09:16 -04:00
commit 9f1aa88d4d
No known key found for this signature in database
GPG key ID: 17565732E08E5E41
12 changed files with 75 additions and 49 deletions

View file

@ -73,7 +73,7 @@ bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data
remove(pathTmp);
return false;
}
if (!FileCommit(fileout.Get())) {
if (!fileout.Commit()) {
fileout.fclose();
remove(pathTmp);
LogError("%s: Failed to flush file %s\n", __func__, fs::PathToString(pathTmp));

View file

@ -19,7 +19,7 @@ static void FindByte(benchmark::Bench& bench)
uint8_t data[file_size] = {0};
data[file_size-1] = 1;
file << data;
std::rewind(file.Get());
file.seek(0, SEEK_SET);
BufferedFile bf{file, /*nBufSize=*/file_size + 1, /*nRewindIn=*/file_size};
bench.run([&] {

View file

@ -151,7 +151,7 @@ bool BlockFilterIndex::CustomCommit(CDBBatch& batch)
LogError("%s: Failed to open filter file %d\n", __func__, pos.nFile);
return false;
}
if (!FileCommit(file.Get())) {
if (!file.Commit()) {
LogError("%s: Failed to commit filter file %d\n", __func__, pos.nFile);
return false;
}
@ -201,11 +201,11 @@ size_t BlockFilterIndex::WriteFilterToDisk(FlatFilePos& pos, const BlockFilter&
LogPrintf("%s: Failed to open filter file %d\n", __func__, pos.nFile);
return 0;
}
if (!TruncateFile(last_file.Get(), pos.nPos)) {
if (!last_file.Truncate(pos.nPos)) {
LogPrintf("%s: Failed to truncate filter file %d\n", __func__, pos.nFile);
return 0;
}
if (!FileCommit(last_file.Get())) {
if (!last_file.Commit()) {
LogPrintf("%s: Failed to commit filter file %d\n", __func__, pos.nFile);
return 0;
}

View file

@ -87,10 +87,7 @@ bool TxIndex::FindTx(const uint256& tx_hash, uint256& block_hash, CTransactionRe
CBlockHeader header;
try {
file >> header;
if (fseek(file.Get(), postx.nTxOffset, SEEK_CUR)) {
LogError("%s: fseek(...) failed\n", __func__);
return false;
}
file.seek(postx.nTxOffset, SEEK_CUR);
file >> TX_WITH_WITNESS(tx);
} catch (const std::exception& e) {
LogError("%s: Deserialize or I/O error - %s\n", __func__, e.what());

View file

@ -683,7 +683,7 @@ bool BlockManager::UndoWriteToDisk(const CBlockUndo& blockundo, FlatFilePos& pos
fileout << GetParams().MessageStart() << nSize;
// Write undo data
long fileOutPos = ftell(fileout.Get());
long fileOutPos = fileout.tell();
if (fileOutPos < 0) {
LogError("%s: ftell failed\n", __func__);
return false;
@ -981,7 +981,7 @@ bool BlockManager::WriteBlockToDisk(const CBlock& block, FlatFilePos& pos) const
fileout << GetParams().MessageStart() << nSize;
// Write block
long fileOutPos = ftell(fileout.Get());
long fileOutPos = fileout.tell();
if (fileOutPos < 0) {
LogError("%s: ftell failed\n", __func__);
return false;

View file

@ -199,8 +199,8 @@ bool DumpMempool(const CTxMemPool& pool, const fs::path& dump_path, FopenFn mock
LogInfo("Writing %d unbroadcast transactions to file.\n", unbroadcast_txids.size());
file << unbroadcast_txids;
if (!skip_file_commit && !FileCommit(file.Get()))
throw std::runtime_error("FileCommit failed");
if (!skip_file_commit && !file.Commit())
throw std::runtime_error("Commit failed");
file.fclose();
if (!RenameOver(dump_path + ".new", dump_path)) {
throw std::runtime_error("Rename failed");

View file

@ -73,9 +73,11 @@ std::optional<uint256> ReadSnapshotBaseBlockhash(fs::path chaindir)
}
afile >> base_blockhash;
if (std::fgetc(afile.Get()) != EOF) {
int64_t position = afile.tell();
afile.seek(0, SEEK_END);
if (position != afile.tell()) {
LogPrintf("[snapshot] warning: unexpected trailing data in %s\n", read_from_str);
} else if (std::ferror(afile.Get())) {
} else if (afile.IsError()) {
LogPrintf("[snapshot] warning: i/o error reading %s\n", read_from_str);
}
return base_blockhash;

View file

@ -4,21 +4,29 @@
#include <span.h>
#include <streams.h>
#include <util/fs_helpers.h>
#include <array>
AutoFile::AutoFile(std::FILE* file, std::vector<std::byte> data_xor)
: m_file{file}, m_xor{std::move(data_xor)}
{
if (!IsNull()) {
auto pos{std::ftell(m_file)};
if (pos >= 0) m_position = pos;
}
}
std::size_t AutoFile::detail_fread(Span<std::byte> dst)
{
if (!m_file) throw std::ios_base::failure("AutoFile::read: file handle is nullptr");
if (m_xor.empty()) {
return std::fread(dst.data(), 1, dst.size(), m_file);
} else {
const auto init_pos{std::ftell(m_file)};
if (init_pos < 0) throw std::ios_base::failure("AutoFile::read: ftell failed");
std::size_t ret{std::fread(dst.data(), 1, dst.size(), m_file)};
util::Xor(dst.subspan(0, ret), m_xor, init_pos);
return ret;
size_t ret = std::fread(dst.data(), 1, dst.size(), m_file);
if (!m_xor.empty()) {
if (!m_position.has_value()) throw std::ios_base::failure("AutoFile::read: position unknown");
util::Xor(dst.subspan(0, ret), m_xor, *m_position);
}
if (m_position.has_value()) *m_position += ret;
return ret;
}
void AutoFile::seek(int64_t offset, int origin)
@ -29,18 +37,23 @@ void AutoFile::seek(int64_t offset, int origin)
if (std::fseek(m_file, offset, origin) != 0) {
throw std::ios_base::failure(feof() ? "AutoFile::seek: end of file" : "AutoFile::seek: fseek failed");
}
if (origin == SEEK_SET) {
m_position = offset;
} else if (origin == SEEK_CUR && m_position.has_value()) {
*m_position += offset;
} else {
int64_t r{std::ftell(m_file)};
if (r < 0) {
throw std::ios_base::failure("AutoFile::seek: ftell failed");
}
m_position = r;
}
}
int64_t AutoFile::tell()
{
if (IsNull()) {
throw std::ios_base::failure("AutoFile::tell: file handle is nullptr");
}
int64_t r{std::ftell(m_file)};
if (r < 0) {
throw std::ios_base::failure("AutoFile::tell: ftell failed");
}
return r;
if (!m_position.has_value()) throw std::ios_base::failure("AutoFile::tell: position unknown");
return *m_position;
}
void AutoFile::read(Span<std::byte> dst)
@ -60,6 +73,7 @@ void AutoFile::ignore(size_t nSize)
throw std::ios_base::failure(feof() ? "AutoFile::ignore: end of file" : "AutoFile::ignore: fread failed");
}
nSize -= nNow;
if (m_position.has_value()) *m_position += nNow;
}
}
@ -70,19 +84,34 @@ void AutoFile::write(Span<const std::byte> src)
if (std::fwrite(src.data(), 1, src.size(), m_file) != src.size()) {
throw std::ios_base::failure("AutoFile::write: write failed");
}
if (m_position.has_value()) *m_position += src.size();
} else {
auto current_pos{std::ftell(m_file)};
if (current_pos < 0) throw std::ios_base::failure("AutoFile::write: ftell failed");
if (!m_position.has_value()) throw std::ios_base::failure("AutoFile::write: position unknown");
std::array<std::byte, 4096> buf;
while (src.size() > 0) {
auto buf_now{Span{buf}.first(std::min<size_t>(src.size(), buf.size()))};
std::copy(src.begin(), src.begin() + buf_now.size(), buf_now.begin());
util::Xor(buf_now, m_xor, current_pos);
util::Xor(buf_now, m_xor, *m_position);
if (std::fwrite(buf_now.data(), 1, buf_now.size(), m_file) != buf_now.size()) {
throw std::ios_base::failure{"XorFile::write: failed"};
}
src = src.subspan(buf_now.size());
current_pos += buf_now.size();
*m_position += buf_now.size();
}
}
}
bool AutoFile::Commit()
{
return ::FileCommit(m_file);
}
bool AutoFile::IsError()
{
return ferror(m_file);
}
bool AutoFile::Truncate(unsigned size)
{
return ::TruncateFile(m_file, size);
}

View file

@ -390,9 +390,10 @@ class AutoFile
protected:
std::FILE* m_file;
std::vector<std::byte> m_xor;
std::optional<int64_t> m_position;
public:
explicit AutoFile(std::FILE* file, std::vector<std::byte> data_xor={}) : m_file{file}, m_xor{std::move(data_xor)} {}
explicit AutoFile(std::FILE* file, std::vector<std::byte> data_xor={});
~AutoFile() { fclose(); }
@ -419,12 +420,6 @@ public:
return ret;
}
/** Get wrapped FILE* without transfer of ownership.
* @note Ownership of the FILE* will remain with this class. Use this only if the scope of the
* AutoFile outlives use of the passed pointer.
*/
std::FILE* Get() const { return m_file; }
/** Return true if the wrapped FILE* is nullptr, false otherwise.
*/
bool IsNull() const { return m_file == nullptr; }
@ -458,6 +453,10 @@ public:
::Unserialize(*this, obj);
return *this;
}
bool Commit();
bool IsError();
bool Truncate(unsigned size);
};
/** Wrapper around an AutoFile& that implements a ring buffer to

View file

@ -56,7 +56,6 @@ FUZZ_TARGET(autofile)
WriteToStream(fuzzed_data_provider, auto_file);
});
}
(void)auto_file.Get();
(void)auto_file.IsNull();
if (fuzzed_data_provider.ConsumeBool()) {
FILE* f = auto_file.release();

View file

@ -261,7 +261,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file)
for (uint8_t j = 0; j < 40; ++j) {
file << j;
}
std::rewind(file.Get());
file.seek(0, SEEK_SET);
// The buffer size (second arg) must be greater than the rewind
// amount (third arg).
@ -391,7 +391,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_skip)
for (uint8_t j = 0; j < 40; ++j) {
file << j;
}
std::rewind(file.Get());
file.seek(0, SEEK_SET);
// The buffer is 25 bytes, allow rewinding 10 bytes.
BufferedFile bf{file, 25, 10};
@ -444,7 +444,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
for (uint8_t i = 0; i < fileSize; ++i) {
file << i;
}
std::rewind(file.Get());
file.seek(0, SEEK_SET);
size_t bufSize = m_rng.randrange(300) + 1;
size_t rewindSize = m_rng.randrange(bufSize);

View file

@ -203,10 +203,10 @@ std::vector<bool> DecodeAsmap(fs::path path)
LogPrintf("Failed to open asmap file from disk\n");
return bits;
}
fseek(filestr, 0, SEEK_END);
int length = ftell(filestr);
file.seek(0, SEEK_END);
int length = file.tell();
LogPrintf("Opened asmap file %s (%d bytes) from disk\n", fs::quoted(fs::PathToString(path)), length);
fseek(filestr, 0, SEEK_SET);
file.seek(0, SEEK_SET);
uint8_t cur_byte;
for (int i = 0; i < length; ++i) {
file >> cur_byte;