Query Cache: Fix guest side sample counting
This commit is contained in:
parent
282ae8fa51
commit
2fea1b8407
5 changed files with 97 additions and 46 deletions
|
@ -586,12 +586,6 @@ void Maxwell3D::ProcessQueryCondition() {
|
|||
}
|
||||
|
||||
void Maxwell3D::ProcessCounterReset() {
|
||||
#if ANDROID
|
||||
if (!Settings::IsGPULevelHigh()) {
|
||||
// This is problematic on Android, disable on GPU Normal.
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
switch (regs.clear_report_value) {
|
||||
case Regs::ClearReport::ZPassPixelCount:
|
||||
rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);
|
||||
|
|
|
@ -9,16 +9,15 @@
|
|||
namespace VideoCommon {
|
||||
|
||||
enum class QueryFlagBits : u32 {
|
||||
HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp.
|
||||
IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host
|
||||
IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host
|
||||
IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest.
|
||||
IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query
|
||||
IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query
|
||||
IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified.
|
||||
IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
|
||||
IsFence = 1 << 8, ///< Indicates the query is a fence.
|
||||
IsQueuedForAsyncFlush = 1 << 9, ///< Indicates that the query can be flushed at any moment
|
||||
HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp.
|
||||
IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host
|
||||
IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host
|
||||
IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest.
|
||||
IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query
|
||||
IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query
|
||||
IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified.
|
||||
IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
|
||||
IsFence = 1 << 8, ///< Indicates the query is a fence.
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
|
||||
|
||||
|
|
|
@ -256,30 +256,32 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
|
|||
u8* pointer = impl->cpu_memory.GetPointer(cpu_addr);
|
||||
u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8);
|
||||
bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
|
||||
std::function<void()> operation(
|
||||
[this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] {
|
||||
if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
|
||||
if (!is_synced) [[likely]] {
|
||||
impl->pending_unregister.push_back(query_location);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
|
||||
UNREACHABLE();
|
||||
return;
|
||||
}
|
||||
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
|
||||
u64 timestamp = impl->gpu.GetTicks();
|
||||
std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp));
|
||||
std::memcpy(pointer, &query_base->value, sizeof(query_base->value));
|
||||
} else {
|
||||
u32 value = static_cast<u32>(query_base->value);
|
||||
std::memcpy(pointer, &value, sizeof(value));
|
||||
}
|
||||
std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
|
||||
pointer, pointer_timestamp] {
|
||||
if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
|
||||
if (!is_synced) [[likely]] {
|
||||
impl->pending_unregister.push_back(query_location);
|
||||
}
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
|
||||
UNREACHABLE();
|
||||
return;
|
||||
}
|
||||
query_base->value += streamer->GetAmmendValue();
|
||||
streamer->SetAccumulationValue(query_base->value);
|
||||
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
|
||||
u64 timestamp = impl->gpu.GetTicks();
|
||||
std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp));
|
||||
std::memcpy(pointer, &query_base->value, sizeof(query_base->value));
|
||||
} else {
|
||||
u32 value = static_cast<u32>(query_base->value);
|
||||
std::memcpy(pointer, &value, sizeof(value));
|
||||
}
|
||||
if (!is_synced) [[likely]] {
|
||||
impl->pending_unregister.push_back(query_location);
|
||||
}
|
||||
});
|
||||
if (is_fence) {
|
||||
impl->rasterizer.SignalFence(std::move(operation));
|
||||
} else {
|
||||
|
@ -354,9 +356,9 @@ void QueryCacheBase<Traits>::NotifySegment(bool resume) {
|
|||
if (resume) {
|
||||
impl->runtime.ResumeHostConditionalRendering();
|
||||
} else {
|
||||
impl->runtime.PauseHostConditionalRendering();
|
||||
CounterClose(VideoCommon::QueryType::ZPassPixelCount64);
|
||||
CounterClose(VideoCommon::QueryType::StreamingByteCount);
|
||||
impl->runtime.PauseHostConditionalRendering();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -78,6 +78,14 @@ public:
|
|||
return dependence_mask;
|
||||
}
|
||||
|
||||
/// Returns the amend value currently stored in this streamer.
/// CounterReport adds this value to a query's value before writing it to guest memory.
u64 GetAmmendValue() const {
    return this->ammend_value;
}
|
||||
|
||||
/// Overwrites the stored accumulation value with `new_value`.
/// @param new_value Value to store as the streamer's accumulation value.
void SetAccumulationValue(u64 new_value) {
    this->acumulation_value = new_value;
}
|
||||
|
||||
protected:
|
||||
void MakeDependent(StreamerInterface* depend_on) {
|
||||
dependence_mask |= 1ULL << depend_on->id;
|
||||
|
@ -87,6 +95,8 @@ protected:
|
|||
const size_t id;
|
||||
u64 dependence_mask;
|
||||
u64 dependent_mask;
|
||||
u64 ammend_value{};
|
||||
u64 acumulation_value{};
|
||||
};
|
||||
|
||||
template <typename QueryType>
|
||||
|
|
|
@ -110,13 +110,16 @@ struct HostSyncValues {
|
|||
|
||||
class SamplesStreamer : public BaseStreamer {
|
||||
public:
|
||||
explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_,
|
||||
explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_,
|
||||
VideoCore::RasterizerInterface* rasterizer_, const Device& device_,
|
||||
Scheduler& scheduler_, const MemoryAllocator& memory_allocator_)
|
||||
: BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_},
|
||||
memory_allocator{memory_allocator_} {
|
||||
: BaseStreamer(id_), runtime{runtime_}, rasterizer{rasterizer_}, device{device_},
|
||||
scheduler{scheduler_}, memory_allocator{memory_allocator_} {
|
||||
BuildResolveBuffer();
|
||||
current_bank = nullptr;
|
||||
current_query = nullptr;
|
||||
ammend_value = 0;
|
||||
acumulation_value = 0;
|
||||
}
|
||||
|
||||
~SamplesStreamer() = default;
|
||||
|
@ -151,6 +154,11 @@ public:
|
|||
PauseCounter();
|
||||
}
|
||||
AbandonCurrentQuery();
|
||||
std::function<void()> func([this, counts = pending_flush_queries.size()] {
|
||||
ammend_value = 0;
|
||||
acumulation_value = 0;
|
||||
});
|
||||
rasterizer->SyncOperation(std::move(func));
|
||||
}
|
||||
|
||||
void CloseCounter() override {
|
||||
|
@ -244,7 +252,7 @@ public:
|
|||
}
|
||||
if (query->size_slots > 1) {
|
||||
// This is problematic.
|
||||
UNIMPLEMENTED();
|
||||
// UNIMPLEMENTED();
|
||||
}
|
||||
query->flags |= VideoCommon::QueryFlagBits::IsHostSynced;
|
||||
auto loc_data = offsets[query->start_bank_id];
|
||||
|
@ -255,16 +263,20 @@ public:
|
|||
});
|
||||
}
|
||||
|
||||
ReplicateCurrentQueryIfNeeded();
|
||||
std::function<void()> func([this] { ammend_value = acumulation_value; });
|
||||
rasterizer->SyncOperation(std::move(func));
|
||||
AbandonCurrentQuery();
|
||||
pending_sync.clear();
|
||||
}
|
||||
|
||||
size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
|
||||
[[maybe_unused]] std::optional<u32> subreport) override {
|
||||
PauseCounter();
|
||||
auto index = BuildQuery();
|
||||
auto* new_query = GetQuery(index);
|
||||
new_query->guest_address = address;
|
||||
new_query->value = 100;
|
||||
new_query->value = 0;
|
||||
new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan;
|
||||
if (has_timestamp) {
|
||||
new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp;
|
||||
|
@ -291,6 +303,7 @@ public:
|
|||
|
||||
void PushUnsyncedQueries() override {
|
||||
PauseCounter();
|
||||
current_bank->Close();
|
||||
{
|
||||
std::scoped_lock lk(flush_guard);
|
||||
pending_flush_sets.emplace_back(std::move(pending_flush_queries));
|
||||
|
@ -429,6 +442,34 @@ private:
|
|||
current_query_id = 0;
|
||||
}
|
||||
|
||||
void ReplicateCurrentQueryIfNeeded() {
|
||||
if (pending_sync.empty()) {
|
||||
return;
|
||||
}
|
||||
if (!current_query) {
|
||||
return;
|
||||
}
|
||||
auto index = BuildQuery();
|
||||
auto* new_query = GetQuery(index);
|
||||
new_query->guest_address = 0;
|
||||
new_query->value = 0;
|
||||
new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan;
|
||||
new_query->start_bank_id = current_query->start_bank_id;
|
||||
new_query->size_banks = current_query->size_banks;
|
||||
new_query->start_slot = current_query->start_slot;
|
||||
new_query->size_slots = current_query->size_slots;
|
||||
ApplyBankOp(new_query, [](SamplesQueryBank* bank, size_t start, size_t amount) {
|
||||
bank->AddReference(amount);
|
||||
});
|
||||
pending_flush_queries.push_back(index);
|
||||
std::function<void()> func([this, index] {
|
||||
auto* query = GetQuery(index);
|
||||
query->value += GetAmmendValue();
|
||||
SetAccumulationValue(query->value);
|
||||
Free(index);
|
||||
});
|
||||
}
|
||||
|
||||
void BuildResolveBuffer() {
|
||||
const VkBufferCreateInfo buffer_ci = {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
|
@ -448,6 +489,7 @@ private:
|
|||
static constexpr size_t resolve_slots = 8;
|
||||
|
||||
QueryCacheRuntime& runtime;
|
||||
VideoCore::RasterizerInterface* rasterizer;
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
const MemoryAllocator& memory_allocator;
|
||||
|
@ -470,6 +512,7 @@ private:
|
|||
size_t current_query_id;
|
||||
VideoCommon::HostQueryBase* current_query;
|
||||
bool has_started{};
|
||||
bool current_unset{};
|
||||
std::mutex flush_guard;
|
||||
};
|
||||
|
||||
|
@ -677,7 +720,6 @@ public:
|
|||
size_t offset_base = staging_ref.offset;
|
||||
for (auto q : pending_flush_queries) {
|
||||
auto* query = GetQuery(q);
|
||||
query->flags |= VideoCommon::QueryFlagBits::IsQueuedForAsyncFlush;
|
||||
auto& bank = bank_pool.GetBank(query->start_bank_id);
|
||||
bank.Sync(staging_ref, offset_base, query->start_slot, 1);
|
||||
offset_base += TFBQueryBank::QUERY_SIZE;
|
||||
|
@ -1047,8 +1089,8 @@ struct QueryCacheRuntimeImpl {
|
|||
buffer_cache{buffer_cache_}, device{device_},
|
||||
memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_},
|
||||
guest_streamer(0, runtime),
|
||||
sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, device,
|
||||
scheduler, memory_allocator),
|
||||
sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, rasterizer,
|
||||
device, scheduler, memory_allocator),
|
||||
tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
|
||||
scheduler, memory_allocator, staging_pool),
|
||||
primitives_succeeded_streamer(
|
||||
|
@ -1277,6 +1319,10 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
|
|||
return true;
|
||||
}
|
||||
}
|
||||
if (!is_in_bc[0] && !is_in_bc[1]) {
|
||||
// Both queries are in query cache, it's best to just flush.
|
||||
return false;
|
||||
}
|
||||
HostConditionalRenderingCompareBCImpl(object_1.address, equal_check);
|
||||
return true;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue