Merge branch 'master' into feature_openxr_stereo

This commit is contained in:
Lubos 2022-08-29 21:32:59 +02:00
commit 94968c3075
16 changed files with 253 additions and 211 deletions

View file

@ -31,7 +31,9 @@ void VulkanQueueRunner::CreateDeviceObjects() {
INFO_LOG(G3D, "VulkanQueueRunner::CreateDeviceObjects");
InitBackbufferRenderPass();
framebufferRenderPass_ = GetRenderPass(VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR);
RPKey key{ VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR,
VKRRenderPassStoreAction::STORE, VKRRenderPassStoreAction::DONT_CARE, VKRRenderPassStoreAction::DONT_CARE };
framebufferRenderPass_ = GetRenderPass(key);
#if 0
// Just to check whether it makes sense to split some of these. drawidx is way bigger than the others...
@ -189,6 +191,23 @@ void VulkanQueueRunner::InitBackbufferRenderPass() {
_assert_(res == VK_SUCCESS);
}
// Maps a render-pass load action onto the matching Vulkan attachment load op.
// Any value not covered by the switch yields VK_ATTACHMENT_LOAD_OP_DONT_CARE.
static VkAttachmentLoadOp ConvertLoadAction(VKRRenderPassLoadAction action) {
	// Start from the fallback so every path through the switch has a defined result.
	VkAttachmentLoadOp loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
	switch (action) {
	case VKRRenderPassLoadAction::CLEAR:
		loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
		break;
	case VKRRenderPassLoadAction::KEEP:
		loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
		break;
	case VKRRenderPassLoadAction::DONT_CARE:
		loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
		break;
	}
	return loadOp;
}
// Maps a render-pass store action onto the matching Vulkan attachment store op.
// Any value not covered by the switch yields VK_ATTACHMENT_STORE_OP_DONT_CARE.
static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) {
	// Start from the fallback so every path through the switch has a defined result.
	VkAttachmentStoreOp storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
	switch (action) {
	case VKRRenderPassStoreAction::STORE:
		storeOp = VK_ATTACHMENT_STORE_OP_STORE;
		break;
	case VKRRenderPassStoreAction::DONT_CARE:
		storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
		break;
	}
	return storeOp;
}
VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) {
auto pass = renderPasses_.Get(key);
if (pass) {
@ -198,19 +217,8 @@ VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) {
VkAttachmentDescription attachments[2] = {};
attachments[0].format = VK_FORMAT_R8G8B8A8_UNORM;
attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
switch (key.colorLoadAction) {
case VKRRenderPassLoadAction::CLEAR:
attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
break;
case VKRRenderPassLoadAction::KEEP:
attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
break;
case VKRRenderPassLoadAction::DONT_CARE:
default:
attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
break;
}
attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[0].loadOp = ConvertLoadAction(key.colorLoadAction);
attachments[0].storeOp = ConvertStoreAction(key.colorStoreAction);
attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[0].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
@ -219,30 +227,10 @@ VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) {
attachments[1].format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat;
attachments[1].samples = VK_SAMPLE_COUNT_1_BIT;
switch (key.depthLoadAction) {
case VKRRenderPassLoadAction::CLEAR:
attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
break;
case VKRRenderPassLoadAction::KEEP:
attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
break;
case VKRRenderPassLoadAction::DONT_CARE:
attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
break;
}
switch (key.stencilLoadAction) {
case VKRRenderPassLoadAction::CLEAR:
attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
break;
case VKRRenderPassLoadAction::KEEP:
attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
break;
case VKRRenderPassLoadAction::DONT_CARE:
attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
break;
}
attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[1].loadOp = ConvertLoadAction(key.depthLoadAction);
attachments[1].storeOp = ConvertStoreAction(key.depthStoreAction);
attachments[1].stencilLoadOp = ConvertLoadAction(key.stencilLoadAction);
attachments[1].stencilStoreOp = ConvertStoreAction(key.stencilStoreAction);
attachments[1].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[1].flags = 0;
@ -1385,7 +1373,11 @@ void VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKRStep &step
TransitionToOptimal(cmd, fb->color.image, fb->color.layout, fb->depth.image, fb->depth.layout, &recordBarrier_);
renderPass = GetRenderPass(step.render.colorLoad, step.render.depthLoad, step.render.stencilLoad);
RPKey key{
step.render.colorLoad, step.render.depthLoad, step.render.stencilLoad,
step.render.colorStore, step.render.depthStore, step.render.stencilStore,
};
renderPass = GetRenderPass(key);
// The transition from the optimal format happens after EndRenderPass, now that we don't
// do it as part of the renderpass itself anymore.

View file

@ -121,10 +121,16 @@ enum class VKRStepType : uint8_t {
READBACK_IMAGE,
};
// What happens to an attachment's existing contents when a render pass begins.
// Must be the same order as Draw::RPAction (KEEP = 0, CLEAR = 1, DONT_CARE = 2).
// Each enumerator is listed exactly once; duplicate names would not compile.
enum class VKRRenderPassLoadAction : uint8_t {
	KEEP,  // default. avoid when possible.
	CLEAR,
	DONT_CARE,
};
// What happens to an attachment's contents when a render pass ends.
// ConvertStoreAction() translates these to VK_ATTACHMENT_STORE_OP_STORE / _DONT_CARE.
enum class VKRRenderPassStoreAction : uint8_t {
// Zero value, so it is what a value-initialized store action ends up as.
STORE, // default. avoid when possible.
// The contents are not needed after the pass.
DONT_CARE,
};
struct TransitionRequest {
@ -156,6 +162,9 @@ struct VKRStep {
VKRRenderPassLoadAction colorLoad;
VKRRenderPassLoadAction depthLoad;
VKRRenderPassLoadAction stencilLoad;
VKRRenderPassStoreAction colorStore;
VKRRenderPassStoreAction depthStore;
VKRRenderPassStoreAction stencilStore;
u8 clearStencil;
uint32_t clearColor;
float clearDepth;
@ -232,15 +241,11 @@ public:
VKRRenderPassLoadAction colorLoadAction;
VKRRenderPassLoadAction depthLoadAction;
VKRRenderPassLoadAction stencilLoadAction;
VKRRenderPassStoreAction colorStoreAction;
VKRRenderPassStoreAction depthStoreAction;
VKRRenderPassStoreAction stencilStoreAction;
};
// Only call this from the render thread! Also ok during initialization (LoadCache).
// Convenience overload: bundles the three load actions into an RPKey and forwards to the
// key-based lookup. Only the load actions are passed to the initializer; the remaining key
// members are left to the initializer's defaults (presumably zero, i.e. STORE - assumes RPKey
// is a plain aggregate, TODO confirm).
VkRenderPass GetRenderPass(
	VKRRenderPassLoadAction colorLoadAction, VKRRenderPassLoadAction depthLoadAction, VKRRenderPassLoadAction stencilLoadAction) {
	const RPKey loadOnlyKey{ colorLoadAction, depthLoadAction, stencilLoadAction };
	return GetRenderPass(loadOnlyKey);
}
VkRenderPass GetRenderPass(const RPKey &key);
bool GetRenderPassKey(VkRenderPass passToFind, RPKey *outKey) const {

View file

@ -724,6 +724,9 @@ void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRR
step->render.colorLoad = color;
step->render.depthLoad = depth;
step->render.stencilLoad = stencil;
step->render.colorStore = VKRRenderPassStoreAction::STORE;
step->render.depthStore = VKRRenderPassStoreAction::STORE;
step->render.stencilStore = VKRRenderPassStoreAction::STORE;
step->render.clearColor = clearColor;
step->render.clearDepth = clearDepth;
step->render.clearStencil = clearStencil;

View file

@ -361,6 +361,31 @@ public:
void Clear(uint32_t clearColor, float clearZ, int clearStencil, int clearMask);
// Cheap way to declare that a surface's contents at the start of the current render pass don't matter.
// Sets the load-op of the current render step to DONT_CARE for every aspect present in `aspects`
// (color, depth and/or stencil); aspects that are not present are left untouched.
// Useful when we don't know at bind-time whether we will overwrite the surface or not.
void SetLoadDontCare(VkImageAspectFlags aspects) {
	_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
	const bool wantColor = (aspects & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
	const bool wantDepth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) != 0;
	const bool wantStencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) != 0;
	if (wantColor) {
		curRenderStep_->render.colorLoad = VKRRenderPassLoadAction::DONT_CARE;
	}
	if (wantDepth) {
		curRenderStep_->render.depthLoad = VKRRenderPassLoadAction::DONT_CARE;
	}
	if (wantStencil) {
		curRenderStep_->render.stencilLoad = VKRRenderPassLoadAction::DONT_CARE;
	}
}
// Cheap way to declare that a surface's contents at the end of the current render pass don't matter.
// Sets the store-op of the current render step to DONT_CARE for every aspect present in `aspects`
// (color, depth and/or stencil); aspects that are not present are left untouched.
void SetStoreDontCare(VkImageAspectFlags aspects) {
	_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
	const bool dropColor = (aspects & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
	const bool dropDepth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) != 0;
	const bool dropStencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) != 0;
	if (dropColor) {
		curRenderStep_->render.colorStore = VKRRenderPassStoreAction::DONT_CARE;
	}
	if (dropDepth) {
		curRenderStep_->render.depthStore = VKRRenderPassStoreAction::DONT_CARE;
	}
	if (dropStencil) {
		curRenderStep_->render.stencilStore = VKRRenderPassStoreAction::DONT_CARE;
	}
}
void Draw(VkPipelineLayout layout, VkDescriptorSet descSet, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, int count, int offset = 0) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_);
VkRenderData data{ VKRRenderCommand::DRAW };

View file

@ -499,6 +499,8 @@ public:
void InvalidateCachedState() override;
void InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channels) override;
private:
VulkanTexture *GetNullTexture();
VulkanContext *vulkan_ = nullptr;
@ -1604,4 +1606,19 @@ void VKContext::HandleEvent(Event ev, int width, int height, void *param1, void
}
}
// Translates the FBChannel bits in `channels` into Vulkan image aspect flags and forwards them
// to the render manager: SetLoadDontCare for FB_INVALIDATION_LOAD, SetStoreDontCare for
// FB_INVALIDATION_STORE. Any other stage value does nothing.
void VKContext::InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channels) {
	VkImageAspectFlags aspectMask = 0;
	if (channels & FBChannel::FB_COLOR_BIT) {
		aspectMask |= VK_IMAGE_ASPECT_COLOR_BIT;
	}
	if (channels & FBChannel::FB_DEPTH_BIT) {
		aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
	}
	if (channels & FBChannel::FB_STENCIL_BIT) {
		aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
	}

	switch (stage) {
	case FB_INVALIDATION_LOAD:
		renderManager_.SetLoadDontCare(aspectMask);
		break;
	case FB_INVALIDATION_STORE:
		renderManager_.SetStoreDontCare(aspectMask);
		break;
	default:
		// Unknown stage: nothing to invalidate.
		break;
	}
}
} // namespace Draw

View file

@ -261,6 +261,11 @@ enum FBChannel {
FB_FORMAT_BIT = 128, // Actually retrieves the native format instead. D3D11 only.
};
// Selects which end of the current render pass an InvalidateFramebuffer() call applies to.
enum FBInvalidationStage {
// Contents at the start of the pass are not needed (the Vulkan backend sets the load-op to DONT_CARE).
FB_INVALIDATION_LOAD = 1,
// Contents at the end of the pass are not needed (the Vulkan backend sets the store-op to DONT_CARE).
FB_INVALIDATION_STORE = 2,
};
enum FBBlitFilter {
FB_BLIT_NEAREST = 0,
FB_BLIT_LINEAR = 1,
@ -568,9 +573,9 @@ struct TextureDesc {
};
// What to do with a framebuffer channel's existing contents when it is bound as a render target.
// The numeric values are explicit, and each enumerator is listed exactly once
// (duplicate names would not compile).
enum class RPAction {
	KEEP = 0,
	CLEAR = 1,
	DONT_CARE = 2,
};
struct RenderPassInfo {
@ -655,8 +660,11 @@ public:
virtual void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) = 0;
// Useful in OpenGL ES to give hints about framebuffers on tiler GPUs.
virtual void InvalidateFramebuffer(Framebuffer *fbo) {}
// Could be useful in OpenGL ES to give hints about framebuffers on tiler GPUs
// using glInvalidateFramebuffer, although drivers are known to botch that so we currently don't use it.
// In Vulkan, this sets the LOAD_OP or the STORE_OP (depending on stage) of the current render pass instance to DONT_CARE.
// channels is a bitwise combination of FBChannel::FB_COLOR_BIT, FB_DEPTH_BIT and FB_STENCIL_BIT.
virtual void InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channels) {}
// Dynamic state
virtual void SetScissorRect(int left, int top, int width, int height) = 0;

View file

@ -86,7 +86,6 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) {
CheckSetting(iniFile, gameID, "ReportSmallMemstick", &flags_.ReportSmallMemstick);
CheckSetting(iniFile, gameID, "MemstickFixedFree", &flags_.MemstickFixedFree);
CheckSetting(iniFile, gameID, "DateLimited", &flags_.DateLimited);
CheckSetting(iniFile, gameID, "ReinterpretFramebuffers", &flags_.ReinterpretFramebuffers);
CheckSetting(iniFile, gameID, "ShaderColorBitmask", &flags_.ShaderColorBitmask);
CheckSetting(iniFile, gameID, "DisableFirstFrameReadback", &flags_.DisableFirstFrameReadback);
CheckSetting(iniFile, gameID, "DisableRangeCulling", &flags_.DisableRangeCulling);

View file

@ -76,7 +76,6 @@ struct CompatFlags {
bool ReportSmallMemstick;
bool MemstickFixedFree;
bool DateLimited;
bool ReinterpretFramebuffers;
bool ShaderColorBitmask;
bool DisableFirstFrameReadback;
bool DisableRangeCulling;

View file

@ -486,8 +486,10 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
// Reports through *fboTexNeedsBind whether the framebuffer has to be bound as a texture
// for shader blending:
//  - false when the GPU supports any kind of framebuffer fetch,
//  - true otherwise, in which case gpuStats.numCopiesForShaderBlend is incremented.
// DIRTY_SHADERBLEND is flagged in both cases.
void DrawEngineCommon::ApplyFramebufferRead(bool *fboTexNeedsBind) {
	if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) {
		*fboTexNeedsBind = false;
	} else {
		gpuStats.numCopiesForShaderBlend++;
		*fboTexNeedsBind = true;
	}
	// Nothing may write *fboTexNeedsBind after this point: an unconditional assignment here
	// would overwrite the result of the framebuffer-fetch branch above.
	gstate_c.Dirty(DIRTY_SHADERBLEND);
}

View file

@ -125,6 +125,43 @@ VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) const {
return match;
}
// Returns the tracked virtual framebuffer whose address, stride and format all equal the
// arguments, or nullptr when there is none.
VirtualFramebuffer *FramebufferManagerCommon::GetExactVFB(u32 addr, int stride, GEBufferFormat format) const {
	for (auto candidate : vfbs_) {
		if (candidate->fb_address != addr) {
			continue;
		}
		if (candidate->fb_stride != stride) {
			continue;
		}
		if (candidate->fb_format != format) {
			continue;
		}
		// There'll only be one exact match, we don't allow duplicates with these conditions.
		return candidate;
	}
	return nullptr;
}
// Picks, among the framebuffers at `addr` whose byte stride equals `stride` pixels of `format`,
// the one with the highest colorBindSeq. If that framebuffer has a different format than
// requested, it is resolved to `format` via ResolveFramebufferColorToFormat and that result is
// returned instead. Returns nullptr when nothing matches address and stride.
VirtualFramebuffer *FramebufferManagerCommon::ResolveVFB(u32 addr, int stride, GEBufferFormat format) {
	VirtualFramebuffer *mostRecent = nullptr;
	for (auto candidate : vfbs_) {
		// Skip anything that isn't at this address with the same stride in bytes.
		if (candidate->fb_address != addr || candidate->FbStrideInBytes() != stride * BufferFormatBytesPerPixel(format)) {
			continue;
		}
		// The first match wins by default; later ones only if they were bound more recently.
		if (!mostRecent || candidate->colorBindSeq > mostRecent->colorBindSeq) {
			mostRecent = candidate;
		}
	}

	if (mostRecent && mostRecent->fb_format != format) {
		WARN_LOG_ONCE(resolvevfb, G3D, "ResolveVFB: Resolving from %s to %s at %08x/%d", GeBufferFormatToString(mostRecent->fb_format), GeBufferFormatToString(format), addr, stride);
		return ResolveFramebufferColorToFormat(mostRecent, format);
	}
	return mostRecent;
}
// Returns the exact-match VFB for the current display address, stride and format
// (nullptr if GetExactVFB finds none).
VirtualFramebuffer *FramebufferManagerCommon::GetDisplayVFB() {
	VirtualFramebuffer *displayVfb = GetExactVFB(displayFramebufPtr_, displayStride_, displayFormat_);
	return displayVfb;
}
// Size in bytes of the color buffer: stride * height * bytes per pixel,
// where GE_FORMAT_8888 counts as 4 bytes per pixel and every other format as 2.
u32 FramebufferManagerCommon::ColorBufferByteSize(const VirtualFramebuffer *vfb) const {
	const int bytesPerPixel = (vfb->fb_format == GE_FORMAT_8888) ? 4 : 2;
	return vfb->fb_stride * vfb->height * bytesPerPixel;
}
@ -316,7 +353,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
WARN_LOG_ONCE(color_equal_z, G3D, "Framebuffer bound with color addr == z addr, likely will not use Z in this pass: %08x", params.fb_address);
}
// Find a matching framebuffer
// Find a matching framebuffer.
VirtualFramebuffer *vfb = nullptr;
for (auto v : vfbs_) {
const u32 bpp = BufferFormatBytesPerPixel(v->fb_format);
@ -616,11 +653,14 @@ void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFra
}
if (src->fb_address == dst->fb_address && src->fb_stride == dst->fb_stride) {
// Another render target at the exact same location but gotta be a different format, otherwise
// it would be the same.
_dbg_assert_(src->fb_format != dst->fb_format);
// This will result in reinterpret later, if both formats are 16-bit.
sources.push_back(CopySource{ src, RASTER_COLOR, 0, 0 });
// Another render target at the exact same location but gotta be a different format or a different stride, otherwise
// it would be the same, and should have been detected in DoSetRenderFrameBuffer.
if (src->fb_format != dst->fb_format) {
// This will result in reinterpret later, if both formats are 16-bit.
sources.push_back(CopySource{ src, RASTER_COLOR, 0, 0 });
} else {
// Happens in Prince of Persia - Revelations. Ignoring.
}
} else if (src->fb_stride == dst->fb_stride && src->fb_format == dst->fb_format) {
u32 bytesPerPixel = BufferFormatBytesPerPixel(src->fb_format);
@ -706,7 +746,7 @@ void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFra
gpuStats.numColorCopies++;
pipeline = Get2DPipeline(DRAW2D_COPY_COLOR);
pass_name = "copy_color";
} else if (PSP_CoreParameter().compat.flags().ReinterpretFramebuffers) {
} else {
if (PSP_CoreParameter().compat.flags().BlueToAlpha) {
WARN_LOG_ONCE(bta, G3D, "WARNING: Reinterpret encountered with BlueToAlpha on");
}
@ -737,22 +777,6 @@ void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFra
}
gpuStats.numReinterpretCopies++;
} else if (IsBufferFormat16Bit(src->fb_format) && IsBufferFormat16Bit(dst->fb_format)) {
// Fake reinterpret - just clear the way we always did on Vulkan. Just clear color and stencil.
if (src->fb_format == GE_FORMAT_565) {
// We have to bind here instead of clear, since it can be that no framebuffer is bound.
// The backend can sometimes directly optimize it to a clear.
// Games that are marked as doing reinterpret just ignore this - better to keep the data than to clear.
// Fixes #13717.
if (!PSP_CoreParameter().compat.flags().ReinterpretFramebuffers && !PSP_CoreParameter().compat.flags().BlueToAlpha) {
draw_->BindFramebufferAsRenderTarget(dst->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "FakeReinterpret");
// Need to dirty anything that has command buffer dynamic state, in case we started a new pass above.
// Should find a way to feed that information back, maybe... Or simply correct the issue in the rendermanager.
gstate_c.Dirty(DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE);
tookActions = true;
}
}
}
if (pipeline) {
@ -910,30 +934,27 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe
NotifyRenderFramebufferUpdated(vfb);
}
void FramebufferManagerCommon::NotifyVideoUpload(u32 addr, int size, int width, GEBufferFormat fmt) {
void FramebufferManagerCommon::NotifyVideoUpload(u32 addr, int size, int stride, GEBufferFormat fmt) {
// Note: UpdateFromMemory() is still called later.
// This is a special case where we have extra information prior to the invalidation.
// TODO: Could possibly be an offset...
VirtualFramebuffer *vfb = GetVFBAt(addr);
// Also, stride needs better handling.
VirtualFramebuffer *vfb = ResolveVFB(addr, stride, fmt);
if (vfb) {
if (vfb->fb_format != fmt) {
DEBUG_LOG(ME, "Changing fb_format for %08x from %d to %d", addr, vfb->fb_format, fmt);
vfb->fb_format = fmt;
// Let's count this as a "render". This will also force us to use the correct format.
vfb->last_frame_render = gpuStats.numFlips;
vfb->colorBindSeq = GetBindSeqCount();
// Let's count this as a "render". This will also force us to use the correct format.
vfb->last_frame_render = gpuStats.numFlips;
}
if (vfb->fb_stride < width) {
DEBUG_LOG(ME, "Changing stride for %08x from %d to %d", addr, vfb->fb_stride, width);
if (vfb->fb_stride < stride) {
DEBUG_LOG(ME, "Changing stride for %08x from %d to %d", addr, vfb->fb_stride, stride);
const int bpp = BufferFormatBytesPerPixel(fmt);
ResizeFramebufFBO(vfb, width, size / (bpp * width));
ResizeFramebufFBO(vfb, stride, size / (bpp * stride));
// Resizing may change the viewport/etc.
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
vfb->fb_stride = width;
vfb->fb_stride = stride;
// This might be a bit wider than necessary, but we'll redetect on next render.
vfb->width = width;
vfb->width = stride;
}
}
}
@ -943,6 +964,7 @@ void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) {
addr &= 0x3FFFFFFF;
// TODO: Could go through all FBOs, but probably not important?
// TODO: Could also check for inner changes, but video is most important.
// TODO: This shouldn't care if it's a display framebuf or not, should work exactly the same.
bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr();
if (isDisplayBuf || safe) {
// TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help.
@ -1028,6 +1050,8 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF
// Currently rendering to this framebuffer. Need to make a copy.
if (!skipCopy && framebuffer == currentRenderVfb_) {
// Self-texturing, need a copy currently (some backends can potentially support it though).
WARN_LOG_REPORT_ONCE(selfTextureCopy, G3D, "Attempting to texture from current render target (src=%08x / target=%08x / flags=%d), making a copy", framebuffer->fb_address, currentRenderVfb_->fb_address, flags);
// TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size.
Draw::Framebuffer *renderCopy = GetTempFBO(TempFBO::COPY, framebuffer->renderWidth, framebuffer->renderHeight);
if (renderCopy) {
@ -1036,7 +1060,9 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF
CopyFramebufferForColorTexture(&copyInfo, framebuffer, flags);
RebindFramebuffer("After BindFramebufferAsColorTexture");
draw_->BindFramebufferAsTexture(renderCopy, stage, Draw::FB_COLOR_BIT, 0);
gpuStats.numCopiesForSelfTex++;
} else {
// Failed to get temp FBO? Weird.
draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0);
}
return true;
@ -1044,7 +1070,7 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF
draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0);
return true;
} else {
ERROR_LOG_REPORT_ONCE(vulkanSelfTexture, G3D, "Attempting to texture from target (src=%08x / target=%08x / flags=%d)", framebuffer->fb_address, currentRenderVfb_->fb_address, flags);
ERROR_LOG_REPORT_ONCE(selfTextureFail, G3D, "Attempting to texture from target (src=%08x / target=%08x / flags=%d)", framebuffer->fb_address, currentRenderVfb_->fb_address, flags);
// To do this safely in Vulkan, we need to use input attachments.
// Actually if the texture region and render regions don't overlap, this is safe, but we need
// to transition to GENERAL image layout which will take some trickery.
@ -1153,7 +1179,7 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
return tex;
}
void FramebufferManagerCommon::DrawFramebufferToOutput(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride) {
void FramebufferManagerCommon::DrawFramebufferToOutput(const u8 *srcPixels, int srcStride, GEBufferFormat srcPixelFormat) {
textureCache_->ForgetLastTexture();
shaderManager_->DirtyLastShader();
@ -1227,7 +1253,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
u32 fbaddr = reallyDirty ? displayFramebufPtr_ : prevDisplayFramebufPtr_;
prevDisplayFramebufPtr_ = fbaddr;
VirtualFramebuffer *vfb = GetVFBAt(fbaddr);
VirtualFramebuffer *vfb = ResolveVFB(fbaddr, displayStride_, displayFormat_);
if (!vfb) {
// Let's search for a framebuf within this range. Note that we also look for
// "framebuffers" sitting in RAM (created from block transfer or similar) so we only take off the kernel
@ -1260,20 +1286,11 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
}
}
if (vfb && vfb->fb_format != displayFormat_) {
if (vfb->last_frame_render + FBO_OLD_AGE < gpuStats.numFlips) {
// The game probably switched formats on us.
vfb->fb_format = displayFormat_;
} else {
vfb = 0;
}
}
if (!vfb) {
if (Memory::IsValidAddress(fbaddr)) {
// The game is displaying something directly from RAM. In GTA, it's decoded video.
if (!vfb) {
DrawFramebufferToOutput(Memory::GetPointer(fbaddr), displayFormat_, displayStride_);
DrawFramebufferToOutput(Memory::GetPointer(fbaddr), displayStride_, displayFormat_);
return;
}
} else {
@ -1307,8 +1324,6 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
else
DEBUG_LOG(FRAMEBUF, "Displaying FBO %08x", vfb->fb_address);
// TODO ES3: Use glInvalidateFramebuffer to discard depth/stencil data at the end of frame.
float u0 = offsetX / (float)vfb->bufferWidth;
float v0 = offsetY / (float)vfb->bufferHeight;
float u1 = (480.0f + offsetX) / (float)vfb->bufferWidth;
@ -1363,7 +1378,7 @@ void FramebufferManagerCommon::DecimateFBOs() {
if (vfb != displayFramebuf_ && vfb != prevDisplayFramebuf_ && vfb != prevPrevDisplayFramebuf_) {
if (age > FBO_OLD_AGE) {
INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format, age);
INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%ix%i %s), age %i", vfb->fb_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->fb_format), age);
DestroyFramebuf(vfb);
vfbs_.erase(vfbs_.begin() + i--);
}
@ -1385,7 +1400,7 @@ void FramebufferManagerCommon::DecimateFBOs() {
VirtualFramebuffer *vfb = bvfbs_[i];
int age = frameLastFramebufUsed_ - vfb->last_frame_render;
if (age > FBO_OLD_AGE) {
INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format, age);
INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%dx%d %s), age %i", vfb->fb_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->fb_format), age);
DestroyFramebuf(vfb);
bvfbs_.erase(bvfbs_.begin() + i--);
}
@ -1446,7 +1461,11 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w,
}
bool creating = old.bufferWidth == 0;
WARN_LOG(FRAMEBUF, "%s %s FBO at %08x/%d from %dx%d to %dx%d (force=%d)", creating ? "Creating" : "Resizing", GeBufferFormatToString(vfb->fb_format), vfb->fb_address, vfb->fb_stride, old.bufferWidth, old.bufferHeight, vfb->bufferWidth, vfb->bufferHeight, (int)force);
if (creating) {
WARN_LOG(FRAMEBUF, "Creating %s FBO at %08x/%d %dx%d (force=%d)", GeBufferFormatToString(vfb->fb_format), vfb->fb_address, vfb->fb_stride, vfb->bufferWidth, vfb->bufferHeight, (int)force);
} else {
WARN_LOG(FRAMEBUF, "Resizing %s FBO at %08x/%d from %dx%d to %dx%d (force=%d)", GeBufferFormatToString(vfb->fb_format), vfb->fb_address, vfb->fb_stride, old.bufferWidth, old.bufferHeight, vfb->bufferWidth, vfb->bufferHeight, (int)force);
}
// During hardware rendering, we always render at full color depth even if the game wouldn't on real hardware.
// It's not worth the trouble trying to support lower bit-depth rendering, just
@ -2067,7 +2086,7 @@ void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstS
bool isDisplayBuffer = DisplayFramebufAddr() == dstBasePtr;
if (isPrevDisplayBuffer || isDisplayBuffer) {
FlushBeforeCopy();
DrawFramebufferToOutput(Memory::GetPointerUnchecked(dstBasePtr), displayFormat_, dstStride);
DrawFramebufferToOutput(Memory::GetPointerUnchecked(dstBasePtr), dstStride, displayFormat_);
return;
}
}
@ -2481,12 +2500,14 @@ void FramebufferManagerCommon::ReadFramebufferToMemory(VirtualFramebuffer *vfb,
// Flushes pending draws so that a following blit, download or upload sees up-to-date
// framebuffer contents. Does nothing when the draw engine reports no queued draw calls.
void FramebufferManagerCommon::FlushBeforeCopy() {
	// Flush anything not yet drawn before blitting, downloading, or uploading.
	// This might be a stalled list, or unflushed before a block transfer, etc.

	// Only bother if any draws are pending. The framebuffer setup and flush must stay inside
	// this check; running them unconditionally first would defeat it.
	if (drawEngine_->GetNumDrawCalls() > 0) {
		// TODO: It's really bad that we are calling SetRenderFramebuffer here with
		// all the irrelevant state checking it'll use to decide what to do. Should
		// do something more focused here.
		SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
		drawEngine_->DispatchFlush();
	}
}
// TODO: Replace with depal, reading the palette from the texture on the GPU directly.
@ -2770,6 +2791,11 @@ void FramebufferManagerCommon::BlitUsingRaster(
draw_->BindFramebufferAsRenderTarget(dest, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag ? tag : "BlitUsingRaster");
draw_->BindFramebufferAsTexture(src, 0, pipeline->info.readChannel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, 0);
if (destX1 == 0.0f && destY1 == 0.0f && destX2 >= destW && destY2 >= destH) {
// We overwrite the whole channel of the framebuffer, so we can invalidate the current contents.
draw_->InvalidateFramebuffer(Draw::FB_INVALIDATION_LOAD, pipeline->info.writeChannel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT);
}
Draw::Viewport vp{ 0.0f, 0.0f, (float)dest->Width(), (float)dest->Height(), 0.0f, 1.0f };
draw_->SetViewports(1, &vp);
draw_->SetScissorRect(0, 0, (int)dest->Width(), (int)dest->Height());
@ -2789,6 +2815,11 @@ VirtualFramebuffer *FramebufferManagerCommon::ResolveFramebufferColorToFormat(Vi
continue;
}
// Sanity check for things that shouldn't exist.
if (dest->fb_address == src->fb_address && dest->fb_format == src->fb_format && dest->fb_stride == src->fb_stride) {
_dbg_assert_msg_(false, "illegal clone of src found");
}
if (dest->fb_address == src->fb_address && dest->FbStrideInBytes() == src->FbStrideInBytes() && dest->fb_format == newFormat) {
vfb = dest;
break;
@ -2811,6 +2842,7 @@ VirtualFramebuffer *FramebufferManagerCommon::ResolveFramebufferColorToFormat(Vi
vfb->safeWidth *= widthFactor;
vfb->fb_format = newFormat;
// stride stays the same since it's in pixels.
WARN_LOG(G3D, "Creating %s clone of %08x/%08x/%s (%dx%d -> %dx%d)", GeBufferFormatToString(newFormat), src->fb_address, src->z_address, GeBufferFormatToString(src->fb_format), src->width, src->height, vfb->width, vfb->height);

View file

@ -64,6 +64,8 @@ class ShaderWriter;
// Sometimes, virtual framebuffers need to share a Z buffer. We emulate this by copying from one to the next
// when such a situation is detected. In order to reliably detect this, we separately track depth buffers,
// and they know which color buffer they were used with last.
// Two VirtualFramebuffer can occupy the same address range as long as they have different fb_format.
// In that case, the one with the highest colorBindSeq number is the valid one.
struct VirtualFramebuffer {
u32 fb_address;
u32 z_address; // If 0, it's a "RAM" framebuffer.
@ -73,6 +75,7 @@ struct VirtualFramebuffer {
// The original PSP format of the framebuffer.
// In reality they are all RGBA8888 for better quality but this is what the PSP thinks it is. This is necessary
// when we need to interpret the bits directly (depal or buffer aliasing).
// NOTE: CANNOT be changed after creation anymore!
GEBufferFormat fb_format;
Draw::Framebuffer *fbo;
@ -320,7 +323,7 @@ public:
void ReadFramebufferToMemory(VirtualFramebuffer *vfb, int x, int y, int w, int h);
void DownloadFramebufferForClut(u32 fb_address, u32 loadBytes);
void DrawFramebufferToOutput(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride);
void DrawFramebufferToOutput(const u8 *srcPixels, int srcStride, GEBufferFormat srcPixelFormat);
void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height);
@ -358,12 +361,20 @@ public:
return currentRenderVfb_;
}
// This only checks for the color channel.
// This only checks for the color channel, and if there are multiple overlapping ones
// with different color depth, this might get things wrong.
// DEPRECATED FOR NEW USES - avoid whenever possible.
VirtualFramebuffer *GetVFBAt(u32 addr) const;
VirtualFramebuffer *GetDisplayVFB() const {
return GetVFBAt(displayFramebufPtr_);
}
// This will only return exact matches of addr+stride+format.
VirtualFramebuffer *GetExactVFB(u32 addr, int stride, GEBufferFormat format) const;
// If this doesn't find the exact VFB, but one with a different color format with matching stride,
// it'll resolve the newest one at address to the format requested, and return that.
VirtualFramebuffer *ResolveVFB(u32 addr, int stride, GEBufferFormat format);
// Utility to get the display VFB.
VirtualFramebuffer *GetDisplayVFB();
int GetRenderWidth() const { return currentRenderVfb_ ? currentRenderVfb_->renderWidth : 480; }
int GetRenderHeight() const { return currentRenderVfb_ ? currentRenderVfb_->renderHeight : 272; }

View file

@ -1956,12 +1956,27 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
}
// If min is not < max, then we don't have values (wasn't set during decode.)
const KnownVertexBounds &bounds = gstate_c.vertBounds;
float u1 = 0.0f;
float v1 = 0.0f;
float u2 = depalWidth;
float v2 = framebuffer->renderHeight;
if (bounds.minV < bounds.maxV) {
u1 = (bounds.minU + gstate_c.curTextureXOffset) * framebuffer->renderScaleFactor;
v1 = (bounds.minV + gstate_c.curTextureYOffset) * framebuffer->renderScaleFactor;
u2 = (bounds.maxU + gstate_c.curTextureXOffset) * framebuffer->renderScaleFactor;
v2 = (bounds.maxV + gstate_c.curTextureYOffset) * framebuffer->renderScaleFactor;
// We need to reapply the texture next time since we cropped UV.
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
}
Draw::Framebuffer *depalFBO = framebufferManager_->GetTempFBO(TempFBO::DEPAL, depalWidth, framebuffer->renderHeight);
draw_->BindTexture(0, nullptr);
draw_->BindTexture(1, nullptr);
draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "Depal");
draw_->SetScissorRect(0, 0, (int)depalWidth, (int)framebuffer->renderHeight);
draw_->InvalidateFramebuffer(Draw::FB_INVALIDATION_STORE, Draw::FB_DEPTH_BIT | Draw::FB_STENCIL_BIT);
draw_->SetScissorRect(u1, v1, u2 - u1, v2 - v1);
Draw::Viewport vp{ 0.0f, 0.0f, (float)depalWidth, (float)framebuffer->renderHeight, 0.0f, 1.0f };
draw_->SetViewports(1, &vp);
@ -1972,27 +1987,10 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
draw_->BindSamplerStates(0, 1, &nearest);
draw_->BindSamplerStates(1, 1, &clutSampler);
// If min is not < max, then we don't have values (wasn't set during decode.)
const KnownVertexBounds &bounds = gstate_c.vertBounds;
float u1 = 0.0f;
float v1 = 0.0f;
float u2 = depalWidth;
float v2 = framebuffer->renderHeight;
if (bounds.minV < bounds.maxV) {
u1 = bounds.minU + gstate_c.curTextureXOffset;
v1 = bounds.minV + gstate_c.curTextureYOffset;
u2 = bounds.maxU + gstate_c.curTextureXOffset;
v2 = bounds.maxV + gstate_c.curTextureYOffset;
// We need to reapply the texture next time since we cropped UV.
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
}
u1 *= framebuffer->renderScaleFactor;
v1 *= framebuffer->renderScaleFactor;
u2 *= framebuffer->renderScaleFactor;
v2 *= framebuffer->renderScaleFactor;
draw2D_->Blit(textureShader, u1, v1, u2, v2, u1, v1, u2, v2, framebuffer->renderWidth, framebuffer->renderHeight, depalWidth, framebuffer->renderHeight, false, framebuffer->renderScaleFactor);
gpuStats.numDepal++;
gstate_c.curTextureWidth = texWidth;
draw_->BindTexture(0, nullptr);

View file

@ -60,8 +60,8 @@ inline unsigned int toFloat24(float f) {
struct GPUStatistics {
void Reset() {
// Never add a vtable :)
memset(this, 0, sizeof(*this));
ResetFrame();
numFlips = 0;
}
void ResetFrame() {
@ -82,10 +82,13 @@ struct GPUStatistics {
numFramebufferEvaluations = 0;
numReadbacks = 0;
numUploads = 0;
numDepal = 0;
numClears = 0;
numDepthCopies = 0;
numReinterpretCopies = 0;
numColorCopies = 0;
numCopiesForShaderBlend = 0;
numCopiesForSelfTex = 0;
msProcessingDisplayLists = 0;
vertexGPUCycles = 0;
otherGPUCycles = 0;
@ -110,10 +113,13 @@ struct GPUStatistics {
int numFramebufferEvaluations;
int numReadbacks;
int numUploads;
int numDepal;
int numClears;
int numDepthCopies;
int numReinterpretCopies;
int numColorCopies;
int numCopiesForShaderBlend;
int numCopiesForSelfTex;
double msProcessingDisplayLists;
int vertexGPUCycles;
int otherGPUCycles;

View file

@ -2936,17 +2936,18 @@ void GPUCommon::InvalidateCache(u32 addr, int size, GPUInvalidationType type) {
if (type != GPU_INVALIDATE_ALL && framebufferManager_->MayIntersectFramebuffer(addr)) {
// Vempire invalidates (with writeback) after drawing, but before blitting.
// TODO: Investigate whether we can get this to work some other way.
if (type == GPU_INVALIDATE_SAFE) {
framebufferManager_->UpdateFromMemory(addr, size, type == GPU_INVALIDATE_SAFE);
}
}
}
void GPUCommon::NotifyVideoUpload(u32 addr, int size, int width, int format) {
void GPUCommon::NotifyVideoUpload(u32 addr, int size, int frameWidth, int format) {
if (Memory::IsVRAMAddress(addr)) {
framebufferManager_->NotifyVideoUpload(addr, size, width, (GEBufferFormat)format);
framebufferManager_->NotifyVideoUpload(addr, size, frameWidth, (GEBufferFormat)format);
}
textureCache_->NotifyVideoUpload(addr, size, width, (GEBufferFormat)format);
textureCache_->NotifyVideoUpload(addr, size, frameWidth, (GEBufferFormat)format);
InvalidateCache(addr, size, GPU_INVALIDATE_SAFE);
}
@ -3060,8 +3061,8 @@ size_t GPUCommon::FormatGPUStatsCommon(char *buffer, size_t size) {
"Vertices: %d cached: %d uncached: %d\n"
"FBOs active: %d (evaluations: %d)\n"
"Textures: %d, dec: %d, invalidated: %d, hashed: %d kB\n"
"Readbacks: %d, uploads: %d\n"
"Copies: depth %d, color %d, reinterpret: %d\n"
"readbacks %d, uploads %d, depal %d\n"
"Copies: depth %d, color %d, reint %d, blend %d, selftex %d\n"
"GPU cycles executed: %d (%f per vertex)\n",
gpuStats.msProcessingDisplayLists * 1000.0f,
gpuStats.numDrawCalls,
@ -3081,9 +3082,12 @@ size_t GPUCommon::FormatGPUStatsCommon(char *buffer, size_t size) {
gpuStats.numTextureDataBytesHashed / 1024,
gpuStats.numReadbacks,
gpuStats.numUploads,
gpuStats.numDepal,
gpuStats.numDepthCopies,
gpuStats.numColorCopies,
gpuStats.numReinterpretCopies,
gpuStats.numCopiesForShaderBlend,
gpuStats.numCopiesForSelfTex,
gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles,
vertexAverageCycles
);

View file

@ -358,7 +358,7 @@ VulkanFragmentShader *ShaderManagerVulkan::GetFragmentShaderFromModule(VkShaderM
// instantaneous.
#define CACHE_HEADER_MAGIC 0xff51f420
#define CACHE_VERSION 20
#define CACHE_VERSION 21
struct VulkanCacheHeader {
uint32_t magic;
uint32_t version;

View file

@ -1136,65 +1136,6 @@ ULKS46087 = true
NPUZ00043 = true
NPEZ00198 = true
# This setting will go away in the near future; hopefully we can enable it
# for all or most games.
[ReinterpretFramebuffers]
# Ultimate Ghosts & Goblins
ULJM05147 = true
ULUS10105 = true
ULES00419 = true
NPJH50235 = true
ULAS42073 = true
# Goku Makai-Mura Kai (variant of Ultimate Ghosts & Goblins)
ULJM05265 = true
ULJM05366 = true
# Kingdom Hearts (see #11223)
ULUS10505 = true
ULES01441 = true
ULJM05600 = true
ULJM05775 = true
# Spongebob - The Yellow Avenger (see #15898)
ULUS10092 = true
ULES00280 = true
# MX vs ATV Reflex
ULES01375 = true
ULUS10429 = true
# MX vs ATV Untamed
ULES00993 = true
ULES00994 = true
ULUS10330 = true
# Cars Race-O-Rama
ULES01333 = true
ULUS10428 = true
# God of War: Chains of Olympus
# The old hack for the shadows isn't working anymore since the framebuffers don't match.
# This is nicer anyway.
UCUS98653 = true
UCES00842 = true
UCKS45084 = true
UCUS98705 = true
ULJM05348 = true
ULJM05438 = true
NPUG80325 = true
NPEG00023 = true
NPHG00028 = true
# God of War: Ghost of Sparta
UCUS98737 = true
UCAS40323 = true
UCKS45161 = true
NPHG00092 = true
NPEG00044 = true
UCJS10114 = true
UCES01401 = true
NPJG00120 = true
[ShaderColorBitmask]
# No users right now, but keeping it around as a more accurate option than BlueToAlpha, mainly for debugging Outrun.