Merge pull request #6902 from unknownbrackets/gpu-convergence

d3d: Handle block transfers
This commit is contained in:
Henrik Rydgård 2014-09-14 11:06:15 +02:00
commit 2b4ff04c32
14 changed files with 1003 additions and 695 deletions

View file

@ -20,6 +20,7 @@
#include "Core/Config.h" #include "Core/Config.h"
#include "Core/CoreParameter.h" #include "Core/CoreParameter.h"
#include "Core/Reporting.h" #include "Core/Reporting.h"
#include "Core/ELF/ParamSFO.h"
#include "Core/System.h" #include "Core/System.h"
#include "GPU/Common/FramebufferCommon.h" #include "GPU/Common/FramebufferCommon.h"
#include "GPU/GPUInterface.h" #include "GPU/GPUInterface.h"
@ -41,6 +42,21 @@ FramebufferManagerCommon::FramebufferManagerCommon() :
FramebufferManagerCommon::~FramebufferManagerCommon() { FramebufferManagerCommon::~FramebufferManagerCommon() {
} }
void FramebufferManagerCommon::Init() {
	// Disc IDs that receive the forced-download hack: Dangan Ronpa, its demo, and its sequel.
	// The game draws solid colors to a small framebuffer, and then reads this directly in VRAM.
	// We force this framebuffer to 1x and force download it automatically.
	static const char * const kForcedDownloadDiscIds[] = {
		"NPJH50631",
		"NPJH50372",
		"NPJH90164",
		"NPJH50515",
	};

	const std::string discId = g_paramSFO.GetValueString("DISC_ID");
	bool forceDownload = false;
	for (const char *candidate : kForcedDownloadDiscIds) {
		if (discId == candidate) {
			forceDownload = true;
			break;
		}
	}
	hackForce04154000Download_ = forceDownload;

	// Start from a cleared buffer. Nothing is cleared per frame, as the games are
	// supposed to handle that by themselves.
	ClearBuffer();
	BeginFrame();
}
void FramebufferManagerCommon::BeginFrame() { void FramebufferManagerCommon::BeginFrame() {
DecimateFBOs(); DecimateFBOs();
currentRenderVfb_ = 0; currentRenderVfb_ = 0;
@ -387,3 +403,331 @@ void FramebufferManagerCommon::DoSetRenderFrameBuffer() {
gstate_c.curRTRenderWidth = vfb->renderWidth; gstate_c.curRTRenderWidth = vfb->renderWidth;
gstate_c.curRTRenderHeight = vfb->renderHeight; gstate_c.curRTRenderHeight = vfb->renderHeight;
} }
// Refreshes virtual framebuffers located at addr from emulated memory.
// Only acts when addr is the current/previous display buffer, or when the caller passes safe.
// With buffered rendering and an existing FBO the pixels are redrawn into it; otherwise the
// matching framebuffer is destroyed and removed from vfbs_.
void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) {
	addr &= ~0x40000000;

	// TODO: Could go through all FBOs, but probably not important?
	// TODO: Could also check for inner changes, but video is most important.
	const bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr();
	if (!isDisplayBuf && !safe) {
		return;
	}

	// TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help.
	if (!Memory::IsValidAddress(displayFramebufPtr_)) {
		return;
	}

	for (size_t i = 0; i < vfbs_.size(); ++i) {
		VirtualFramebuffer *vfb = vfbs_[i];
		if (!MaskedEqual(vfb->fb_address, addr)) {
			continue;
		}

		FlushBeforeCopy();

		if (!useBufferedRendering_ || !vfb->fbo) {
			INFO_LOG(SCEGE, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format);
			DestroyFramebuf(vfb);
			// Step the index back so the element that slides into this slot is visited next.
			vfbs_.erase(vfbs_.begin() + i--);
			continue;
		}

		DisableState();
		// If we're not rendering to it, format may be wrong. Use displayFormat_ instead.
		const bool notRecentlyRendered = vfb->last_frame_render + 1 < gpuStats.numFlips && isDisplayBuf;
		const GEBufferFormat fmt = notRecentlyRendered ? displayFormat_ : vfb->format;
		DrawPixels(vfb, 0, 0, Memory::GetPointer(addr | 0x04000000), fmt, vfb->fb_stride, vfb->width, vfb->height);
		SetColorUpdated(vfb);
	}

	RebindFramebuffer();
}
// Called when the emulated program copies (or memsets) `size` bytes from `src` to `dst`.
// Looks for virtual framebuffers that the source and/or destination range lines up with and,
// depending on g_Config.bBlockTransferGPU, mirrors the copy on the GPU side:
//  - framebuffer -> framebuffer: blit between the two buffers,
//  - memory -> framebuffer: upload the source pixels into the destination buffer,
//  - framebuffer -> memory: read the source buffer back to memory before the copy.
//
// Returns false on every path, i.e. it never claims to have fully handled the copy, so the
// caller is always told to still perform the actual memory copy.
bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool isMemset) {
	if (updateVRAM_ || size == 0) {
		return false;
	}

	dst &= 0x3FFFFFFF;
	src &= 0x3FFFFFFF;

	VirtualFramebuffer *dstBuffer = 0;
	VirtualFramebuffer *srcBuffer = 0;
	// (u32)-1 means "no match yet"; the candidate with the smallest row offset wins.
	u32 dstY = (u32)-1;
	u32 dstH = 0;
	u32 srcY = (u32)-1;
	u32 srcH = 0;
	for (size_t i = 0; i < vfbs_.size(); ++i) {
		VirtualFramebuffer *vfb = vfbs_[i];
		if (vfb->fb_stride == 0) {
			// A zero stride cannot be mapped to rows, and the row math below would divide by zero.
			continue;
		}

		const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF;
		const u32 vfb_size = FramebufferByteSize(vfb);
		const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
		const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
		const int vfb_byteWidth = vfb->width * vfb_bpp;

		if (dst >= vfb_address && (dst + size <= vfb_address + vfb_size || dst == vfb_address)) {
			const u32 offset = dst - vfb_address;
			const u32 yOffset = offset / vfb_byteStride;
			// Only accept copies that start on a row boundary and cover one row or whole rows.
			if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < dstY) {
				dstBuffer = vfb;
				dstY = yOffset;
				dstH = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height);
			}
		}

		if (src >= vfb_address && (src + size <= vfb_address + vfb_size || src == vfb_address)) {
			const u32 offset = src - vfb_address;
			const u32 yOffset = offset / vfb_byteStride;
			if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < srcY) {
				srcBuffer = vfb;
				srcY = yOffset;
				srcH = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height);
			}
		}
	}

	if (srcBuffer && srcY == 0 && srcH == srcBuffer->height && !dstBuffer) {
		// MotoGP workaround - it copies a framebuffer to memory and then displays it.
		// TODO: It's rare anyway, but the game could modify the RAM and then we'd display the wrong thing.
		// Unfortunately, that would force 1x render resolution.
		if (Memory::IsRAMAddress(dst)) {
			knownFramebufferRAMCopies_.insert(std::pair<u32, u32>(src, dst));
		}
	}

	if (!useBufferedRendering_) {
		// If we're copying into a recently used display buf, it's probably destined for the screen.
		if (srcBuffer || (dstBuffer != displayFramebuf_ && dstBuffer != prevDisplayFramebuf_)) {
			return false;
		}
	}

	if (dstBuffer && srcBuffer && !isMemset) {
		if (srcBuffer == dstBuffer) {
			WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x", src, dst);
		} else {
			WARN_LOG_REPORT_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x", src, dst);
			// Just do the blit!
			if (g_Config.bBlockTransferGPU) {
				BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0);
				SetColorUpdated(dstBuffer);
				RebindFramebuffer();
			}
		}
	} else if (dstBuffer) {
		WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x", src, dst);
		if (g_Config.bBlockTransferGPU) {
			FlushBeforeCopy();
			const u8 *srcBase = Memory::GetPointerUnchecked(src);
			DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstH);
			SetColorUpdated(dstBuffer);
			RebindFramebuffer();
		}
	} else if (srcBuffer) {
		WARN_LOG_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst);
		FlushBeforeCopy();
		if (srcH == 0 || srcY + srcH > srcBuffer->bufferHeight) {
			WARN_LOG_REPORT_ONCE(btdcpyheight, G3D, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight);
		} else if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) {
			ReadFramebufferToMemory(srcBuffer, true, 0, srcY, srcBuffer->width, srcH);
		}
	}

	// This is a memcpy, let's still copy just in case.
	return false;
}
// Finds the virtual framebuffers (if any) that a block transfer's destination and source
// base pointers fall inside.
//
// dstBuffer / srcBuffer: set to the matching framebuffer; left untouched when nothing matches,
//   so callers should initialize them before the call.
// dstX, dstY / srcX, srcY: on a match, advanced by the pixel/row offset of the base pointer
//   within the matched framebuffer.
// srcWidth, srcHeight: on entry the transfer size in pixels; on a match these (and
//   dstWidth, dstHeight) receive the size to use against the matched framebuffer.
// dstStride / srcStride are in pixels and bpp is bytes per pixel of the transfer.
//
// A side whose byte stride (stride * bpp) is zero can never match: it describes no rows, and
// the offset math would otherwise divide by zero.
void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) const {
	// (u32)-1 marks "no match yet"; among candidates the smallest row offset wins.
	u32 dstYOffset = -1;
	u32 dstXOffset = -1;
	u32 srcYOffset = -1;
	u32 srcXOffset = -1;
	// Snapshot the requested size, since srcWidth/srcHeight are overwritten on a match.
	int width = srcWidth;
	int height = srcHeight;

	dstBasePtr &= 0x3FFFFFFF;
	srcBasePtr &= 0x3FFFFFFF;

	// Loop-invariant. Non-zero implies both the stride and bpp are non-zero, which makes
	// every division and modulo below well-defined.
	const u32 dstByteStride = dstStride * bpp;
	const u32 srcByteStride = srcStride * bpp;

	for (size_t i = 0; i < vfbs_.size(); ++i) {
		VirtualFramebuffer *vfb = vfbs_[i];
		const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF;
		const u32 vfb_size = FramebufferByteSize(vfb);
		const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
		const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
		const u32 vfb_byteWidth = vfb->width * vfb_bpp;

		// These heuristics are a bit annoying.
		// The goal is to avoid using GPU block transfers for things that ought to be memory.
		// Maybe we should even check for textures at these places instead?

		if (dstByteStride != 0 && vfb_address <= dstBasePtr && dstBasePtr < vfb_address + vfb_size) {
			const u32 byteOffset = dstBasePtr - vfb_address;
			const u32 byteStride = dstByteStride;
			const u32 yOffset = byteOffset / byteStride;
			// Some games use mismatching bitdepths. But make sure the stride matches.
			// If it doesn't, generally this means we detected the framebuffer with too large a height.
			bool match = yOffset < dstYOffset;
			if (match && vfb_byteStride != byteStride) {
				// Grand Knights History copies with a mismatching stride but a full line at a time.
				// Makes it hard to detect the wrong transfers in e.g. God of War.
				if (width != dstStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) {
					match = false;
				} else {
					dstWidth = byteStride * height / vfb_bpp;
					dstHeight = 1;
				}
			} else if (match) {
				dstWidth = width;
				dstHeight = height;
			}
			if (match) {
				dstYOffset = yOffset;
				dstXOffset = (byteOffset / bpp) % dstStride;
				dstBuffer = vfb;
			}
		}

		if (srcByteStride != 0 && vfb_address <= srcBasePtr && srcBasePtr < vfb_address + vfb_size) {
			const u32 byteOffset = srcBasePtr - vfb_address;
			const u32 byteStride = srcByteStride;
			const u32 yOffset = byteOffset / byteStride;
			bool match = yOffset < srcYOffset;
			if (match && vfb_byteStride != byteStride) {
				if (width != srcStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) {
					match = false;
				} else {
					srcWidth = byteStride * height / vfb_bpp;
					srcHeight = 1;
				}
			} else if (match) {
				srcWidth = width;
				srcHeight = height;
			}
			if (match) {
				srcYOffset = yOffset;
				srcXOffset = (byteOffset / bpp) % srcStride;
				srcBuffer = vfb;
			}
		}
	}

	// Translate the coordinates so they are relative to the matched framebuffer's origin.
	if (dstYOffset != (u32)-1) {
		dstY += dstYOffset;
		dstX += dstXOffset;
	}
	if (srcYOffset != (u32)-1) {
		srcY += srcYOffset;
		srcX += srcXOffset;
	}
}
// Invoked before a block transfer is carried out in emulated memory.
// Returns true only when the transfer was between framebuffers and has already been dealt
// with on the GPU side (requires g_Config.bBlockTransferGPU); false means the caller should
// go ahead with the memory copy.
bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) {
	if (updateVRAM_ || !useBufferedRendering_) {
		return false;
	}

	// Skip checking if there's no framebuffers in that area.
	if (!(MayIntersectFramebuffer(srcBasePtr) || MayIntersectFramebuffer(dstBasePtr))) {
		return false;
	}

	VirtualFramebuffer *dstBuffer = 0;
	VirtualFramebuffer *srcBuffer = 0;
	int srcWidth = width;
	int srcHeight = height;
	int dstWidth = width;
	int dstHeight = height;
	FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp);

	if (!srcBuffer) {
		// Either nothing matched, or this is an upload into a framebuffer.
		// For uploads we should just draw the pixels into the buffer. Copy first.
		return false;
	}

	if (!dstBuffer) {
		// Download: make sure memory holds the framebuffer contents before the copy reads it.
		WARN_LOG_ONCE(btd, G3D, "Block transfer download %08x -> %08x", srcBasePtr, dstBasePtr);
		FlushBeforeCopy();
		if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) {
			if (srcHeight > 0 && srcY + srcHeight <= srcBuffer->bufferHeight) {
				// The transfer's bpp may differ from the framebuffer's, so rescale the horizontal values.
				const int srcBpp = srcBuffer->format == GE_FORMAT_8888 ? 4 : 2;
				const float srcXFactor = (float)bpp / srcBpp;
				ReadFramebufferToMemory(srcBuffer, true, static_cast<int>(srcX * srcXFactor), srcY, static_cast<int>(srcWidth * srcXFactor), srcHeight);
			} else {
				WARN_LOG_ONCE(btdheight, G3D, "Block transfer download %08x -> %08x skipped, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcY, srcHeight, srcBuffer->bufferHeight);
			}
		}
		return false; // Let the bit copy happen
	}

	// From here on, both ends of the transfer are framebuffers.
	const bool sameBuffer = srcBuffer == dstBuffer;
	if (sameBuffer && srcX == dstX && srcY == dstY) {
		// Ignore, nothing to do. Tales of Phantasia X does this by accident.
		return g_Config.bBlockTransferGPU ? true : false;
	}

	if (sameBuffer) {
		WARN_LOG_ONCE(dstsrc, G3D, "Intra-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr);
	} else {
		WARN_LOG_ONCE(dstnotsrc, G3D, "Inter-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr);
	}

	if (!g_Config.bBlockTransferGPU) {
		return false;
	}

	// Just do the blit!
	FlushBeforeCopy();
	BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp);
	RebindFramebuffer();
	SetColorUpdated(dstBuffer);
	return true; // No need to actually do the memory copy behind, probably.
}
// Invoked after a block transfer has been carried out in emulated memory.
// Handles two cases: a full-screen write straight into a display buffer while not using
// buffered rendering, and uploads from memory into a known framebuffer.
void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) {
	// A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to
	// the backbuffer. Detect this and have the framebuffermanager draw the pixels.
	const u32 prevDisplayAddr = PrevDisplayFramebufAddr();
	const u32 curDisplayAddr = DisplayFramebufAddr();
	const bool hitsPrevDisplay = prevDisplayAddr != 0 && dstBasePtr == prevDisplayAddr;
	const bool hitsCurDisplay = curDisplayAddr != 0 && dstBasePtr == curDisplayAddr;
	const bool fullScreenWrite = dstStride == 512 && height == 272;

	// TODO: Is this not handled by upload? Should we check !dstBuffer to avoid a double copy?
	if ((hitsPrevDisplay || hitsCurDisplay) && fullScreenWrite && !useBufferedRendering_) {
		FlushBeforeCopy();
		DrawFramebuffer(Memory::GetPointerUnchecked(dstBasePtr), displayFormat_, 512, false);
	}

	if (!MayIntersectFramebuffer(srcBasePtr) && !MayIntersectFramebuffer(dstBasePtr)) {
		return;
	}

	VirtualFramebuffer *dstBuffer = 0;
	VirtualFramebuffer *srcBuffer = 0;
	int srcWidth = width;
	int srcHeight = height;
	int dstWidth = width;
	int dstHeight = height;
	FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp);

	// Without buffered rendering, only the framebuffer currently being rendered to is updated.
	if (!useBufferedRendering_ && currentRenderVfb_ != dstBuffer) {
		return;
	}

	// Only memory -> framebuffer uploads are handled here.
	if (!dstBuffer || srcBuffer) {
		return;
	}

	WARN_LOG_ONCE(btu, G3D, "Block transfer upload %08x -> %08x", srcBasePtr, dstBasePtr);
	if (!g_Config.bBlockTransferGPU) {
		return;
	}

	FlushBeforeCopy();
	const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
	// The transfer's bpp may differ from the framebuffer's, so rescale the horizontal values.
	const int dstBpp = dstBuffer->format == GE_FORMAT_8888 ? 4 : 2;
	const float dstXFactor = (float)bpp / dstBpp;
	DrawPixels(dstBuffer, static_cast<int>(dstX * dstXFactor), dstY, srcBase, dstBuffer->format, static_cast<int>(srcStride * dstXFactor), static_cast<int>(dstWidth * dstXFactor), dstHeight);
	SetColorUpdated(dstBuffer);
	RebindFramebuffer();
}

View file

@ -17,6 +17,7 @@
#pragma once #pragma once
#include <set>
#include <vector> #include <vector>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Core/MemMap.h" #include "Core/MemMap.h"
@ -93,10 +94,9 @@ public:
FramebufferManagerCommon(); FramebufferManagerCommon();
virtual ~FramebufferManagerCommon(); virtual ~FramebufferManagerCommon();
virtual void Init();
void BeginFrame(); void BeginFrame();
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format);
virtual bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false) = 0;
virtual bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) = 0;
void DoSetRenderFrameBuffer(); void DoSetRenderFrameBuffer();
void SetRenderFrameBuffer() { void SetRenderFrameBuffer() {
@ -110,11 +110,24 @@ public:
} }
DoSetRenderFrameBuffer(); DoSetRenderFrameBuffer();
} }
virtual void RebindFramebuffer() = 0;
bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false);
void UpdateFromMemory(u32 addr, int size, bool safe);
virtual bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) = 0;
// Returns true if it's sure this is a direct FBO->FBO transfer and it has already handled it.
// In that case we hardly need to actually copy the bytes in VRAM, they will be wrong anyway (unless
// read framebuffers is on, in which case this should always return false).
bool NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp);
void NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp);
virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) = 0;
virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0;
virtual void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) = 0;
virtual void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) = 0;
size_t NumVFBs() const { return vfbs_.size(); } size_t NumVFBs() const { return vfbs_.size(); }
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format);
u32 PrevDisplayFramebufAddr() { u32 PrevDisplayFramebufAddr() {
return prevDisplayFramebuf_ ? (0x04000000 | prevDisplayFramebuf_->fb_address) : 0; return prevDisplayFramebuf_ ? (0x04000000 | prevDisplayFramebuf_->fb_address) : 0;
} }
@ -122,17 +135,6 @@ public:
return displayFramebuf_ ? (0x04000000 | displayFramebuf_->fb_address) : 0; return displayFramebuf_ ? (0x04000000 | displayFramebuf_->fb_address) : 0;
} }
void SetDepthUpdated() {
if (currentRenderVfb_) {
currentRenderVfb_->depthUpdated = true;
}
}
void SetColorUpdated() {
if (currentRenderVfb_) {
SetColorUpdated(currentRenderVfb_);
}
}
bool MayIntersectFramebuffer(u32 start) { bool MayIntersectFramebuffer(u32 start) {
// Clear the cache/kernel bits. // Clear the cache/kernel bits.
start = start & 0x3FFFFFFF; start = start & 0x3FFFFFFF;
@ -158,12 +160,31 @@ public:
int GetTargetStride() const { return currentRenderVfb_ ? currentRenderVfb_->fb_stride : 512; } int GetTargetStride() const { return currentRenderVfb_ ? currentRenderVfb_->fb_stride : 512; }
GEBufferFormat GetTargetFormat() const { return currentRenderVfb_ ? currentRenderVfb_->format : displayFormat_; } GEBufferFormat GetTargetFormat() const { return currentRenderVfb_ ? currentRenderVfb_->format : displayFormat_; }
void SetDepthUpdated() {
if (currentRenderVfb_) {
currentRenderVfb_->depthUpdated = true;
}
}
void SetColorUpdated() {
if (currentRenderVfb_) {
SetColorUpdated(currentRenderVfb_);
}
}
protected: protected:
virtual void DisableState() = 0;
virtual void ClearBuffer() = 0;
virtual void ClearDepthBuffer() = 0;
virtual void FlushBeforeCopy() = 0;
virtual void DecimateFBOs() = 0;
// Used by ReadFramebufferToMemory and later framebuffer block copies
virtual void BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false) = 0;
void EstimateDrawingSize(int &drawing_width, int &drawing_height); void EstimateDrawingSize(int &drawing_width, int &drawing_height);
u32 FramebufferByteSize(const VirtualFramebuffer *vfb) const; u32 FramebufferByteSize(const VirtualFramebuffer *vfb) const;
static bool MaskedEqual(u32 addr1, u32 addr2); static bool MaskedEqual(u32 addr1, u32 addr2);
virtual void DecimateFBOs() = 0;
virtual void DestroyFramebuf(VirtualFramebuffer *vfb) = 0; virtual void DestroyFramebuf(VirtualFramebuffer *vfb) = 0;
virtual void ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, u16 h, bool force = false) = 0; virtual void ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, u16 h, bool force = false) = 0;
virtual void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) = 0; virtual void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) = 0;
@ -171,6 +192,7 @@ protected:
virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) = 0; virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) = 0;
bool ShouldDownloadFramebuffer(const VirtualFramebuffer *vfb) const; bool ShouldDownloadFramebuffer(const VirtualFramebuffer *vfb) const;
void FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) const;
void SetColorUpdated(VirtualFramebuffer *dstBuffer) { void SetColorUpdated(VirtualFramebuffer *dstBuffer) {
dstBuffer->memoryUpdated = false; dstBuffer->memoryUpdated = false;
@ -182,10 +204,6 @@ protected:
dstBuffer->reallyDirtyAfterDisplay = true; dstBuffer->reallyDirtyAfterDisplay = true;
} }
virtual void DisableState() = 0;
virtual void ClearBuffer() = 0;
virtual void ClearDepthBuffer() = 0;
u32 displayFramebufPtr_; u32 displayFramebufPtr_;
u32 displayStride_; u32 displayStride_;
GEBufferFormat displayFormat_; GEBufferFormat displayFormat_;
@ -204,6 +222,7 @@ protected:
bool updateVRAM_; bool updateVRAM_;
std::vector<VirtualFramebuffer *> vfbs_; std::vector<VirtualFramebuffer *> vfbs_;
std::set<std::pair<u32, u32>> knownFramebufferRAMCopies_;
bool hackForce04154000Download_; bool hackForce04154000Download_;

View file

@ -395,13 +395,21 @@ void SoftwareTransform(
return; return;
} }
if (gstate_c.flipTexture && maxIndex >= 2) { // This means we're using a framebuffer (and one that isn't big enough.)
if (gstate_c.curTextureHeight < (u32)gstate.getTextureHeight(0) && maxIndex >= 2) {
// Even if not rectangles, this will detect if either of the first two are outside the framebuffer. // Even if not rectangles, this will detect if either of the first two are outside the framebuffer.
// HACK: Adding one pixel margin to this detection fixes issues in Assassin's Creed : Bloodlines, // HACK: Adding one pixel margin to this detection fixes issues in Assassin's Creed : Bloodlines,
// while still keeping BOF working (see below). // while still keeping BOF working (see below).
const float invTexH = 1.0f / gstate_c.curTextureHeight; // size of one texel. const float invTexH = 1.0f / gstate_c.curTextureHeight; // size of one texel.
const bool tlOutside = transformed[0].v < -invTexH && transformed[0].v > 1.0f - heightFactor; bool tlOutside;
const bool brOutside = transformed[1].v < -invTexH && transformed[1].v > 1.0f - heightFactor; bool brOutside;
if (gstate_c.flipTexture) {
tlOutside = transformed[0].v < -invTexH && transformed[0].v > 1.0f - heightFactor;
brOutside = transformed[1].v < -invTexH && transformed[1].v > 1.0f - heightFactor;
} else {
tlOutside = transformed[0].v > invTexH && transformed[0].v > heightFactor - 1.0f;
brOutside = transformed[1].v > invTexH && transformed[1].v > heightFactor - 1.0f;
}
if (tlOutside || brOutside) { if (tlOutside || brOutside) {
// Okay, so we're texturing from outside the framebuffer, but inside the texture height. // Okay, so we're texturing from outside the framebuffer, but inside the texture height.
// Breath of Fire 3 does this to access a render surface at an offset. // Breath of Fire 3 does this to access a render surface at an offset.
@ -420,9 +428,13 @@ void SoftwareTransform(
for (int index = 0; index < maxIndex; ++index) { for (int index = 0; index < maxIndex; ++index) {
transformed[index].u *= widthFactor / oldWidthFactor; transformed[index].u *= widthFactor / oldWidthFactor;
// Inverse it back to scale to the new FBO, and add 1.0f to account for old FBO. // Inverse it back to scale to the new FBO, and add 1.0f to account for old FBO.
transformed[index].v = (1.0f - transformed[index].v) / oldHeightFactor; if (gstate_c.flipTexture) {
transformed[index].v -= yDiff; transformed[index].v = (1.0f - transformed[index].v) / oldHeightFactor;
transformed[index].v = 1.0f - (transformed[index].v * heightFactor); transformed[index].v -= yDiff;
transformed[index].v = 1.0f - (transformed[index].v * heightFactor);
} else {
transformed[index].v = (transformed[index].v / oldHeightFactor - yDiff) * heightFactor;
}
} }
} }
} }

View file

@ -28,9 +28,11 @@
#include "helper/fbo.h" #include "helper/fbo.h"
#include "GPU/Common/FramebufferCommon.h" #include "GPU/Common/FramebufferCommon.h"
#include "GPU/Common/TextureDecoder.h"
#include "GPU/Directx9/FramebufferDX9.h" #include "GPU/Directx9/FramebufferDX9.h"
#include "GPU/Directx9/TextureCacheDX9.h"
#include "GPU/Directx9/ShaderManagerDX9.h" #include "GPU/Directx9/ShaderManagerDX9.h"
#include "GPU/Directx9/TextureCacheDX9.h"
#include "GPU/Directx9/TransformPipelineDX9.h"
#include <algorithm> #include <algorithm>
@ -47,7 +49,15 @@ namespace DX9 {
return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000); return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000);
} }
static void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format); inline u16 BGRA8888toRGB565(u32 px) {
return ((px >> 19) & 0x001F) | ((px >> 5) & 0x07E0) | ((px << 8) & 0xF800);
}
inline u16 BGRA8888toRGBA4444(u32 px) {
return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000);
}
static void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format);
void CenterRect(float *x, float *y, float *w, float *h, void CenterRect(float *x, float *y, float *w, float *h,
float origW, float origH, float frameW, float frameH) { float origW, float origH, float frameW, float frameH) {
@ -112,31 +122,20 @@ namespace DX9 {
FramebufferManagerDX9::FramebufferManagerDX9() : FramebufferManagerDX9::FramebufferManagerDX9() :
drawPixelsTex_(0), drawPixelsTex_(0),
drawPixelsTexFormat_(GE_FORMAT_INVALID), convBuf(0),
convBuf(0) gameUsesSequentialCopies_(false) {
{
// And an initial clear. We don't clear per frame as the games are supposed to handle that
// by themselves.
ClearBuffer();
// TODO: Check / use D3DCAPS2_DYNAMICTEXTURES?
int usage = 0;
D3DPOOL pool = D3DPOOL_MANAGED;
if (pD3DdeviceEx) {
pool = D3DPOOL_DEFAULT;
usage = D3DUSAGE_DYNAMIC;
}
HRESULT hr = pD3Ddevice->CreateTexture(512, 272, 1, usage, D3DFMT(D3DFMT_A8R8G8B8), pool, &drawPixelsTex_, NULL);
if (FAILED(hr)) {
drawPixelsTex_ = nullptr;
ERROR_LOG(G3D, "Failed to create drawpixels texture");
}
BeginFrame();
} }
FramebufferManagerDX9::~FramebufferManagerDX9() { FramebufferManagerDX9::~FramebufferManagerDX9() {
if(drawPixelsTex_) { if (drawPixelsTex_) {
drawPixelsTex_->Release(); drawPixelsTex_->Release();
} }
for (auto it = tempFBOs_.begin(), end = tempFBOs_.end(); it != end; ++it) {
fbo_destroy(it->second.fbo);
}
for (auto it = offscreenSurfaces_.begin(), end = offscreenSurfaces_.end(); it != end; ++it) {
it->second.surface->Release();
}
delete [] convBuf; delete [] convBuf;
} }
@ -150,15 +149,40 @@ namespace DX9 {
*dst = ((c & 0x001f) << 19) | (((c >> 5) & 0x001f) << 11) | ((((c >> 10) & 0x001f) << 3)) | 0xFF000000; *dst = ((c & 0x001f) << 19) | (((c >> 5) & 0x001f) << 11) | ((((c >> 10) & 0x001f) << 3)) | 0xFF000000;
} }
static inline u32 ABGR2RGBA(u32 src) { // TODO: Swizzle the texture access instead.
return (src >> 8) | (src << 24); static inline u32 RGBA2BGRA(u32 src) {
const u32 r = (src & 0x000000FF) << 16;
const u32 ga = src & 0xFF00FF00;
const u32 b = (src & 0x00FF0000) >> 16;
return r | ga | b;
} }
void FramebufferManagerDX9::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) { void FramebufferManagerDX9::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
u8 *convBuf = NULL; u8 *convBuf = NULL;
D3DLOCKED_RECT rect; D3DLOCKED_RECT rect;
// TODO: Check / use D3DCAPS2_DYNAMICTEXTURES?
if (drawPixelsTex_ && (drawPixelsTexW_ != width || drawPixelsTexH_ != height)) {
drawPixelsTex_->Release();
drawPixelsTex_ = nullptr;
}
if (!drawPixelsTex_) {
int usage = 0;
D3DPOOL pool = D3DPOOL_MANAGED;
if (pD3DdeviceEx) {
pool = D3DPOOL_DEFAULT;
usage = D3DUSAGE_DYNAMIC;
}
HRESULT hr = pD3Ddevice->CreateTexture(width, height, 1, usage, D3DFMT(D3DFMT_A8R8G8B8), pool, &drawPixelsTex_, NULL);
if (FAILED(hr)) {
drawPixelsTex_ = nullptr;
ERROR_LOG(G3D, "Failed to create drawpixels texture");
}
drawPixelsTexW_ = width;
drawPixelsTexH_ = height;
}
if (!drawPixelsTex_) { if (!drawPixelsTex_) {
return; return;
} }
@ -167,18 +191,18 @@ namespace DX9 {
convBuf = (u8*)rect.pBits; convBuf = (u8*)rect.pBits;
// Final format is ARGB(directx) // Final format is BGRA(directx)
// TODO: We can just change the texture format and flip some bits around instead of this. // TODO: We can just change the texture format and flip some bits around instead of this.
if (srcPixelFormat != GE_FORMAT_8888 || srcStride != 512) { if (srcPixelFormat != GE_FORMAT_8888 || srcStride != 512) {
for (int y = 0; y < 272; y++) { for (int y = 0; y < height; y++) {
switch (srcPixelFormat) { switch (srcPixelFormat) {
// not tested // not tested
case GE_FORMAT_565: case GE_FORMAT_565:
{ {
const u16 *src = (const u16 *)srcPixels + srcStride * y; const u16_le *src = (const u16_le *)srcPixels + srcStride * y;
u32 *dst = (u32*)(convBuf + rect.Pitch * y); u32 *dst = (u32 *)(convBuf + rect.Pitch * y);
for (int x = 0; x < 480; x++) { for (int x = 0; x < width; x++) {
u16_le col0 = src[x+0]; u16_le col0 = src[x+0];
ARGB8From565(col0, &dst[x + 0]); ARGB8From565(col0, &dst[x + 0]);
} }
@ -187,20 +211,19 @@ namespace DX9 {
// faster // faster
case GE_FORMAT_5551: case GE_FORMAT_5551:
{ {
const u16 *src = (const u16 *)srcPixels + srcStride * y; const u16_le *src = (const u16_le *)srcPixels + srcStride * y;
u32 *dst = (u32*)(convBuf + rect.Pitch * y); u32 *dst = (u32 *)(convBuf + rect.Pitch * y);
for (int x = 0; x < 480; x++) { for (int x = 0; x < width; x++) {
u16_le col0 = src[x+0]; u16_le col0 = src[x+0];
ARGB8From5551(col0, &dst[x + 0]); ARGB8From5551(col0, &dst[x + 0]);
} }
} }
break; break;
// not tested
case GE_FORMAT_4444: case GE_FORMAT_4444:
{ {
const u16 *src = (const u16 *)srcPixels + srcStride * y; const u16_le *src = (const u16_le *)srcPixels + srcStride * y;
u32 *dst = (u32*)(convBuf + rect.Pitch * y); u8 *dst = (u8 *)(convBuf + rect.Pitch * y);
for (int x = 0; x < 480; x++) for (int x = 0; x < width; x++)
{ {
u16_le col = src[x]; u16_le col = src[x];
dst[x * 4 + 0] = (col >> 12) << 4; dst[x * 4 + 0] = (col >> 12) << 4;
@ -213,23 +236,23 @@ namespace DX9 {
case GE_FORMAT_8888: case GE_FORMAT_8888:
{ {
const u32 *src = (const u32 *)srcPixels + srcStride * y; const u32_le *src = (const u32_le *)srcPixels + srcStride * y;
u32 *dst = (u32*)(convBuf + rect.Pitch * y); u32 *dst = (u32 *)(convBuf + rect.Pitch * y);
for (int x = 0; x < 480; x++) for (int x = 0; x < width; x++)
{ {
dst[x] = ABGR2RGBA(src[x]); dst[x] = RGBA2BGRA(src[x]);
} }
} }
break; break;
} }
} }
} else { } else {
for (int y = 0; y < 272; y++) { for (int y = 0; y < height; y++) {
const u32 *src = (const u32 *)srcPixels + srcStride * y; const u32_le *src = (const u32_le *)srcPixels + srcStride * y;
u32 *dst = (u32*)(convBuf + rect.Pitch * y); u32 *dst = (u32 *)(convBuf + rect.Pitch * y);
for (int x = 0; x < 512; x++) for (int x = 0; x < width; x++)
{ {
dst[x] = ABGR2RGBA(src[x]); dst[x] = RGBA2BGRA(src[x]);
} }
} }
} }
@ -245,7 +268,8 @@ namespace DX9 {
dxstate.viewport.set(0, 0, vfb->renderWidth, vfb->renderHeight); dxstate.viewport.set(0, 0, vfb->renderWidth, vfb->renderHeight);
MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height); MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height);
DisableState(); DisableState();
DrawActiveTexture(0, dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, false, 0.0f, 0.0f, 1.0f, 1.0f); DrawActiveTexture(drawPixelsTex_, dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, false, 0.0f, 0.0f, 1.0f, 1.0f);
textureCache_->ForgetLastTexture();
} }
void FramebufferManagerDX9::DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) { void FramebufferManagerDX9::DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) {
@ -281,10 +305,6 @@ namespace DX9 {
} }
// TODO: StretchRect instead? // TODO: StretchRect instead?
if (tex) {
pD3Ddevice->SetTexture(0, tex);
}
float coord[20] = { float coord[20] = {
x,y,0, u0,v0, x,y,0, u0,v0,
x+w,y,0, u1,v0, x+w,y,0, u1,v0,
@ -304,10 +324,14 @@ namespace DX9 {
pD3Ddevice->SetVertexDeclaration(pFramebufferVertexDecl); pD3Ddevice->SetVertexDeclaration(pFramebufferVertexDecl);
pD3Ddevice->SetPixelShader(pFramebufferPixelShader); pD3Ddevice->SetPixelShader(pFramebufferPixelShader);
pD3Ddevice->SetVertexShader(pFramebufferVertexShader); pD3Ddevice->SetVertexShader(pFramebufferVertexShader);
shaderManager_->DirtyLastShader();
if (tex != NULL) { if (tex != NULL) {
pD3Ddevice->SetTexture(0, tex); pD3Ddevice->SetTexture(0, tex);
} }
pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coord, 5 * sizeof(float)); HRESULT hr = pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coord, 5 * sizeof(float));
if (FAILED(hr)) {
ERROR_LOG_REPORT(G3D, "DrawActiveTexture() failed: %08x", hr);
}
} }
void FramebufferManagerDX9::DestroyFramebuf(VirtualFramebuffer *v) { void FramebufferManagerDX9::DestroyFramebuf(VirtualFramebuffer *v) {
@ -330,6 +354,14 @@ namespace DX9 {
delete v; delete v;
} }
// Re-establishes the render target after an operation that may have bound something else:
// binds the FBO of the current render framebuffer if there is one, otherwise unbinds.
void FramebufferManagerDX9::RebindFramebuffer() {
	// No current virtual framebuffer, or it has no FBO: nothing of ours to bind.
	if (!currentRenderVfb_ || !currentRenderVfb_->fbo) {
		fbo_unbind();
		return;
	}
	fbo_bind_as_render_target(currentRenderVfb_->fbo);
}
void FramebufferManagerDX9::ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, u16 h, bool force) { void FramebufferManagerDX9::ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, u16 h, bool force) {
float renderWidthFactor = (float)vfb->renderWidth / (float)vfb->bufferWidth; float renderWidthFactor = (float)vfb->renderWidth / (float)vfb->bufferWidth;
float renderHeightFactor = (float)vfb->renderHeight / (float)vfb->bufferHeight; float renderHeightFactor = (float)vfb->renderHeight / (float)vfb->bufferHeight;
@ -391,9 +423,10 @@ namespace DX9 {
if (old.fbo) { if (old.fbo) {
INFO_LOG(SCEGE, "Resizing FBO for %08x : %i x %i x %i", vfb->fb_address, w, h, vfb->format); INFO_LOG(SCEGE, "Resizing FBO for %08x : %i x %i x %i", vfb->fb_address, w, h, vfb->format);
if (vfb->fbo) { if (vfb->fbo) {
fbo_bind_as_render_target(vfb->fbo);
ClearBuffer(); ClearBuffer();
if (!g_Config.bDisableSlowFramebufEffects) { if (!g_Config.bDisableSlowFramebufEffects) {
BlitFramebuffer_(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0); BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0);
} }
} }
fbo_destroy(old.fbo); fbo_destroy(old.fbo);
@ -422,12 +455,15 @@ namespace DX9 {
if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) {
shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX);
} }
if (gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight) {
shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX);
}
} }
void FramebufferManagerDX9::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb) { void FramebufferManagerDX9::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb) {
if (ShouldDownloadFramebuffer(vfb) && !vfb->memoryUpdated) { if (ShouldDownloadFramebuffer(vfb) && !vfb->memoryUpdated) {
// TODO ReadFramebufferToMemory(vfb, true, 0, 0, vfb->width, vfb->height);
//ReadFramebufferToMemory(vfb, true, 0, 0, vfb->width, vfb->height);
} }
textureCache_->ForgetLastTexture(); textureCache_->ForgetLastTexture();
@ -471,6 +507,10 @@ namespace DX9 {
if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) {
shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX);
} }
if (gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight) {
shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX);
}
} }
void FramebufferManagerDX9::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) { void FramebufferManagerDX9::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) {
@ -486,6 +526,52 @@ namespace DX9 {
if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) {
shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX);
} }
if (gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight) {
shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX);
}
}
// Returns a cached scratch FBO for the given size and color depth, creating it on first use.
// Cache entries are keyed on (depth, w, h) and stamped with the current flip count every time
// they are handed out.  Returns null when a new FBO cannot be created.
// A newly created FBO is left bound as the render target (and cleared) when this returns.
FBO *FramebufferManagerDX9::GetTempFBO(u16 w, u16 h, FBOColorDepth depth) {
	// Widen before shifting: a u16 operand promotes to signed int, so `w << 16` with
	// w >= 0x8000 would shift into the sign bit and then sign-extend over the depth bits
	// when merged into the 64-bit key.  Keys for smaller widths are unchanged.
	const u64 key = ((u64)depth << 32) | ((u64)w << 16) | (u64)h;
	auto it = tempFBOs_.find(key);
	if (it != tempFBOs_.end()) {
		it->second.last_frame_used = gpuStats.numFlips;
		return it->second.fbo;
	}

	// Creating/binding a new FBO changes device state behind the texture cache's back.
	textureCache_->ForgetLastTexture();
	FBO *fbo = fbo_create(w, h, 1, false, depth);
	if (!fbo)
		return fbo;
	fbo_bind_as_render_target(fbo);
	ClearBuffer();
	const TempFBO info = {fbo, gpuStats.numFlips};
	tempFBOs_[key] = info;
	return fbo;
}
// Returns a cached offscreen plain surface with the same width, height and format as
// similarSurface, creating it in D3DPOOL_SYSTEMMEM on first use.  Returns nullptr (after
// logging) if the surface cannot be created.
// The surface stays referenced by offscreenSurfaces_; the cache Release()s it when the entry
// is aged out or all FBOs are destroyed.
LPDIRECT3DSURFACE9 FramebufferManagerDX9::GetOffscreenSurface(LPDIRECT3DSURFACE9 similarSurface) {
D3DSURFACE_DESC desc;
// NOTE(review): the HRESULT from GetDesc() is not checked here.
similarSurface->GetDesc(&desc);
// Cache key: format in the upper 32 bits, width and height packed into the lower 32.
u64 key = ((u64)desc.Format << 32) | (desc.Width << 16) | desc.Height;
auto it = offscreenSurfaces_.find(key);
if (it != offscreenSurfaces_.end()) {
// Cache hit: refresh the usage stamp so the entry is not aged out.
it->second.last_frame_used = gpuStats.numFlips;
return it->second.surface;
}
textureCache_->ForgetLastTexture();
LPDIRECT3DSURFACE9 offscreen = nullptr;
HRESULT hr = pD3Ddevice->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &offscreen, NULL);
if (FAILED(hr) || !offscreen) {
ERROR_LOG_REPORT(G3D, "Unable to create offscreen surface %dx%d @%d", desc.Width, desc.Height, desc.Format);
return nullptr;
}
// Remember the new surface together with the flip count at creation.
const OffscreenSurface info = {offscreen, gpuStats.numFlips};
offscreenSurfaces_[key] = info;
return offscreen;
} }
void FramebufferManagerDX9::CopyDisplayToOutput() { void FramebufferManagerDX9::CopyDisplayToOutput() {
@ -542,7 +628,7 @@ namespace DX9 {
// The game is displaying something directly from RAM. In GTA, it's decoded video. // The game is displaying something directly from RAM. In GTA, it's decoded video.
// First check that it's not a known RAM copy of a VRAM framebuffer though, as in MotoGP // First check that it's not a known RAM copy of a VRAM framebuffer though, as in MotoGP
for (auto iter = knownFramebufferCopies_.begin(); iter != knownFramebufferCopies_.end(); ++iter) { for (auto iter = knownFramebufferRAMCopies_.begin(); iter != knownFramebufferRAMCopies_.end(); ++iter) {
if (iter->second == displayFramebufPtr_) { if (iter->second == displayFramebufPtr_) {
vfb = GetVFBAt(iter->first); vfb = GetVFBAt(iter->first);
} }
@ -595,18 +681,26 @@ namespace DX9 {
const float v1 = (272.0f + offsetY) / (float)vfb->bufferHeight; const float v1 = (272.0f + offsetY) / (float)vfb->bufferHeight;
if (1) { if (1) {
dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); const u32 rw = PSP_CoreParameter().pixelWidth;
// These are in the output display coordinates const u32 rh = PSP_CoreParameter().pixelHeight;
if (g_Config.iBufFilter == SCALE_LINEAR) { const RECT srcRect = {(LONG)(u0 * vfb->renderWidth), (LONG)(v0 * vfb->renderHeight), (LONG)(u1 * vfb->renderWidth), (LONG)(v1 * vfb->renderHeight)};
dxstate.texMagFilter.set(D3DTEXF_LINEAR); const RECT dstRect = {x * rw / w, y * rh / h, (x + w) * rw / w, (y + h) * rh / h};
dxstate.texMinFilter.set(D3DTEXF_LINEAR); HRESULT hr = fbo_blit_color(vfb->fbo, &srcRect, nullptr, &dstRect, g_Config.iBufFilter == SCALE_LINEAR ? D3DTEXF_LINEAR : D3DTEXF_POINT);
} else { if (FAILED(hr)) {
dxstate.texMagFilter.set(D3DTEXF_POINT); ERROR_LOG_REPORT(G3D, "fbo_blit_color failed on display: %08x", hr);
dxstate.texMinFilter.set(D3DTEXF_POINT); dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight);
// These are in the output display coordinates
if (g_Config.iBufFilter == SCALE_LINEAR) {
dxstate.texMagFilter.set(D3DTEXF_LINEAR);
dxstate.texMinFilter.set(D3DTEXF_LINEAR);
} else {
dxstate.texMagFilter.set(D3DTEXF_POINT);
dxstate.texMinFilter.set(D3DTEXF_POINT);
}
dxstate.texMipFilter.set(D3DTEXF_NONE);
dxstate.texMipLodBias.set(0);
DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, false, u0, v0, u1, v1);
} }
dxstate.texMipFilter.set(D3DTEXF_NONE);
dxstate.texMipLodBias.set(0);
DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, false, u0, v0, u1, v1);
} }
/* /*
else if (usePostShader_ && extraFBOs_.size() == 1 && !postShaderAtOutputResolution_) { else if (usePostShader_ && extraFBOs_.size() == 1 && !postShaderAtOutputResolution_) {
@ -640,14 +734,14 @@ namespace DX9 {
} }
} }
void FramebufferManagerDX9::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync) { void FramebufferManagerDX9::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) {
#if 0 #if 0
if (sync) { if (sync) {
PackFramebufferAsync_(NULL); // flush async just in case when we go for synchronous update PackFramebufferAsync_(NULL); // flush async just in case when we go for synchronous update
} }
#endif #endif
if(vfb) { if (vfb) {
// We'll pseudo-blit framebuffers here to get a resized and flipped version of vfb. // We'll pseudo-blit framebuffers here to get a resized and flipped version of vfb.
// For now we'll keep these on the same struct as the ones that can get displayed // For now we'll keep these on the same struct as the ones that can get displayed
// (and blatantly copy work already done above while at it). // (and blatantly copy work already done above while at it).
@ -682,26 +776,15 @@ namespace DX9 {
nvfb->bufferWidth = vfb->bufferWidth; nvfb->bufferWidth = vfb->bufferWidth;
nvfb->bufferHeight = vfb->bufferHeight; nvfb->bufferHeight = vfb->bufferHeight;
nvfb->format = vfb->format; nvfb->format = vfb->format;
nvfb->drawnWidth = vfb->drawnWidth;
nvfb->drawnHeight = vfb->drawnHeight;
nvfb->drawnFormat = vfb->format;
nvfb->usageFlags = FB_USAGE_RENDERTARGET; nvfb->usageFlags = FB_USAGE_RENDERTARGET;
nvfb->dirtyAfterDisplay = true; nvfb->dirtyAfterDisplay = true;
// When updating VRAM, it need to be exact format. nvfb->colorDepth = FBO_8888;
switch (vfb->format) {
case GE_FORMAT_4444:
nvfb->colorDepth = FBO_4444;
break;
case GE_FORMAT_5551:
nvfb->colorDepth = FBO_5551;
break;
case GE_FORMAT_565:
nvfb->colorDepth = FBO_565;
break;
case GE_FORMAT_8888:
default:
nvfb->colorDepth = FBO_8888;
break;
}
textureCache_->ForgetLastTexture();
nvfb->fbo = fbo_create(nvfb->width, nvfb->height, 1, true, (FBOColorDepth)nvfb->colorDepth); nvfb->fbo = fbo_create(nvfb->width, nvfb->height, 1, true, (FBOColorDepth)nvfb->colorDepth);
if (!(nvfb->fbo)) { if (!(nvfb->fbo)) {
ERROR_LOG(SCEGE, "Error creating FBO! %i x %i", nvfb->renderWidth, nvfb->renderHeight); ERROR_LOG(SCEGE, "Error creating FBO! %i x %i", nvfb->renderWidth, nvfb->renderHeight);
@ -710,7 +793,7 @@ namespace DX9 {
nvfb->last_frame_render = gpuStats.numFlips; nvfb->last_frame_render = gpuStats.numFlips;
bvfbs_.push_back(nvfb); bvfbs_.push_back(nvfb);
fbo_bind_as_render_target(nvfb->fbo); fbo_bind_as_render_target(nvfb->fbo);
ClearBuffer(); ClearBuffer();
} else { } else {
nvfb->usageFlags |= FB_USAGE_RENDERTARGET; nvfb->usageFlags |= FB_USAGE_RENDERTARGET;
@ -733,142 +816,208 @@ namespace DX9 {
#endif #endif
} }
vfb->memoryUpdated = true; if (gameUsesSequentialCopies_) {
BlitFramebuffer_(nvfb, 0, 0, vfb, 0, 0, vfb->width, vfb->height, 0, false); // Ignore the x/y/etc., read the entire thing.
x = 0;
#if 0 y = 0;
#ifdef USING_GLES2 w = vfb->width;
PackFramebufferSync_(nvfb); // synchronous glReadPixels h = vfb->height;
#else }
if (gl_extensions.PBO_ARB || !gl_extensions.ATIClampBug) { if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
if (!sync) { vfb->memoryUpdated = true;
PackFramebufferAsync_(nvfb); // asynchronous glReadPixels using PBOs } else {
} else { const static int FREQUENT_SEQUENTIAL_COPIES = 3;
PackFramebufferSync_(nvfb); // synchronous glReadPixels static int frameLastCopy = 0;
static u32 bufferLastCopy = 0;
static int copiesThisFrame = 0;
if (frameLastCopy != gpuStats.numFlips || bufferLastCopy != vfb->fb_address) {
frameLastCopy = gpuStats.numFlips;
bufferLastCopy = vfb->fb_address;
copiesThisFrame = 0;
}
if (++copiesThisFrame > FREQUENT_SEQUENTIAL_COPIES) {
gameUsesSequentialCopies_ = true;
} }
} }
#endif BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, false);
#endif
PackFramebufferDirectx9_(nvfb, x, y, w, h);
RebindFramebuffer();
} }
} }
void FramebufferManagerDX9::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip) { void FramebufferManagerDX9::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip) {
if (!dst->fbo || !src->fbo || !useBufferedRendering_) { if (!dst->fbo || !src->fbo || !useBufferedRendering_) {
// This can happen if they recently switched from non-buffered. // This can happen if they recently switched from non-buffered.
fbo_unbind(); fbo_unbind();
return; return;
} }
fbo_bind_as_render_target(dst->fbo); float srcXFactor = flip ? 1.0f : (float)src->renderWidth / (float)src->bufferWidth;
dxstate.viewport.set(0, 0, dst->renderWidth, dst->renderHeight); float srcYFactor = flip ? 1.0f : (float)src->renderHeight / (float)src->bufferHeight;
DisableState();
fbo_bind_color_as_texture(src->fbo, 0);
float srcXFactor = 1.0f;
float srcYFactor = 1.0f;
const int srcBpp = src->format == GE_FORMAT_8888 ? 4 : 2; const int srcBpp = src->format == GE_FORMAT_8888 ? 4 : 2;
if (srcBpp != bpp && bpp != 0) { if (srcBpp != bpp && bpp != 0) {
srcXFactor = (srcXFactor * bpp) / srcBpp; srcXFactor = (srcXFactor * bpp) / srcBpp;
} }
int srcX1 = srcX * srcXFactor; int srcX1 = srcX * srcXFactor;
int srcX2 = (srcX + w) * srcXFactor; int srcX2 = (srcX + w) * srcXFactor;
int srcY2 = src->renderHeight - (h + srcY) * srcYFactor; int srcY1 = srcY * srcYFactor;
int srcY1 = srcY2 + h * srcYFactor; int srcY2 = (srcY + h) * srcYFactor;
float dstXFactor = 1.0f; float dstXFactor = flip ? 1.0f : (float)dst->renderWidth / (float)dst->bufferWidth;
float dstYFactor = 1.0f; float dstYFactor = flip ? 1.0f : (float)dst->renderHeight / (float)dst->bufferHeight;
const int dstBpp = dst->format == GE_FORMAT_8888 ? 4 : 2; const int dstBpp = dst->format == GE_FORMAT_8888 ? 4 : 2;
if (dstBpp != bpp && bpp != 0) { if (dstBpp != bpp && bpp != 0) {
dstXFactor = (dstXFactor * bpp) / dstBpp; dstXFactor = (dstXFactor * bpp) / dstBpp;
} }
int dstX1 = dstX * dstXFactor; int dstX1 = dstX * dstXFactor;
int dstX2 = (dstX + w) * dstXFactor; int dstX2 = (dstX + w) * dstXFactor;
int dstY2 = dst->renderHeight - (h + dstY) * dstYFactor; int dstY1 = dstY * dstYFactor;
int dstY1 = dstY2 + h * dstYFactor; int dstY2 = (dstY + h) * dstYFactor;
float srcW = src->bufferWidth; if (flip) {
float srcH = src->bufferHeight; fbo_bind_as_render_target(dst->fbo);
DrawActiveTexture(0, dstX1, dstY, w * dstXFactor, h, dst->bufferWidth, dst->bufferHeight, !flip, srcX1 / srcW, srcY / srcH, srcX2 / srcW, (srcY + h) / srcH); dxstate.viewport.set(0, 0, dst->renderWidth, dst->renderHeight);
pD3Ddevice->SetTexture(0, NULL); DisableState();
textureCache_->ForgetLastTexture();
dxstate.viewport.restore();
fbo_unbind(); fbo_bind_color_as_texture(src->fbo, 0);
float srcW = src->bufferWidth;
float srcH = src->bufferHeight;
DrawActiveTexture(0, dstX1, dstY, w * dstXFactor, h, dst->bufferWidth, dst->bufferHeight, flip, srcX1 / srcW, srcY / srcH, srcX2 / srcW, (srcY + h) / srcH);
pD3Ddevice->SetTexture(0, NULL);
textureCache_->ForgetLastTexture();
dxstate.viewport.restore();
RebindFramebuffer();
} else {
LPDIRECT3DSURFACE9 srcSurf = fbo_get_color_for_read(src->fbo);
LPDIRECT3DSURFACE9 dstSurf = fbo_get_color_for_write(dst->fbo);
RECT srcRect = {srcX1, srcY1, srcX2, srcY2};
RECT dstRect = {dstX1, dstY1, dstX2, dstY2};
D3DSURFACE_DESC desc;
srcSurf->GetDesc(&desc);
srcRect.right = std::min(srcRect.right, (LONG)desc.Width);
srcRect.bottom = std::min(srcRect.bottom, (LONG)desc.Height);
dstSurf->GetDesc(&desc);
dstRect.right = std::min(dstRect.right, (LONG)desc.Width);
dstRect.bottom = std::min(dstRect.bottom, (LONG)desc.Height);
// Direct3D 9 doesn't support rect -> self.
FBO *srcFBO = src->fbo;
if (src == dst) {
FBO *tempFBO = GetTempFBO(src->renderWidth, src->renderHeight, (FBOColorDepth)src->colorDepth);
HRESULT hr = fbo_blit_color(src->fbo, &srcRect, tempFBO, &srcRect, D3DTEXF_POINT);
if (SUCCEEDED(hr)) {
srcFBO = tempFBO;
}
}
HRESULT hr = fbo_blit_color(srcFBO, &srcRect, dst->fbo, &dstRect, D3DTEXF_POINT);
if (FAILED(hr)) {
ERROR_LOG_REPORT(G3D, "fbo_blit_color failed in blit: %08x (%08x -> %08x)", hr, src->fb_address, dst->fb_address);
}
}
} }
// TODO: SSE/NEON // TODO: SSE/NEON
// Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :) // Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :)
void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format) { void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format) {
if(format == GE_FORMAT_8888) { // Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP.
if(src == dst) { const u32 *src32 = (const u32 *)src;
if (format == GE_FORMAT_8888) {
u32 *dst32 = (u32 *)dst;
if (src == dst) {
return; return;
} else { // Here lets assume they don't intersect } else {
memcpy(dst, src, stride * height * 4); for (u32 y = 0; y < height; ++y) {
ConvertBGRA8888ToRGBA8888(dst32, src32, width);
src32 += srcStride;
dst32 += dstStride;
}
} }
} else { // But here it shouldn't matter if they do } else {
int size = height * stride; // But here it shouldn't matter if they do intersect
const u32 *src32 = (const u32 *)src;
u16 *dst16 = (u16 *)dst; u16 *dst16 = (u16 *)dst;
switch (format) { switch (format) {
case GE_FORMAT_565: // BGR 565 case GE_FORMAT_565: // BGR 565
for(int i = 0; i < size; i++) { for (u32 y = 0; y < height; ++y) {
dst16[i] = RGBA8888toRGB565(src32[i]); for (u32 x = 0; x < width; ++x) {
dst16[x] = BGRA8888toRGB565(src32[x]);
}
src32 += srcStride;
dst16 += dstStride;
} }
break; break;
case GE_FORMAT_5551: // ABGR 1555 case GE_FORMAT_5551: // ABGR 1555
for(int i = 0; i < size; i++) { for (u32 y = 0; y < height; ++y) {
dst16[i] = RGBA8888toRGBA5551(src32[i]); ConvertBGRA8888ToRGBA5551(dst16, src32, width);
src32 += srcStride;
dst16 += dstStride;
} }
break; break;
case GE_FORMAT_4444: // ABGR 4444 case GE_FORMAT_4444: // ABGR 4444
for(int i = 0; i < size; i++) { for (u32 y = 0; y < height; ++y) {
dst16[i] = RGBA8888toRGBA4444(src32[i]); for (u32 x = 0; x < width; ++x) {
dst16[x] = BGRA8888toRGBA4444(src32[x]);
}
src32 += srcStride;
dst16 += dstStride;
} }
break; break;
case GE_FORMAT_8888: case GE_FORMAT_8888:
case GE_FORMAT_INVALID:
// Not possible. // Not possible.
break; break;
default:
break;
} }
} }
} }
void FramebufferManagerDX9::PackFramebufferDirectx9_(VirtualFramebuffer *vfb) { void FramebufferManagerDX9::PackFramebufferDirectx9_(VirtualFramebuffer *vfb, int x, int y, int w, int h) {
if (vfb->fbo) { if (!vfb->fbo) {
fbo_bind_for_read(vfb->fbo); ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferDirectx9_: vfb->fbo == 0");
} else {
ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferSync_: vfb->fbo == 0");
fbo_unbind(); fbo_unbind();
return; return;
} }
// Pixel size always 4 here because we always request RGBA8888 const u32 fb_address = (0x04000000) | vfb->fb_address;
size_t bufSize = vfb->fb_stride * vfb->height * 4; const int dstBpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
u32 fb_address = (0x04000000) | vfb->fb_address;
u8 *packed = 0; // We always need to convert from the framebuffer native format.
if(vfb->format == GE_FORMAT_8888) { // Right now that's always 8888.
packed = (u8 *)Memory::GetPointer(fb_address); DEBUG_LOG(HLE, "Reading framebuffer to mem, fb_address = %08x", fb_address);
} else { // End result may be 16-bit but we are reading 32-bit, so there may not be enough space at fb_address
packed = (u8 *)malloc(bufSize * sizeof(u8));
}
if(packed) { LPDIRECT3DSURFACE9 renderTarget = fbo_get_color_for_read(vfb->fbo);
DEBUG_LOG(HLE, "Reading framebuffer to mem, bufSize = %u, packed = %p, fb_address = %08x", D3DSURFACE_DESC desc;
(u32)bufSize, packed, fb_address); renderTarget->GetDesc(&desc);
// Resolve(packed, vfb); LPDIRECT3DSURFACE9 offscreen = GetOffscreenSurface(renderTarget);
if (offscreen) {
if(vfb->format != GE_FORMAT_8888) { // If not RGBA 8888 we need to convert HRESULT hr = pD3Ddevice->GetRenderTargetData(renderTarget, offscreen);
ConvertFromRGBA8888(Memory::GetPointer(fb_address), packed, vfb->fb_stride, vfb->height, vfb->format); if (SUCCEEDED(hr)) {
free(packed); D3DLOCKED_RECT locked;
u32 widthFactor = vfb->renderWidth / vfb->bufferWidth;
u32 heightFactor = vfb->renderHeight / vfb->bufferHeight;
RECT rect = {x * widthFactor, y * heightFactor, (x + w) * widthFactor, (y + h) * heightFactor};
hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY);
if (SUCCEEDED(hr)) {
// TODO: Handle the other formats? We don't currently create them, I think.
const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp;
// Pixel size always 4 here because we always request BGRA8888.
ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), (u8 *)locked.pBits, vfb->fb_stride, locked.Pitch / 4, w, h, vfb->format);
offscreen->UnlockRect();
} else {
ERROR_LOG_REPORT(G3D, "Unable to lock rect from %08x: %d,%d %dx%d of %dx%d", fb_address, rect.left, rect.top, rect.right, rect.bottom, vfb->renderWidth, vfb->renderHeight);
}
} else {
ERROR_LOG_REPORT(G3D, "Unable to download render target data from %08x", fb_address);
} }
} }
fbo_unbind();
} }
void FramebufferManagerDX9::EndFrame() { void FramebufferManagerDX9::EndFrame() {
if (resized_) { if (resized_) {
DestroyAllFBOs(); DestroyAllFBOs();
@ -905,19 +1054,6 @@ namespace DX9 {
return list; return list;
} }
// MotoGP workaround
bool FramebufferManagerDX9::NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset) {
for (size_t i = 0; i < vfbs_.size(); i++) {
// This size fits for MotoGP. Might want to make this more flexible for other games if they do the same.
if ((vfbs_[i]->fb_address | 0x04000000) == src && size == 512 * 272 * 2) {
// A framebuffer matched!
knownFramebufferCopies_.insert(std::pair<u32, u32>(src, dest));
}
}
// TODO
return false;
}
bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZero) { bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZero) {
// TODO // TODO
return false; return false;
@ -932,8 +1068,9 @@ namespace DX9 {
VirtualFramebuffer *vfb = vfbs_[i]; VirtualFramebuffer *vfb = vfbs_[i];
int age = frameLastFramebufUsed_ - std::max(vfb->last_frame_render, vfb->last_frame_used); int age = frameLastFramebufUsed_ - std::max(vfb->last_frame_render, vfb->last_frame_used);
if (updateVram && age == 0 && !vfb->memoryUpdated && vfb == displayFramebuf_) if (ShouldDownloadFramebuffer(vfb) && age == 0 && !vfb->memoryUpdated) {
ReadFramebufferToMemory(vfb); ReadFramebufferToMemory(vfb, false, 0, 0, vfb->width, vfb->height);
}
if (vfb == displayFramebuf_ || vfb == prevDisplayFramebuf_ || vfb == prevPrevDisplayFramebuf_) { if (vfb == displayFramebuf_ || vfb == prevDisplayFramebuf_ || vfb == prevPrevDisplayFramebuf_) {
continue; continue;
@ -946,6 +1083,26 @@ namespace DX9 {
} }
} }
for (auto it = tempFBOs_.begin(); it != tempFBOs_.end(); ) {
int age = frameLastFramebufUsed_ - it->second.last_frame_used;
if (age > FBO_OLD_AGE) {
fbo_destroy(it->second.fbo);
tempFBOs_.erase(it++);
} else {
++it;
}
}
for (auto it = offscreenSurfaces_.begin(); it != offscreenSurfaces_.end(); ) {
int age = frameLastFramebufUsed_ - it->second.last_frame_used;
if (age > FBO_OLD_AGE) {
it->second.surface->Release();
offscreenSurfaces_.erase(it++);
} else {
++it;
}
}
// Do the same for ReadFramebuffersToMemory's VFBs // Do the same for ReadFramebuffersToMemory's VFBs
for (size_t i = 0; i < bvfbs_.size(); ++i) { for (size_t i = 0; i < bvfbs_.size(); ++i) {
VirtualFramebuffer *vfb = bvfbs_[i]; VirtualFramebuffer *vfb = bvfbs_[i];
@ -971,45 +1128,30 @@ namespace DX9 {
DestroyFramebuf(vfb); DestroyFramebuf(vfb);
} }
vfbs_.clear(); vfbs_.clear();
for (size_t i = 0; i < bvfbs_.size(); ++i) {
VirtualFramebuffer *vfb = bvfbs_[i];
DestroyFramebuf(vfb);
}
bvfbs_.clear();
for (auto it = tempFBOs_.begin(), end = tempFBOs_.end(); it != end; ++it) {
fbo_destroy(it->second.fbo);
}
tempFBOs_.clear();
for (auto it = offscreenSurfaces_.begin(), end = offscreenSurfaces_.end(); it != end; ++it) {
it->second.surface->Release();
}
offscreenSurfaces_.clear();
DisableState();
} }
void FramebufferManagerDX9::UpdateFromMemory(u32 addr, int size, bool safe) { void FramebufferManagerDX9::FlushBeforeCopy() {
addr &= ~0x40000000; // Flush anything not yet drawn before blitting, downloading, or uploading.
// TODO: Could go through all FBOs, but probably not important? // This might be a stalled list, or unflushed before a block transfer, etc.
// TODO: Could also check for inner changes, but video is most important. SetRenderFrameBuffer();
bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr(); transformDraw_->Flush();
if (isDisplayBuf || safe) {
// TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help.
if (!Memory::IsValidAddress(displayFramebufPtr_))
return;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
if (MaskedEqual(vfb->fb_address, addr)) {
// TODO
//FlushBeforeCopy();
if (useBufferedRendering_ && vfb->fbo) {
DisableState();
GEBufferFormat fmt = vfb->format;
if (vfb->last_frame_render + 1 < gpuStats.numFlips && isDisplayBuf) {
// If we're not rendering to it, format may be wrong. Use displayFormat_ instead.
fmt = displayFormat_;
}
DrawPixels(vfb, 0, 0, Memory::GetPointer(addr | 0x04000000), fmt, vfb->fb_stride, vfb->width, vfb->height);
SetColorUpdated(vfb);
} else {
INFO_LOG(SCEGE, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format);
DestroyFramebuf(vfb);
vfbs_.erase(vfbs_.begin() + i--);
}
}
}
// TODO: RebindFramebuffer();
fbo_unbind();
currentRenderVfb_ = 0;
}
} }
void FramebufferManagerDX9::Resized() { void FramebufferManagerDX9::Resized() {
@ -1041,9 +1183,8 @@ namespace DX9 {
D3DSURFACE_DESC desc; D3DSURFACE_DESC desc;
renderTarget->GetDesc(&desc); renderTarget->GetDesc(&desc);
LPDIRECT3DSURFACE9 offscreen = nullptr; LPDIRECT3DSURFACE9 offscreen = GetOffscreenSurface(renderTarget);
hr = pD3Ddevice->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &offscreen, NULL); if (!offscreen) {
if (!offscreen || !SUCCEEDED(hr)) {
renderTarget->Release(); renderTarget->Release();
return false; return false;
} }
@ -1063,7 +1204,6 @@ namespace DX9 {
} }
} }
offscreen->Release();
renderTarget->Release(); renderTarget->Release();
return success; return success;

View file

@ -19,6 +19,7 @@
#include <list> #include <list>
#include <set> #include <set>
#include <map>
#include "d3d9.h" #include "d3d9.h"
@ -34,15 +35,14 @@
namespace DX9 { namespace DX9 {
struct GLSLProgram;
class TextureCacheDX9; class TextureCacheDX9;
class TransformDrawEngineDX9;
class ShaderManagerDX9;
void CenterRect(float *x, float *y, float *w, float *h, void CenterRect(float *x, float *y, float *w, float *h,
float origW, float origH, float frameW, float frameH); float origW, float origH, float frameW, float frameH);
class ShaderManagerDX9;
class FramebufferManagerDX9 : public FramebufferManagerCommon { class FramebufferManagerDX9 : public FramebufferManagerCommon {
public: public:
FramebufferManagerDX9(); FramebufferManagerDX9();
@ -54,11 +54,13 @@ public:
void SetShaderManager(ShaderManagerDX9 *sm) { void SetShaderManager(ShaderManagerDX9 *sm) {
shaderManager_ = sm; shaderManager_ = sm;
} }
// Stores the draw engine pointer; FlushBeforeCopy() calls Flush() on it.
void SetTransformDrawEngine(TransformDrawEngineDX9 *td) {
	this->transformDraw_ = td;
}
void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override;
virtual void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override;
void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); virtual void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) override;
void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader);
void DrawActiveTexture(LPDIRECT3DTEXTURE9 texture, float x, float y, float w, float h, float destW, float destH, bool flip = false, float u0 = 0.0f, float v0 = 0.0f, float u1 = 1.0f, float v1 = 1.0f); void DrawActiveTexture(LPDIRECT3DTEXTURE9 texture, float x, float y, float w, float h, float destW, float destH, bool flip = false, float u0 = 0.0f, float v0 = 0.0f, float u1 = 1.0f, float v1 = 1.0f);
@ -68,13 +70,11 @@ public:
void Resized(); void Resized();
void DeviceLost(); void DeviceLost();
void CopyDisplayToOutput(); void CopyDisplayToOutput();
void UpdateFromMemory(u32 addr, int size, bool safe);
void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync = true); virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;
std::vector<FramebufferInfo> GetFramebufferList(); std::vector<FramebufferInfo> GetFramebufferList();
bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false);
bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false); bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false);
void DestroyFramebuf(VirtualFramebuffer *vfb); void DestroyFramebuf(VirtualFramebuffer *vfb);
@ -84,30 +84,37 @@ public:
bool GetCurrentDepthbuffer(GPUDebugBuffer &buffer); bool GetCurrentDepthbuffer(GPUDebugBuffer &buffer);
bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer); bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer);
virtual void RebindFramebuffer() override;
FBO *GetTempFBO(u16 w, u16 h, FBOColorDepth depth = FBO_8888);
LPDIRECT3DSURFACE9 GetOffscreenSurface(LPDIRECT3DSURFACE9 similarSurface);
protected: protected:
virtual void DisableState() override; virtual void DisableState() override;
virtual void ClearBuffer() override; virtual void ClearBuffer() override;
virtual void ClearDepthBuffer() override; virtual void ClearDepthBuffer() override;
virtual void FlushBeforeCopy() override;
virtual void DecimateFBOs() override;
// Used by ReadFramebufferToMemory and later framebuffer block copies
virtual void BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false) override;
virtual void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) override;
virtual void NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb) override;
virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) override; virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) override;
virtual void DecimateFBOs() override;
private: private:
void CompileDraw2DProgram(); void CompileDraw2DProgram();
void DestroyDraw2DProgram(); void DestroyDraw2DProgram();
void SetNumExtraFBOs(int num); void SetNumExtraFBOs(int num);
// Used by ReadFramebufferToMemory void PackFramebufferDirectx9_(VirtualFramebuffer *vfb, int x, int y, int w, int h);
void BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false);
void PackFramebufferDirectx9_(VirtualFramebuffer *vfb);
// Used by DrawPixels // Used by DrawPixels
LPDIRECT3DTEXTURE9 drawPixelsTex_; LPDIRECT3DTEXTURE9 drawPixelsTex_;
GEBufferFormat drawPixelsTexFormat_; int drawPixelsTexW_;
int drawPixelsTexH_;
u8 *convBuf; u8 *convBuf;
@ -115,6 +122,7 @@ private:
TextureCacheDX9 *textureCache_; TextureCacheDX9 *textureCache_;
ShaderManagerDX9 *shaderManager_; ShaderManagerDX9 *shaderManager_;
TransformDrawEngineDX9 *transformDraw_;
bool usePostShader_; bool usePostShader_;
bool postShaderAtOutputResolution_; bool postShaderAtOutputResolution_;
@ -122,10 +130,20 @@ private:
std::vector<FBO *> extraFBOs_; std::vector<FBO *> extraFBOs_;
bool resized_; bool resized_;
bool gameUsesSequentialCopies_;
// Bookkeeping record for one cached temporary FBO; stored as the value type of tempFBOs_.
struct TempFBO {
// The cached framebuffer object handed out by GetTempFBO().
FBO *fbo;
// Frame stamp of the most recent use.
// NOTE(review): presumably a gpuStats.numFlips value consulted when decimating old FBOs - confirm in the DX9 implementation.
int last_frame_used;
};
// Bookkeeping record for one cached Direct3D 9 surface; stored as the value type of offscreenSurfaces_.
// NOTE(review): presumably these are the surfaces returned by GetOffscreenSurface() - confirm in the implementation.
struct OffscreenSurface {
// The cached surface.
LPDIRECT3DSURFACE9 surface;
// Frame stamp of the most recent use.
// NOTE(review): presumably a gpuStats.numFlips value consulted when decimating old surfaces - confirm.
int last_frame_used;
};
std::vector<VirtualFramebuffer *> bvfbs_; // blitting FBOs std::vector<VirtualFramebuffer *> bvfbs_; // blitting FBOs
std::map<u64, TempFBO> tempFBOs_;
std::set<std::pair<u32, u32>> knownFramebufferCopies_; std::map<u64, OffscreenSurface> offscreenSurfaces_;
#if 0 #if 0
AsyncPBO *pixelBufObj_; //this isn't that large AsyncPBO *pixelBufObj_; //this isn't that large

View file

@ -19,7 +19,9 @@
#include "Common/ChunkFile.h" #include "Common/ChunkFile.h"
#include "base/logging.h" #include "base/logging.h"
#include "Core/Debugger/Breakpoints.h"
#include "Core/MemMap.h" #include "Core/MemMap.h"
#include "Core/MIPS/MIPS.h"
#include "Core/Host.h" #include "Core/Host.h"
#include "Core/Config.h" #include "Core/Config.h"
#include "Core/Reporting.h" #include "Core/Reporting.h"
@ -396,8 +398,10 @@ DIRECTX9_GPU::DIRECTX9_GPU()
transformDraw_.SetShaderManager(shaderManager_); transformDraw_.SetShaderManager(shaderManager_);
transformDraw_.SetTextureCache(&textureCache_); transformDraw_.SetTextureCache(&textureCache_);
transformDraw_.SetFramebufferManager(&framebufferManager_); transformDraw_.SetFramebufferManager(&framebufferManager_);
framebufferManager_.Init();
framebufferManager_.SetTextureCache(&textureCache_); framebufferManager_.SetTextureCache(&textureCache_);
framebufferManager_.SetShaderManager(shaderManager_); framebufferManager_.SetShaderManager(shaderManager_);
framebufferManager_.SetTransformDrawEngine(&transformDraw_);
textureCache_.SetFramebufferManager(&framebufferManager_); textureCache_.SetFramebufferManager(&framebufferManager_);
textureCache_.SetShaderManager(shaderManager_); textureCache_.SetShaderManager(shaderManager_);
@ -630,6 +634,18 @@ void DIRECTX9_GPU::ProcessEvent(GPUEvent ev) {
InvalidateCacheInternal(ev.invalidate_cache.addr, ev.invalidate_cache.size, ev.invalidate_cache.type); InvalidateCacheInternal(ev.invalidate_cache.addr, ev.invalidate_cache.size, ev.invalidate_cache.type);
break; break;
case GPU_EVENT_FB_MEMCPY:
PerformMemoryCopyInternal(ev.fb_memcpy.dst, ev.fb_memcpy.src, ev.fb_memcpy.size);
break;
case GPU_EVENT_FB_MEMSET:
PerformMemorySetInternal(ev.fb_memset.dst, ev.fb_memset.v, ev.fb_memset.size);
break;
case GPU_EVENT_FB_STENCIL_UPLOAD:
PerformStencilUploadInternal(ev.fb_stencil_upload.dst, ev.fb_stencil_upload.size);
break;
default: default:
GPUCommon::ProcessEvent(ev); GPUCommon::ProcessEvent(ev);
} }
@ -1806,31 +1822,50 @@ void DIRECTX9_GPU::DoBlockTransfer() {
return; return;
} }
// Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?) // Check that the last address of both source and dest are valid addresses
// Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them
// entirely by walking a couple of pointers... u32 srcLastAddr = srcBasePtr + ((height - 1 + srcY) * srcStride + (srcX + width - 1)) * bpp;
for (int y = 0; y < height; y++) { u32 dstLastAddr = dstBasePtr + ((height - 1 + dstY) * dstStride + (dstX + width - 1)) * bpp;
const u8 *src = Memory::GetPointerUnchecked(srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp);
u8 *dst = Memory::GetPointerUnchecked(dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp); if (!Memory::IsValidAddress(srcLastAddr)) {
memcpy(dst, src, width * bpp); ERROR_LOG_REPORT(G3D, "Bottom-right corner of source of block transfer is at an invalid address: %08x", srcLastAddr);
return;
}
if (!Memory::IsValidAddress(dstLastAddr)) {
// Report the destination's own last address (the one that just failed validation), not the source's.
ERROR_LOG_REPORT(G3D, "Bottom-right corner of destination of block transfer is at an invalid address: %08x", dstLastAddr);
return;
} }
// TODO: Notify all overlapping FBOs that they need to reload. // Tell the framebuffer manager to take action if possible. If it does the entire thing, let's just return.
if (!framebufferManager_.NotifyBlockTransferBefore(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp)) {
// Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?)
// Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them
// entirely by walking a couple of pointers...
if (srcStride == dstStride && (u32)width == srcStride) {
// Common case in God of War, let's do it all in one chunk.
u32 srcLineStartAddr = srcBasePtr + (srcY * srcStride + srcX) * bpp;
u32 dstLineStartAddr = dstBasePtr + (dstY * dstStride + dstX) * bpp;
const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr);
u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr);
memcpy(dst, src, width * height * bpp);
} else {
for (int y = 0; y < height; y++) {
u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;
textureCache_.Invalidate(dstBasePtr + (dstY * dstStride + dstX) * bpp, height * dstStride * bpp, GPU_INVALIDATE_HINT); const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr);
u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr);
memcpy(dst, src, width * bpp);
}
}
// A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to textureCache_.Invalidate(dstBasePtr + (dstY * dstStride + dstX) * bpp, height * dstStride * bpp, GPU_INVALIDATE_HINT);
// the backbuffer. Detect this and have the framebuffermanager draw the pixels. framebufferManager_.NotifyBlockTransferAfter(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp);
u32 backBuffer = framebufferManager_.PrevDisplayFramebufAddr();
u32 displayBuffer = framebufferManager_.DisplayFramebufAddr();
if (((backBuffer != 0 && dstBasePtr == backBuffer) ||
(displayBuffer != 0 && dstBasePtr == displayBuffer)) &&
dstStride == 512 && height == 272) {
framebufferManager_.DrawFramebuffer(Memory::GetPointerUnchecked(dstBasePtr), GE_FORMAT_8888, 512, false);
} }
// Let memory breakpoints observe both ranges touched by the transfer: the source rectangle
// (flag false) and the destination rectangle (flag true).
CBreakPoints::ExecMemCheck(srcBasePtr + (srcY * srcStride + srcX) * bpp, false, height * srcStride * bpp, currentMIPS->pc);
// The destination range starts at the destination origin (dstX, dstY), i.e. the same start
// address that is passed to textureCache_.Invalidate() for this transfer.
CBreakPoints::ExecMemCheck(dstBasePtr + (dstY * dstStride + dstX) * bpp, true, height * dstStride * bpp, currentMIPS->pc);
// TODO: Correct timing appears to be 1.9, but erring a bit low since some of our other timing is inaccurate. // TODO: Correct timing appears to be 1.9, but erring a bit low since some of our other timing is inaccurate.
cyclesExecuted += ((height * width * bpp) * 16) / 10; cyclesExecuted += ((height * width * bpp) * 16) / 10;
} }
@ -1858,32 +1893,103 @@ void DIRECTX9_GPU::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationTy
} }
} }
// Performs a memcpy that may involve a framebuffer.
// The framebuffer manager is offered the copy first; if it returns false the bytes are copied
// in emulated memory. The destination range is invalidated in the cache in every case.
void DIRECTX9_GPU::PerformMemoryCopyInternal(u32 dest, u32 src, int size) {
	const bool handledByFramebuffers = framebufferManager_.NotifyFramebufferCopy(src, dest, size);
	if (!handledByFramebuffers) {
		// Download/Upload are expressed as a copy between a VRAM address and its mirror
		// (address ^ 0x00400000). Both name the same memory, so that copy is skipped.
		const bool isMirrorCopy = Memory::IsVRAMAddress(dest) && (dest ^ 0x00400000) == src;
		if (!isMirrorCopy) {
			Memory::Memcpy(dest, Memory::GetPointer(src), size);
		}
	}

	InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
}
// Notifies the framebuffer manager about a memset over [dest, dest + size).
// The fill value v is not used here; the visible caller (PerformMemorySet) has already
// written it to memory with Memory::Memset before dispatching.
void DIRECTX9_GPU::PerformMemorySetInternal(u32 dest, u8 v, int size) {
	// A memset is reported as a copy whose source and destination coincide, with isMemset set.
	if (framebufferManager_.NotifyFramebufferCopy(dest, dest, size, true)) {
		return;
	}
	// The framebuffer manager returned false, so only invalidate the cache for the range.
	InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
}
// Forwards a stencil upload for [dest, dest + size) to the framebuffer manager.
void DIRECTX9_GPU::PerformStencilUploadInternal(u32 dest, int size) {
	// The manager's boolean result is intentionally discarded.
	(void)framebufferManager_.NotifyStencilUpload(dest, size);
}
bool DIRECTX9_GPU::PerformMemoryCopy(u32 dest, u32 src, int size) { bool DIRECTX9_GPU::PerformMemoryCopy(u32 dest, u32 src, int size) {
// Track stray copies of a framebuffer in RAM. MotoGP does this.
if (framebufferManager_.MayIntersectFramebuffer(src) || framebufferManager_.MayIntersectFramebuffer(dest)) {
if (IsOnSeparateCPUThread()) {
GPUEvent ev(GPU_EVENT_FB_MEMCPY);
ev.fb_memcpy.dst = dest;
ev.fb_memcpy.src = src;
ev.fb_memcpy.size = size;
ScheduleEvent(ev);
// This is a memcpy, so we need to wait for it to complete.
SyncThread();
} else {
PerformMemoryCopyInternal(dest, src, size);
}
return true;
}
InvalidateCache(dest, size, GPU_INVALIDATE_HINT); InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
return false; return false;
} }
bool DIRECTX9_GPU::PerformMemorySet(u32 dest, u8 v, int size) { bool DIRECTX9_GPU::PerformMemorySet(u32 dest, u8 v, int size) {
// This may indicate a memset, usually to 0, of a framebuffer.
if (framebufferManager_.MayIntersectFramebuffer(dest)) {
Memory::Memset(dest, v, size);
if (IsOnSeparateCPUThread()) {
GPUEvent ev(GPU_EVENT_FB_MEMSET);
ev.fb_memset.dst = dest;
ev.fb_memset.v = v;
ev.fb_memset.size = size;
ScheduleEvent(ev);
// We don't need to wait for the framebuffer to be updated.
} else {
PerformMemorySetInternal(dest, v, size);
}
return true;
}
// Or perhaps a texture, let's invalidate.
InvalidateCache(dest, size, GPU_INVALIDATE_HINT); InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
return false; return false;
} }
bool DIRECTX9_GPU::PerformMemoryDownload(u32 dest, int size) { bool DIRECTX9_GPU::PerformMemoryDownload(u32 dest, int size) {
InvalidateCache(dest, size, GPU_INVALIDATE_HINT); // Cheat a bit to force a download of the framebuffer.
// VRAM + 0x00400000 is simply a VRAM mirror.
// Track stray copies of a framebuffer in RAM. MotoGP does this. if (Memory::IsVRAMAddress(dest)) {
if (Memory::IsRAMAddress(dest)) { return PerformMemoryCopy(dest ^ 0x00400000, dest, size);
// framebufferManager_.NotifyFramebufferCopy(src, dest, size);
} }
return false; return false;
} }
bool DIRECTX9_GPU::PerformMemoryUpload(u32 dest, int size) { bool DIRECTX9_GPU::PerformMemoryUpload(u32 dest, int size) {
InvalidateCache(dest, size, GPU_INVALIDATE_HINT); // Cheat a bit to force an upload of the framebuffer.
// VRAM + 0x00400000 is simply a VRAM mirror.
if (Memory::IsVRAMAddress(dest)) {
return PerformMemoryCopy(dest, dest ^ 0x00400000, size);
}
return false; return false;
} }
bool DIRECTX9_GPU::PerformStencilUpload(u32 dest, int size) { bool DIRECTX9_GPU::PerformStencilUpload(u32 dest, int size) {
if (framebufferManager_.MayIntersectFramebuffer(dest)) {
if (IsOnSeparateCPUThread()) {
GPUEvent ev(GPU_EVENT_FB_STENCIL_UPLOAD);
ev.fb_stencil_upload.dst = dest;
ev.fb_stencil_upload.size = size;
ScheduleEvent(ev);
} else {
PerformStencilUploadInternal(dest, size);
}
return true;
}
return false; return false;
} }

View file

@ -158,6 +158,9 @@ private:
void InitClearInternal(); void InitClearInternal();
void BeginFrameInternal(); void BeginFrameInternal();
void CopyDisplayToOutputInternal(); void CopyDisplayToOutputInternal();
void PerformMemoryCopyInternal(u32 dest, u32 src, int size);
void PerformMemorySetInternal(u32 dest, u8 v, int size);
void PerformStencilUploadInternal(u32 dest, int size);
void InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type); void InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type);
FramebufferManagerDX9 framebufferManager_; FramebufferManagerDX9 framebufferManager_;

View file

@ -202,11 +202,13 @@ void ShaderManagerDX9::VSSetMatrix(int creg, const float* pMatrix) {
} }
// Depth in ogl is between -1;1 we need between 0;1 and optionally reverse it // Depth in ogl is between -1;1 we need between 0;1 and optionally reverse it
void ConvertProjMatrixToD3D(Matrix4x4 & in, bool invert) { static void ConvertProjMatrixToD3D(Matrix4x4 & in, bool invertedX, bool invertedY, bool invertedZ) {
Matrix4x4 s; Matrix4x4 s;
Matrix4x4 t; Matrix4x4 t;
s.setScaling(Vec3(1, 1, invert ? -0.5 : 0.5f)); s.setScaling(Vec3(1, 1, invertedZ ? -0.5 : 0.5f));
t.setTranslation(Vec3(0, 0, 0.5f)); float xoff = 0.5f / gstate_c.curRTRenderWidth;
float yoff = 0.5f / gstate_c.curRTRenderHeight;
t.setTranslation(Vec3(invertedX ? xoff : -xoff, invertedY ? -yoff : yoff, 0.5f));
in = in * s * t; in = in * s * t;
} }
@ -230,17 +232,20 @@ void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) {
if (dirtyUniforms & DIRTY_PROJMATRIX) { if (dirtyUniforms & DIRTY_PROJMATRIX) {
Matrix4x4 flippedMatrix; Matrix4x4 flippedMatrix;
memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
if (gstate_c.vpHeight < 0) {
const bool invertedY = gstate_c.vpHeight < 0;
if (invertedY) {
flippedMatrix[5] = -flippedMatrix[5]; flippedMatrix[5] = -flippedMatrix[5];
flippedMatrix[13] = -flippedMatrix[13]; flippedMatrix[13] = -flippedMatrix[13];
} }
if (gstate_c.vpWidth < 0) { const bool invertedX = gstate_c.vpWidth < 0;
if (invertedX) {
flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[0] = -flippedMatrix[0];
flippedMatrix[12] = -flippedMatrix[12]; flippedMatrix[12] = -flippedMatrix[12];
} }
bool invert = gstate_c.vpDepth < 0; const bool invertedZ = gstate_c.vpDepth < 0;
ConvertProjMatrixToD3D(flippedMatrix, invert); ConvertProjMatrixToD3D(flippedMatrix, invertedX, invertedY, invertedZ);
VSSetMatrix(CONST_VS_PROJ, flippedMatrix.getReadPtr()); VSSetMatrix(CONST_VS_PROJ, flippedMatrix.getReadPtr());
} }
@ -248,7 +253,7 @@ void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) {
Matrix4x4 proj_through; Matrix4x4 proj_through;
proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1); proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1);
ConvertProjMatrixToD3D(proj_through, false); ConvertProjMatrixToD3D(proj_through, false, false, false);
VSSetMatrix(CONST_VS_PROJ_THROUGH, proj_through.getReadPtr()); VSSetMatrix(CONST_VS_PROJ_THROUGH, proj_through.getReadPtr());
} }

View file

@ -51,7 +51,7 @@ public:
~TextureCacheDX9(); ~TextureCacheDX9();
void SetTexture(bool force = false); void SetTexture(bool force = false);
bool SetOffsetTexture(u32 offset); virtual bool SetOffsetTexture(u32 offset) override;
void Clear(bool delete_them); void Clear(bool delete_them);
void StartFrame(); void StartFrame();

View file

@ -97,8 +97,12 @@ LPDIRECT3DTEXTURE9 fbo_get_color_texture(FBO *fbo) {
return fbo->tex; return fbo->tex;
} }
void fbo_bind_for_read(FBO *fbo) { LPDIRECT3DSURFACE9 fbo_get_color_for_read(FBO *fbo) {
// pD3Ddevice->SetRenderTarget(0, fbo->surf); return fbo->surf;
}
LPDIRECT3DSURFACE9 fbo_get_color_for_write(FBO *fbo) {
return fbo->surf;
} }
void fbo_bind_color_as_texture(FBO *fbo, int color) { void fbo_bind_color_as_texture(FBO *fbo, int color) {
@ -110,4 +114,10 @@ void fbo_get_dimensions(FBO *fbo, int *w, int *h) {
*h = fbo->height; *h = fbo->height;
} }
// Copies a color rectangle from one FBO to another via IDirect3DDevice9::StretchRect.
// A null src or dst selects deviceRTsurf instead of an FBO surface.
// The rectangles and filter are passed through to StretchRect unchanged, and its HRESULT is returned.
HRESULT fbo_blit_color(FBO *src, const RECT *srcRect, FBO *dst, const RECT *dstRect, D3DTEXTUREFILTERTYPE filter) {
	LPDIRECT3DSURFACE9 fromSurface = deviceRTsurf;
	if (src) {
		fromSurface = src->surf;
	}

	LPDIRECT3DSURFACE9 toSurface = deviceRTsurf;
	if (dst) {
		toSurface = dst->surf;
	}

	return pD3Ddevice->StretchRect(fromSurface, srcRect, toSurface, dstRect, filter);
}
} }

View file

@ -28,11 +28,13 @@ FBO *fbo_create(int width, int height, int num_color_textures, bool z_stencil, F
void fbo_bind_as_render_target(FBO *fbo); void fbo_bind_as_render_target(FBO *fbo);
// color must be 0, for now. // color must be 0, for now.
void fbo_bind_color_as_texture(FBO *fbo, int color); void fbo_bind_color_as_texture(FBO *fbo, int color);
void fbo_bind_for_read(FBO *fbo); LPDIRECT3DSURFACE9 fbo_get_color_for_read(FBO *fbo);
LPDIRECT3DSURFACE9 fbo_get_color_for_write(FBO *fbo);
void fbo_unbind(); void fbo_unbind();
void fbo_destroy(FBO *fbo); void fbo_destroy(FBO *fbo);
void fbo_get_dimensions(FBO *fbo, int *w, int *h); void fbo_get_dimensions(FBO *fbo, int *w, int *h);
void fbo_resolve(FBO *fbo); void fbo_resolve(FBO *fbo);
HRESULT fbo_blit_color(FBO *src, const RECT *srcRect, FBO *dst, const RECT *dstRect, D3DTEXTUREFILTERTYPE filter);
LPDIRECT3DTEXTURE9 fbo_get_color_texture(FBO *fbo); LPDIRECT3DTEXTURE9 fbo_get_color_texture(FBO *fbo);

View file

@ -30,7 +30,6 @@
#include "Core/Config.h" #include "Core/Config.h"
#include "Core/System.h" #include "Core/System.h"
#include "Core/Reporting.h" #include "Core/Reporting.h"
#include "Core/ELF/ParamSFO.h"
#include "Core/HLE/sceDisplay.h" #include "Core/HLE/sceDisplay.h"
#include "GPU/ge_constants.h" #include "GPU/ge_constants.h"
#include "GPU/GPUState.h" #include "GPU/GPUState.h"
@ -113,7 +112,7 @@ inline u16 BGRA8888toRGBA4444(u32 px) {
return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000); return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000);
} }
void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 height, GEBufferFormat format); void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format);
void CenterRect(float *x, float *y, float *w, float *h, void CenterRect(float *x, float *y, float *w, float *h,
float origW, float origH, float frameW, float frameH) { float origW, float origH, float frameW, float frameH) {
@ -212,7 +211,8 @@ void FramebufferManager::SetNumExtraFBOs(int num) {
FBO *fbo = fbo_create(PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight, 1, false, FBO_8888); FBO *fbo = fbo_create(PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight, 1, false, FBO_8888);
extraFBOs_.push_back(fbo); extraFBOs_.push_back(fbo);
// The new FBO is still bound after creation. // The new FBO is still bound after creation, but let's bind it anyway.
fbo_bind_as_render_target(fbo);
ClearBuffer(); ClearBuffer();
} }
@ -353,20 +353,9 @@ FramebufferManager::FramebufferManager() :
} }
void FramebufferManager::Init() { void FramebufferManager::Init() {
FramebufferManagerCommon::Init();
CompileDraw2DProgram(); CompileDraw2DProgram();
const std::string gameId = g_paramSFO.GetValueString("DISC_ID");
// This applies a hack to Dangan Ronpa, its demo, and its sequel.
// The game draws solid colors to a small framebuffer, and then reads this directly in VRAM.
// We force this framebuffer to 1x and force download it automatically.
hackForce04154000Download_ = gameId == "NPJH50631" || gameId == "NPJH50372" || gameId == "NPJH90164" || gameId == "NPJH50515";
// And an initial clear. We don't clear per frame as the games are supposed to handle that
// by themselves.
ClearBuffer();
SetLineWidth(); SetLineWidth();
BeginFrame();
} }
FramebufferManager::~FramebufferManager() { FramebufferManager::~FramebufferManager() {
@ -498,6 +487,7 @@ void FramebufferManager::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY,
MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height); MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height);
DisableState(); DisableState();
DrawActiveTexture(0, dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, false, 0.0f, 0.0f, 1.0f, 1.0f); DrawActiveTexture(0, dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, false, 0.0f, 0.0f, 1.0f, 1.0f);
textureCache_->ForgetLastTexture();
} }
void FramebufferManager::DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) { void FramebufferManager::DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) {
@ -720,9 +710,10 @@ void FramebufferManager::ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, u16 h
if (old.fbo) { if (old.fbo) {
INFO_LOG(SCEGE, "Resizing FBO for %08x : %i x %i x %i", vfb->fb_address, w, h, vfb->format); INFO_LOG(SCEGE, "Resizing FBO for %08x : %i x %i x %i", vfb->fb_address, w, h, vfb->format);
if (vfb->fbo) { if (vfb->fbo) {
fbo_bind_as_render_target(vfb->fbo);
ClearBuffer(); ClearBuffer();
if (!g_Config.bDisableSlowFramebufEffects) { if (!g_Config.bDisableSlowFramebufEffects) {
BlitFramebuffer_(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0); BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min(vfb->bufferWidth, vfb->width), std::min(vfb->height, vfb->bufferHeight), 0);
} }
} }
fbo_destroy(old.fbo); fbo_destroy(old.fbo);
@ -912,7 +903,7 @@ void FramebufferManager::BlitFramebufferDepth(VirtualFramebuffer *sourceframebuf
} }
FBO *FramebufferManager::GetTempFBO(u16 w, u16 h, FBOColorDepth depth) { FBO *FramebufferManager::GetTempFBO(u16 w, u16 h, FBOColorDepth depth) {
u32 key = ((u64)depth << 32) | (w << 16) | h; u64 key = ((u64)depth << 32) | (w << 16) | h;
auto it = tempFBOs_.find(key); auto it = tempFBOs_.find(key);
if (it != tempFBOs_.end()) { if (it != tempFBOs_.end()) {
it->second.last_frame_used = gpuStats.numFlips; it->second.last_frame_used = gpuStats.numFlips;
@ -923,6 +914,7 @@ FBO *FramebufferManager::GetTempFBO(u16 w, u16 h, FBOColorDepth depth) {
FBO *fbo = fbo_create(w, h, 1, false, depth); FBO *fbo = fbo_create(w, h, 1, false, depth);
if (!fbo) if (!fbo)
return fbo; return fbo;
fbo_bind_as_render_target(fbo);
ClearBuffer(); ClearBuffer();
const TempFBO info = {fbo, gpuStats.numFlips}; const TempFBO info = {fbo, gpuStats.numFlips};
tempFBOs_[key] = info; tempFBOs_[key] = info;
@ -951,7 +943,7 @@ void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer, b
if (renderCopy) { if (renderCopy) {
VirtualFramebuffer copyInfo = *framebuffer; VirtualFramebuffer copyInfo = *framebuffer;
copyInfo.fbo = renderCopy; copyInfo.fbo = renderCopy;
BlitFramebuffer_(&copyInfo, 0, 0, framebuffer, 0, 0, framebuffer->drawnWidth, framebuffer->drawnHeight, 0, false); BlitFramebuffer(&copyInfo, 0, 0, framebuffer, 0, 0, framebuffer->drawnWidth, framebuffer->drawnHeight, 0, false);
RebindFramebuffer(); RebindFramebuffer();
fbo_bind_color_as_texture(renderCopy, 0); fbo_bind_color_as_texture(renderCopy, 0);
@ -1201,6 +1193,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s
nvfb->last_frame_render = gpuStats.numFlips; nvfb->last_frame_render = gpuStats.numFlips;
bvfbs_.push_back(nvfb); bvfbs_.push_back(nvfb);
fbo_bind_as_render_target(nvfb->fbo);
ClearBuffer(); ClearBuffer();
glDisable(GL_DITHER); glDisable(GL_DITHER);
} else { } else {
@ -1247,7 +1240,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s
gameUsesSequentialCopies_ = true; gameUsesSequentialCopies_ = true;
} }
} }
BlitFramebuffer_(nvfb, x, y, vfb, x, y, w, h, 0, true); BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, true);
// PackFramebufferSync_() - Synchronous pixel data transfer using glReadPixels // PackFramebufferSync_() - Synchronous pixel data transfer using glReadPixels
// PackFramebufferAsync_() - Asynchronous pixel data transfer using glReadPixels with PBOs // PackFramebufferAsync_() - Asynchronous pixel data transfer using glReadPixels with PBOs
@ -1269,7 +1262,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s
} }
// TODO: If dimensions are the same, we can use glCopyImageSubData. // TODO: If dimensions are the same, we can use glCopyImageSubData.
void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip) { void FramebufferManager::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip) {
if (!dst->fbo || !src->fbo || !useBufferedRendering_) { if (!dst->fbo || !src->fbo || !useBufferedRendering_) {
// This can happen if they recently switched from non-buffered. // This can happen if they recently switched from non-buffered.
fbo_unbind(); fbo_unbind();
@ -1356,7 +1349,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int
// TODO: SSE/NEON // TODO: SSE/NEON
// Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :) // Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :)
void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 height, GEBufferFormat format) { void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format) {
// Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP. // Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP.
const u32 *src32 = (const u32 *)src; const u32 *src32 = (const u32 *)src;
@ -1367,20 +1360,19 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig
} else if (UseBGRA8888()) { } else if (UseBGRA8888()) {
for (u32 y = 0; y < height; ++y) { for (u32 y = 0; y < height; ++y) {
ConvertBGRA8888ToRGBA8888(dst32, src32, width); ConvertBGRA8888ToRGBA8888(dst32, src32, width);
src32 += stride; src32 += srcStride;
dst32 += stride; dst32 += dstStride;
} }
} else { } else {
// Here let's assume they don't intersect // Here let's assume they don't intersect
for (u32 y = 0; y < height; ++y) { for (u32 y = 0; y < height; ++y) {
memcpy(dst32, src32, width * 4); memcpy(dst32, src32, width * 4);
src32 += stride; src32 += srcStride;
dst32 += stride; dst32 += dstStride;
} }
} }
} else { } else {
// But here it shouldn't matter if they do intersect // But here it shouldn't matter if they do intersect
int size = height * stride;
u16 *dst16 = (u16 *)dst; u16 *dst16 = (u16 *)dst;
switch (format) { switch (format) {
case GE_FORMAT_565: // BGR 565 case GE_FORMAT_565: // BGR 565
@ -1389,16 +1381,16 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig
for (u32 x = 0; x < width; ++x) { for (u32 x = 0; x < width; ++x) {
dst16[x] = BGRA8888toRGB565(src32[x]); dst16[x] = BGRA8888toRGB565(src32[x]);
} }
src32 += stride; src32 += srcStride;
dst16 += stride; dst16 += dstStride;
} }
} else { } else {
for (u32 y = 0; y < height; ++y) { for (u32 y = 0; y < height; ++y) {
for (u32 x = 0; x < width; ++x) { for (u32 x = 0; x < width; ++x) {
dst16[x] = RGBA8888toRGB565(src32[x]); dst16[x] = RGBA8888toRGB565(src32[x]);
} }
src32 += stride; src32 += srcStride;
dst16 += stride; dst16 += dstStride;
} }
} }
break; break;
@ -1406,14 +1398,14 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig
if (UseBGRA8888()) { if (UseBGRA8888()) {
for (u32 y = 0; y < height; ++y) { for (u32 y = 0; y < height; ++y) {
ConvertBGRA8888ToRGBA5551(dst16, src32, width); ConvertBGRA8888ToRGBA5551(dst16, src32, width);
src32 += stride; src32 += srcStride;
dst16 += stride; dst16 += dstStride;
} }
} else { } else {
for (u32 y = 0; y < height; ++y) { for (u32 y = 0; y < height; ++y) {
ConvertRGBA8888ToRGBA5551(dst16, src32, width); ConvertRGBA8888ToRGBA5551(dst16, src32, width);
src32 += stride; src32 += srcStride;
dst16 += stride; dst16 += dstStride;
} }
} }
break; break;
@ -1423,16 +1415,16 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 stride, u32 width, u32 heig
for (u32 x = 0; x < width; ++x) { for (u32 x = 0; x < width; ++x) {
dst16[x] = BGRA8888toRGBA4444(src32[x]); dst16[x] = BGRA8888toRGBA4444(src32[x]);
} }
src32 += stride; src32 += srcStride;
dst16 += stride; dst16 += dstStride;
} }
} else { } else {
for (u32 y = 0; y < height; ++y) { for (u32 y = 0; y < height; ++y) {
for (u32 x = 0; x < width; ++x) { for (u32 x = 0; x < width; ++x) {
dst16[x] = RGBA8888toRGBA4444(src32[x]); dst16[x] = RGBA8888toRGBA4444(src32[x]);
} }
src32 += stride; src32 += srcStride;
dst16 += stride; dst16 += dstStride;
} }
} }
break; break;
@ -1507,7 +1499,7 @@ void FramebufferManager::PackFramebufferAsync_(VirtualFramebuffer *vfb) {
if (useCPU || (UseBGRA8888() && pbo.format == GE_FORMAT_8888)) { if (useCPU || (UseBGRA8888() && pbo.format == GE_FORMAT_8888)) {
u8 *dst = Memory::GetPointer(pbo.fb_address); u8 *dst = Memory::GetPointer(pbo.fb_address);
ConvertFromRGBA8888(dst, packed, pbo.stride, pbo.stride, pbo.height, pbo.format); ConvertFromRGBA8888(dst, packed, pbo.stride, pbo.stride, pbo.stride, pbo.height, pbo.format);
} else { } else {
// We don't need to convert, GPU already did (or should have) // We don't need to convert, GPU already did (or should have)
Memory::Memcpy(pbo.fb_address, packed, pbo.size); Memory::Memcpy(pbo.fb_address, packed, pbo.size);
@ -1671,7 +1663,7 @@ void FramebufferManager::PackFramebufferSync_(VirtualFramebuffer *vfb, int x, in
if (convert) { if (convert) {
int dstByteOffset = y * vfb->fb_stride * dstBpp; int dstByteOffset = y * vfb->fb_stride * dstBpp;
ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), packed + byteOffset, vfb->fb_stride, vfb->width, h, vfb->format); ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), packed + byteOffset, vfb->fb_stride, vfb->fb_stride, vfb->width, h, vfb->format);
} }
} }
@ -1803,224 +1795,6 @@ void FramebufferManager::DestroyAllFBOs() {
DisableState(); DisableState();
} }
void FramebufferManager::UpdateFromMemory(u32 addr, int size, bool safe) {
addr &= ~0x40000000;
// TODO: Could go through all FBOs, but probably not important?
// TODO: Could also check for inner changes, but video is most important.
bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr();
if (isDisplayBuf || safe) {
// TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help.
if (!Memory::IsValidAddress(displayFramebufPtr_))
return;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
if (MaskedEqual(vfb->fb_address, addr)) {
FlushBeforeCopy();
if (useBufferedRendering_ && vfb->fbo) {
DisableState();
GEBufferFormat fmt = vfb->format;
if (vfb->last_frame_render + 1 < gpuStats.numFlips && isDisplayBuf) {
// If we're not rendering to it, format may be wrong. Use displayFormat_ instead.
fmt = displayFormat_;
}
DrawPixels(vfb, 0, 0, Memory::GetPointer(addr | 0x04000000), fmt, vfb->fb_stride, vfb->width, vfb->height);
SetColorUpdated(vfb);
} else {
INFO_LOG(SCEGE, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format);
DestroyFramebuf(vfb);
vfbs_.erase(vfbs_.begin() + i--);
}
}
}
RebindFramebuffer();
}
}
// Notification that `size` bytes are being copied (or memset, when isMemset is true)
// from src to dst. Looks for tracked framebuffers that cover either address and, when
// one is found, mirrors the operation on the GPU side: blit between two framebuffers,
// upload into a destination framebuffer, or download from a source framebuffer.
//
// Every path returns false (presumably so the caller still performs the memory
// operation itself - confirm against the call sites).
bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool isMemset) {
	if (updateVRAM_ || size == 0) {
		return false;
	}

	// Strip the upper address bits so both pointers compare against framebuffer addresses.
	src &= 0x3FFFFFFF;
	dst &= 0x3FFFFFFF;

	VirtualFramebuffer *dstBuffer = 0;
	VirtualFramebuffer *srcBuffer = 0;
	// (u32)-1 means "no row found yet"; any real row offset compares smaller.
	u32 dstY = (u32)-1;
	u32 srcY = (u32)-1;
	u32 dstH = 0;
	u32 srcH = 0;

	for (size_t n = 0; n < vfbs_.size(); ++n) {
		VirtualFramebuffer *candidate = vfbs_[n];
		const u32 fbStart = (0x04000000 | candidate->fb_address) & 0x3FFFFFFF;
		const u32 fbBytes = FramebufferByteSize(candidate);
		const u32 fbEnd = fbStart + fbBytes;
		const u32 pixelBytes = candidate->format == GE_FORMAT_8888 ? 4 : 2;
		const u32 rowStride = candidate->fb_stride * pixelBytes;
		const int rowBytes = candidate->width * pixelBytes;

		// Destination side: the copy must start inside this framebuffer and either fit
		// within it or begin exactly at its base address.
		if (dst >= fbStart && (dst == fbStart || dst + size <= fbEnd)) {
			const u32 byteOffset = dst - fbStart;
			const u32 row = byteOffset / rowStride;
			const bool rowAligned = (byteOffset % rowStride) == 0;
			const bool singleRow = size == rowBytes;
			// Only accept copies of exactly one visible row or a whole number of strides,
			// and prefer the framebuffer where the copy starts on the lowest row.
			if (rowAligned && (singleRow || (size % rowStride) == 0) && row < dstY) {
				dstBuffer = candidate;
				dstY = row;
				if (singleRow) {
					dstH = 1;
				} else {
					dstH = std::min((u32)size / rowStride, (u32)candidate->height);
				}
			}
		}

		// Source side: same test, applied to the source pointer.
		if (src >= fbStart && (src == fbStart || src + size <= fbEnd)) {
			const u32 byteOffset = src - fbStart;
			const u32 row = byteOffset / rowStride;
			const bool rowAligned = (byteOffset % rowStride) == 0;
			const bool singleRow = size == rowBytes;
			if (rowAligned && (singleRow || (size % rowStride) == 0) && row < srcY) {
				srcBuffer = candidate;
				srcY = row;
				if (singleRow) {
					srcH = 1;
				} else {
					srcH = std::min((u32)size / rowStride, (u32)candidate->height);
				}
			}
		}
	}

	// MotoGP workaround - it copies a whole framebuffer to RAM and then displays it.
	// Remember the (src, dst) pair for such full-height copies into RAM.
	// TODO: It's rare anyway, but the game could modify the RAM and then we'd display the wrong thing.
	// Unfortunately, handling that would force 1x render resolution.
	if (srcBuffer && !dstBuffer && srcY == 0 && srcH == srcBuffer->height && Memory::IsRAMAddress(dst)) {
		std::pair<u32, u32> ramCopy(src, dst);
		knownFramebufferRAMCopies_.insert(ramCopy);
	}

	if (!useBufferedRendering_) {
		// Without buffered rendering, only a copy into a recently used display buffer
		// (with no framebuffer as the source) is of interest - it's probably destined for the screen.
		const bool intoDisplay = dstBuffer == displayFramebuf_ || dstBuffer == prevDisplayFramebuf_;
		if (srcBuffer || !intoDisplay) {
			return false;
		}
	}

	if (dstBuffer && srcBuffer && !isMemset) {
		// Framebuffer to framebuffer.
		if (srcBuffer == dstBuffer) {
			WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x", src, dst);
		} else {
			WARN_LOG_REPORT_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x", src, dst);
			if (g_Config.bBlockTransferGPU) {
				// Two distinct framebuffers: a blit does the job.
				BlitFramebuffer_(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0);
				SetColorUpdated(dstBuffer);
				RebindFramebuffer();
			}
		}
	} else if (dstBuffer) {
		// Memory to framebuffer: draw the source bytes into the destination rows.
		WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x", src, dst);
		if (g_Config.bBlockTransferGPU) {
			FlushBeforeCopy();
			const u8 *pixels = Memory::GetPointerUnchecked(src);
			DrawPixels(dstBuffer, 0, dstY, pixels, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstH);
			SetColorUpdated(dstBuffer);
			RebindFramebuffer();
			textureCache_->ForgetLastTexture();
			// This is a memcpy, so the regular copy is still wanted just in case.
		}
	} else if (srcBuffer) {
		// Framebuffer to memory: read the rows back, unless the range is out of bounds.
		WARN_LOG_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst);
		FlushBeforeCopy();
		const bool rowsInRange = srcH != 0 && srcY + srcH <= srcBuffer->bufferHeight;
		if (!rowsInRange) {
			WARN_LOG_REPORT_ONCE(btdcpyheight, G3D, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight);
		} else if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) {
			ReadFramebufferToMemory(srcBuffer, true, 0, srcY, srcBuffer->width, srcH);
		}
	}

	return false;
}
// Searches vfbs_ for the framebuffers containing the destination and source base
// pointers of a block transfer.
//
// For each side (dst / src) that matches a framebuffer:
//  - dstBuffer / srcBuffer is set to that framebuffer,
//  - dstWidth, dstHeight / srcWidth, srcHeight are set to the size to use for it,
//  - the base pointer's offset inside the framebuffer is added to dstX, dstY / srcX, srcY.
// When several framebuffers contain a pointer, the one where the pointer lands on the
// lowest row (smallest y offset) is kept. Outputs of a side without a match are left
// untouched.
//
// srcWidth/srcHeight are read on entry as the transfer's width/height; bpp is the
// transfer's bytes per pixel and the strides are in pixels (both are multiplied
// together to get a byte stride below).
void FramebufferManager::FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) const {
// -1 (i.e. (u32)-1) marks "no match yet"; any real offset compares smaller.
u32 dstYOffset = -1;
u32 dstXOffset = -1;
u32 srcYOffset = -1;
u32 srcXOffset = -1;
// Snapshot the requested size, since srcWidth/srcHeight may be overwritten below.
int width = srcWidth;
int height = srcHeight;
// Strip the upper address bits so the pointers compare against framebuffer addresses.
dstBasePtr &= 0x3FFFFFFF;
srcBasePtr &= 0x3FFFFFFF;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF;
const u32 vfb_size = FramebufferByteSize(vfb);
const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
const u32 vfb_byteWidth = vfb->width * vfb_bpp;
// These heuristics are a bit annoying.
// The goal is to avoid using GPU block transfers for things that ought to be memory.
// Maybe we should even check for textures at these places instead?
// Destination side: does the destination pointer fall inside this framebuffer?
if (vfb_address <= dstBasePtr && dstBasePtr < vfb_address + vfb_size) {
const u32 byteOffset = dstBasePtr - vfb_address;
const u32 byteStride = dstStride * bpp;
const u32 yOffset = byteOffset / byteStride;
// Some games use mismatching bitdepths. But make sure the stride matches.
// If it doesn't, generally this means we detected the framebuffer with too large a height.
// Only a candidate if it puts the pointer on a lower row than the best match so far.
bool match = yOffset < dstYOffset;
if (match && vfb_byteStride != byteStride) {
// Grand Knights History copies with a mismatching stride but a full line at a time.
// Makes it hard to detect the wrong transfers in e.g. God of War.
// Accept a stride mismatch only when full lines are copied (width == stride) and the
// total byte count equals exactly one framebuffer stride or one visible row.
if (width != dstStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) {
match = false;
} else {
// Treat the whole transfer as a single row, measured in framebuffer pixels.
dstWidth = byteStride * height / vfb_bpp;
dstHeight = 1;
}
} else if (match) {
// Strides agree: use the requested size unchanged.
dstWidth = width;
dstHeight = height;
}
if (match) {
dstYOffset = yOffset;
dstXOffset = (byteOffset / bpp) % dstStride;
dstBuffer = vfb;
}
}
// Source side: same checks as for the destination, applied to the source pointer.
if (vfb_address <= srcBasePtr && srcBasePtr < vfb_address + vfb_size) {
const u32 byteOffset = srcBasePtr - vfb_address;
const u32 byteStride = srcStride * bpp;
const u32 yOffset = byteOffset / byteStride;
bool match = yOffset < srcYOffset;
if (match && vfb_byteStride != byteStride) {
if (width != srcStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) {
match = false;
} else {
srcWidth = byteStride * height / vfb_bpp;
srcHeight = 1;
}
} else if (match) {
srcWidth = width;
srcHeight = height;
}
if (match) {
srcYOffset = yOffset;
srcXOffset = (byteOffset / bpp) % srcStride;
srcBuffer = vfb;
}
}
}
// Fold the pointer's offset inside the matched framebuffer into the caller's coordinates.
if (dstYOffset != (u32)-1) {
dstY += dstYOffset;
dstX += dstXOffset;
}
if (srcYOffset != (u32)-1) {
srcY += srcYOffset;
srcX += srcXOffset;
}
}
void FramebufferManager::FlushBeforeCopy() { void FramebufferManager::FlushBeforeCopy() {
// Flush anything not yet drawn before blitting, downloading, or uploading. // Flush anything not yet drawn before blitting, downloading, or uploading.
// This might be a stalled list, or unflushed before a block transfer, etc. // This might be a stalled list, or unflushed before a block transfer, etc.
@ -2028,118 +1802,6 @@ void FramebufferManager::FlushBeforeCopy() {
transformDraw_->Flush(); transformDraw_->Flush();
} }
// Called before a block transfer is carried out in memory.
// Returns true only when the transfer was between tracked framebuffers and has been
// handled here (or needed no work) with GPU block transfers enabled; in every other
// case returns false.
bool FramebufferManager::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) {
	if (!useBufferedRendering_ || updateVRAM_) {
		return false;
	}

	// Nothing to check if neither end can touch a framebuffer.
	const bool nearFramebuffer = MayIntersectFramebuffer(srcBasePtr) || MayIntersectFramebuffer(dstBasePtr);
	if (!nearFramebuffer) {
		return false;
	}

	VirtualFramebuffer *dstBuffer = 0;
	VirtualFramebuffer *srcBuffer = 0;
	int srcWidth = width;
	int srcHeight = height;
	int dstWidth = width;
	int dstHeight = height;
	FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp);

	if (!srcBuffer) {
		// Either an upload (destination only), where the pixels get drawn after the
		// memory copy has happened, or no framebuffer involved at all.
		return false;
	}

	if (!dstBuffer) {
		// Download: framebuffer -> memory.
		WARN_LOG_ONCE(btd, G3D, "Block transfer download %08x -> %08x", srcBasePtr, dstBasePtr);
		FlushBeforeCopy();
		if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) {
			if (srcHeight <= 0 || srcY + srcHeight > srcBuffer->bufferHeight) {
				WARN_LOG_ONCE(btdheight, G3D, "Block transfer download %08x -> %08x skipped, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcY, srcHeight, srcBuffer->bufferHeight);
			} else {
				// Rescale x and width from transfer pixels to framebuffer pixels.
				const int srcBpp = srcBuffer->format == GE_FORMAT_8888 ? 4 : 2;
				const float srcXFactor = (float)bpp / srcBpp;
				ReadFramebufferToMemory(srcBuffer, true, srcX * srcXFactor, srcY, srcWidth * srcXFactor, srcHeight);
			}
		}
		// Let the bit copy happen.
		return false;
	}

	// Both ends are framebuffers from here on.
	const bool sameBuffer = srcBuffer == dstBuffer;
	if (sameBuffer && srcX == dstX && srcY == dstY) {
		// Copy onto itself: nothing to do. Tales of Phantasia X does this by accident.
		if (g_Config.bBlockTransferGPU) {
			return true;
		}
		return false;
	}

	if (sameBuffer) {
		WARN_LOG_ONCE(dstsrc, G3D, "Intra-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr);
	} else {
		WARN_LOG_ONCE(dstnotsrc, G3D, "Inter-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr);
	}

	if (!g_Config.bBlockTransferGPU) {
		return false;
	}

	// Just do the blit! No need to actually do the memory copy behind it, probably.
	FlushBeforeCopy();
	BlitFramebuffer_(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp);
	RebindFramebuffer();
	SetColorUpdated(dstBuffer);
	return true;
}
// Called after a block transfer has been carried out in memory.
// Handles two cases: a full-screen transfer straight into a display buffer while
// buffered rendering is off, and uploads from plain memory into a tracked framebuffer.
void FramebufferManager::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) {
	// A few games use this INSTEAD of actually drawing the video image to the screen, they just
	// blast it to the backbuffer. Detect this and have the framebuffer manager draw the pixels.
	const u32 prevDisplayAddr = PrevDisplayFramebufAddr();
	const u32 curDisplayAddr = DisplayFramebufAddr();
	const bool hitsDisplayBuffer =
		(prevDisplayAddr != 0 && dstBasePtr == prevDisplayAddr) ||
		(curDisplayAddr != 0 && dstBasePtr == curDisplayAddr);
	const bool fullScreenTransfer = dstStride == 512 && height == 272;

	// TODO: Is this not handled by upload? Should we check !dstBuffer to avoid a double copy?
	if (hitsDisplayBuffer && fullScreenTransfer && !useBufferedRendering_) {
		FlushBeforeCopy();
		DrawFramebuffer(Memory::GetPointerUnchecked(dstBasePtr), displayFormat_, 512, false);
	}

	const bool nearFramebuffer = MayIntersectFramebuffer(srcBasePtr) || MayIntersectFramebuffer(dstBasePtr);
	if (!nearFramebuffer) {
		return;
	}

	VirtualFramebuffer *dstBuffer = 0;
	VirtualFramebuffer *srcBuffer = 0;
	int srcWidth = width;
	int srcHeight = height;
	int dstWidth = width;
	int dstHeight = height;
	FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp);

	// Without buffered rendering, only the framebuffer currently rendered to matters.
	if (!useBufferedRendering_ && currentRenderVfb_ != dstBuffer) {
		return;
	}

	// Only uploads (memory -> framebuffer) are handled here.
	if (!dstBuffer || srcBuffer) {
		return;
	}

	WARN_LOG_ONCE(btu, G3D, "Block transfer upload %08x -> %08x", srcBasePtr, dstBasePtr);
	if (!g_Config.bBlockTransferGPU) {
		return;
	}

	FlushBeforeCopy();
	// Start of the transferred rectangle in source memory.
	const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
	// Rescale x, stride and width from transfer pixels to framebuffer pixels.
	int dstBpp = dstBuffer->format == GE_FORMAT_8888 ? 4 : 2;
	float dstXFactor = (float)bpp / dstBpp;
	DrawPixels(dstBuffer, dstX * dstXFactor, dstY, srcBase, dstBuffer->format, srcStride * dstXFactor, dstWidth * dstXFactor, dstHeight);
	SetColorUpdated(dstBuffer);
	RebindFramebuffer();
	textureCache_->ForgetLastTexture();
}
void FramebufferManager::Resized() { void FramebufferManager::Resized() {
resized_ = true; resized_ = true;
} }

View file

@ -71,10 +71,9 @@ public:
transformDraw_ = td; transformDraw_ = td;
} }
void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override;
virtual void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override;
void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); virtual void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader) override;
void DrawFramebuffer(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, bool applyPostShader);
// If texture != 0, will bind it. // If texture != 0, will bind it.
// x,y,w,h are relative to destW, destH which fill out the target completely. // x,y,w,h are relative to destW, destH which fill out the target completely.
@ -84,12 +83,11 @@ public:
void DestroyAllFBOs(); void DestroyAllFBOs();
void Init(); virtual void Init() override;
void EndFrame(); void EndFrame();
void Resized(); void Resized();
void DeviceLost(); void DeviceLost();
void CopyDisplayToOutput(); void CopyDisplayToOutput();
void UpdateFromMemory(u32 addr, int size, bool safe);
void SetLineWidth(); void SetLineWidth();
void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old); void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old);
@ -98,18 +96,11 @@ public:
// For use when texturing from a framebuffer. May create a duplicate if target. // For use when texturing from a framebuffer. May create a duplicate if target.
void BindFramebufferColor(VirtualFramebuffer *framebuffer, bool skipCopy = false); void BindFramebufferColor(VirtualFramebuffer *framebuffer, bool skipCopy = false);
// Returns true if it's sure this is a direct FBO->FBO transfer and it has already handle it.
// In that case we hardly need to actually copy the bytes in VRAM, they will be wrong anyway (unless
// read framebuffers is on, in which case this should always return false).
bool NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp);
void NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp);
// Reads a rectangular subregion of a framebuffer to the right position in its backing memory. // Reads a rectangular subregion of a framebuffer to the right position in its backing memory.
void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h); virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;
std::vector<FramebufferInfo> GetFramebufferList(); std::vector<FramebufferInfo> GetFramebufferList();
bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false);
bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false); bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false);
void DestroyFramebuf(VirtualFramebuffer *vfb); void DestroyFramebuf(VirtualFramebuffer *vfb);
@ -119,7 +110,7 @@ public:
bool GetCurrentDepthbuffer(GPUDebugBuffer &buffer); bool GetCurrentDepthbuffer(GPUDebugBuffer &buffer);
bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer); bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer);
void RebindFramebuffer(); virtual void RebindFramebuffer() override;
FBO *GetTempFBO(u16 w, u16 h, FBOColorDepth depth = FBO_8888); FBO *GetTempFBO(u16 w, u16 h, FBOColorDepth depth = FBO_8888);
@ -127,26 +118,24 @@ protected:
virtual void DisableState() override; virtual void DisableState() override;
virtual void ClearBuffer() override; virtual void ClearBuffer() override;
virtual void ClearDepthBuffer() override; virtual void ClearDepthBuffer() override;
virtual void FlushBeforeCopy() override;
virtual void DecimateFBOs() override;
// Used by ReadFramebufferToMemory and later framebuffer block copies
virtual void BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false) override;
virtual void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) override;
virtual void NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb) override; virtual void NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb) override;
virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) override; virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) override;
virtual void DecimateFBOs() override;
private: private:
void CompileDraw2DProgram(); void CompileDraw2DProgram();
void DestroyDraw2DProgram(); void DestroyDraw2DProgram();
void FlushBeforeCopy();
void FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) const;
void SetNumExtraFBOs(int num); void SetNumExtraFBOs(int num);
inline bool ShouldDownloadUsingCPU(const VirtualFramebuffer *vfb) const; inline bool ShouldDownloadUsingCPU(const VirtualFramebuffer *vfb) const;
// Used by ReadFramebufferToMemory and later framebuffer block copies
void BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false);
#ifndef USING_GLES2 #ifndef USING_GLES2
void PackFramebufferAsync_(VirtualFramebuffer *vfb); void PackFramebufferAsync_(VirtualFramebuffer *vfb);
#endif #endif
@ -187,8 +176,6 @@ private:
std::vector<VirtualFramebuffer *> bvfbs_; // blitting framebuffers (for download) std::vector<VirtualFramebuffer *> bvfbs_; // blitting framebuffers (for download)
std::map<u64, TempFBO> tempFBOs_; std::map<u64, TempFBO> tempFBOs_;
std::set<std::pair<u32, u32>> knownFramebufferRAMCopies_;
#ifndef USING_GLES2 #ifndef USING_GLES2
AsyncPBO *pixelBufObj_; //this isn't that large AsyncPBO *pixelBufObj_; //this isn't that large
u8 currentPBO_; u8 currentPBO_;

View file

@ -58,7 +58,7 @@ public:
~TextureCache(); ~TextureCache();
void SetTexture(bool force = false); void SetTexture(bool force = false);
bool SetOffsetTexture(u32 offset) override; virtual bool SetOffsetTexture(u32 offset) override;
void Clear(bool delete_them); void Clear(bool delete_them);
void StartFrame(); void StartFrame();