GPU: Use common block transfer logic in all cases.

This commit is contained in:
Unknown W. Brackets 2022-11-30 19:12:06 -08:00
parent 3589c1e826
commit cbe96d3774
3 changed files with 176 additions and 224 deletions

View file

@ -3057,72 +3057,192 @@ void GPUCommon::DoBlockTransfer(u32 skipDrawReason) {
DEBUG_LOG(G3D, "Block transfer: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY);
if (!Memory::IsValidAddress(srcBasePtr)) {
ERROR_LOG_REPORT(G3D, "BlockTransfer: Bad source transfer address %08x!", srcBasePtr);
return;
}
// For VRAM, we wrap around when outside valid memory (mirrors still work.)
if ((srcBasePtr & 0x04800000) == 0x04800000)
srcBasePtr &= ~0x00800000;
if ((dstBasePtr & 0x04800000) == 0x04800000)
dstBasePtr &= ~0x00800000;
if (!Memory::IsValidAddress(dstBasePtr)) {
ERROR_LOG_REPORT(G3D, "BlockTransfer: Bad destination transfer address %08x!", dstBasePtr);
return;
}
// Use height less one to account for width, which can be greater or less than stride.
const uint32_t src = srcBasePtr + (srcY * srcStride + srcX) * bpp;
const uint32_t srcSize = (height - 1) * srcStride * bpp + width * bpp;
const uint32_t dst = dstBasePtr + (dstY * dstStride + dstX) * bpp;
const uint32_t dstSize = (height - 1) * dstStride * bpp + width * bpp;
// Check that the last address of both source and dest are valid addresses
u32 srcLastAddr = srcBasePtr + ((srcY + height - 1) * srcStride + (srcX + width - 1)) * bpp;
u32 dstLastAddr = dstBasePtr + ((dstY + height - 1) * dstStride + (dstX + width - 1)) * bpp;
if (!Memory::IsValidAddress(srcLastAddr)) {
ERROR_LOG_N_TIMES(bad_xfer_src, 5, G3D, "Bottom-right corner of source of %dx%d src=(%d, %d) block transfer from buffer at %08x is at an invalid address: %08x. Skipping.", width, height, srcX, srcY, srcBasePtr, srcLastAddr);
return;
}
if (!Memory::IsValidAddress(dstLastAddr)) {
ERROR_LOG_N_TIMES(bad_xfer_src, 5, G3D, "Bottom-right corner of destination of %dx%d dst=(%d, %d) block transfer to buffer at %08x is at an invalid address: %08x. Skipping.", width, height, dstX, dstY, dstBasePtr, srcLastAddr);
return;
}
bool srcDstOverlap = src + srcSize > dst && dst + dstSize > src;
bool srcValid = Memory::IsValidRange(src, srcSize);
bool dstValid = Memory::IsValidRange(dst, dstSize);
bool srcWraps = Memory::IsVRAMAddress(srcBasePtr) && !srcValid;
bool dstWraps = Memory::IsVRAMAddress(dstBasePtr) && !dstValid;
// Tell the framebuffer manager to take action if possible. If it does the entire thing, let's just return.
if (!framebufferManager_->NotifyBlockTransferBefore(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp, skipDrawReason)) {
if (!framebufferManager_ || !framebufferManager_->NotifyBlockTransferBefore(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp, skipDrawReason)) {
// Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?)
// Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them
// entirely by walking a couple of pointers...
if (srcStride == dstStride && (u32)width == srcStride) {
// Common case in God of War, let's do it all in one chunk.
// Simple case: just a straight copy, no overlap or wrapping.
if (srcStride == dstStride && (u32)width == srcStride && !srcDstOverlap && srcValid && dstValid) {
u32 srcLineStartAddr = srcBasePtr + (srcY * srcStride + srcX) * bpp;
u32 dstLineStartAddr = dstBasePtr + (dstY * dstStride + dstX) * bpp;
const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr);
u8 *dst = Memory::GetPointerWriteUnchecked(dstLineStartAddr);
memcpy(dst, src, width * height * bpp);
GPURecord::NotifyMemcpy(dstLineStartAddr, srcLineStartAddr, width * height * bpp);
} else {
u32 bytesToCopy = width * height * bpp;
const u8 *srcp = Memory::GetPointer(srcLineStartAddr);
u8 *dstp = Memory::GetPointerWrite(dstLineStartAddr);
memcpy(dstp, srcp, bytesToCopy);
GPURecord::NotifyMemcpy(dstLineStartAddr, srcLineStartAddr, bytesToCopy);
if (MemBlockInfoDetailed(bytesToCopy)) {
const std::string tag = GetMemWriteTagAt("GPUBlockTransfer/", src, bytesToCopy);
NotifyMemInfo(MemBlockFlags::READ, src, bytesToCopy, tag.c_str(), tag.size());
NotifyMemInfo(MemBlockFlags::WRITE, dst, bytesToCopy, tag.c_str(), tag.size());
}
} else if ((srcDstOverlap || srcWraps || dstWraps) && (srcValid || srcWraps) && (dstValid || dstWraps)) {
// This path means we have either src/dst overlap, OR one or both of src and dst wrap.
// This should be uncommon so it's the slowest path.
u32 bytesToCopy = width * bpp;
static std::string tag;
bool notifyDetail = MemBlockInfoDetailed(srcWraps || dstWraps ? 64 : bytesToCopy);
bool notifyAll = !notifyDetail && MemBlockInfoDetailed(srcSize, dstSize);
if (notifyDetail || notifyAll) {
tag = GetMemWriteTagAt("GPUBlockTransfer/", src, srcSize);
}
auto notifyingMemmove = [&](u32 d, u32 s, u32 sz) {
const u8 *srcp = Memory::GetPointer(s);
u8 *dstp = Memory::GetPointerWrite(d);
memmove(dstp, srcp, sz);
GPURecord::NotifyMemcpy(d, s, sz);
if (notifyDetail) {
NotifyMemInfo(MemBlockFlags::READ, s, sz, tag.c_str(), tag.size());
NotifyMemInfo(MemBlockFlags::WRITE, d, sz, tag.c_str(), tag.size());
}
};
for (int y = 0; y < height; y++) {
u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;
// If we already passed a wrap, we can use the quicker path.
if ((srcLineStartAddr & 0x04800000) == 0x04800000)
srcLineStartAddr &= ~0x00800000;
if ((dstLineStartAddr & 0x04800000) == 0x04800000)
dstLineStartAddr &= ~0x00800000;
// These flags mean there's a wrap inside this line.
bool srcLineWrap = !Memory::IsValidRange(srcLineStartAddr, bytesToCopy);
bool dstLineWrap = !Memory::IsValidRange(dstLineStartAddr, bytesToCopy);
if (!srcLineWrap && !dstLineWrap) {
const u8 *srcp = Memory::GetPointer(srcLineStartAddr);
u8 *dstp = Memory::GetPointerWrite(dstLineStartAddr);
for (u32 i = 0; i < bytesToCopy; i += 64) {
u32 chunk = i + 64 > bytesToCopy ? bytesToCopy - i : 64;
memmove(dstp + i, srcp + i, chunk);
}
GPURecord::NotifyMemcpy(dstLineStartAddr, srcLineStartAddr, bytesToCopy);
// If we're tracking detail, it's useful to have the gaps illustrated properly.
if (notifyDetail) {
NotifyMemInfo(MemBlockFlags::READ, srcLineStartAddr, bytesToCopy, tag.c_str(), tag.size());
NotifyMemInfo(MemBlockFlags::WRITE, dstLineStartAddr, bytesToCopy, tag.c_str(), tag.size());
}
} else {
// We can wrap at any point, so along with overlap this gets a bit complicated.
// We're just going to do this the slow and easy way.
u32 srcLinePos = srcLineStartAddr;
u32 dstLinePos = dstLineStartAddr;
for (u32 i = 0; i < bytesToCopy; i += 64) {
u32 chunk = i + 64 > bytesToCopy ? bytesToCopy - i : 64;
u32 srcValid = Memory::ValidSize(srcLinePos, chunk);
u32 dstValid = Memory::ValidSize(dstLinePos, chunk);
// First chunk, for which both are valid.
u32 bothSize = std::min(srcValid, dstValid);
if (bothSize != 0)
notifyingMemmove(dstLinePos, srcLinePos, bothSize);
// Now, whichever side has more valid (or the rest, if only one side must wrap.)
u32 exclusiveSize = std::max(srcValid, dstValid) - bothSize;
if (exclusiveSize != 0 && srcValid >= dstValid) {
notifyingMemmove(PSP_GetVidMemBase(), srcLineStartAddr + bothSize, exclusiveSize);
} else if (exclusiveSize != 0 && srcValid < dstValid) {
notifyingMemmove(dstLineStartAddr + bothSize, PSP_GetVidMemBase(), exclusiveSize);
}
// Finally, if both src and dst wrapped, that portion.
u32 wrappedSize = chunk - bothSize - exclusiveSize;
if (wrappedSize != 0 && srcValid >= dstValid) {
notifyingMemmove(PSP_GetVidMemBase() + exclusiveSize, PSP_GetVidMemBase(), wrappedSize);
} else if (wrappedSize != 0 && srcValid < dstValid) {
notifyingMemmove(PSP_GetVidMemBase(), PSP_GetVidMemBase() + exclusiveSize, wrappedSize);
}
srcLinePos += chunk;
dstLinePos += chunk;
if ((srcLinePos & 0x04800000) == 0x04800000)
srcLinePos &= ~0x00800000;
if ((dstLinePos & 0x04800000) == 0x04800000)
dstLinePos &= ~0x00800000;
}
}
}
if (notifyAll) {
if (srcWraps) {
u32 validSize = Memory::ValidSize(src, srcSize);
NotifyMemInfo(MemBlockFlags::READ, src, validSize, tag.c_str(), tag.size());
NotifyMemInfo(MemBlockFlags::READ, PSP_GetVidMemBase(), srcSize - validSize, tag.c_str(), tag.size());
} else {
NotifyMemInfo(MemBlockFlags::READ, src, srcSize, tag.c_str(), tag.size());
}
if (dstWraps) {
u32 validSize = Memory::ValidSize(dst, dstSize);
NotifyMemInfo(MemBlockFlags::WRITE, dst, validSize, tag.c_str(), tag.size());
NotifyMemInfo(MemBlockFlags::WRITE, PSP_GetVidMemBase(), dstSize - validSize, tag.c_str(), tag.size());
} else {
NotifyMemInfo(MemBlockFlags::WRITE, dst, dstSize, tag.c_str(), tag.size());
}
}
} else if (srcValid && dstValid) {
u32 bytesToCopy = width * bpp;
static std::string tag;
bool notifyDetail = MemBlockInfoDetailed(bytesToCopy);
bool notifyAll = !notifyDetail && MemBlockInfoDetailed(srcSize, dstSize);
if (notifyDetail || notifyAll) {
tag = GetMemWriteTagAt("GPUBlockTransfer/", src, srcSize);
}
for (int y = 0; y < height; y++) {
u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;
const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr);
u8 *dst = Memory::GetPointerWriteUnchecked(dstLineStartAddr);
memcpy(dst, src, width * bpp);
GPURecord::NotifyMemcpy(dstLineStartAddr, srcLineStartAddr, width * bpp);
const u8 *srcp = Memory::GetPointer(srcLineStartAddr);
u8 *dstp = Memory::GetPointerWrite(dstLineStartAddr);
memcpy(dstp, srcp, bytesToCopy);
GPURecord::NotifyMemcpy(dstLineStartAddr, srcLineStartAddr, bytesToCopy);
// If we're tracking detail, it's useful to have the gaps illustrated properly.
if (notifyDetail) {
NotifyMemInfo(MemBlockFlags::READ, srcLineStartAddr, bytesToCopy, tag.c_str(), tag.size());
NotifyMemInfo(MemBlockFlags::WRITE, dstLineStartAddr, bytesToCopy, tag.c_str(), tag.size());
}
}
if (notifyAll) {
NotifyMemInfo(MemBlockFlags::READ, src, srcSize, tag.c_str(), tag.size());
NotifyMemInfo(MemBlockFlags::WRITE, dst, dstSize, tag.c_str(), tag.size());
}
} else {
// This seems to cause the GE to require a break/reset on a PSP.
// TODO: Handle that and figure out which bytes are still copied?
ERROR_LOG_REPORT_ONCE(invalidtransfer, G3D, "Block transfer invalid: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY);
}
// Fixes Gran Turismo's funky text issue, since it overwrites the current texture.
textureCache_->Invalidate(dstBasePtr + (dstY * dstStride + dstX) * bpp, height * dstStride * bpp, GPU_INVALIDATE_HINT);
framebufferManager_->NotifyBlockTransferAfter(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp, skipDrawReason);
}
const uint32_t numBytes = width * height * bpp;
const uint32_t srcSize = height * srcStride * bpp;
const uint32_t dstSize = height * dstStride * bpp;
// We do the check here on the number of bytes to avoid marking really tiny images.
// Helps perf in GT menu which does insane amounts of these, one for each text character per frame.
if (MemBlockInfoDetailed(numBytes, numBytes)) {
const uint32_t src = srcBasePtr + (srcY * srcStride + srcX) * bpp;
const uint32_t dst = dstBasePtr + (dstY * dstStride + dstX) * bpp;
char tag[128];
size_t tagSize = FormatMemWriteTagAt(tag, sizeof(tag), "GPUBlockTransfer/", src, srcSize);
NotifyMemInfo(MemBlockFlags::READ, src, srcSize, tag, tagSize);
NotifyMemInfo(MemBlockFlags::WRITE, dst, dstSize, tag, tagSize);
if (framebufferManager_) {
// Fixes Gran Turismo's funky text issue, since it overwrites the current texture.
textureCache_->Invalidate(dstBasePtr + (dstY * dstStride + dstX) * bpp, height * dstStride * bpp, GPU_INVALIDATE_HINT);
framebufferManager_->NotifyBlockTransferAfter(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp, skipDrawReason);
}
}
// TODO: Correct timing appears to be 1.9, but erring a bit low since some of our other timing is inaccurate.

View file

@ -301,6 +301,7 @@ protected:
void UpdateState(GPURunState state);
void FastLoadBoneMatrix(u32 target);
void FlushImm();
void DoBlockTransfer(u32 skipDrawReason);
// TODO: Unify this.
virtual void FinishDeferred() {}
@ -406,7 +407,6 @@ protected:
private:
void CheckDepthUsage(VirtualFramebuffer *vfb);
void DoBlockTransfer(u32 skipDrawReason);
void DoExecuteCall(u32 target);
void PopDLQueue();
void CheckDrawSync();

View file

@ -793,12 +793,6 @@ void SoftGPU::Execute_BlockTransferStart(u32 op, u32 diff) {
int bpp = gstate.getTransferBpp();
// For VRAM, we wrap around when outside valid memory (mirrors still work.)
if ((srcBasePtr & 0x04800000) == 0x04800000)
srcBasePtr &= ~0x00800000;
if ((dstBasePtr & 0x04800000) == 0x04800000)
dstBasePtr &= ~0x00800000;
// Use height less one to account for width, which can be greater or less than stride.
const uint32_t src = srcBasePtr + (srcY * srcStride + srcX) * bpp;
const uint32_t srcSize = (height - 1) * srcStride * bpp + width * bpp;
@ -806,176 +800,14 @@ void SoftGPU::Execute_BlockTransferStart(u32 op, u32 diff) {
const uint32_t dstSize = (height - 1) * dstStride * bpp + width * bpp;
// Need to flush both source and target, so we overwrite properly.
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", false, src, srcStride, width * bpp, height);
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", true, dst, dstStride, width * bpp, height);
DEBUG_LOG(G3D, "Block transfer: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY);
bool srcDstOverlap = src + srcSize > dst && dst + dstSize > src;
bool srcValid = Memory::IsValidRange(src, srcSize);
bool dstValid = Memory::IsValidRange(dst, dstSize);
bool srcWraps = Memory::IsVRAMAddress(srcBasePtr) && !srcValid;
bool dstWraps = Memory::IsVRAMAddress(dstBasePtr) && !dstValid;
// Simple case: just a straight copy, no overlap or wrapping.
if (srcStride == dstStride && (u32)width == srcStride && !srcDstOverlap && srcValid && dstValid) {
u32 srcLineStartAddr = srcBasePtr + (srcY * srcStride + srcX) * bpp;
u32 dstLineStartAddr = dstBasePtr + (dstY * dstStride + dstX) * bpp;
u32 bytesToCopy = width * height * bpp;
const u8 *srcp = Memory::GetPointer(srcLineStartAddr);
u8 *dstp = Memory::GetPointerWrite(dstLineStartAddr);
memcpy(dstp, srcp, bytesToCopy);
GPURecord::NotifyMemcpy(dstLineStartAddr, srcLineStartAddr, bytesToCopy);
if (MemBlockInfoDetailed(bytesToCopy)) {
const std::string tag = GetMemWriteTagAt("GPUBlockTransfer/", src, bytesToCopy);
NotifyMemInfo(MemBlockFlags::READ, src, bytesToCopy, tag.c_str(), tag.size());
NotifyMemInfo(MemBlockFlags::WRITE, dst, bytesToCopy, tag.c_str(), tag.size());
}
} else if ((srcDstOverlap || srcWraps || dstWraps) && (srcValid || srcWraps) && (dstValid || dstWraps)) {
// This path means we have either src/dst overlap, OR one or both of src and dst wrap.
// This should be uncommon so it's the slowest path.
u32 bytesToCopy = width * bpp;
static std::string tag;
bool notifyDetail = MemBlockInfoDetailed(srcWraps || dstWraps ? 64 : bytesToCopy);
bool notifyAll = !notifyDetail && MemBlockInfoDetailed(srcSize, dstSize);
if (notifyDetail || notifyAll) {
tag = GetMemWriteTagAt("GPUBlockTransfer/", src, srcSize);
}
auto notifyingMemmove = [&](u32 d, u32 s, u32 sz) {
const u8 *srcp = Memory::GetPointer(s);
u8 *dstp = Memory::GetPointerWrite(d);
memmove(dstp, srcp, sz);
GPURecord::NotifyMemcpy(d, s, sz);
if (notifyDetail) {
NotifyMemInfo(MemBlockFlags::READ, s, sz, tag.c_str(), tag.size());
NotifyMemInfo(MemBlockFlags::WRITE, d, sz, tag.c_str(), tag.size());
}
};
for (int y = 0; y < height; y++) {
u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;
// If we already passed a wrap, we can use the quicker path.
if ((srcLineStartAddr & 0x04800000) == 0x04800000)
srcLineStartAddr &= ~0x00800000;
if ((dstLineStartAddr & 0x04800000) == 0x04800000)
dstLineStartAddr &= ~0x00800000;
// These flags mean there's a wrap inside this line.
bool srcLineWrap = !Memory::IsValidRange(srcLineStartAddr, bytesToCopy);
bool dstLineWrap = !Memory::IsValidRange(dstLineStartAddr, bytesToCopy);
if (!srcLineWrap && !dstLineWrap) {
const u8 *srcp = Memory::GetPointer(srcLineStartAddr);
u8 *dstp = Memory::GetPointerWrite(dstLineStartAddr);
for (u32 i = 0; i < bytesToCopy; i += 64) {
u32 chunk = i + 64 > bytesToCopy ? bytesToCopy - i : 64;
memmove(dstp + i, srcp + i, chunk);
}
GPURecord::NotifyMemcpy(dstLineStartAddr, srcLineStartAddr, bytesToCopy);
// If we're tracking detail, it's useful to have the gaps illustrated properly.
if (notifyDetail) {
NotifyMemInfo(MemBlockFlags::READ, srcLineStartAddr, bytesToCopy, tag.c_str(), tag.size());
NotifyMemInfo(MemBlockFlags::WRITE, dstLineStartAddr, bytesToCopy, tag.c_str(), tag.size());
}
} else {
// We can wrap at any point, so along with overlap this gets a bit complicated.
// We're just going to do this the slow and easy way.
u32 srcLinePos = srcLineStartAddr;
u32 dstLinePos = dstLineStartAddr;
for (u32 i = 0; i < bytesToCopy; i += 64) {
u32 chunk = i + 64 > bytesToCopy ? bytesToCopy - i : 64;
u32 srcValid = Memory::ValidSize(srcLinePos, chunk);
u32 dstValid = Memory::ValidSize(dstLinePos, chunk);
// First chunk, for which both are valid.
u32 bothSize = std::min(srcValid, dstValid);
if (bothSize != 0)
notifyingMemmove(dstLinePos, srcLinePos, bothSize);
// Now, whichever side has more valid (or the rest, if only one side must wrap.)
u32 exclusiveSize = std::max(srcValid, dstValid) - bothSize;
if (exclusiveSize != 0 && srcValid >= dstValid) {
notifyingMemmove(PSP_GetVidMemBase(), srcLineStartAddr + bothSize, exclusiveSize);
} else if (exclusiveSize != 0 && srcValid < dstValid) {
notifyingMemmove(dstLineStartAddr + bothSize, PSP_GetVidMemBase(), exclusiveSize);
}
// Finally, if both src and dst wrapped, that portion.
u32 wrappedSize = chunk - bothSize - exclusiveSize;
if (wrappedSize != 0 && srcValid >= dstValid) {
notifyingMemmove(PSP_GetVidMemBase() + exclusiveSize, PSP_GetVidMemBase(), wrappedSize);
} else if (wrappedSize != 0 && srcValid < dstValid) {
notifyingMemmove(PSP_GetVidMemBase(), PSP_GetVidMemBase() + exclusiveSize, wrappedSize);
}
srcLinePos += chunk;
dstLinePos += chunk;
if ((srcLinePos & 0x04800000) == 0x04800000)
srcLinePos &= ~0x00800000;
if ((dstLinePos & 0x04800000) == 0x04800000)
dstLinePos &= ~0x00800000;
}
}
}
if (notifyAll) {
if (srcWraps) {
u32 validSize = Memory::ValidSize(src, srcSize);
NotifyMemInfo(MemBlockFlags::READ, src, validSize, tag.c_str(), tag.size());
NotifyMemInfo(MemBlockFlags::READ, PSP_GetVidMemBase(), srcSize - validSize, tag.c_str(), tag.size());
} else {
NotifyMemInfo(MemBlockFlags::READ, src, srcSize, tag.c_str(), tag.size());
}
if (dstWraps) {
u32 validSize = Memory::ValidSize(dst, dstSize);
NotifyMemInfo(MemBlockFlags::WRITE, dst, validSize, tag.c_str(), tag.size());
NotifyMemInfo(MemBlockFlags::WRITE, PSP_GetVidMemBase(), dstSize - validSize, tag.c_str(), tag.size());
} else {
NotifyMemInfo(MemBlockFlags::WRITE, dst, dstSize, tag.c_str(), tag.size());
}
}
} else if (srcValid && dstValid) {
u32 bytesToCopy = width * bpp;
static std::string tag;
bool notifyDetail = MemBlockInfoDetailed(bytesToCopy);
bool notifyAll = !notifyDetail && MemBlockInfoDetailed(srcSize, dstSize);
if (notifyDetail || notifyAll) {
tag = GetMemWriteTagAt("GPUBlockTransfer/", src, srcSize);
}
for (int y = 0; y < height; y++) {
u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;
const u8 *srcp = Memory::GetPointer(srcLineStartAddr);
u8 *dstp = Memory::GetPointerWrite(dstLineStartAddr);
memcpy(dstp, srcp, bytesToCopy);
GPURecord::NotifyMemcpy(dstLineStartAddr, srcLineStartAddr, bytesToCopy);
// If we're tracking detail, it's useful to have the gaps illustrated properly.
if (notifyDetail) {
NotifyMemInfo(MemBlockFlags::READ, srcLineStartAddr, bytesToCopy, tag.c_str(), tag.size());
NotifyMemInfo(MemBlockFlags::WRITE, dstLineStartAddr, bytesToCopy, tag.c_str(), tag.size());
}
}
if (notifyAll) {
NotifyMemInfo(MemBlockFlags::READ, src, srcSize, tag.c_str(), tag.size());
NotifyMemInfo(MemBlockFlags::WRITE, dst, dstSize, tag.c_str(), tag.size());
}
if (Memory::IsValidRange(src, srcSize) && Memory::IsValidRange(dst, dstSize)) {
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", false, src, srcStride, width * bpp, height);
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", true, dst, dstStride, width * bpp, height);
} else {
// This seems to cause the GE to require a break/reset on a PSP.
// TODO: Handle that and figure out which bytes are still copied?
ERROR_LOG_REPORT(G3D, "Block transfer invalid: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY);
drawEngine_->transformUnit.Flush("blockxfer_wrap");
}
// TODO: Correct timing appears to be 1.9, but erring a bit low since some of our other timing is inaccurate.
cyclesExecuted += ((height * width * bpp) * 16) / 10;
DoBlockTransfer(gstate_c.skipDrawReason);
// Could theoretically dirty the framebuffer.
MarkDirty(dst, dstSize, SoftGPUVRAMDirty::DIRTY | SoftGPUVRAMDirty::REALLY_DIRTY);