update gpu code

This commit is contained in:
Ced2911 2013-08-19 20:36:43 +02:00
parent 76b11b88b8
commit 83d0b7f01e
7 changed files with 324 additions and 218 deletions

View file

@ -37,9 +37,6 @@
#include "../../Core/HLE/sceKernelInterrupt.h"
#include "../../Core/HLE/sceGe.h"
extern u32 curTextureWidth;
extern u32 curTextureHeight;
static const u8 flushOnChangedBeforeCommandList[] = {
GE_CMD_REGION1,GE_CMD_REGION2,
GE_CMD_VERTEXTYPE,
@ -118,7 +115,7 @@ static const u8 flushOnChangedBeforeCommandList[] = {
GE_CMD_CLUTFORMAT,
GE_CMD_TEXFILTER,
GE_CMD_TEXWRAP,
GE_CMD_TEXLEVEL,
// GE_CMD_TEXLEVEL, // we don't support this anyway, no need to flush.
GE_CMD_TEXFUNC,
GE_CMD_TEXENVCOLOR,
//GE_CMD_TEXFLUSH,
@ -216,13 +213,16 @@ void DIRECTX9_GPU::BuildReportingInfo() {
void DIRECTX9_GPU::DeviceLost() {
// Simply drop all caches and textures.
// FBO:s appear to survive? Or no?
// FBOs appear to survive? Or no?
shaderManager_->ClearCache(false);
textureCache_.Clear(false);
framebufferManager_.DeviceLost();
}
// Requests the initial clear by scheduling a GPU_EVENT_INIT_CLEAR event.
// The work itself is not done here: DIRECTX9_GPU::ProcessEvent handles that
// event type by calling InitClearInternal().
void DIRECTX9_GPU::InitClear() {
ScheduleEvent(GPU_EVENT_INIT_CLEAR);
}
void DIRECTX9_GPU::InitClearInternal() {
bool useBufferedRendering = g_Config.iRenderingMode != 0 ? 1 : 0;
if (useBufferedRendering) {
dxstate.depthWrite.set(true);
@ -241,6 +241,10 @@ void DIRECTX9_GPU::DumpNextFrame() {
}
// Requests per-frame setup by scheduling a GPU_EVENT_BEGIN_FRAME event.
// DIRECTX9_GPU::ProcessEvent handles that event type by calling
// BeginFrameInternal(), which is where the actual work happens.
void DIRECTX9_GPU::BeginFrame() {
ScheduleEvent(GPU_EVENT_BEGIN_FRAME);
}
void DIRECTX9_GPU::BeginFrameInternal() {
// Turn off vsync when unthrottled
int desiredVSyncInterval = g_Config.bVSync;
if (PSP_CoreParameter().unthrottle)
@ -273,13 +277,44 @@ void DIRECTX9_GPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferForma
}
bool DIRECTX9_GPU::FramebufferDirty() {
// FIXME: Workaround for displaylists sometimes hanging unprocessed. Not yet sure of the cause.
if (g_Config.bSeparateCPUThread) {
// FIXME: Workaround for displaylists sometimes hanging unprocessed. Not yet sure of the cause.
ScheduleEvent(GPU_EVENT_PROCESS_QUEUE);
// Allow it to process fully before deciding if it's dirty.
SyncThread();
}
VirtualFramebuffer *vfb = framebufferManager_.GetDisplayFBO();
if (vfb)
return vfb->dirtyAfterDisplay;
if (vfb) {
bool dirty = vfb->dirtyAfterDisplay;
vfb->dirtyAfterDisplay = false;
return dirty;
}
return true;
}
bool DIRECTX9_GPU::FramebufferReallyDirty() {
// FIXME: Workaround for displaylists sometimes hanging unprocessed. Not yet sure of the cause.
if (g_Config.bSeparateCPUThread) {
// FIXME: Workaround for displaylists sometimes hanging unprocessed. Not yet sure of the cause.
ScheduleEvent(GPU_EVENT_PROCESS_QUEUE);
// Allow it to process fully before deciding if it's dirty.
SyncThread();
}
VirtualFramebuffer *vfb = framebufferManager_.GetDisplayFBO();
if (vfb) {
bool dirty = vfb->reallyDirtyAfterDisplay;
vfb->reallyDirtyAfterDisplay = false;
return dirty;
}
return true;
}
// Requests presentation of the display framebuffer by scheduling a
// GPU_EVENT_COPY_DISPLAY_TO_OUTPUT event. DIRECTX9_GPU::ProcessEvent handles
// that event type by calling CopyDisplayToOutputInternal().
void DIRECTX9_GPU::CopyDisplayToOutput() {
ScheduleEvent(GPU_EVENT_COPY_DISPLAY_TO_OUTPUT);
}
void DIRECTX9_GPU::CopyDisplayToOutputInternal() {
dxstate.depthWrite.set(true);
dxstate.colorMask.set(true, true, true, true);
@ -295,19 +330,17 @@ void DIRECTX9_GPU::CopyDisplayToOutput() {
// Render queue
u32 DIRECTX9_GPU::DrawSync(int mode)
{
transformDraw_.Flush();
return GPUCommon::DrawSync(mode);
}
void DIRECTX9_GPU::FastRunLoop(DisplayList &list) {
for (; downcount > 0; --downcount) {
u32 op = Memory::ReadUnchecked_U32(list.pc);
u32 cmd = op >> 24;
u32 diff = op ^ gstate.cmdmem[cmd];
CheckFlushOp(op, diff);
// Inlined CheckFlushOp here to get rid of the dumpThisFrame_ check.
u8 flushCmd = flushBeforeCommand_[cmd];
if (flushCmd == 1 || (diff && flushCmd == 2)) {
transformDraw_.Flush();
}
gstate.cmdmem[cmd] = op;
ExecuteOp(op, diff);
@ -315,8 +348,30 @@ void DIRECTX9_GPU::FastRunLoop(DisplayList &list) {
}
}
inline void DIRECTX9_GPU::CheckFlushOp(u32 op, u32 diff) {
u32 cmd = op >> 24;
// Dispatches a scheduled GPU event.
// The four event types this backend schedules itself (init clear, begin
// frame, copy display to output, invalidate cache) are routed to their
// matching *Internal() member; every other event type is forwarded to
// GPUCommon::ProcessEvent unchanged.
void DIRECTX9_GPU::ProcessEvent(GPUEvent ev) {
	switch (ev.type) {
	case GPU_EVENT_INIT_CLEAR:
		InitClearInternal();
		return;

	case GPU_EVENT_BEGIN_FRAME:
		BeginFrameInternal();
		return;

	case GPU_EVENT_COPY_DISPLAY_TO_OUTPUT:
		CopyDisplayToOutputInternal();
		return;

	case GPU_EVENT_INVALIDATE_CACHE:
		// The arguments were packed into the event by InvalidateCache().
		InvalidateCacheInternal(ev.invalidate_cache.addr, ev.invalidate_cache.size, ev.invalidate_cache.type);
		return;

	default:
		// Not one of ours: let the common implementation deal with it.
		GPUCommon::ProcessEvent(ev);
		return;
	}
}
inline void DIRECTX9_GPU::CheckFlushOp(int cmd, u32 diff) {
if (flushBeforeCommand_[cmd] == 1 || (diff && flushBeforeCommand_[cmd] == 2))
{
if (dumpThisFrame_) {
@ -327,7 +382,7 @@ inline void DIRECTX9_GPU::CheckFlushOp(u32 op, u32 diff) {
}
void DIRECTX9_GPU::PreExecuteOp(u32 op, u32 diff) {
CheckFlushOp(op, diff);
CheckFlushOp(op >> 24, diff);
}
void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
@ -602,8 +657,8 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
}
case GE_CMD_TEXSIZE0:
gstate_c.curTextureWidth = 1 << (gstate.texsize[0] & 0xf);
gstate_c.curTextureHeight = 1 << ((gstate.texsize[0] >> 8) & 0xf);
gstate_c.curTextureWidth = gstate.getTextureWidth(0);
gstate_c.curTextureHeight = gstate.getTextureHeight(0);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
//fall thru - ignoring the mipmap sizes for now
case GE_CMD_TEXSIZE1:
@ -855,7 +910,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
}
num++;
gstate.worldmtxnum = (gstate.worldmtxnum & 0xFF000000) | (num & 0xF);
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0xF);
}
break;
@ -873,7 +928,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
}
num++;
gstate.viewmtxnum = (gstate.viewmtxnum & 0xFF000000) | (num & 0xF);
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0xF);
}
break;
@ -891,7 +946,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_PROJMATRIX | DIRTY_PROJTHROUGHMATRIX);
}
num++;
gstate.projmtxnum = (gstate.projmtxnum & 0xFF000000) | (num & 0xF);
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0xF);
}
break;
@ -909,7 +964,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
}
num++;
gstate.texmtxnum = (gstate.texmtxnum & 0xFF000000) | (num & 0xF);
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0xF);
}
break;
@ -927,7 +982,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12));
}
num++;
gstate.boneMatrixNumber = (gstate.boneMatrixNumber & 0xFF000000) | (num & 0x7F);
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x7F);
}
break;
@ -1029,6 +1084,14 @@ void DIRECTX9_GPU::DoBlockTransfer() {
}
// Requests a cache invalidation by scheduling a GPU_EVENT_INVALIDATE_CACHE
// event that carries the three arguments. DIRECTX9_GPU::ProcessEvent unpacks
// them again and calls InvalidateCacheInternal().
//   addr - start address of the range to invalidate
//   size - size of the range; passed through untouched
//   type - invalidation type; passed through untouched
void DIRECTX9_GPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type) {
	GPUEvent invalidateEvent(GPU_EVENT_INVALIDATE_CACHE);

	// Pack the call arguments into the event payload.
	invalidateEvent.invalidate_cache.addr = addr;
	invalidateEvent.invalidate_cache.size = size;
	invalidateEvent.invalidate_cache.type = type;

	ScheduleEvent(invalidateEvent);
}
void DIRECTX9_GPU::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type) {
if (size > 0)
textureCache_.Invalidate(addr, size, type);
else
@ -1046,11 +1109,6 @@ void DIRECTX9_GPU::ClearCacheNextFrame() {
textureCache_.ClearNextFrame();
}
void DIRECTX9_GPU::Flush() {
transformDraw_.Flush();
}
void DIRECTX9_GPU::Resized() {
framebufferManager_.Resized();
}

View file

@ -38,7 +38,6 @@ public:
virtual void InitClear();
virtual void PreExecuteOp(u32 op, u32 diff);
virtual void ExecuteOp(u32 op, u32 diff);
virtual u32 DrawSync(int mode);
virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format);
virtual void CopyDisplayToOutput();
@ -50,16 +49,15 @@ public:
virtual void DeviceLost(); // Only happens on Android. Drop all textures and shaders.
virtual void DumpNextFrame();
virtual void Flush();
virtual void DoState(PointerWrap &p);
// Called by the window system if the window size changed. This will be reflected in PSPCoreParam.pixel*.
virtual void Resized();
virtual bool DecodeTexture(u8* dest, GPUgstate state)
{
virtual bool DecodeTexture(u8* dest, GPUgstate state) {
return textureCache_.DecodeTexture(dest, state);
}
virtual bool FramebufferDirty();
virtual bool FramebufferReallyDirty();
virtual void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) {
primaryInfo = reportingPrimaryInfo_;
@ -69,12 +67,20 @@ public:
protected:
virtual void FastRunLoop(DisplayList &list);
virtual void ProcessEvent(GPUEvent ev);
private:
void Flush() {
transformDraw_.Flush();
}
void DoBlockTransfer();
void ApplyDrawState(int prim);
void CheckFlushOp(u32 op, u32 diff);
void CheckFlushOp(int cmd, u32 diff);
void BuildReportingInfo();
void InitClearInternal();
void BeginFrameInternal();
void CopyDisplayToOutputInternal();
void InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type);
FramebufferManager framebufferManager_;
TextureCache textureCache_;

View file

@ -27,8 +27,9 @@
// GL_NV_shader_framebuffer_fetch looks interesting....
static bool IsAlphaTestTriviallyTrue() {
int alphaTestFunc = gstate.alphatest & 7;
int alphaTestRef = (gstate.alphatest >> 8) & 0xFF;
GEComparison alphaTestFunc = gstate.getAlphaTestFunction();
int alphaTestRef = gstate.getAlphaTestRef();
int alphaTestMask = gstate.getAlphaTestMask();
switch (alphaTestFunc) {
case GE_COMP_ALWAYS:
@ -51,7 +52,7 @@ static bool IsAlphaTestTriviallyTrue() {
}
static bool IsColorTestTriviallyTrue() {
int colorTestFunc = gstate.colortest & 3;
GEComparison colorTestFunc = gstate.getColorTestFunction();
switch (colorTestFunc) {
case GE_COMP_ALWAYS:
return true;
@ -92,37 +93,37 @@ static bool CanDoubleSrcBlendMode() {
// look like, and concatenate them together into an ID.
void ComputeFragmentShaderID(FragmentShaderID *id) {
memset(&id->d[0], 0, sizeof(id->d));
if (gstate.clearmode & 1) {
if (gstate.isModeClear()) {
// We only need one clear shader, so let's ignore the rest of the bits.
id->d[0] = 1;
} else {
int lmode = (gstate.lmode & 1) && gstate.isLightingEnabled();
bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled();
bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough();
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue();
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue();
bool enableColorDoubling = (gstate.texfunc & 0x10000) != 0;
bool enableColorDoubling = gstate.isColorDoublingEnabled();
// This isn't really correct, but it's a hack to get doubled blend modes to work more correctly.
bool enableAlphaDoubling = CanDoubleSrcBlendMode();
bool doTextureProjection = gstate.getUVGenMode() == 1;
bool doTextureAlpha = (gstate.texfunc & 0x100) != 0;
// All texfuncs except replace are the same for RGB as for RGBA with full alpha.
if (gstate_c.textureFullAlpha && (gstate.texfunc & 0x7) != GE_TEXFUNC_REPLACE)
if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE)
doTextureAlpha = false;
// id->d[0] |= (gstate.clearmode & 1);
// id->d[0] |= (gstate.isModeClear() & 1);
if (gstate.isTextureMapEnabled()) {
id->d[0] |= 1 << 1;
id->d[0] |= (gstate.texfunc & 0x7) << 2;
id->d[0] |= gstate.getTextureFunction() << 2;
id->d[0] |= (doTextureAlpha & 1) << 5; // rgb or rgba
}
id->d[0] |= (lmode & 1) << 7;
id->d[0] |= gstate.isAlphaTestEnabled() << 8;
if (enableAlphaTest)
id->d[0] |= (gstate.alphatest & 0x7) << 9; // alpha test func
id->d[0] |= gstate.getAlphaTestFunction() << 9;
id->d[0] |= gstate.isColorTestEnabled() << 12;
if (enableColorTest)
id->d[0] |= (gstate.colortest & 0x3) << 13; // color test func
id->d[0] |= gstate.getColorTestFunction() << 13; // color test func
id->d[0] |= (enableFog & 1) << 15;
id->d[0] |= (doTextureProjection & 1) << 16;
id->d[0] |= (enableColorDoubling & 1) << 17;
@ -164,13 +165,13 @@ void GenerateFragmentShader(char *buffer) {
bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough() && !gstate.isModeClear();
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue() && !gstate.isModeClear();
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue() && !gstate.isModeClear();
bool enableColorDoubling = (gstate.texfunc & 0x10000) != 0;
bool enableColorDoubling = gstate.isColorDoublingEnabled();
// This isn't really correct, but it's a hack to get doubled blend modes to work more correctly.
bool enableAlphaDoubling = CanDoubleSrcBlendMode();
bool doTextureProjection = gstate.getUVGenMode() == 1;
bool doTextureAlpha = (gstate.texfunc & 0x100) != 0;
if (gstate_c.textureFullAlpha && (gstate.texfunc & 0x7) != GE_TEXFUNC_REPLACE)
if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE)
doTextureAlpha = false;
if (doTexture)
@ -220,7 +221,7 @@ void GenerateFragmentShader(char *buffer) {
secondary = "";
}
if (gstate.textureMapEnable & 1) {
if (gstate.isTextureMapEnabled()) {
if (doTextureProjection) {
WRITE(p, " float4 t = tex2Dproj(tex, In.v_texcoord);\n");
} else {
@ -229,7 +230,7 @@ void GenerateFragmentShader(char *buffer) {
WRITE(p, " float4 p = In.v_color0;\n");
if (doTextureAlpha) { // texfmt == RGBA
switch (gstate.texfunc & 0x7) {
switch (gstate.getTextureFunction()) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " float4 v = p * t%s;\n", secondary); break;
case GE_TEXFUNC_DECAL:
@ -245,7 +246,7 @@ void GenerateFragmentShader(char *buffer) {
}
} else { // texfmt == RGB
switch (gstate.texfunc & 0x7) {
switch (gstate.getTextureFunction()) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " float4 v = float4(t.rgb * p.rgb, p.a)%s;\n", secondary); break;
case GE_TEXFUNC_DECAL:
@ -268,7 +269,7 @@ void GenerateFragmentShader(char *buffer) {
}
if (enableAlphaTest) {
int alphaTestFunc = gstate.alphatest & 7;
GEComparison alphaTestFunc = gstate.getAlphaTestFunction();
const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " }; // never/always don't make sense
if (alphaTestFuncs[alphaTestFunc][0] != '#') {
// WRITE(p, " if (roundAndScaleTo255f(v.a) %s u_alphacolorref.a) discard;\n", alphaTestFuncs[alphaTestFunc]);
@ -287,9 +288,9 @@ void GenerateFragmentShader(char *buffer) {
}
if (enableColorTest) {
int colorTestFunc = gstate.colortest & 3;
GEComparison colorTestFunc = gstate.getColorTestFunction();
const char *colorTestFuncs[] = { "#", "#", " != ", " == " }; // never/always don't make sense
int colorTestMask = gstate.colormask;
u32 colorTestMask = gstate.getColorTestMask();
if (colorTestFuncs[colorTestFunc][0] != '#') {
//WRITE(p, "clip((roundAndScaleTo255v(v.rgb) %s u_alphacolorref.rgb)? -1:1);\n", colorTestFuncs[colorTestFunc]);
//WRITE(p, "if (roundAndScaleTo255v(v.rgb) %s u_alphacolorref.rgb) clip(-1);\n", colorTestFuncs[colorTestFunc]);

View file

@ -94,6 +94,21 @@ void CenterRect(float *x, float *y, float *w, float *h,
}
}
// Clears the currently bound render target: color to black (0, 0, 0), plus
// the depth and stencil buffers (both clear values are passed as 0).
// Depth writes and all four color mask channels are switched on through
// dxstate before the clear is issued.
void ClearBuffer() {
	dxstate.depthWrite.set(true);
	dxstate.colorMask.set(true, true, true, true);

	// Color target, Z buffer and stencil are all cleared in one call.
	const auto clearFlags = D3DCLEAR_STENCIL | D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER;
	pD3Ddevice->Clear(0, NULL, clearFlags, D3DCOLOR_XRGB(0, 0, 0), 0, 0);
}
// Puts the dxstate wrapper into a plain "just draw it" configuration:
// blending, depth test, scissor test and stencil test are disabled, and the
// cull mode is set with both arguments false (presumably culling off -
// confirm against the dxstate cullMode definition).
// Used right before an FBO's color buffer is bound as a texture and drawn,
// in FramebufferManager::CopyDisplayToOutput and BlitFramebuffer_.
void DisableState() {
dxstate.blend.disable();
dxstate.cullMode.set(false, false);
dxstate.depthTest.disable();
dxstate.scissorTest.disable();
dxstate.stencilTest.disable();
}
FramebufferManager::FramebufferManager() :
ramDisplayFramebufPtr_(0),
displayFramebufPtr_(0),
@ -118,9 +133,7 @@ FramebufferManager::FramebufferManager() :
#endif
// And an initial clear. We don't clear per frame as the games are supposed to handle that
// by themselves.
dxstate.depthWrite.set(true);
dxstate.colorMask.set(true, true, true, true);
pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 0, 0);
ClearBuffer();
pD3Ddevice->CreateTexture(512, 272, 1, 0, D3DFMT(D3DFMT_A8R8G8B8), NULL, &drawPixelsTex_, NULL);
@ -279,40 +292,45 @@ VirtualFramebuffer *FramebufferManager::GetDisplayFBO() {
return 0;
}
void GetViewportDimensions(int &w, int &h) {
float vpXa = getFloat24(gstate.viewportx1);
float vpYa = getFloat24(gstate.viewporty1);
w = (int)fabsf(vpXa * 2);
h = (int)fabsf(vpYa * 2);
}
// Heuristics to figure out the size of FBO to create.
void GuessDrawingSize(int &drawing_width, int &drawing_height) {
int viewport_width, viewport_height;
int default_width = 480;
int default_height = 272;
int regionX2 = (gstate.getRegionX2() + 1) ;
int regionY2 = (gstate.getRegionY2() + 1) ;
int fb_stride = gstate.fbwidth & 0x3C0;
GetViewportDimensions(viewport_width, viewport_height);
int viewport_width = (int) gstate.getViewportX1();
int viewport_height = (int) gstate.getViewportY1();
int region_width = (gstate.getRegionX2() + 1) ;
int region_height = (gstate.getRegionY2() + 1) ;
int fb_width = gstate.fbwidth & 0x3C0;
// Generated FBO shouldn't greate than 512x512
if ( viewport_width > 512 && viewport_height > 512 ) {
viewport_width = default_width;
viewport_height = default_height;
DEBUG_LOG(HLE,"viewport : %ix%i, region : %ix%i, stride: %i", viewport_width,viewport_height, region_width, region_height, fb_width);
// In case viewport return as 0x0 like FF Type-0
if (viewport_width <= 1 && viewport_height <=1) {
drawing_width = default_width;
drawing_height = default_height;
}
if (fb_stride < 512) {
drawing_width = std::min(viewport_width, regionX2);
drawing_height = std::min(viewport_height, regionY2);
if (fb_width < 512) {
if (fb_width != viewport_width) {
drawing_width = viewport_width;
drawing_height = viewport_height;
} else {
drawing_width = region_width;
drawing_height = region_height;
}
} else {
drawing_width = std::max(viewport_width, default_width);
drawing_height = std::max(viewport_height, default_height);
if (fb_width != region_width) {
drawing_width = default_width;
drawing_height = default_height;
} else {
drawing_width = region_width;
drawing_height = region_height;
}
}
}
void FramebufferManager::DestroyFramebuf(VirtualFramebuffer *v) {
textureCache_->NotifyFramebufferDestroyed(v->fb_address, v);
textureCache_->NotifyFramebuffer(v->fb_address, v, NOTIFY_FB_DESTROYED);
if (v->fbo) {
fbo_destroy(v->fbo);
v->fbo = 0;
@ -334,6 +352,9 @@ void FramebufferManager::DestroyFramebuf(VirtualFramebuffer *v) {
void FramebufferManager::SetRenderFrameBuffer() {
if (!gstate_c.framebufChanged && currentRenderVfb_) {
currentRenderVfb_->last_frame_used = gpuStats.numFlips;
currentRenderVfb_->dirtyAfterDisplay = true;
if (!gstate_c.skipDrawReason)
currentRenderVfb_->reallyDirtyAfterDisplay = true;
return;
}
gstate_c.framebufChanged = false;
@ -398,6 +419,9 @@ void FramebufferManager::SetRenderFrameBuffer() {
vfb->format = fmt;
vfb->usageFlags = FB_USAGE_RENDERTARGET;
vfb->dirtyAfterDisplay = true;
if ((gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
vfb->reallyDirtyAfterDisplay = true;
vfb->memoryUpdated = false;
if (g_Config.bTrueColor) {
vfb->colorDepth = FBO_8888;
@ -438,15 +462,12 @@ void FramebufferManager::SetRenderFrameBuffer() {
gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB;
}
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb);
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_CREATED);
vfb->last_frame_used = gpuStats.numFlips;
frameLastFramebufUsed = gpuStats.numFlips;
vfbs_.push_back(vfb);
dxstate.depthWrite.set(true);
dxstate.colorMask.set(true, true, true, true);
pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 0, 0);
ClearBuffer();
currentRenderVfb_ = vfb;
@ -454,6 +475,14 @@ void FramebufferManager::SetRenderFrameBuffer() {
// We already have it!
} else if (vfb != currentRenderVfb_) {
#ifndef USING_GLES2
bool useMem = g_Config.iRenderingMode == FB_READFBOMEMORY_GPU || g_Config.iRenderingMode == FB_READFBOMEMORY_CPU;
#else
bool useMem = g_Config.iRenderingMode == FB_READFBOMEMORY_GPU;
#endif
if(useMem && !vfb->memoryUpdated) {
ReadFramebufferToMemory(vfb, true);
}
// Use it as a render target.
DEBUG_LOG(HLE, "Switching render target to FBO for %08x: %i x %i x %i ", vfb->fb_address, vfb->width, vfb->height, vfb->format);
vfb->usageFlags |= FB_USAGE_RENDERTARGET;
@ -461,6 +490,9 @@ void FramebufferManager::SetRenderFrameBuffer() {
vfb->last_frame_used = gpuStats.numFlips;
frameLastFramebufUsed = gpuStats.numFlips;
vfb->dirtyAfterDisplay = true;
if ((gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
vfb->reallyDirtyAfterDisplay = true;
vfb->memoryUpdated = false;
if (useBufferedRendering_) {
if (vfb->fbo) {
@ -472,7 +504,7 @@ void FramebufferManager::SetRenderFrameBuffer() {
} else {
if (vfb->fbo) {
// wtf? This should only happen very briefly when toggling bBufferedRendering
textureCache_->NotifyFramebufferDestroyed(vfb->fb_address, vfb);
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_DESTROYED);
fbo_destroy(vfb->fbo);
vfb->fbo = 0;
}
@ -492,7 +524,7 @@ void FramebufferManager::SetRenderFrameBuffer() {
gstate_c.skipDrawReason |= ~SKIPDRAW_SKIPNONFB;
}*/
}
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb);
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED);
#if 1
// Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering
@ -509,6 +541,9 @@ void FramebufferManager::SetRenderFrameBuffer() {
} else {
vfb->last_frame_used = gpuStats.numFlips;
frameLastFramebufUsed = gpuStats.numFlips;
vfb->dirtyAfterDisplay = true;
if ((gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
vfb->reallyDirtyAfterDisplay = true;
}
// ugly...
@ -534,15 +569,14 @@ void FramebufferManager::CopyDisplayToOutput() {
} else {
DEBUG_LOG(HLE, "Found no FBO to display! displayFBPtr = %08x", displayFramebufPtr_);
// No framebuffer to display! Clear to black.
dxstate.depthWrite.set(true);
dxstate.colorMask.set(true, true, true, true);
pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 0, 0);
ClearBuffer();
}
return;
}
vfb->usageFlags |= FB_USAGE_DISPLAYED_FRAMEBUFFER;
vfb->dirtyAfterDisplay = false;
vfb->reallyDirtyAfterDisplay = false;
if (prevDisplayFramebuf_ != displayFramebuf_) {
prevPrevDisplayFramebuf_ = prevDisplayFramebuf_;
@ -552,17 +586,14 @@ void FramebufferManager::CopyDisplayToOutput() {
}
displayFramebuf_ = vfb;
if (resized_) {
ClearBuffer();
}
if (vfb->fbo) {
dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight);
DEBUG_LOG(HLE, "Displaying FBO %08x", vfb->fb_address);
dxstate.blend.disable();
dxstate.cullMode.set(false, false);
dxstate.depthTest.disable();
dxstate.scissorTest.disable();
dxstate.stencilTest.disable();
// Resolve
//fbo_resolve(vfb->fbo);
DisableState();
fbo_bind_color_as_texture(vfb->fbo, 0);
@ -572,19 +603,20 @@ void FramebufferManager::CopyDisplayToOutput() {
DrawActiveTexture(x, y, w, h, true, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height);
pD3Ddevice->SetTexture(0, NULL);
}
if (resized_) {
dxstate.depthWrite.set(true);
dxstate.colorMask.set(true, true, true, true);
pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 0, 0);
}
}
void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb) {
void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync) {
// This only works with buffered rendering
if (!useBufferedRendering_) {
return;
}
#if 0
if(sync) {
PackFramebufferAsync_(NULL); // flush async just in case when we go for synchronous update
}
#endif
if(vfb) {
// We'll pseudo-blit framebuffers here to get a resized and flipped version of vfb.
// For now we'll keep these on the same struct as the ones that can get displayed
@ -646,51 +678,32 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb) {
nvfb->fbo = fbo_create(nvfb->width, nvfb->height, 1, true, nvfb->colorDepth);
if (!(nvfb->fbo)) {
ERROR_LOG(HLE, "Error creating FBO! %i x %i", nvfb->renderWidth, nvfb->renderHeight);
}
if (useBufferedRendering_) {
if (nvfb->fbo) {
fbo_bind_as_render_target(nvfb->fbo);
} else {
fbo_unbind();
return;
}
}
nvfb->last_frame_used = gpuStats.numFlips;
bvfbs_.push_back(nvfb);
dxstate.depthWrite.set(true);
dxstate.colorMask.set(true, true, true, true);
pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 0, 0);
fbo_bind_as_render_target(nvfb->fbo);
ClearBuffer();
} else {
nvfb->usageFlags |= FB_USAGE_RENDERTARGET;
nvfb->last_frame_used = gpuStats.numFlips;
nvfb->dirtyAfterDisplay = true;
if (useBufferedRendering_) {
if (nvfb->fbo) {
#ifdef USING_GLES2
fbo_bind_as_render_target(nvfb->fbo);
#if 1
// Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering
// to it. This broke stuff before, so now it only clears on the first use of an
// FBO in a frame. This means that some games won't be able to avoid the on-some-GPUs
// performance-crushing framebuffer reloads from RAM, but we'll have to live with that.
if (nvfb->last_frame_used != gpuStats.numFlips) {
dxstate.depthWrite.set(true);
dxstate.colorMask.set(true, true, true, true);
pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 0, 0);
ClearBuffer();
}
#endif
} else {
fbo_unbind();
return;
}
}
}
vfb->memoryUpdated = true;
BlitFramebuffer_(vfb, nvfb, false);
PackFramebufferDirectx9_(nvfb);
@ -714,12 +727,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *src, VirtualFrameb
*/
dxstate.viewport.set(0, 0, dst->width, dst->height);
dxstate.depthTest.disable();
dxstate.blend.disable();
dxstate.cullMode.set(0, 0);
dxstate.depthTest.disable();
dxstate.scissorTest.disable();
dxstate.stencilTest.disable();
DisableState();
fbo_bind_color_as_texture(src->fbo, 0);
@ -835,7 +843,7 @@ void FramebufferManager::DeviceLost() {
void FramebufferManager::BeginFrame() {
DecimateFBOs();
currentRenderVfb_ = 0;
useBufferedRendering_ = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE ? 1 : 0;
useBufferedRendering_ = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
}
void FramebufferManager::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
@ -875,17 +883,16 @@ std::vector<FramebufferInfo> FramebufferManager::GetFramebufferList() {
void FramebufferManager::DecimateFBOs() {
fbo_unbind();
currentRenderVfb_ = 0;
int num = g_Config.iFrameSkip > 0 && g_Config.iFrameSkip != 9 ? g_Config.iFrameSkip : 3;
bool skipFrame = (gpuStats.numFlips % num == 0);
bool useFramebufferToMem = g_Config.iRenderingMode != FB_BUFFERED_MODE ? 1 : 0;
#ifndef USING_GLES2
bool useMem = g_Config.iRenderingMode == FB_READFBOMEMORY_GPU || g_Config.iRenderingMode == FB_READFBOMEMORY_CPU;
#else
bool useMem = g_Config.iRenderingMode == FB_READFBOMEMORY_GPU;
#endif
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
int age = frameLastFramebufUsed - vfb->last_frame_used;
if(useFramebufferToMem) {
// Commit framebuffers to memory
if(skipFrame && age <= FBO_OLD_AGE)
if(useMem && !age && !vfb->memoryUpdated) {
ReadFramebufferToMemory(vfb);
}
@ -944,6 +951,7 @@ void FramebufferManager::UpdateFromMemory(u32 addr, int size) {
VirtualFramebuffer *vfb = vfbs_[i];
if (MaskedEqual(vfb->fb_address, addr)) {
vfb->dirtyAfterDisplay = true;
vfb->reallyDirtyAfterDisplay = true;
// TODO: This without the fbo_unbind() above would be better than destroying the FBO.
// However, it doesn't seem to work for Star Ocean, at least
if (useBufferedRendering_) {

View file

@ -46,6 +46,7 @@ enum {
struct VirtualFramebuffer {
int last_frame_used;
bool memoryUpdated;
u32 fb_address;
u32 z_address;
@ -71,6 +72,7 @@ struct VirtualFramebuffer {
FBO *fbo;
bool dirtyAfterDisplay;
bool reallyDirtyAfterDisplay; // takes frame skipping into account
};
void CenterRect(float *x, float *y, float *w, float *h,
@ -104,7 +106,7 @@ public:
void SetRenderFrameBuffer(); // Uses parameters computed from gstate
void UpdateFromMemory(u32 addr, int size);
void ReadFramebufferToMemory(VirtualFramebuffer *vfb);
void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync = true);
// TODO: Break out into some form of FBO manager
VirtualFramebuffer *GetDisplayFBO();

View file

@ -26,11 +26,10 @@
#include "GPU/Directx9/Framebuffer.h"
#include "Core/Config.h"
#include "ext/xxhash.h"
#include "native/ext/cityhash/city.h"
#ifdef _M_SSE
#include <xmmintrin.h>
#endif
#define INVALID_TEX (LPDIRECT3DTEXTURE9)(-1)
// If a texture hasn't been seen for this many frames, get rid of it.
#define TEXTURE_KILL_AGE 200
@ -38,6 +37,9 @@
// Not used in lowmem mode.
#define TEXTURE_SECOND_KILL_AGE 100
// Try to be prime to other decimation intervals.
#define TEXCACHE_DECIMATION_INTERVAL 13
extern int g_iNumVideos;
u32 RoundUpToPowerOf2(u32 v)
@ -60,7 +62,8 @@ static inline u32 GetLevelBufw(int level, u32 texaddr) {
}
TextureCache::TextureCache() : clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL) {
lastBoundTexture = NULL;
lastBoundTexture = INVALID_TEX;
decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
// This is 5MB of temporary storage. Might be possible to shrink it.
tmpTexBuf32.resize(1024 * 512); // 2MB
tmpTexBuf16.resize(1024 * 512); // 1MB
@ -78,7 +81,7 @@ TextureCache::~TextureCache() {
void TextureCache::Clear(bool delete_them) {
pD3Ddevice->SetTexture(0, NULL);
lastBoundTexture = NULL;
lastBoundTexture = INVALID_TEX;
if (delete_them) {
for (TexCache::iterator iter = cache.begin(); iter != cache.end(); ++iter) {
DEBUG_LOG(G3D, "Deleting texture %i", iter->second.texture);
@ -98,8 +101,14 @@ void TextureCache::Clear(bool delete_them) {
// Removes old textures.
void TextureCache::Decimate() {
if (--decimationCounter_ <= 0) {
decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
} else {
return;
}
pD3Ddevice->SetTexture(0, NULL);
lastBoundTexture = NULL;
lastBoundTexture = INVALID_TEX;
int killAge = lowMemoryMode_ ? TEXTURE_KILL_AGE_LOWMEM : TEXTURE_KILL_AGE;
for (TexCache::iterator iter = cache.begin(); iter != cache.end(); ) {
if (iter->second.lastFrame + TEXTURE_KILL_AGE < gpuStats.numFlips) {
@ -163,36 +172,33 @@ void TextureCache::ClearNextFrame() {
}
TextureCache::TexCacheEntry *TextureCache::GetEntryAt(u32 texaddr) {
// If no CLUT, as in framebuffer textures, cache key is simply texaddr shifted up.
auto iter = cache.find((u64)texaddr << 32);
if (iter != cache.end() && iter->second.addr == texaddr)
return &iter->second;
else
return 0;
// Unconditionally points a texture cache entry at the given framebuffer and
// sets its invalidHint to 0.
// T is any pointer-like type whose target has `framebuffer` and `invalidHint`
// members (it is called with a TexCacheEntry pointer in AttachFramebuffer).
// NOTE(review): invalidHint == 0 presumably means "no invalidation pending" -
// confirm against the TexCacheEntry definition.
template <typename T>
inline void AttachFramebufferValid(T &entry, VirtualFramebuffer *framebuffer) {
entry->framebuffer = framebuffer;
entry->invalidHint = 0;
}
void TextureCache::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer) {
// This is a rough heuristic, because sometimes our framebuffers are too tall.
static const u32 MAX_SUBAREA_Y_OFFSET = 32;
// Must be in VRAM so | 0x04000000 it is.
const u64 cacheKey = (u64)(address | 0x04000000) << 32;
// If it has a clut, those are the low 32 bits, so it'll be inside this range.
// Also, if it's a subsample of the buffer, it'll also be within the FBO.
const u64 cacheKeyEnd = cacheKey + ((u64)(framebuffer->fb_stride * MAX_SUBAREA_Y_OFFSET) << 32);
for (auto it = cache.lower_bound(cacheKey), end = cache.upper_bound(cacheKeyEnd); it != end; ++it) {
auto entry = &it->second;
// Points a texture cache entry at the given framebuffer and sets its
// invalidHint to -1, but only when the entry has no framebuffer yet or is
// already attached to this same one. An entry attached to a different
// framebuffer is left untouched.
// T is any pointer-like type whose target has `framebuffer` and `invalidHint`
// members.
template <typename T>
inline void AttachFramebufferInvalid(T &entry, VirtualFramebuffer *framebuffer) {
	// Already claimed by some other framebuffer: don't steal it.
	if (entry->framebuffer != 0 && entry->framebuffer != framebuffer) {
		return;
	}

	entry->framebuffer = framebuffer;
	entry->invalidHint = -1;
}
inline void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, bool exactMatch) {
// If they match exactly, it's non-CLUT and from the top left.
if (it->first == cacheKey) {
if (exactMatch) {
DEBUG_LOG(HLE, "Render to texture detected at %08x!", address);
if (!entry->framebuffer) {
if (entry->format != framebuffer->format) {
WARN_LOG_REPORT_ONCE(diffFormat1, HLE, "Render to texture with different formats %d != %d", entry->format, framebuffer->format);
// If it already has one, let's hope that one is correct.
// Try to not bind FB now as it seems to be attached some strange stuff on top of the original FB.
//AttachFramebufferInvalid(entry, framebuffer);
} else {
AttachFramebufferValid(entry, framebuffer);
}
entry->framebuffer = framebuffer;
// TODO: Delete the original non-fbo texture too.
}
} else if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE || g_Config.iRenderingMode == FB_BUFFERED_MODE) {
@ -206,28 +212,45 @@ void TextureCache::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffe
if (framebuffer->format != entry->format) {
WARN_LOG_REPORT_ONCE(diffFormat2, HLE, "Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address);
// TODO: Use an FBO to translate the palette?
entry->framebuffer = framebuffer;
AttachFramebufferValid(entry, framebuffer);
} else if ((entry->addr - address) / entry->bufw < framebuffer->height) {
WARN_LOG_REPORT_ONCE(subarea, HLE, "Render to area containing texture at %08x", address);
// TODO: Keep track of the y offset.
entry->framebuffer = framebuffer;
}
AttachFramebufferInvalid(entry, framebuffer);
}
}
}
}
void TextureCache::NotifyFramebufferDestroyed(u32 address, VirtualFramebuffer *framebuffer) {
TexCacheEntry *entry = GetEntryAt(address | 0x04000000);
if (entry && entry->framebuffer == framebuffer) {
// There's at least one. We're going to have to loop through all textures unfortunately to be
// 100% safe.
for (TexCache::iterator iter = cache.begin(); iter != cache.end(); ++iter) {
if (iter->second.framebuffer == framebuffer) {
iter->second.framebuffer = 0;
}
// Clears the entry's framebuffer link, but only when the entry currently
// points at the given framebuffer. The address parameter is not used here.
inline void TextureCache::DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer) {
	if (entry->framebuffer != framebuffer) {
		return;
	}
	entry->framebuffer = 0;
}
// Propagates a framebuffer lifecycle notification to every cached texture whose
// key falls inside the address window covered by the framebuffer.
//   address     - start address of the framebuffer (forced into VRAM below).
//   framebuffer - the virtual framebuffer the notification is about.
//   msg         - created/updated attach the framebuffer to matching entries,
//                 destroyed detaches it; any other value does nothing.
void TextureCache::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg) {
	// This is a rough heuristic, because sometimes our framebuffers are too tall.
	static const u32 MAX_SUBAREA_Y_OFFSET = 32;

	// Must be in VRAM so | 0x04000000 it is. The address occupies the high 32 bits of the key.
	const u64 firstKey = (u64)(address | 0x04000000) << 32;
	// If it has a clut, those are the low 32 bits, so it'll be inside this range.
	// Also, if it's a subsample of the buffer, it'll also be within the FBO.
	const u64 lastKey = firstKey + ((u64)(framebuffer->fb_stride * MAX_SUBAREA_Y_OFFSET) << 32);

	const bool attach = msg == NOTIFY_FB_CREATED || msg == NOTIFY_FB_UPDATED;
	const bool detach = msg == NOTIFY_FB_DESTROYED;
	if (!attach && !detach) {
		return;
	}

	const auto rangeEnd = cache.upper_bound(lastKey);
	for (auto iter = cache.lower_bound(firstKey); iter != rangeEnd; ++iter) {
		if (attach) {
			// Only the entry whose key equals firstKey counts as an exact match.
			AttachFramebuffer(&iter->second, address, framebuffer, iter->first == firstKey);
		} else {
			DetachFramebuffer(&iter->second, address, framebuffer);
		}
	}
}
@ -246,16 +269,16 @@ void *TextureCache::UnswizzleFromMem(u32 texaddr, u32 bufw, u32 bytesPerPixel, u
const u32 rowWidth = (bytesPerPixel > 0) ? (bufw * bytesPerPixel) : (bufw / 2);
const u32 pitch = rowWidth / 4;
const int bxc = rowWidth / 16;
int byc = ((1 << ((gstate.texsize[level] >> 8) & 0xf)) + 7) / 8;
int byc = (gstate.getTextureHeight(level) + 7) / 8;
if (byc == 0)
byc = 1;
u32 ydest = 0;
if (rowWidth >= 16) {
const u32 *src = (u32 *) Memory::GetPointer(texaddr);
u32 *ydest = tmpTexBuf32.data();
u32 *ydestp = tmpTexBuf32.data();
for (int by = 0; by < byc; by++) {
u32 *xdest = ydest;
u32 *xdest = ydestp;
for (int bx = 0; bx < bxc; bx++) {
u32 *dest = xdest;
for (int n = 0; n < 8; n++) {
@ -265,7 +288,7 @@ void *TextureCache::UnswizzleFromMem(u32 texaddr, u32 bufw, u32 bytesPerPixel, u
}
xdest += 4;
}
ydest += (rowWidth * 8) / 4;
ydestp += (rowWidth * 8) / 4;
}
} else if (rowWidth == 8) {
const u32 *src = (u32 *) Memory::GetPointer(texaddr);
@ -393,8 +416,8 @@ inline void DeIndexTexture4Optimal(ClutT *dest, const u32 texaddr, int length, C
void *TextureCache::readIndexedTex(int level, u32 texaddr, int bytesPerIndex, u32 dstFmt) {
int bufw = GetLevelBufw(level, texaddr);
int w = 1 << (gstate.texsize[0] & 0xf);
int h = 1 << ((gstate.texsize[0] >> 8) & 0xf);
int w = gstate.getTextureWidth(level);
int h = gstate.getTextureHeight(level);
int length = bufw * h;
void *buf = NULL;
switch (gstate.getClutPaletteFormat()) {
@ -629,8 +652,8 @@ static inline u32 makecol(int r, int g, int b, int a) {
static void decodeDXT1Block(u32 *dst, const DXT1Block *src, int pitch, bool ignore1bitAlpha = false) {
// S3TC Decoder
// Needs more speed and debugging.
u16 c1 = src->color1;
u16 c2 = src->color2;
u16 c1 = (src->color1);
u16 c2 = (src->color2);
int red1 = Convert5To8(c1 & 0x1F);
int red2 = Convert5To8(c2 & 0x1F);
int green1 = Convert6To8((c1 >> 5) & 0x3F);
@ -891,10 +914,10 @@ inline bool TextureCache::TexCacheEntry::Matches(u16 dim2, u8 format2, int maxLe
}
void TextureCache::LoadClut() {
u32 clutAddr = GetClutAddr();
u32 clutAddr = ((gstate.clutaddr & 0xFFFFFF) | ((gstate.clutaddrupper << 8) & 0x0F000000));
clutTotalBytes_ = (gstate.loadclut & 0x3f) * 32;
if (Memory::IsValidAddress(clutAddr)) {
Memory::Memcpy(clutBufRaw_, clutAddr, clutTotalBytes_);
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, clutTotalBytes_);
} else {
memset(clutBufRaw_, 0xFF, clutTotalBytes_);
}
@ -910,9 +933,7 @@ void TextureCache::UpdateCurrentClut() {
// If not, we're going to hash random data, which hopefully doesn't cause a performance issue.
const u32 clutExtendedBytes = clutTotalBytes_ + clutBaseBytes;
// QuickClutHash is not quite good enough apparently.
// clutHash_ = QuickClutHash((const u8 *)clutBufRaw_, clutExtendedBytes);
clutHash_ = CityHash32((const char *)clutBufRaw_, clutExtendedBytes);
clutHash_ = XXH32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
/*
// Avoid a copy when we don't need to convert colors.
@ -969,17 +990,17 @@ bool SetDebugTexture() {
static int lastFrames = 0;
static int mostTextures = 1;
if (lastFrames != gpuStats.numFrames) {
if (lastFrames != gpuStats.numFlips) {
mostTextures = std::max(mostTextures, numTextures);
numTextures = 0;
lastFrames = gpuStats.numFrames;
lastFrames = gpuStats.numFlips;
}
static GLuint solidTexture = 0;
bool changed = false;
if (((gpuStats.numFrames / highlightFrames) % mostTextures) == numTextures) {
if (gpuStats.numFrames % highlightFrames == 0) {
if (((gpuStats.numFlips / highlightFrames) % mostTextures) == numTextures) {
if (gpuStats.numFlips % highlightFrames == 0) {
NOTICE_LOG(HLE, "Highlighting texture # %d / %d", numTextures, mostTextures);
}
static const u32 solidTextureData[] = {0x99AA99FF};
@ -1014,7 +1035,7 @@ void TextureCache::SetTexture() {
if (!Memory::IsValidAddress(texaddr)) {
// Bind a null texture and return.
pD3Ddevice->SetTexture(0, NULL);
lastBoundTexture = NULL;
lastBoundTexture = INVALID_TEX;
return;
}
@ -1039,8 +1060,8 @@ void TextureCache::SetTexture() {
cluthash = 0;
}
int w = 1 << (gstate.texsize[0] & 0xf);
int h = 1 << ((gstate.texsize[0] >> 8) & 0xf);
int w = gstate.getTextureWidth(0);
int h = gstate.getTextureHeight(0);
int bufw = GetLevelBufw(0, texaddr);
int maxLevel = ((gstate.texmode >> 16) & 0x7);
@ -1060,30 +1081,29 @@ void TextureCache::SetTexture() {
if (entry->framebuffer) {
entry->framebuffer->usageFlags |= FB_USAGE_TEXTURE;
if (useBufferedRendering) {
if (entry->framebuffer->fbo) {
// For now, let's not bind FBOs that we know are off (invalidHint will be -1.)
// But let's still not use random memory.
if (entry->framebuffer->fbo && entry->invalidHint != -1) {
fbo_bind_color_as_texture(entry->framebuffer->fbo, 0);
lastBoundTexture = NULL;
} else {
pD3Ddevice->SetTexture(0, NULL);
lastBoundTexture = NULL;
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
}
UpdateSamplingParams(*entry, false);
// This isn't right.
gstate_c.curTextureWidth = entry->framebuffer->width;
gstate_c.curTextureHeight = entry->framebuffer->height;
gstate_c.flipTexture = true;
gstate_c.textureFullAlpha = entry->framebuffer->format == GE_FORMAT_565;
entry->lastFrame = gpuStats.numFlips;
} else {
if (entry->framebuffer->fbo)
entry->framebuffer->fbo = 0;
pD3Ddevice->SetTexture(0, NULL);
lastBoundTexture = NULL;
entry->lastFrame = gpuStats.numFlips;
}
lastBoundTexture = INVALID_TEX;
entry->lastFrame = gpuStats.numFlips;
return;
}
//Validate the texture here (width, height etc)
int dim = gstate.texsize[0] & 0xF0F;
@ -1178,7 +1198,7 @@ void TextureCache::SetTexture() {
replaceImages = true;
} else {
if (entry->texture == lastBoundTexture) {
lastBoundTexture = NULL;
lastBoundTexture = INVALID_TEX;
}
entry->texture->Release();
}
@ -1268,8 +1288,8 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
int bufw = GetLevelBufw(level, texaddr);
int w = 1 << (gstate.texsize[level] & 0xf);
int h = 1 << ((gstate.texsize[level] >> 8) & 0xf);
int w = gstate.getTextureWidth(level);
int h = gstate.getTextureHeight(level);
const u8 *texptr = Memory::GetPointer(texaddr);
switch (format)
@ -1632,11 +1652,12 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac
return;
}
int w = 1 << (gstate.texsize[level] & 0xf);
int h = 1 << ((gstate.texsize[level] >> 8) & 0xf);
int w = gstate.getTextureWidth(level);
int h = gstate.getTextureHeight(level);
gpuStats.numTexturesDecoded++;
// Can restore these and remove the above fixup on some platforms.
// Can restore these and remove the fixup at the end of DecodeTextureLevel on desktop GL and GLES 3.
// glPixelStorei(GL_UNPACK_ROW_LENGTH, bufw);
// glPixelStorei(GL_PACK_ROW_LENGTH, bufw);

View file

@ -32,6 +32,13 @@ enum TextureFiltering {
LINEAR = 3,
LINEARFMV = 4,
};
// Kind of framebuffer event passed to TextureCache::NotifyFramebuffer.
enum FramebufferNotification {
	// A framebuffer was created.
	NOTIFY_FB_CREATED = 0,
	// An existing framebuffer was updated.
	NOTIFY_FB_UPDATED = 1,
	// A framebuffer was destroyed.
	NOTIFY_FB_DESTROYED = 2,
};
class TextureCache
{
public:
@ -49,8 +56,7 @@ public:
// FramebufferManager keeps TextureCache updated about what regions of memory
// are being rendered to. This is barebones so far.
void NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer);
void NotifyFramebufferDestroyed(u32 address, VirtualFramebuffer *framebuffer);
void NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg);
size_t NumLoadedTextures() const {
return cache.size();
@ -119,6 +125,8 @@ private:
const T *GetCurrentClut();
u32 GetCurrentClutHash();
void UpdateCurrentClut();
void AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, bool exactMatch);
void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer);
TexCacheEntry *GetEntryAt(u32 texaddr);
@ -147,5 +155,7 @@ private:
LPDIRECT3DTEXTURE9 lastBoundTexture;
float maxAnisotropyLevel;
int decimationCounter_;
};