From ad5df976bd184e3a15dcd420df474dfdd5dae61a Mon Sep 17 00:00:00 2001 From: arnastia Date: Fri, 28 Jun 2013 14:48:36 +0100 Subject: [PATCH 1/8] Blit framebuffers using screen aligned quads (buggy); Implement asynchronous framebuffer readback for OpenGL and optional CPU-side pixel format conversion (needs optimization); Add OpenGL ES support (untested). --- Core/Config.cpp | 4 + Core/Config.h | 2 + GPU/GLES/Framebuffer.cpp | 652 +++++++++++++++++++++++++++------------ GPU/GLES/Framebuffer.h | 22 +- 4 files changed, 490 insertions(+), 190 deletions(-) diff --git a/Core/Config.cpp b/Core/Config.cpp index f004f5b604..3778e9761c 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -115,6 +115,8 @@ void Config::Load(const char *iniFileName) graphics->Get("StretchToDisplay", &bStretchToDisplay, false); graphics->Get("TrueColor", &bTrueColor, true); graphics->Get("FramebuffersToMem", &bFramebuffersToMem, false); + graphics->Get("AsyncReadback", &bAsyncReadback, true); + graphics->Get("CPUConvert", &bCPUConvert, false); graphics->Get("MipMap", &bMipMap, true); graphics->Get("TexScalingLevel", &iTexScalingLevel, 1); graphics->Get("TexScalingType", &iTexScalingType, 0); @@ -218,6 +220,8 @@ void Config::Save() graphics->Set("StretchToDisplay", bStretchToDisplay); graphics->Set("TrueColor", bTrueColor); graphics->Set("FramebuffersToMem", bFramebuffersToMem); + graphics->Set("AsyncReadback", bAsyncReadback); + graphics->Set("CPUConvert", bCPUConvert); graphics->Set("MipMap", bMipMap); graphics->Set("TexScalingLevel", iTexScalingLevel); graphics->Set("TexScalingType", iTexScalingType); diff --git a/Core/Config.h b/Core/Config.h index 251d4229a3..ab2c402e56 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -80,6 +80,8 @@ public: int iAnisotropyLevel; bool bTrueColor; bool bFramebuffersToMem; + bool bAsyncReadback; // TEMP + bool bCPUConvert; // TEMP bool bMipMap; int iTexScalingLevel; // 1 = off, 2 = 2x, ..., 5 = 5x int iTexScalingType; // 0 = xBRZ, 1 = Hybrid diff --git 
a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index d936f20204..093c5c4545 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -57,6 +57,18 @@ static const char basic_vs[] = " gl_Position = u_viewproj * a_position;\n" "}\n"; +static const char blit_fs[] = + "#ifdef GL_ES\n" + "precision mediump float;\n" + "#endif\n" + "uniform sampler2D sampler0;\n" + "varying vec4 v_color;\n" + "varying vec2 v_texcoord0;\n" + "void main() {\n" + " v_color = texture2D(sampler0, v_texcoord0).bgra;\n" + " gl_FragColor = v_color;\n" + "}\n"; + // Aggressively delete unused FBO:s to save gpu memory. enum { FBO_OLD_AGE = 5, @@ -66,6 +78,32 @@ static bool MaskedEqual(u32 addr1, u32 addr2) { return (addr1 & 0x3FFFFFF) == (addr2 & 0x3FFFFFF); } +static u16 RGBA8888toRGB565(u32 px) { + return ((px >> 3) & 0x001F) | ((px >> 5) & 0x07E0) | ((px >> 8) & 0xF800); +} + +static u16 BGRA8888toRGB565(u32 px) { + return ((px >> 19) & 0x001F) | ((px >> 5) & 0x07E0) | ((px << 8) & 0xF800); +} + +static u16 RGBA8888toRGBA4444(u32 px) { + return ((px >> 4) & 0x000F) | ((px >> 8) & 0x00F0) | ((px >> 12) & 0x0F00) | ((px >> 16) & 0xF000); +} + +static u16 BGRA8888toRGBA4444(u32 px) { + return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000); +} + +static u16 RGBA8888toRGBA1555(u32 px) { + return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000); +} + +static u16 BGRA8888toRGBA1555(u32 px) { + return ((px >> 19) & 0x001F) | ((px >> 6) & 0x03E0) | ((px << 7) & 0x7C00) | ((px >> 16) & 0x8000); +} + +void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, int format, bool bgra = false); + void CenterRect(float *x, float *y, float *w, float *h, float origW, float origH, float frameW, float frameH) { @@ -116,7 +154,9 @@ FramebufferManager::FramebufferManager() : currentRenderVfb_(0), drawPixelsTex_(0), drawPixelsTexFormat_(-1), - convBuf(0) + convBuf(0), + pixelBufObj_(0), + currentPBO_(0) 
{ draw2dprogram = glsl_create_source(basic_vs, tex_fs); @@ -124,6 +164,12 @@ FramebufferManager::FramebufferManager() : glUniform1i(draw2dprogram->sampler0, 0); glsl_unbind(); + blitprogram = glsl_create_source(basic_vs, blit_fs); + + glsl_bind(blitprogram); + glUniform1i(draw2dprogram->sampler0, 0); + glsl_unbind(); + // And an initial clear. We don't clear per frame as the games are supposed to handle that // by themselves. glstate.depthWrite.set(GL_TRUE); @@ -138,6 +184,9 @@ FramebufferManager::~FramebufferManager() { if (drawPixelsTex_) glDeleteTextures(1, &drawPixelsTex_); glsl_destroy(draw2dprogram); + glsl_destroy(blitprogram); + + delete [] pixelBufObj_; delete [] convBuf; } @@ -243,27 +292,33 @@ void FramebufferManager::DrawPixels(const u8 *framebuf, int pixelFormat, int lin DrawActiveTexture(x, y, w, h, false, 480.0f / 512.0f); } -void FramebufferManager::DrawActiveTexture(float x, float y, float w, float h, bool flip, float uscale) { +void FramebufferManager::DrawActiveTexture(float x, float y, float w, float h, bool flip, float uscale, GLSLProgram *program) { float u2 = uscale; float v1 = flip ? 1.0f : 0.0f; float v2 = flip ? 
0.0f : 1.0f; const float pos[12] = {x,y,0, x+w,y,0, x+w,y+h,0, x,y+h,0}; const float texCoords[8] = {0, v1, u2, v1, u2, v2, 0, v2}; + const GLubyte indices[4] = {0,1,3,2}; - glsl_bind(draw2dprogram); + if(!program) { + program = draw2dprogram; + } + + glsl_bind(program); Matrix4x4 ortho; ortho.setOrtho(0, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, 0, -1, 1); - glUniformMatrix4fv(draw2dprogram->u_viewproj, 1, GL_FALSE, ortho.getReadPtr()); + glUniformMatrix4fv(program->u_viewproj, 1, GL_FALSE, ortho.getReadPtr()); glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); - glEnableVertexAttribArray(draw2dprogram->a_position); - glEnableVertexAttribArray(draw2dprogram->a_texcoord0); - glVertexAttribPointer(draw2dprogram->a_position, 3, GL_FLOAT, GL_FALSE, 12, pos); - glVertexAttribPointer(draw2dprogram->a_texcoord0, 2, GL_FLOAT, GL_FALSE, 8, texCoords); - glDrawArrays(GL_TRIANGLE_FAN, 0, 4); // TODO: TRIANGLE_STRIP is more likely to be optimized. - glDisableVertexAttribArray(draw2dprogram->a_position); - glDisableVertexAttribArray(draw2dprogram->a_texcoord0); + glEnableVertexAttribArray(program->a_position); + glEnableVertexAttribArray(program->a_texcoord0); + glVertexAttribPointer(program->a_position, 3, GL_FLOAT, GL_FALSE, 12, pos); + glVertexAttribPointer(program->a_texcoord0, 2, GL_FLOAT, GL_FALSE, 8, texCoords); + //glDrawArrays(GL_TRIANGLE_FAN, 0, 4); // TODO: TRIANGLE_STRIP is more likely to be optimized. 
+ glDrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_BYTE, indices); // Trying glDrawElements with GL_TRIANGLE_STRIP + glDisableVertexAttribArray(program->a_position); + glDisableVertexAttribArray(program->a_texcoord0); glsl_unbind(); } @@ -587,6 +642,10 @@ void FramebufferManager::CopyDisplayToOutput() { } displayFramebuf_ = vfb; + if(g_Config.bFramebuffersToMem) { + ReadFramebufferToMemory(vfb); + } + if (vfb->fbo) { glstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); DEBUG_LOG(HLE, "Displaying FBO %08x", vfb->fb_address); @@ -605,10 +664,6 @@ void FramebufferManager::CopyDisplayToOutput() { glBindTexture(GL_TEXTURE_2D, 0); } - if(g_Config.bFramebuffersToMem) { - ReadFramebufferToMemory(vfb); - } - if (resized_) { glstate.depthWrite.set(GL_TRUE); glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); @@ -628,182 +683,254 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb) { glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); } if(vfb) { - float renderWidthFactor = (float)PSP_CoreParameter().renderWidth / 480.0f; - float renderHeightFactor = (float)PSP_CoreParameter().renderHeight / 272.0f; + // We'll pseudo-blit framebuffers here to get a resized and flipped version of vfb. + // For now we'll keep these on the same struct as the ones that can get displayed + // (and blatantly copy work already done above while at it). + VirtualFramebuffer *nvfb = 0; - // If render resolution different we blit a new framebuffer and copy that one to memory - // (assuming rendering a smaller resolution than the PSP isn't done on any device, but not sure on that) - // A more accurate (and probably costly) solution would be to draw a framebuffer at native resolution - // parallel to the rendering one? 
- if(renderWidthFactor > 1.0f || renderHeightFactor > 1.0f) { - // For now we'll also keep these framebuffer objects on the same struct as the ones that can get displayed - // (and blatantly copy work already done above while at it) - VirtualFramebuffer *nvfb = 0; - - // We maintain a separate vector of framebuffer objects for blitting (guessing the point is saving on FBOs?) - for (size_t i = 0; i < bvfbs_.size(); ++i) { - VirtualFramebuffer *v = bvfbs_[i]; - if (MaskedEqual(v->fb_address, vfb->fb_address) && v->format == vfb->format) { - if (v->bufferWidth == vfb->bufferWidth && v->bufferHeight == vfb->bufferHeight) { - nvfb = v; - v->fb_stride = vfb->fb_stride; - v->width = vfb->width; - v->height = vfb->height; - break; - } + // We maintain a separate vector of framebuffer objects for blitting. + for (size_t i = 0; i < bvfbs_.size(); ++i) { + VirtualFramebuffer *v = bvfbs_[i]; + if (MaskedEqual(v->fb_address, vfb->fb_address) && v->format == vfb->format) { + if (v->bufferWidth == vfb->bufferWidth && v->bufferHeight == vfb->bufferHeight) { + nvfb = v; + v->fb_stride = vfb->fb_stride; + v->width = vfb->width; + v->height = vfb->height; + break; } } - - // Create a new fbo if none was found for the size - if(!nvfb) { - nvfb = new VirtualFramebuffer(); - nvfb->fbo = 0; - nvfb->fb_address = vfb->fb_address; - nvfb->fb_stride = vfb->fb_stride; - nvfb->z_address = vfb->z_address; - nvfb->z_stride = vfb->z_stride; - nvfb->width = vfb->width; - nvfb->height = vfb->height; - nvfb->renderWidth = vfb->width; - nvfb->renderHeight = vfb->height; - nvfb->bufferWidth = vfb->bufferWidth; - nvfb->bufferHeight = vfb->bufferHeight; - nvfb->format = vfb->format; - nvfb->usageFlags = FB_USAGE_RENDERTARGET; - nvfb->dirtyAfterDisplay = true; - - if (g_Config.bTrueColor) { - nvfb->colorDepth = FBO_8888; - } else { - switch (vfb->format) { - case GE_FORMAT_4444: - nvfb->colorDepth = FBO_4444; - break; - case GE_FORMAT_5551: - nvfb->colorDepth = FBO_5551; - break; - case GE_FORMAT_565: - 
nvfb->colorDepth = FBO_565; - break; - case GE_FORMAT_8888: - nvfb->colorDepth = FBO_8888; - break; - default: - nvfb->colorDepth = FBO_8888; - break; - } - } - - //#ifdef ANDROID - // nvfb->colorDepth = FBO_8888; - //#endif - - nvfb->fbo = fbo_create(nvfb->width, nvfb->height, 1, true, nvfb->colorDepth); - if (useBufferedRendering_) { - if (nvfb->fbo) { - fbo_bind_as_render_target(nvfb->fbo); - } else { - ERROR_LOG(HLE, "Error creating FBO! %i x %i", vfb->renderWidth, vfb->renderHeight); - } - } - - nvfb->last_frame_used = gpuStats.numFrames; - bvfbs_.push_back(nvfb); - } else { - // We already have one, so we set it as a render target. - //DEBUG_LOG(HLE, "Switching render target to FBO for %08x: %i x %i x %i ", nvfb->fb_address, nvfb->width, nvfb->height, nvfb->format); - nvfb->usageFlags |= FB_USAGE_RENDERTARGET; - nvfb->last_frame_used = gpuStats.numFrames; - nvfb->dirtyAfterDisplay = true; - - if (useBufferedRendering_) { - if (nvfb->fbo) { - fbo_bind_as_render_target(nvfb->fbo); - } else { - fbo_unbind(); - if(gl_extensions.FBO_ARB) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - } - } - } -#ifdef USING_GLES2 - // Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering - // to it. This broke stuff before, so now it only clears on the first use of an - // FBO in a frame. This means that some games won't be able to avoid the on-some-GPUs - // performance-crushing framebuffer reloads from RAM, but we'll have to live with that. 
- if (nvfb->last_frame_used != gpuStats.numFrames) { - glstate.depthWrite.set(GL_TRUE); - glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - glClearColor(0,0,0,1); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); - } -#endif - } - - // We bind the resized fbo for reading - if (useBufferedRendering_) { - if (vfb->fbo) { - fbo_bind_for_read(vfb->fbo); - } else { - fbo_unbind(); - if(gl_extensions.FBO_ARB) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - } - } - } - - // And we check both framebuffers for completeness - if(glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE - || glCheckFramebufferStatus(GL_READ_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - DEBUG_LOG(HLE, "Incomplete FBOs pre-blitting"); - fbo_unbind(); - if(gl_extensions.FBO_ARB) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - } - return; - } - - // TODO: glReadBuffer and glDrawBuffer should maybe be specifically set here? - - // Then we blit the color buffer using linear filtering - DEBUG_LOG(HLE, "Blitting FBOs for %08x: %i x %i to %i x %i ", nvfb->fb_address, vfb->renderWidth, vfb->renderHeight, nvfb->renderWidth, nvfb->renderHeight); - glBlitFramebuffer(0, 0, vfb->fb_stride * renderWidthFactor, vfb->renderHeight, 0, 0, nvfb->fb_stride, nvfb->height, GL_COLOR_BUFFER_BIT, GL_LINEAR); - fbo_unbind(); - if(gl_extensions.FBO_ARB) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - } - vfb = nvfb; } - int pixelType, pixelSize, pixelFormat, align; + // Create a new fbo if none was found for the size + if(!nvfb) { + nvfb = new VirtualFramebuffer(); + nvfb->fbo = 0; + nvfb->fb_address = vfb->fb_address; + nvfb->fb_stride = vfb->fb_stride; + nvfb->z_address = vfb->z_address; + nvfb->z_stride = vfb->z_stride; + nvfb->width = vfb->width; + nvfb->height = vfb->height; + nvfb->renderWidth = vfb->width; + nvfb->renderHeight = vfb->height; + nvfb->bufferWidth = vfb->bufferWidth; + nvfb->bufferHeight = vfb->bufferHeight; + nvfb->format = vfb->format; + 
nvfb->usageFlags = FB_USAGE_RENDERTARGET; + nvfb->dirtyAfterDisplay = true; + + if(g_Config.bTrueColor) { + nvfb->colorDepth = FBO_8888; + } else { + switch (vfb->format) { + case GE_FORMAT_4444: + nvfb->colorDepth = FBO_4444; + break; + case GE_FORMAT_5551: + nvfb->colorDepth = FBO_5551; + break; + case GE_FORMAT_565: + nvfb->colorDepth = FBO_565; + break; + case GE_FORMAT_8888: + default: + nvfb->colorDepth = FBO_8888; + break; + } + } + + //#ifdef ANDROID + // nvfb->colorDepth = FBO_8888; + //#endif + + nvfb->fbo = fbo_create(nvfb->width, nvfb->height, 1, true, nvfb->colorDepth); + if (!(nvfb->fbo)) { + ERROR_LOG(HLE, "Error creating FBO! %i x %i", nvfb->renderWidth, nvfb->renderHeight); + } + + if (useBufferedRendering_) { + if (nvfb->fbo) { + fbo_bind_as_render_target(nvfb->fbo); + } else { + fbo_unbind(); + return; + } + } + + nvfb->last_frame_used = gpuStats.numFrames; + bvfbs_.push_back(nvfb); + + glstate.depthWrite.set(GL_TRUE); + glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glClearColor(0.0f,0.0f,0.0f,1.0f); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); + //glEnable(GL_DITHER); + } else { + nvfb->usageFlags |= FB_USAGE_RENDERTARGET; + nvfb->last_frame_used = gpuStats.numFrames; + nvfb->dirtyAfterDisplay = true; + + if (useBufferedRendering_) { + if (nvfb->fbo) { + fbo_bind_as_render_target(nvfb->fbo); +#ifdef USING_GLES2 + // Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering + // to it. This broke stuff before, so now it only clears on the first use of an + // FBO in a frame. This means that some games won't be able to avoid the on-some-GPUs + // performance-crushing framebuffer reloads from RAM, but we'll have to live with that. 
+ if (nvfb->last_frame_used != gpuStats.numFrames) { + glstate.depthWrite.set(GL_TRUE); + glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glClearColor(0,0,0,1); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); + } +#endif + } else { + fbo_unbind(); + return; + } + } + } + + BlitFramebuffer_(vfb, nvfb, true); + + PackFramebuffer_(nvfb); + } +} + +void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *src, VirtualFramebuffer *dst, bool flip, float upscale) { + // This only works with buffered rendering + if (!useBufferedRendering_) { + return; + } + + fbo_bind_as_render_target(dst->fbo); + + if(glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + ERROR_LOG(HLE, "Incomplete target framebuffer, aborting blit"); + fbo_unbind(); + return; + } + + if(src->format == GE_FORMAT_565) { + // Not sure this should be done + glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + } else { + glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + } + + glstate.viewport.set(0, 0, dst->width, dst->height); + glstate.depthTest.disable(); + glstate.blend.disable(); + glstate.cullFace.disable(); + glstate.depthTest.disable(); + glstate.scissorTest.disable(); + glstate.stencilTest.disable(); + + fbo_bind_color_as_texture(src->fbo, 0); + + float x, y, w, h; + CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight); + +#ifdef USING_GLES2 + DrawActiveTexture(x, y, w, h, !flip, upscale, draw2dprogram); +#else + if(!g_Config.bAsyncReadback || (g_Config.bCPUConvert && src->format != GE_FORMAT_8888)) { + DrawActiveTexture(x, y, w, h, !flip, upscale, draw2dprogram); + } else { + DrawActiveTexture(x, y, w, h, !flip, upscale, blitprogram); + } +#endif + + glBindTexture(GL_TEXTURE_2D, 0); + fbo_unbind(); +} + +void FramebufferManager::PackFramebuffer_(VirtualFramebuffer *vfb) { +#ifdef USING_GLES2 + PackFramebufferGLES_(vfb); // synchronous 
glReadPixels +#else + if(g_Config.bAsyncReadback) { + PackFramebufferGL_(vfb); // asynchronous glReadPixels using PBOs + } else { + PackFramebufferGLES_(vfb); // synchronous glReadPixels + } +#endif +} + +void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, int format, bool bgra) { + if(format == GE_FORMAT_8888) { + return; + } else { + for (int j = 0; j < height; j++) { + const u32 *src32 = (u32 *)(src + stride * j*4); + u16 *dst16 = (u16 *)(dst + stride * j*2); + switch (format) { + case GE_FORMAT_565: // BGR 565 + for(int i = 0; i < stride; i++) { + u32 px = *(src32 + i); + *(dst16+i) = (bgra ? BGRA8888toRGB565(px) : RGBA8888toRGB565(px)); + } + break; + case GE_FORMAT_5551: // ABGR 1555 + for(int i = 0; i < stride; i++) { + u32 px = *(src32 + i); + *(dst16+i) = (bgra ? BGRA8888toRGBA1555(px) : RGBA8888toRGBA1555(px)); + } + + break; + case GE_FORMAT_4444: // ABGR 4444 + for(int i = 0; i < stride; i++) { + u32 px = *(src32 + i); + *(dst16+i) = (bgra ? BGRA8888toRGBA4444(px) : RGBA8888toRGBA4444(px)); + } + break; + default: + break; + } + } + } +} + +void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { + GLubyte *packed = 0; + + // Order packing/readback of the framebuffer + if(vfb) { + int pixelType, pixelFormat, pixelSize, align; + switch (vfb->format) { case GE_FORMAT_4444: // 16 bit ABGR pixelType = GL_UNSIGNED_SHORT_4_4_4_4_REV; - pixelFormat = GL_RGBA; + pixelFormat = GL_BGRA; pixelSize = 2; align = 8; break; case GE_FORMAT_5551: // 16 bit ABGR pixelType = GL_UNSIGNED_SHORT_1_5_5_5_REV; - pixelFormat = GL_RGBA; + pixelFormat = GL_BGRA; pixelSize = 2; align = 8; break; case GE_FORMAT_565: // 16 bit BGR - pixelType = GL_UNSIGNED_SHORT_5_6_5_REV; - pixelFormat = GL_RGB; + pixelType = GL_UNSIGNED_SHORT_5_6_5; + pixelFormat = GL_BGR; pixelSize = 2; align = 8; break; case GE_FORMAT_8888: // 32 bit ABGR - default: // And same as above - pixelType = GL_UNSIGNED_INT_8_8_8_8_REV; - pixelFormat = GL_RGBA; + default: + pixelType = 
GL_UNSIGNED_BYTE; + pixelFormat = GL_BGRA; pixelSize = 4; align = 4; break; } + size_t bufSize = vfb->fb_stride * vfb->height * pixelSize; + u32 fb_address = (0x44000000) | vfb->fb_address; + if (useBufferedRendering_) { if (vfb->fbo) { fbo_bind_for_read(vfb->fbo); @@ -816,37 +943,170 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb) { } } - // Prepare buffers to read the pixel data into - // Ideally we'd apply the image flip inplace and only need one, - // but right now this is simpler - // (maybe it's more efficient to have the GPU flip it in the framebuffer and then flip it back?) - int bufHeight = vfb->height; - size_t bufSize = vfb->fb_stride * bufHeight; - GLubyte *buf1 = (GLubyte *) malloc(bufSize * pixelSize); - GLubyte *buf2 = (GLubyte *) malloc(bufSize * pixelSize); + if(!pixelBufObj_) { + GLuint pbos[2]; - u32 fb_address = (0x44000000) | vfb->fb_address; + glGenBuffers(2, pbos); - DEBUG_LOG(HLE, "Reading pixels to mem, bufSize = %u, buf = %08x, fb_address = %08x", bufSize, buf1, fb_address); - glPixelStorei(GL_PACK_ALIGNMENT, align); - glReadPixels(0, 0, vfb->fb_stride, vfb->height, pixelFormat, pixelType, buf1); - - // We have to flip glReadPixels data upside down - int i, j, t; - for(i = 0; i < bufHeight; i++) { - for(j = 0; j < vfb->fb_stride * pixelSize; j++) { - buf2[(bufHeight - 1 - i) * vfb->fb_stride * pixelSize + j] = buf1[i * vfb->fb_stride * pixelSize + j]; + pixelBufObj_ = new AsyncPBO[2]; + + pixelBufObj_[0].handle = pbos[0]; + pixelBufObj_[0].maxSize = 0; + pixelBufObj_[0].reading = false; + + pixelBufObj_[1].handle = pbos[1]; + pixelBufObj_[1].maxSize = 0; + pixelBufObj_[1].reading = false; + + currentPBO_ = 0; + } + + glBindBuffer(GL_PIXEL_PACK_BUFFER, pixelBufObj_[currentPBO_].handle); + + if(pixelBufObj_[currentPBO_].maxSize < bufSize) { + if(g_Config.bCPUConvert && pixelType != GL_UNSIGNED_BYTE) { + glBufferData(GL_PIXEL_PACK_BUFFER, bufSize*2, NULL, GL_DYNAMIC_READ); + } else { + 
glBufferData(GL_PIXEL_PACK_BUFFER, bufSize, NULL, GL_DYNAMIC_READ); } + pixelBufObj_[currentPBO_].maxSize = bufSize; } - Memory::Memcpy(fb_address, buf2, bufSize * pixelSize); - free(buf1); - free(buf2); - fbo_unbind(); - if(gl_extensions.FBO_ARB) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + pixelBufObj_[currentPBO_].fb_address = fb_address; + pixelBufObj_[currentPBO_].size = bufSize; + pixelBufObj_[currentPBO_].stride = vfb->fb_stride; + pixelBufObj_[currentPBO_].height = vfb->height; + pixelBufObj_[currentPBO_].format = vfb->format; + pixelBufObj_[currentPBO_].reading = true; + + if(g_Config.bCPUConvert) { + glPixelStorei(GL_PACK_ALIGNMENT, 4); + glReadPixels(0, 0, vfb->fb_stride, vfb->height, GL_BGRA, GL_UNSIGNED_BYTE, 0); + } else { + glPixelStorei(GL_PACK_ALIGNMENT, align); + glReadPixels(0, 0, vfb->fb_stride, vfb->height, pixelFormat, pixelType, 0); } } + + // Receive data from previous framebuffer + u8 nextPBO = (currentPBO_ + 1) % 2; + if(pixelBufObj_[nextPBO].reading) { + glBindBuffer(GL_PIXEL_PACK_BUFFER, pixelBufObj_[nextPBO].handle); + packed = (GLubyte *)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); + + if(packed) { + DEBUG_LOG(HLE, "Reading pbo to mem, bufSize = %u, packed = %08x, fb_address = %08x, pbo = %u", + pixelBufObj_[nextPBO].size, packed, pixelBufObj_[nextPBO].fb_address, nextPBO); + + if(g_Config.bCPUConvert) { + switch(pixelBufObj_[nextPBO].format) { + case GE_FORMAT_565: + case GE_FORMAT_5551: + case GE_FORMAT_4444: { + u8 *processed = (u8 *)malloc(pixelBufObj_[nextPBO].size); + + if(processed) { + ConvertFromRGBA8888(processed, packed, + pixelBufObj_[nextPBO].stride, pixelBufObj_[nextPBO].height, + pixelBufObj_[nextPBO].format, true); + + Memory::Memcpy(pixelBufObj_[nextPBO].fb_address, processed, pixelBufObj_[nextPBO].size); + + free(processed); + } + } + break; + default: + Memory::Memcpy(pixelBufObj_[nextPBO].fb_address, packed, pixelBufObj_[nextPBO].size); + } + } else { // we don't need to convert + 
Memory::Memcpy(pixelBufObj_[nextPBO].fb_address, packed, pixelBufObj_[nextPBO].size); + } + glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + + pixelBufObj_[nextPBO].reading = false; + } + } + + currentPBO_ = nextPBO; + + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + fbo_unbind(); + if(gl_extensions.FBO_ARB) { + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + } +} + +void FramebufferManager::PackFramebufferGLES_(VirtualFramebuffer *vfb) { + // Pixel size is always 4 here because data will come in RGBA8888 + size_t bufSize = vfb->fb_stride * vfb->height * 4; // pixel size always 4 here + + u8 align = (vfb->format != GE_FORMAT_8888) ? 8 : 4; + + u32 fb_address = (0x44000000) | vfb->fb_address; + + if (useBufferedRendering_) { + if (vfb->fbo) { + fbo_bind_for_read(vfb->fbo); + } else { + fbo_unbind(); + if(gl_extensions.FBO_ARB) { + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + } + return; + } + } + + GLubyte *packed = (GLubyte *)malloc(bufSize * sizeof(GLubyte)); + + if(packed) { + DEBUG_LOG(HLE, "Reading framebuffer to mem, bufSize = %u, packed = %08x, fb_address = %08x", + bufSize, packed, fb_address); + + glPixelStorei(GL_PACK_ALIGNMENT, 4); + glReadPixels(0, 0, vfb->fb_stride, vfb->height, GL_RGBA, GL_UNSIGNED_BYTE, packed); + GLenum error = glGetError(); + switch(error) { + case 0: + break; + case GL_INVALID_ENUM: + ERROR_LOG(HLE, "glReadPixels: GL_INVALID_ENUM"); + break; + case GL_INVALID_VALUE: + ERROR_LOG(HLE, "glReadPixels: GL_INVALID_VALUE"); + break; + case GL_INVALID_OPERATION: + ERROR_LOG(HLE, "glReadPixels: GL_INVALID_OPERATION"); + break; + case GL_INVALID_FRAMEBUFFER_OPERATION: + ERROR_LOG(HLE, "glReadPixels: GL_INVALID_FRAMEBUFFER_OPERATION"); + break; + default: + ERROR_LOG(HLE, "glReadPixels: UNKNOWN OPENGL ERROR %u", error); + break; + } + + if(vfb->format != GE_FORMAT_8888) { // if not RGBA 8888 we need to convert + u8 *processed = (u8 *)malloc(vfb->fb_stride * vfb->height * 2); + + if(processed) { + ConvertFromRGBA8888(processed, packed, vfb->fb_stride, 
vfb->height, vfb->format); + + Memory::Memcpy(fb_address, processed, vfb->fb_stride * vfb->height * 2); + + free(processed); + } + } else { + Memory::Memcpy(fb_address, packed, bufSize); + } + + free(packed); + } + + fbo_unbind(); + if(gl_extensions.FBO_ARB) { + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + } } void FramebufferManager::EndFrame() { @@ -929,6 +1189,20 @@ void FramebufferManager::DecimateFBOs() { vfbs_.erase(vfbs_.begin() + i--); } } + + // Do the same for ReadFramebuffersToMemory's VFBs + for (size_t i = 0; i < bvfbs_.size(); ++i) { + VirtualFramebuffer *vfb = bvfbs_[i]; + if (vfb == displayFramebuf_ || vfb == prevDisplayFramebuf_ || vfb == prevPrevDisplayFramebuf_) { + continue; + } + int age = frameLastFramebufUsed - vfb->last_frame_used; + if (age > FBO_OLD_AGE) { + INFO_LOG(HLE, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age) + DestroyFramebuf(vfb); + bvfbs_.erase(bvfbs_.begin() + i--); + } + } } void FramebufferManager::DestroyAllFBOs() { diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index 40dbba2f76..58dedc38f9 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -77,6 +77,19 @@ struct VirtualFramebuffer { void CenterRect(float *x, float *y, float *w, float *h, float origW, float origH, float frameW, float frameH); +// Simple struct for asynchronous PBO readbacks +struct AsyncPBO { + GLuint handle; + size_t maxSize; + + u32 fb_address; + u32 stride; + u32 height; + size_t size; + int format; + bool reading; +}; + class ShaderManager; class FramebufferManager { @@ -92,7 +105,7 @@ public: } void DrawPixels(const u8 *framebuf, int pixelFormat, int linesize); - void DrawActiveTexture(float x, float y, float w, float h, bool flip = false, float uscale = 1.0f); + void DrawActiveTexture(float x, float y, float w, float h, bool flip = false, float uscale = 1.0f, GLSLProgram *program = 0); void DestroyAllFBOs(); void DecimateFBOs(); @@ -144,7 +157,14 @@ private: 
VirtualFramebuffer *currentRenderVfb_; // Used by ReadFramebufferToMemory + void BlitFramebuffer_(VirtualFramebuffer *src, VirtualFramebuffer *dst, bool flip = false, float upscale = 1.0f); + void PackFramebuffer_(VirtualFramebuffer *vfb); + void PackFramebufferGL_(VirtualFramebuffer *vfb); + void PackFramebufferGLES_(VirtualFramebuffer *vfb); std::vector bvfbs_; // blitting FBOs + GLSLProgram *blitprogram; + AsyncPBO *pixelBufObj_; //this isn't that large + u8 currentPBO_; // Used by DrawPixels unsigned int drawPixelsTex_; From 9696948a60425c2935e0b6eea51332f5434aa75c Mon Sep 17 00:00:00 2001 From: arnastia Date: Mon, 1 Jul 2013 13:22:19 +0100 Subject: [PATCH 2/8] Pixels should now be read correctly on ATI cards (tested on ATI Mobility Radeon HD5165); Visual artifacts on blank areas after rendering second quad remain. --- GPU/GLES/Framebuffer.cpp | 53 ++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 093c5c4545..fbbff6c84c 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -39,7 +39,7 @@ static const char tex_fs[] = "uniform sampler2D sampler0;\n" "varying vec2 v_texcoord0;\n" "void main() {\n" - " gl_FragColor = texture2D(sampler0, v_texcoord0);\n" + " gl_FragColor.rgb = texture2D(sampler0, v_texcoord0).rgb;\n" " gl_FragColor.a = 1.0;\n" "}\n"; @@ -62,11 +62,10 @@ static const char blit_fs[] = "precision mediump float;\n" "#endif\n" "uniform sampler2D sampler0;\n" - "varying vec4 v_color;\n" "varying vec2 v_texcoord0;\n" "void main() {\n" - " v_color = texture2D(sampler0, v_texcoord0).bgra;\n" - " gl_FragColor = v_color;\n" + " gl_FragColor.rgb = texture2D(sampler0, v_texcoord0).bgr;\n" + " gl_FragColor.a = 1.0;\n" "}\n"; // Aggressively delete unused FBO:s to save gpu memory. 
@@ -78,27 +77,27 @@ static bool MaskedEqual(u32 addr1, u32 addr2) { return (addr1 & 0x3FFFFFF) == (addr2 & 0x3FFFFFF); } -static u16 RGBA8888toRGB565(u32 px) { +inline u16 RGBA8888toRGB565(u32 px) { return ((px >> 3) & 0x001F) | ((px >> 5) & 0x07E0) | ((px >> 8) & 0xF800); } -static u16 BGRA8888toRGB565(u32 px) { +inline u16 BGRA8888toRGB565(u32 px) { return ((px >> 19) & 0x001F) | ((px >> 5) & 0x07E0) | ((px << 8) & 0xF800); } -static u16 RGBA8888toRGBA4444(u32 px) { +inline u16 RGBA8888toRGBA4444(u32 px) { return ((px >> 4) & 0x000F) | ((px >> 8) & 0x00F0) | ((px >> 12) & 0x0F00) | ((px >> 16) & 0xF000); } -static u16 BGRA8888toRGBA4444(u32 px) { +inline u16 BGRA8888toRGBA4444(u32 px) { return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000); } -static u16 RGBA8888toRGBA1555(u32 px) { +inline u16 RGBA8888toRGBA1555(u32 px) { return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000); } -static u16 BGRA8888toRGBA1555(u32 px) { +inline u16 BGRA8888toRGBA1555(u32 px) { return ((px >> 19) & 0x001F) | ((px >> 6) & 0x03E0) | ((px << 7) & 0x7C00) | ((px >> 16) & 0x8000); } @@ -642,10 +641,6 @@ void FramebufferManager::CopyDisplayToOutput() { } displayFramebuf_ = vfb; - if(g_Config.bFramebuffersToMem) { - ReadFramebufferToMemory(vfb); - } - if (vfb->fbo) { glstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); DEBUG_LOG(HLE, "Displaying FBO %08x", vfb->fb_address); @@ -662,6 +657,10 @@ void FramebufferManager::CopyDisplayToOutput() { CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight); DrawActiveTexture(x, y, w, h, true); glBindTexture(GL_TEXTURE_2D, 0); + + if(g_Config.bFramebuffersToMem) { + ReadFramebufferToMemory(vfb); + } } if (resized_) { @@ -678,10 +677,10 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb) { return; } - fbo_unbind(); - 
if(gl_extensions.FBO_ARB) { // TODO: fbo_unbind should use GL_FRAMEBUFFER to do this? Don't want to change native - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - } + //fbo_unbind(); + //if(gl_extensions.FBO_ARB) { // TODO: fbo_unbind should use GL_FRAMEBUFFER to do this? Don't want to change native + // glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + //} if(vfb) { // We'll pseudo-blit framebuffers here to get a resized and flipped version of vfb. // For now we'll keep these on the same struct as the ones that can get displayed @@ -836,7 +835,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *src, VirtualFrameb #ifdef USING_GLES2 DrawActiveTexture(x, y, w, h, !flip, upscale, draw2dprogram); #else - if(!g_Config.bAsyncReadback || (g_Config.bCPUConvert && src->format != GE_FORMAT_8888)) { + if(dst->format != GE_FORMAT_8888) { DrawActiveTexture(x, y, w, h, !flip, upscale, draw2dprogram); } else { DrawActiveTexture(x, y, w, h, !flip, upscale, blitprogram); @@ -851,11 +850,7 @@ void FramebufferManager::PackFramebuffer_(VirtualFramebuffer *vfb) { #ifdef USING_GLES2 PackFramebufferGLES_(vfb); // synchronous glReadPixels #else - if(g_Config.bAsyncReadback) { - PackFramebufferGL_(vfb); // asynchronous glReadPixels using PBOs - } else { - PackFramebufferGLES_(vfb); // synchronous glReadPixels - } + PackFramebufferGL_(vfb); // asynchronous glReadPixels using PBOs #endif } @@ -903,26 +898,26 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { switch (vfb->format) { case GE_FORMAT_4444: // 16 bit ABGR pixelType = GL_UNSIGNED_SHORT_4_4_4_4_REV; - pixelFormat = GL_BGRA; + pixelFormat = GL_RGBA; // ATIs don't seem to care about pixel format, just type pixelSize = 2; align = 8; break; case GE_FORMAT_5551: // 16 bit ABGR pixelType = GL_UNSIGNED_SHORT_1_5_5_5_REV; - pixelFormat = GL_BGRA; + pixelFormat = GL_RGBA; pixelSize = 2; align = 8; break; case GE_FORMAT_565: // 16 bit BGR - pixelType = GL_UNSIGNED_SHORT_5_6_5; - pixelFormat = GL_BGR; + 
pixelType = GL_UNSIGNED_SHORT_5_6_5_REV; + pixelFormat = GL_RGB; pixelSize = 2; align = 8; break; case GE_FORMAT_8888: // 32 bit ABGR default: pixelType = GL_UNSIGNED_BYTE; - pixelFormat = GL_BGRA; + pixelFormat = GL_RGBA; pixelSize = 4; align = 4; break; From 50991408137b777a36f787d96e5d3c5774239440 Mon Sep 17 00:00:00 2001 From: arnastia Date: Mon, 1 Jul 2013 18:35:38 +0100 Subject: [PATCH 3/8] Fixed rendering glitch caused by calling ReadFramebuffersToMem for unfinished framebuffers (non-displayed framebuffers are now read at the beginning of next frame); Fix reversed pixel format for 32-bit textures on Nvidia cards. --- GPU/GLES/Framebuffer.cpp | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index fbbff6c84c..64d035c8e1 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -460,13 +460,6 @@ void FramebufferManager::SetRenderFrameBuffer() { float renderWidthFactor = (float)PSP_CoreParameter().renderWidth / 480.0f; float renderHeightFactor = (float)PSP_CoreParameter().renderHeight / 272.0f; - // Save current render framebuffer to memory - if(currentRenderVfb_) { - if(g_Config.bFramebuffersToMem) { - ReadFramebufferToMemory(currentRenderVfb_); - } - } - // None found? Create one. if (!vfb) { gstate_c.textureChanged = true; @@ -917,7 +910,7 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { case GE_FORMAT_8888: // 32 bit ABGR default: pixelType = GL_UNSIGNED_BYTE; - pixelFormat = GL_RGBA; + pixelFormat = GL_BGRA; pixelSize = 4; align = 4; break; @@ -1036,8 +1029,6 @@ void FramebufferManager::PackFramebufferGLES_(VirtualFramebuffer *vfb) { // Pixel size is always 4 here because data will come in RGBA8888 size_t bufSize = vfb->fb_stride * vfb->height * 4; // pixel size always 4 here - u8 align = (vfb->format != GE_FORMAT_8888) ? 
8 : 4; - u32 fb_address = (0x44000000) | vfb->fb_address; if (useBufferedRendering_) { @@ -1177,8 +1168,13 @@ void FramebufferManager::DecimateFBOs() { if (vfb == displayFramebuf_ || vfb == prevDisplayFramebuf_ || vfb == prevPrevDisplayFramebuf_) { continue; } + int age = frameLastFramebufUsed - vfb->last_frame_used; - if (age > FBO_OLD_AGE) { + if(age == 0) { + if(g_Config.bFramebuffersToMem) { + ReadFramebufferToMemory(vfb); + } + } else if (age > FBO_OLD_AGE) { INFO_LOG(HLE, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age) DestroyFramebuf(vfb); vfbs_.erase(vfbs_.begin() + i--); From 2e5e5e7a629dc8685eec7a337d02ac6d9ffafce5 Mon Sep 17 00:00:00 2001 From: arnastia Date: Mon, 1 Jul 2013 21:51:24 +0100 Subject: [PATCH 4/8] Pixel format switch (should work for both ATI/AMD and Nvidia); Code cleanup; Update submodules. --- GPU/GLES/Framebuffer.cpp | 78 ++++++++++++++++++---------------------- GPU/GLES/Framebuffer.h | 4 +-- lang | 2 +- native | 2 +- pspautotests | 2 +- 5 files changed, 40 insertions(+), 48 deletions(-) diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 86415de37e..ba6063b041 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -64,7 +64,6 @@ static const char basic_vs[] = "attribute vec4 a_position;\n" "attribute vec2 a_texcoord0;\n" "uniform mat4 u_viewproj;\n" - "varying vec4 v_color;\n" "varying vec2 v_texcoord0;\n" "void main() {\n" " v_texcoord0 = a_texcoord0;\n" @@ -95,27 +94,29 @@ inline u16 RGBA8888toRGB565(u32 px) { return ((px >> 3) & 0x001F) | ((px >> 5) & 0x07E0) | ((px >> 8) & 0xF800); } -inline u16 BGRA8888toRGB565(u32 px) { - return ((px >> 19) & 0x001F) | ((px >> 5) & 0x07E0) | ((px << 8) & 0xF800); -} - inline u16 RGBA8888toRGBA4444(u32 px) { return ((px >> 4) & 0x000F) | ((px >> 8) & 0x00F0) | ((px >> 12) & 0x0F00) | ((px >> 16) & 0xF000); } -inline u16 BGRA8888toRGBA4444(u32 px) { - return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | 
((px << 4) & 0x0F00) | ((px >> 16) & 0xF000); -} - inline u16 RGBA8888toRGBA1555(u32 px) { return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000); } -inline u16 BGRA8888toRGBA1555(u32 px) { - return ((px >> 19) & 0x001F) | ((px >> 6) & 0x03E0) | ((px << 7) & 0x7C00) | ((px >> 16) & 0x8000); -} +// Unused right now, testing performance of handling RGBA vs BGRA +// +//inline u16 BGRA8888toRGB565(u32 px) { +// return ((px >> 19) & 0x001F) | ((px >> 5) & 0x07E0) | ((px << 8) & 0xF800); +//} +// +//inline u16 BGRA8888toRGBA4444(u32 px) { +// return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000); +//} +// +//inline u16 BGRA8888toRGBA1555(u32 px) { +// return ((px >> 19) & 0x001F) | ((px >> 6) & 0x03E0) | ((px << 7) & 0x7C00) | ((px >> 16) & 0x8000); +//} -void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, int format, bool bgra = false); +void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, int format); void CenterRect(float *x, float *y, float *w, float *h, float origW, float origH, float frameW, float frameH) @@ -311,8 +312,9 @@ void FramebufferManager::DrawActiveTexture(float x, float y, float w, float h, b float v1 = flip ? 1.0f : 1.0f - vscale; float v2 = flip ? 
1.0f - vscale : 1.0f; - const float pos[12] = {x,y,0, x+w,y,0, x,y+h,0, x+w,y+h,0}; - const float texCoords[8] = {0,v1, u2,v1, 0,v2, u2,v2}; + const float pos[12] = {x,y,0, x+w,y,0, x+w,y+h,0, x,y+h,0}; + const float texCoords[8] = {0,v1, u2,v1, u2,v2, 0,v2}; + const GLubyte indices[4] = {0,1,3,2}; if(!program) { program = draw2dprogram; @@ -327,8 +329,9 @@ void FramebufferManager::DrawActiveTexture(float x, float y, float w, float h, b glEnableVertexAttribArray(program->a_position); glEnableVertexAttribArray(program->a_texcoord0); glVertexAttribPointer(program->a_position, 3, GL_FLOAT, GL_FALSE, 12, pos); - glVertexAttribPointer(program->a_texcoord0, 2, GL_FLOAT, GL_FALSE, 8, texCoords); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + glVertexAttribPointer(program->a_texcoord0, 2, GL_FLOAT, GL_FALSE, 8, texCoords); + //glDrawArrays(GL_TRIANGLE_FAN, 0, 4); // glDrawElements tested slightly faster on OpenGL atleast + glDrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_BYTE, indices); glDisableVertexAttribArray(program->a_position); glDisableVertexAttribArray(program->a_texcoord0); glsl_unbind(); @@ -798,12 +801,12 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb) { } } - BlitFramebuffer_(vfb, nvfb, true); + BlitFramebuffer_(vfb, nvfb, false); #ifdef USING_GLES2 - PackFramebufferGLES_(vfb); // synchronous glReadPixels + PackFramebufferGLES_(nvfb); // synchronous glReadPixels #else - PackFramebufferGL_(vfb); // asynchronous glReadPixels using PBOs + PackFramebufferGL_(nvfb); // asynchronous glReadPixels using PBOs #endif } } @@ -821,14 +824,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *src, VirtualFrameb fbo_unbind(); return; } - - if(src->format == GE_FORMAT_565) { - // Not sure this should be done - glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - } else { - glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - } - + glstate.viewport.set(0, 0, dst->width, dst->height); glstate.depthTest.disable(); 
glstate.blend.disable(); @@ -843,20 +839,16 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *src, VirtualFrameb CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight); #ifdef USING_GLES2 - DrawActiveTexture(x, y, w, h, !flip, upscale, vscale, draw2dprogram); + DrawActiveTexture(x, y, w, h, flip, upscale, vscale, draw2dprogram); #else - if(dst->format != GE_FORMAT_8888) { - DrawActiveTexture(x, y, w, h, !flip, upscale, vscale, draw2dprogram); - } else { - DrawActiveTexture(x, y, w, h, !flip, upscale, vscale, blitprogram); - } + DrawActiveTexture(x, y, w, h, flip, upscale, vscale, blitprogram); #endif glBindTexture(GL_TEXTURE_2D, 0); fbo_unbind(); } -void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, int format, bool bgra) { +void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, int format) { if(format == GE_FORMAT_8888) { return; } else { @@ -867,20 +859,20 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, int format, b case GE_FORMAT_565: // BGR 565 for(int i = 0; i < stride; i++) { u32 px = *(src32 + i); - *(dst16+i) = (bgra ? BGRA8888toRGB565(px) : RGBA8888toRGB565(px)); + *(dst16+i) = RGBA8888toRGB565(px); } break; case GE_FORMAT_5551: // ABGR 1555 for(int i = 0; i < stride; i++) { u32 px = *(src32 + i); - *(dst16+i) = (bgra ? BGRA8888toRGBA1555(px) : RGBA8888toRGBA1555(px)); + *(dst16+i) = RGBA8888toRGBA1555(px); } break; case GE_FORMAT_4444: // ABGR 4444 for(int i = 0; i < stride; i++) { u32 px = *(src32 + i); - *(dst16+i) = (bgra ? 
BGRA8888toRGBA4444(px) : RGBA8888toRGBA4444(px)); + *(dst16+i) = RGBA8888toRGBA4444(px); } break; default: @@ -900,19 +892,19 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { switch (vfb->format) { case GE_FORMAT_4444: // 16 bit ABGR pixelType = GL_UNSIGNED_SHORT_4_4_4_4_REV; - pixelFormat = GL_RGBA; // ATIs don't seem to care about pixel format, just type + pixelFormat = GL_BGRA; // ATIs don't seem to care about pixel format, just type pixelSize = 2; align = 8; break; case GE_FORMAT_5551: // 16 bit ABGR pixelType = GL_UNSIGNED_SHORT_1_5_5_5_REV; - pixelFormat = GL_RGBA; + pixelFormat = GL_BGRA; pixelSize = 2; align = 8; break; case GE_FORMAT_565: // 16 bit BGR pixelType = GL_UNSIGNED_SHORT_5_6_5_REV; - pixelFormat = GL_RGB; + pixelFormat = GL_BGR; pixelSize = 2; align = 8; break; @@ -925,7 +917,7 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { break; } - size_t bufSize = vfb->fb_stride * vfb->height * pixelSize; + u32 bufSize = vfb->fb_stride * vfb->height * pixelSize; u32 fb_address = (0x44000000) | vfb->fb_address; if (useBufferedRendering_) { @@ -1005,7 +997,7 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { if(processed) { ConvertFromRGBA8888(processed, packed, pixelBufObj_[nextPBO].stride, pixelBufObj_[nextPBO].height, - pixelBufObj_[nextPBO].format, true); + pixelBufObj_[nextPBO].format); Memory::Memcpy(pixelBufObj_[nextPBO].fb_address, processed, pixelBufObj_[nextPBO].size); diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index 9f73477eac..c42c4cd930 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -80,12 +80,12 @@ void CenterRect(float *x, float *y, float *w, float *h, // Simple struct for asynchronous PBO readbacks struct AsyncPBO { GLuint handle; - size_t maxSize; + u32 maxSize; u32 fb_address; u32 stride; u32 height; - size_t size; + u32 size; int format; bool reading; }; diff --git a/lang b/lang index 81dc0b9141..c670a27517 160000 --- a/lang +++ 
b/lang @@ -1 +1 @@ -Subproject commit 81dc0b9141e7d1445efc7b152b88ab16ff75c737 +Subproject commit c670a27517d34cd4119d1139d4aec3546866f40d diff --git a/native b/native index 80c85b1c60..94f5e57cfc 160000 --- a/native +++ b/native @@ -1 +1 @@ -Subproject commit 80c85b1c604ccead734d595dd7f67656f3fd9d85 +Subproject commit 94f5e57cfc07607ee651f1e1c77bf42c79d3be0f diff --git a/pspautotests b/pspautotests index e07589caa3..8b0c30efe5 160000 --- a/pspautotests +++ b/pspautotests @@ -1 +1 @@ -Subproject commit e07589caa30222a8878930d909c5b3970d6b0f74 +Subproject commit 8b0c30efe52e0c799a99a632e5e743a56375d3ec From 796800adfd7674997ad619e49d920d5983443de0 Mon Sep 17 00:00:00 2001 From: arnastia Date: Tue, 2 Jul 2013 14:08:59 +0100 Subject: [PATCH 5/8] CPUConvert is now FramebuffersCPUConvert; allocated memory addressed by fb_address is now used in packing code for both OpenGL and OpenGL ES; some code cleanup, most of it pixel format conversion code. --- Core/Config.cpp | 4 +- Core/Config.h | 2 +- GPU/GLES/Framebuffer.cpp | 168 +++++++++++++++------------------------ 3 files changed, 66 insertions(+), 108 deletions(-) diff --git a/Core/Config.cpp b/Core/Config.cpp index a9df8ff5d5..3e091e8b33 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -117,7 +117,7 @@ void Config::Load(const char *iniFileName) graphics->Get("StretchToDisplay", &bStretchToDisplay, false); graphics->Get("TrueColor", &bTrueColor, true); graphics->Get("FramebuffersToMem", &bFramebuffersToMem, false); - graphics->Get("CPUConvert", &bCPUConvert, false); + graphics->Get("FramebuffersCPUConvert", &bFramebuffersCPUConvert, false); graphics->Get("MipMap", &bMipMap, true); graphics->Get("TexScalingLevel", &iTexScalingLevel, 1); graphics->Get("TexScalingType", &iTexScalingType, 0); @@ -230,7 +230,7 @@ void Config::Save() graphics->Set("StretchToDisplay", bStretchToDisplay); graphics->Set("TrueColor", bTrueColor); graphics->Set("FramebuffersToMem", bFramebuffersToMem); - graphics->Set("CPUConvert", 
bCPUConvert); + graphics->Set("FramebuffersCPUConvert", bFramebuffersCPUConvert); graphics->Set("MipMap", bMipMap); graphics->Set("TexScalingLevel", iTexScalingLevel); graphics->Set("TexScalingType", iTexScalingType); diff --git a/Core/Config.h b/Core/Config.h index f7e8ed504b..1fad0d085d 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -81,7 +81,7 @@ public: int iAnisotropyLevel; bool bTrueColor; bool bFramebuffersToMem; - bool bCPUConvert; // for OpenGL devices + bool bFramebuffersCPUConvert; // for OpenGL devices bool bMipMap; int iTexScalingLevel; // 1 = off, 2 = 2x, ..., 5 = 5x int iTexScalingType; // 0 = xBRZ, 1 = Hybrid diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index ba6063b041..5fe372b711 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -90,32 +90,18 @@ static bool MaskedEqual(u32 addr1, u32 addr2) { return (addr1 & 0x3FFFFFF) == (addr2 & 0x3FFFFFF); } -inline u16 RGBA8888toRGB565(u32 px) { +inline u16 ARGB8888toBGR565(u32 px) { return ((px >> 3) & 0x001F) | ((px >> 5) & 0x07E0) | ((px >> 8) & 0xF800); } -inline u16 RGBA8888toRGBA4444(u32 px) { +inline u16 ARGB8888toABGR4444(u32 px) { return ((px >> 4) & 0x000F) | ((px >> 8) & 0x00F0) | ((px >> 12) & 0x0F00) | ((px >> 16) & 0xF000); } -inline u16 RGBA8888toRGBA1555(u32 px) { +inline u16 ARGB8888toABGR1555(u32 px) { return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000); } -// Unused right now, testing performance of handling RGBA vs BGRA -// -//inline u16 BGRA8888toRGB565(u32 px) { -// return ((px >> 19) & 0x001F) | ((px >> 5) & 0x07E0) | ((px << 8) & 0xF800); -//} -// -//inline u16 BGRA8888toRGBA4444(u32 px) { -// return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000); -//} -// -//inline u16 BGRA8888toRGBA1555(u32 px) { -// return ((px >> 19) & 0x001F) | ((px >> 6) & 0x03E0) | ((px << 7) & 0x7C00) | ((px >> 16) & 0x8000); -//} - void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 
stride, u32 height, int format); void CenterRect(float *x, float *y, float *w, float *h, @@ -686,10 +672,6 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb) { return; } - //fbo_unbind(); - //if(gl_extensions.FBO_ARB) { // TODO: fbo_unbind should use GL_FRAMEBUFFER to do this? Don't want to change native - // glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - //} if(vfb) { // We'll pseudo-blit framebuffers here to get a resized and flipped version of vfb. // For now we'll keep these on the same struct as the ones that can get displayed @@ -849,9 +831,13 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *src, VirtualFrameb } void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, int format) { - if(format == GE_FORMAT_8888) { - return; - } else { + if(format == GE_FORMAT_8888) { // Here lets assume they don't intersect + if(src == dst) { + return; + } else { + memcpy(dst, src, stride * height * 4); + } + } else { // But here it shouldn't matter if they do for (int j = 0; j < height; j++) { const u32 *src32 = (u32 *)(src + stride * j*4); u16 *dst16 = (u16 *)(dst + stride * j*2); @@ -859,20 +845,20 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, int format) { case GE_FORMAT_565: // BGR 565 for(int i = 0; i < stride; i++) { u32 px = *(src32 + i); - *(dst16+i) = RGBA8888toRGB565(px); + *(dst16+i) = ARGB8888toBGR565(px); } break; case GE_FORMAT_5551: // ABGR 1555 for(int i = 0; i < stride; i++) { u32 px = *(src32 + i); - *(dst16+i) = RGBA8888toRGBA1555(px); + *(dst16+i) = ARGB8888toABGR1555(px); } break; case GE_FORMAT_4444: // ABGR 4444 for(int i = 0; i < stride; i++) { u32 px = *(src32 + i); - *(dst16+i) = RGBA8888toRGBA4444(px); + *(dst16+i) = ARGB8888toABGR4444(px); } break; default: @@ -884,6 +870,23 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, int format) { void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { GLubyte *packed = 0; + bool unbind = false; + + 
if(!pixelBufObj_) { + GLuint pbos[2]; + + glGenBuffers(2, pbos); + + pixelBufObj_ = new AsyncPBO[2]; + + pixelBufObj_[0].handle = pbos[0]; + pixelBufObj_[0].maxSize = 0; + pixelBufObj_[0].reading = false; + + pixelBufObj_[1].handle = pbos[1]; + pixelBufObj_[1].maxSize = 0; + pixelBufObj_[1].reading = false; + } // Order packing/readback of the framebuffer if(vfb) { @@ -892,7 +895,7 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { switch (vfb->format) { case GE_FORMAT_4444: // 16 bit ABGR pixelType = GL_UNSIGNED_SHORT_4_4_4_4_REV; - pixelFormat = GL_BGRA; // ATIs don't seem to care about pixel format, just type + pixelFormat = GL_BGRA; pixelSize = 2; align = 8; break; @@ -932,28 +935,10 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { } } - if(!pixelBufObj_) { - GLuint pbos[2]; - - glGenBuffers(2, pbos); - - pixelBufObj_ = new AsyncPBO[2]; - - pixelBufObj_[0].handle = pbos[0]; - pixelBufObj_[0].maxSize = 0; - pixelBufObj_[0].reading = false; - - pixelBufObj_[1].handle = pbos[1]; - pixelBufObj_[1].maxSize = 0; - pixelBufObj_[1].reading = false; - - currentPBO_ = 0; - } - glBindBuffer(GL_PIXEL_PACK_BUFFER, pixelBufObj_[currentPBO_].handle); if(pixelBufObj_[currentPBO_].maxSize < bufSize) { - if(g_Config.bCPUConvert && pixelType != GL_UNSIGNED_BYTE) { + if(g_Config.bFramebuffersCPUConvert && pixelType != GL_UNSIGNED_BYTE) { glBufferData(GL_PIXEL_PACK_BUFFER, bufSize*2, NULL, GL_DYNAMIC_READ); } else { glBufferData(GL_PIXEL_PACK_BUFFER, bufSize, NULL, GL_DYNAMIC_READ); @@ -968,13 +953,15 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { pixelBufObj_[currentPBO_].format = vfb->format; pixelBufObj_[currentPBO_].reading = true; - if(g_Config.bCPUConvert) { + if(g_Config.bFramebuffersCPUConvert) { glPixelStorei(GL_PACK_ALIGNMENT, 4); glReadPixels(0, 0, vfb->fb_stride, vfb->height, GL_BGRA, GL_UNSIGNED_BYTE, 0); } else { glPixelStorei(GL_PACK_ALIGNMENT, align); glReadPixels(0, 0, vfb->fb_stride, 
vfb->height, pixelFormat, pixelType, 0); } + + unbind = true; } // Receive data from previous framebuffer @@ -987,27 +974,10 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { DEBUG_LOG(HLE, "Reading pbo to mem, bufSize = %u, packed = %08x, fb_address = %08x, pbo = %u", pixelBufObj_[nextPBO].size, packed, pixelBufObj_[nextPBO].fb_address, nextPBO); - if(g_Config.bCPUConvert) { - switch(pixelBufObj_[nextPBO].format) { - case GE_FORMAT_565: - case GE_FORMAT_5551: - case GE_FORMAT_4444: { - u8 *processed = (u8 *)malloc(pixelBufObj_[nextPBO].size); - - if(processed) { - ConvertFromRGBA8888(processed, packed, - pixelBufObj_[nextPBO].stride, pixelBufObj_[nextPBO].height, - pixelBufObj_[nextPBO].format); - - Memory::Memcpy(pixelBufObj_[nextPBO].fb_address, processed, pixelBufObj_[nextPBO].size); - - free(processed); - } - } - break; - default: - Memory::Memcpy(pixelBufObj_[nextPBO].fb_address, packed, pixelBufObj_[nextPBO].size); - } + if(g_Config.bFramebuffersCPUConvert) { + ConvertFromRGBA8888(Memory::GetPointer(pixelBufObj_[nextPBO].fb_address), packed, + pixelBufObj_[nextPBO].stride, pixelBufObj_[nextPBO].height, + pixelBufObj_[nextPBO].format); } else { // we don't need to convert Memory::Memcpy(pixelBufObj_[nextPBO].fb_address, packed, pixelBufObj_[nextPBO].size); } @@ -1015,36 +985,38 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { pixelBufObj_[nextPBO].reading = false; } + + unbind = true; } currentPBO_ = nextPBO; - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - fbo_unbind(); - if(gl_extensions.FBO_ARB) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + if(unbind) { + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + fbo_unbind(); + if(gl_extensions.FBO_ARB) { + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + } } } void FramebufferManager::PackFramebufferGLES_(VirtualFramebuffer *vfb) { - // Pixel size is always 4 here because data will come in RGBA8888 - size_t bufSize = vfb->fb_stride * vfb->height * 4; // pixel size always 4 here - 
- u32 fb_address = (0x44000000) | vfb->fb_address; - - if (useBufferedRendering_) { - if (vfb->fbo) { - fbo_bind_for_read(vfb->fbo); - } else { - fbo_unbind(); - if(gl_extensions.FBO_ARB) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - } - return; - } + if (useBufferedRendering_ && vfb->fbo) { + fbo_bind_for_read(vfb->fbo); + } else { + fbo_unbind(); + return; } - GLubyte *packed = (GLubyte *)malloc(bufSize * sizeof(GLubyte)); + size_t bufSize = vfb->fb_stride * vfb->height * 4; // pixel size always 4 here + u32 fb_address = (0x44000000) | vfb->fb_address; + + GLubyte *packed = 0; + if(vfb->format == GE_FORMAT_8888) { + packed = (GLubyte *)Memory::GetPointer(fb_address); + } else { // end result may be 16-bit but we are reading 32-bit, so there may not be enough space at fb_address + packed = (GLubyte *)malloc(bufSize * sizeof(GLubyte)); + } if(packed) { DEBUG_LOG(HLE, "Reading framebuffer to mem, bufSize = %u, packed = %08x, fb_address = %08x", @@ -1074,26 +1046,12 @@ void FramebufferManager::PackFramebufferGLES_(VirtualFramebuffer *vfb) { } if(vfb->format != GE_FORMAT_8888) { // if not RGBA 8888 we need to convert - u8 *processed = (u8 *)malloc(vfb->fb_stride * vfb->height * 2); - - if(processed) { - ConvertFromRGBA8888(processed, packed, vfb->fb_stride, vfb->height, vfb->format); - - Memory::Memcpy(fb_address, processed, vfb->fb_stride * vfb->height * 2); - - free(processed); - } - } else { - Memory::Memcpy(fb_address, packed, bufSize); + ConvertFromRGBA8888(Memory::GetPointer(fb_address), packed, vfb->fb_stride, vfb->height, vfb->format); + free(packed); } - - free(packed); } fbo_unbind(); - if(gl_extensions.FBO_ARB) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - } } void FramebufferManager::EndFrame() { From 5a1071f1577c050726914dce9cee1d6ed33e0b71 Mon Sep 17 00:00:00 2001 From: arnastia Date: Tue, 2 Jul 2013 14:10:20 +0100 Subject: [PATCH 6/8] Framebuffers are now read to system memory every third frame (done in Framebuffer::DecimateFBOs()). 
--- GPU/GLES/Framebuffer.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 5fe372b711..6e9b16751c 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -652,10 +652,6 @@ void FramebufferManager::CopyDisplayToOutput() { CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight); DrawActiveTexture(x, y, w, h, true, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height); glBindTexture(GL_TEXTURE_2D, 0); - - if(g_Config.bFramebuffersToMem) { - ReadFramebufferToMemory(vfb); - } } if (resized_) { @@ -1060,6 +1056,11 @@ void FramebufferManager::EndFrame() { glstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); resized_ = false; } + +#ifndef USING_GLES2 + // We flush last packed framebuffer, if any + PackFramebufferGL_(NULL); +#endif } void FramebufferManager::DeviceLost() { @@ -1124,16 +1125,19 @@ void FramebufferManager::DecimateFBOs() { currentRenderVfb_ = 0; for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *vfb = vfbs_[i]; + int age = frameLastFramebufUsed - vfb->last_frame_used; + + if(g_Config.bFramebuffersToMem) { + if((gpuStats.numFrames % 3 == 0) && age < 3) { + ReadFramebufferToMemory(vfb); + } + } + if (vfb == displayFramebuf_ || vfb == prevDisplayFramebuf_ || vfb == prevPrevDisplayFramebuf_) { continue; } - int age = frameLastFramebufUsed - vfb->last_frame_used; - if(age == 0) { - if(g_Config.bFramebuffersToMem) { - ReadFramebufferToMemory(vfb); - } - } else if (age > FBO_OLD_AGE) { + if (age > FBO_OLD_AGE) { INFO_LOG(HLE, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age) DestroyFramebuf(vfb); vfbs_.erase(vfbs_.begin() + i--); From 7747fc9ca7d3e7b3b7193f98fdae5277145a6956 Mon Sep 17 00:00:00 2001 From: arnastia Date: Thu, 4 Jul 2013 15:24:37 +0100 Subject: [PATCH 
7/8] Some changes around pixel format conversion, should work on all cards and Android/OpenGL ES devices now (only fully tested on Nvidia); commenting and code cleanup. --- GPU/GLES/Framebuffer.cpp | 99 +++++++++++++++++----------------------- GPU/GLES/Framebuffer.h | 1 - 2 files changed, 41 insertions(+), 59 deletions(-) diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 6e9b16751c..cd125b32f5 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -70,17 +70,6 @@ static const char basic_vs[] = " gl_Position = u_viewproj * a_position;\n" "}\n"; -static const char blit_fs[] = - "#ifdef GL_ES\n" - "precision mediump float;\n" - "#endif\n" - "uniform sampler2D sampler0;\n" - "varying vec2 v_texcoord0;\n" - "void main() {\n" - " gl_FragColor.rgb = texture2D(sampler0, v_texcoord0).bgr;\n" - " gl_FragColor.a = 1.0;\n" - "}\n"; - // Aggressively delete unused FBO:s to save gpu memory. enum { FBO_OLD_AGE = 5, @@ -90,15 +79,15 @@ static bool MaskedEqual(u32 addr1, u32 addr2) { return (addr1 & 0x3FFFFFF) == (addr2 & 0x3FFFFFF); } -inline u16 ARGB8888toBGR565(u32 px) { +inline u16 RGBA8888toRGB565(u32 px) { return ((px >> 3) & 0x001F) | ((px >> 5) & 0x07E0) | ((px >> 8) & 0xF800); } -inline u16 ARGB8888toABGR4444(u32 px) { +inline u16 RGBA8888toRGBA4444(u32 px) { return ((px >> 4) & 0x000F) | ((px >> 8) & 0x00F0) | ((px >> 12) & 0x0F00) | ((px >> 16) & 0xF000); } -inline u16 ARGB8888toABGR1555(u32 px) { +inline u16 RGBA8888toRGBA5551(u32 px) { return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000); } @@ -164,12 +153,6 @@ FramebufferManager::FramebufferManager() : glUniform1i(draw2dprogram->sampler0, 0); glsl_unbind(); - blitprogram = glsl_create_source(basic_vs, blit_fs); - - glsl_bind(blitprogram); - glUniform1i(draw2dprogram->sampler0, 0); - glsl_unbind(); - // And an initial clear. We don't clear per frame as the games are supposed to handle that // by themselves. 
glstate.depthWrite.set(GL_TRUE); @@ -184,7 +167,6 @@ FramebufferManager::~FramebufferManager() { if (drawPixelsTex_) glDeleteTextures(1, &drawPixelsTex_); glsl_destroy(draw2dprogram); - glsl_destroy(blitprogram); delete [] pixelBufObj_; delete [] convBuf; @@ -725,10 +707,6 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb) { break; } } - - //#ifdef ANDROID - // nvfb->colorDepth = FBO_8888; - //#endif nvfb->fbo = fbo_create(nvfb->width, nvfb->height, 1, true, nvfb->colorDepth); if (!(nvfb->fbo)) { @@ -751,7 +729,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb) { glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glClearColor(0.0f,0.0f,0.0f,1.0f); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); - //glEnable(GL_DITHER); + glEnable(GL_DITHER); } else { nvfb->usageFlags |= FB_USAGE_RENDERTARGET; nvfb->last_frame_used = gpuStats.numFrames; @@ -816,21 +794,17 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *src, VirtualFrameb float x, y, w, h; CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight); -#ifdef USING_GLES2 DrawActiveTexture(x, y, w, h, flip, upscale, vscale, draw2dprogram); -#else - DrawActiveTexture(x, y, w, h, flip, upscale, vscale, blitprogram); -#endif glBindTexture(GL_TEXTURE_2D, 0); fbo_unbind(); } void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, int format) { - if(format == GE_FORMAT_8888) { // Here lets assume they don't intersect + if(format == GE_FORMAT_8888) { if(src == dst) { return; - } else { + } else { // Here lets assume they don't intersect memcpy(dst, src, stride * height * 4); } } else { // But here it shouldn't matter if they do @@ -841,20 +815,20 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, int format) { case GE_FORMAT_565: // BGR 565 for(int i = 0; i < stride; i++) { u32 px = *(src32 + i); - *(dst16+i) = 
ARGB8888toBGR565(px); + *(dst16+i) = RGBA8888toRGB565(px); } break; case GE_FORMAT_5551: // ABGR 1555 for(int i = 0; i < stride; i++) { u32 px = *(src32 + i); - *(dst16+i) = ARGB8888toABGR1555(px); + *(dst16+i) = RGBA8888toRGBA5551(px); } break; case GE_FORMAT_4444: // ABGR 4444 for(int i = 0; i < stride; i++) { u32 px = *(src32 + i); - *(dst16+i) = ARGB8888toABGR4444(px); + *(dst16+i) = RGBA8888toRGBA4444(px); } break; default: @@ -868,6 +842,7 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { GLubyte *packed = 0; bool unbind = false; + // We'll prepare two PBOs to switch between readying and reading if(!pixelBufObj_) { GLuint pbos[2]; @@ -889,28 +864,31 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { int pixelType, pixelSize, pixelFormat, align; switch (vfb->format) { - case GE_FORMAT_4444: // 16 bit ABGR + // GL_UNSIGNED_INT_8_8_8_8 returns A B G R (little-endian, tested in Nvidia card/x86 PC) + // GL_UNSIGNED_BYTE returns R G B A in consecutive bytes ("big-endian"/not treated as 32-bit value) + // We want R G B A, so we use *_REV for 16-bit formats and GL_UNSIGNED_BYTE for 32-bit + case GE_FORMAT_4444: // 16 bit RGBA pixelType = GL_UNSIGNED_SHORT_4_4_4_4_REV; - pixelFormat = GL_BGRA; + pixelFormat = GL_RGBA; pixelSize = 2; align = 8; break; - case GE_FORMAT_5551: // 16 bit ABGR + case GE_FORMAT_5551: // 16 bit RGBA pixelType = GL_UNSIGNED_SHORT_1_5_5_5_REV; - pixelFormat = GL_BGRA; + pixelFormat = GL_RGBA; pixelSize = 2; align = 8; break; - case GE_FORMAT_565: // 16 bit BGR + case GE_FORMAT_565: // 16 bit RGB pixelType = GL_UNSIGNED_SHORT_5_6_5_REV; - pixelFormat = GL_BGR; + pixelFormat = GL_RGB; pixelSize = 2; align = 8; break; - case GE_FORMAT_8888: // 32 bit ABGR + case GE_FORMAT_8888: // 32 bit RGBA default: pixelType = GL_UNSIGNED_BYTE; - pixelFormat = GL_BGRA; + pixelFormat = GL_RGBA; pixelSize = 4; align = 4; break; @@ -934,7 +912,9 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { 
glBindBuffer(GL_PIXEL_PACK_BUFFER, pixelBufObj_[currentPBO_].handle); if(pixelBufObj_[currentPBO_].maxSize < bufSize) { + // We reserve a buffer big enough to fit all those pixels if(g_Config.bFramebuffersCPUConvert && pixelType != GL_UNSIGNED_BYTE) { + // End result may be 16-bit but we are reading 32-bit, so we need double the space on the buffer glBufferData(GL_PIXEL_PACK_BUFFER, bufSize*2, NULL, GL_DYNAMIC_READ); } else { glBufferData(GL_PIXEL_PACK_BUFFER, bufSize, NULL, GL_DYNAMIC_READ); @@ -950,17 +930,24 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { pixelBufObj_[currentPBO_].reading = true; if(g_Config.bFramebuffersCPUConvert) { + // If converting pixel formats on the CPU we'll always request RGBA8888 glPixelStorei(GL_PACK_ALIGNMENT, 4); - glReadPixels(0, 0, vfb->fb_stride, vfb->height, GL_BGRA, GL_UNSIGNED_BYTE, 0); + glReadPixels(0, 0, vfb->fb_stride, vfb->height, GL_RGBA, GL_UNSIGNED_BYTE, 0); } else { + // Otherwise we'll directly request the format we need and let the GPU sort it out glPixelStorei(GL_PACK_ALIGNMENT, align); glReadPixels(0, 0, vfb->fb_stride, vfb->height, pixelFormat, pixelType, 0); } + fbo_unbind(); + if(gl_extensions.FBO_ARB) { + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + } + unbind = true; } - // Receive data from previous framebuffer + // Receive previously requested data from a PBO u8 nextPBO = (currentPBO_ + 1) % 2; if(pixelBufObj_[nextPBO].reading) { glBindBuffer(GL_PIXEL_PACK_BUFFER, pixelBufObj_[nextPBO].handle); @@ -974,7 +961,7 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { ConvertFromRGBA8888(Memory::GetPointer(pixelBufObj_[nextPBO].fb_address), packed, pixelBufObj_[nextPBO].stride, pixelBufObj_[nextPBO].height, pixelBufObj_[nextPBO].format); - } else { // we don't need to convert + } else { // We don't need to convert, GPU already did (or should have) Memory::Memcpy(pixelBufObj_[nextPBO].fb_address, packed, pixelBufObj_[nextPBO].size); }
glUnmapBuffer(GL_PIXEL_PACK_BUFFER); @@ -989,10 +976,6 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { if(unbind) { glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - fbo_unbind(); - if(gl_extensions.FBO_ARB) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - } } } @@ -1004,13 +987,14 @@ void FramebufferManager::PackFramebufferGLES_(VirtualFramebuffer *vfb) { return; } - size_t bufSize = vfb->fb_stride * vfb->height * 4; // pixel size always 4 here + // Pixel size always 4 here because we always request RGBA8888 + size_t bufSize = vfb->fb_stride * vfb->height * 4; u32 fb_address = (0x44000000) | vfb->fb_address; GLubyte *packed = 0; if(vfb->format == GE_FORMAT_8888) { packed = (GLubyte *)Memory::GetPointer(fb_address); - } else { // end result may be 16-bit but we are reading 32-bit, so there may not be enough space at fb_address + } else { // End result may be 16-bit but we are reading 32-bit, so there may not be enough space at fb_address packed = (GLubyte *)malloc(bufSize * sizeof(GLubyte)); } @@ -1041,7 +1025,7 @@ void FramebufferManager::PackFramebufferGLES_(VirtualFramebuffer *vfb) { break; } - if(vfb->format != GE_FORMAT_8888) { // if not RGBA 8888 we need to convert + if(vfb->format != GE_FORMAT_8888) { // If not RGBA 8888 we need to convert ConvertFromRGBA8888(Memory::GetPointer(fb_address), packed, vfb->fb_stride, vfb->height, vfb->format); free(packed); } @@ -1058,7 +1042,7 @@ void FramebufferManager::EndFrame() { } #ifndef USING_GLES2 - // We flush last packed framebuffer, if any + // We flush to memory last requested framebuffer, if any PackFramebufferGL_(NULL); #endif } @@ -1123,12 +1107,14 @@ std::vector FramebufferManager::GetFramebufferList() { void FramebufferManager::DecimateFBOs() { fbo_unbind(); currentRenderVfb_ = 0; + bool thirdFrame = (gpuStats.numFrames % 3 == 0); for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *vfb = vfbs_[i]; int age = frameLastFramebufUsed - vfb->last_frame_used; 
if(g_Config.bFramebuffersToMem) { - if((gpuStats.numFrames % 3 == 0) && age < 3) { + // Every third frame we'll commit framebuffers to memory + if(thirdFrame && age <= FBO_OLD_AGE) { ReadFramebufferToMemory(vfb); } } @@ -1147,9 +1133,6 @@ void FramebufferManager::DecimateFBOs() { // Do the same for ReadFramebuffersToMemory's VFBs for (size_t i = 0; i < bvfbs_.size(); ++i) { VirtualFramebuffer *vfb = bvfbs_[i]; - if (vfb == displayFramebuf_ || vfb == prevDisplayFramebuf_ || vfb == prevPrevDisplayFramebuf_) { - continue; - } int age = frameLastFramebufUsed - vfb->last_frame_used; if (age > FBO_OLD_AGE) { INFO_LOG(HLE, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age) diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index c42c4cd930..158e54179d 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -161,7 +161,6 @@ private: void PackFramebufferGL_(VirtualFramebuffer *vfb); void PackFramebufferGLES_(VirtualFramebuffer *vfb); std::vector bvfbs_; // blitting FBOs - GLSLProgram *blitprogram; AsyncPBO *pixelBufObj_; //this isn't that large u8 currentPBO_; From b5cc23d018c3a69bfa5b18fa8fbf0e5e105938cd Mon Sep 17 00:00:00 2001 From: arnastia Date: Fri, 5 Jul 2013 02:31:31 +0100 Subject: [PATCH 8/8] Try to guess GPU to differentiate pixel type to request in glReadPixels(); changed ConvertFromRGBA8888 to iterate linearly. 
--- GPU/GLES/Framebuffer.cpp | 79 ++++++++++++++++++++++++++-------------- GPU/GLES/Framebuffer.h | 8 ++++ 2 files changed, 59 insertions(+), 28 deletions(-) diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index cd125b32f5..43f48ae91e 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -161,6 +161,32 @@ FramebufferManager::FramebufferManager() : glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); useBufferedRendering_ = g_Config.bBufferedRendering; + + // Check vendor string to try and guess GPU + const char *cvendor = (char *)glGetString(GL_VENDOR); + if(cvendor) { + const std::string vendor(cvendor); + + if(vendor == "NVIDIA Corporation" + || vendor == "Nouveau" + || vendor == "nouveau") { + gpuVendor = GPU_VENDOR_NVIDIA; + } else if(vendor == "Advanced Micro Devices, Inc." + || vendor == "ATI Technologies Inc.") { + gpuVendor = GPU_VENDOR_AMD; + } else if(vendor == "Intel" + || vendor == "Intel Inc." + || vendor == "Intel Corporation" + || vendor == "Tungsten Graphics, Inc") { // We'll assume this last one means Intel + gpuVendor = GPU_VENDOR_INTEL; + } else if(vendor == "ARM") { + gpuVendor = GPU_VENDOR_ARM; + } else { + gpuVendor = GPU_VENDOR_UNKNOWN; + } + } else { + gpuVendor = GPU_VENDOR_UNKNOWN; + } } FramebufferManager::~FramebufferManager() { @@ -808,32 +834,28 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, int format) { memcpy(dst, src, stride * height * 4); } } else { // But here it shouldn't matter if they do - for (int j = 0; j < height; j++) { - const u32 *src32 = (u32 *)(src + stride * j*4); - u16 *dst16 = (u16 *)(dst + stride * j*2); - switch (format) { - case GE_FORMAT_565: // BGR 565 - for(int i = 0; i < stride; i++) { - u32 px = *(src32 + i); - *(dst16+i) = RGBA8888toRGB565(px); - } - break; - case GE_FORMAT_5551: // ABGR 1555 - for(int i = 0; i < stride; i++) { - u32 px = *(src32 + i); - *(dst16+i) = RGBA8888toRGBA5551(px); - } + u32 size = height * stride; + 
u32 *src32 = (u32 *)src; + u16 *dst16 = (u16 *)dst; + switch (format) { + case GE_FORMAT_565: // BGR 565 + for(int i = 0; i < size; i++) { + dst16[i] = RGBA8888toRGB565(src32[i]); + } + break; + case GE_FORMAT_5551: // ABGR 1555 + for(int i = 0; i < size; i++) { + dst16[i] = RGBA8888toRGBA5551(src32[i]); + } - break; - case GE_FORMAT_4444: // ABGR 4444 - for(int i = 0; i < stride; i++) { - u32 px = *(src32 + i); - *(dst16+i) = RGBA8888toRGBA4444(px); - } - break; - default: - break; - } + break; + case GE_FORMAT_4444: // ABGR 4444 + for(int i = 0; i < size; i++) { + dst16[i] = RGBA8888toRGBA4444(src32[i]); + } + break; + default: + break; } } } @@ -868,19 +890,20 @@ void FramebufferManager::PackFramebufferGL_(VirtualFramebuffer *vfb) { // GL_UNSIGNED_BYTE returns R G B A in consecutive bytes ("big-endian"/not treated as 32-bit value) // We want R G B A, so we use *_REV for 16-bit formats and GL_UNSIGNED_BYTE for 32-bit case GE_FORMAT_4444: // 16 bit RGBA - pixelType = GL_UNSIGNED_SHORT_4_4_4_4_REV; + // We'll single out Nvidia for now, since that's the only vendor whose glReadPixels behaviour is tested. + pixelType = ((gpuVendor == GPU_VENDOR_NVIDIA) ? GL_UNSIGNED_SHORT_4_4_4_4_REV : GL_UNSIGNED_SHORT_4_4_4_4); pixelFormat = GL_RGBA; pixelSize = 2; align = 8; break; case GE_FORMAT_5551: // 16 bit RGBA - pixelType = GL_UNSIGNED_SHORT_1_5_5_5_REV; + pixelType = ((gpuVendor == GPU_VENDOR_NVIDIA) ? GL_UNSIGNED_SHORT_1_5_5_5_REV : GL_UNSIGNED_SHORT_5_5_5_1); pixelFormat = GL_RGBA; pixelSize = 2; align = 8; break; case GE_FORMAT_565: // 16 bit RGB - pixelType = GL_UNSIGNED_SHORT_5_6_5_REV; + pixelType = ((gpuVendor == GPU_VENDOR_NVIDIA) ? 
GL_UNSIGNED_SHORT_5_6_5_REV : GL_UNSIGNED_SHORT_5_6_5); pixelFormat = GL_RGB; pixelSize = 2; align = 8; diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index 158e54179d..238d0b3c29 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -44,6 +44,13 @@ enum { FB_USAGE_TEXTURE = 4, }; +enum { + GPU_VENDOR_NVIDIA = 1, + GPU_VENDOR_AMD = 2, + GPU_VENDOR_INTEL = 3, + GPU_VENDOR_ARM = 4, + GPU_VENDOR_UNKNOWN = 0 +}; struct VirtualFramebuffer { int last_frame_used; @@ -160,6 +167,7 @@ private: void BlitFramebuffer_(VirtualFramebuffer *src, VirtualFramebuffer *dst, bool flip = false, float upscale = 1.0f, float vscale = 1.0f); void PackFramebufferGL_(VirtualFramebuffer *vfb); void PackFramebufferGLES_(VirtualFramebuffer *vfb); + int gpuVendor; std::vector bvfbs_; // blitting FBOs AsyncPBO *pixelBufObj_; //this isn't that large u8 currentPBO_;