Merge pull request #15859 from hrydgard/depal-depth-565

Allow binding depth as 565 by going through depal
This commit is contained in:
Henrik Rydgård 2022-08-21 19:54:02 +02:00 committed by GitHub
commit 58adf3ee48
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 198 additions and 35 deletions

View file

@ -423,3 +423,19 @@ ShaderWriter &ShaderWriter::SampleTexture2D(const char *sampName, const char *uv
} }
return *this; return *this;
} }
// Emits a statement that declares a two-component float vector named szVariable
// and fills it with the dimensions of the texture texName, using the syntax of
// the current target shader language (lang_.shaderLanguage).
//
// szVariable: name of the variable to declare in the generated shader.
// texName:    name of the texture (HLSL) / sampler (GLSL) to query.
// Returns *this so that calls can be chained.
ShaderWriter &ShaderWriter::GetTextureSize(const char *szVariable, const char *texName) {
	switch (lang_.shaderLanguage) {
	case HLSL_D3D11:
	case HLSL_D3D9:
		// Both HLSL targets emit the same statement.
		// NOTE(review): GetDimensions is a texture-object method; confirm that the
		// shader model used for the D3D9 backend actually accepts it.
		F(" float2 %s; %s.GetDimensions(%s.x, %s.y);", szVariable, texName, szVariable, szVariable);
		break;
	default:
		// textureSize() returns an ivec2. GLSL ES performs no implicit int -> float
		// conversion, so convert explicitly to keep the declaration valid everywhere.
		F("vec2 %s = vec2(textureSize(%s, 0));", szVariable, texName);
		break;
	}
	return *this;
}

View file

@ -83,7 +83,8 @@ public:
void ConstFloat(const char *name, float value); void ConstFloat(const char *name, float value);
ShaderWriter &SampleTexture2D(const char *sampName, const char *uv); ShaderWriter &SampleTexture2D(const char *texName, const char *uv);
ShaderWriter &GetTextureSize(const char *szVariable, const char *texName);
// Simple shaders with no special tricks. // Simple shaders with no special tricks.
void BeginVSMain(Slice<InputDef> inputs, Slice<UniformDef> uniforms, Slice<VaryingDef> varyings); void BeginVSMain(Slice<InputDef> inputs, Slice<UniformDef> uniforms, Slice<VaryingDef> varyings);

View file

@ -97,6 +97,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) {
CheckSetting(iniFile, gameID, "ZZT3SelectHack", &flags_.ZZT3SelectHack); CheckSetting(iniFile, gameID, "ZZT3SelectHack", &flags_.ZZT3SelectHack);
CheckSetting(iniFile, gameID, "AllowLargeFBTextureOffsets", &flags_.AllowLargeFBTextureOffsets); CheckSetting(iniFile, gameID, "AllowLargeFBTextureOffsets", &flags_.AllowLargeFBTextureOffsets);
CheckSetting(iniFile, gameID, "AtracLoopHack", &flags_.AtracLoopHack); CheckSetting(iniFile, gameID, "AtracLoopHack", &flags_.AtracLoopHack);
CheckSetting(iniFile, gameID, "DeswizzleDepth", &flags_.DeswizzleDepth);
} }
void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) { void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) {

View file

@ -87,6 +87,7 @@ struct CompatFlags {
bool ZZT3SelectHack; bool ZZT3SelectHack;
bool AllowLargeFBTextureOffsets; bool AllowLargeFBTextureOffsets;
bool AtracLoopHack; bool AtracLoopHack;
bool DeswizzleDepth;
}; };
class IniFile; class IniFile;

View file

@ -66,7 +66,7 @@ struct CoreParameter {
bool headLess; // Try to avoid messageboxes etc bool headLess; // Try to avoid messageboxes etc
// Internal PSP rendering resolution and scale factor. // Internal PSP rendering resolution and scale factor.
int renderScaleFactor; int renderScaleFactor = 1;
int renderWidth; int renderWidth;
int renderHeight; int renderHeight;

View file

@ -147,10 +147,10 @@ Draw::SamplerState *DepalShaderCache::GetSampler() {
return nearestSampler_; return nearestSampler_;
} }
DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GEBufferFormat pixelFormat) { DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GETextureFormat textureFormat, GEBufferFormat bufferFormat) {
using namespace Draw; using namespace Draw;
u32 id = GenerateShaderID(clutMode, pixelFormat); u32 id = GenerateShaderID(clutMode, textureFormat, bufferFormat);
auto shader = cache_.find(id); auto shader = cache_.find(id);
if (shader != cache_.end()) { if (shader != cache_.end()) {
@ -171,7 +171,8 @@ DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GEBufferF
config.startPos = gstate.getClutIndexStartPos(); config.startPos = gstate.getClutIndexStartPos();
config.shift = gstate.getClutIndexShift(); config.shift = gstate.getClutIndexShift();
config.mask = gstate.getClutIndexMask(); config.mask = gstate.getClutIndexMask();
config.pixelFormat = pixelFormat; config.bufferFormat = bufferFormat;
config.textureFormat = textureFormat;
GenerateDepalFs(buffer, config, draw_->GetShaderLanguageDesc()); GenerateDepalFs(buffer, config, draw_->GetShaderLanguageDesc());

View file

@ -49,7 +49,7 @@ public:
~DepalShaderCache(); ~DepalShaderCache();
// This also uploads the palette and binds the correct texture. // This also uploads the palette and binds the correct texture.
DepalShader *GetDepalettizeShader(uint32_t clutMode, GEBufferFormat pixelFormat); DepalShader *GetDepalettizeShader(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat);
Draw::Texture *GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut); Draw::Texture *GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut);
Draw::SamplerState *GetSampler(); Draw::SamplerState *GetSampler();
@ -63,8 +63,8 @@ public:
void DeviceRestore(Draw::DrawContext *draw); void DeviceRestore(Draw::DrawContext *draw);
private: private:
static uint32_t GenerateShaderID(uint32_t clutMode, GEBufferFormat pixelFormat) { static uint32_t GenerateShaderID(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat) {
return (clutMode & 0xFFFFFF) | (pixelFormat << 24); return (clutMode & 0xFFFFFF) | (pixelFormat << 24) | (texFormat << 28);
} }
static uint32_t GetClutID(GEPaletteFormat clutFormat, uint32_t clutHash) { static uint32_t GetClutID(GEPaletteFormat clutFormat, uint32_t clutHash) {

View file

@ -49,7 +49,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con
const int shift = config.shift; const int shift = config.shift;
const int mask = config.mask; const int mask = config.mask;
if (config.pixelFormat == GE_FORMAT_DEPTH16) { if (config.bufferFormat == GE_FORMAT_DEPTH16) {
DepthScaleFactors factors = GetDepthScaleFactors(); DepthScaleFactors factors = GetDepthScaleFactors();
writer.ConstFloat("z_scale", factors.scale); writer.ConstFloat("z_scale", factors.scale);
writer.ConstFloat("z_offset", factors.offset); writer.ConstFloat("z_offset", factors.offset);
@ -71,7 +71,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con
writer.C(" vec4 color = ").SampleTexture2D("tex", "v_texcoord").C(";\n"); writer.C(" vec4 color = ").SampleTexture2D("tex", "v_texcoord").C(";\n");
int shiftedMask = mask << shift; int shiftedMask = mask << shift;
switch (config.pixelFormat) { switch (config.bufferFormat) {
case GE_FORMAT_8888: case GE_FORMAT_8888:
if (shiftedMask & 0xFF) writer.C(" int r = int(color.r * 255.99);\n"); else writer.C(" int r = 0;\n"); if (shiftedMask & 0xFF) writer.C(" int r = int(color.r * 255.99);\n"); else writer.C(" int r = 0;\n");
if (shiftedMask & 0xFF00) writer.C(" int g = int(color.g * 255.99);\n"); else writer.C(" int g = 0;\n"); if (shiftedMask & 0xFF00) writer.C(" int g = int(color.g * 255.99);\n"); else writer.C(" int g = 0;\n");
@ -102,6 +102,17 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con
case GE_FORMAT_DEPTH16: case GE_FORMAT_DEPTH16:
// Remap depth buffer. // Remap depth buffer.
writer.C(" float depth = (color.x - z_offset) * z_scale;\n"); writer.C(" float depth = (color.x - z_offset) * z_scale;\n");
if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) {
// Convert depth to 565, without going through a CLUT.
writer.C(" int idepth = int(clamp(depth, 0.0, 65535.0));\n");
writer.C(" float r = (idepth & 31) / 31.0f;\n");
writer.C(" float g = ((idepth >> 5) & 63) / 63.0f;\n");
writer.C(" float b = ((idepth >> 11) & 31) / 31.0f;\n");
writer.C(" vec4 outColor = vec4(r, g, b, 1.0);\n");
return;
}
writer.C(" int index = int(clamp(depth, 0.0, 65535.0));\n"); writer.C(" int index = int(clamp(depth, 0.0, 65535.0));\n");
break; break;
default: default:
@ -135,16 +146,18 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c
const int shift = config.shift; const int shift = config.shift;
const int mask = config.mask; const int mask = config.mask;
if (config.pixelFormat == GE_FORMAT_DEPTH16) { if (config.bufferFormat == GE_FORMAT_DEPTH16) {
DepthScaleFactors factors = GetDepthScaleFactors(); DepthScaleFactors factors = GetDepthScaleFactors();
writer.ConstFloat("z_scale", factors.scale); writer.ConstFloat("z_scale", factors.scale);
writer.ConstFloat("z_offset", factors.offset); writer.ConstFloat("z_offset", factors.offset);
} }
writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord").C(";\n");
float index_multiplier = 1.0f; float index_multiplier = 1.0f;
// pixelformat is the format of the texture we are sampling. // pixelformat is the format of the texture we are sampling.
bool formatOK = true; bool formatOK = true;
switch (config.pixelFormat) { switch (config.bufferFormat) {
case GE_FORMAT_8888: case GE_FORMAT_8888:
if ((mask & (mask + 1)) == 0) { if ((mask & (mask + 1)) == 0) {
// If the value has all bits contiguous (bitmask check above), we can mod by it + 1. // If the value has all bits contiguous (bitmask check above), we can mod by it + 1.
@ -222,6 +235,19 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c
case GE_FORMAT_DEPTH16: case GE_FORMAT_DEPTH16:
{ {
// TODO: I think we can handle most scenarios here, but texturing from depth buffers requires an extension on ES 2.0 anyway. // TODO: I think we can handle most scenarios here, but texturing from depth buffers requires an extension on ES 2.0 anyway.
// Not on D3D9 though, so this path is still relevant.
if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) {
// Convert depth to 565, without going through a CLUT.
writer.C(" float depth = (index.x - z_offset) * z_scale;\n");
writer.C(" float idepth = floor(clamp(depth, 0.0, 65535.0));\n");
writer.C(" float r = mod(idepth, 32.0) / 31.0f;\n");
writer.C(" float g = mod(floor(idepth / 32.0), 64.0) / 63.0f;\n");
writer.C(" float b = mod(floor(idepth / 2048.0), 32.0) / 31.0f;\n");
writer.C(" vec4 outColor = vec4(r, g, b, 1.0);\n");
return;
}
if (shift < 16) { if (shift < 16) {
index_multiplier = 1.0f / (float)(1 << shift); index_multiplier = 1.0f / (float)(1 << shift);
truncate_cpy(lookupMethod, "((index.x - z_offset) * z_scale)"); truncate_cpy(lookupMethod, "((index.x - z_offset) * z_scale)");
@ -249,7 +275,7 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c
// index_multiplier -= 0.01f / texturePixels; // index_multiplier -= 0.01f / texturePixels;
if (!formatOK) { if (!formatOK) {
ERROR_LOG_REPORT_ONCE(depal, G3D, "%s depal unsupported: shift=%d mask=%02x offset=%d", GeBufferFormatToString(config.pixelFormat), shift, mask, config.startPos); ERROR_LOG_REPORT_ONCE(depal, G3D, "%s depal unsupported: shift=%d mask=%02x offset=%d", GeBufferFormatToString(config.bufferFormat), shift, mask, config.startPos);
} }
// Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR. // Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR.
@ -258,7 +284,6 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c
char offset[128] = ""; char offset[128] = "";
sprintf(offset, " + %f", texel_offset); sprintf(offset, " + %f", texel_offset);
writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord").C(";\n");
writer.F(" float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset); writer.F(" float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset);
writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n"); writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
} }

View file

@ -29,7 +29,8 @@ struct DepalConfig {
int shift; int shift;
u32 startPos; u32 startPos;
GEPaletteFormat clutFormat; GEPaletteFormat clutFormat;
GEBufferFormat pixelFormat; GETextureFormat textureFormat;
GEBufferFormat bufferFormat;
}; };
void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang); void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang);

View file

@ -40,6 +40,23 @@ static const SamplerDef samplers[1] = {
{ "tex" }, { "tex" },
}; };
// Uniforms declared in the generated Draw2D fragment shaders: GLSL type, name and index.
// The index values mirror the third column of draw2DUBDesc below.
static const UniformDef uniforms[2] = {
	{ "vec2", "texSize", 0 },
	{ "float", "scaleFactor", 1 },
};

// CPU-side copy of the uniform data that DrawStrip2D uploads before drawing.
// The member order defines the byte layout described by draw2DUBDesc.
struct Draw2DUB {
	float texSizeX;
	float texSizeY;
	float scaleFactor;
};

// Describes where each uniform lives inside Draw2DUB.
// NOTE(review): the last field of each entry is taken to be the byte offset into
// the buffer (the entry type is declared elsewhere - confirm). texSize occupies the
// first two floats (bytes 0..7), so scaleFactor must start at byte 8; with both
// entries at offset 0, scaleFactor would alias texSizeX.
const UniformBufferDesc draw2DUBDesc{ sizeof(Draw2DUB), {
	{ "texSize", -1, 0, UniformType::FLOAT2, 0 },
	{ "scaleFactor", -1, 1, UniformType::FLOAT1, 8 },
} };
RasterChannel GenerateDraw2DFs(ShaderWriter &writer) { RasterChannel GenerateDraw2DFs(ShaderWriter &writer) {
writer.DeclareSamplers(samplers); writer.DeclareSamplers(samplers);
writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_NONE); writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_NONE);
@ -70,6 +87,27 @@ RasterChannel GenerateDraw2D565ToDepthFs(ShaderWriter &writer) {
writer.F(" highp float depthValue = (floor(rgb.x * 31.99) + floor(rgb.y * 63.99) * 32.0 + floor(rgb.z * 31.99) * 2048.0); \n"); writer.F(" highp float depthValue = (floor(rgb.x * 31.99) + floor(rgb.y * 63.99) * 32.0 + floor(rgb.z * 31.99) * 2048.0); \n");
writer.F(" gl_FragDepth = (depthValue / %f) + %f;\n", factors.scale, factors.offset); writer.F(" gl_FragDepth = (depthValue / %f) + %f;\n", factors.scale, factors.offset);
writer.EndFSMain("outColor", FSFLAG_WRITEDEPTH); writer.EndFSMain("outColor", FSFLAG_WRITEDEPTH);
return RASTER_DEPTH;
}
// Generates a fragment shader that samples the 565 color texture "tex", reinterprets
// each texel as a 16-bit depth value and writes it to gl_FragDepth. Unlike the plain
// 565-to-depth shader, the source row is mirrored vertically inside each strip of
// (4 * scaleFactor) texture rows before sampling. Returns RASTER_DEPTH.
RasterChannel GenerateDraw2D565ToDepthDeswizzleFs(ShaderWriter &writer) {
writer.DeclareSamplers(samplers);
// This variant needs the texSize / scaleFactor uniforms, so they are declared here.
writer.BeginFSMain(uniforms, varyings, FSFLAG_WRITEDEPTH);
// The color output is unused; only the depth written below matters.
writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n");
// Unlike when just copying a depth buffer, here we're generating new depth values so we'll
// have to apply the scaling.
DepthScaleFactors factors = GetDepthScaleFactors();
// Convert the normalized texture coordinate into texel units.
writer.C(" vec2 tsize = texSize;\n");
writer.C(" vec2 coord = v_texcoord * tsize;\n");
// Strip height in texels: 4 rows multiplied by the scaleFactor uniform.
writer.F(" float strip = 4.0 * scaleFactor;\n");
// in_strip is the distance from the start of the current strip.
writer.C(" float in_strip = mod(coord.y, strip);\n");
// (coord.y - in_strip) is the strip start; adding (strip - in_strip) mirrors the row within the strip.
writer.C(" coord.y = coord.y - in_strip + strip - in_strip;\n");
// Back to normalized coordinates for sampling.
writer.C(" coord /= tsize;\n");
writer.C(" vec3 rgb = ").SampleTexture2D("tex", "coord").C(".xyz;\n");
// Reassemble the 16-bit value from the 5-bit red, 6-bit green (x32) and 5-bit blue (x2048) channels.
writer.F(" highp float depthValue = (floor(rgb.x * 31.99) + floor(rgb.y * 63.99) * 32.0 + floor(rgb.z * 31.99) * 2048.0); \n");
// Map the 16-bit value into the depth range using the current depth scale factors.
writer.F(" gl_FragDepth = (depthValue / %f) + %f;\n", factors.scale, factors.offset);
writer.EndFSMain("outColor", FSFLAG_WRITEDEPTH);
return RASTER_DEPTH; return RASTER_DEPTH;
} }
@ -159,7 +197,7 @@ Draw::Pipeline *FramebufferManagerCommon::Create2DPipeline(RasterChannel (*gener
{ draw2DVs_, fs }, { draw2DVs_, fs },
inputLayout, inputLayout,
depthStencil, depthStencil,
blend, rasterNoCull, nullptr, blend, rasterNoCull, &draw2DUBDesc,
}; };
Draw::Pipeline *pipeline = draw_->CreateGraphicsPipeline(pipelineDesc); Draw::Pipeline *pipeline = draw_->CreateGraphicsPipeline(pipelineDesc);
@ -174,7 +212,7 @@ Draw::Pipeline *FramebufferManagerCommon::Create2DPipeline(RasterChannel (*gener
return pipeline; return pipeline;
} }
void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DShader shader) { void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DShader shader, float texW, float texH) {
using namespace Draw; using namespace Draw;
Ensure2DResources(); Ensure2DResources();
@ -196,8 +234,8 @@ void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *ver
} }
if (!draw2DPipelineDepth_) { if (!draw2DPipelineDepth_) {
draw2DPipelineDepth_ = Create2DPipeline(&GenerateDraw2DDepthFs); draw2DPipelineDepth_ = Create2DPipeline(&GenerateDraw2DDepthFs);
linearFilter = false;
} }
linearFilter = false;
draw_->BindPipeline(draw2DPipelineDepth_); draw_->BindPipeline(draw2DPipelineDepth_);
break; break;
@ -208,15 +246,37 @@ void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *ver
} }
if (!draw2DPipeline565ToDepth_) { if (!draw2DPipeline565ToDepth_) {
draw2DPipeline565ToDepth_ = Create2DPipeline(&GenerateDraw2D565ToDepthFs); draw2DPipeline565ToDepth_ = Create2DPipeline(&GenerateDraw2D565ToDepthFs);
linearFilter = false;
} }
linearFilter = false;
draw_->BindPipeline(draw2DPipeline565ToDepth_); draw_->BindPipeline(draw2DPipeline565ToDepth_);
break; break;
case DRAW2D_565_TO_DEPTH_DESWIZZLE:
if (!draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
// Can't do it
return;
} }
if (!draw2DPipeline565ToDepthDeswizzle_) {
draw2DPipeline565ToDepthDeswizzle_ = Create2DPipeline(&GenerateDraw2D565ToDepthDeswizzleFs);
}
linearFilter = false;
draw_->BindPipeline(draw2DPipeline565ToDepthDeswizzle_);
break;
}
Draw2DUB ub;
ub.texSizeX = tex ? tex->Width() : texW;
ub.texSizeY = tex ? tex->Height() : texH;
ub.scaleFactor = (float)renderScaleFactor_;
draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub));
if (tex) { if (tex) {
draw_->BindTextures(TEX_SLOT_PSP_TEXTURE, 1, &tex); draw_->BindTextures(TEX_SLOT_PSP_TEXTURE, 1, &tex);
} }
draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, linearFilter ? &draw2DSamplerLinear_ : &draw2DSamplerNearest_); draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, linearFilter ? &draw2DSamplerLinear_ : &draw2DSamplerNearest_);
draw_->DrawUP(verts, vertexCount); draw_->DrawUP(verts, vertexCount);
draw_->InvalidateCachedState();
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_VERTEXSHADER_STATE);
} }

View file

@ -14,6 +14,7 @@ enum Draw2DShader {
DRAW2D_COPY_COLOR, DRAW2D_COPY_COLOR,
DRAW2D_COPY_DEPTH, DRAW2D_COPY_DEPTH,
DRAW2D_565_TO_DEPTH, DRAW2D_565_TO_DEPTH,
DRAW2D_565_TO_DEPTH_DESWIZZLE,
}; };
inline RasterChannel Draw2DSourceChannel(Draw2DShader shader) { inline RasterChannel Draw2DSourceChannel(Draw2DShader shader) {
@ -22,6 +23,7 @@ inline RasterChannel Draw2DSourceChannel(Draw2DShader shader) {
return RASTER_DEPTH; return RASTER_DEPTH;
case DRAW2D_COPY_COLOR: case DRAW2D_COPY_COLOR:
case DRAW2D_565_TO_DEPTH: case DRAW2D_565_TO_DEPTH:
case DRAW2D_565_TO_DEPTH_DESWIZZLE:
default: default:
return RASTER_COLOR; return RASTER_COLOR;
} }

View file

@ -551,20 +551,33 @@ void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFra
// For now, let's just do the last thing, if there are multiple. // For now, let's just do the last thing, if there are multiple.
// for (auto &source : sources) { // for (auto &source : sources) {
if (sources.size()) { if (!sources.empty()) {
draw_->InvalidateCachedState();
auto &source = sources.back(); auto &source = sources.back();
if (source.channel == RASTER_DEPTH) { if (source.channel == RASTER_DEPTH) {
// Good old depth->depth copy. // Good old depth->depth copy.
BlitFramebufferDepth(source.vfb, dest); BlitFramebufferDepth(source.vfb, dest);
gpuStats.numDepthCopies++; gpuStats.numDepthCopies++;
dest->last_frame_depth_updated = gpuStats.numFlips; dest->last_frame_depth_updated = gpuStats.numFlips;
} else if (source.channel == RASTER_COLOR) { } else if (source.channel == RASTER_COLOR && draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
VirtualFramebuffer *src = source.vfb; VirtualFramebuffer *src = source.vfb;
// Copying color to depth.
if (src->drawnFormat != GE_FORMAT_565) { if (src->drawnFormat != GE_FORMAT_565) {
WARN_LOG_ONCE(not565, G3D, "Drawn format of buffer at %08x not 565 as expected", src->fb_address); WARN_LOG_ONCE(not565, G3D, "Drawn format of buffer at %08x not 565 as expected", src->fb_address);
} }
BlitUsingRaster(src->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight, dest->fbo, 0.0f, 0.0f, dest->renderWidth, dest->renderHeight, false, DRAW2D_565_TO_DEPTH, "565_to_depth");
// Really hate to do this, but tracking the depth swizzle state across multiple
// copies is not easy.
Draw2DShader shader = DRAW2D_565_TO_DEPTH;
if (PSP_CoreParameter().compat.flags().DeswizzleDepth) {
shader = DRAW2D_565_TO_DEPTH_DESWIZZLE;
}
// Copying color to depth.
BlitUsingRaster(
src->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight,
dest->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight,
false, shader, "565_to_depth");
} }
} }
@ -1941,7 +1954,7 @@ Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u
bool z_stencil = reason == TempFBO::STENCIL; bool z_stencil = reason == TempFBO::STENCIL;
char name[128]; char name[128];
snprintf(name, sizeof(name), "temp_fbo_%dx%d%s", w, h, z_stencil ? "_depth" : ""); snprintf(name, sizeof(name), "temp_fbo_%dx%d%s", w / renderScaleFactor_, h / renderScaleFactor_, z_stencil ? "_depth" : "");
Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, 1, z_stencil, name }); Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, 1, z_stencil, name });
if (!fbo) { if (!fbo) {
return nullptr; return nullptr;
@ -2343,6 +2356,7 @@ void FramebufferManagerCommon::DeviceLost() {
DoRelease(draw2DPipelineColor_); DoRelease(draw2DPipelineColor_);
DoRelease(draw2DPipelineDepth_); DoRelease(draw2DPipelineDepth_);
DoRelease(draw2DPipeline565ToDepth_); DoRelease(draw2DPipeline565ToDepth_);
DoRelease(draw2DPipeline565ToDepthDeswizzle_);
draw_ = nullptr; draw_ = nullptr;
} }
@ -2545,7 +2559,7 @@ void FramebufferManagerCommon::BlitUsingRaster(
Draw::Viewport vp{ 0.0f, 0.0f, (float)dest->Width(), (float)dest->Height(), 0.0f, 1.0f }; Draw::Viewport vp{ 0.0f, 0.0f, (float)dest->Width(), (float)dest->Height(), 0.0f, 1.0f };
draw_->SetViewports(1, &vp); draw_->SetViewports(1, &vp);
draw_->SetScissorRect(0, 0, (int)dest->Width(), (int)dest->Height()); draw_->SetScissorRect(0, 0, (int)dest->Width(), (int)dest->Height());
DrawStrip2D(nullptr, vtx, 4, linearFilter, shader); DrawStrip2D(nullptr, vtx, 4, linearFilter, shader, src->Width(), src->Height());
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE); gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
} }

View file

@ -375,7 +375,7 @@ protected:
Draw::Texture *MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); Draw::Texture *MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height);
void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags); void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags);
void DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DShader channel); void DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DShader channel, float texW = 0.0f, float texH = 0.0f);
void Ensure2DResources(); void Ensure2DResources();
Draw::Pipeline *Create2DPipeline(RasterChannel (*generate)(ShaderWriter &)); Draw::Pipeline *Create2DPipeline(RasterChannel (*generate)(ShaderWriter &));
@ -511,6 +511,7 @@ protected:
Draw::Pipeline *draw2DPipelineColor_ = nullptr; Draw::Pipeline *draw2DPipelineColor_ = nullptr;
Draw::Pipeline *draw2DPipelineDepth_ = nullptr; Draw::Pipeline *draw2DPipelineDepth_ = nullptr;
Draw::Pipeline *draw2DPipeline565ToDepth_ = nullptr; Draw::Pipeline *draw2DPipeline565ToDepth_ = nullptr;
Draw::Pipeline *draw2DPipeline565ToDepthDeswizzle_ = nullptr;
Draw::SamplerState *draw2DSamplerLinear_ = nullptr; Draw::SamplerState *draw2DSamplerLinear_ = nullptr;
Draw::SamplerState *draw2DSamplerNearest_ = nullptr; Draw::SamplerState *draw2DSamplerNearest_ = nullptr;
Draw::ShaderModule *draw2DVs_ = nullptr; Draw::ShaderModule *draw2DVs_ = nullptr;

View file

@ -892,6 +892,7 @@ bool TextureCacheCommon::MatchFramebuffer(
const bool noOffset = texaddr == addr; const bool noOffset = texaddr == addr;
const bool exactMatch = noOffset && entry.format < 4 && channel == RASTER_COLOR; const bool exactMatch = noOffset && entry.format < 4 && channel == RASTER_COLOR;
const u32 w = 1 << ((entry.dim >> 0) & 0xf); const u32 w = 1 << ((entry.dim >> 0) & 0xf);
const u32 h = 1 << ((entry.dim >> 8) & 0xf); const u32 h = 1 << ((entry.dim >> 8) & 0xf);
// 512 on a 272 framebuffer is sane, so let's be lenient. // 512 on a 272 framebuffer is sane, so let's be lenient.
@ -927,6 +928,7 @@ bool TextureCacheCommon::MatchFramebuffer(
// Check works for D16 too (???) // Check works for D16 too (???)
const bool matchingClutFormat = const bool matchingClutFormat =
(fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_CLUT16) || (fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_CLUT16) ||
(fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_5650) ||
(fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) || (fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) ||
(fb_format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16); (fb_format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16);
@ -971,7 +973,7 @@ bool TextureCacheCommon::MatchFramebuffer(
// 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture. // 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture.
if (matchingClutFormat) { if (matchingClutFormat) {
if (!noOffset) { if (!noOffset) {
WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", fb_address, matchInfo->xOffset, matchInfo->yOffset); WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer (%s) using %s with offset at %08x +%dx%d", channel == RASTER_DEPTH ? "DEPTH" : "COLOR", GeTextureFormatToString(entry.format), fb_address, matchInfo->xOffset, matchInfo->yOffset);
} }
return true; return true;
} else if (IsClutFormat((GETextureFormat)(entry.format)) || IsDXTFormat((GETextureFormat)(entry.format))) { } else if (IsClutFormat((GETextureFormat)(entry.format)) || IsDXTFormat((GETextureFormat)(entry.format))) {
@ -1823,18 +1825,44 @@ void TextureCacheCommon::ApplyTexture() {
gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0); gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0);
} }
// Reports whether a framebuffer stored as bufferFormat can be read as a texture of
// texFormat by going through the depal pass.
//
// Accepted combinations:
//  - CLUT16 texture over a 16-bit color buffer (4444 / 565 / 5551) or a 16-bit depth buffer
//  - CLUT32 texture over an 8888 buffer
//  - 5650 texture over a 16-bit depth buffer (depth read back as 565, see #15491)
// Any other CLUT combination is logged as a warning and rejected; non-CLUT formats
// other than the 5650-over-depth case are rejected silently.
bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) {
	if (!IsClutFormat(texFormat)) {
		// The only non-CLUT case handled is "depal" of depth to 565, without a CLUT.
		return texFormat == GE_TFMT_5650 && bufferFormat == GE_FORMAT_DEPTH16;
	}

	bool compatible = false;
	switch (bufferFormat) {
	case GE_FORMAT_4444:
	case GE_FORMAT_565:
	case GE_FORMAT_5551:
	case GE_FORMAT_DEPTH16:
		// 16-bit buffers need a 16-bit index format.
		compatible = (texFormat == GE_TFMT_CLUT16);
		break;
	case GE_FORMAT_8888:
		// 32-bit buffers need a 32-bit index format.
		compatible = (texFormat == GE_TFMT_CLUT32);
		break;
	}

	if (!compatible) {
		WARN_LOG(G3D, "Invalid CLUT/framebuffer combination: %s vs %s", GeTextureFormatToString(texFormat), GeBufferFormatToString(bufferFormat));
	}
	return compatible;
}
void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, RasterChannel channel) { void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, RasterChannel channel) {
DepalShader *depalShader = nullptr; DepalShader *depalShader = nullptr;
uint32_t clutMode = gstate.clutformat & 0xFFFFFF; uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
bool need_depalettize = IsClutFormat(texFormat);
bool depth = channel == RASTER_DEPTH; bool depth = channel == RASTER_DEPTH;
bool need_depalettize = CanDepalettize(texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);
bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && !depth && !gstate_c.curTextureIs3D; bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && !depth && !gstate_c.curTextureIs3D;
// TODO: Implement shader depal in the fragment shader generator for D3D11 at least. // TODO: Implement shader depal in the fragment shader generator for D3D11 at least.
if (!draw_->GetDeviceCaps().fragmentShaderInt32Supported) { if (!draw_->GetDeviceCaps().fragmentShaderInt32Supported) {
useShaderDepal = false; useShaderDepal = false;
depth = false; // Can't support this
} }
switch (draw_->GetShaderLanguageDesc().shaderLanguage) { switch (draw_->GetShaderLanguageDesc().shaderLanguage) {
@ -1878,7 +1906,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
return; return;
} }
depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);
gstate_c.SetUseShaderDepal(false); gstate_c.SetUseShaderDepal(false);
} }

View file

@ -160,6 +160,7 @@ void GPU_DX9::CheckGPUFeatures() {
u32 features = 0; u32 features = 0;
features |= GPU_SUPPORTS_16BIT_FORMATS; features |= GPU_SUPPORTS_16BIT_FORMATS;
features |= GPU_SUPPORTS_BLEND_MINMAX; features |= GPU_SUPPORTS_BLEND_MINMAX;
features |= GPU_SUPPORTS_DEPTH_TEXTURE;
features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL; features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;
// Accurate depth is required because the Direct3D API does not support inverse Z. // Accurate depth is required because the Direct3D API does not support inverse Z.

View file

@ -1102,6 +1102,10 @@ NPEH00029 = true
ULUS10455 = true ULUS10455 = true
[BlueToAlpha] [BlueToAlpha]
# Some games render first to RGB of a 4444 texture, then they switch to 565 and render masked to blue,
# just to be able to render to the alpha channel of the 4444. We can detect that and reroute rendering
# to avoid problems.
# Split/Second # Split/Second
ULES01402 = true ULES01402 = true
ULUS10513 = true ULUS10513 = true
@ -1121,10 +1125,6 @@ ULES00262 = true
ULUS10064 = true ULUS10064 = true
ULKS46087 = true ULKS46087 = true
# Some games render first to RGB of a 4444 texture, then they switch to 565 and render masked to blue,
# just to be able to render to the alpha channel of the 4444. We can detect that and reroute rendering
# to avoid problems.
[DateLimited] [DateLimited]
# Car Jack Streets - issue #12698 # Car Jack Streets - issue #12698
NPUZ00043 = true NPUZ00043 = true
@ -1266,3 +1266,13 @@ ULES00618 = true
# Silver Fall # Silver Fall
ULES00808 = true ULES00808 = true
ULUS10270 = true ULUS10270 = true
[DeswizzleDepth]
UCUS98633 = true
UCAS40145 = true
UCES00420 = true
UCJS10052 = true
UCKS45048 = true
UCJS18030 = true
UCJS18047 = true
NPJG00015 = true

View file

@ -309,7 +309,8 @@ bool TestDepalShaders() {
config.shift = 8; config.shift = 8;
config.startPos = 64; config.startPos = 64;
config.mask = 0xFF; config.mask = 0xFF;
config.pixelFormat = GE_FORMAT_8888; config.bufferFormat = GE_FORMAT_8888;
config.textureFormat = GE_TFMT_CLUT32;
GenerateDepalFs(buffer, config, desc); GenerateDepalFs(buffer, config, desc);
if (!TestCompileShader(buffer, languages[k], ShaderStage::Fragment, &errorMessage)) { if (!TestCompileShader(buffer, languages[k], ShaderStage::Fragment, &errorMessage)) {