Merge pull request #12443 from hrydgard/darkstalkers-work

Darkstalkers Chronicle: Add specializations and speedhacks to get it kinda playable
This commit is contained in:
Henrik Rydgård 2019-10-27 21:19:53 +01:00 committed by GitHub
commit b4438c5742
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
24 changed files with 493 additions and 48 deletions

1
.gitignore vendored
View file

@ -68,6 +68,7 @@ build.ios
versionname.txt
versioncode.txt
build*/
android/.cxx
# Temp file used by jenkins windows build (TODO: remove)
desc.txt

View file

@ -67,6 +67,8 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) {
CheckSetting(iniFile, gameID, "JitInvalidationHack", &flags_.JitInvalidationHack);
CheckSetting(iniFile, gameID, "HideISOFiles", &flags_.HideISOFiles);
CheckSetting(iniFile, gameID, "MoreAccurateVMMUL", &flags_.MoreAccurateVMMUL);
CheckSetting(iniFile, gameID, "ForceSoftwareRenderer", &flags_.ForceSoftwareRenderer);
CheckSetting(iniFile, gameID, "DarkStalkersPresentHack", &flags_.DarkStalkersPresentHack);
}
void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) {

View file

@ -67,6 +67,8 @@ struct CompatFlags {
bool JitInvalidationHack;
bool HideISOFiles;
bool MoreAccurateVMMUL;
bool ForceSoftwareRenderer;
bool DarkStalkersPresentHack;
};
class IniFile;

View file

@ -130,7 +130,7 @@ enum UtilityDialogType {
// Only a single dialog is allowed at a time.
static UtilityDialogType currentDialogType;
static bool currentDialogActive;
bool currentDialogActive;
static PSPSaveDialog saveDialog;
static PSPMsgDialog msgDialog;
static PSPOskDialog oskDialog;

View file

@ -349,6 +349,11 @@ bool PSP_InitStart(const CoreParameter &coreParam, std::string *error_string) {
CPU_Init();
// Compat flags get loaded in CPU_Init (which is a bit of a misnomer) so we check for SW renderer here.
if (g_Config.bSoftwareRendering || PSP_CoreParameter().compat.flags().ForceSoftwareRenderer) {
coreParameter.gpuCore = GPUCORE_SOFTWARE;
}
*error_string = coreParameter.errorString;
bool success = coreParameter.fileToStart != "";
if (!success) {

View file

@ -197,6 +197,7 @@
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<ForcedIncludeFiles>Common/DbgNew.h</ForcedIncludeFiles>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<SupportJustMyCode>false</SupportJustMyCode>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>

View file

@ -625,6 +625,10 @@ public:
*this = *this / f;
}
// Exact component-wise equality: true only when x, y, z and w all compare equal.
bool operator ==(const Vec4 &other) const {
	// Bail out on the first component that does not match.
	if (!(x == other.x))
		return false;
	if (!(y == other.y))
		return false;
	if (!(z == other.z))
		return false;
	return w == other.w;
}
T Length2() const
{
return x*x + y*y + z*z + w*w;

View file

@ -17,6 +17,8 @@
#include <algorithm>
#include "Core/System.h"
#include "GPU/GPUState.h"
#include "GPU/Software/Clipper.h"
@ -24,6 +26,11 @@
#include "profiler/profiler.h"
extern bool g_DarkStalkerStretch;
// For Darkstalkers hack. Ugh.
extern bool currentDialogActive;
namespace Clipper {
enum {
@ -49,39 +56,36 @@ static inline int CalcClipMask(const ClipCoords& v)
return mask;
}
#define AddInterpolatedVertex(t, out, in, numVertices) \
{ \
Vertices[numVertices]->Lerp(t, *Vertices[out], *Vertices[in]); \
numVertices++; \
// Returns true when exactly one of x and y is strictly positive while the other is <= 0.
// Zero counts as "non-positive". If either value is NaN the result is false.
inline bool different_signs(float x, float y) {
	if (x <= 0) {
		return y > 0;
	}
	if (x > 0) {
		return y <= 0;
	}
	// x is NaN: neither comparison holds.
	return false;
}
#define DIFFERENT_SIGNS(x,y) ((x <= 0 && y > 0) || (x > 0 && y <= 0))
#define CLIP_DOTPROD(I, A, B, C, D) \
(Vertices[I]->clippos.x * A + Vertices[I]->clippos.y * B + Vertices[I]->clippos.z * C + Vertices[I]->clippos.w * D)
// Dot product of the vertex's clip-space position (x, y, z, w) with the
// coefficients (A, B, C, D).
inline float clip_dotprod(const VertexData &vert, float A, float B, float C, float D) {
	const auto &pos = vert.clippos;
	return pos.x * A + pos.y * B + pos.z * C + pos.w * D;
}
#define POLY_CLIP( PLANE_BIT, A, B, C, D ) \
{ \
if (mask & PLANE_BIT) { \
int idxPrev = inlist[0]; \
float dpPrev = CLIP_DOTPROD(idxPrev, A, B, C, D ); \
float dpPrev = clip_dotprod(*Vertices[idxPrev], A, B, C, D );\
int outcount = 0; \
\
inlist[n] = inlist[0]; \
for (int j = 1; j <= n; j++) { \
int idx = inlist[j]; \
float dp = CLIP_DOTPROD(idx, A, B, C, D ); \
float dp = clip_dotprod(*Vertices[idx], A, B, C, D ); \
if (dpPrev >= 0) { \
outlist[outcount++] = idxPrev; \
} \
\
if (DIFFERENT_SIGNS(dp, dpPrev)) { \
if (different_signs(dp, dpPrev)) { \
if (dp < 0) { \
float t = dp / (dp - dpPrev); \
AddInterpolatedVertex(t, idx, idxPrev, numVertices); \
Vertices[numVertices++]->Lerp(t, *Vertices[idx], *Vertices[idxPrev]); \
} else { \
float t = dpPrev / (dpPrev - dp); \
AddInterpolatedVertex(t, idxPrev, idx, numVertices); \
Vertices[numVertices++]->Lerp(t, *Vertices[idxPrev], *Vertices[idx]); \
} \
outlist[outcount++] = numVertices - 1; \
} \
@ -104,25 +108,23 @@ static inline int CalcClipMask(const ClipCoords& v)
#define CLIP_LINE(PLANE_BIT, A, B, C, D) \
{ \
if (mask & PLANE_BIT) { \
float dp0 = CLIP_DOTPROD(0, A, B, C, D ); \
float dp1 = CLIP_DOTPROD(1, A, B, C, D ); \
int i = 0; \
if (mask & PLANE_BIT) { \
float dp0 = clip_dotprod(*Vertices[0], A, B, C, D ); \
float dp1 = clip_dotprod(*Vertices[1], A, B, C, D ); \
int numVertices = 0; \
\
if (mask0 & PLANE_BIT) { \
if (dp0 < 0) { \
float t = dp1 / (dp1 - dp0); \
i = 0; \
AddInterpolatedVertex(t, 1, 0, i); \
Vertices[0]->Lerp(t, *Vertices[1], *Vertices[0]); \
} \
} \
dp0 = CLIP_DOTPROD(0, A, B, C, D ); \
dp0 = clip_dotprod(*Vertices[0], A, B, C, D ); \
\
if (mask1 & PLANE_BIT) { \
if (dp1 < 0) { \
float t = dp1 / (dp1- dp0); \
i = 1; \
AddInterpolatedVertex(t, 1, 0, i); \
Vertices[1]->Lerp(t, *Vertices[1], *Vertices[0]); \
} \
} \
} \
@ -139,8 +141,11 @@ static void RotateUVThrough(const VertexData &tl, const VertexData &br, VertexDa
}
}
bool needsClear = false;
void ProcessRect(const VertexData& v0, const VertexData& v1)
{
g_DarkStalkerStretch = false;
if (!gstate.isModeThrough()) {
VertexData buf[4];
buf[0].clippos = ClipCoords(v0.clippos.x, v0.clippos.y, v1.clippos.z, v1.clippos.w);
@ -182,6 +187,44 @@ void ProcessRect(const VertexData& v0, const VertexData& v1)
ProcessTriangle(*topleft, *bottomleft, *bottomright, buf[3]);
} else {
// through mode handling
// Check for 1:1 texture mapping. In that case we can call DrawSprite.
int xdiff = v1.screenpos.x - v0.screenpos.x;
int ydiff = v1.screenpos.y - v0.screenpos.y;
int udiff = (v1.texturecoords.x - v0.texturecoords.x) * 16.0f;
int vdiff = (v1.texturecoords.y - v0.texturecoords.y) * 16.0f;
bool coord_check =
(xdiff == udiff || xdiff == -udiff) &&
(ydiff == vdiff || ydiff == -vdiff);
bool state_check = !gstate.isModeClear(); // TODO: Add support for clear modes in Rasterizer::DrawSprite.
if ((coord_check || !gstate.isTextureMapEnabled()) && state_check) {
Rasterizer::DrawSprite(v0, v1);
return;
}
// Eliminate the stretch blit in DarkStalkers.
// We compensate for that when blitting the framebuffer in SoftGpu.cpp.
if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && v0.texturecoords.x == 64.0f && v0.texturecoords.y == 16.0f && v1.texturecoords.x == 448.0f && v1.texturecoords.y == 240.0f) {
if (v0.screenpos.x == 0x7100 && v0.screenpos.y == 0x7780 && v1.screenpos.x == 0x8f00 && v1.screenpos.y == 0x8880) {
// Also check for save/load dialog.
if (!currentDialogActive) {
g_DarkStalkerStretch = true;
if (needsClear) {
needsClear = false;
// Afterwards, we also need to clear the actual destination. Can do a fast rectfill.
gstate.textureMapEnable &= ~1;
VertexData newV0 = v0;
newV0.color0 = Vec4<int>(0, 0, 0, 255);
Rasterizer::DrawSprite(newV0, v1);
gstate.textureMapEnable |= 1;
}
return;
} else {
needsClear = true;
}
} // else, handle the Capcom screen stretch, or the non-wide stretch? Or let's just not bother.
}
VertexData buf[4];
buf[0].screenpos = ScreenCoords(v0.screenpos.x, v0.screenpos.y, v1.screenpos.z);
buf[0].texturecoords = v0.texturecoords;
@ -196,7 +239,7 @@ void ProcessRect(const VertexData& v0, const VertexData& v1)
// Color and depth values of second vertex are used for the whole rectangle
buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0;
buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1;
buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; // is color1 ever used in through mode?
buf[0].clippos.w = buf[1].clippos.w = buf[2].clippos.w = buf[3].clippos.w = 1.0f;
buf[0].fogdepth = buf[1].fogdepth = buf[2].fogdepth = buf[3].fogdepth = 1.0f;

View file

@ -1287,6 +1287,200 @@ void DrawTriangleSlice(
}
}
// Through mode, with the specific Darkstalker settings.
//
// Writes color_in to a single 16-bit 5551 pixel. A source alpha of 255 replaces the
// color outright; any other alpha goes through AlphaBlendingResult() against the
// existing pixel. The destination's top bit (0x8000) is always carried over unchanged.
inline void DrawSinglePixel5551(u16 *pixel, const Vec4<int> &color_in) {
	// Expand the preserved top bit of the existing pixel to an 8888 alpha byte.
	const u32 kept_alpha = (*pixel & 0x8000) ? 0xff000000 : 0x00000000;

	u32 rgb;
	if (color_in.a() != 255) {
		const Vec4<int> dst = Vec4<int>::FromRGBA(RGBA5551ToRGBA8888(*pixel));
		const Vec3<int> blended = AlphaBlendingResult(color_in, dst);
		// ToRGB() always automatically clamps.
		rgb = blended.ToRGB();
	} else {
		// Fully opaque: take the color as-is, minus its alpha byte.
		rgb = color_in.ToRGBA() & 0xFFFFFF;
	}

	*pixel = RGBA8888ToRGBA5551(rgb | kept_alpha);
}
// Multiplies the primitive color by the texture color, per channel, scaled by 1/255.
// When color doubling is enabled in gstate, the RGB channels (not alpha) are
// additionally multiplied by 2.
//
// Each preprocessor branch is self-contained so that the SSE build has no unused
// locals and no unreachable trailing return.
static inline Vec4<int> ModulateRGBA(const Vec4<int>& prim_color, const Vec4<int>& texcolor) {
#if defined(_M_SSE)
	// We can be accurate up to 24 bit integers, should be enough.
	const __m128 p = _mm_cvtepi32_ps(prim_color.ivec);
	const __m128 t = _mm_cvtepi32_ps(texcolor.ivec);
	const __m128 product = _mm_mul_ps(p, t);

	__m128 scale;
	if (gstate.isColorDoublingEnabled()) {
		// We double right here, only for modulate. Other tex funcs do not color double.
		// The fourth lane (alpha) keeps the plain 1/255 factor.
		scale = _mm_setr_ps(2.0f / 255.0f, 2.0f / 255.0f, 2.0f / 255.0f, 1.0f / 255.0f);
	} else {
		scale = _mm_set_ps1(1.0f / 255.0f);
	}
	return Vec4<int>(_mm_cvtps_epi32(_mm_mul_ps(product, scale)));
#else
	Vec3<int> out_rgb;
	if (gstate.isColorDoublingEnabled()) {
		out_rgb = (prim_color.rgb() * texcolor.rgb() * 2) / 255;
	} else {
		out_rgb = prim_color.rgb() * texcolor.rgb() / 255;
	}
	const int out_a = (prim_color.a() * texcolor.a() / 255);
	return Vec4<int>(out_rgb.r(), out_rgb.g(), out_rgb.b(), out_a);
#endif
}
// True when the current GE state matches the one configuration handled by the
// direct 16-bit (5551) write loops in DrawSprite. Any other state has to go
// through the generic DrawSinglePixel path.
static inline bool SpriteStateAllows5551FastPath() {
	return !gstate.isStencilTestEnabled() &&
		!gstate.isDepthTestEnabled() &&
		!gstate.isLogicOpEnabled() &&
		!gstate.isColorTestEnabled() &&
		!gstate.isDitherEnabled() &&
		gstate.isAlphaTestEnabled() &&
		gstate.getAlphaTestRef() == 0 &&
		gstate.getAlphaTestMask() == 0xFF &&
		gstate.isAlphaBlendEnabled() &&
		gstate.isTextureAlphaUsed() &&
		gstate.getTextureFunction() == GE_TEXFUNC_MODULATE &&
		gstate.getColorMask() == 0x000000 &&
		gstate.FrameBufFormat() == GE_FORMAT_5551;
}

// Draws an axis-aligned rectangle spanning v0..v1 (v1 exclusive in drawing coordinates),
// clipped against the scissor rectangle.
//
// With texturing enabled, texels are stepped exactly one per pixel (optionally mirrored
// in either axis), starting from v0's texture coordinates. With texturing disabled, the
// rectangle is filled with v0.color0. In both cases the color used is v0.color0.
void DrawSprite(const VertexData& v0, const VertexData& v1) {
	const u8 *texptr = nullptr;

	GETextureFormat texfmt = gstate.getTextureFormat();
	u32 texaddr = gstate.getTextureAddress(0);
	int texbufw = GetTextureBufw(0, texaddr, texfmt);
	// NOTE(review): texptr stays null when the texture address is invalid, and is still
	// handed to the sampler below if texturing is enabled - confirm the sampler copes.
	if (Memory::IsValidAddress(texaddr))
		texptr = Memory::GetPointerUnchecked(texaddr);

	Sampler::NearestFunc nearestFunc = Sampler::GetNearestFunc();  // Looks at gstate.

	DrawingCoords pos0 = TransformUnit::ScreenToDrawing(v0.screenpos);
	DrawingCoords pos1 = TransformUnit::ScreenToDrawing(v1.screenpos);

	DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1(), 0);
	DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2(), 0);

	int z = pos0.z;
	float fog = 1.0f;

	const bool fastPath5551 = SpriteStateAllows5551FastPath();
	const int fbStride = gstate.FrameBufStride();

	if (gstate.isTextureMapEnabled()) {
		// 1:1 (but with mirror support) texture mapping!
		int s_start = v0.texturecoords.x;
		int t_start = v0.texturecoords.y;
		int ds = v1.texturecoords.x > v0.texturecoords.x ? 1 : -1;
		int dt = v1.texturecoords.y > v0.texturecoords.y ? 1 : -1;

		// When mirrored, the first texel sampled is one step before the start coordinate.
		if (ds < 0) {
			s_start += ds;
		}
		if (dt < 0) {
			t_start += dt;
		}

		// First clip the right and bottom sides, since we don't need to adjust the deltas.
		// The scissor bottom-right is inclusive while pos1 is exclusive, hence the + 1.
		if (pos1.x > scissorBR.x) pos1.x = scissorBR.x + 1;
		if (pos1.y > scissorBR.y) pos1.y = scissorBR.y + 1;
		// Now clip the other sides, advancing the texture start to match.
		if (pos0.x < scissorTL.x) {
			s_start += (scissorTL.x - pos0.x) * ds;
			pos0.x = scissorTL.x;
		}
		if (pos0.y < scissorTL.y) {
			t_start += (scissorTL.y - pos0.y) * dt;
			pos0.y = scissorTL.y;
		}

		if (fastPath5551) {
			// A pure white primitive color lets us skip the modulate step entirely.
			const bool isWhite = v0.color0 == Vec4<int>(255, 255, 255, 255);
			int t = t_start;
			for (int y = pos0.y; y < pos1.y; y++) {
				int s = s_start;
				u16 *pixel = fb.Get16Ptr(pos0.x, y, fbStride);
				if (isWhite) {
					for (int x = pos0.x; x < pos1.x; x++) {
						u32 tex_color = nearestFunc(s, t, texptr, texbufw, 0);
						// Texels with zero alpha are skipped.
						if (tex_color & 0xFF000000) {
							DrawSinglePixel5551(pixel, Vec4<int>::FromRGBA(tex_color));
						}
						s += ds;
						pixel++;
					}
				} else {
					for (int x = pos0.x; x < pos1.x; x++) {
						Vec4<int> prim_color = v0.color0;
						Vec4<int> tex_color = Vec4<int>::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0));
						prim_color = ModulateRGBA(prim_color, tex_color);
						// Pixels whose modulated alpha is zero are skipped.
						if (prim_color.a() > 0) {
							DrawSinglePixel5551(pixel, prim_color);
						}
						s += ds;
						pixel++;
					}
				}
				t += dt;
			}
		} else {
			int t = t_start;
			for (int y = pos0.y; y < pos1.y; y++) {
				int s = s_start;
				// Not really that fast but faster than triangle.
				for (int x = pos0.x; x < pos1.x; x++) {
					Vec4<int> prim_color = v0.color0;
					Vec4<int> tex_color = Vec4<int>::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0));
					prim_color = GetTextureFunctionOutput(prim_color, tex_color);
					DrawingCoords pos(x, y, z);
					DrawSinglePixel<false>(pos, (u16)z, 1.0f, prim_color);
					s += ds;
				}
				t += dt;
			}
		}
	} else {
		// Same clipping as the textured path: the scissor bottom-right is inclusive and
		// the loops below treat pos1 as exclusive, so clamp to scissorBR + 1. Clamping to
		// scissorBR itself would drop the last column/row inside the scissor.
		if (pos1.x > scissorBR.x) pos1.x = scissorBR.x + 1;
		if (pos1.y > scissorBR.y) pos1.y = scissorBR.y + 1;
		if (pos0.x < scissorTL.x) pos0.x = scissorTL.x;
		if (pos0.y < scissorTL.y) pos0.y = scissorTL.y;

		if (fastPath5551) {
			// Zero alpha: nothing is drawn on this path.
			if (v0.color0.a() == 0)
				return;

			for (int y = pos0.y; y < pos1.y; y++) {
				u16 *pixel = fb.Get16Ptr(pos0.x, y, fbStride);
				for (int x = pos0.x; x < pos1.x; x++) {
					Vec4<int> prim_color = v0.color0;
					DrawSinglePixel5551(pixel, prim_color);
					pixel++;
				}
			}
		} else {
			for (int y = pos0.y; y < pos1.y; y++) {
				for (int x = pos0.x; x < pos1.x; x++) {
					Vec4<int> prim_color = v0.color0;
					DrawingCoords pos(x, y, z);
					DrawSinglePixel<false>(pos, (u16)z, fog, prim_color);
				}
			}
		}
	}
}
// Draws triangle, vertices specified in counter-clockwise direction
void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2)
{

View file

@ -27,6 +27,7 @@ namespace Rasterizer {
void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2);
void DrawPoint(const VertexData &v0);
void DrawLine(const VertexData &v0, const VertexData &v1);
void DrawSprite(const VertexData &v0, const VertexData &v1);
void ClearRectangle(const VertexData &v0, const VertexData &v1);
bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer);

View file

@ -73,8 +73,6 @@ SoftGPU::SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
},
};
ShaderModule *vshader = draw_->GetVshaderPreset(VS_TEXTURE_COLOR_2D);
vdata = draw_->CreateBuffer(sizeof(Vertex) * 4, BufferUsageFlag::DYNAMIC | BufferUsageFlag::VERTEXDATA);
idata = draw_->CreateBuffer(sizeof(int) * 6, BufferUsageFlag::DYNAMIC | BufferUsageFlag::INDEXDATA);
@ -92,6 +90,14 @@ SoftGPU::SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
inputLayout, depth, blendstateOff, rasterNoCull, &vsTexColBufDesc
};
texColor = draw_->CreateGraphicsPipeline(pipelineDesc);
PipelineDesc pipelineDescRBSwizzle{
Primitive::TRIANGLE_LIST,
{ draw_->GetVshaderPreset(VS_TEXTURE_COLOR_2D), draw_->GetFshaderPreset(FS_TEXTURE_COLOR_2D_RB_SWIZZLE) },
inputLayout, depth, blendstateOff, rasterNoCull, &vsTexColBufDesc
};
texColorRBSwizzle = draw_->CreateGraphicsPipeline(pipelineDescRBSwizzle);
inputLayout->Release();
depth->Release();
blendstateOff->Release();
@ -122,6 +128,8 @@ void SoftGPU::DeviceRestore() {
SoftGPU::~SoftGPU() {
texColor->Release();
texColor = nullptr;
texColorRBSwizzle->Release();
texColorRBSwizzle = nullptr;
if (fbTex) {
fbTex->Release();
@ -148,12 +156,16 @@ void SoftGPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat for
GPURecord::NotifyDisplay(framebuf, stride, format);
}
bool g_DarkStalkerStretch;
// Copies RGBA8 data from RAM to the currently bound render target.
void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
if (!draw_)
return;
float u0 = 0.0f;
float u1;
float v0 = 1.0f;
float v1 = 0.0f;
if (fbTex) {
fbTex->Release();
@ -163,6 +175,9 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
// For accuracy, try to handle 0 stride - sometimes used.
if (displayStride_ == 0) {
srcheight = 1;
u1 = 1.0f;
} else {
u1 = (float)srcwidth / displayStride_;
}
Draw::TextureDesc desc{};
@ -172,7 +187,26 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
desc.mipLevels = 1;
desc.tag = "SoftGPU";
bool hasImage = true;
if (!Memory::IsValidAddress(displayFramebuf_) || srcwidth == 0 || srcheight == 0) {
Draw::Pipeline *pipeline = texColor;
if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && displayFormat_ == GE_FORMAT_5551 && g_DarkStalkerStretch) {
u8 *data = Memory::GetPointer(0x04088000);
if (draw_->GetDataFormatSupport(Draw::DataFormat::A1B5G5R5_UNORM_PACK16) & Draw::FMT_TEXTURE) {
// The perfect one.
desc.format = Draw::DataFormat::A1B5G5R5_UNORM_PACK16;
} else if (draw_->GetDataFormatSupport(Draw::DataFormat::A1R5G5B5_UNORM_PACK16) & Draw::FMT_TEXTURE) {
// RB swapped, compensate with a shader.
desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16;
pipeline = texColorRBSwizzle;
}
desc.width = displayStride_ == 0 ? srcwidth : displayStride_;
desc.height = srcheight;
desc.initData.push_back(data);
u0 = 64.5f / 512.0f;
u1 = 447.5f / 512.0f;
v1 = 16.0f / 272.0f;
v0 = 240.0f / 272.0f;
} else if (!Memory::IsValidAddress(displayFramebuf_) || srcwidth == 0 || srcheight == 0) {
hasImage = false;
u1 = 1.0f;
} else if (displayFormat_ == GE_FORMAT_8888) {
@ -181,11 +215,20 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
desc.height = srcheight;
desc.initData.push_back(data);
desc.format = Draw::DataFormat::R8G8B8A8_UNORM;
if (displayStride_ != 0) {
u1 = (float)srcwidth / displayStride_;
} else {
u1 = 1.0f;
} else if (displayFormat_ == GE_FORMAT_5551) {
u8 *data = Memory::GetPointer(displayFramebuf_);
desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16;
if (draw_->GetDataFormatSupport(Draw::DataFormat::A1B5G5R5_UNORM_PACK16) & Draw::FMT_TEXTURE) {
// The perfect one.
desc.format = Draw::DataFormat::A1B5G5R5_UNORM_PACK16;
} else if (draw_->GetDataFormatSupport(Draw::DataFormat::A1R5G5B5_UNORM_PACK16) & Draw::FMT_TEXTURE) {
// RB swapped, compensate with a shader.
desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16;
pipeline = texColorRBSwizzle;
}
desc.width = displayStride_ == 0 ? srcwidth : displayStride_;
desc.height = srcheight;
desc.initData.push_back(data);
} else {
// TODO: This should probably be converted in a shader instead..
fbTexBuffer.resize(srcwidth * srcheight);
@ -247,12 +290,10 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
x2 -= 1.0f;
y2 -= 1.0f;
float v0 = 1.0f;
float v1 = 0.0f;
if (GetGPUBackend() == GPUBackend::VULKAN) {
std::swap(v0, v1);
}
draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE });
Draw::Viewport viewport = { 0.0f, 0.0f, dstwidth, dstheight, 0.0f, 1.0f };
draw_->SetViewports(1, &viewport);
@ -288,7 +329,7 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
Draw::VsTexColUB ub{};
memcpy(ub.WorldViewProj, g_display_rot_matrix.m, sizeof(float) * 16);
draw_->BindPipeline(texColor);
draw_->BindPipeline(pipeline);
draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub));
draw_->BindVertexBuffers(0, 1, &vdata, nullptr);
draw_->BindIndexBuffer(idata, 0);

View file

@ -44,6 +44,10 @@ struct FormatBuffer {
inline u32 Get32(int x, int y, int stride) {
return as32[x + y * stride];
}
// Returns the address of the 16-bit element at column x of row y, where consecutive
// rows are `stride` elements apart. No bounds checking is done.
inline u16 *Get16Ptr(int x, int y, int stride) {
	const int index = x + y * stride;
	return &as16[index];
}
};
class SoftwareDrawEngine;
@ -108,6 +112,7 @@ private:
Draw::Texture *fbTex;
Draw::Pipeline *texColor;
Draw::Pipeline *texColorRBSwizzle;
std::vector<u32> fbTexBuffer;
Draw::SamplerState *samplerNearest = nullptr;

View file

@ -319,8 +319,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
VertexReader vreader(buf, vtxfmt, vertex_type);
const int max_vtcs_per_prim = 3;
static VertexData data[max_vtcs_per_prim];
static VertexData data[4]; // Normally max verts per prim is 3, but we temporarily need 4 to detect rectangles from strips.
// This is the index of the next vert in data (or higher, may need modulus.)
static int data_index = 0;
@ -439,6 +438,62 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
// Don't draw a triangle when loading the first two vertices.
int skip_count = data_index >= 2 ? 0 : 2 - data_index;
// If index count == 4, check if we can convert to a rectangle.
// This is for Darkstalkers (and should speed up many 2D games).
if (vertex_count == 4 && gstate.isModeThrough()) {
for (int vtx = 0; vtx < 4; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
}
else {
vreader.Goto(vtx);
}
data[vtx] = ReadVertex(vreader);
}
// OK, now let's look at data to detect rectangles. There are a few possibilities
// but we focus on Darkstalkers for now.
if (data[0].screenpos.x == data[1].screenpos.x &&
data[0].screenpos.y == data[2].screenpos.y &&
data[2].screenpos.x == data[3].screenpos.x &&
data[1].screenpos.y == data[3].screenpos.y &&
data[1].screenpos.y > data[0].screenpos.y && // Avoid rotation handling
data[2].screenpos.x > data[0].screenpos.x &&
data[0].texturecoords.x == data[1].texturecoords.x &&
data[0].texturecoords.y == data[2].texturecoords.y &&
data[2].texturecoords.x == data[3].texturecoords.x &&
data[1].texturecoords.y == data[3].texturecoords.y &&
data[1].texturecoords.y > data[0].texturecoords.y &&
data[2].texturecoords.x > data[0].texturecoords.x &&
data[0].color0 == data[1].color0 &&
data[1].color0 == data[2].color0 &&
data[2].color0 == data[3].color0) {
// It's a rectangle!
Clipper::ProcessRect(data[0], data[3]);
break;
}
// There's the other vertex order too...
if (data[0].screenpos.x == data[2].screenpos.x &&
data[0].screenpos.y == data[1].screenpos.y &&
data[1].screenpos.x == data[3].screenpos.x &&
data[2].screenpos.y == data[3].screenpos.y &&
data[2].screenpos.y > data[0].screenpos.y && // Avoid rotation handling
data[1].screenpos.x > data[0].screenpos.x &&
data[0].texturecoords.x == data[2].texturecoords.x &&
data[0].texturecoords.y == data[1].texturecoords.y &&
data[1].texturecoords.x == data[3].texturecoords.x &&
data[2].texturecoords.y == data[3].texturecoords.y &&
data[2].texturecoords.y > data[0].texturecoords.y &&
data[1].texturecoords.x > data[0].texturecoords.x &&
data[0].color0 == data[1].color0 &&
data[1].color0 == data[2].color0 &&
data[2].color0 == data[3].color0) {
// It's a rectangle!
Clipper::ProcessRect(data[0], data[3]);
break;
}
}
for (int vtx = 0; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);

View file

@ -232,9 +232,6 @@ void EmuScreen::bootGame(const std::string &filename) {
break;
#endif
}
if (g_Config.bSoftwareRendering) {
coreParam.gpuCore = GPUCORE_SOFTWARE;
}
// Preserve the existing graphics context.
coreParam.graphicsContext = PSP_CoreParameter().graphicsContext;

View file

@ -116,6 +116,7 @@ bool WindowsVulkanContext::Init(HINSTANCE hInst, HWND hWnd, std::string *error_m
if (!g_Config.sVulkanDevice.empty())
g_Config.sVulkanDevice = g_Vulkan->GetPhysicalDeviceProperties(deviceNum).properties.deviceName;
}
g_Vulkan->ChooseDevice(deviceNum);
if (g_Vulkan->CreateDevice() != VK_SUCCESS) {
*error_message = g_Vulkan->InitError();

View file

@ -683,3 +683,13 @@ UCET00844 = true
UCUS98705 = true
UCED00971 = true
UCUS98713 = true
[ForceSoftwareRenderer]
# Darkstalkers
ULES00016 = true
ULUS10005 = true
[DarkStalkersPresentHack]
# Darkstalkers
ULES00016 = true
ULUS10005 = true

View file

@ -17,7 +17,7 @@ float pixel_in_dps_y = 1.0f;
float display_hz = 60.0f;
DisplayRotation g_display_rotation;
Lin::Matrix4x4 g_display_rot_matrix;
Lin::Matrix4x4 g_display_rot_matrix = Lin::Matrix4x4::identity();
template<class T>
void RotateRectToDisplayImpl(DisplayRect<T> &rect, T curRTWidth, T curRTHeight) {

View file

@ -57,7 +57,11 @@ public:
empty();
xx=yy=zz=f; ww=1.0f;
}
// Factory: returns a matrix by value after calling setIdentity() on it.
// Usable as an initializer for global/static matrices.
static Matrix4x4 identity() {
	Matrix4x4 result;
	result.setIdentity();
	return result;
}
void setIdentity() {
setScaling(1.0f);
}

View file

@ -30,6 +30,7 @@ enum class DataFormat : uint8_t {
R5G5B5A1_UNORM_PACK16, // A1 in the LOWER bit
B5G5R5A1_UNORM_PACK16, // A1 in the LOWER bit
A1R5G5B5_UNORM_PACK16, // A1 in the UPPER bit.
A1B5G5R5_UNORM_PACK16, // A1 in the UPPER bit. OpenGL-only.
R16_FLOAT,
R16G16_FLOAT,

View file

@ -146,6 +146,50 @@ static const std::vector<ShaderSource> fsTexCol = {
}
};
static const std::vector<ShaderSource> fsTexColRBSwizzle = {
{ShaderLanguage::GLSL_ES_200,
"#ifdef GL_ES\n"
"precision lowp float;\n"
"#endif\n"
"#if __VERSION__ >= 130\n"
"#define varying in\n"
"#define texture2D texture\n"
"#define gl_FragColor fragColor0\n"
"out vec4 fragColor0;\n"
"#endif\n"
"varying vec4 oColor0;\n"
"varying vec2 oTexCoord0;\n"
"uniform sampler2D Sampler0;\n"
"void main() { gl_FragColor = texture2D(Sampler0, oTexCoord0).zyxw * oColor0; }\n"
},
{ShaderLanguage::HLSL_D3D9,
"struct PS_INPUT { float4 color : COLOR0; float2 uv : TEXCOORD0; };\n"
"sampler2D Sampler0 : register(s0);\n"
"float4 main(PS_INPUT input) : COLOR0 {\n"
" return input.color * tex2D(Sampler0, input.uv).zyxw;\n"
"}\n"
},
{ShaderLanguage::HLSL_D3D11,
"struct PS_INPUT { float4 color : COLOR0; float2 uv : TEXCOORD0; };\n"
"SamplerState samp : register(s0);\n"
"Texture2D<float4> tex : register(t0);\n"
"float4 main(PS_INPUT input) : SV_Target {\n"
" float4 col = input.color * tex.Sample(samp, input.uv).bgra;\n"
" return col;\n"
"}\n"
},
{ShaderLanguage::GLSL_VULKAN,
"#version 140\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"#extension GL_ARB_shading_language_420pack : enable\n"
"layout(location = 0) in vec4 oColor0;\n"
"layout(location = 1) in vec2 oTexCoord0;\n"
"layout(location = 0) out vec4 fragColor0\n;"
"layout(set = 0, binding = 1) uniform sampler2D Sampler0;\n"
"void main() { fragColor0 = texture(Sampler0, oTexCoord0).bgra * oColor0; }\n"
}
};
static const std::vector<ShaderSource> fsCol = {
{ ShaderLanguage::GLSL_ES_200,
"#ifdef GL_ES\n"
@ -330,8 +374,9 @@ bool DrawContext::CreatePresets() {
fsPresets_[FS_TEXTURE_COLOR_2D] = CreateShader(this, ShaderStage::FRAGMENT, fsTexCol);
fsPresets_[FS_COLOR_2D] = CreateShader(this, ShaderStage::FRAGMENT, fsCol);
fsPresets_[FS_TEXTURE_COLOR_2D_RB_SWIZZLE] = CreateShader(this, ShaderStage::FRAGMENT, fsTexColRBSwizzle);
return vsPresets_[VS_TEXTURE_COLOR_2D] && vsPresets_[VS_COLOR_2D] && fsPresets_[FS_TEXTURE_COLOR_2D] && fsPresets_[FS_COLOR_2D];
return vsPresets_[VS_TEXTURE_COLOR_2D] && vsPresets_[VS_COLOR_2D] && fsPresets_[FS_TEXTURE_COLOR_2D] && fsPresets_[FS_COLOR_2D] && fsPresets_[FS_TEXTURE_COLOR_2D_RB_SWIZZLE];
}
void DrawContext::DestroyPresets() {

View file

@ -146,6 +146,7 @@ enum VertexShaderPreset : int {
enum FragmentShaderPreset : int {
FS_COLOR_2D,
FS_TEXTURE_COLOR_2D,
FS_TEXTURE_COLOR_2D_RB_SWIZZLE,
FS_MAX_PRESET,
};

View file

@ -348,6 +348,10 @@ bool D3D9Texture::Create(const TextureDesc &desc) {
format_ = desc.format;
tex_ = NULL;
d3dfmt_ = FormatToD3DFMT(desc.format);
if (d3dfmt_ == D3DFMT_UNKNOWN) {
return false;
}
HRESULT hr = E_FAIL;
D3DPOOL pool = D3DPOOL_MANAGED;
@ -424,6 +428,7 @@ void D3D9Texture::SetImageData(int x, int y, int z, int width, int height, int d
}
break;
case DataFormat::A4R4G4B4_UNORM_PACK16:
case DataFormat::A1R5G5B5_UNORM_PACK16:
// Native
memcpy(dest, source, width * sizeof(uint16_t));
break;
@ -437,6 +442,10 @@ void D3D9Texture::SetImageData(int x, int y, int z, int width, int height, int d
case DataFormat::B8G8R8A8_UNORM:
memcpy(dest, source, sizeof(uint32_t) * width);
break;
default:
// Unhandled data format copy.
DebugBreak();
break;
}
}
tex_->UnlockRect(level);

View file

@ -277,7 +277,7 @@ bool OpenGLShaderModule::Compile(GLRenderManager *render, ShaderLanguage languag
class OpenGLInputLayout : public InputLayout {
public:
OpenGLInputLayout(GLRenderManager *render) : render_(render), stride(0) {}
OpenGLInputLayout(GLRenderManager *render) : render_(render) {}
~OpenGLInputLayout();
void Compile(const InputLayoutDesc &desc);
@ -286,7 +286,7 @@ public:
}
GLRInputLayout *inputLayout_ = nullptr;
int stride;
int stride = 0;
private:
GLRenderManager *render_;
};
@ -718,6 +718,15 @@ public:
FBColorDepth colorDepth = FBO_8888;
};
// TODO: SSE/NEON optimize, and move to ColorConv.cpp.
//
// Copies `count` 16-bit values from src to dest, rotating each one left by one bit:
// bit 15 moves down to bit 0 and bits 0-14 move up to bits 1-15.
// dest and src may be the same buffer, since each element is read before it is written.
void MoveABit(u16 *dest, const u16 *src, size_t count) {
	// Index with size_t to match `count`; an int index would be a signed/unsigned
	// comparison and could overflow for very large counts.
	for (size_t i = 0; i < count; i++) {
		const u16 data = src[i];
		// The shifts are evaluated as int after promotion; narrow back explicitly.
		dest[i] = static_cast<u16>((data << 1) | (data >> 15));
	}
}
void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data) {
if (width != width_ || height != height_ || depth != depth_) {
// When switching to texStorage we need to handle this correctly.
@ -729,12 +738,20 @@ void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int
if (stride == 0)
stride = width;
size_t alignment = DataFormatSizeInBytes(format_);
// Make a copy of data with stride eliminated.
uint8_t *texData = new uint8_t[(size_t)(width * height * alignment)];
for (int y = 0; y < height; y++) {
memcpy(texData + y * width * alignment, data + y * stride * alignment, width * alignment);
// Emulate support for DataFormat::A1R5G5B5_UNORM_PACK16.
if (format_ == DataFormat::A1R5G5B5_UNORM_PACK16) {
format_ = DataFormat::R5G5B5A1_UNORM_PACK16;
for (int y = 0; y < height; y++) {
MoveABit((u16 *)(texData + y * width * alignment), (const u16 *)(data + y * stride * alignment), width);
}
} else {
for (int y = 0; y < height; y++) {
memcpy(texData + y * width * alignment, data + y * stride * alignment, width * alignment);
}
}
render_->TextureImage(tex_, level, width, height, format_, texData);
}
@ -1220,6 +1237,9 @@ uint32_t OpenGLContext::GetDataFormatSupport(DataFormat fmt) const {
case DataFormat::R8G8B8A8_UNORM:
return FMT_RENDERTARGET | FMT_TEXTURE | FMT_INPUTLAYOUT | FMT_AUTOGEN_MIPS;
case DataFormat::A1R5G5B5_UNORM_PACK16:
return FMT_TEXTURE; // we will emulate this! Very fast to convert from R5G5B5A1_UNORM_PACK16 during upload.
case DataFormat::R32_FLOAT:
case DataFormat::R32G32_FLOAT:
case DataFormat::R32G32B32_FLOAT:

View file

@ -699,6 +699,7 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushBuffer *push, const Textur
// Gonna have to generate some, which requires TRANSFER_SRC
usageBits |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
}
if (!vkTex_->CreateDirect(cmd, alloc, width_, height_, mipLevels_, vulkanFormat, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, usageBits)) {
ELOG("Failed to create VulkanTexture: %dx%dx%d fmt %d, %d levels", width_, height_, depth_, (int)vulkanFormat, mipLevels_);
return false;
@ -1358,6 +1359,8 @@ uint32_t VKContext::GetDataFormatSupport(DataFormat fmt) const {
return 0;
case DataFormat::A4R4G4B4_UNORM_PACK16:
return 0;
case DataFormat::A1R5G5B5_UNORM_PACK16:
return FMT_RENDERTARGET | FMT_TEXTURE;
case DataFormat::R8G8B8A8_UNORM:
return FMT_RENDERTARGET | FMT_TEXTURE | FMT_INPUTLAYOUT;