Merge remote-tracking branch 'origin/master' into Universal_Audio_Branch

This commit is contained in:
kaienfr 2014-04-14 11:49:27 +02:00
commit cc933d6fc9
25 changed files with 407 additions and 218 deletions

View file

@ -48,15 +48,6 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
if (NOT MOBILE_DEVICE)
set(USE_FFMPEG ON)
endif()
if (NOT ARM)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_M_X64")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_M_X64")
else()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_M_IX86")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_M_IX86")
endif()
endif()
endif()
if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
@ -64,6 +55,16 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(USE_FFMPEG ON)
endif()
# Pick an x86 architecture define for builds that are neither ARM nor MIPS.
# The choice is made from the pointer size: 8-byte pointers get -D_M_X64,
# anything else gets -D_M_IX86. Both the C and C++ flag sets are extended.
if (NOT ARM AND NOT MIPS)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_M_X64")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_M_X64")
else()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_M_IX86")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_M_IX86")
endif()
endif()
if(NOT DEFINED HEADLESS)
set(HEADLESS OFF)
endif()

View file

@ -134,6 +134,11 @@ bool FixPathCase(std::string& basePath, std::string &path, FixPathCaseBehavior b
#endif
// Sets up a directory-backed file system rooted at _basePath.
//   _hAlloc   - handle allocator stored for later use by this file system.
//   _basePath - host directory that acts as the root; File::CreateFullPath is
//               called on it (presumably creating it if missing - TODO confirm).
//   _flags    - FileSystemFlags bits, later reported through Flags().
DirectoryFileSystem::DirectoryFileSystem(IHandleAllocator *_hAlloc, std::string _basePath, int _flags)
	: basePath(_basePath), hAlloc(_hAlloc), flags(_flags) {
	File::CreateFullPath(basePath);
}
std::string DirectoryFileHandle::GetLocalPath(std::string& basePath, std::string localpath)
{
if (localpath.empty())
@ -326,11 +331,6 @@ void DirectoryFileHandle::Close()
#endif
}
DirectoryFileSystem::DirectoryFileSystem(IHandleAllocator *_hAlloc, std::string _basePath) : basePath(_basePath) {
File::CreateFullPath(basePath);
hAlloc = _hAlloc;
}
void DirectoryFileSystem::CloseAll() {
for (auto iter = entries.begin(); iter != entries.end(); ++iter) {
iter->second.hFile.Close();

View file

@ -86,7 +86,7 @@ struct DirectoryFileHandle
class DirectoryFileSystem : public IFileSystem {
public:
DirectoryFileSystem(IHandleAllocator *_hAlloc, std::string _basePath);
DirectoryFileSystem(IHandleAllocator *_hAlloc, std::string _basePath, int _flags = 0);
~DirectoryFileSystem();
void CloseAll();
@ -108,6 +108,7 @@ public:
int RenameFile(const std::string &from, const std::string &to);
bool RemoveFile(const std::string &filename);
bool GetHostPath(const std::string &inpath, std::string &outpath);
int Flags() { return flags; }
private:
struct OpenFileEntry {
@ -120,7 +121,7 @@ private:
EntryMap entries;
std::string basePath;
IHandleAllocator *hAlloc;
int flags;
// In case of Windows: Translate slashes, etc.
std::string GetLocalPath(std::string localpath);
};
@ -149,6 +150,7 @@ public:
int RenameFile(const std::string &from, const std::string &to);
bool RemoveFile(const std::string &filename);
bool GetHostPath(const std::string &inpath, std::string &outpath);
int Flags() { return 0; }
private:
struct OpenFileEntry {

View file

@ -50,6 +50,10 @@ enum DevType
PSP_DEV_TYPE_ALIAS = 0x20,
};
// Bit flags describing a file system. IFileSystem::Flags() returns a
// combination of these, which callers test with bitwise AND.
enum FileSystemFlags
{
// Passed when the memory stick file system is created; sceIoDread checks it
// before filling in the d_private area of a directory entry.
FILESYSTEM_SIMULATE_FAT32 = 1,
};
class IHandleAllocator {
public:
@ -112,6 +116,7 @@ public:
virtual bool GetHostPath(const std::string &inpath, std::string &outpath) = 0;
virtual int Ioctl(u32 handle, u32 cmd, u32 indataPtr, u32 inlen, u32 outdataPtr, u32 outlen, int &usec) = 0;
virtual int DevType(u32 handle) = 0;
// Returns the FileSystemFlags bits that apply to this file system (0 if none).
virtual int Flags() = 0;
};
@ -133,7 +138,8 @@ public:
virtual bool RemoveFile(const std::string &filename) {return false;}
virtual bool GetHostPath(const std::string &inpath, std::string &outpath) {return false;}
virtual int Ioctl(u32 handle, u32 cmd, u32 indataPtr, u32 inlen, u32 outdataPtr, u32 outlen, int &usec) {return SCE_KERNEL_ERROR_ERRNO_FUNCTION_NOT_SUPPORTED; }
virtual int DevType(u32 handle) {return 0;}
virtual int DevType(u32 handle) { return 0; }
virtual int Flags() { return 0; }
};

View file

@ -41,6 +41,7 @@ public:
bool OwnsHandle(u32 handle);
int Ioctl(u32 handle, u32 cmd, u32 indataPtr, u32 inlen, u32 outdataPtr, u32 outlen, int &usec);
int DevType(u32 handle);
int Flags() { return 0; }
size_t WriteFile(u32 handle, const u8 *pointer, s64 size);
bool GetHostPath(const std::string &inpath, std::string &outpath) {return false;}

View file

@ -255,6 +255,10 @@ std::string MetaFileSystem::NormalizePrefix(std::string prefix) const {
if (startsWith(prefix, "host"))
prefix = "host0:";
// Should we simply make this case insensitive?
if (prefix == "DISC0:")
prefix = "disc0:";
return prefix;
}
@ -284,6 +288,13 @@ void MetaFileSystem::Remount(IFileSystem *oldSystem, IFileSystem *newSystem) {
}
}
// Looks up the mounted file system that a full filename belongs to.
// The prefix is everything up to and including the first ':' in filename
// and is resolved through GetSystem(). Returns 0 if filename has no ':'.
IFileSystem *MetaFileSystem::GetSystemFromFilename(const std::string &filename) {
	const size_t colon = filename.find(':');
	if (colon != std::string::npos) {
		// Keep the ':' itself as part of the prefix handed to GetSystem().
		const std::string prefix = filename.substr(0, colon + 1);
		return GetSystem(prefix);
	}
	return 0;
}
IFileSystem *MetaFileSystem::GetSystem(const std::string &prefix) {
for (auto it = fileSystems.begin(); it != fileSystems.end(); ++it) {
if (it->prefix == NormalizePrefix(prefix))

View file

@ -54,6 +54,7 @@ public:
void Remount(IFileSystem *oldSystem, IFileSystem *newSystem);
IFileSystem *GetSystem(const std::string &prefix);
IFileSystem *GetSystemFromFilename(const std::string &filename);
void ThreadEnded(int threadID);
@ -106,6 +107,7 @@ public:
virtual bool RemoveFile(const std::string &filename);
virtual int Ioctl(u32 handle, u32 cmd, u32 indataPtr, u32 inlen, u32 outdataPtr, u32 outlen, int &usec);
virtual int DevType(u32 handle);
virtual int Flags() { return 0; }
// Convenience helper - returns < 0 on failure.
int ReadEntireFile(const std::string &filename, std::vector<u8> &data);

View file

@ -40,6 +40,7 @@ public:
int DevType(u32 handle);
bool GetHostPath(const std::string &inpath, std::string &outpath);
std::vector<PSPFileInfo> GetDirListing(std::string path);
int Flags() { return 0; }
// unsupported operations
size_t WriteFile(u32 handle, const u8 *pointer, s64 size);

View file

@ -226,7 +226,7 @@ void __DisplayDoState(PointerWrap &p) {
CoreTiming::RestoreRegisterEvent(afterFlipEvent, "AfterFlip", &hleAfterFlip);
p.Do(gstate);
p.Do(gstate_c);
gstate_c.DoState(p);
#ifndef _XBOX
if (s < 2) {
// This shouldn't have been savestated anyway, but it was.
@ -672,7 +672,8 @@ u32 sceDisplaySetFramebuf(u32 topaddr, int linesize, int pixelformat, int sync)
const int FLIP_DELAY_MIN_FLIPS = 30;
u64 now = CoreTiming::GetTicks();
u64 expected = msToCycles(1000) / g_Config.iForceMaxEmulatedFPS;
// 1001 to account for NTSC timing (59.94 fps.)
u64 expected = msToCycles(1001) / g_Config.iForceMaxEmulatedFPS;
u64 actual = now - lastFlipCycles;
if (actual < expected - FLIP_DELAY_CYCLES_MIN) {
if (lastFlipsTooFrequent >= FLIP_DELAY_MIN_FLIPS) {

View file

@ -457,7 +457,7 @@ void __IoInit() {
asyncNotifyEvent = CoreTiming::RegisterEvent("IoAsyncNotify", __IoAsyncNotify);
syncNotifyEvent = CoreTiming::RegisterEvent("IoSyncNotify", __IoSyncNotify);
memstickSystem = new DirectoryFileSystem(&pspFileSystem, g_Config.memCardDirectory);
memstickSystem = new DirectoryFileSystem(&pspFileSystem, g_Config.memCardDirectory, FILESYSTEM_SIMULATE_FAT32);
#if defined(USING_WIN_UI) || defined(APPLE)
flash0System = new DirectoryFileSystem(&pspFileSystem, g_Config.flash0Directory);
#else
@ -716,7 +716,7 @@ u32 npdrmRead(FileNode *f, u8 *data, int size) {
memcpy(data, pgd->block_buf+offset, copy_size);
block += 1;
offset = 0;
}else{
} else {
copy_size = remain_size;
memcpy(data, pgd->block_buf+offset, copy_size);
}
@ -1925,6 +1925,19 @@ u32 sceIoDopen(const char *path) {
return id;
}
// Bounded string copy that writes only what it needs to.
//
// strncpy pads the rest of the destination with zeros up to the given count.
// That is unwanted when dest points at an area whose real size is unknown
// (such as the d_private area written by sceIoDread), so this trivial
// replacement writes just the copied characters plus a single terminator.
//
//   dest  - destination buffer with room for at least `limit` bytes.
//   src   - null-terminated source string.
//   limit - capacity of dest in bytes, terminator included; at most
//           limit - 1 characters are copied. If limit <= 0 nothing is written.
static void strcpy_limit(char *dest, const char *src, int limit) {
	// With no capacity there is no room even for the terminator, so bail out
	// instead of writing dest[0] out of bounds.
	if (limit <= 0)
		return;

	int i = 0;
	while (i < limit - 1 && src[i]) {
		dest[i] = src[i];
		i++;
	}
	// Always null terminate.
	dest[i] = 0;
}
u32 sceIoDread(int id, u32 dirent_addr) {
u32 error;
DirListing *dir = kernelObjects.Get<DirListing>(id, error);
@ -1943,8 +1956,15 @@ u32 sceIoDread(int id, u32 dirent_addr) {
strncpy(entry->d_name, info.name.c_str(), 256);
entry->d_name[255] = '\0';
bool isFAT = false;
IFileSystem *sys = pspFileSystem.GetSystemFromFilename(dir->name);
if (sys && (sys->Flags() & FILESYSTEM_SIMULATE_FAT32))
isFAT = true;
else
isFAT = false;
// Only write d_private for memory stick
if (dir->name.substr(0, 3) == "ms0") {
if (isFAT) {
// write d_private for supporting Custom BGM
// ref JPCSP https://code.google.com/p/jpcsp/source/detail?r=3468
if (Memory::IsValidAddress(entry->d_private)){
@ -1952,7 +1972,9 @@ u32 sceIoDread(int id, u32 dirent_addr) {
// d_private is pointing to an area of unknown size
// - [0..12] "8.3" file name (null-terminated), could be empty.
// - [13..???] long file name (null-terminated)
strncpy((char*)Memory::GetPointer(entry->d_private + 13), (const char*)entry->d_name, ARRAY_SIZE(entry->d_name));
// Hm, so currently we don't write the short name at all to d_private? TODO
strcpy_limit((char*)Memory::GetPointer(entry->d_private + 13), (const char*)entry->d_name, ARRAY_SIZE(entry->d_name));
}
else {
// d_private is pointing to an area of total size 1044
@ -1960,8 +1982,9 @@ u32 sceIoDread(int id, u32 dirent_addr) {
// - [4..19] "8.3" file name (null-terminated), could be empty.
// - [20..???] long file name (null-terminated)
auto size = Memory::Read_U32(entry->d_private);
// Hm, so currently we don't write the short name at all to d_private? TODO
if (size >= 1044) {
strncpy((char*)Memory::GetPointer(entry->d_private + 20), (const char*)entry->d_name, ARRAY_SIZE(entry->d_name));
strcpy_limit((char*)Memory::GetPointer(entry->d_private + 20), (const char*)entry->d_name, ARRAY_SIZE(entry->d_name));
}
}
}

View file

@ -428,7 +428,7 @@ void FramebufferManagerDX9::SetRenderFrameBuffer() {
// None found? Create one.
if (!vfb) {
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
vfb = new VirtualFramebufferDX9();
vfb->fbo = 0;
vfb->fb_address = fb_address;
@ -512,7 +512,7 @@ void FramebufferManagerDX9::SetRenderFrameBuffer() {
// Use it as a render target.
DEBUG_LOG(SCEGE, "Switching render target to FBO for %08x: %i x %i x %i ", vfb->fb_address, vfb->width, vfb->height, vfb->format);
vfb->usageFlags |= FB_USAGE_RENDERTARGET;
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
vfb->last_frame_render = gpuStats.numFlips;
frameLastFramebufUsed = gpuStats.numFlips;
vfb->dirtyAfterDisplay = true;

View file

@ -520,7 +520,7 @@ void DIRECTX9_GPU::CopyDisplayToOutputInternal() {
shaderManager_->EndFrame();
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
}
// Maybe should write this in ASM...
@ -737,7 +737,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
case GE_CMD_REGION2:
if (diff) {
gstate_c.framebufChanged = true;
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
}
break;
@ -751,7 +751,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
case GE_CMD_TEXTUREMAPENABLE:
if (diff)
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
break;
case GE_CMD_LIGHTINGENABLE:
@ -829,7 +829,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
case GE_CMD_FRAMEBUFPIXFORMAT:
if (diff) {
gstate_c.framebufChanged = true;
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
}
break;
@ -841,7 +841,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
case GE_CMD_TEXADDR5:
case GE_CMD_TEXADDR6:
case GE_CMD_TEXADDR7:
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
break;
@ -853,18 +853,18 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
case GE_CMD_TEXBUFWIDTH5:
case GE_CMD_TEXBUFWIDTH6:
case GE_CMD_TEXBUFWIDTH7:
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
break;
case GE_CMD_CLUTADDR:
case GE_CMD_CLUTADDRUPPER:
case GE_CMD_CLUTFORMAT:
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
// This could be used to "dirty" textures with clut.
break;
case GE_CMD_LOADCLUT:
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
textureCache_.LoadClut();
// This could be used to "dirty" textures with clut.
break;
@ -897,7 +897,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
DoBlockTransfer();
// Fixes Gran Turismo's funky text issue.
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
break;
}
@ -913,7 +913,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
case GE_CMD_TEXSIZE5:
case GE_CMD_TEXSIZE6:
case GE_CMD_TEXSIZE7:
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
break;
case GE_CMD_ZBUFPTR:
@ -1046,7 +1046,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
case GE_CMD_VIEWPORTZ2:
if (diff) {
gstate_c.framebufChanged = true;
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
}
break;
@ -1381,7 +1381,7 @@ void DIRECTX9_GPU::DoState(PointerWrap &p) {
textureCache_.Clear(true);
transformDraw_.ClearTrackedVertexArrays();
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
framebufferManager_.DestroyAllFBOs();
shaderManager_->ClearCache(true);
}

View file

@ -112,11 +112,11 @@ static bool blendColorSimilar(const Vec3f &a, const Vec3f &b, float margin = 0.1
void TransformDrawEngineDX9::ApplyDrawState(int prim) {
// TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall.
if (gstate_c.textureChanged) {
if (gstate_c.textureChanged != TEXCHANGE_UNCHANGED) {
if (gstate.isTextureMapEnabled()) {
textureCache_->SetTexture();
}
gstate_c.textureChanged = false;
gstate_c.textureChanged = TEXCHANGE_UNCHANGED;
}
// TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a

View file

@ -731,17 +731,22 @@ void FramebufferManager::DoSetRenderFrameBuffer() {
}
}
if (vfb && (drawing_width != vfb->bufferWidth || drawing_height != vfb->bufferHeight)) {
// If it's newly wrong, or changing every frame, just keep track.
if (vfb->newWidth != drawing_width || vfb->newHeight != drawing_height) {
vfb->newWidth = drawing_width;
vfb->newHeight = drawing_height;
if (vfb) {
if ((drawing_width != vfb->bufferWidth || drawing_height != vfb->bufferHeight)) {
// If it's newly wrong, or changing every frame, just keep track.
if (vfb->newWidth != drawing_width || vfb->newHeight != drawing_height) {
vfb->newWidth = drawing_width;
vfb->newHeight = drawing_height;
vfb->lastFrameNewSize = gpuStats.numFlips;
} else if (vfb->lastFrameNewSize + FBO_OLD_AGE < gpuStats.numFlips) {
// Okay, it's changed for a while (and stayed that way.) Let's start over.
DestroyFramebuf(vfb);
vfbs_.erase(vfbs_.begin() + i);
vfb = NULL;
}
} else {
// It's not different, let's keep track of that too.
vfb->lastFrameNewSize = gpuStats.numFlips;
} else if (vfb->lastFrameNewSize + FBO_OLD_AGE <= gpuStats.numFlips) {
// Okay, it's changed for a while (and stayed that way.) Let's start over.
DestroyFramebuf(vfb);
vfbs_.erase(vfbs_.begin() + i);
vfb = NULL;
}
}
@ -750,7 +755,7 @@ void FramebufferManager::DoSetRenderFrameBuffer() {
// None found? Create one.
if (!vfb) {
gstate_c.textureChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
vfb = new VirtualFramebuffer();
vfb->fbo = 0;
vfb->fb_address = fb_address;
@ -854,7 +859,7 @@ void FramebufferManager::DoSetRenderFrameBuffer() {
// Use it as a render target.
DEBUG_LOG(SCEGE, "Switching render target to FBO for %08x: %i x %i x %i ", vfb->fb_address, vfb->width, vfb->height, vfb->format);
vfb->usageFlags |= FB_USAGE_RENDERTARGET;
gstate_c.textureChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
vfb->last_frame_render = gpuStats.numFlips;
frameLastFramebufUsed = gpuStats.numFlips;
vfb->dirtyAfterDisplay = true;
@ -1186,7 +1191,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s
glEnable(GL_DITHER);
} else {
nvfb->usageFlags |= FB_USAGE_RENDERTARGET;
gstate_c.textureChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
nvfb->last_frame_render = gpuStats.numFlips;
nvfb->dirtyAfterDisplay = true;
@ -1669,13 +1674,13 @@ void FramebufferManager::UpdateFromMemory(u32 addr, int size, bool safe) {
if (!Memory::IsValidAddress(displayFramebufPtr_))
return;
fbo_unbind();
currentRenderVfb_ = 0;
bool needUnbind = false;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
if (MaskedEqual(vfb->fb_address, addr)) {
fbo_unbind();
currentRenderVfb_ = 0;
vfb->dirtyAfterDisplay = true;
vfb->reallyDirtyAfterDisplay = true;
// TODO: This without the fbo_unbind() above would be better than destroying the FBO.

View file

@ -46,7 +46,7 @@ enum {
FLAG_FLUSHBEFORE = 1,
FLAG_FLUSHBEFOREONCHANGE = 2,
FLAG_EXECUTE = 4, // needs to actually be executed. unused for now.
FLAG_EXECUTEONCHANGE = 8, // unused for now. not sure if checking for this will be more expensive than doing it.
FLAG_EXECUTEONCHANGE = 8,
FLAG_ANY_EXECUTE = 4 | 8,
FLAG_READS_PC = 16,
FLAG_WRITES_PC = 32,
@ -115,7 +115,7 @@ static const CommandTableEntry commandTable[] = {
// These affect the fragment shader so need flushing.
{GE_CMD_CLEARMODE, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_TEXTUREMAPENABLE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_TEXTUREMAPENABLE, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_FOGENABLE, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_TEXMODE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
{GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE},
@ -610,7 +610,7 @@ void GLES_GPU::CopyDisplayToOutputInternal() {
#endif
#endif
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
}
// Maybe should write this in ASM...
@ -627,7 +627,7 @@ void GLES_GPU::FastRunLoop(DisplayList &list) {
transformDraw_.Flush();
}
gstate.cmdmem[cmd] = op; // TODO: no need to write if diff==0...
if (cmdFlags & FLAG_ANY_EXECUTE) { // (cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) {
if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) {
ExecuteOpInternal(op, diff);
}
list.pc += 4;
@ -658,7 +658,7 @@ void GLES_GPU::ProcessEvent(GPUEvent ev) {
}
inline void GLES_GPU::CheckFlushOp(int cmd, u32 diff) {
u8 cmdFlags = commandFlags_[cmd];
const u8 cmdFlags = commandFlags_[cmd];
if ((cmdFlags & FLAG_FLUSHBEFORE) || (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE))) {
if (dumpThisFrame_) {
NOTICE_LOG(G3D, "================ FLUSH ================");
@ -672,7 +672,11 @@ void GLES_GPU::PreExecuteOp(u32 op, u32 diff) {
}
void GLES_GPU::ExecuteOp(u32 op, u32 diff) {
return ExecuteOpInternal(op, diff);
const u8 cmd = op >> 24;
const u8 cmdFlags = commandFlags_[cmd];
if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) {
ExecuteOpInternal(op, diff);
}
}
void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
@ -883,30 +887,26 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
break;
case GE_CMD_VERTEXTYPE:
if (diff) {
if (!g_Config.bSoftwareSkinning) {
if (!g_Config.bSoftwareSkinning) {
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
} else {
// Don't flush when weight count changes, unless morph is enabled.
if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (data & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
// Restore and flush
gstate.vertType ^= diff;
Flush();
gstate.vertType ^= diff;
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
} else {
// Don't flush when weight count changes, unless morph is enabled.
if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (data & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
// Restore and flush
gstate.vertType ^= diff;
Flush();
gstate.vertType ^= diff;
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
}
}
break;
case GE_CMD_REGION1:
case GE_CMD_REGION2:
if (diff) {
gstate_c.framebufChanged = true;
gstate_c.textureChanged = true;
}
gstate_c.framebufChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
break;
case GE_CMD_CLIPENABLE:
@ -918,26 +918,19 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
break;
case GE_CMD_TEXTUREMAPENABLE:
if (diff)
gstate_c.textureChanged = true;
// Don't need to dirty the texture here, already dirtied at list start/etc.
break;
case GE_CMD_LIGHTINGENABLE:
break;
case GE_CMD_FOGCOLOR:
if (diff)
shaderManager_->DirtyUniform(DIRTY_FOGCOLOR);
shaderManager_->DirtyUniform(DIRTY_FOGCOLOR);
break;
case GE_CMD_FOG1:
if (diff)
shaderManager_->DirtyUniform(DIRTY_FOGCOEF);
break;
case GE_CMD_FOG2:
if (diff)
shaderManager_->DirtyUniform(DIRTY_FOGCOEF);
shaderManager_->DirtyUniform(DIRTY_FOGCOEF);
break;
case GE_CMD_FOGENABLE:
@ -953,39 +946,29 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
break;
case GE_CMD_TEXSCALEU:
if (diff) {
gstate_c.uv.uScale = getFloat24(data);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
gstate_c.uv.uScale = getFloat24(data);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
break;
case GE_CMD_TEXSCALEV:
if (diff) {
gstate_c.uv.vScale = getFloat24(data);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
gstate_c.uv.vScale = getFloat24(data);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
break;
case GE_CMD_TEXOFFSETU:
if (diff) {
gstate_c.uv.uOff = getFloat24(data);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
gstate_c.uv.uOff = getFloat24(data);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
break;
case GE_CMD_TEXOFFSETV:
if (diff) {
gstate_c.uv.vOff = getFloat24(data);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
gstate_c.uv.vOff = getFloat24(data);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
break;
case GE_CMD_SCISSOR1:
case GE_CMD_SCISSOR2:
if (diff) {
gstate_c.framebufChanged = true;
gstate_c.textureChanged = true;
}
gstate_c.framebufChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
break;
///
@ -996,13 +979,15 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_FRAMEBUFPTR:
case GE_CMD_FRAMEBUFWIDTH:
case GE_CMD_FRAMEBUFPIXFORMAT:
if (diff) {
gstate_c.framebufChanged = true;
gstate_c.textureChanged = true;
}
gstate_c.framebufChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
break;
case GE_CMD_TEXADDR0:
gstate_c.textureChanged = TEXCHANGE_UPDATED;
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
break;
case GE_CMD_TEXADDR1:
case GE_CMD_TEXADDR2:
case GE_CMD_TEXADDR3:
@ -1010,13 +995,13 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_TEXADDR5:
case GE_CMD_TEXADDR6:
case GE_CMD_TEXADDR7:
if (diff) {
gstate_c.textureChanged = true;
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
break;
case GE_CMD_TEXBUFWIDTH0:
gstate_c.textureChanged = TEXCHANGE_UPDATED;
break;
case GE_CMD_TEXBUFWIDTH1:
case GE_CMD_TEXBUFWIDTH2:
case GE_CMD_TEXBUFWIDTH3:
@ -1024,15 +1009,11 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_TEXBUFWIDTH5:
case GE_CMD_TEXBUFWIDTH6:
case GE_CMD_TEXBUFWIDTH7:
if (diff) {
gstate_c.textureChanged = true;
}
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
break;
case GE_CMD_CLUTFORMAT:
if (diff) {
gstate_c.textureChanged = true;
}
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
// This could be used to "dirty" textures with clut.
break;
@ -1042,15 +1023,13 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
break;
case GE_CMD_LOADCLUT:
gstate_c.textureChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
textureCache_.LoadClut();
// This could be used to "dirty" textures with clut.
break;
case GE_CMD_TEXMAPMODE:
if (diff) {
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
break;
case GE_CMD_TEXSHADELS:
@ -1075,21 +1054,21 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
DoBlockTransfer();
// Fixes Gran Turismo's funky text issue, since it overwrites the current texture.
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
break;
}
case GE_CMD_TEXSIZE0:
// Render to texture may have overridden the width/height.
// Don't reset it unless the size is different / the texture has changed.
if (diff || gstate_c.textureChanged) {
if (diff || gstate_c.textureChanged != TEXCHANGE_UNCHANGED) {
gstate_c.curTextureWidth = gstate.getTextureWidth(0);
gstate_c.curTextureHeight = gstate.getTextureHeight(0);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
// We will need to reset the texture now.
gstate_c.textureChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
}
//fall thru - ignoring the mipmap sizes for now
break;
case GE_CMD_TEXSIZE1:
case GE_CMD_TEXSIZE2:
@ -1098,9 +1077,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_TEXSIZE5:
case GE_CMD_TEXSIZE6:
case GE_CMD_TEXSIZE7:
if (diff) {
gstate_c.textureChanged = true;
}
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
break;
case GE_CMD_ZBUFPTR:
@ -1109,30 +1086,25 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_AMBIENTCOLOR:
case GE_CMD_AMBIENTALPHA:
if (diff)
shaderManager_->DirtyUniform(DIRTY_AMBIENT);
shaderManager_->DirtyUniform(DIRTY_AMBIENT);
break;
case GE_CMD_MATERIALDIFFUSE:
if (diff)
shaderManager_->DirtyUniform(DIRTY_MATDIFFUSE);
shaderManager_->DirtyUniform(DIRTY_MATDIFFUSE);
break;
case GE_CMD_MATERIALEMISSIVE:
if (diff)
shaderManager_->DirtyUniform(DIRTY_MATEMISSIVE);
shaderManager_->DirtyUniform(DIRTY_MATEMISSIVE);
break;
case GE_CMD_MATERIALAMBIENT:
case GE_CMD_MATERIALALPHA:
if (diff)
shaderManager_->DirtyUniform(DIRTY_MATAMBIENTALPHA);
shaderManager_->DirtyUniform(DIRTY_MATAMBIENTALPHA);
break;
case GE_CMD_MATERIALSPECULAR:
case GE_CMD_MATERIALSPECULARCOEF:
if (diff)
shaderManager_->DirtyUniform(DIRTY_MATSPECULAR);
shaderManager_->DirtyUniform(DIRTY_MATSPECULAR);
break;
case GE_CMD_LIGHTTYPE0:
@ -1145,7 +1117,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_LX1:case GE_CMD_LY1:case GE_CMD_LZ1:
case GE_CMD_LX2:case GE_CMD_LY2:case GE_CMD_LZ2:
case GE_CMD_LX3:case GE_CMD_LY3:case GE_CMD_LZ3:
if (diff) {
{
int n = cmd - GE_CMD_LX0;
int l = n / 3;
int c = n % 3;
@ -1158,7 +1130,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_LDX1:case GE_CMD_LDY1:case GE_CMD_LDZ1:
case GE_CMD_LDX2:case GE_CMD_LDY2:case GE_CMD_LDZ2:
case GE_CMD_LDX3:case GE_CMD_LDY3:case GE_CMD_LDZ3:
if (diff) {
{
int n = cmd - GE_CMD_LDX0;
int l = n / 3;
int c = n % 3;
@ -1171,7 +1143,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_LKA1:case GE_CMD_LKB1:case GE_CMD_LKC1:
case GE_CMD_LKA2:case GE_CMD_LKB2:case GE_CMD_LKC2:
case GE_CMD_LKA3:case GE_CMD_LKB3:case GE_CMD_LKC3:
if (diff) {
{
int n = cmd - GE_CMD_LKA0;
int l = n / 3;
int c = n % 3;
@ -1184,7 +1156,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_LKS1:
case GE_CMD_LKS2:
case GE_CMD_LKS3:
if (diff) {
{
int l = cmd - GE_CMD_LKS0;
gstate_c.lightspotCoef[l] = getFloat24(data);
shaderManager_->DirtyUniform(DIRTY_LIGHT0 << l);
@ -1195,7 +1167,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_LKO1:
case GE_CMD_LKO2:
case GE_CMD_LKO3:
if (diff) {
{
int l = cmd - GE_CMD_LKO0;
gstate_c.lightangle[l] = getFloat24(data);
shaderManager_->DirtyUniform(DIRTY_LIGHT0 << l);
@ -1205,7 +1177,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_LAC0:case GE_CMD_LAC1:case GE_CMD_LAC2:case GE_CMD_LAC3:
case GE_CMD_LDC0:case GE_CMD_LDC1:case GE_CMD_LDC2:case GE_CMD_LDC3:
case GE_CMD_LSC0:case GE_CMD_LSC1:case GE_CMD_LSC2:case GE_CMD_LSC3:
if (diff) {
{
float r = (float)(data & 0xff) * (1.0f / 255.0f);
float g = (float)((data >> 8) & 0xff) * (1.0f / 255.0f);
float b = (float)(data >> 16) * (1.0f / 255.0f);
@ -1225,10 +1197,8 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_VIEWPORTY2:
case GE_CMD_VIEWPORTZ1:
case GE_CMD_VIEWPORTZ2:
if (diff) {
gstate_c.framebufChanged = true;
gstate_c.textureChanged = true;
}
gstate_c.framebufChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
break;
case GE_CMD_LIGHTENABLE0:
@ -1280,28 +1250,29 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
if (((data >> 16) & 0xFF) != 0xFF && (data & 7) > 1)
WARN_LOG_REPORT_ONCE(alphatestmask, G3D, "Unsupported alphatest mask: %02x", (data >> 16) & 0xFF);
#endif
// Intentional fallthrough - we still need to dirty DIRTY_ALPHACOLORREF for GE_CMD_ALPHATEST.
shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF);
break;
case GE_CMD_COLORREF:
if (diff)
shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF);
shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF);
break;
case GE_CMD_TEXENVCOLOR:
if (diff)
shaderManager_->DirtyUniform(DIRTY_TEXENV);
shaderManager_->DirtyUniform(DIRTY_TEXENV);
break;
case GE_CMD_TEXFUNC:
case GE_CMD_TEXFLUSH:
break;
case GE_CMD_TEXMODE:
case GE_CMD_TEXFORMAT:
gstate_c.textureChanged = TEXCHANGE_UPDATED;
break;
case GE_CMD_TEXMODE:
case GE_CMD_TEXFILTER:
case GE_CMD_TEXWRAP:
if (diff)
gstate_c.textureChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
break;
//////////////////////////////////////////////////////////////////
@ -1321,8 +1292,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_MORPHWEIGHT5:
case GE_CMD_MORPHWEIGHT6:
case GE_CMD_MORPHWEIGHT7:
if (diff)
gstate_c.morphWeights[cmd - GE_CMD_MORPHWEIGHT0] = getFloat24(data);
gstate_c.morphWeights[cmd - GE_CMD_MORPHWEIGHT0] = getFloat24(data);
break;
case GE_CMD_DITH0:
@ -1581,8 +1551,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
#endif
case GE_CMD_TEXLEVEL:
if (diff)
gstate_c.textureChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
break;
//////////////////////////////////////////////////////////////////
@ -1591,9 +1560,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
case GE_CMD_STENCILTEST:
// Handled in StateMapping.
if (diff) {
shaderManager_->DirtyUniform(DIRTY_STENCILREPLACEVALUE);
}
shaderManager_->DirtyUniform(DIRTY_STENCILREPLACEVALUE);
break;
case GE_CMD_STENCILTESTENABLE:
@ -1731,14 +1698,22 @@ void GLES_GPU::DoBlockTransfer() {
// Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?)
// Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them
// entirely by walking a couple of pointers...
// GetPointerUnchecked crash in windows 64 bit of issue 2301
for (int y = 0; y < height; y++) {
u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;
if (srcStride == dstStride && width == srcStride) {
// Common case in God of War, let's do it all in one chunk.
u32 srcLineStartAddr = srcBasePtr + (srcY * srcStride + srcX) * bpp;
u32 dstLineStartAddr = dstBasePtr + (dstY * dstStride + dstX) * bpp;
const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr);
u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr);
memcpy(dst, src, width * bpp);
memcpy(dst, src, width * height * bpp);
} else {
for (int y = 0; y < height; y++) {
u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;
const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr);
u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr);
memcpy(dst, src, width * bpp);
}
}
// TODO: Notify all overlapping FBOs that they need to reload.
@ -1824,7 +1799,7 @@ void GLES_GPU::DoState(PointerWrap &p) {
textureCache_.Clear(true);
transformDraw_.ClearTrackedVertexArrays();
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
framebufferManager_.DestroyAllFBOs();
}
}

View file

@ -166,11 +166,9 @@ static inline bool blendColorSimilar(const Vec3f &a, const Vec3f &b, float margi
void TransformDrawEngine::ApplyDrawState(int prim) {
// TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall.
if (gstate_c.textureChanged && !gstate.isModeClear()) {
if (gstate.isTextureMapEnabled()) {
textureCache_->SetTexture();
}
gstate_c.textureChanged = false;
if (gstate_c.textureChanged != TEXCHANGE_UNCHANGED && !gstate.isModeClear() && gstate.isTextureMapEnabled()) {
textureCache_->SetTexture();
gstate_c.textureChanged = TEXCHANGE_UNCHANGED;
}
// TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a

View file

@ -56,6 +56,14 @@
extern int g_iNumVideos;
// Tells the texture cache whether 32-bit texture data should be swizzled to
// BGRA order and uploaded as GL_BGRA_EXT rather than plain RGBA.
// Only ever true on Windows builds, and only when the driver reports EXT_bgra.
static inline bool UseBGRA8888() {
	// TODO: Other platforms? May depend on vendor which is faster?
#ifdef _WIN32
	return gl_extensions.EXT_bgra;
#else
	return false;
#endif
}
TextureCache::TextureCache() : clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL) {
lastBoundTexture = -1;
decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
@ -138,16 +146,15 @@ void TextureCache::Invalidate(u32 addr, int size, GPUInvalidationType type) {
// They could invalidate inside the texture, let's just give a bit of leeway.
const int LARGEST_TEXTURE_SIZE = 512 * 512 * 4;
u64 startKey = addr - LARGEST_TEXTURE_SIZE;
u64 endKey = addr + size + LARGEST_TEXTURE_SIZE;
u64 startKey = (u64)(addr - LARGEST_TEXTURE_SIZE) << 32;
u64 endKey = (u64)(addr + size + LARGEST_TEXTURE_SIZE) << 32;
for (TexCache::iterator iter = cache.lower_bound(startKey), end = cache.upper_bound(endKey); iter != end; ++iter) {
u32 texAddr = iter->second.addr;
u32 texEnd = iter->second.addr + iter->second.sizeInRAM;
if (texAddr < addr_end && addr < texEnd) {
if ((iter->second.status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_RELIABLE) {
// Clear status -> STATUS_HASHING.
iter->second.status &= ~TexCacheEntry::STATUS_MASK;
if (iter->second.GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
iter->second.SetHashStatus(TexCacheEntry::STATUS_HASHING);
}
if (type != GPU_INVALIDATE_ALL) {
gpuStats.numTextureInvalidations++;
@ -168,9 +175,8 @@ void TextureCache::InvalidateAll(GPUInvalidationType /*unused*/) {
}
for (TexCache::iterator iter = cache.begin(), end = cache.end(); iter != end; ++iter) {
if ((iter->second.status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_RELIABLE) {
// Clear status -> STATUS_HASHING.
iter->second.status &= ~TexCacheEntry::STATUS_MASK;
if (iter->second.GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
iter->second.SetHashStatus(TexCacheEntry::STATUS_HASHING);
}
if (!iter->second.framebuffer) {
iter->second.invalidHint++;
@ -635,7 +641,35 @@ static void ConvertColors(void *dstBuf, const void *srcBuf, GLuint dstFmt, int n
}
break;
default:
{
if (UseBGRA8888()) {
#ifdef _M_SSE
const __m128i maskGA = _mm_set1_epi32(0xFF00FF00);
__m128i *srcp = (__m128i *)src;
__m128i *dstp = (__m128i *)dst;
const int sseChunks = numPixels / 4;
for (int i = 0; i < sseChunks; ++i) {
__m128i c = _mm_load_si128(&srcp[i]);
__m128i rb = _mm_andnot_si128(maskGA, c);
c = _mm_and_si128(c, maskGA);
__m128i b = _mm_srli_epi32(rb, 16);
__m128i r = _mm_slli_epi32(rb, 16);
c = _mm_or_si128(_mm_or_si128(c, r), b);
_mm_store_si128(&dstp[i], c);
}
// The remainder starts right after those done via SSE.
int i = sseChunks * 4;
#else
int i = 0;
#endif
for (; i < numPixels; i++) {
u32 c = src[i];
dst[i] = ((c >> 16) & 0x000000FF) |
((c >> 0) & 0xFF00FF00) |
((c << 16) & 0x00FF0000);
}
} else {
// No need to convert RGBA8888, right order already
if (dst != src)
memcpy(dst, src, numPixels * sizeof(u32));
@ -737,7 +771,7 @@ void TextureCache::UpdateCurrentClut() {
clutHash_ = DoReliableHash((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
// Avoid a copy when we don't need to convert colors.
if (clutFormat != GE_CMODE_32BIT_ABGR8888) {
if (UseBGRA8888() || clutFormat != GE_CMODE_32BIT_ABGR8888) {
ConvertColors(clutBufConverted_, clutBufRaw_, getClutDestFormat(clutFormat), clutExtendedBytes / sizeof(u16));
clutBuf_ = clutBufConverted_;
} else {
@ -921,9 +955,19 @@ void TextureCache::SetTexture(bool force) {
}
}
bool rehash = (entry->status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_UNRELIABLE;
bool rehash = entry->GetHashStatus() == TexCacheEntry::STATUS_UNRELIABLE;
bool doDelete = true;
// First let's see if another texture with the same address had a hashfail.
if (entry->status & TexCacheEntry::STATUS_CLUT_RECHECK) {
// Always rehash in this case, if one changed the rest all probably did.
rehash = true;
entry->status &= ~TexCacheEntry::STATUS_CLUT_RECHECK;
} else if ((gstate_c.textureChanged & TEXCHANGE_UPDATED) == 0) {
// Okay, just some parameter change - the data didn't change, no need to rehash.
rehash = false;
}
if (match) {
if (entry->lastFrame != gpuStats.numFlips) {
u32 diff = gpuStats.numFlips - entry->lastFrame;
@ -956,14 +1000,14 @@ void TextureCache::SetTexture(bool force) {
rehash = false;
}
if (rehash && (entry->status & TexCacheEntry::STATUS_MASK) != TexCacheEntry::STATUS_RELIABLE) {
if (rehash && entry->GetHashStatus() != TexCacheEntry::STATUS_RELIABLE) {
fullhash = QuickTexHash(texaddr, bufw, w, h, format);
if (fullhash != entry->fullhash) {
hashFail = true;
} else if ((entry->status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_UNRELIABLE && entry->numFrames > TexCacheEntry::FRAMES_REGAIN_TRUST) {
} else if (entry->GetHashStatus() != TexCacheEntry::STATUS_HASHING && entry->numFrames > TexCacheEntry::FRAMES_REGAIN_TRUST) {
// Reset to STATUS_HASHING.
if (g_Config.bTextureBackoffCache) {
entry->status &= ~TexCacheEntry::STATUS_MASK;
entry->SetHashStatus(TexCacheEntry::STATUS_HASHING);
}
}
}
@ -1009,7 +1053,7 @@ void TextureCache::SetTexture(bool force) {
if (entry->texture != lastBoundTexture) {
glBindTexture(GL_TEXTURE_2D, entry->texture);
lastBoundTexture = entry->texture;
gstate_c.textureFullAlpha = (entry->status & TexCacheEntry::STATUS_ALPHA_MASK) == TexCacheEntry::STATUS_ALPHA_FULL;
gstate_c.textureFullAlpha = entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL;
}
UpdateSamplingParams(*entry, false);
VERBOSE_LOG(G3D, "Texture at %08x Found in Cache, applying", texaddr);
@ -1031,8 +1075,19 @@ void TextureCache::SetTexture(bool force) {
}
}
// Clear the reliable bit if set.
if ((entry->status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_RELIABLE) {
entry->status &= ~TexCacheEntry::STATUS_MASK;
if (entry->GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
entry->SetHashStatus(TexCacheEntry::STATUS_HASHING);
}
// Also, mark any textures with the same address but different clut. They need rechecking.
if (cluthash != 0) {
const u64 cachekeyMin = (u64)texaddr << 32;
const u64 cachekeyMax = (u64)(texaddr + 1) << 32;
for (auto it = cache.lower_bound(cachekeyMin), end = cache.upper_bound(cachekeyMax); it != end; ++it) {
if (it->second.cluthash != cluthash) {
it->second.status |= TexCacheEntry::STATUS_CLUT_RECHECK;
}
}
}
}
} else {
@ -1229,7 +1284,7 @@ void TextureCache::SetTexture(bool force) {
//glPixelStorei(GL_PACK_ROW_LENGTH, 0);
glPixelStorei(GL_PACK_ALIGNMENT, 1);
gstate_c.textureFullAlpha = (entry->status & TexCacheEntry::STATUS_ALPHA_MASK) == TexCacheEntry::STATUS_ALPHA_FULL;
gstate_c.textureFullAlpha = entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL;
}
GLenum TextureCache::GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const {
@ -1362,18 +1417,23 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
if (!gstate.isTextureSwizzled()) {
// Special case: if we don't need to deal with packing, we don't need to copy.
if ((g_Config.iTexScalingLevel == 1 && gl_extensions.EXT_unpack_subimage) || w == bufw) {
finalBuf = (void *)texptr;
if (UseBGRA8888()) {
finalBuf = tmpTexBuf32.data();
ConvertColors(finalBuf, texptr, dstFmt, bufw * h);
} else {
finalBuf = (void *)texptr;
}
} else {
int len = bufw * h;
tmpTexBuf32.resize(std::max(bufw, w) * h);
tmpTexBufRearrange.resize(std::max(bufw, w) * h);
memcpy(tmpTexBuf32.data(), texptr, len * sizeof(u32));
finalBuf = tmpTexBuf32.data();
ConvertColors(finalBuf, texptr, dstFmt, bufw * h);
}
}
else {
tmpTexBuf32.resize(std::max(bufw, w) * h);
finalBuf = UnswizzleFromMem(texptr, bufw, 4, level);
ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h);
}
break;
@ -1393,6 +1453,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
}
}
finalBuf = tmpTexBuf32.data();
ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h);
w = (w + 3) & ~3;
}
break;
@ -1414,6 +1475,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
}
w = (w + 3) & ~3;
finalBuf = tmpTexBuf32.data();
ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h);
}
break;
@ -1434,6 +1496,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
}
w = (w + 3) & ~3;
finalBuf = tmpTexBuf32.data();
ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h);
}
break;
@ -1537,19 +1600,17 @@ void TextureCache::CheckAlpha(TexCacheEntry &entry, u32 *pixelData, GLenum dstFm
}
if (hitSomeAlpha != 0)
entry.status |= TexCacheEntry::STATUS_ALPHA_UNKNOWN;
entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
else if (hitZeroAlpha != 0)
entry.status |= TexCacheEntry::STATUS_ALPHA_SIMPLE;
entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_SIMPLE);
else
entry.status |= TexCacheEntry::STATUS_ALPHA_FULL;
entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_FULL);
}
void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages, int scaleFactor, GLenum dstFmt) {
// TODO: only do this once
u32 texByteAlign = 1;
// TODO: Look into using BGRA for 32-bit textures when the GL_EXT_texture_format_BGRA8888 extension is available, as it's faster than RGBA on some chips.
GEPaletteFormat clutformat = gstate.getClutPaletteFormat();
int bufw;
void *finalBuf = DecodeTextureLevel(GETextureFormat(entry.format), clutformat, level, texByteAlign, dstFmt, &bufw);
@ -1578,20 +1639,27 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac
if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0)
CheckAlpha(entry, pixelData, dstFmt, useUnpack ? bufw : w, w, h);
else
entry.status |= TexCacheEntry::STATUS_ALPHA_UNKNOWN;
entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA;
GLuint components2 = components;
#if defined(MAY_HAVE_GLES3)
if (UseBGRA8888() && dstFmt == GL_UNSIGNED_BYTE) {
components2 = GL_BGRA_EXT;
}
#endif
if (replaceImages) {
glTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, w, h, components, dstFmt, pixelData);
glTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, w, h, components2, dstFmt, pixelData);
} else {
glTexImage2D(GL_TEXTURE_2D, level, components, w, h, 0, components, dstFmt, pixelData);
glTexImage2D(GL_TEXTURE_2D, level, components, w, h, 0, components2, dstFmt, pixelData);
GLenum err = glGetError();
if (err == GL_OUT_OF_MEMORY) {
lowMemoryMode_ = true;
Decimate();
// Try again.
glTexImage2D(GL_TEXTURE_2D, level, components, w, h, 0, components, dstFmt, pixelData);
glTexImage2D(GL_TEXTURE_2D, level, components, w, h, 0, components2, dstFmt, pixelData);
}
}

View file

@ -87,6 +87,7 @@ private:
STATUS_ALPHA_MASK = 0x0c,
STATUS_CHANGE_FREQUENT = 0x10, // Changes often (less than 15 frames in between.)
STATUS_CLUT_RECHECK = 0x20, // Another texture with same addr had a hashfail.
};
// Status, but int so we can zero initialize.
@ -116,6 +117,18 @@ private:
bool sClamp;
bool tClamp;
// Extracts the hash-tracking state (the bits selected by STATUS_MASK) from the packed status word.
Status GetHashStatus() {
	const int hashBits = status & STATUS_MASK;
	return static_cast<Status>(hashBits);
}
// Replaces the hash-tracking state while leaving every other status bit untouched.
void SetHashStatus(Status newStatus) {
	// Drop the previous STATUS_MASK bits first, then merge in the new value.
	status &= ~STATUS_MASK;
	status |= newStatus;
}
// Extracts the alpha classification (the bits selected by STATUS_ALPHA_MASK) from the packed status word.
Status GetAlphaStatus() {
	const int alphaBits = status & STATUS_ALPHA_MASK;
	return static_cast<Status>(alphaBits);
}
// Replaces the alpha classification while leaving every other status bit untouched.
// Unlike a plain `status |= ...`, this also clears a previously stored alpha state.
void SetAlphaStatus(Status newStatus) {
	status &= ~STATUS_ALPHA_MASK;
	status |= newStatus;
}
bool Matches(u16 dim2, u8 format2, int maxLevel2);
};

View file

@ -648,7 +648,7 @@ void GPUCommon::ProcessDLQueueInternal() {
UpdateTickEstimate(std::max(busyTicks, startingTicks + cyclesExecuted));
// Game might've written new texture data.
gstate_c.textureChanged = true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
// Seems to be correct behaviour to process the list anyway?
if (startingTicks < busyTicks) {

View file

@ -27,6 +27,7 @@
#include "GPU/Directx9/helper/global.h"
#include "GPU/Directx9/GPU_DX9.h"
#endif
#include "Common/ChunkFile.h"
#include "Core/CoreParameter.h"
#include "Core/Config.h"
#include "Core/System.h"
@ -249,3 +250,73 @@ bool vertTypeIsSkinningEnabled(u32 vertType) {
else
return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE);
}
// Snapshot of the leading GPUStateCache fields as they were written by
// save states created before the "GPUStateCache" section was versioned.
// GPUStateCache::DoState reads this struct with a single p.Do(old), so the
// field order and types here presumably have to stay exactly as they are
// to remain compatible with those old states - do not "clean up" this struct.
struct GPUStateCache_v0
{
u32 vertexAddr;
u32 indexAddr;
u32 offsetAddr;
// Was a plain bool in the old format; the current struct stores a u8 of TEXCHANGE_* flags.
bool textureChanged;
bool textureFullAlpha;
bool vertexFullAlpha;
bool framebufChanged;
int skipDrawReason;
UVScale uv;
bool flipTexture;
};
// Serializes this state cache through the given PointerWrap.
//
// Two layouts are handled for the leading fields:
//  - unversioned (old) states, where they were stored as one GPUStateCache_v0 blob;
//  - the versioned "GPUStateCache" section, where each field is stored individually.
// The remaining fields (lights, morph weights, texture/viewport/render target sizes)
// are stored the same way in both cases. The order of the p.Do() calls defines the
// on-disk layout, so it must not be changed.
void GPUStateCache::DoState(PointerWrap &p) {
// NOTE(review): presumably s is false when the section marker is absent (pre-versioning state) - confirm against PointerWrap::Section.
auto s = p.Section("GPUStateCache", 0, 1);
if (!s) {
// Old state, this was not versioned.
// Read the leading fields as one raw struct, then copy them over one by one.
GPUStateCache_v0 old;
p.Do(old);
vertexAddr = old.vertexAddr;
indexAddr = old.indexAddr;
offsetAddr = old.offsetAddr;
// The old bool value is not carried over; the texture is unconditionally marked as updated.
textureChanged = TEXCHANGE_UPDATED;
textureFullAlpha = old.textureFullAlpha;
vertexFullAlpha = old.vertexFullAlpha;
framebufChanged = old.framebufChanged;
skipDrawReason = old.skipDrawReason;
uv = old.uv;
flipTexture = old.flipTexture;
} else {
// Versioned section: same fields, in the same order, serialized individually.
p.Do(vertexAddr);
p.Do(indexAddr);
p.Do(offsetAddr);
p.Do(textureChanged);
p.Do(textureFullAlpha);
p.Do(vertexFullAlpha);
p.Do(framebufChanged);
p.Do(skipDrawReason);
p.Do(uv);
p.Do(flipTexture);
}
// Everything below is serialized identically for old and new states.
p.Do(lightpos);
p.Do(lightdir);
p.Do(lightatt);
p.Do(lightColor);
p.Do(lightangle);
p.Do(lightspotCoef);
p.Do(morphWeights);
p.Do(curTextureWidth);
p.Do(curTextureHeight);
p.Do(actualTextureHeight);
p.Do(vpWidth);
p.Do(vpHeight);
p.Do(curRTWidth);
p.Do(curRTHeight);
}

View file

@ -23,6 +23,8 @@
#include "ge_constants.h"
#include "Common/Common.h"
class PointerWrap;
// PSP uses a curious 24-bit float - it's basically the top 24 bits of a regular IEEE754 32-bit float.
// This is used for light positions, transform matrices, you name it.
inline float getFloat24(unsigned int data)
@ -432,6 +434,12 @@ struct UVScale {
float uOff, vOff;
};
// Bit flags stored in GPUStateCache::textureChanged describing why the
// current texture has to be looked at again before the next draw.
enum TextureChangeReason {
	// Nothing changed since the texture was last applied.
	TEXCHANGE_UNCHANGED = 0,
	// The texture data itself may have changed (e.g. the game might have written new data).
	TEXCHANGE_UPDATED = 1 << 0,
	// Only texture parameters changed; the texture cache skips rehashing when UPDATED is not set.
	TEXCHANGE_PARAMSONLY = 1 << 1,
};
struct GPUStateCache
{
u32 vertexAddr;
@ -439,7 +447,7 @@ struct GPUStateCache
u32 offsetAddr;
bool textureChanged;
u8 textureChanged;
bool textureFullAlpha;
bool vertexFullAlpha;
bool framebufChanged;
@ -468,6 +476,7 @@ struct GPUStateCache
u32 curRTHeight;
u32 getRelativeAddress(u32 data) const;
void DoState(PointerWrap &p);
};
// TODO: Implement support for these.

View file

@ -214,7 +214,7 @@ void NullGPU::ExecuteOp(u32 op, u32 diff) {
break;
case GE_CMD_TEXADDR0:
gstate_c.textureChanged=true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
case GE_CMD_TEXADDR1:
case GE_CMD_TEXADDR2:
case GE_CMD_TEXADDR3:
@ -226,7 +226,7 @@ void NullGPU::ExecuteOp(u32 op, u32 diff) {
break;
case GE_CMD_TEXBUFWIDTH0:
gstate_c.textureChanged=true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
case GE_CMD_TEXBUFWIDTH1:
case GE_CMD_TEXBUFWIDTH2:
case GE_CMD_TEXBUFWIDTH3:
@ -314,7 +314,7 @@ void NullGPU::ExecuteOp(u32 op, u32 diff) {
}
case GE_CMD_TEXSIZE0:
gstate_c.textureChanged=true;
gstate_c.textureChanged = TEXCHANGE_UPDATED;
gstate_c.curTextureWidth = 1 << (gstate.texsize[0] & 0xf);
gstate_c.curTextureHeight = 1 << ((gstate.texsize[0]>>8) & 0xf);
//fall thru - ignoring the mipmap sizes for now

View file

@ -1314,12 +1314,12 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData&
int range = (maxY - minY) / 16 + 1;
if (gstate.isModeClear()) {
if (range >= 24)
if (range >= 24 && (maxX - minX) >= 24 * 16)
GlobalThreadPool::Loop(std::bind(&DrawTriangleSlice<true>, v0, v1, v2, minX, minY, maxX, maxY, placeholder::_1, placeholder::_2), 0, range);
else
DrawTriangleSlice<true>(v0, v1, v2, minX, minY, maxX, maxY, 0, range);
} else {
if (range >= 24)
if (range >= 24 && (maxX - minX) >= 24 * 16)
GlobalThreadPool::Loop(std::bind(&DrawTriangleSlice<false>, v0, v1, v2, minX, minY, maxX, maxY, placeholder::_1, placeholder::_2), 0, range);
else
DrawTriangleSlice<false>(v0, v1, v2, minX, minY, maxX, maxY, 0, range);

View file

@ -138,6 +138,8 @@ void EmuScreen::bootComplete() {
const char *renderer = (const char*)glGetString(GL_RENDERER);
if (strstr(renderer, "Chainfire3D") != 0) {
osm.Show(s->T("Chainfire3DWarning", "WARNING: Chainfire3D detected, may cause problems"), 10.0f, 0xFF30a0FF, -1, true);
} else if (strstr(renderer, "GLTools") != 0) {
osm.Show(s->T("GLToolsWarning", "WARNING: GLTools detected, may cause problems"), 10.0f, 0xFF30a0FF, -1, true);
}
System_SendMessage("event", "startgame");

2
native

@ -1 +1 @@
Subproject commit dc1e691ea8e70778d6559366d3e6372b31fe0bf5
Subproject commit 980e6eaca6e51e07c0ea5b38bce71499f10acc21