// Copyright (c) 2012- PPSSPP Project. // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, version 2.0 or later versions. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License 2.0 for more details. // A copy of the GPL 2.0 should have been included with the program. // If not, see http://www.gnu.org/licenses/ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. #ifdef _WIN32 #define SHADERLOG #endif #include #include "helper/global.h" #include "math/lin/matrix4x4.h" #include "Common/Common.h" #include "Core/Reporting.h" #include "GPU/GPUState.h" #include "GPU/ge_constants.h" #include "GPU/Directx9/ShaderManager.h" #include "GPU/Directx9/TransformPipeline.h" #include "UI/OnScreenDisplay.h" #include "Framebuffer.h" // For matrices convertions #include PSShader::PSShader(const char *code, bool useHWTransform) : failed_(false), useHWTransform_(useHWTransform) { source_ = code; #ifdef SHADERLOG OutputDebugString(code); #endif bool success; success = CompilePixelShader(code, &shader, &constant); if (!success) { failed_ = true; shader = NULL; } else { DEBUG_LOG(G3D, "Compiled shader:\n%s\n", (const char *)code); } } PSShader::~PSShader() { pD3Ddevice->SetPixelShader(NULL); if (shader) shader->Release(); } VSShader::VSShader(const char *code, bool useHWTransform) : failed_(false), useHWTransform_(useHWTransform) { source_ = code; #ifdef SHADERLOG OutputDebugString(code); #endif bool success; success = CompileVertexShader(code, &shader, &constant); if (!success) { failed_ = true; shader = NULL; } else { DEBUG_LOG(G3D, "Compiled shader:\n%s\n", (const char *)code); } } VSShader::~VSShader() { pD3Ddevice->SetVertexShader(NULL); if (shader) shader->Release(); } // Helper D3DXHANDLE LinkedShader::GetConstantByName(LPCSTR pName) { D3DXHANDLE ret = NULL; if ((ret = m_fs->constant->GetConstantByName(NULL, pName)) != NULL) { } else if ((ret = m_vs->constant->GetConstantByName(NULL, pName)) != NULL) {} return ret; } LinkedShader::LinkedShader(VSShader *vs, PSShader *fs, bool useHWTransform) :dirtyUniforms(0), useHWTransform_(useHWTransform) { INFO_LOG(G3D, "Linked shader: vs %i fs %i", (int)vs->shader, (int)fs->shader); m_vs = vs; m_fs = fs; u_tex = GetConstantByName("tex"); u_proj = GetConstantByName("u_proj"); u_proj_through = GetConstantByName("u_proj_through"); u_texenv = GetConstantByName("u_texenv"); u_fogcolor = GetConstantByName("u_fogcolor"); u_fogcoef = GetConstantByName("u_fogcoef"); u_alphacolorref = GetConstantByName("u_alphacolorref"); u_colormask = GetConstantByName("u_colormask"); // Transform u_view = GetConstantByName("u_view"); u_world = GetConstantByName("u_world"); u_texmtx = GetConstantByName("u_texmtx"); if (gstate.getWeightMask() != 0) numBones = TranslateNumBones(gstate.getNumBoneWeights()); else numBones = 0; #ifdef USE_BONE_ARRAY u_bone = glGetUniformLocation(program, "u_bone"); #else for (int i = 0; i < 8; i++) { char name[10]; sprintf(name, "u_bone%i", i); u_bone[i] = GetConstantByName(name); } #endif // Lighting, texturing u_ambient = GetConstantByName("u_ambient"); u_matambientalpha = GetConstantByName("u_matambientalpha"); u_matdiffuse = GetConstantByName("u_matdiffuse"); u_matspecular = GetConstantByName("u_matspecular"); u_matemissive = GetConstantByName("u_matemissive"); u_uvscaleoffset = GetConstantByName("u_uvscaleoffset"); for (int i = 0; i < 4; i++) { char temp[64]; sprintf(temp, "u_lightpos%i", i); u_lightpos[i] = GetConstantByName(temp); sprintf(temp, "u_lightdir%i", i); u_lightdir[i] = GetConstantByName(temp); sprintf(temp, "u_lightatt%i", i); u_lightatt[i] = GetConstantByName(temp); sprintf(temp, "u_lightangle%i", i); u_lightangle[i] = GetConstantByName(temp); sprintf(temp, "u_lightspotCoef%i", i); u_lightspotCoef[i] = GetConstantByName(temp); sprintf(temp, "u_lightambient%i", i); u_lightambient[i] = GetConstantByName(temp); sprintf(temp, "u_lightdiffuse%i", i); u_lightdiffuse[i] = GetConstantByName(temp); sprintf(temp, "u_lightspecular%i", i); u_lightspecular[i] = GetConstantByName(temp); } /* a_position = glGetAttribLocation(program, "a_position"); a_color0 = glGetAttribLocation(program, "a_color0"); a_color1 = glGetAttribLocation(program, "a_color1"); a_texcoord = glGetAttribLocation(program, "a_texcoord"); a_normal = glGetAttribLocation(program, "a_normal"); a_weight0123 = glGetAttribLocation(program, "a_w1"); a_weight4567 = glGetAttribLocation(program, "a_w2"); */ //glUseProgram(program); pD3Ddevice->SetPixelShader(fs->shader); pD3Ddevice->SetVertexShader(vs->shader); // Default uniform values //glUniform1i(u_tex, 0); // The rest, use the "dirty" mechanism. dirtyUniforms = DIRTY_ALL; use(); } LinkedShader::~LinkedShader() { // glDeleteProgram(program); } void LinkedShader::SetFloatArray(D3DXHANDLE uniform, const float* pArray, int len) { if (m_fs->constant->SetFloatArray(pD3Ddevice, uniform, pArray, len) == D3D_OK); else m_vs->constant->SetFloatArray(pD3Ddevice, uniform, pArray, len); } void LinkedShader::SetFloat(D3DXHANDLE uniform, float value) { if (m_fs->constant->SetFloat(pD3Ddevice, uniform, value) == D3D_OK); else m_vs->constant->SetFloat(pD3Ddevice, uniform, value); } // Utility void LinkedShader::SetColorUniform3(D3DXHANDLE uniform, u32 color) { const float col[3] = { ((color & 0xFF)) / 255.0f, ((color & 0xFF00) >> 8) / 255.0f, ((color & 0xFF0000) >> 16) / 255.0f }; SetFloatArray(uniform, col, 4); } void LinkedShader::SetColorUniform3Alpha(D3DXHANDLE uniform, u32 color, u8 alpha) { const float col[4] = { ((color & 0xFF)) / 255.0f, ((color & 0xFF00) >> 8) / 255.0f, ((color & 0xFF0000) >> 16) / 255.0f, alpha/255.0f }; SetFloatArray(uniform, col, 4); } void LinkedShader::SetColorUniform3Alpha255(D3DXHANDLE uniform, u32 color, u8 alpha) { if (1) { const float col[4] = { (float)((color & 0xFF)) * (1.0f / 255.0f), (float)((color & 0xFF00) >> 8) * (1.0f / 255.0f), (float)((color & 0xFF0000) >> 16) * (1.0f / 255.0f), (float)alpha * (1.0f / 255.0f) }; SetFloatArray(uniform, col, 4); } else { const float col[4] = { (float)((color & 0xFF)) , (float)((color & 0xFF00) >> 8) , (float)((color & 0xFF0000) >> 16) , (float)alpha }; SetFloatArray(uniform, col, 4); } } void LinkedShader::SetColorUniform3ExtraFloat(D3DXHANDLE uniform, u32 color, float extra) { const float col[4] = { ((color & 0xFF)) / 255.0f, ((color & 0xFF00) >> 8) / 255.0f, ((color & 0xFF0000) >> 16) / 255.0f, extra }; SetFloatArray(uniform, col, 4); } static void ConvertMatrix4x3To4x4(const float *m4x3, float *m4x4) { m4x4[0] = m4x3[0]; m4x4[1] = m4x3[1]; m4x4[2] = m4x3[2]; m4x4[3] = 0.0f; m4x4[4] = m4x3[3]; m4x4[5] = m4x3[4]; m4x4[6] = m4x3[5]; m4x4[7] = 0.0f; m4x4[8] = m4x3[6]; m4x4[9] = m4x3[7]; m4x4[10] = m4x3[8]; m4x4[11] = 0.0f; m4x4[12] = m4x3[9]; m4x4[13] = m4x3[10]; m4x4[14] = m4x3[11]; m4x4[15] = 1.0f; } // Utility void LinkedShader::SetMatrix4x3(D3DXHANDLE uniform, const float *m4x3) { float m4x4[16]; ConvertMatrix4x3To4x4(m4x3, m4x4); if (m_vs->constant->SetMatrix(pD3Ddevice, uniform, (D3DXMATRIX*)m4x4) == D3D_OK); else m_fs->constant->SetMatrix(pD3Ddevice, uniform, (D3DXMATRIX*)m4x4); } void LinkedShader::SetMatrix(D3DXHANDLE uniform, const float* pMatrix) { D3DXMATRIX * pDxMat = (D3DXMATRIX*)pMatrix; if (m_vs->constant->SetMatrix(pD3Ddevice, uniform, pDxMat) == D3D_OK); else m_fs->constant->SetMatrix(pD3Ddevice, uniform, pDxMat); } // Depth in ogl is between -1;1 we need between 0;1 static void ConvertMatrices(Matrix4x4 & in) { /* in.zz *= 0.5f; in.wz += 1.f; */ Matrix4x4 s; Matrix4x4 t; s.setScaling(Vec3(1, 1, 0.5f)); t.setTranslation(Vec3(0, 0, 0.5f)); in = in * s; in = in * t; } void LinkedShader::use() { updateUniforms(); pD3Ddevice->SetPixelShader(m_fs->shader); pD3Ddevice->SetVertexShader(m_vs->shader); } void LinkedShader::stop() { } void LinkedShader::updateUniforms() { if (!dirtyUniforms) return; // Update any dirty uniforms before we draw if (u_proj != 0 && (dirtyUniforms & DIRTY_PROJMATRIX)) { Matrix4x4 flippedMatrix; memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); if (gstate_c.vpHeight < 0) { flippedMatrix[5] = -flippedMatrix[5]; flippedMatrix[13] = -flippedMatrix[13]; } if (gstate_c.vpWidth < 0) { flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[12] = -flippedMatrix[12]; } // Convert matrices ! ConvertMatrices(flippedMatrix); SetMatrix(u_proj, flippedMatrix.getReadPtr()); } if (u_proj_through != 0 && (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX)) { Matrix4x4 proj_through; proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1); // Convert matrices ! ConvertMatrices(proj_through); SetMatrix(u_proj_through, proj_through.getReadPtr()); } if (u_texenv != 0 && (dirtyUniforms & DIRTY_TEXENV)) { SetColorUniform3(u_texenv, gstate.texenvcolor); } if (u_alphacolorref != 0 && (dirtyUniforms & DIRTY_ALPHACOLORREF)) { SetColorUniform3Alpha255(u_alphacolorref, gstate.getColorTestRef(), gstate.getAlphaTestRef()); } if (u_colormask != 0 && (dirtyUniforms & DIRTY_COLORMASK)) { SetColorUniform3(u_colormask, gstate.colormask); } if (u_fogcolor != 0 && (dirtyUniforms & DIRTY_FOGCOLOR)) { SetColorUniform3(u_fogcolor, gstate.fogcolor); } if (u_fogcoef != 0 && (dirtyUniforms & DIRTY_FOGCOEF)) { const float fogcoef[2] = { getFloat24(gstate.fog1), getFloat24(gstate.fog2), }; SetFloatArray(u_fogcoef, fogcoef, 2); } // Texturing if (u_uvscaleoffset != 0 && (dirtyUniforms & DIRTY_UVSCALEOFFSET)) { float uvscaleoff[4]; if (gstate.isModeThrough()) { // We never get here because we don't use HW transform with through mode. // Although - why don't we? uvscaleoff[0] = gstate_c.uv.uScale / gstate_c.curTextureWidth; uvscaleoff[1] = gstate_c.uv.vScale / gstate_c.curTextureHeight; uvscaleoff[2] = gstate_c.uv.uOff / gstate_c.curTextureWidth; uvscaleoff[3] = gstate_c.uv.vOff / gstate_c.curTextureHeight; } else { int w = gstate.getTextureWidth(0); int h = gstate.getTextureHeight(0); float widthFactor = (float)w / (float)gstate_c.curTextureWidth; float heightFactor = (float)h / (float)gstate_c.curTextureHeight; // Not sure what GE_TEXMAP_UNKNOWN is, but seen in Riviera. Treating the same as GE_TEXMAP_TEXTURE_COORDS works. if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS || gstate.getUVGenMode() == GE_TEXMAP_UNKNOWN) { static const float rescale[4] = {1.0f, 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f}; float factor = rescale[(gstate.vertType & GE_VTYPE_TC_MASK) >> GE_VTYPE_TC_SHIFT]; uvscaleoff[0] = gstate_c.uv.uScale * factor * widthFactor; uvscaleoff[1] = gstate_c.uv.vScale * factor * heightFactor; uvscaleoff[2] = gstate_c.uv.uOff * widthFactor; uvscaleoff[3] = gstate_c.uv.vOff * heightFactor; } else { uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; } } SetFloatArray(u_uvscaleoffset, uvscaleoff, 4); } // Transform if (u_world != 0 && (dirtyUniforms & DIRTY_WORLDMATRIX)) { SetMatrix4x3(u_world, gstate.worldMatrix); } if (u_view != 0 && (dirtyUniforms & DIRTY_VIEWMATRIX)) { SetMatrix4x3(u_view, gstate.viewMatrix); } if (u_texmtx != 0 && (dirtyUniforms & DIRTY_TEXMATRIX)) { SetMatrix4x3(u_texmtx, gstate.tgenMatrix); } // TODO: Could even set all bones in one go if they're all dirty. #ifdef USE_BONE_ARRAY if (u_bone != 0) { float allBones[8 * 16]; bool allDirty = true; for (int i = 0; i < numBones; i++) { if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { ConvertMatrix4x3To4x4(gstate.boneMatrix + 12 * i, allBones + 16 * i); } else { allDirty = false; } } if (allDirty) { // Set them all with one call glUniformMatrix4fv(u_bone, numBones, GL_FALSE, allBones); } else { // Set them one by one. Could try to coalesce two in a row etc but too lazy. for (int i = 0; i < numBones; i++) { if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { glUniformMatrix4fv(u_bone + i, 1, GL_FALSE, allBones + 16 * i); } } } } #else float bonetemp[16]; for (int i = 0; i < numBones; i++) { if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { ConvertMatrix4x3To4x4(gstate.boneMatrix + 12 * i, bonetemp); if (u_bone[i] != 0) SetMatrix(u_bone[i], bonetemp); } } #endif // Lighting if (u_ambient != 0 && (dirtyUniforms & DIRTY_AMBIENT)) { SetColorUniform3Alpha(u_ambient, gstate.ambientcolor, gstate.getAmbientA()); } if (u_matambientalpha != 0 && (dirtyUniforms & DIRTY_MATAMBIENTALPHA)) { SetColorUniform3Alpha(u_matambientalpha, gstate.materialambient, gstate.getMaterialAmbientA()); } if (u_matdiffuse != 0 && (dirtyUniforms & DIRTY_MATDIFFUSE)) { SetColorUniform3(u_matdiffuse, gstate.materialdiffuse); } if (u_matemissive != 0 && (dirtyUniforms & DIRTY_MATEMISSIVE)) { SetColorUniform3(u_matemissive, gstate.materialemissive); } if (u_matspecular != 0 && (dirtyUniforms & DIRTY_MATSPECULAR)) { SetColorUniform3ExtraFloat(u_matspecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef)); } for (int i = 0; i < 4; i++) { if (dirtyUniforms & (DIRTY_LIGHT0 << i)) { if (gstate.isDirectionalLight(i)) { // Prenormalize float x = gstate_c.lightpos[i][0]; float y = gstate_c.lightpos[i][1]; float z = gstate_c.lightpos[i][2]; float len = sqrtf(x*x+y*y+z*z); if (len == 0.0f) len = 1.0f; else len = 1.0f / len; float vec[3] = { x * len, y * len, z * len }; if (u_lightpos[i] != 0) SetFloatArray(u_lightpos[i], vec, 3); } else { if (u_lightpos[i] != 0) SetFloatArray(u_lightpos[i], gstate_c.lightpos[i], 3); } if (u_lightdir[i] != 0) SetFloatArray(u_lightdir[i], gstate_c.lightdir[i], 3); if (u_lightatt[i] != 0) SetFloatArray(u_lightatt[i], gstate_c.lightatt[i], 3); if (u_lightangle[i] != 0) SetFloat(u_lightangle[i], gstate_c.lightangle[i]); if (u_lightspotCoef[i] != 0) SetFloat(u_lightspotCoef[i], gstate_c.lightspotCoef[i]); if (u_lightambient[i] != 0) SetFloatArray(u_lightambient[i], gstate_c.lightColor[0][i], 3); if (u_lightdiffuse[i] != 0) SetFloatArray(u_lightdiffuse[i], gstate_c.lightColor[1][i], 3); if (u_lightspecular[i] != 0) SetFloatArray(u_lightspecular[i], gstate_c.lightColor[2][i], 3); } } dirtyUniforms = 0; } ShaderManager::ShaderManager() : lastShader_(NULL), globalDirty_(0xFFFFFFFF), shaderSwitchDirty_(0) { codeBuffer_ = new char[16384]; } ShaderManager::~ShaderManager() { delete [] codeBuffer_; } void ShaderManager::Clear() { for (auto iter = linkedShaderCache_.begin(); iter != linkedShaderCache_.end(); ++iter) { delete iter->ls; } for (auto iter = fsCache_.begin(); iter != fsCache_.end(); ++iter) { delete iter->second; } for (auto iter = vsCache_.begin(); iter != vsCache_.end(); ++iter) { delete iter->second; } linkedShaderCache_.clear(); fsCache_.clear(); vsCache_.clear(); globalDirty_ = 0xFFFFFFFF; lastFSID_.clear(); lastVSID_.clear(); DirtyShader(); } void ShaderManager::ClearCache(bool deleteThem) { Clear(); } void ShaderManager::DirtyShader() { // Forget the last shader ID lastFSID_.clear(); lastVSID_.clear(); lastShader_ = 0; globalDirty_ = 0xFFFFFFFF; shaderSwitchDirty_ = 0; } void ShaderManager::EndFrame() { // disables vertex arrays if (lastShader_) lastShader_->stop(); lastShader_ = 0; } LinkedShader *ShaderManager::ApplyShader(int prim) { if (globalDirty_) { if (lastShader_) lastShader_->dirtyUniforms |= globalDirty_; shaderSwitchDirty_ |= globalDirty_; globalDirty_ = 0; } bool useHWTransform = CanUseHardwareTransform(prim); VertexShaderID VSID; FragmentShaderID FSID; ComputeVertexShaderID(&VSID, prim, useHWTransform); ComputeFragmentShaderID(&FSID); // Just update uniforms if this is the same shader as last time. if (lastShader_ != 0 && VSID == lastVSID_ && FSID == lastFSID_) { lastShader_->updateUniforms(); return lastShader_; // Already all set. } if (lastShader_ != 0) { // There was a previous shader and we're switching. lastShader_->stop(); } lastVSID_ = VSID; lastFSID_ = FSID; VSCache::iterator vsIter = vsCache_.find(VSID); VSShader *vs; if (vsIter == vsCache_.end()) { // Vertex shader not in cache. Let's compile it. GenerateVertexShader(prim, codeBuffer_, useHWTransform); vs = new VSShader(codeBuffer_, useHWTransform); if (vs->Failed()) { ERROR_LOG(HLE, "Shader compilation failed, falling back to software transform"); osm.Show("hardware transform error - falling back to software", 2.5f, 0xFF3030FF, -1, true); delete vs; // TODO: Look for existing shader with the appropriate ID, use that instead of generating a new one - however, need to make sure // that that shader ID is not used when computing the linked shader ID below, because then IDs won't match // next time and we'll do this over and over... // Can still work with software transform. GenerateVertexShader(prim, codeBuffer_, false); vs = new VSShader(codeBuffer_, false); } vsCache_[VSID] = vs; } else { vs = vsIter->second; } FSCache::iterator fsIter = fsCache_.find(FSID); PSShader *fs; if (fsIter == fsCache_.end()) { // Fragment shader not in cache. Let's compile it. GenerateFragmentShader(codeBuffer_); fs = new PSShader(codeBuffer_, useHWTransform); fsCache_[FSID] = fs; } else { fs = fsIter->second; } // Okay, we have both shaders. Let's see if there's a linked one. LinkedShader *ls = NULL; for (auto iter = linkedShaderCache_.begin(); iter != linkedShaderCache_.end(); ++iter) { // Deferred dirtying! Let's see if we can make this even more clever later. iter->ls->dirtyUniforms |= shaderSwitchDirty_; if (iter->vs == vs && iter->fs == fs) { ls = iter->ls; } } shaderSwitchDirty_ = 0; if (ls == NULL) { ls = new LinkedShader(vs, fs, vs->UseHWTransform()); // This does "use" automatically const LinkedShaderCacheEntry entry(vs, fs, ls); linkedShaderCache_.push_back(entry); } else { // If shader changed we need to update all uniforms if (lastShader_ != ls) { ls->dirtyUniforms = DIRTY_ALL; } ls->use(); } lastShader_ = ls; return ls; }