mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Fix the NEON ABGR4444 CheckAlpha routine and work around failed bNEON detection on ARM64
This commit is contained in:
parent
7b50ec7b75
commit
2a1f6bca97
3 changed files with 13 additions and 8 deletions
|
@ -101,7 +101,7 @@ bool CheckCPUFeature(const std::string& feature)
|
|||
|
||||
if (!File::OpenCPPFile(file, procfile, std::ios::in))
|
||||
return 0;
|
||||
|
||||
|
||||
while (std::getline(file, line))
|
||||
{
|
||||
if (line.find(marker) != std::string::npos)
|
||||
|
@ -115,7 +115,7 @@ bool CheckCPUFeature(const std::string& feature)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -170,7 +170,7 @@ void CPUInfo::Detect()
|
|||
Mode64bit = false;
|
||||
#endif
|
||||
vendor = VENDOR_ARM;
|
||||
|
||||
|
||||
// Get the information about the CPU
|
||||
#if !defined(__linux__)
|
||||
bool isVFP3 = false;
|
||||
|
@ -240,6 +240,11 @@ void CPUInfo::Detect()
|
|||
bASIMD = CheckCPUFeature("asimd");
|
||||
num_cores = GetCoreCount();
|
||||
#endif
|
||||
#ifdef ARM64
|
||||
// Whether the above detection failed or not, on ARM64 we do have ASIMD/NEON.
|
||||
bNEON = true;
|
||||
bASIMD = true;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Turn the cpu info into a string we can show
|
||||
|
|
|
@ -517,7 +517,7 @@ CheckAlphaResult CheckAlphaRGBA8888Basic(const u32 *pixelData, int stride, int w
|
|||
if ((w & 3) == 0 && (stride & 3) == 0) {
|
||||
#ifdef _M_SSE
|
||||
return CheckAlphaRGBA8888SSE2(pixelData, stride, w, h);
|
||||
#elif defined(ARM) || defined(ARM64)
|
||||
#elif (defined(ARM) && defined(HAVE_ARMV7)) || defined(ARM64)
|
||||
if (cpu_info.bNEON) {
|
||||
return CheckAlphaRGBA8888NEON(pixelData, stride, w, h);
|
||||
}
|
||||
|
@ -551,7 +551,7 @@ CheckAlphaResult CheckAlphaABGR4444Basic(const u32 *pixelData, int stride, int w
|
|||
if ((w & 7) == 0 && (stride & 7) == 0) {
|
||||
#ifdef _M_SSE
|
||||
return CheckAlphaABGR4444SSE2(pixelData, stride, w, h);
|
||||
#elif defined(ARM) || defined(ARM64)
|
||||
#elif (defined(ARM) && defined(HAVE_ARMV7)) || defined(ARM64)
|
||||
if (cpu_info.bNEON) {
|
||||
return CheckAlphaABGR4444NEON(pixelData, stride, w, h);
|
||||
}
|
||||
|
@ -588,7 +588,7 @@ CheckAlphaResult CheckAlphaABGR1555Basic(const u32 *pixelData, int stride, int w
|
|||
if ((w & 7) == 0 && (stride & 7) == 0) {
|
||||
#ifdef _M_SSE
|
||||
return CheckAlphaABGR1555SSE2(pixelData, stride, w, h);
|
||||
#elif defined(ARM) || defined(ARM64)
|
||||
#elif (defined(ARM) && defined(HAVE_ARMV7)) || defined(ARM64)
|
||||
if (cpu_info.bNEON) {
|
||||
return CheckAlphaABGR1555NEON(pixelData, stride, w, h);
|
||||
}
|
||||
|
|
|
@ -301,7 +301,7 @@ CheckAlphaResult CheckAlphaRGBA8888NEON(const u32 *pixelData, int stride, int w,
|
|||
|
||||
CheckAlphaResult CheckAlphaABGR4444NEON(const u32 *pixelData, int stride, int w, int h) {
|
||||
const uint16x8_t zero = vdupq_n_u16(0);
|
||||
const uint16x8_t full = vdupq_n_u16(0xF);
|
||||
const uint16x8_t full = vdupq_n_u16(0xF000);
|
||||
|
||||
const u16 *p = (const u16 *)pixelData;
|
||||
|
||||
|
@ -313,7 +313,7 @@ CheckAlphaResult CheckAlphaABGR4444NEON(const u32 *pixelData, int stride, int w,
|
|||
uint16x8_t foundFraction = zero;
|
||||
|
||||
for (int i = 0; i < w; i += 8) {
|
||||
const uint16x8_t a = vshrq_n_u16(vld1q_u16(&p[i]), 12);
|
||||
const uint16x8_t a = vshlq_n_u16(vld1q_u16(&p[i]), 12);
|
||||
|
||||
const uint16x8_t isZero = vceqq_u16(a, zero);
|
||||
foundAZero = vorrq_u16(foundAZero, isZero);
|
||||
|
|
Loading…
Add table
Reference in a new issue