Fix NEON 4444 CheckAlpha, workaround for failed bNEON detection on ARM64

This commit is contained in:
Henrik Rydgård 2015-05-31 12:28:41 +02:00
parent 7b50ec7b75
commit 2a1f6bca97
3 changed files with 13 additions and 8 deletions

View file

@@ -101,7 +101,7 @@ bool CheckCPUFeature(const std::string& feature)
if (!File::OpenCPPFile(file, procfile, std::ios::in))
return 0;
while (std::getline(file, line))
{
if (line.find(marker) != std::string::npos)
@@ -115,7 +115,7 @@ bool CheckCPUFeature(const std::string& feature)
}
}
}
return false;
}
@@ -170,7 +170,7 @@ void CPUInfo::Detect()
Mode64bit = false;
#endif
vendor = VENDOR_ARM;
// Get the information about the CPU
#if !defined(__linux__)
bool isVFP3 = false;
@@ -240,6 +240,11 @@ void CPUInfo::Detect()
bASIMD = CheckCPUFeature("asimd");
num_cores = GetCoreCount();
#endif
+#ifdef ARM64
+// Whether the above detection failed or not, on ARM64 we do have ASIMD/NEON.
+bNEON = true;
+bASIMD = true;
+#endif
}
// Turn the cpu info into a string we can show

View file

@@ -517,7 +517,7 @@ CheckAlphaResult CheckAlphaRGBA8888Basic(const u32 *pixelData, int stride, int w
if ((w & 3) == 0 && (stride & 3) == 0) {
#ifdef _M_SSE
return CheckAlphaRGBA8888SSE2(pixelData, stride, w, h);
-#elif defined(ARM) || defined(ARM64)
+#elif (defined(ARM) && defined(HAVE_ARMV7)) || defined(ARM64)
if (cpu_info.bNEON) {
return CheckAlphaRGBA8888NEON(pixelData, stride, w, h);
}
@@ -551,7 +551,7 @@ CheckAlphaResult CheckAlphaABGR4444Basic(const u32 *pixelData, int stride, int w
if ((w & 7) == 0 && (stride & 7) == 0) {
#ifdef _M_SSE
return CheckAlphaABGR4444SSE2(pixelData, stride, w, h);
-#elif defined(ARM) || defined(ARM64)
+#elif (defined(ARM) && defined(HAVE_ARMV7)) || defined(ARM64)
if (cpu_info.bNEON) {
return CheckAlphaABGR4444NEON(pixelData, stride, w, h);
}
@@ -588,7 +588,7 @@ CheckAlphaResult CheckAlphaABGR1555Basic(const u32 *pixelData, int stride, int w
if ((w & 7) == 0 && (stride & 7) == 0) {
#ifdef _M_SSE
return CheckAlphaABGR1555SSE2(pixelData, stride, w, h);
-#elif defined(ARM) || defined(ARM64)
+#elif (defined(ARM) && defined(HAVE_ARMV7)) || defined(ARM64)
if (cpu_info.bNEON) {
return CheckAlphaABGR1555NEON(pixelData, stride, w, h);
}

View file

@@ -301,7 +301,7 @@ CheckAlphaResult CheckAlphaRGBA8888NEON(const u32 *pixelData, int stride, int w,
CheckAlphaResult CheckAlphaABGR4444NEON(const u32 *pixelData, int stride, int w, int h) {
const uint16x8_t zero = vdupq_n_u16(0);
-const uint16x8_t full = vdupq_n_u16(0xF);
+const uint16x8_t full = vdupq_n_u16(0xF000);
const u16 *p = (const u16 *)pixelData;
@@ -313,7 +313,7 @@ CheckAlphaResult CheckAlphaABGR4444NEON(const u32 *pixelData, int stride, int w,
uint16x8_t foundFraction = zero;
for (int i = 0; i < w; i += 8) {
-const uint16x8_t a = vshrq_n_u16(vld1q_u16(&p[i]), 12);
+const uint16x8_t a = vshlq_n_u16(vld1q_u16(&p[i]), 12);
const uint16x8_t isZero = vceqq_u16(a, zero);
foundAZero = vorrq_u16(foundAZero, isZero);