Merge pull request #13343 from unknownbrackets/xxhash

Switch to XXH3
This commit is contained in:
Henrik Rydgård 2020-08-28 12:20:26 +02:00 committed by GitHub
commit cb3ed8f4a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
29 changed files with 4806 additions and 1232 deletions

View file

@ -8,14 +8,11 @@
#include "Common/CommonFuncs.h"
#include "Common/Log.h"
// Whatever random value.
const uint32_t hashmapSeed = 0x23B58532;
// TODO: Try hardware CRC. Unfortunately not available on older Intels or ARM32.
// Seems to be ubiquitous on ARM64 though.
template<class K>
inline uint32_t HashKey(const K &k) {
return XXH32(&k, sizeof(k), hashmapSeed);
return XXH3_64bits(&k, sizeof(k)) & 0xFFFFFFFF;
}
template<class K>
inline bool KeyEquals(const K &a, const K &b) {

View file

@ -41,12 +41,12 @@ bool isInInterval(u32 start, u32 size, u32 value)
}
static u32 computeHash(u32 address, u32 size)
static HashType computeHash(u32 address, u32 size)
{
#ifdef _M_X64
return XXH64(Memory::GetPointer(address), size, 0xBACD7814BACD7814LL);
return XXH3_64bits(Memory::GetPointer(address), size);
#else
return XXH32(Memory::GetPointer(address), size, 0xBACD7814);
return XXH3_64bits(Memory::GetPointer(address), size) & 0xFFFFFFFF;
#endif
}

View file

@ -481,7 +481,7 @@ u64 IRBlock::CalculateHash() const {
buffer[pos++] = instr.encoding;
}
return XXH64(&buffer[0], origSize_, 0x9A5C33B8);
return XXH3_64bits(&buffer[0], origSize_);
}
return 0;

View file

@ -246,9 +246,9 @@ u32 TextureReplacer::ComputeHash(u32 addr, int bufw, int w, int h, GETextureForm
case ReplacedTextureHash::QUICK:
return StableQuickTexHash(checkp, sizeInRAM);
case ReplacedTextureHash::XXH32:
return DoReliableHash32(checkp, sizeInRAM, 0xBACD7814);
return XXH32(checkp, sizeInRAM, 0xBACD7814);
case ReplacedTextureHash::XXH64:
return DoReliableHash64(checkp, sizeInRAM, 0xBACD7814);
return XXH64(checkp, sizeInRAM, 0xBACD7814);
default:
return 0;
}
@ -269,7 +269,7 @@ u32 TextureReplacer::ComputeHash(u32 addr, int bufw, int w, int h, GETextureForm
case ReplacedTextureHash::XXH32:
for (int y = 0; y < h; ++y) {
u32 rowHash = DoReliableHash32(checkp, bytesPerLine, 0xBACD7814);
u32 rowHash = XXH32(checkp, bytesPerLine, 0xBACD7814);
result = (result * 11) ^ rowHash;
checkp += stride;
}
@ -277,7 +277,7 @@ u32 TextureReplacer::ComputeHash(u32 addr, int bufw, int w, int h, GETextureForm
case ReplacedTextureHash::XXH64:
for (int y = 0; y < h; ++y) {
u32 rowHash = DoReliableHash64(checkp, bytesPerLine, 0xBACD7814);
u32 rowHash = XXH64(checkp, bytesPerLine, 0xBACD7814);
result = (result * 11) ^ rowHash;
checkp += stride;
}

View file

@ -48,7 +48,6 @@ enum class ReplacedTextureAlpha {
// For forward comatibility, we specify the hash.
enum class ReplacedTextureHash {
// TODO: Maybe only support crc32c for now?
QUICK,
XXH32,
XXH64,

View file

@ -65,7 +65,7 @@ PortManager::~PortManager() {
}
void PortManager::Terminate() {
INFO_LOG(SCENET, "PortManager::Terminate()");
VERBOSE_LOG(SCENET, "PortManager::Terminate()");
if (urls) {
FreeUPNPUrls(urls);
free(urls);
@ -96,7 +96,7 @@ bool PortManager::Initialize(const unsigned int timeout) {
unsigned char ttl = 2; // defaulting to 2
int error = 0;
INFO_LOG(SCENET, "PortManager::Initialize(%d)", timeout);
VERBOSE_LOG(SCENET, "PortManager::Initialize(%d)", timeout);
if (!g_Config.bEnableUPnP) {
ERROR_LOG(SCENET, "PortManager::Initialize - UPnP is Disabled on Networking Settings");
return false;
@ -265,7 +265,7 @@ bool PortManager::Remove(const char* protocol, unsigned short port) {
bool PortManager::Restore() {
int r;
INFO_LOG(SCENET, "PortManager::Restore()");
VERBOSE_LOG(SCENET, "PortManager::Restore()");
if (urls == NULL || urls->controlURL == NULL || urls->controlURL[0] == '\0')
{
if (g_Config.bEnableUPnP) WARN_LOG(SCENET, "PortManager::Remove - the init was not done !");
@ -318,7 +318,7 @@ bool PortManager::Clear() {
char rHost[64];
char duration[16];
INFO_LOG(SCENET, "PortManager::Clear()");
VERBOSE_LOG(SCENET, "PortManager::Clear()");
if (urls == NULL || urls->controlURL == NULL || urls->controlURL[0] == '\0')
{
if (g_Config.bEnableUPnP) WARN_LOG(SCENET, "PortManager::Clear - the init was not done !");

View file

@ -23,7 +23,6 @@
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/SplineCommon.h"
#include "GPU/Common/VertexDecoderCommon.h"
#include "GPU/Common/TextureDecoder.h" // for ReliableHash
#include "GPU/ge_constants.h"
#include "GPU/GPUState.h"
@ -607,7 +606,7 @@ inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) {
size_t step = sz / 4;
u32 hash = 0;
for (size_t i = 0; i < sz; i += step) {
hash += DoReliableHash32(p + i, 100, 0x3A44B9C4);
hash += XXH3_64bits(p + i, 100);
}
return hash;
} else {
@ -642,8 +641,8 @@ u32 DrawEngineCommon::ComputeMiniHash() {
return fullhash;
}
ReliableHashType DrawEngineCommon::ComputeHash() {
ReliableHashType fullhash = 0;
uint64_t DrawEngineCommon::ComputeHash() {
uint64_t fullhash = 0;
const int vertexSize = dec_->GetDecVtxFmt().stride;
const int indexSize = IndexSize(dec_->VertexType());
@ -652,7 +651,7 @@ ReliableHashType DrawEngineCommon::ComputeHash() {
for (int i = 0; i < numDrawCalls; i++) {
const DeferredDrawCall &dc = drawCalls[i];
if (!dc.inds) {
fullhash += DoReliableHash((const char *)dc.verts, vertexSize * dc.vertexCount, 0x1DE8CAC4);
fullhash += XXH3_64bits((const char *)dc.verts, vertexSize * dc.vertexCount);
} else {
int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;
int j = i + 1;
@ -667,15 +666,15 @@ ReliableHashType DrawEngineCommon::ComputeHash() {
}
// This could get seriously expensive with sparse indices. Need to combine hashing ranges the same way
// we do when drawing.
fullhash += DoReliableHash((const char *)dc.verts + vertexSize * indexLowerBound,
vertexSize * (indexUpperBound - indexLowerBound), 0x029F3EE1);
fullhash += XXH3_64bits((const char *)dc.verts + vertexSize * indexLowerBound,
vertexSize * (indexUpperBound - indexLowerBound));
// Hm, we will miss some indices when combining above, but meh, it should be fine.
fullhash += DoReliableHash((const char *)dc.inds, indexSize * dc.vertexCount, 0x955FD1CA);
fullhash += XXH3_64bits((const char *)dc.inds, indexSize * dc.vertexCount);
i = lastMatch;
}
}
fullhash += DoReliableHash(&drawCalls[0].uvScale, sizeof(drawCalls[0].uvScale) * numDrawCalls, 0x0123e658);
fullhash += XXH3_64bits(&drawCalls[0].uvScale, sizeof(drawCalls[0].uvScale) * numDrawCalls);
return fullhash;
}

View file

@ -36,13 +36,6 @@ enum {
DECODED_INDEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 16,
};
// Avoiding the full include of TextureDecoder.h.
#if (defined(_M_SSE) && defined(_M_X64)) || defined(ARM64)
typedef u64 ReliableHashType;
#else
typedef u32 ReliableHashType;
#endif
inline uint32_t GetVertTypeID(uint32_t vertType, int uvGenMode) {
// As the decoder depends on the UVGenMode when we use UV prescale, we simply mash it
// into the top of the verttype where there are unused bits.
@ -115,7 +108,7 @@ protected:
// Utility for vertex caching
u32 ComputeMiniHash();
ReliableHashType ComputeHash();
uint64_t ComputeHash();
// Vertex decoding
void DecodeVertsStep(u8 *dest, int &i, int &decodedVerts);

View file

@ -311,8 +311,6 @@ void DoUnswizzleTex16Basic(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32
QuickTexHashFunc DoQuickTexHash = &QuickTexHashBasic;
QuickTexHashFunc StableQuickTexHash = &QuickTexHashNonSSE;
UnswizzleTex16Func DoUnswizzleTex16 = &DoUnswizzleTex16Basic;
ReliableHash32Func DoReliableHash32 = &XXH32;
ReliableHash64Func DoReliableHash64 = &XXH64;
#endif
// This has to be done after CPUDetect has done its magic.
@ -322,10 +320,6 @@ void SetupTextureDecoder() {
DoQuickTexHash = &QuickTexHashNEON;
StableQuickTexHash = &QuickTexHashNEON;
DoUnswizzleTex16 = &DoUnswizzleTex16NEON;
#if !PPSSPP_PLATFORM(IOS)
// Not sure if this is safe on iOS, it's had issues with xxhash.
DoReliableHash32 = &ReliableHash32NEON;
#endif
}
#endif
}

View file

@ -45,31 +45,11 @@ u32 QuickTexHashSSE2(const void *checkp, u32 size);
void DoUnswizzleTex16Basic(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch);
#define DoUnswizzleTex16 DoUnswizzleTex16Basic
#include "ext/xxhash.h"
#define DoReliableHash32 XXH32
#define DoReliableHash64 XXH64
#if defined(_M_X64) || defined(ARM64)
#define DoReliableHash XXH64
typedef u64 ReliableHashType;
#else
#define DoReliableHash XXH32
typedef u32 ReliableHashType;
#endif
// For ARM64, NEON is mandatory, so we also statically link.
#elif PPSSPP_ARCH(ARM64) || defined(ARM64)
#define DoQuickTexHash QuickTexHashNEON
#define StableQuickTexHash QuickTexHashNEON
#define DoUnswizzleTex16 DoUnswizzleTex16NEON
#define DoReliableHash32 ReliableHash32NEON
#include "ext/xxhash.h"
#define DoReliableHash64 XXH64
#define DoReliableHash XXH64
typedef u64 ReliableHashType;
#else
typedef u32 (*QuickTexHashFunc)(const void *checkp, u32 size);
extern QuickTexHashFunc DoQuickTexHash;
@ -77,15 +57,6 @@ extern QuickTexHashFunc StableQuickTexHash;
typedef void (*UnswizzleTex16Func)(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch);
extern UnswizzleTex16Func DoUnswizzleTex16;
typedef u32 (*ReliableHash32Func)(const void *input, size_t len, u32 seed);
extern ReliableHash32Func DoReliableHash32;
typedef u64 (*ReliableHash64Func)(const void *input, size_t len, u64 seed);
extern ReliableHash64Func DoReliableHash64;
#define DoReliableHash DoReliableHash32
typedef u32 ReliableHashType;
#endif
CheckAlphaResult CheckAlphaRGBA8888Basic(const u32 *pixelData, int stride, int w, int h);

View file

@ -162,107 +162,6 @@ void DoUnswizzleTex16NEON(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 p
}
}
// NOTE: This is just a NEON version of xxhash.
// GCC sucks at making things NEON and can't seem to handle it.
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
#else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64;
#endif
#define PRIME32_1 2654435761U
#define PRIME32_2 2246822519U
#define PRIME32_3 3266489917U
#define PRIME32_4 668265263U
#define PRIME32_5 374761393U
#if defined(_MSC_VER)
# define XXH_rotl32(x,r) _rotl(x,r)
#else
# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
#endif
u32 ReliableHash32NEON(const void *input, size_t len, u32 seed) {
if (((uintptr_t)input & 3) != 0) {
// Cannot handle misaligned data. Fall back to XXH32.
return XXH32(input, len, seed);
}
const u8 *p = (const u8 *)input;
const u8 *const bEnd = p + len;
U32 h32;
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; }
#endif
if (len>=16)
{
const BYTE* const limit = bEnd - 16;
U32 v1 = seed + PRIME32_1 + PRIME32_2;
U32 v2 = seed + PRIME32_2;
U32 v3 = seed + 0;
U32 v4 = seed - PRIME32_1;
uint32x4_t prime32_1q = vdupq_n_u32(PRIME32_1);
uint32x4_t prime32_2q = vdupq_n_u32(PRIME32_2);
uint32x4_t vq = vcombine_u32(vcreate_u32(v1 | ((U64)v2 << 32)), vcreate_u32(v3 | ((U64)v4 << 32)));
do {
__builtin_prefetch(p + 0xc0, 0, 0);
vq = vmlaq_u32(vq, vld1q_u32((const U32*)p), prime32_2q);
vq = vorrq_u32(vshlq_n_u32(vq, 13), vshrq_n_u32(vq, 32 - 13));
p += 16;
vq = vmulq_u32(vq, prime32_1q);
} while (p<=limit);
v1 = vgetq_lane_u32(vq, 0);
v2 = vgetq_lane_u32(vq, 1);
v3 = vgetq_lane_u32(vq, 2);
v4 = vgetq_lane_u32(vq, 3);
h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
}
else
{
h32 = seed + PRIME32_5;
}
h32 += (U32) len;
while (p<=bEnd-4)
{
h32 += *(const U32*)p * PRIME32_3;
h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
p+=4;
}
while (p<bEnd)
{
h32 += (*p) * PRIME32_5;
h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
p++;
}
h32 ^= h32 >> 15;
h32 *= PRIME32_2;
h32 ^= h32 >> 13;
h32 *= PRIME32_3;
h32 ^= h32 >> 16;
return h32;
}
static inline bool VectorIsNonZeroNEON(const uint32x4_t &v) {
u64 low = vgetq_lane_u64(vreinterpretq_u64_u32(v), 0);
u64 high = vgetq_lane_u64(vreinterpretq_u64_u32(v), 1);

View file

@ -19,7 +19,6 @@
u32 QuickTexHashNEON(const void *checkp, u32 size);
void DoUnswizzleTex16NEON(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch);
u32 ReliableHash32NEON(const void *input, size_t len, u32 seed);
CheckAlphaResult CheckAlphaRGBA8888NEON(const u32 *pixelData, int stride, int w, int h);
CheckAlphaResult CheckAlphaABGR4444NEON(const u32 *pixelData, int stride, int w, int h);

View file

@ -371,7 +371,7 @@ void DrawEngineD3D11::DoFlush() {
case VertexArrayInfoD3D11::VAI_NEW:
{
// Haven't seen this one before.
ReliableHashType dataHash = ComputeHash();
uint64_t dataHash = ComputeHash();
vai->hash = dataHash;
vai->minihash = ComputeMiniHash();
vai->status = VertexArrayInfoD3D11::VAI_HASHING;
@ -395,7 +395,7 @@ void DrawEngineD3D11::DoFlush() {
if (vai->drawsUntilNextFullHash == 0) {
// Let's try to skip a full hash if mini would fail.
const u32 newMiniHash = ComputeMiniHash();
ReliableHashType newHash = vai->hash;
uint64_t newHash = vai->hash;
if (newMiniHash == vai->minihash) {
newHash = ComputeHash();
}

View file

@ -79,7 +79,7 @@ public:
VAI_UNRELIABLE, // never cache
};
ReliableHashType hash;
uint64_t hash;
u32 minihash;
ID3D11Buffer *vbo;

View file

@ -208,7 +208,10 @@ void TextureCacheD3D11::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBa
// Adding clutBaseBytes may just be mitigating this for some usage patterns.
const u32 clutExtendedBytes = std::min(clutTotalBytes_ + clutBaseBytes, clutMaxBytes_);
clutHash_ = DoReliableHash32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
if (replacer_.Enabled())
clutHash_ = XXH32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
else
clutHash_ = XXH3_64bits((const char *)clutBufRaw_, clutExtendedBytes) & 0xFFFFFFFF;
clutBuf_ = clutBufRaw_;
// Special optimization: fonts typically draw clut4 with just alpha values in a single color.

View file

@ -350,7 +350,7 @@ void DrawEngineDX9::DoFlush() {
case VertexArrayInfoDX9::VAI_NEW:
{
// Haven't seen this one before.
ReliableHashType dataHash = ComputeHash();
uint64_t dataHash = ComputeHash();
vai->hash = dataHash;
vai->minihash = ComputeMiniHash();
vai->status = VertexArrayInfoDX9::VAI_HASHING;
@ -375,7 +375,7 @@ void DrawEngineDX9::DoFlush() {
if (vai->drawsUntilNextFullHash == 0) {
// Let's try to skip a full hash if mini would fail.
const u32 newMiniHash = ComputeMiniHash();
ReliableHashType newHash = vai->hash;
uint64_t newHash = vai->hash;
if (newMiniHash == vai->minihash) {
newHash = ComputeHash();
}

View file

@ -77,7 +77,7 @@ public:
VAI_UNRELIABLE, // never cache
};
ReliableHashType hash;
uint64_t hash;
u32 minihash;
LPDIRECT3DVERTEXBUFFER9 vbo;

View file

@ -250,7 +250,10 @@ void TextureCacheDX9::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase
// Adding clutBaseBytes may just be mitigating this for some usage patterns.
const u32 clutExtendedBytes = std::min(clutTotalBytes_ + clutBaseBytes, clutMaxBytes_);
clutHash_ = DoReliableHash32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
if (replacer_.Enabled())
clutHash_ = XXH32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
else
clutHash_ = XXH3_64bits((const char *)clutBufRaw_, clutExtendedBytes) & 0xFFFFFFFF;
clutBuf_ = clutBufRaw_;
// Special optimization: fonts typically draw clut4 with just alpha values in a single color.

View file

@ -365,7 +365,7 @@ void DrawEngineGLES::DoFlush() {
case VertexArrayInfo::VAI_NEW:
{
// Haven't seen this one before.
ReliableHashType dataHash = ComputeHash();
uint64_t dataHash = ComputeHash();
vai->hash = dataHash;
vai->minihash = ComputeMiniHash();
vai->status = VertexArrayInfo::VAI_HASHING;
@ -385,7 +385,7 @@ void DrawEngineGLES::DoFlush() {
if (vai->drawsUntilNextFullHash == 0) {
// Let's try to skip a full hash if mini would fail.
const u32 newMiniHash = ComputeMiniHash();
ReliableHashType newHash = vai->hash;
uint64_t newHash = vai->hash;
if (newMiniHash == vai->minihash) {
newHash = ComputeHash();
}

View file

@ -90,7 +90,7 @@ public:
VAI_UNRELIABLE, // never cache
};
ReliableHashType hash;
uint64_t hash;
u32 minihash;
GLRBuffer *vbo;

View file

@ -249,7 +249,10 @@ void TextureCacheGLES::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBas
// Adding clutBaseBytes may just be mitigating this for some usage patterns.
const u32 clutExtendedBytes = std::min(clutTotalBytes_ + clutBaseBytes, clutMaxBytes_);
clutHash_ = DoReliableHash32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
if (replacer_.Enabled())
clutHash_ = XXH32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
else
clutHash_ = XXH3_64bits((const char *)clutBufRaw_, clutExtendedBytes) & 0xFFFFFFFF;
// Avoid a copy when we don't need to convert colors.
if (clutFormat != GE_CMODE_32BIT_ABGR8888) {

View file

@ -35,7 +35,6 @@
#include "Common/Vulkan/VulkanContext.h"
#include "Common/Vulkan/VulkanMemory.h"
#include "GPU/Common/TextureDecoder.h"
#include "GPU/Common/SplineCommon.h"
#include "GPU/Common/TransformCommon.h"
#include "GPU/Common/VertexDecoderCommon.h"
@ -653,7 +652,7 @@ void DrawEngineVulkan::DoFlush() {
case VertexArrayInfoVulkan::VAI_NEW:
{
// Haven't seen this one before. We don't actually upload the vertex data yet.
ReliableHashType dataHash = ComputeHash();
uint64_t dataHash = ComputeHash();
vai->hash = dataHash;
vai->minihash = ComputeMiniHash();
vai->status = VertexArrayInfoVulkan::VAI_HASHING;
@ -678,7 +677,7 @@ void DrawEngineVulkan::DoFlush() {
if (vai->drawsUntilNextFullHash == 0) {
// Let's try to skip a full hash if mini would fail.
const u32 newMiniHash = ComputeMiniHash();
ReliableHashType newHash = vai->hash;
uint64_t newHash = vai->hash;
if (newMiniHash == vai->minihash) {
newHash = ComputeHash();
}

View file

@ -55,13 +55,6 @@ class PipelineManagerVulkan;
class TextureCacheVulkan;
class FramebufferManagerVulkan;
// Avoiding the full include of TextureDecoder.h.
#if (defined(_M_SSE) && defined(_M_X64)) || defined(ARM64)
typedef u64 ReliableHashType;
#else
typedef u32 ReliableHashType;
#endif
class VulkanContext;
class VulkanPushBuffer;
struct VulkanPipeline;
@ -91,7 +84,7 @@ public:
VAI_UNRELIABLE, // never cache
};
ReliableHashType hash;
uint64_t hash;
u32 minihash;
// These will probably always be the same, but whatever.

View file

@ -507,7 +507,10 @@ void TextureCacheVulkan::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutB
// Adding clutBaseBytes may just be mitigating this for some usage patterns.
const u32 clutExtendedBytes = std::min(clutTotalBytes_ + clutBaseBytes, clutMaxBytes_);
clutHash_ = DoReliableHash32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
if (replacer_.Enabled())
clutHash_ = XXH32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
else
clutHash_ = XXH3_64bits((const char *)clutBufRaw_, clutExtendedBytes) & 0xFFFFFFFF;
clutBuf_ = clutBufRaw_;
// Special optimization: fonts typically draw clut4 with just alpha values in a single color.

View file

@ -16,7 +16,7 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "base/stringutil.h"
#include "ext/cityhash/city.h"
#include "ext/xxhash.h"
#include "i18n/i18n.h"
#include "ui/ui.h"
#include "util/text/utf8.h"
@ -62,7 +62,7 @@ void CwCheatScreen::LoadCheatInfo() {
// We won't parse this, just using it to detect changes to the file.
std::string str;
if (readFileToString(true, engine_->CheatFilename().c_str(), str)) {
fileCheckHash_ = CityHash64(str.c_str(), str.size());
fileCheckHash_ = XXH3_64bits(str.c_str(), str.size());
}
fileCheckCounter_ = 0;
@ -119,7 +119,7 @@ void CwCheatScreen::update() {
// Check if the file has changed. If it has, we'll reload.
std::string str;
if (readFileToString(true, engine_->CheatFilename().c_str(), str)) {
uint64_t newHash = CityHash64(str.c_str(), str.size());
uint64_t newHash = XXH3_64bits(str.c_str(), str.size());
if (newHash != fileCheckHash_) {
// This will update the hash.
RecreateViews();

View file

@ -614,7 +614,7 @@ void VulkanQueueRunner::ApplyMGSHack(std::vector<VKRStep *> &steps) {
VKRFramebuffer *depalFramebuffer = steps[i]->render.framebuffer;
VKRFramebuffer *targetFramebuffer = steps[i + 1]->render.framebuffer;
// OK, found the start of a post-process sequence. Let's scan until we find the end.
for (int j = i; j < steps.size() - 3; j++) {
for (int j = i; j < (int)steps.size() - 3; j++) {
if (((j - i) & 1) == 0) {
// This should be a depal draw.
if (steps[j]->render.numDraws != 1)

View file

@ -1,944 +1,43 @@
/*
xxHash - Fast Hash algorithm
Copyright (C) 2012-2014, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- xxHash source repository : http://code.google.com/p/xxhash/
- public discussion board : https://groups.google.com/forum/#!forum/lz4c
*/
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2012-2020 Yann Collet
*
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at:
* - xxHash homepage: https://www.xxhash.com
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
//**************************************
// Tuning parameters
//**************************************
// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected.
// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance.
// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32).
/*
* xxhash.c instantiates functions defined in xxhash.h
*/
#include "ppsspp_config.h"
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
#define XXH_IMPLEMENTATION /* access definitions */
// ector NOTE: For whatever reason ARM is unhappy about this on Android.
#if !defined(IOS) && !defined(ANDROID) && (defined(ARM) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64))
# define XXH_USE_UNALIGNED_ACCESS 1
#endif
// XXH_ACCEPT_NULL_INPUT_POINTER :
// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
// This option has a very small performance cost (only measurable on small inputs).
// By default, this option is disabled. To enable it, uncomment below define :
//#define XXH_ACCEPT_NULL_INPUT_POINTER 1
// XXH_FORCE_NATIVE_FORMAT :
// By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
// Results are therefore identical for little-endian and big-endian CPU.
// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
// Should endian-independance be of no importance for your application, you may set the #define below to 1.
// It will improve speed for Big-endian CPU.
// This option has no impact on Little_Endian CPU.
#define XXH_FORCE_NATIVE_FORMAT 1
//**************************************
// Compiler Specific Options
//**************************************
// Disable some Visual warning messages
#ifdef _MSC_VER // Visual Studio
# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant
#endif
#ifdef _MSC_VER // Visual Studio
# define FORCE_INLINE static __forceinline
#else
# ifdef __GNUC__
# define FORCE_INLINE static inline __attribute__((always_inline))
# else
# define FORCE_INLINE static inline
# endif
#endif
//**************************************
// Includes & Memory related functions
//**************************************
#include "xxhash.h"
// Modify the local functions below should you wish to use some other memory routines
// for malloc(), free()
#include <stdlib.h>
FORCE_INLINE void* XXH_malloc(size_t s)
{
return malloc(s);
}
FORCE_INLINE void XXH_free (void* p)
{
free(p);
}
// for memcpy()
#include <string.h>
FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size)
{
return memcpy(dest,src,size);
}
//**************************************
// Basic Types
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
#else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64;
#endif
#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS)
# define _PACKED __attribute__ ((packed))
#else
# define _PACKED
#endif
#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
# ifdef __IBMC__
# pragma pack(1)
# else
# pragma pack(push, 1)
# endif
#endif
typedef struct _U32_S
{
U32 v;
} _PACKED U32_S;
typedef struct _U64_S
{
U64 v;
} _PACKED U64_S;
#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
# pragma pack(pop)
#endif
#define A32(x) (((U32_S *)(x))->v)
#define A64(x) (((U64_S *)(x))->v)
//***************************************
// Compiler-specific Functions and Macros
//***************************************
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
// Note : although _rotl exists for minGW (GCC under windows), performance seems poor
#if defined(_MSC_VER)
# define XXH_rotl32(x,r) _rotl(x,r)
# define XXH_rotl64(x,r) _rotl64(x,r)
#else
# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
#endif
#if defined(_MSC_VER) // Visual Studio
# define XXH_swap32 _byteswap_ulong
# define XXH_swap64 _byteswap_uint64
#elif GCC_VERSION >= 403
# define XXH_swap32 __builtin_bswap32
# define XXH_swap64 __builtin_bswap64
#else
static inline U32 XXH_swap32 (U32 x)
{
return ((x << 24) & 0xff000000 ) |
((x << 8) & 0x00ff0000 ) |
((x >> 8) & 0x0000ff00 ) |
((x >> 24) & 0x000000ff );
}
static inline U64 XXH_swap64 (U64 x)
{
return ((x << 56) & 0xff00000000000000ULL) |
((x << 40) & 0x00ff000000000000ULL) |
((x << 24) & 0x0000ff0000000000ULL) |
((x << 8) & 0x000000ff00000000ULL) |
((x >> 8) & 0x00000000ff000000ULL) |
((x >> 24) & 0x0000000000ff0000ULL) |
((x >> 40) & 0x000000000000ff00ULL) |
((x >> 56) & 0x00000000000000ffULL);
}
#endif
//**************************************
// Constants
//**************************************
#define PRIME32_1 2654435761U
#define PRIME32_2 2246822519U
#define PRIME32_3 3266489917U
#define PRIME32_4 668265263U
#define PRIME32_5 374761393U
#define PRIME64_1 11400714785074694791ULL
#define PRIME64_2 14029467366897019727ULL
#define PRIME64_3 1609587929392839161ULL
#define PRIME64_4 9650029242287828579ULL
#define PRIME64_5 2870177450012600261ULL
//**************************************
// Architecture Macros
//**************************************
typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
#ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch
static const int one = 1;
# define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one))
#endif
//**************************************
// Macros
//**************************************
#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations
//****************************
// Memory reads
//****************************
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align)
{
if (align==XXH_unaligned)
return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
else
return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr);
}
FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian)
{
return XXH_readLE32_align(ptr, endian, XXH_unaligned);
}
FORCE_INLINE U64 XXH_readLE64_align(const U64* ptr, XXH_endianess endian, XXH_alignment align)
{
if (align==XXH_unaligned)
return endian==XXH_littleEndian ? A64(ptr) : XXH_swap64(A64(ptr));
else
return endian==XXH_littleEndian ? *ptr : XXH_swap64(*ptr);
}
FORCE_INLINE U64 XXH_readLE64(const U64* ptr, XXH_endianess endian)
{
return XXH_readLE64_align(ptr, endian, XXH_unaligned);
}
//****************************
// Simple Hash Functions
//****************************
FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
{
const BYTE* p = (const BYTE*)input;
const BYTE* bEnd = p + len;
U32 h32;
#define XXH_get32bits(p) XXH_readLE32_align((const U32*)p, endian, align)
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
if (p==NULL)
{
len=0;
bEnd=p=(const BYTE*)(size_t)16;
}
#endif
if (len>=16)
{
const BYTE* const limit = bEnd - 16;
U32 v1 = seed + PRIME32_1 + PRIME32_2;
U32 v2 = seed + PRIME32_2;
U32 v3 = seed + 0;
U32 v4 = seed - PRIME32_1;
do
{
#if defined(ARM) && defined(__GNUC__)
__builtin_prefetch(p + 0xc0, 0, 0);
#endif
v1 += XXH_get32bits(p) * PRIME32_2;
v1 = XXH_rotl32(v1, 13);
v1 *= PRIME32_1;
p+=4;
v2 += XXH_get32bits(p) * PRIME32_2;
v2 = XXH_rotl32(v2, 13);
v2 *= PRIME32_1;
p+=4;
v3 += XXH_get32bits(p) * PRIME32_2;
v3 = XXH_rotl32(v3, 13);
v3 *= PRIME32_1;
p+=4;
v4 += XXH_get32bits(p) * PRIME32_2;
v4 = XXH_rotl32(v4, 13);
v4 *= PRIME32_1;
p+=4;
}
while (p<=limit);
h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
}
else
{
h32 = seed + PRIME32_5;
}
h32 += (U32) len;
while (p+4<=bEnd)
{
h32 += XXH_get32bits(p) * PRIME32_3;
h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
p+=4;
}
while (p<bEnd)
{
h32 += (*p) * PRIME32_5;
h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
p++;
}
h32 ^= h32 >> 15;
h32 *= PRIME32_2;
h32 ^= h32 >> 13;
h32 *= PRIME32_3;
h32 ^= h32 >> 16;
return h32;
}
unsigned int XXH32 (const void* input, size_t len, unsigned seed)
{
#if 0
// Simple version, good for code maintenance, but unfortunately slow for small inputs
XXH32_state_t state;
XXH32_reset(&state, seed);
XXH32_update(&state, input, len);
return XXH32_digest(&state);
#else
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
# if !defined(XXH_USE_UNALIGNED_ACCESS)
if ((((size_t)input) & 3) == 0) // Input is aligned, let's leverage the speed advantage
{
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
else
return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
}
# endif
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
else
return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
#endif
}
FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
{
const BYTE* p = (const BYTE*)input;
const BYTE* bEnd = p + len;
U64 h64;
#define XXH_get64bits(p) XXH_readLE64_align((const U64*)p, endian, align)
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
if (p==NULL)
{
len=0;
bEnd=p=(const BYTE*)(size_t)32;
}
#endif
if (len>=32)
{
const BYTE* const limit = bEnd - 32;
U64 v1 = seed + PRIME64_1 + PRIME64_2;
U64 v2 = seed + PRIME64_2;
U64 v3 = seed + 0;
U64 v4 = seed - PRIME64_1;
do
{
#if defined(ARM) && defined(__GNUC__)
// TODO: Validate that this helps as it does with XXH32.
__builtin_prefetch(p + 0xc0, 0, 0);
#endif
v1 += XXH_get64bits(p) * PRIME64_2;
p+=8;
v1 = XXH_rotl64(v1, 31);
v1 *= PRIME64_1;
v2 += XXH_get64bits(p) * PRIME64_2;
p+=8;
v2 = XXH_rotl64(v2, 31);
v2 *= PRIME64_1;
v3 += XXH_get64bits(p) * PRIME64_2;
p+=8;
v3 = XXH_rotl64(v3, 31);
v3 *= PRIME64_1;
v4 += XXH_get64bits(p) * PRIME64_2;
p+=8;
v4 = XXH_rotl64(v4, 31);
v4 *= PRIME64_1;
}
while (p<=limit);
h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
v1 *= PRIME64_2;
v1 = XXH_rotl64(v1, 31);
v1 *= PRIME64_1;
h64 ^= v1;
h64 = h64 * PRIME64_1 + PRIME64_4;
v2 *= PRIME64_2;
v2 = XXH_rotl64(v2, 31);
v2 *= PRIME64_1;
h64 ^= v2;
h64 = h64 * PRIME64_1 + PRIME64_4;
v3 *= PRIME64_2;
v3 = XXH_rotl64(v3, 31);
v3 *= PRIME64_1;
h64 ^= v3;
h64 = h64 * PRIME64_1 + PRIME64_4;
v4 *= PRIME64_2;
v4 = XXH_rotl64(v4, 31);
v4 *= PRIME64_1;
h64 ^= v4;
h64 = h64 * PRIME64_1 + PRIME64_4;
}
else
{
h64 = seed + PRIME64_5;
}
h64 += (U64) len;
while (p+8<=bEnd)
{
U64 k1 = XXH_get64bits(p);
k1 *= PRIME64_2;
k1 = XXH_rotl64(k1,31);
k1 *= PRIME64_1;
h64 ^= k1;
h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
p+=8;
}
if (p+4<=bEnd)
{
h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
p+=4;
}
while (p<bEnd)
{
h64 ^= (*p) * PRIME64_5;
h64 = XXH_rotl64(h64, 11) * PRIME64_1;
p++;
}
h64 ^= h64 >> 33;
h64 *= PRIME64_2;
h64 ^= h64 >> 29;
h64 *= PRIME64_3;
h64 ^= h64 >> 32;
return h64;
}
unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
{
#if 0
// Simple version, good for code maintenance, but unfortunately slow for small inputs
XXH64_state_t state;
XXH64_reset(&state, seed);
XXH64_update(&state, input, len);
return XXH64_digest(&state);
#else
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
# if !defined(XXH_USE_UNALIGNED_ACCESS)
if ((((size_t)input) & 7)==0) // Input is aligned, let's leverage the speed advantage
{
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
else
return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
}
# endif
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
else
return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
#endif
}
/****************************************************
* Advanced Hash Functions
****************************************************/
/*** Allocation ***/
typedef struct
{
U64 total_len;
U32 seed;
U32 v1;
U32 v2;
U32 v3;
U32 v4;
U32 memsize;
char memory[16];
} XXH_istate32_t;
typedef struct
{
U64 total_len;
U64 seed;
U64 v1;
U64 v2;
U64 v3;
U64 v4;
U32 memsize;
char memory[32];
} XXH_istate64_t;
XXH32_state_t* XXH32_createState(void)
{
XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t)); // A compilation error here means XXH32_state_t is not large enough
return (XXH32_state_t*)malloc(sizeof(XXH32_state_t));
}
XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
{
free(statePtr);
return XXH_OK;
};
XXH64_state_t* XXH64_createState(void)
{
XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t)); // A compilation error here means XXH64_state_t is not large enough
return (XXH64_state_t*)malloc(sizeof(XXH64_state_t));
}
XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
{
free(statePtr);
return XXH_OK;
};
/*** Hash feed ***/
XXH_errorcode XXH32_reset(XXH32_state_t* state_in, U32 seed)
{
XXH_istate32_t* state = (XXH_istate32_t*) state_in;
state->seed = seed;
state->v1 = seed + PRIME32_1 + PRIME32_2;
state->v2 = seed + PRIME32_2;
state->v3 = seed + 0;
state->v4 = seed - PRIME32_1;
state->total_len = 0;
state->memsize = 0;
return XXH_OK;
}
XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed)
{
XXH_istate64_t* state = (XXH_istate64_t*) state_in;
state->seed = seed;
state->v1 = seed + PRIME64_1 + PRIME64_2;
state->v2 = seed + PRIME64_2;
state->v3 = seed + 0;
state->v4 = seed - PRIME64_1;
state->total_len = 0;
state->memsize = 0;
return XXH_OK;
}
FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
{
XXH_istate32_t* state = (XXH_istate32_t *) state_in;
const BYTE* p = (const BYTE*)input;
const BYTE* const bEnd = p + len;
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
if (input==NULL) return XXH_ERROR;
#endif
state->total_len += len;
if (state->memsize + len < 16) // fill in tmp buffer
{
XXH_memcpy(state->memory + state->memsize, input, len);
state->memsize += (U32)len;
return XXH_OK;
}
if (state->memsize) // some data left from previous update
{
XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize);
{
const U32* p32 = (const U32*)state->memory;
state->v1 += XXH_readLE32(p32, endian) * PRIME32_2;
state->v1 = XXH_rotl32(state->v1, 13);
state->v1 *= PRIME32_1;
p32++;
state->v2 += XXH_readLE32(p32, endian) * PRIME32_2;
state->v2 = XXH_rotl32(state->v2, 13);
state->v2 *= PRIME32_1;
p32++;
state->v3 += XXH_readLE32(p32, endian) * PRIME32_2;
state->v3 = XXH_rotl32(state->v3, 13);
state->v3 *= PRIME32_1;
p32++;
state->v4 += XXH_readLE32(p32, endian) * PRIME32_2;
state->v4 = XXH_rotl32(state->v4, 13);
state->v4 *= PRIME32_1;
p32++;
}
p += 16-state->memsize;
state->memsize = 0;
}
if (p <= bEnd-16)
{
const BYTE* const limit = bEnd - 16;
U32 v1 = state->v1;
U32 v2 = state->v2;
U32 v3 = state->v3;
U32 v4 = state->v4;
do
{
v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2;
v1 = XXH_rotl32(v1, 13);
v1 *= PRIME32_1;
p+=4;
v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2;
v2 = XXH_rotl32(v2, 13);
v2 *= PRIME32_1;
p+=4;
v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2;
v3 = XXH_rotl32(v3, 13);
v3 *= PRIME32_1;
p+=4;
v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2;
v4 = XXH_rotl32(v4, 13);
v4 *= PRIME32_1;
p+=4;
}
while (p<=limit);
state->v1 = v1;
state->v2 = v2;
state->v3 = v3;
state->v4 = v4;
}
if (p < bEnd)
{
XXH_memcpy(state->memory, p, bEnd-p);
state->memsize = (int)(bEnd-p);
}
return XXH_OK;
}
XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
{
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
else
return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
}
FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian)
{
XXH_istate32_t* state = (XXH_istate32_t*) state_in;
const BYTE * p = (const BYTE*)state->memory;
BYTE* bEnd = (BYTE*)state->memory + state->memsize;
U32 h32;
if (state->total_len >= 16)
{
h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
}
else
{
h32 = state->seed + PRIME32_5;
}
h32 += (U32) state->total_len;
while (p+4<=bEnd)
{
h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3;
h32 = XXH_rotl32(h32, 17) * PRIME32_4;
p+=4;
}
while (p<bEnd)
{
h32 += (*p) * PRIME32_5;
h32 = XXH_rotl32(h32, 11) * PRIME32_1;
p++;
}
h32 ^= h32 >> 15;
h32 *= PRIME32_2;
h32 ^= h32 >> 13;
h32 *= PRIME32_3;
h32 ^= h32 >> 16;
return h32;
}
U32 XXH32_digest (const XXH32_state_t* state_in)
{
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH32_digest_endian(state_in, XXH_littleEndian);
else
return XXH32_digest_endian(state_in, XXH_bigEndian);
}
FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
{
XXH_istate64_t * state = (XXH_istate64_t *) state_in;
const BYTE* p = (const BYTE*)input;
const BYTE* const bEnd = p + len;
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
if (input==NULL) return XXH_ERROR;
#endif
state->total_len += len;
if (state->memsize + len < 32) // fill in tmp buffer
{
XXH_memcpy(state->memory + state->memsize, input, len);
state->memsize += (U32)len;
return XXH_OK;
}
if (state->memsize) // some data left from previous update
{
XXH_memcpy(state->memory + state->memsize, input, 32-state->memsize);
{
const U64* p64 = (const U64*)state->memory;
state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
state->v1 = XXH_rotl64(state->v1, 31);
state->v1 *= PRIME64_1;
p64++;
state->v2 += XXH_readLE64(p64, endian) * PRIME64_2;
state->v2 = XXH_rotl64(state->v2, 31);
state->v2 *= PRIME64_1;
p64++;
state->v3 += XXH_readLE64(p64, endian) * PRIME64_2;
state->v3 = XXH_rotl64(state->v3, 31);
state->v3 *= PRIME64_1;
p64++;
state->v4 += XXH_readLE64(p64, endian) * PRIME64_2;
state->v4 = XXH_rotl64(state->v4, 31);
state->v4 *= PRIME64_1;
p64++;
}
p += 32-state->memsize;
state->memsize = 0;
}
if (p+32 <= bEnd)
{
const BYTE* const limit = bEnd - 32;
U64 v1 = state->v1;
U64 v2 = state->v2;
U64 v3 = state->v3;
U64 v4 = state->v4;
do
{
v1 += XXH_readLE64((const U64*)p, endian) * PRIME64_2;
v1 = XXH_rotl64(v1, 31);
v1 *= PRIME64_1;
p+=8;
v2 += XXH_readLE64((const U64*)p, endian) * PRIME64_2;
v2 = XXH_rotl64(v2, 31);
v2 *= PRIME64_1;
p+=8;
v3 += XXH_readLE64((const U64*)p, endian) * PRIME64_2;
v3 = XXH_rotl64(v3, 31);
v3 *= PRIME64_1;
p+=8;
v4 += XXH_readLE64((const U64*)p, endian) * PRIME64_2;
v4 = XXH_rotl64(v4, 31);
v4 *= PRIME64_1;
p+=8;
}
while (p<=limit);
state->v1 = v1;
state->v2 = v2;
state->v3 = v3;
state->v4 = v4;
}
if (p < bEnd)
{
XXH_memcpy(state->memory, p, bEnd-p);
state->memsize = (int)(bEnd-p);
}
return XXH_OK;
}
XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
{
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
else
return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
}
FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
{
XXH_istate64_t * state = (XXH_istate64_t *) state_in;
const BYTE * p = (const BYTE*)state->memory;
BYTE* bEnd = (BYTE*)state->memory + state->memsize;
U64 h64;
if (state->total_len >= 32)
{
U64 v1 = state->v1;
U64 v2 = state->v2;
U64 v3 = state->v3;
U64 v4 = state->v4;
h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
v1 *= PRIME64_2;
v1 = XXH_rotl64(v1, 31);
v1 *= PRIME64_1;
h64 ^= v1;
h64 = h64*PRIME64_1 + PRIME64_4;
v2 *= PRIME64_2;
v2 = XXH_rotl64(v2, 31);
v2 *= PRIME64_1;
h64 ^= v2;
h64 = h64*PRIME64_1 + PRIME64_4;
v3 *= PRIME64_2;
v3 = XXH_rotl64(v3, 31);
v3 *= PRIME64_1;
h64 ^= v3;
h64 = h64*PRIME64_1 + PRIME64_4;
v4 *= PRIME64_2;
v4 = XXH_rotl64(v4, 31);
v4 *= PRIME64_1;
h64 ^= v4;
h64 = h64*PRIME64_1 + PRIME64_4;
}
else
{
h64 = state->seed + PRIME64_5;
}
h64 += (U64) state->total_len;
while (p+8<=bEnd)
{
U64 k1 = XXH_readLE64((const U64*)p, endian);
k1 *= PRIME64_2;
k1 = XXH_rotl64(k1,31);
k1 *= PRIME64_1;
h64 ^= k1;
h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
p+=8;
}
if (p+4<=bEnd)
{
h64 ^= (U64)(XXH_readLE32((const U32*)p, endian)) * PRIME64_1;
h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
p+=4;
}
while (p<bEnd)
{
h64 ^= (*p) * PRIME64_5;
h64 = XXH_rotl64(h64, 11) * PRIME64_1;
p++;
}
h64 ^= h64 >> 33;
h64 *= PRIME64_2;
h64 ^= h64 >> 29;
h64 *= PRIME64_3;
h64 ^= h64 >> 32;
return h64;
}
unsigned long long XXH64_digest (const XXH64_state_t* state_in)
{
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH64_digest_endian(state_in, XXH_littleEndian);
else
return XXH64_digest_endian(state_in, XXH_bigEndian);
}

File diff suppressed because it is too large Load diff

View file

@ -24,12 +24,14 @@
// TODO: Make a test of nice unittest asserts and count successes etc.
// Or just integrate with an existing testing framework.
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <string>
#include <sstream>
#if defined(ANDROID)
#include <jni.h>
#endif
#include "base/NativeApp.h"
#include "input/input_state.h"
@ -62,6 +64,16 @@ bool System_GetPropertyBool(SystemProperty prop) {
return false;
}
#if defined(ANDROID)
JNIEnv *getEnv() {
return nullptr;
}
jclass findClass(const char *name) {
return nullptr;
}
#endif
#ifndef M_PI_2
#define M_PI_2 1.57079632679489661923
#endif