diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1194fec7a3..d7fd06a0c4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,6 +69,8 @@ if(CMAKE_SYSTEM_PROCESSOR)
 		set(RISCV64_DEVICE ON)
 	elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^loongarch64")
 		set(LOONGARCH64_DEVICE ON)
+		# LSX only: the sources use LSX intrinsics; -mlasx would also allow 256-bit LASX code that LSX-only CPUs cannot run.
+		add_compile_options(-mlsx)
 	else()
 		message("Unknown CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
 	endif()
diff --git a/Common/Math/SIMDHeaders.h b/Common/Math/SIMDHeaders.h
index 790fac494e..f6e86ef0c2 100644
--- a/Common/Math/SIMDHeaders.h
+++ b/Common/Math/SIMDHeaders.h
@@ -31,6 +31,12 @@
 #endif
 #endif
 
+// LoongArch64: bring in the LSX intrinsics header, but only when the LSX
+// feature macro is enabled for this build.
+#if PPSSPP_ARCH(LOONGARCH64) && PPSSPP_ARCH(LOONGARCH64_LSX)
+#include <lsxintrin.h>
+#endif
+
 // Basic types
 
 #if PPSSPP_ARCH(ARM64_NEON)
diff --git a/Common/Math/fast/fast_matrix.c b/Common/Math/fast/fast_matrix.c
index cff592e680..6ce87e2a6a 100644
--- a/Common/Math/fast/fast_matrix.c
+++ b/Common/Math/fast/fast_matrix.c
@@ -22,6 +22,42 @@ void fast_matrix_mul_4x4_sse(float *dest, const float *a, const float *b) {
 	}
 }
 
+#elif PPSSPP_ARCH(LOONGARCH64_LSX)
+
+// Gives access to a float's bit pattern as a 32-bit integer.
+typedef union {
+	int32_t i;
+	float f;
+} FloatInt;
+
+// Returns a vector holding val in all four lanes. Not named "__lsx_*": a leading
+// double underscore is reserved in C and is the prefix of the intrinsics used here.
+static inline __m128 lsx_replicate_f32(float val) {
+	FloatInt bits = {.f = val};
+	return (__m128)__lsx_vreplgr2vr_w(bits.i);
+}
+
+// For each group of four floats b[i..i+3], stores into dest[i..i+3] the sum of the
+// four vectors a[0..3], a[4..7], a[8..11], a[12..15] scaled by those four floats.
+void fast_matrix_mul_4x4_lsx(float *dest, const float *a, const float *b) {
+	__m128 a_col_1 = (__m128)__lsx_vld(a, 0);
+	__m128 a_col_2 = (__m128)__lsx_vld(a + 4, 0);
+	__m128 a_col_3 = (__m128)__lsx_vld(a + 8, 0);
+	__m128 a_col_4 = (__m128)__lsx_vld(a + 12, 0);
+
+	for (int i = 0; i < 16; i += 4) {
+		__m128 b1 = lsx_replicate_f32(b[i]);
+		__m128 b2 = lsx_replicate_f32(b[i + 1]);
+		__m128 b3 = lsx_replicate_f32(b[i + 2]);
+		__m128 b4 = lsx_replicate_f32(b[i + 3]);
+		__m128 result = __lsx_vfmul_s(a_col_1, b1);
+		result = __lsx_vfmadd_s(a_col_2, b2, result);
+		result = __lsx_vfmadd_s(a_col_3, b3, result);
+		result = __lsx_vfmadd_s(a_col_4, b4, result);
+		__lsx_vst(result, &dest[i], 0);
+	}
+}
+
 #elif PPSSPP_ARCH(ARM_NEON)
 
 // From https://developer.arm.com/documentation/102467/0100/Matrix-multiplication-example
diff --git a/Common/Math/fast/fast_matrix.h b/Common/Math/fast/fast_matrix.h
index 8e5fc9320f..fb4a1b7f26 100644
--- a/Common/Math/fast/fast_matrix.h
+++ b/Common/Math/fast/fast_matrix.h
@@ -11,12 +11,15 @@ extern "C" {
 extern void fast_matrix_mul_4x4_c(float *dest, const float *a, const float *b);
 extern void fast_matrix_mul_4x4_neon(float *dest, const float *a, const float *b);
 extern void fast_matrix_mul_4x4_sse(float *dest, const float *a, const float *b);
+extern void fast_matrix_mul_4x4_lsx(float *dest, const float *a, const float *b);  // LoongArch64 LSX implementation
 
 #if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
 // Hard link to SSE implementations on x86/amd64
 #define fast_matrix_mul_4x4 fast_matrix_mul_4x4_sse
 #elif PPSSPP_ARCH(ARM_NEON)
 #define fast_matrix_mul_4x4 fast_matrix_mul_4x4_neon
+#elif PPSSPP_ARCH(LOONGARCH64_LSX)  // LoongArch64 with LSX: alias to the LSX routine
+#define fast_matrix_mul_4x4 fast_matrix_mul_4x4_lsx
 #else
 #define fast_matrix_mul_4x4 fast_matrix_mul_4x4_c
 #endif
diff --git a/ppsspp_config.h b/ppsspp_config.h
index dd9855f582..2a741dca3d 100644
--- a/ppsspp_config.h
+++ b/ppsspp_config.h
@@ -81,6 +81,10 @@
     //https://github.com/gcc-mirror/gcc/blob/master/gcc/config/loongarch/loongarch-c.cc
     #define PPSSPP_ARCH_LOONGARCH64 1
     #define PPSSPP_ARCH_64BIT 1
+    // Enable the LSX code paths only when the compiler targets LSX (it then predefines __loongarch_sx).
+    #if defined(__loongarch_sx)
+        #define PPSSPP_ARCH_LOONGARCH64_LSX 1
+    #endif
 #endif
 
 // PLATFORM defines