mirror of
https://github.com/n64dev/cen64.git
synced 2024-06-21 13:32:40 -04:00
device: Manually specify thread affinity.
Both Windows and Linux seem to be doing a terrible job of scheduling the device threads optimally. Until I can think of something more clever, manually set thread affinity.
This commit is contained in:
parent
8d31a56b91
commit
49960312ce
|
@ -275,6 +275,7 @@ set(OS_SOURCES
|
|||
|
||||
set(OS_POSIX_SOURCES
|
||||
${PROJECT_SOURCE_DIR}/os/posix/alloc.c
|
||||
${PROJECT_SOURCE_DIR}/os/posix/cpuid.c
|
||||
${PROJECT_SOURCE_DIR}/os/posix/local_time.c
|
||||
${PROJECT_SOURCE_DIR}/os/posix/main.c
|
||||
${PROJECT_SOURCE_DIR}/os/posix/rom_file.c
|
||||
|
@ -284,6 +285,7 @@ set(OS_POSIX_SOURCES
|
|||
|
||||
set(OS_WINAPI_SOURCES
|
||||
${PROJECT_SOURCE_DIR}/os/winapi/alloc.c
|
||||
${PROJECT_SOURCE_DIR}/os/winapi/cpuid.c
|
||||
${PROJECT_SOURCE_DIR}/os/winapi/gl_config.c
|
||||
${PROJECT_SOURCE_DIR}/os/winapi/gl_window.c
|
||||
${PROJECT_SOURCE_DIR}/os/winapi/local_time.c
|
||||
|
@ -389,7 +391,7 @@ if (DEFINED UNIX)
|
|||
find_package(X11 REQUIRED)
|
||||
|
||||
if (${CMAKE_C_COMPILER_ID} MATCHES GNU OR ${CMAKE_C_COMPILER_ID} MATCHES Clang OR ${CMAKE_C_COMPILER_ID} MATCHES Intel)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_POSIX_C_SOURCE=200112L -D_BSD_SOURCE")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_POSIX_C_SOURCE=200112L -D_BSD_SOURCE -D_DEFAULT_SOURCE -D_GNU_SOURCE")
|
||||
endif (${CMAKE_C_COMPILER_ID} MATCHES GNU OR ${CMAKE_C_COMPILER_ID} MATCHES Clang OR ${CMAKE_C_COMPILER_ID} MATCHES Intel)
|
||||
endif (DEFINED UNIX)
|
||||
|
||||
|
|
7
cen64.c
7
cen64.c
|
@ -279,11 +279,10 @@ int validate_sha(struct rom_file *rom, const uint8_t *good_sum) {
|
|||
|
||||
// Spins the device until an exit request is received.
|
||||
int run_device(struct cen64_device *device, bool no_video) {
|
||||
cen64_thread thread;
|
||||
|
||||
device->running = true;
|
||||
cen64_thread_get_current(&device->os_thread);
|
||||
|
||||
if (cen64_thread_create(&thread, run_device_thread, device)) {
|
||||
if (cen64_thread_create(&device->device_thread, run_device_thread, device)) {
|
||||
printf("Failed to create the main emulation thread.\n");
|
||||
device_destroy(device);
|
||||
return 1;
|
||||
|
@ -293,7 +292,7 @@ int run_device(struct cen64_device *device, bool no_video) {
|
|||
cen64_gl_window_thread(device);
|
||||
|
||||
device->running = false;
|
||||
cen64_thread_join(&thread);
|
||||
cen64_thread_join(&device->device_thread);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "gl_window.h"
|
||||
#include "os/common/rom_file.h"
|
||||
#include "os/common/save_file.h"
|
||||
#include "os/cpuid.h"
|
||||
|
||||
#include "bus/controller.h"
|
||||
#include "ai/controller.h"
|
||||
|
@ -31,6 +32,7 @@
|
|||
|
||||
cen64_cold int angrylion_rdp_init(struct cen64_device *device);
|
||||
cen64_cold static int device_debug_spin(struct cen64_device *device);
|
||||
cen64_cold static void device_schedule_threads(unsigned num_threads, cen64_thread **threads);
|
||||
cen64_flatten cen64_hot static int device_multithread_spin(struct cen64_device *device);
|
||||
cen64_flatten cen64_hot static int device_spin(struct cen64_device *device);
|
||||
|
||||
|
@ -138,12 +140,16 @@ void device_exit(struct bus_controller *bus) {
|
|||
// Create a device and proceed to the main loop.
|
||||
void device_run(struct cen64_device *device) {
|
||||
fpu_state_t saved_fpu_state;
|
||||
char vendor[13];
|
||||
|
||||
// TODO: Preserve host registers pinned to the device.
|
||||
saved_fpu_state = fpu_get_state();
|
||||
vr4300_cp1_init(&device->vr4300);
|
||||
rsp_late_init(&device->rsp);
|
||||
|
||||
// Set thread affinities for Intel CPUs.
|
||||
cen64_cpuid_get_vendor(vendor);
|
||||
|
||||
// Spin the device until we return (from setjmp).
|
||||
if (unlikely(device->debug_sfd > 0))
|
||||
device_debug_spin(device);
|
||||
|
@ -226,10 +232,29 @@ CEN64_THREAD_RETURN_TYPE run_vr4300_thread(void *opaque) {
|
|||
return CEN64_THREAD_RETURN_VAL;
|
||||
}
|
||||
|
||||
//
|
||||
// Set affinity of threads to maximize performance.
|
||||
// There should be at least 3 threads, possibly 4:
|
||||
//
|
||||
// 0: device/vr4300 thread
|
||||
// 1: os thread
|
||||
// 2: rdp thread
|
||||
// 3: (if present) rcp thread
|
||||
//
|
||||
cen64_cold static void device_schedule_threads(
|
||||
unsigned num_threads, cen64_thread **threads) {
|
||||
|
||||
cen64_thread_setaffinity(threads[0], 1 << 0);
|
||||
cen64_thread_setaffinity(threads[1], 1 << 1);
|
||||
cen64_thread_setaffinity(threads[2], 1 << 2);
|
||||
|
||||
if (num_threads > 3)
|
||||
cen64_thread_setaffinity(threads[3], 1 << 3);
|
||||
}
|
||||
|
||||
// Continually cycles the device until setjmp returns.
|
||||
int device_multithread_spin(struct cen64_device *device) {
|
||||
cen64_thread vr4300_thread;
|
||||
|
||||
cen64_thread *device_threads[4];
|
||||
device->other_thread_is_waiting = false;
|
||||
|
||||
if (cen64_mutex_create(&device->sync_mutex)) {
|
||||
|
@ -243,16 +268,23 @@ int device_multithread_spin(struct cen64_device *device) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
if (cen64_thread_create(&vr4300_thread, run_vr4300_thread, device)) {
|
||||
if (cen64_thread_create(&device->vr4300_thread, run_vr4300_thread, device)) {
|
||||
printf("Failed to create the VR4300 thread.\n");
|
||||
cen64_cv_destroy(&device->sync_cv);
|
||||
cen64_mutex_destroy(&device->sync_mutex);
|
||||
return 1;
|
||||
}
|
||||
|
||||
device_threads[0] = &device->vr4300_thread;
|
||||
device_threads[1] = &device->os_thread;
|
||||
device_threads[2] = &device->rdp.rdp_thread;
|
||||
device_threads[3] = &device->device_thread;
|
||||
|
||||
device_schedule_threads(4, device_threads);
|
||||
|
||||
run_rcp_thread(device);
|
||||
|
||||
cen64_thread_join(&vr4300_thread);
|
||||
cen64_thread_join(&device->vr4300_thread);
|
||||
cen64_cv_destroy(&device->sync_cv);
|
||||
cen64_mutex_destroy(&device->sync_mutex);
|
||||
return 0;
|
||||
|
@ -260,9 +292,17 @@ int device_multithread_spin(struct cen64_device *device) {
|
|||
|
||||
// Continually cycles the device until setjmp returns.
|
||||
int device_spin(struct cen64_device *device) {
|
||||
cen64_thread *device_threads[3];
|
||||
|
||||
if (setjmp(device->bus.unwind_data))
|
||||
return 1;
|
||||
|
||||
device_threads[0] = &device->device_thread;
|
||||
device_threads[1] = &device->os_thread;
|
||||
device_threads[2] = &device->rdp.rdp_thread;
|
||||
|
||||
device_schedule_threads(3, device_threads);
|
||||
|
||||
while (likely(device->running)) {
|
||||
unsigned i;
|
||||
|
||||
|
|
|
@ -48,6 +48,11 @@ struct cen64_device {
|
|||
|
||||
bool multithread;
|
||||
bool other_thread_is_waiting;
|
||||
|
||||
cen64_thread device_thread;
|
||||
cen64_thread os_thread;
|
||||
cen64_thread vr4300_thread;
|
||||
|
||||
cen64_mutex sync_mutex;
|
||||
cen64_cv sync_cv;
|
||||
|
||||
|
|
25
os/cpuid.h
Normal file
25
os/cpuid.h
Normal file
|
@ -0,0 +1,25 @@
|
|||
//
|
||||
// os/cpuid.h
|
||||
//
|
||||
// Functions for calling cpuid on x86.
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
#ifndef __os_cpuid_h__
|
||||
#define __os_cpuid_h__
|
||||
#include "common.h"
|
||||
|
||||
// Holds the four register values that one cpuid invocation produces.
struct cen64_cpuid_t {
  uint32_t eax; // Contents of EAX after cpuid.
  uint32_t ebx; // Contents of EBX after cpuid.
  uint32_t ecx; // Contents of ECX after cpuid.
  uint32_t edx; // Contents of EDX after cpuid.
};
|
||||
|
||||
void cen64_cpuid(uint32_t eax, uint32_t ecx, struct cen64_cpuid_t *cpuid);
|
||||
void cen64_cpuid_get_vendor(char vendor[13]);
|
||||
|
||||
#endif
|
||||
|
31
os/posix/cpuid.c
Normal file
31
os/posix/cpuid.c
Normal file
|
@ -0,0 +1,31 @@
|
|||
//
|
||||
// os/posix/cpuid.c
|
||||
//
|
||||
// Functions for calling cpuid on x86.
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
#include "os/cpuid.h"
|
||||
|
||||
void cen64_cpuid(uint32_t eax, uint32_t ecx, struct cen64_cpuid_t *cpuid) {
|
||||
__asm__ __volatile__(
|
||||
"cpuid\n\t"
|
||||
|
||||
: "=a"(cpuid->eax), "=b"(cpuid->ebx), "=c"(cpuid->ecx), "=d"(cpuid->edx)
|
||||
: "0"(eax), "2"(ecx)
|
||||
);
|
||||
}
|
||||
|
||||
void cen64_cpuid_get_vendor(char vendor[13]) {
|
||||
struct cen64_cpuid_t my_cpuid;
|
||||
|
||||
cen64_cpuid(0, 0, &my_cpuid);
|
||||
|
||||
memcpy(vendor + 0, &my_cpuid.ebx, sizeof(my_cpuid.ebx));
|
||||
memcpy(vendor + 4, &my_cpuid.edx, sizeof(my_cpuid.edx));
|
||||
memcpy(vendor + 8, &my_cpuid.ecx, sizeof(my_cpuid.ecx));
|
||||
vendor[sizeof(vendor) - 1] = '\0';
|
||||
}
|
||||
|
|
@ -12,6 +12,7 @@
|
|||
#define CEN64_OS_POSIX_THREAD
|
||||
#include "common.h"
|
||||
#include <pthread.h>
|
||||
#include <sched.h>
|
||||
|
||||
#define CEN64_THREAD_RETURN_TYPE void*
|
||||
#define CEN64_THREAD_RETURN_VAL NULL
|
||||
|
@ -33,6 +34,14 @@ static inline int cen64_thread_create(cen64_thread *t,
|
|||
return pthread_create(t, NULL, f, arg);
|
||||
}
|
||||
|
||||
//
|
||||
// Returns a pointer to the currently executing thread.
|
||||
//
|
||||
static inline int cen64_thread_get_current(cen64_thread *t) {
|
||||
*t = pthread_self();
|
||||
return 0;
|
||||
}
|
||||
|
||||
//
|
||||
// Join a thread created with cen64_thread_create. Use this to
|
||||
// effectively "free" the resources acquired for the thread.
|
||||
|
@ -41,6 +50,24 @@ static inline int cen64_thread_join(cen64_thread *t) {
|
|||
return pthread_join(*t, NULL);
|
||||
}
|
||||
|
||||
//
|
||||
// Set the affinity of a thread to the CPU mask given by mask.
|
||||
// Assumes the host system has <= 32 CPUs, but good enough for now.
|
||||
//
|
||||
static inline int cen64_thread_setaffinity(cen64_thread *t, uint32_t mask) {
|
||||
cpu_set_t cpuset;
|
||||
unsigned i;
|
||||
|
||||
CPU_ZERO(&cpuset);
|
||||
|
||||
for (i = 0; mask; i++, mask >>= 1) {
|
||||
if (mask & 0x1)
|
||||
CPU_SET(i, &cpuset);
|
||||
}
|
||||
|
||||
return pthread_setaffinity_np(*t, sizeof(cpuset), &cpuset);
|
||||
}
|
||||
|
||||
//
|
||||
// Mutexes.
|
||||
//
|
||||
|
|
35
os/winapi/cpuid.c
Normal file
35
os/winapi/cpuid.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
//
|
||||
// os/winapi/cpuid.c
|
||||
//
|
||||
// Functions for calling cpuid on x86.
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
#include "os/cpuid.h"
|
||||
#include <intrin.h>
|
||||
|
||||
|
||||
void cen64_cpuid(uint32_t eax, uint32_t ecx, struct cen64_cpuid_t *cpuid) {
|
||||
int cpuInfo[4];
|
||||
|
||||
__cpuidex(cpuInfo, eax, ecx);
|
||||
|
||||
cpuid->eax = cpuInfo[0];
|
||||
cpuid->ebx = cpuInfo[1];
|
||||
cpuid->ecx = cpuInfo[2];
|
||||
cpuid->edx = cpuInfo[3];
|
||||
}
|
||||
|
||||
// Retrieves the host CPU's vendor identification string.
//
// Calls __cpuidex with function 0, sub-function 0 and concatenates the
// returned EBX, EDX and ECX words (array slots 1, 3 and 2) into vendor,
// followed by a terminating NUL. vendor must have room for 13 bytes:
// 12 characters plus the terminator.
void cen64_cpuid_get_vendor(char vendor[13]) {
  int cpuInfo[4];

  __cpuidex(cpuInfo, 0, 0);

  memcpy(vendor + 0, cpuInfo + 1, sizeof(*cpuInfo));
  memcpy(vendor + 4, cpuInfo + 3, sizeof(*cpuInfo));
  memcpy(vendor + 8, cpuInfo + 2, sizeof(*cpuInfo));

  // An array parameter is really a pointer, so sizeof(vendor) would be
  // the pointer size (not 13) and the NUL would overwrite a vendor
  // character. Index the terminator explicitly: three 4-byte words
  // occupy bytes 0..11.
  vendor[12] = '\0';
}
|
||||
|
|
@ -36,6 +36,14 @@ static inline int cen64_thread_create(cen64_thread *t,
|
|||
return 1;
|
||||
}
|
||||
|
||||
//
|
||||
// Returns a pointer to the currently executing thread.
|
||||
//
|
||||
static inline int cen64_thread_get_current(cen64_thread *t) {
|
||||
*t = GetCurrentThread();
|
||||
return 0;
|
||||
}
|
||||
|
||||
//
|
||||
// Join a thread created with cen64_thread_create. Use this to
|
||||
// effectively "free" the resources acquired for the thread.
|
||||
|
@ -47,6 +55,15 @@ static inline int cen64_thread_join(cen64_thread *t) {
|
|||
return !CloseHandle(*t);
|
||||
}
|
||||
|
||||
//
|
||||
// Set the affinity of a thread to the CPU mask given by mask.
|
||||
// Assumes the host system has <= 32 CPUs, but good enough for now.
|
||||
//
|
||||
static inline int cen64_thread_setaffinity(cen64_thread *t, uint32_t mask) {
|
||||
DWORD winapi_mask = mask;
|
||||
return !SetThreadAffinityMask(*t, &winapi_mask);
|
||||
}
|
||||
|
||||
//
|
||||
// Mutexes.
|
||||
//
|
||||
|
|
Loading…
Reference in a new issue