device: Manually specify thread affinity.

Both Windows and Linux seem to be doing a terrible job of
scheduling the device threads optimally. Until I can think
of something more clever, manually set thread affinity.
This commit is contained in:
Tyler J. Stachecki 2016-07-13 16:41:50 -04:00
parent 8d31a56b91
commit 49960312ce
9 changed files with 190 additions and 9 deletions

View file

@ -275,6 +275,7 @@ set(OS_SOURCES
set(OS_POSIX_SOURCES
${PROJECT_SOURCE_DIR}/os/posix/alloc.c
${PROJECT_SOURCE_DIR}/os/posix/cpuid.c
${PROJECT_SOURCE_DIR}/os/posix/local_time.c
${PROJECT_SOURCE_DIR}/os/posix/main.c
${PROJECT_SOURCE_DIR}/os/posix/rom_file.c
@ -284,6 +285,7 @@ set(OS_POSIX_SOURCES
set(OS_WINAPI_SOURCES
${PROJECT_SOURCE_DIR}/os/winapi/alloc.c
${PROJECT_SOURCE_DIR}/os/winapi/cpuid.c
${PROJECT_SOURCE_DIR}/os/winapi/gl_config.c
${PROJECT_SOURCE_DIR}/os/winapi/gl_window.c
${PROJECT_SOURCE_DIR}/os/winapi/local_time.c
@ -389,7 +391,7 @@ if (DEFINED UNIX)
find_package(X11 REQUIRED)
if (${CMAKE_C_COMPILER_ID} MATCHES GNU OR ${CMAKE_C_COMPILER_ID} MATCHES Clang OR ${CMAKE_C_COMPILER_ID} MATCHES Intel)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_POSIX_C_SOURCE=200112L -D_BSD_SOURCE")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_POSIX_C_SOURCE=200112L -D_BSD_SOURCE -D_DEFAULT_SOURCE -D_GNU_SOURCE")
endif (${CMAKE_C_COMPILER_ID} MATCHES GNU OR ${CMAKE_C_COMPILER_ID} MATCHES Clang OR ${CMAKE_C_COMPILER_ID} MATCHES Intel)
endif (DEFINED UNIX)

View file

@ -279,11 +279,10 @@ int validate_sha(struct rom_file *rom, const uint8_t *good_sum) {
// Spins the device until an exit request is received.
int run_device(struct cen64_device *device, bool no_video) {
cen64_thread thread;
device->running = true;
cen64_thread_get_current(&device->os_thread);
if (cen64_thread_create(&thread, run_device_thread, device)) {
if (cen64_thread_create(&device->device_thread, run_device_thread, device)) {
printf("Failed to create the main emulation thread.\n");
device_destroy(device);
return 1;
@ -293,7 +292,7 @@ int run_device(struct cen64_device *device, bool no_video) {
cen64_gl_window_thread(device);
device->running = false;
cen64_thread_join(&thread);
cen64_thread_join(&device->device_thread);
return 0;
}

View file

@ -15,6 +15,7 @@
#include "gl_window.h"
#include "os/common/rom_file.h"
#include "os/common/save_file.h"
#include "os/cpuid.h"
#include "bus/controller.h"
#include "ai/controller.h"
@ -31,6 +32,7 @@
cen64_cold int angrylion_rdp_init(struct cen64_device *device);
cen64_cold static int device_debug_spin(struct cen64_device *device);
cen64_cold static void device_schedule_threads(unsigned num_threads, cen64_thread **threads);
cen64_flatten cen64_hot static int device_multithread_spin(struct cen64_device *device);
cen64_flatten cen64_hot static int device_spin(struct cen64_device *device);
@ -138,12 +140,16 @@ void device_exit(struct bus_controller *bus) {
// Create a device and proceed to the main loop.
void device_run(struct cen64_device *device) {
fpu_state_t saved_fpu_state;
char vendor[13];
// TODO: Preserve host registers pinned to the device.
saved_fpu_state = fpu_get_state();
vr4300_cp1_init(&device->vr4300);
rsp_late_init(&device->rsp);
// Set thread affinities for Intel CPUs.
cen64_cpuid_get_vendor(vendor);
// Spin the device until we return (from setjmp).
if (unlikely(device->debug_sfd > 0))
device_debug_spin(device);
@ -226,10 +232,29 @@ CEN64_THREAD_RETURN_TYPE run_vr4300_thread(void *opaque) {
return CEN64_THREAD_RETURN_VAL;
}
//
// Pin each device thread to its own logical CPU: threads[i] is given
// the affinity mask (1 << i). The expected layout of threads[] is:
//
// 0: device/vr4300 thread
// 1: os thread
// 2: rdp thread
// 3: (if present) rcp thread
//
// The first three entries are always pinned; the fourth is pinned
// only when num_threads > 3. The results of the individual
// cen64_thread_setaffinity calls are not checked.
//
cen64_cold static void device_schedule_threads(
  unsigned num_threads, cen64_thread **threads) {
  const unsigned pinned = (num_threads > 3) ? 4 : 3;
  unsigned cpu;

  for (cpu = 0; cpu < pinned; cpu++)
    cen64_thread_setaffinity(threads[cpu], 1 << cpu);
}
// Continually cycles the device until setjmp returns.
int device_multithread_spin(struct cen64_device *device) {
cen64_thread vr4300_thread;
cen64_thread *device_threads[4];
device->other_thread_is_waiting = false;
if (cen64_mutex_create(&device->sync_mutex)) {
@ -243,16 +268,23 @@ int device_multithread_spin(struct cen64_device *device) {
return 1;
}
if (cen64_thread_create(&vr4300_thread, run_vr4300_thread, device)) {
if (cen64_thread_create(&device->vr4300_thread, run_vr4300_thread, device)) {
printf("Failed to create the VR4300 thread.\n");
cen64_cv_destroy(&device->sync_cv);
cen64_mutex_destroy(&device->sync_mutex);
return 1;
}
device_threads[0] = &device->vr4300_thread;
device_threads[1] = &device->os_thread;
device_threads[2] = &device->rdp.rdp_thread;
device_threads[3] = &device->device_thread;
device_schedule_threads(4, device_threads);
run_rcp_thread(device);
cen64_thread_join(&vr4300_thread);
cen64_thread_join(&device->vr4300_thread);
cen64_cv_destroy(&device->sync_cv);
cen64_mutex_destroy(&device->sync_mutex);
return 0;
@ -260,9 +292,17 @@ int device_multithread_spin(struct cen64_device *device) {
// Continually cycles the device until setjmp returns.
int device_spin(struct cen64_device *device) {
cen64_thread *device_threads[3];
if (setjmp(device->bus.unwind_data))
return 1;
device_threads[0] = &device->device_thread;
device_threads[1] = &device->os_thread;
device_threads[2] = &device->rdp.rdp_thread;
device_schedule_threads(3, device_threads);
while (likely(device->running)) {
unsigned i;

View file

@ -48,6 +48,11 @@ struct cen64_device {
bool multithread;
bool other_thread_is_waiting;
cen64_thread device_thread;
cen64_thread os_thread;
cen64_thread vr4300_thread;
cen64_mutex sync_mutex;
cen64_cv sync_cv;

25
os/cpuid.h Normal file
View file

@ -0,0 +1,25 @@
//
// os/cpuid.h
//
// Functions for calling cpuid on x86.
//
// This file is subject to the terms and conditions defined in
// 'LICENSE', which is part of this source code package.
//
#ifndef __os_cpuid_h__
#define __os_cpuid_h__
#include "common.h"
// The four 32-bit register values produced by one cpuid invocation,
// stored in eax, ebx, ecx, edx order.
struct cen64_cpuid_t {
  uint32_t eax, ebx, ecx, edx;
};
// Executes the cpuid instruction with the given eax (leaf) and ecx
// (subleaf) inputs and stores the resulting eax/ebx/ecx/edx values
// in *cpuid.
void cen64_cpuid(uint32_t eax, uint32_t ecx, struct cen64_cpuid_t *cpuid);
// Queries cpuid with eax = 0 and copies the 12 vendor identification
// bytes (ebx, edx, ecx, in that order) into vendor[0..11]. The buffer
// must provide room for 13 chars so the string can be NUL-terminated.
void cen64_cpuid_get_vendor(char vendor[13]);
#endif

31
os/posix/cpuid.c Normal file
View file

@ -0,0 +1,31 @@
//
// os/posix/cpuid.c
//
// Functions for calling cpuid on x86.
//
// This file is subject to the terms and conditions defined in
// 'LICENSE', which is part of this source code package.
//
#include "os/cpuid.h"
// Runs the x86 cpuid instruction. The eax and ecx arguments are placed
// in the registers of the same name before the instruction executes;
// afterwards the contents of eax, ebx, ecx and edx are stored in *cpuid.
void cen64_cpuid(uint32_t eax, uint32_t ecx, struct cen64_cpuid_t *cpuid) {
  uint32_t reg_a = eax;
  uint32_t reg_b;
  uint32_t reg_c = ecx;
  uint32_t reg_d;

  // "+a" and "+c" are read-write operands: the request goes in through
  // eax/ecx and the results come back out through the same registers.
  __asm__ __volatile__(
    "cpuid\n\t"
    : "+a"(reg_a), "=b"(reg_b), "+c"(reg_c), "=d"(reg_d)
  );

  cpuid->eax = reg_a;
  cpuid->ebx = reg_b;
  cpuid->ecx = reg_c;
  cpuid->edx = reg_d;
}
// Writes the CPU vendor identification string (e.g. "GenuineIntel")
// into vendor as a NUL-terminated string: 12 characters followed by
// the terminator, so vendor must point to at least 13 chars.
void cen64_cpuid_get_vendor(char vendor[13]) {
  struct cen64_cpuid_t my_cpuid;

  cen64_cpuid(0, 0, &my_cpuid);

  // cpuid with eax = 0 returns the vendor bytes in ebx, edx, ecx order.
  memcpy(vendor + 0, &my_cpuid.ebx, sizeof(my_cpuid.ebx));
  memcpy(vendor + 4, &my_cpuid.edx, sizeof(my_cpuid.edx));
  memcpy(vendor + 8, &my_cpuid.ecx, sizeof(my_cpuid.ecx));

  // An array parameter is really a pointer, so sizeof(vendor) would be
  // the pointer size rather than 13 and the terminator would land in the
  // middle of the string. Terminate right after the 12 copied bytes.
  vendor[12] = '\0';
}

View file

@ -12,6 +12,7 @@
#define CEN64_OS_POSIX_THREAD
#include "common.h"
#include <pthread.h>
#include <sched.h>
#define CEN64_THREAD_RETURN_TYPE void*
#define CEN64_THREAD_RETURN_VAL NULL
@ -33,6 +34,14 @@ static inline int cen64_thread_create(cen64_thread *t,
return pthread_create(t, NULL, f, arg);
}
//
// Stores the identifier of the calling thread (as reported by
// pthread_self) in *t. Always returns 0.
//
static inline int cen64_thread_get_current(cen64_thread *t) {
  const cen64_thread self = pthread_self();

  *t = self;
  return 0;
}
//
// Join a thread created with cen64_thread_create. Use this to
// effectively "free" the resources acquired for the thread.
@ -41,6 +50,24 @@ static inline int cen64_thread_join(cen64_thread *t) {
return pthread_join(*t, NULL);
}
//
// Restrict thread *t to the CPUs selected by mask: when bit n of mask
// is set, CPU n is added to the allowed set. Since mask is 32 bits
// wide, only the first 32 CPUs of the host can be selected, which is
// good enough for now.
//
// Returns the result of pthread_setaffinity_np.
//
static inline int cen64_thread_setaffinity(cen64_thread *t, uint32_t mask) {
  cpu_set_t allowed;
  unsigned cpu = 0;

  CPU_ZERO(&allowed);

  // Walk the mask from the least significant bit upwards and stop as
  // soon as no set bits remain.
  while (mask != 0) {
    if ((mask & 0x1) != 0)
      CPU_SET(cpu, &allowed);

    mask >>= 1;
    cpu++;
  }

  return pthread_setaffinity_np(*t, sizeof(allowed), &allowed);
}
//
// Mutexes.
//

35
os/winapi/cpuid.c Normal file
View file

@ -0,0 +1,35 @@
//
// os/winapi/cpuid.c
//
// Functions for calling cpuid on x86.
//
// This file is subject to the terms and conditions defined in
// 'LICENSE', which is part of this source code package.
//
#include "os/cpuid.h"
#include <intrin.h>
// Runs cpuid through the __cpuidex intrinsic with the given eax and
// ecx inputs and copies the four resulting register values into *cpuid.
void cen64_cpuid(uint32_t eax, uint32_t ecx, struct cen64_cpuid_t *cpuid) {
  int regs[4];

  __cpuidex(regs, eax, ecx);

  // The array is read back as { eax, ebx, ecx, edx }.
  cpuid->eax = (uint32_t) regs[0];
  cpuid->ebx = (uint32_t) regs[1];
  cpuid->ecx = (uint32_t) regs[2];
  cpuid->edx = (uint32_t) regs[3];
}
// Writes the CPU vendor identification string (e.g. "GenuineIntel")
// into vendor as a NUL-terminated string: 12 characters followed by
// the terminator, so vendor must point to at least 13 chars.
void cen64_cpuid_get_vendor(char vendor[13]) {
  int cpuInfo[4];

  __cpuidex(cpuInfo, 0, 0);

  // cpuInfo holds { eax, ebx, ecx, edx }; the vendor bytes are spread
  // over ebx, edx, ecx, in that order.
  memcpy(vendor + 0, cpuInfo + 1, sizeof(*cpuInfo));
  memcpy(vendor + 4, cpuInfo + 3, sizeof(*cpuInfo));
  memcpy(vendor + 8, cpuInfo + 2, sizeof(*cpuInfo));

  // An array parameter is really a pointer, so sizeof(vendor) would be
  // the pointer size rather than 13 and the terminator would land in the
  // middle of the string. Terminate right after the 12 copied bytes.
  vendor[12] = '\0';
}

View file

@ -36,6 +36,14 @@ static inline int cen64_thread_create(cen64_thread *t,
return 1;
}
//
// Stores a handle to the currently executing thread in *t.
//
// GetCurrentThread() only yields a pseudo handle, which always refers
// to whichever thread happens to use it. Handing that value to another
// thread (e.g. to set this thread's affinity from elsewhere) would make
// the call act on the other thread instead. DuplicateHandle turns the
// pseudo handle into a real handle that stays bound to this thread.
//
// Returns 0 on success. On failure, returns non-zero and falls back to
// storing the pseudo handle so that *t is always initialized. A handle
// obtained successfully should eventually be released with CloseHandle.
//
static inline int cen64_thread_get_current(cen64_thread *t) {
  HANDLE process = GetCurrentProcess();

  if (!DuplicateHandle(process, GetCurrentThread(), process, t,
    0, FALSE, DUPLICATE_SAME_ACCESS)) {
    *t = GetCurrentThread();
    return 1;
  }

  return 0;
}
//
// Join a thread created with cen64_thread_create. Use this to
// effectively "free" the resources acquired for the thread.
@ -47,6 +55,15 @@ static inline int cen64_thread_join(cen64_thread *t) {
return !CloseHandle(*t);
}
//
// Set the affinity of a thread to the CPU mask given by mask: when
// bit n of mask is set, the thread may run on CPU n.
// Assumes the host system has <= 32 CPUs, but good enough for now.
//
// Returns 0 on success and non-zero on failure.
//
static inline int cen64_thread_setaffinity(cen64_thread *t, uint32_t mask) {
  // SetThreadAffinityMask takes the mask itself (a DWORD_PTR), not a
  // pointer to it; passing an address would pin the thread to whatever
  // CPUs the bits of that address happen to select.
  DWORD_PTR winapi_mask = mask;

  // The call returns the previous affinity mask, or 0 on failure.
  return !SetThreadAffinityMask(*t, winapi_mask);
}
//
// Mutexes.
//