daedalus/Source/SysPSP/DynaRec/DynaRecStubs.S
2013-02-26 13:49:33 +00:00

807 lines
21 KiB
ArmAsm

#include "as_reg_compat.h"
// Byte offsets of gCPUState members, used below as displacements from $fp.
// The first two defines (_C0_Count and _AuxBase) need to be adjusted whenever the
// layout of the gCPUState struct in CPU.h changes!! //Corn
//
#define _C0_Count (0x100 + 9 * 4) //CPU_Control_base + 9*8(64bit regs) or 9*4(32bit regs)
#define _AuxBase 0x280 //Base pointer to Aux regs
#define _CurrentPC (_AuxBase + 0x00)
#define _TargetPC (_AuxBase + 0x04)
#define _Delay (_AuxBase + 0x08) //0 = NO_DELAY, 1 = EXEC_DELAY
#define _StuffToDo (_AuxBase + 0x0c)
#define _MultLo (_AuxBase + 0x10) //Two 32bit words (+0 and +4)
#define _MultHi (_AuxBase + 0x18) //Two 32bit words (+0 and +4)
#define _Temp1 (_AuxBase + 0x20) //Scratch slots used to park registers
#define _Temp2 (_AuxBase + 0x24)
#define _Temp3 (_AuxBase + 0x28)
#define _Temp4 (_AuxBase + 0x2C)
#define _Events (_AuxBase + 0x30) //Events[0].mCount
# Make the assembler complain rather than silently use $at in macro expansions.
.set noat
# Routines and data defined outside this file.
# NOTE(review): output_extern, which _printf_asm calls, is not declared in this list.
.extern HandleException_extern
.extern CPU_UpdateCounter
.extern IndirectExitMap_Lookup
.extern g_MemoryLookupTableReadForDynarec
.extern Write32BitsForDynaRec
.extern Write16BitsForDynaRec
.extern Write8BitsForDynaRec
.extern CPU_HANDLE_COUNT_INTERRUPT
# Entry points exported from this file: dynarec entry/exit and exit checks.
.global _EnterDynaRec
.global _ReturnFromDynaRec
.global _DirectExitCheckNoDelay
.global _DirectExitCheckDelay
.global _IndirectExitCheck
.global _ReturnFromDynaRecIfStuffToDo
.global _DaedalusICacheInvalidate
# Memory read helpers (BD variants set the branch delay flag around the access).
.global _ReadBitsDirect_u8
.global _ReadBitsDirect_s8
.global _ReadBitsDirect_u16
.global _ReadBitsDirect_s16
.global _ReadBitsDirect_u32
.global _ReadBitsDirectBD_u8
.global _ReadBitsDirectBD_s8
.global _ReadBitsDirectBD_u16
.global _ReadBitsDirectBD_s16
.global _ReadBitsDirectBD_u32
# Memory write helpers.
.global _WriteBitsDirect_u32
.global _WriteBitsDirect_u16
.global _WriteBitsDirect_u8
.global _WriteBitsDirectBD_u32
.global _WriteBitsDirectBD_u16
.global _WriteBitsDirectBD_u8
# Float conversion, debug print and 64bit multiply/divide helpers.
.global _FloatToDouble
.global _DoubleToFloat
.global _printf_asm
.global _DMULTU
.global _DMULT
.global _DDIVU
.global _DDIV
.data
# Format string taking the PC and the StuffToDo value.
# NOTE(review): nothing in the code visible in this file references this label.
exit_dynarec_text:
.asciiz "Exiting dynarec (PC is %08x StuffToDo is 0x%x)\n"
.text
# Save the current assembler options; the matching pop is at the end of the file.
.set push
# Branch delay slots are filled by hand everywhere below: the instruction that
# follows each branch or jump is executed as part of that branch.
.set noreorder
#######################################################################################
# Invalidate a1 bytes of icache from a0
#	a0 - the base address of the memory to invalidate in the icache
#	a1 - the number of bytes to invalidate (zero is allowed and does nothing)
# Clobbers v0, a0 and a1.
# Both ends of the range are rounded down to a 64 byte boundary and every 64 byte
# unit from the first to the last one inclusive is invalidated.
_DaedalusICacheInvalidate:
	beqz	$a1, invalidate_done		# Empty range: with a 64 byte aligned a0 the end
						# pointer would land below the start and the
						# loop would only stop after wrapping round
	addiu	$v0, $0, -64			# (delay slot) v0 = ~63
	# Store base+size(-1) in a1
	addu	$a1, $a1, $a0
	addiu	$a1, $a1, -1
	# Truncate the start pointer down to nearest 64 bytes boundary
	and	$a0, $a0, $v0
	# Truncate the end pointer down to nearest 64 bytes boundary
	and	$a1, $a1, $v0
	# Do while current != end (the unit at end is included)
invalidate_next:
	cache	8, 0($a0)			# 8 is for icache invalidate
	bne	$a0, $a1, invalidate_next	# Compares a0 before the increment below
	addiu	$a0, $a0, 64			# (delay slot) advance to the next unit
invalidate_done:
	jr	$ra
	nop
#######################################################################################
# Enter a compiled fragment.
#
#	a0 - fragment function to enter
#	a1 - gCPUState base pointer
#	a2 - Memory base offset (i.e. g_pu8RamBase - 0x80000000 )
#	a3 - Memory upper bound (e.g. 0x80400000)
#
# Builds a 40 byte frame holding ra, fp and s0-s7, then jumps to a0 with
# fp = a1, s7 = a2 and s6 = a3. _ReturnFromDynaRec unwinds the same frame.
#
_EnterDynaRec:
	addiu	$sp, $sp, -40			# Room for ten words
	sw	$s7, 36($sp)
	sw	$s6, 32($sp)
	sw	$s5, 28($sp)
	sw	$s4, 24($sp)
	sw	$s3, 20($sp)
	sw	$s2, 16($sp)
	sw	$s1, 12($sp)
	sw	$s0, 8($sp)
	sw	$fp, 4($sp)			# s8 becomes the emulated CPU state pointer
	sw	$ra, 0($sp)			# Where _ReturnFromDynaRec goes back to
	move	$s7, $a2			# Memory base offset
	move	$s6, $a3			# Memory upper bound
	jr	$a0				# Off into the fragment
	move	$fp, $a1			# (delay slot) fp = gCPUState base
#######################################################################################
# Check gCPUState.StuffToDo. If non-zero, performs any required handling then exits
# the dynarec system. If the flag is zero this just returns immediately.
# NB: As a significant optimisation the dynarec system patches the first two ops
# of this function to return immediately in the case the gCPUState.StuffToDo is not set.
# As assembled here there is no check at all: the routine calls HandleException_extern
# and then falls through into _ReturnFromDynaRec, so the jal/nop pair must remain the
# first two instructions and _ReturnFromDynaRec must follow directly.
#
_ReturnFromDynaRecIfStuffToDo:
# Sanity checking logic (disabled; kept for reference)
# lw $v0, _StuffToDo($fp) # StuffToDo
# bne $v0, $0, exception_exit
# nop
# jr $ra # Just return back to caller
# nop
#exception_exit:
jal HandleException_extern
nop
# Fall through to _ReturnFromDynaRec, so no explicit jump is needed
# j _ReturnFromDynaRec
# nop
#######################################################################################
# Leave the dynarec system: reload ra, fp and s0-s7 from the 40 byte frame at sp,
# release the frame and return through the reloaded ra.
#
_ReturnFromDynaRec:
	lw	$ra, 0($sp)			# Return address saved on entry
	lw	$s7, 36($sp)
	lw	$s6, 32($sp)
	lw	$s5, 28($sp)
	lw	$s4, 24($sp)
	lw	$s3, 20($sp)
	lw	$s2, 16($sp)
	lw	$s1, 12($sp)
	lw	$s0, 8($sp)
	lw	$fp, 4($sp)			# s8 last, it was our base pointer until now
	jr	$ra
	addiu	$sp, $sp, 40			# (delay slot) pop the frame
#######################################################################################
# Account for the executed instructions and find out whether the dynarec system has
# to be left. Returns to the caller when execution may continue.
# COUNT is advanced and Events[0].mCount is reduced by the number of cycles given.
#	a0 - instructions executed
#	a1 - exit pc
# Leaves the new COUNT in v0 and the new Events[0].mCount in v1.
_DirectExitCheckNoDelay:
	# Inlined equivalent of CPU_UpdateCounter
	lw	$v1, _Events($fp)		# Events[0].mCount
	lw	$v0, _C0_Count($fp)		# COUNT register
	sw	$a1, _CurrentPC($fp)		# CurrentPC = exit pc
	sw	$zero, _Delay($fp)		# Delay = NO_DELAY
	addu	$v0, $v0, $a0			# COUNT + ops_executed
	subu	$v1, $v1, $a0			# Events[0].mCount - ops_executed
	sw	$v0, _C0_Count($fp)		# Write back COUNT
	blez	$v1, _DirectExitCheckCheckCount	# Event due: go and service it
	sw	$v1, _Events($fp)		# (delay slot) write back Events[0].mCount
	jr	$ra				# Nothing pending, carry on
	nop
#######################################################################################
# Same as _DirectExitCheckNoDelay, but for an exit inside a branch delay slot: the
# target pc is recorded as well and Delay is set to EXEC_DELAY.
# COUNT is advanced and Events[0].mCount is reduced by the number of cycles given.
#	a0 - instructions executed
#	a1 - exit pc
#	a2 - target pc
# Leaves 1 in v0 and the new Events[0].mCount in v1.
_DirectExitCheckDelay:
	# Inlined equivalent of CPU_UpdateCounter
	lw	$v1, _Events($fp)		# Events[0].mCount
	lw	$v0, _C0_Count($fp)		# COUNT register
	sw	$a1, _CurrentPC($fp)		# CurrentPC = exit pc
	sw	$a2, _TargetPC($fp)		# TargetPC = target pc
	addu	$v0, $v0, $a0			# COUNT + ops_executed
	sw	$v0, _C0_Count($fp)		# Write back COUNT
	li	$v0, 1				# EXEC_DELAY
	subu	$v1, $v1, $a0			# Events[0].mCount - ops_executed
	sw	$v0, _Delay($fp)		# Delay = EXEC_DELAY
	blez	$v1, _DirectExitCheckCheckCount	# Event due: go and service it
	sw	$v1, _Events($fp)		# (delay slot) write back Events[0].mCount
	jr	$ra				# Nothing pending, carry on
	nop
#######################################################################################
# Shared tail of the _DirectExitCheckXX routines, reached when Events[0].mCount has
# dropped to zero or below. Calls CPU_HANDLE_COUNT_INTERRUPT, then either leaves the
# dynarec system (StuffToDo set) or resumes at the return address of whoever called
# _DirectExitCheckXX. Uses s0 to hold that return address across the call.
#
_DirectExitCheckCheckCount:
	move	$s0, $ra			# ra is about to be overwritten by the jal
	jal	CPU_HANDLE_COUNT_INTERRUPT
	nop
	lw	$v0, _StuffToDo($fp)		# StuffToDo
	bnez	$v0, _ReturnFromDynaRec		# Something to handle: exit the dynarec
	nop
	jr	$s0				# Otherwise resume the fragment
	nop
#######################################################################################
# Update the counter via CPU_UpdateCounter. When the StuffToDo flag is clear on
# return, the exit pc is looked up in the indirect exit map and control jumps to the
# address found; a set flag or a failed lookup exits through _ReturnFromDynaRec.
#	a0 - instructions executed
#	a1 - CIndirectExitMap pointer
#	a2 - exit pc (exit delay is always NO_DELAY)
# Uses s0 and s1 to carry a1 and a2 across the calls.
_IndirectExitCheck:
	move	$s1, $a2			# Remember the exit pc
	move	$s0, $a1			# and the map pointer
	# Can avoid these until _ReturnFromDynaRec?
	sw	$a2, _CurrentPC($fp)		# CurrentPC = exit pc
	jal	CPU_UpdateCounter		# a0 still holds instructions executed
	sw	$zero, _Delay($fp)		# (delay slot) Delay = NO_DELAY
	lw	$v0, _StuffToDo($fp)		# StuffToDo
	bnez	$v0, _ReturnFromDynaRec
	nop
	move	$a0, $s0			# p_map
	jal	IndirectExitMap_Lookup
	move	$a1, $s1			# (delay slot) exit_pc
	# v0 is the indirect target; zero means that pc has not been compiled yet
	beqz	$v0, _ReturnFromDynaRec
	nop
	jr	$v0
	nop
#######################################################################################
# Memory read helpers. Every routine generated below behaves like:
#
#	u32 ret = u32( *(T *)FuncTableReadAddress( address ) );
#	_ReturnFromDynaRecIfStuffToDo( 0 );
#	return ret;
#
#	a0 - address (pre-swizzled)
#	a1 - current_pc
#
# The handler called is the second word of the 8 byte table entry selected by
# (address >> 18). The final load reads through whatever is in v0 at that point.
# The return address is parked in _Temp1 while the calls are made.
#
.macro READ_BITS function, load_instruction
\function:
	sw	$ra, _Temp1($fp)			# Park the return address
	la	$v0, g_MemoryLookupTableReadForDynarec
	srl	$v1, $a0, 18				# Table index
	lw	$v0, 0($v0)				# The symbol holds a pointer to the table
	sll	$v1, $v1, 3				# Scale by 8, entries are two pointers
	addu	$v1, $v1, $v0
	lw	$v0, 4($v1)				# Second pointer of the entry
	jalr	$v0
	sw	$a1, _CurrentPC($fp)			# (delay slot) CurrentPC
	jal	_ReturnFromDynaRecIfStuffToDo
	move	$a0, $zero				# (delay slot) argument 0
	lw	$ra, _Temp1($fp)			# Return address back from _Temp1
	jr	$ra
	\load_instruction	$v0, 0($v0)		# (delay slot) e.g. lbu, lhu, lw etc
.endm

# Branch delay variant: Delay holds EXEC_DELAY while the handler runs and is put
# back to NO_DELAY before returning.
.macro READ_BITS_BD function, load_instruction
\function:
	sw	$ra, _Temp1($fp)			# Park the return address
	sw	$a1, _CurrentPC($fp)			# CurrentPC
	la	$v0, g_MemoryLookupTableReadForDynarec
	srl	$v1, $a0, 18				# Table index
	lw	$v0, 0($v0)				# The symbol holds a pointer to the table
	sll	$v1, $v1, 3				# Scale by 8, entries are two pointers
	addu	$v1, $v1, $v0
	lw	$v0, 4($v1)				# Second pointer of the entry
	li	$v1, 1					# EXEC_DELAY
	jalr	$v0
	sw	$v1, _Delay($fp)			# (delay slot) Delay = EXEC_DELAY
	jal	_ReturnFromDynaRecIfStuffToDo
	move	$a0, $zero				# (delay slot) argument 0
	sw	$zero, _Delay($fp)			# Delay = NO_DELAY
	lw	$ra, _Temp1($fp)			# Return address back from _Temp1
	jr	$ra
	\load_instruction	$v0, 0($v0)		# (delay slot) e.g. lbu, lhu, lw etc
.endm

	READ_BITS	_ReadBitsDirect_u8, lbu
	READ_BITS	_ReadBitsDirect_s8, lb
	READ_BITS	_ReadBitsDirect_u16, lhu
	READ_BITS	_ReadBitsDirect_s16, lh
	READ_BITS	_ReadBitsDirect_u32, lw
	READ_BITS_BD	_ReadBitsDirectBD_u8, lbu
	READ_BITS_BD	_ReadBitsDirectBD_s8, lb
	READ_BITS_BD	_ReadBitsDirectBD_u16, lhu
	READ_BITS_BD	_ReadBitsDirectBD_s16, lh
	READ_BITS_BD	_ReadBitsDirectBD_u32, lw
#######################################################################################
# Memory write helpers.
# Each routine records the PC (the BD versions also raise the branch delay flag),
# hands the write to the matching WriteNNBitsForDynaRec routine and then calls
# _ReturnFromDynaRecIfStuffToDo, which gives control back to the interpreter in
# the case that an exception was triggered.
#
#	a0 - address (pre-swizzled)
#	a1 - value
#	a2 - current_pc
#
# The return address is parked in _Temp1 while the calls are made.
#
.macro WRITE_BITS function, write_handler
\function:
	sw	$ra, _Temp1($fp)			# Park the return address
	jal	\write_handler
	sw	$a2, _CurrentPC($fp)			# (delay slot) CurrentPC
	jal	_ReturnFromDynaRecIfStuffToDo
	move	$a0, $0					# (delay slot) argument 0
	lw	$ra, _Temp1($fp)			# Return address back from _Temp1
	jr	$ra
	nop
.endm

# Branch delay variant: Delay holds EXEC_DELAY while the handler runs and is put
# back to NO_DELAY on the way out.
.macro WRITE_BITS_BD function, write_handler
\function:
	sw	$ra, _Temp1($fp)			# Park the return address
	sw	$a2, _CurrentPC($fp)			# CurrentPC
	li	$v1, 1					# EXEC_DELAY
	jal	\write_handler
	sw	$v1, _Delay($fp)			# (delay slot) Delay = EXEC_DELAY
	jal	_ReturnFromDynaRecIfStuffToDo
	move	$a0, $0					# (delay slot) argument 0
	lw	$ra, _Temp1($fp)			# Return address back from _Temp1
	jr	$ra
	sw	$0, _Delay($fp)				# (delay slot) Delay = NO_DELAY
.endm

	WRITE_BITS	_WriteBitsDirect_u32, Write32BitsForDynaRec
	WRITE_BITS_BD	_WriteBitsDirectBD_u32, Write32BitsForDynaRec
	WRITE_BITS	_WriteBitsDirect_u16, Write16BitsForDynaRec
	WRITE_BITS_BD	_WriteBitsDirectBD_u16, Write16BitsForDynaRec
	WRITE_BITS	_WriteBitsDirect_u8, Write8BitsForDynaRec
	WRITE_BITS_BD	_WriteBitsDirectBD_u8, Write8BitsForDynaRec
#######################################################################################
/**
* convert float to double
* double FloatToDouble(float a);
*
* Works purely on the integer bit patterns; no FPU instruction is used.
*
* input: a0 (float bits)
* output: v0 (low word of the double), v1 (high word: sign, exponent, top 20 mantissa bits)
* clobber: a0,a1
*/
_FloatToDouble:
ext $a1, $a0, 23, 8 /* a1 = (a0 >> 23) & 0xFF, the float exponent field */
beqz $a1, ftod_denormal /* if (a1==0) goto ftod_denormal */
addiu $v0, $a1, (-0x7F+0x3FF) /* (delay slot) v0 = a1 - 0x7F + 0x3FF, rebias the exponent */
xori $a1, $a1, 0xFF /* a1 = a1 ^ 0xFF, zero only for exponent field 0xFF */
li $v1, 0x7FF /* v1 = 0x7FF */
movz $v0, $v1, $a1 /* v0 = (a1==0) ? v1 : v0, exponent 0xFF maps to 0x7FF */
ext $v1, $a0, 3, 20 /* v1 = (a0 >> 3 ) & 0x00FFFFF */
ins $v1, $v0, 20, 11 /* v1 = (v1 & 0x800FFFFF) | ((v0<<20) & 0x7FF00000) */
sll $v0, $a0, 29 /* v0 = (a0 << 29), the low three mantissa bits */
srl $a0, $a0, 31 /* a0 = (a0 >> 31) & 1 */
jr $ra /* return */
ins $v1, $a0, 31, 1 /* (delay slot) v1 = (v1 & 0x7FFFFFFF) | ((a0<<31) & 0x80000000) */
ftod_denormal: /* exponent field is zero: zero or denormal input */
sll $v0, $a0, 9 /* v0 = a0 << 9, mantissa moved to the top, sign dropped */
beqzl $v0, ftod_zero /* if (v0==0) goto ftod_zero */
move $v1, $zero /* v1 = 0 (branch likely: runs only when the branch is taken) */
li $v1, 0x380 /* v1 = 0x380 */
clz $a1, $v0 /* a1 = clz(v0) */
subu $v0, $v1, $a1 /* v0 = v1 - a1 = 0x380 - clz(mantissa), the double exponent */
sllv $a1, $a0, $a1 /* a1 = a0 << a1, leading mantissa bit now at bit 22 */
ext $v1, $a1, 2, 20 /* v1 = (a1 >> 2 ) & 0x00FFFFF */
ins $v1, $v0, 20, 11 /* v1 = (v1 & 0x800FFFFF) | ((v0<<20) & 0x7FF00000) */
sll $v0, $a1, 30 /* v0 = (a1 << 30) */
ftod_zero:
srl $a0, $a0, 31 /* a0 = (a0 >> 31) & 1 */
jr $ra /* return */
ins $v1, $a0, 31, 1 /* (delay slot) v1 = (v1 & 0x7FFFFFFF) | ((a0<<31) & 0x80000000) */
#######################################################################################
/**
 * convert double to float
 * float DoubleToFloat(double a);
 *
 * Works purely on the integer bit patterns; no FPU instruction is used.
 *
 * input: a0 (low word of the double), a1 (high word: sign, exponent, top 20 mantissa bits)
 * output: v0 (float bits)
 * clobber: v0,v1,a2,a3
 */
_DoubleToFloat:
	ext	$a2, $a1, 20, 11		/* a2 = (a1>>20) & 0x000007FF, the double exponent field */
	beqz	$a2, dtof_zero			/* if (a2==0) goto dtof_zero */
	xori	$a3, $a2, 0x7FF			/* (delay slot) a3 = a2 ^ 0x7FF */
	beqz	$a3, dtof_naninf		/* if (a3==0) goto dtof_naninf */
	addiu	$a3, $a2, (+0x7F-0x3FF)		/* (delay slot) a3 = a2 + 0x7F - 0x3FF, the float exponent */
	blez	$a3, dtof_denormal		/* if (a3<=0) goto dtof_denormal */
	addiu	$v1, $a3, -0xFE			/* (delay slot) v1 = a3 - 0xFE */
	bgtz	$v1, dtof_inf			/* if (v1 > 0) goto dtof_inf */
	move	$v0, $zero			/* (delay slot) v0 = 0 */
	srl	$v0, $a0, 29			/* v0 = (a0>>29) & 0x00000007 */
	ins	$v0, $a1, 3, 20			/* v0 = (v0 & 0xFF800007) | ((a1 & 0FFFFF)<<3) */
	beqz	$v1, dtof_inf_normal		/* if (v1==0) goto dtof_inf_normal */
dtof_normal:					/* the srl below doubles as the delay slot of the beqz above */
	srl	$v1, $a1, 31			/* v1 = (a1>>31) & 1 */
dtof_normal2:
	ins	$v0, $v1, 31, 1			/* v0 = (v0 & 0x7FFFFFFF) | (v1 << 31) */
	jr	$ra				/* return */
	ins	$v0, $a3, 23, 8			/* (delay slot) v0 = (v0 & 0x807FFFFF) | (a3 << 23) */
dtof_denormal:
	sll	$a2, $a1, 12			/* a2 = a1 << 12 */
	srl	$v0, $a2, 10			/* v0 = a2 >> 10 */
	srl	$a2, $a0, 30			/* a2 = a0 >> 30 */
	or	$v0, $v0, $a2			/* v0 = v0 | a2 (register operand: the $ prefix is required) */
	li	$a2, 0x00400000			/* a2 = 0x00400000, the implicit leading one */
	or	$v0, $v0, $a2			/* v0 = v0 | a2 */
	subu	$a2, $zero, $a3			/* a2 = zero - a3, the right shift still needed */
	sltiu	$a3, $a2, 22			/* a3 = (a2 < 22) */
	beqz	$a3, dtof_min			/* if (a3==0) goto dtof_min */
	srlv	$v0, $v0, $a2			/* (delay slot) v0 = v0 >> a2 */
	srl	$v1, $a1, 31			/* v1 = (a1>>31) & 1 */
	jr	$ra				/* return */
	ins	$v0, $v1, 31, 1			/* (delay slot) v0 = (v0 & 0x7FFFFFFF) | (v1 << 31) */
dtof_zero:
	sll	$a2, $a1, 12			/* a2 = a1 << 12 */
	or	$a2, $a2, $a0			/* a2 = a2 | a0, non-zero when any mantissa bit is set */
dtof_min:
	li	$v0, 0x00000001			/* v0 = 0x00000001 */
	movz	$v0, $zero, $a2			/* v0 = (a2==0) ? zero : v0 */
	srl	$a2, $a1, 31			/* a2 = (a1 >> 31) & 1 */
	jr	$ra				/* return */
	ins	$v0, $a2, 31, 1			/* (delay slot) v0 = (v0 & 0x7FFFFFFF) | ((a2<<31) & 0x80000000) */
dtof_inf_normal:
	nor	$a2, $zero, $a1			/* a2 = ~a1 */
	sll	$a2, $a2, 12			/* a2 = a2 << 12 */
	bnez	$a2, dtof_normal		/* if (a2!=0) goto dtof_normal */
	srl	$a2, $a0, 28			/* (delay slot) a2 = a0 >> 28 */
	sltiu	$a2, $a2, 0xF			/* a2 = (a2 < 0xF) */
	bnez	$a2, dtof_normal2		/* if (a2!=0) goto dtof_normal2 */
	srl	$v1, $a1, 31			/* (delay slot) v1 = (a1>>31) & 1 */
	j	dtof_inf			/* goto dtof_inf */
	move	$v0, $zero			/* (delay slot) v0 = 0 */
dtof_naninf:
	sll	$a2, $a1, 12			/* a2 = a1 << 12 */
	or	$a3, $a2, $a0			/* a3 = a2 | a0 */
	srl	$v0, $a2, 9			/* v0 = a2 >> 9 */
	srl	$a2, $a0, 29			/* a2 = a0 >> 29 */
	or	$v0, $v0, $a2			/* v0 = v0 | a2 */
	sltiu	$a2, $v0, 1			/* a2 = (v0 < 1) */
	or	$v0, $v0, $a2			/* v0 = v0 | a2, keeps the mantissa non-zero */
	movz	$v0, $zero, $a3			/* v0 = (a3==0) ? zero : v0 */
dtof_inf:
	li	$v1, 0x7F800000			/* v1 = 0x7F800000 */
	or	$v0, $v0, $v1			/* v0 = v0 | v1 */
	srl	$v1, $a1, 31			/* v1 = (a1 >> 31) & 1 */
	jr	$ra				/* return */
	ins	$v0, $v1, 31, 1			/* (delay slot) v0 = (v0 & 0x7FFFFFFF) | ((v1<<31) & 0x80000000) */
#######################################################################################
/**
 * context save and print value
 * void printf_asm(u32 val);
 *
 * Calls output_extern with a0 unchanged. ra, at, v0-v1, a0-a3 and t0-t9 are saved
 * in a 72 byte stack frame around the call and restored afterwards; nothing else
 * is saved by this routine.
 *
 * input: a0
 * output: -
 * clobber: - (of the registers listed above)
 * uses: 72 bytes of stack
 */
.extern output_extern
_printf_asm:
	addiu	$sp, $sp, -72			# Push on the stack
	sw	$ra, 0($sp)
	sw	$at, 4($sp)
	sw	$v0, 8($sp)
	sw	$v1, 12($sp)
	sw	$a0, 16($sp)
	sw	$a1, 20($sp)
	sw	$a2, 24($sp)
	sw	$a3, 28($sp)
	sw	$t0, 32($sp)
	sw	$t1, 36($sp)
	sw	$t2, 40($sp)
	sw	$t3, 44($sp)
	sw	$t4, 48($sp)
	sw	$t5, 52($sp)
	sw	$t6, 56($sp)
	sw	$t7, 60($sp)
	sw	$t8, 64($sp)
	jal	output_extern			# a0 still holds val
	sw	$t9, 68($sp)			# (delay slot) last save happens before the callee runs
	lw	$ra, 0($sp)
	lw	$at, 4($sp)
	lw	$v0, 8($sp)
	lw	$v1, 12($sp)
	lw	$a0, 16($sp)
	lw	$a1, 20($sp)
	lw	$a2, 24($sp)
	lw	$a3, 28($sp)
	lw	$t0, 32($sp)
	lw	$t1, 36($sp)
	lw	$t2, 40($sp)
	lw	$t3, 44($sp)
	lw	$t4, 48($sp)
	lw	$t5, 52($sp)
	lw	$t6, 56($sp)
	lw	$t7, 60($sp)
	lw	$t8, 64($sp)
	lw	$t9, 68($sp)
	jr	$ra				/* return */
	addiu	$sp, $sp, 72			# (delay slot) restore the stack
#######################################################################################
/**
* Unsigned 64bit multiply (A * B) -> 128bit result
* void _DMULTU(u32 A_LSB, u32 A_MSB, u32 B_LSB, u32 B_MSB);
*
* Built from four 32x32 multiplies whose partial products are summed with explicit
* carry tracking (sltu after each addu). The result is stored as four 32bit words:
* _MultLo+0 (least significant), _MultLo+4, _MultHi+0, _MultHi+4 (most significant).
*
* input: a0, a1, a2, a3
* output: - (result goes to _MultLo/_MultHi relative to fp)
* clobber: a0, a1, a2, a3, v0, v1
* uses: -
*/
_DMULTU:
multu $a2,$a0 # HI:LO = B_LSB * A_LSB
mfhi $v0 # v0 = high word, feeds result word 1
mflo $v1
sw $v1,_MultLo+0($fp) # Result word 0
multu $a2,$a1 # HI:LO = B_LSB * A_MSB
mflo $a2
addu $v1,$v0,$a2 # v1 = partial sum for result word 1
sltu $a2,$v1,$v0 # a2 = carry out of that add
mfhi $v0 # v0 = high word, feeds result word 2
multu $a3,$a0 # HI:LO = B_MSB * A_LSB
mflo $a0
addu $a0,$v1,$a0 # a0 = result word 1
sltu $v1,$a0,$v1 # v1 = carry out of that add
sw $a0,_MultLo+4($fp) # Result word 1
mfhi $a0
addu $a2,$a2,$a0 # first carry + high word of B_MSB * A_LSB
addu $v1,$v1,$a2 # v1 = both carries + that high word
multu $a3,$a1 # HI:LO = B_MSB * A_MSB
mflo $a1
addu $a1,$v0,$a1 # high word of B_LSB * A_MSB + low word of B_MSB * A_MSB
addu $v1,$a1,$v1 # v1 = result word 2
sw $v1,_MultHi+0($fp) # Result word 2
sltu $v0,$a1,$v0 # v0 = carry out of the first of the two adds
mfhi $a0
addu $v0,$v0,$a0 # plus high word of B_MSB * A_MSB
sltu $a1,$v1,$a1 # a1 = carry out of the second add
addu $a1,$a1,$v0 # a1 = result word 3
jr $ra
sw $a1,_MultHi+4($fp) # (delay slot) result word 3
#######################################################################################
/**
 * Signed 64bit multiply (A * B) -> 64bit result (should be 128bit!)
 * void _DMULT(u32 A_LSB, u32 A_MSB, u32 B_LSB, u32 B_MSB);
 *
 * Only the low 64 bits of the product are computed and stored in _MultLo;
 * both words of _MultHi are written as zero.
 *
 * input: a0, a1, a2, a3
 * output: - (result goes to _MultLo/_MultHi relative to fp)
 * clobber: a0, a1, a2, a3, v0
 * uses: -
 */
_DMULT:
	multu	$a2, $a0			# HI:LO = B_LSB * A_LSB
	sw	$zero, _MultHi+0($fp)		# Upper half of the result is not computed
	mflo	$v0
	sw	$v0, _MultLo+0($fp)		# Result word 0
	mfhi	$v0				# Carry-in for result word 1
	mult	$a3, $a0			# B_MSB * A_LSB, only LO is needed
	mflo	$a0
	mult	$a1, $a2			# A_MSB * B_LSB, only LO is needed
	mflo	$a2
	addu	$a0, $a2, $a0			# Sum of the two cross terms
	addu	$v0, $v0, $a0			# Result word 1
	sw	$zero, _MultHi+4($fp)
	jr	$ra
	sw	$v0, _MultLo+4($fp)		# (delay slot) result word 1
#######################################################################################
/**
* Unsigned 64bit division (Num /Div) -> 64bit quotient and 32bit remainder
* void _DDIVU(u32 Num_LSB, u32 Num_MSB, u32 Div_LSB);
*
* Quotient goes to _MultLo (+0 low word, +4 high word), remainder to _MultHi+0,
* and _MultHi+4 is always written as zero.
* When Num_MSB is zero a single 32bit divu is used. Otherwise the high word is
* divided first and the low quotient word is produced one bit per pass by a
* shift and subtract loop.
*
* input: a0, a1, a2
* output: - (result goes to _MultLo/_MultHi relative to fp)
* clobber: a0, a1, a2, a3, v0, v1
* uses: t0 (parked in _Temp1 and restored on the long path)
*/
_DDIVU:
beqz $a1, DDIVU_skip1 //Check if we need a full 64bit division
sw $zero,_MultHi+4($fp) //(delay slot, runs on both paths) Remainder hi
divu $a1,$a2 //High word first
mflo $a3
mfhi $a1 //a1 = running remainder
sw $a3,_MultLo+4($fp) //Quot hi
sw $t0,_Temp1($fp) //save reg content
move $v0,$zero //v0 = low quotient word being built
move $v1,$zero
b DDIVU_skip2
li $a3,33 //(delay slot) pass counter, decremented once before the first pass
DDIVU_loop1:
srl $v1,$a1,0x1f //v1 = bit about to be shifted out of the remainder
sll $a1,$a1,0x1
or $a1,$a1,$t0 //bring in the next numerator bit (t0 is set in the delay slot at DDIVU_skip4)
sll $a0,$a0,0x1
sll $v0,$v0,0x1
bnez $v1, DDIVU_skip3 //remainder overflowed 32 bits: always subtract
DDIVU_skip2: //the sltu is also the delay slot of the bnez above
sltu $t0,$a1,$a2 //t0 = remainder < divisor
bnez $t0, DDIVU_skip4 //too small: quotient bit stays 0
DDIVU_skip3: //the addiu is also the delay slot of the bnez above, so it runs on every pass
addiu $a3,$a3,-1
subu $a1,$a1,$a2 //remainder -= divisor
addiu $v0,$v0,1 //quotient bit = 1
DDIVU_skip4:
bnez $a3, DDIVU_loop1
srl $t0,$a0,0x1f //(delay slot) t0 = top bit of what is left of Num_LSB
lw $t0,_Temp1($fp) //restore reg content
sw $v0,_MultLo+0($fp) //Quot lo
jr $ra
sw $a1,_MultHi+0($fp) //(delay slot) Remainder lo
DDIVU_skip1: //Do a 32bit div only
divu $a0,$a2
mflo $v0
mfhi $a1
sw $v0,_MultLo+0($fp) //Quot lo
sw $zero,_MultLo+4($fp) //Quot hi
jr $ra
sw $a1,_MultHi+0($fp) //(delay slot) Remainder lo
#######################################################################################
/**
* Signed 64bit division (Num / Div) -> 64bit quotient and 32bit remainder
* void _DDIV(s64 Num, s32 Div);
*
* Both operands are made positive, an unsigned division is done (a single divu
* when the high numerator word is zero, otherwise the same shift and subtract
* loop as _DDIVU), and the result is negated when the operand signs differ.
* Quotient goes to _MultLo (+0 low word, +4 high word), remainder to _MultHi+0.
* _MultHi+4 is not written by this routine.
*
* NOTE(review): the remainder is negated under the same condition as the quotient
* (operand signs differ). A truncating division would give the remainder the sign
* of the numerator instead - confirm which behaviour is intended.
*
* input: a0 (Num low), a1 (Num high), a2 (Div)
* output: - (result goes to _MultLo/_MultHi relative to fp)
* clobber: a0, a1, a2, a3, v0, v1
* uses: t0, t1, t2 (parked in _Temp1, _Temp2, _Temp3 and restored before return)
*/
_DDIV:
sw $t2,_Temp3($fp) //save reg content
bgez $a1,DDIV_skip1 //Make numerator positive if needed
slt $t2, $a1, $zero //(delay slot) sign = value < 0 ? 1 : 0
negu $a0,$a0 //64bit negate of a1:a0
negu $a1,$a1
sltu $v0,$zero,$a0 //borrow when the low word is non-zero
subu $a1,$a1,$v0
DDIV_skip1:
bgez $a2,DDIV_skip2 //Make divisor positive if needed
sw $t0,_Temp1($fp) //(delay slot) save reg content
xori $t2,$t2,0x1 //sign ^= 1
negu $a2,$a2
DDIV_skip2:
beqz $a1,DDIV_skip8 //Check if top 32bit == 0
sw $t1,_Temp2($fp) //(delay slot) save reg content
divu $a1,$a2 //Do long 64bit division
move $v0,$zero //v0 = low quotient word being built
move $t0,$zero
li $v1,33 //pass counter, decremented once before the first pass
mflo $t1 //t1 = Quot hi
b DDIV_skip4
mfhi $a3 //(delay slot) a3 = running remainder
DDIV_loop1:
srl $t0,$a3,0x1f //t0 = bit about to be shifted out of the remainder
sll $a3,$a3,0x1
or $a3,$a3,$a1 //bring in the next numerator bit (a1 is set in the delay slot at DDIV_skip6)
sll $a0,$a0,0x1
sll $v0,$v0,0x1
bnez $t0,DDIV_skip5 //remainder overflowed 32 bits: always subtract
DDIV_skip4: //the sltu is also the delay slot of the bnez above
sltu $a1,$a3,$a2 //a1 = remainder < divisor
bnez $a1,DDIV_skip6 //too small: quotient bit stays 0
DDIV_skip5: //the addiu is also the delay slot of the bnez above, so it runs on every pass
addiu $v1,$v1,-1
subu $a3,$a3,$a2 //remainder -= divisor
addiu $v0,$v0,1 //quotient bit = 1
DDIV_skip6:
bnez $v1,DDIV_loop1
srl $a1,$a0,0x1f //(delay slot) a1 = top bit of what is left of Num low
DDIV_loop2: //Common exit for both paths (not a loop)
beqz $t2,DDIV_skip7 //Need sign flip on result?
lw $t0,_Temp1($fp) //(delay slot) restore reg content
negu $v0,$v0 //64bit negate of the quotient t1:v0
negu $t1,$t1
sltu $v1,$zero,$v0 //borrow when the low word is non-zero
subu $t1,$t1,$v1
negu $a3,$a3 //negate the remainder as well (see NOTE above)
DDIV_skip7: //No sign flip needed
sw $v0,_MultLo+0($fp) //Quot lo
sw $t1,_MultLo+4($fp) //Quot hi
sw $a3,_MultHi+0($fp) //Rem lo
lw $t1,_Temp2($fp) //restore reg content
jr $ra
lw $t2,_Temp3($fp) //(delay slot) restore reg content
DDIV_skip8: //Do short 32bit division
divu $a0,$a2
move $t1,$zero //hi part of quotient will be zero
mflo $v0
b DDIV_loop2
mfhi $a3 //(delay slot) a3 = remainder
#######################################################################################
# Restore the assembler options saved by the .set push near the top of the file.
.set pop