mirror of
https://github.com/DaedalusX64/daedalus.git
synced 2025-04-02 10:21:48 -04:00
git-svn-id: https://subversion.assembla.com/svn/Daedalusx64/trunk@1099 42e9bfbe-799a-4a2d-bad1-236e862a387a
807 lines
21 KiB
ArmAsm
807 lines
21 KiB
ArmAsm
#include "as_reg_compat.h"
|
|
|
|
// The first two defines below must be adjusted to match the layout of the gCPUState struct in CPU.h. //Corn
|
|
// All values are byte offsets from the gCPUState base pointer, which the stubs keep in $fp.
|
|
#define _C0_Count (0x100 + 9 * 4) // CPU_Control_base + 9*8 (64bit regs) or 9*4 (32bit regs)
|
|
#define _AuxBase 0x280 // Base offset of the Aux regs; every define below is relative to it
|
|
#define _CurrentPC (_AuxBase + 0x00) // Written with the exit/current pc by the stubs
|
|
#define _TargetPC (_AuxBase + 0x04) // Written with the branch target by _DirectExitCheckDelay
|
|
#define _Delay (_AuxBase + 0x08) // 0 = NO_DELAY, 1 = EXEC_DELAY (values stored by the stubs)
|
|
#define _StuffToDo (_AuxBase + 0x0c) // Non-zero makes the stubs leave the dynarec
|
|
#define _MultLo (_AuxBase + 0x10) // 8 bytes: low word at +0, high word at +4
|
|
#define _MultHi (_AuxBase + 0x18) // 8 bytes: low word at +0, high word at +4
|
|
#define _Temp1 (_AuxBase + 0x20) // 4 byte scratch slot (saved ra or t0)
|
|
#define _Temp2 (_AuxBase + 0x24) // 4 byte scratch slot (saved t1 in _DDIV)
|
|
#define _Temp3 (_AuxBase + 0x28) // 4 byte scratch slot (saved t2 in _DDIV)
|
|
#define _Temp4 (_AuxBase + 0x2C) // 4 byte scratch slot
|
|
#define _Events (_AuxBase + 0x30) // Events[0].mCount
|
|
|
|
.set noat
|
|
|
|
.extern HandleException_extern
|
|
.extern CPU_UpdateCounter
|
|
.extern IndirectExitMap_Lookup
|
|
.extern g_MemoryLookupTableReadForDynarec
|
|
.extern Write32BitsForDynaRec
|
|
.extern Write16BitsForDynaRec
|
|
.extern Write8BitsForDynaRec
|
|
.extern CPU_HANDLE_COUNT_INTERRUPT
|
|
|
|
|
|
.global _EnterDynaRec
|
|
.global _ReturnFromDynaRec
|
|
.global _DirectExitCheckNoDelay
|
|
.global _DirectExitCheckDelay
|
|
.global _IndirectExitCheck
|
|
.global _ReturnFromDynaRecIfStuffToDo
|
|
.global _DaedalusICacheInvalidate
|
|
|
|
.global _ReadBitsDirect_u8
|
|
.global _ReadBitsDirect_s8
|
|
.global _ReadBitsDirect_u16
|
|
.global _ReadBitsDirect_s16
|
|
.global _ReadBitsDirect_u32
|
|
|
|
.global _ReadBitsDirectBD_u8
|
|
.global _ReadBitsDirectBD_s8
|
|
.global _ReadBitsDirectBD_u16
|
|
.global _ReadBitsDirectBD_s16
|
|
.global _ReadBitsDirectBD_u32
|
|
|
|
.global _WriteBitsDirect_u32
|
|
.global _WriteBitsDirect_u16
|
|
.global _WriteBitsDirect_u8
|
|
.global _WriteBitsDirectBD_u32
|
|
.global _WriteBitsDirectBD_u16
|
|
.global _WriteBitsDirectBD_u8
|
|
|
|
.global _FloatToDouble
|
|
.global _DoubleToFloat
|
|
.global _printf_asm
|
|
.global _DMULTU
|
|
.global _DMULT
|
|
.global _DDIVU
|
|
.global _DDIV
|
|
|
|
.data
|
|
# Debug format string taking a PC and a StuffToDo value.
# NOTE(review): nothing in the visible part of this file references it - confirm before removing.
exit_dynarec_text:
|
|
.asciiz "Exiting dynarec (PC is %08x StuffToDo is 0x%x)\n"
|
|
.text
|
|
.set push
|
|
.set noreorder
|
|
|
|
#######################################################################################
# Invalidate a1 bytes of icache starting at a0.
#	a0 - the base address of the memory to invalidate in the icache
#	a1 - the number of bytes to invalidate
# Clobbers: v0, a0, a1
#
# Both ends of the range are rounded down to a 64 byte boundary (the step used by
# the loop) and every line from the first to the last, inclusive, is invalidated.
# An empty range (a1 == 0) returns without touching the cache; without this guard
# an aligned base would put the end pointer below the start pointer and the
# equality-terminated loop would not stop where intended.
_DaedalusICacheInvalidate:
	beq		$a1, $0, invalidate_done		# Empty range - nothing to invalidate
	# Written as -64 because addiu takes a signed 16 bit immediate: 0xffc0 is out of
	# range and cannot be encoded as ~63 in one instruction without using $at.
	addiu	$v0, $0, -64					# (delay slot) v0 = ~63 = 0xffffffc0

	# a1 = address of the last byte of the range (base + size - 1)
	addu	$a1, $a1, $a0
	addiu	$a1, $a1, -1

	# Truncate 'start' and 'end' pointers down to the nearest 64 byte boundary
	and		$a0, $a0, $v0
	and		$a1, $a1, $v0

	# Invalidate the current line, then advance; stop after the line at 'end'
invalidate_next:
	cache	8, 0($a0)						# cache op 8: icache invalidate
	bne		$a0, $a1, invalidate_next		# Compares the pre-increment address
	addiu	$a0, $a0, 64					# (delay slot) step to the next line

invalidate_done:
	jr		$ra
	nop
|
|
|
|
#######################################################################################
# Entry point from the emulator into a compiled fragment.
#	a0 - fragment function to enter
#	a1 - gCPUState base pointer
#	a2 - Memory base offset (i.e. g_pu8RamBase - 0x80000000 )
#	a3 - Memory upper bound (e.g. 0x80400000)
#
# Builds a 40 byte frame holding ra, fp and s0-s7. The frame is popped again by
# _ReturnFromDynaRec, so the slot offsets used here must match the ones used there.
# On arrival in the fragment: fp = a1, s7 = a2, s6 = a3.
_EnterDynaRec:
	addiu	$sp, $sp, -40			# Room for ra, fp and s0-s7

	sw		$s7, 36($sp)
	sw		$s6, 32($sp)
	sw		$s5, 28($sp)
	sw		$s4, 24($sp)
	sw		$s3, 20($sp)
	sw		$s2, 16($sp)
	sw		$s1, 12($sp)
	sw		$s0, 8($sp)
	sw		$fp, 4($sp)				# s8 - reused below as the state base pointer
	sw		$ra, 0($sp)				# Return address back into the emulator

	move	$s6, $a3				# Upper bound
	move	$s7, $a2				# Read address table

	jr		$a0						# Jump to our target function
	move	$fp, $a1				# (delay slot) fp = emulated CPU registers
|
|
|
|
#######################################################################################
|
|
# Check gCPUState.StuffToDo. If non-zero, performs any required handling then exits
|
|
# the dynarec system. If the flag is zero this just returns immediately.
|
|
# NB: As a significant optimisation the dynarec system patches the first two ops
|
|
# of this function to return immediately in the case the gCPUState.StuffToDo is not set.
|
|
# As assembled here the body never tests StuffToDo itself: it always calls
# HandleException_extern and then runs straight on into _ReturnFromDynaRec, which
# reloads ra from the stack, so the caller of this stub is not returned to.
# The first two ops (the jal and its delay slot nop) must stay in place for the patching.
|
|
_ReturnFromDynaRecIfStuffToDo:
|
|
|
|
# Sanity checking logic (disabled)
|
|
# lw $v0, _StuffToDo($fp) # StuffToDo
|
|
# bne $v0, $0, exception_exit
|
|
# nop
|
|
# jr $ra # Just return back to caller
|
|
# nop
|
|
#exception_exit:
|
|
|
|
jal HandleException_extern
|
|
nop
|
|
|
|
# Execution falls through into _ReturnFromDynaRec, so no explicit jump is needed
|
|
# j _ReturnFromDynaRec
|
|
# nop
|
|
|
|
#######################################################################################
# Leave the dynarec: pop the 40 byte frame built by _EnterDynaRec (ra, fp, s0-s7)
# and return to whoever called _EnterDynaRec.
# _ReturnFromDynaRecIfStuffToDo falls through into this label.
_ReturnFromDynaRec:
	lw		$ra, 0($sp)				# Return address saved on entry
	lw		$s7, 36($sp)
	lw		$s6, 32($sp)
	lw		$s5, 28($sp)
	lw		$s4, 24($sp)
	lw		$s3, 20($sp)
	lw		$s2, 16($sp)
	lw		$s1, 12($sp)
	lw		$s0, 8($sp)
	lw		$fp, 4($sp)				# s8
	jr		$ra
	addiu	$sp, $sp, 40			# (delay slot) release the frame
|
|
|
|
#######################################################################################
# Direct fragment exit without a pending branch delay.
# Adds the executed instruction count to COUNT, takes it off Events[0].mCount and
# records the exit pc with Delay = NO_DELAY. While the event counter stays positive
# control returns to the caller; otherwise _DirectExitCheckCheckCount takes over.
#	a0 - instructions executed
#	a1 - exit pc
# Clobbers: v0, v1
_DirectExitCheckNoDelay:

	# The code below corresponds to CPU_UpdateCounter
	lw		$v1, _Events($fp)			# Events[0].mCount
	lw		$v0, _C0_Count($fp)			# COUNT register

	sw		$a1, _CurrentPC($fp)		# CurrentPC = exit pc
	sw		$0, _Delay($fp)				# Delay = NO_DELAY

	subu	$v1, $v1, $a0				# Events[0].mCount - ops_executed
	addu	$v0, $v0, $a0				# COUNT + ops_executed
	sw		$v0, _C0_Count($fp)			# COUNT = COUNT + ops_executed

	blez	$v1, _DirectExitCheckCheckCount		# Event due - handle it
	sw		$v1, _Events($fp)			# (delay slot) store the new mCount either way

	jr		$ra							# Return back to caller
	nop
|
|
|
|
#######################################################################################
# Direct fragment exit with a branch delay slot pending.
# Same bookkeeping as _DirectExitCheckNoDelay, but additionally records the branch
# target and sets Delay = EXEC_DELAY.
#	a0 - instructions executed
#	a1 - exit pc
#	a2 - target pc
# Clobbers: v0, v1
_DirectExitCheckDelay:

	# The code below corresponds to CPU_UpdateCounter
	lw		$v0, _C0_Count($fp)			# COUNT register
	sw		$a1, _CurrentPC($fp)		# CurrentPC = exit pc
	sw		$a2, _TargetPC($fp)			# TargetPC
	addu	$v0, $v0, $a0				# COUNT + ops_executed
	sw		$v0, _C0_Count($fp)			# COUNT = COUNT + ops_executed

	lw		$v1, _Events($fp)			# Events[0].mCount
	li		$v0, 1						# EXEC_DELAY
	sw		$v0, _Delay($fp)			# Delay = EXEC_DELAY

	subu	$v1, $v1, $a0				# Events[0].mCount - ops_executed
	blez	$v1, _DirectExitCheckCheckCount		# Event due - handle it
	sw		$v1, _Events($fp)			# (delay slot) store the new mCount either way

	jr		$ra							# Return back to caller
	nop
|
|
|
|
#######################################################################################
# Shared tail of _DirectExitCheckNoDelay / _DirectExitCheckDelay, reached when
# Events[0].mCount has dropped to zero or below.
# Calls CPU_HANDLE_COUNT_INTERRUPT, then leaves the dynarec when StuffToDo is set
# and otherwise resumes the fragment that called the exit check.
# Clobbers: s0 (holds the return address across the call), v0
_DirectExitCheckCheckCount:
	move	$s0, $ra					# ra is about to be overwritten by the jal
	jal		CPU_HANDLE_COUNT_INTERRUPT
	nop

	lw		$v0, _StuffToDo($fp)		# StuffToDo
	bnez	$v0, _ReturnFromDynaRec		# Work pending - exit the dynarec
	nop
	jr		$s0							# Nothing pending - back to the fragment
	nop
|
|
|
|
|
|
#######################################################################################
# Indirect fragment exit.
# Records the exit pc (Delay = NO_DELAY) and calls CPU_UpdateCounter. When StuffToDo
# is clear afterwards, the exit pc is looked up in the indirect exit map and control
# jumps straight to the returned fragment. A set StuffToDo flag or a null lookup
# result (target not compiled yet) leaves the dynarec through _ReturnFromDynaRec.
#	a0 - instructions executed
#	a1 - CIndirectExitMap pointer
#	a2 - exit pc (exit delay is always NO_DELAY)
# Clobbers: s0, s1, plus whatever the two callees clobber
_IndirectExitCheck:
	# Can avoid these until _ReturnFromDynaRec?
	sw		$a2, _CurrentPC($fp)		# CurrentPC = exit pc
	sw		$0, _Delay($fp)				# Delay (NO_DELAY)

	move	$s1, $a2					# Keep the exit pc across the calls
	jal		CPU_UpdateCounter			# a0 holds instructions executed
	move	$s0, $a1					# (delay slot) keep the map pointer as well

	lw		$v0, _StuffToDo($fp)		# StuffToDo
	bnez	$v0, _ReturnFromDynaRec
	nop

	move	$a1, $s1					# exit_pc
	jal		IndirectExitMap_Lookup
	move	$a0, $s0					# (delay slot) p_map

	# v0 holds the pointer to the indirect target; 0 means it is not compiled yet
	beqz	$v0, _ReturnFromDynaRec
	nop

	jr		$v0							# Continue in the target fragment
	nop
|
|
|
|
|
|
#######################################################################################
# Slow path memory read, outside a branch delay slot. Equivalent to:
#
#	u32 ret = u32( *(T *)FuncTableReadAddress( address ) );
#	_ReturnFromDynaRecIfStuffToDo( 0 );
#	return ret;
#
#	a0	address (pre-swizzled)
#	a1	current_pc
#
# The handler is the second pointer of the 8 byte table entry selected by
# address >> 18. Its result in v0 is used as the address for the final load,
# which sits in the delay slot of the return.
# Clobbers: v0, v1, a0, the _Temp1 slot (holds ra), plus whatever the handler clobbers
.macro READ_BITS function, load_instruction
\function:

	la		$v0, g_MemoryLookupTableReadForDynarec
	lw		$v0, 0($v0)					# The symbol holds a pointer to our table

	srl		$v1, $a0, 0x12				# Table index = address >> 18
	sw		$ra, _Temp1($fp)			# Temp storage - ra is overwritten by the calls
	sll		$v1, $v1, 0x3				# * 8 to index the two pointer struct
	addu	$v1, $v0, $v1
	lw		$v0, 4($v1)					# Offset 4 to get the second pointer

	jalr	$v0
	sw		$a1, _CurrentPC($fp)		# (delay slot) CurrentPC

	jal		_ReturnFromDynaRecIfStuffToDo
	move	$a0,$0						# (delay slot) argument 0

	lw		$ra, _Temp1($fp)			# Temp storage

	jr		$ra
	\load_instruction	$v0, 0($v0)		# (delay slot) e.g. lbu, lhu, lw etc

.endm
|
|
|
|
# Branch delay variant of READ_BITS: same lookup and load, but Delay is set to
# EXEC_DELAY around the handler call and put back to NO_DELAY before returning.
#	a0	address (pre-swizzled)
#	a1	current_pc
# Clobbers: v0, v1, a0, the _Temp1 slot (holds ra), plus whatever the handler clobbers
.macro READ_BITS_BD function, load_instruction
\function:

	la		$v0, g_MemoryLookupTableReadForDynarec
	lw		$v0, 0($v0)					# The symbol holds a pointer to our table

	srl		$v1, $a0, 0x12				# Table index = address >> 18
	sw		$a1, _CurrentPC($fp)		# CurrentPC
	sll		$v1, $v1, 0x3				# * 8 to index the two pointer struct
	sw		$ra, _Temp1($fp)			# Temp storage - ra is overwritten by the calls
	addu	$v1, $v0, $v1
	lw		$v0, 4($v1)					# Offset 4 to get the second pointer

	li		$v1, 1						# EXEC_DELAY
	jalr	$v0
	sw		$v1, _Delay($fp)			# (delay slot) Delay = EXEC_DELAY

	jal		_ReturnFromDynaRecIfStuffToDo
	move	$a0,$0						# (delay slot) argument 0

	lw		$ra, _Temp1($fp)			# Temp storage
	sw		$0, _Delay($fp)				# Delay <- NO_DELAY

	jr		$ra
	\load_instruction	$v0, 0($v0)		# (delay slot) e.g. lbu, lhu, lw etc

.endm
|
|
|
|
# Plain read stubs: one per access width and signedness; the second argument is the load used
READ_BITS _ReadBitsDirect_u8, lbu
|
|
READ_BITS _ReadBitsDirect_s8, lb
|
|
READ_BITS _ReadBitsDirect_u16, lhu
|
|
READ_BITS _ReadBitsDirect_s16, lh
|
|
READ_BITS _ReadBitsDirect_u32, lw
|
|
|
|
# Branch delay (BD) read stubs: same set, generated from READ_BITS_BD
READ_BITS_BD _ReadBitsDirectBD_u8, lbu
|
|
READ_BITS_BD _ReadBitsDirectBD_s8, lb
|
|
READ_BITS_BD _ReadBitsDirectBD_u16, lhu
|
|
READ_BITS_BD _ReadBitsDirectBD_s16, lh
|
|
READ_BITS_BD _ReadBitsDirectBD_u32, lw
|
|
|
|
#######################################################################################
# These functions handle writing a value out to memory.
# They set up the PC (and the branch delay flag for the BD versions).
# After the memory has been written, _ReturnFromDynaRecIfStuffToDo is called,
# which returns control back to the interpreter in the case that an exception
# was triggered.
#
#	a0	address (pre-swizzled)
#	a1	value
#	a2	current_pc
#
# The six stubs differ only in the C write routine they call, so they are
# generated from two macros in the same way as the READ_BITS stubs.
# Clobbers: a0, the _Temp1 slot (holds ra), v1 in the BD versions, plus whatever
# the write routine clobbers.

# Write stub used outside a branch delay slot
.macro WRITE_BITS function, write_function
\function:
	sw		$ra, _Temp1($fp)			# Temp storage - ra is overwritten by the calls

	jal		\write_function
	sw		$a2, _CurrentPC($fp)		# (delay slot) CurrentPC

	jal		_ReturnFromDynaRecIfStuffToDo
	move	$a0,$0						# (delay slot) argument 0

	lw		$ra, _Temp1($fp)			# Temp storage
	jr		$ra
	nop

.endm

# Write stub used inside a branch delay slot: Delay is EXEC_DELAY during the write
.macro WRITE_BITS_BD function, write_function
\function:
	sw		$ra, _Temp1($fp)			# Temp storage - ra is overwritten by the calls

	sw		$a2, _CurrentPC($fp)		# CurrentPC
	li		$v1, 1						# EXEC_DELAY

	jal		\write_function
	sw		$v1, _Delay($fp)			# (delay slot) Delay = EXEC_DELAY

	jal		_ReturnFromDynaRecIfStuffToDo
	move	$a0,$0						# (delay slot) argument 0

	lw		$ra, _Temp1($fp)			# Temp storage
	jr		$ra
	sw		$0, _Delay($fp)				# (delay slot) Delay <- NO_DELAY

.endm

WRITE_BITS		_WriteBitsDirect_u32,	Write32BitsForDynaRec
WRITE_BITS_BD	_WriteBitsDirectBD_u32,	Write32BitsForDynaRec
WRITE_BITS		_WriteBitsDirect_u16,	Write16BitsForDynaRec
WRITE_BITS_BD	_WriteBitsDirectBD_u16,	Write16BitsForDynaRec
WRITE_BITS		_WriteBitsDirect_u8,	Write8BitsForDynaRec
WRITE_BITS_BD	_WriteBitsDirectBD_u8,	Write8BitsForDynaRec
|
|
|
|
#######################################################################################
|
|
/**
 * Convert a single precision bit pattern to a double precision bit pattern
 * using integer instructions only.
 * double FloatToDouble(float a);
 *
 * input:   a0 = float bits
 * output:  v0 = low word, v1 = high word (sign, 11 bit exponent, top 20 mantissa bits)
 * clobber: a0,a1
 *
 * An exponent of 0xFF (inf/NaN) becomes 0x7FF. A zero exponent is handled at
 * ftod_denormal, which renormalises denormals and passes a signed zero through.
 */
|
|
_FloatToDouble:
|
|
ext $a1, $a0, 23, 8 /* a1 = (a0 >> 23) & 0xFF (float exponent) */
|
|
beqz $a1, ftod_denormal /* if (a1==0) goto ftod_denormal */
|
|
addiu $v0, $a1, (-0x7F+0x3FF) /* (delay slot) v0 = a1 - 0x7F + 0x3FF (rebias exponent) */
|
|
xori $a1, $a1, 0xFF /* a1 = a1 ^ 0xFF (0 when exponent is all ones) */
|
|
li $v1, 0x7FF /* v1 = 0x7FF */
|
|
movz $v0, $v1, $a1 /* v0 = (a1==0) ? v1 : v0 */
|
|
ext $v1, $a0, 3, 20 /* v1 = (a0 >> 3 ) & 0x00FFFFF (top 20 mantissa bits) */
|
|
ins $v1, $v0, 20, 11 /* v1 = (v1 & 0x800FFFFF) | ((v0<<20) & 0x7FF00000) */
|
|
sll $v0, $a0, 29 /* v0 = (a0 << 29) (low 3 mantissa bits to the top of the low word) */
|
|
srl $a0, $a0, 31 /* a0 = (a0 >> 31) & 1 (sign) */
|
|
jr $ra /* return */
|
|
ins $v1, $a0, 31, 1 /* (delay slot) v1 = (v1 & 0x7FFFFFFF) | ((a0<<31) & 0x80000000) */
|
|
|
|
ftod_denormal:
|
|
sll $v0, $a0, 9 /* v0 = a0 << 9 (mantissa only, sign and exponent shifted out) */
|
|
beqzl $v0, ftod_zero /* if (v0==0) goto ftod_zero */
|
|
move $v1, $zero /* v1 = 0 (branch-likely delay slot: runs only when the branch is taken) */
|
|
li $v1, 0x380 /* v1 = 0x380 */
|
|
clz $a1, $v0 /* a1 = clz(v0) */
|
|
subu $v0, $v1, $a1 /* v0 = v1 - a1 = 0x380 - clz(mantissa) (double exponent) */
|
|
sllv $a1, $a0, $a1 /* a1 = a0 << a1 (leading one moves up to bit 22) */
|
|
ext $v1, $a1, 2, 20 /* v1 = (a1 >> 2 ) & 0x00FFFFF (leading one at bit 22 is dropped) */
|
|
ins $v1, $v0, 20, 11 /* v1 = (v1 & 0x800FFFFF) | ((v0<<20) & 0x7FF00000) */
|
|
sll $v0, $a1, 30 /* v0 = (a1 << 30) */
|
|
ftod_zero:
|
|
srl $a0, $a0, 31 /* a0 = (a0 >> 31) & 1 (sign) */
|
|
jr $ra /* return */
|
|
ins $v1, $a0, 31, 1 /* (delay slot) v1 = (v1 & 0x7FFFFFFF) | ((a0<<31) & 0x80000000) */
|
|
|
|
#######################################################################################
/**
 * Convert a double precision bit pattern to a single precision bit pattern
 * using integer instructions only. Excess mantissa bits are dropped (truncated).
 * float DoubleToFloat(double a);
 *
 * input:   a0 = low word, a1 = high word (sign, exponent, top 20 mantissa bits)
 * output:  v0 = float bits
 * clobber: v0,v1,a2,a3
 *
 * Paths:
 *   dtof_zero       double exponent is 0: result is signed zero, or the smallest
 *                   denormal when any mantissa bit is set
 *   dtof_naninf     double exponent is 0x7FF: inf stays inf, NaN keeps a non-zero mantissa
 *   dtof_denormal   rebiased exponent <= 0: result is a float denormal (or the
 *                   smallest denormal when the shift would be 22 or more)
 *   dtof_inf        rebiased exponent > 0xFE: result is signed infinity
 *   dtof_inf_normal rebiased exponent == 0xFE: infinity when the top 24 mantissa
 *                   bits are all ones, otherwise a normal number
 */
_DoubleToFloat:
	ext		$a2, $a1, 20, 11				/* a2 = (a1>>20) & 0x000007FF (double exponent) */
	beqz	$a2, dtof_zero					/* if (a2==0) goto dtof_zero */
	xori	$a3, $a2, 0x7FF					/* (delay slot) a3 = a2 ^ 0x7FF */
	beqz	$a3, dtof_naninf				/* if (a3==0) goto dtof_naninf */
	addiu	$a3, $a2, (+0x7F-0x3FF)			/* (delay slot) a3 = a2 + 0x7F - 0x3FF (rebiased exponent) */
	blez	$a3, dtof_denormal				/* if (a3<=0) goto dtof_denormal */
	addiu	$v1, $a3, -0xFE					/* (delay slot) v1 = a3 - 0xFE */
	bgtz	$v1, dtof_inf					/* if (v1 > 0) goto dtof_inf */
	move	$v0, $zero						/* (delay slot) v0 = 0 */

	srl		$v0, $a0, 29					/* v0 = (a0>>29) & 0x00000007 */
	ins		$v0, $a1, 3, 20					/* v0 = (v0 & 0xFF800007) | ((a1 & 0xFFFFF)<<3) */
	beqz	$v1, dtof_inf_normal			/* if (v1==0) goto dtof_inf_normal */
dtof_normal:								/* the instruction below is also that delay slot */
	srl		$v1, $a1, 31					/* v1 = (a1>>31) & 1 (sign) */
dtof_normal2:
	ins		$v0, $v1, 31, 1					/* v0 = (v0 & 0x7FFFFFFF) | (v1 << 31) */
	jr		$ra								/* return */
	ins		$v0, $a3, 23, 8					/* (delay slot) v0 = (v0 & 0x807FFFFF) | (a3 << 23) */

dtof_denormal:
	sll		$a2, $a1, 12					/* a2 = a1 << 12 */
	srl		$v0, $a2, 10					/* v0 = a2 >> 10 (top 20 mantissa bits at 21..2) */
	srl		$a2, $a0, 30					/* a2 = a0 >> 30 */
	or		$v0, $v0, $a2					/* v0 = v0 | a2 (operand was missing its $ prefix) */
	li		$a2, 0x00400000					/* a2 = 0x00400000 (the implicit leading one) */
	or		$v0, $v0, $a2					/* v0 = v0 | a2 */
	subu	$a2, $zero, $a3					/* a2 = zero - a3 (right shift amount) */
	sltiu	$a3, $a2, 22					/* a3 = (a2 < 22) */
	beqz	$a3, dtof_min					/* if (a3==0) goto dtof_min; a2 is non-zero there */
	srlv	$v0, $v0, $a2					/* (delay slot) v0 = v0 >> a2 */
	srl		$v1, $a1, 31					/* v1 = (a1>>31) & 1 (sign) */
	jr		$ra								/* return */
	ins		$v0, $v1, 31, 1					/* (delay slot) v0 = (v0 & 0x7FFFFFFF) | (v1 << 31) */

dtof_zero:
	sll		$a2, $a1, 12					/* a2 = a1 << 12 */
	or		$a2, $a2, $a0					/* a2 = a2 | a0 (non-zero when any mantissa bit is set) */
dtof_min:
	li		$v0, 0x00000001					/* v0 = 0x00000001 (smallest denormal) */
	movz	$v0, $zero, $a2					/* v0 = (a2==0) ? zero : v0 */
	srl		$a2, $a1, 31					/* a2 = (a1 >> 31) & 1 (sign) */
	jr		$ra								/* return */
	ins		$v0, $a2, 31, 1					/* (delay slot) v0 = (v0 & 0x7FFFFFFF) | ((a2<<31) & 0x80000000) */

dtof_inf_normal:
	nor		$a2, $zero, $a1					/* a2 = ~a1 */
	sll		$a2, $a2, 12					/* a2 = a2 << 12 (non-zero unless the top 20 mantissa bits are all ones) */
	bnez	$a2, dtof_normal				/* if (a2!=0) goto dtof_normal */
	srl		$a2, $a0, 28					/* (delay slot) a2 = a0 >> 28 */
	sltiu	$a2, $a2, 0xF					/* a2 = (a2 < 0xF) */
	bnez	$a2, dtof_normal2				/* if (a2!=0) goto dtof_normal2 */
	srl		$v1, $a1, 31					/* (delay slot) v1 = (a1>>31) & 1 (sign) */
	j		dtof_inf						/* goto dtof_inf */
	move	$v0, $zero						/* (delay slot) v0 = 0 */

dtof_naninf:
	sll		$a2, $a1, 12					/* a2 = a1 << 12 */
	or		$a3, $a2, $a0					/* a3 = a2 | a0 (zero for infinity, non-zero for NaN) */
	srl		$v0, $a2, 9						/* v0 = a2 >> 9 */
	srl		$a2, $a0, 29					/* a2 = a0 >> 29 */
	or		$v0, $v0, $a2					/* v0 = v0 | a2 */
	sltiu	$a2, $v0, 1						/* a2 = (v0 < 1) */
	or		$v0, $v0, $a2					/* v0 = v0 | a2 (keeps a NaN mantissa non-zero) */
	movz	$v0, $zero, $a3					/* v0 = (a3==0) ? zero : v0 */
dtof_inf:
	li		$v1, 0x7F800000					/* v1 = 0x7F800000 (exponent all ones) */
	or		$v0, $v0, $v1					/* v0 = v0 | v1 */
	srl		$v1, $a1, 31					/* v1 = (a1 >> 31) & 1 (sign) */
	jr		$ra								/* return */
	ins		$v0, $v1, 31, 1					/* (delay slot) v0 = (v0 & 0x7FFFFFFF) | ((v1<<31) & 0x80000000) */
|
|
|
|
#######################################################################################
/**
 * Debug helper: call output_extern with a0 untouched while preserving the
 * integer registers a C callee is allowed to clobber.
 * void printf_asm(u32 val);
 *
 * input:   a0
 * output:  -
 * clobber: -
 * uses:    72 bytes of stack holding ra, at, v0, v1, a0-a3 and t0-t9
 */
_printf_asm:
	addiu	$sp, $sp, -72			# 18 words of save area
	sw		$ra, 0($sp)				# Must be saved before the jal overwrites it
	sw		$t9, 68($sp)
	sw		$t8, 64($sp)
	sw		$t7, 60($sp)
	sw		$t6, 56($sp)
	sw		$t5, 52($sp)
	sw		$t4, 48($sp)
	sw		$t3, 44($sp)
	sw		$t2, 40($sp)
	sw		$t1, 36($sp)
	sw		$t0, 32($sp)
	sw		$a3, 28($sp)
	sw		$a2, 24($sp)
	sw		$a1, 20($sp)
	sw		$a0, 16($sp)
	sw		$v1, 12($sp)
	sw		$v0, 8($sp)
	jal		output_extern
	sw		$at, 4($sp)				# (delay slot) last register of the save set

	lw		$ra, 0($sp)
	lw		$t9, 68($sp)
	lw		$t8, 64($sp)
	lw		$t7, 60($sp)
	lw		$t6, 56($sp)
	lw		$t5, 52($sp)
	lw		$t4, 48($sp)
	lw		$t3, 44($sp)
	lw		$t2, 40($sp)
	lw		$t1, 36($sp)
	lw		$t0, 32($sp)
	lw		$a3, 28($sp)
	lw		$a2, 24($sp)
	lw		$a1, 20($sp)
	lw		$a0, 16($sp)
	lw		$v1, 12($sp)
	lw		$v0, 8($sp)
	lw		$at, 4($sp)
	jr		$ra						/* return */
	addiu	$sp, $sp, 72			# (delay slot) restore the stack
|
|
|
|
#######################################################################################
|
|
/**
 * Unsigned 64bit multiply (A * B) -> 128bit result
 * void _DMULTU(u32 A_LSB, u32 A_MSB, u32 B_LSB, u32 B_MSB);
 *
 * input:   a0 = A low, a1 = A high, a2 = B low, a3 = B high
 * output:  none in registers; the product is stored relative to fp as four words:
 *          _MultLo+0, _MultLo+4, _MultHi+0, _MultHi+4 (least significant first)
 * clobber: a0, a1, a2, a3, v0, v1
 * uses:    -
 *
 * Schoolbook multiply built from four 32x32 partial products, with the carries
 * between result words recovered through sltu.
 */
|
|
_DMULTU:
|
|
multu $a2,$a0 // HI:LO = B_lo * A_lo
|
|
mfhi $v0 // v0 = high word, feeds result word 1
|
|
mflo $v1
|
|
sw $v1,_MultLo+0($fp) // result word 0
|
|
multu $a2,$a1 // HI:LO = B_lo * A_hi
|
|
mflo $a2
|
|
addu $v1,$v0,$a2 // partial sum for word 1
|
|
sltu $a2,$v1,$v0 // a2 = carry out of that add
|
|
mfhi $v0 // v0 = high word of B_lo * A_hi, feeds word 2
|
|
multu $a3,$a0 // HI:LO = B_hi * A_lo
|
|
mflo $a0
|
|
addu $a0,$v1,$a0 // word 1 complete
|
|
sltu $v1,$a0,$v1 // v1 = carry out of that add
|
|
sw $a0,_MultLo+4($fp) // result word 1
|
|
mfhi $a0 // high word of B_hi * A_lo
|
|
addu $a2,$a2,$a0
|
|
addu $v1,$v1,$a2 // v1 = both word 1 carries + high word of B_hi * A_lo
|
|
multu $a3,$a1 // HI:LO = B_hi * A_hi
|
|
mflo $a1
|
|
addu $a1,$v0,$a1 // high word of B_lo * A_hi + low word of B_hi * A_hi
|
|
addu $v1,$a1,$v1 // word 2 complete
|
|
sw $v1,_MultHi+0($fp) // result word 2
|
|
sltu $v0,$a1,$v0 // carry out of the first word 2 add
|
|
mfhi $a0 // high word of B_hi * A_hi
|
|
addu $v0,$v0,$a0
|
|
sltu $a1,$v1,$a1 // carry out of the second word 2 add
|
|
addu $a1,$a1,$v0 // word 3 complete
|
|
jr $ra
|
|
sw $a1,_MultHi+4($fp) // (delay slot) result word 3
|
|
|
|
#######################################################################################
/**
 * Signed 64bit multiply (A * B) -> 64bit result (should be 128bit!)
 * void _DMULT(u32 A_LSB, u32 A_MSB, u32 B_LSB, u32 B_MSB);
 *
 * input:   a0 = A low, a1 = A high, a2 = B low, a3 = B high
 * output:  none in registers; the low 64 bits of the product are stored at
 *          _MultLo+0 / _MultLo+4 and both words of _MultHi are set to zero
 * clobber: a0, a1, a2, a3, v0
 * uses:    -
 */
_DMULT:
	sw		$zero,_MultHi+0($fp)	// upper 64 bits are not computed, always zero
	multu	$a0,$a2					// HI:LO = A_lo * B_lo
	mflo	$v0
	sw		$v0,_MultLo+0($fp)		// result word 0
	mfhi	$v0						// carry-in for result word 1
	mult	$a0,$a3					// only the low word of A_lo * B_hi is needed
	mflo	$a0
	mult	$a2,$a1					// only the low word of B_lo * A_hi is needed
	mflo	$a2
	addu	$a0,$a2,$a0				// sum of the two cross products
	addu	$v0,$v0,$a0
	sw		$zero,_MultHi+4($fp)
	jr		$ra
	sw		$v0,_MultLo+4($fp)		// (delay slot) result word 1
|
|
|
|
#######################################################################################
|
|
/**
 * Unsigned 64bit division (Num / Div) -> 64bit quotient and 32bit remainder
 * void _DDIVU(u32 Num_LSB, u32 Num_MSB, u32 Div_LSB);
 *
 * input:   a0 = numerator low, a1 = numerator high, a2 = 32bit divisor
 * output:  none in registers; quotient stored at _MultLo+0 / _MultLo+4,
 *          remainder at _MultHi+0, and _MultHi+4 is set to zero
 * clobber: a0, a1, a2, a3, v0, v1
 * uses:    t0 (saved in the _Temp1 slot and restored on the long path)
 *
 * A zero high word takes the single divu at DDIVU_skip1. Otherwise the high word
 * is divided first and its remainder is carried into a bit-by-bit restoring
 * division of the low word. Several labels sit on branch delay slots, so the
 * instruction order must not be changed. A zero divisor is not checked for.
 */
|
|
_DDIVU:
|
|
beqz $a1, DDIVU_skip1 //Check if we need a full 64bit division
|
|
sw $zero,_MultHi+4($fp) //(delay slot, runs on both paths) Remainder hi
|
|
|
|
divu $a1,$a2 //High word first
|
|
mflo $a3
|
|
mfhi $a1 //a1 = running remainder from here on
|
|
sw $a3,_MultLo+4($fp) //Quot hi
|
|
sw $t0,_Temp1($fp) //save reg content
|
|
move $v0,$zero //v0 = low quotient word being built
|
|
move $v1,$zero
|
|
b DDIVU_skip2
|
|
li $a3,33 //(delay slot) a3 = pass counter
|
|
DDIVU_loop1:
|
|
srl $v1,$a1,0x1f //v1 = bit about to be shifted out of the remainder
|
|
sll $a1,$a1,0x1
|
|
or $a1,$a1,$t0 //shift in the next numerator bit (t0 set in the loop branch delay slot)
|
|
sll $a0,$a0,0x1
|
|
sll $v0,$v0,0x1
|
|
bnez $v1, DDIVU_skip3 //remainder overflowed 32 bits, so always subtract
|
|
DDIVU_skip2: //the instruction below is also the delay slot of the branch above
|
|
sltu $t0,$a1,$a2 //t0 = remainder < divisor
|
|
|
|
bnez $t0, DDIVU_skip4 //too small to subtract, quotient bit stays 0
|
|
DDIVU_skip3: //the instruction below is also the delay slot of the branch above
|
|
addiu $a3,$a3,-1 //one pass done (runs once per pass on every route)
|
|
subu $a1,$a1,$a2
|
|
addiu $v0,$v0,1 //quotient bit = 1
|
|
|
|
DDIVU_skip4:
|
|
bnez $a3, DDIVU_loop1
|
|
srl $t0,$a0,0x1f //(delay slot) t0 = next numerator bit
|
|
|
|
lw $t0,_Temp1($fp) //restore reg content
|
|
sw $v0,_MultLo+0($fp) //Quot lo
|
|
jr $ra
|
|
sw $a1,_MultHi+0($fp) //(delay slot) Remainder lo
|
|
|
|
DDIVU_skip1: //Do a 32bit div only
|
|
divu $a0,$a2
|
|
mflo $v0
|
|
mfhi $a1
|
|
sw $v0,_MultLo+0($fp) //Quot lo
|
|
sw $zero,_MultLo+4($fp) //Quot hi
|
|
jr $ra
|
|
sw $a1,_MultHi+0($fp) //(delay slot) Remainder lo
|
|
|
|
#######################################################################################
/**
 * Signed 64bit division (Num / Div) -> 64bit quotient and sign extended 32bit remainder
 * void _DDIV(s64 Num, s32 Div);
 *
 * input:   a0 = numerator low, a1 = numerator high, a2 = 32bit signed divisor
 * output:  none in registers; quotient stored at _MultLo+0 / _MultLo+4,
 *          remainder at _MultHi+0 with its sign extension at _MultHi+4
 * clobber: a0, a1, a2, a3, v0, v1
 * uses:    t0, t1, t2 (saved in the _Temp1, _Temp2, _Temp3 slots and restored)
 *
 * Both operands are made positive, divided unsigned (same scheme as _DDIVU) and
 * the signs are put back afterwards. t2 carries two flags:
 *   bit 0 - quotient is negative (operand signs differ)
 *   bit 1 - numerator was negative; the remainder takes the sign of the
 *           numerator, not the sign of the quotient
 * Several labels sit on branch delay slots, so the instruction order matters.
 * A zero divisor is not checked for.
 */
_DDIV:
	sw		$t2,_Temp3($fp)			//save reg content

	slt		$t2,$a1,$zero			//t2 = numerator < 0 ? 1 : 0
	sll		$v1,$t2,1
	bgez	$a1,DDIV_skip1			//Make numerator positive if needed
	or		$t2,$t2,$v1				//(delay slot) t2 = 3 for a negative numerator, else 0

	negu	$a0,$a0					//64bit negate of a1:a0
	negu	$a1,$a1
	sltu	$v0,$zero,$a0			//borrow from the low word
	subu	$a1,$a1,$v0
DDIV_skip1:
	bgez	$a2,DDIV_skip2			//Make divisor positive if needed
	sw		$t0,_Temp1($fp)			//(delay slot, runs on both paths) save reg content
	xori	$t2,$t2,0x1				//flip the quotient sign flag only
	negu	$a2,$a2
DDIV_skip2:
	beqz	$a1,DDIV_skip8			//Check if top 32bit == 0
	sw		$t1,_Temp2($fp)			//(delay slot, runs on both paths) save reg content

	divu	$a1,$a2					//Do long 64bit division, high word first
	move	$v0,$zero				//v0 = low quotient word being built
	move	$t0,$zero
	li		$v1,33					//v1 = pass counter
	mflo	$t1						//t1 = quotient hi
	b		DDIV_skip4
	mfhi	$a3						//(delay slot) a3 = running remainder

DDIV_loop1:
	srl		$t0,$a3,0x1f			//t0 = bit about to be shifted out of the remainder
	sll		$a3,$a3,0x1
	or		$a3,$a3,$a1				//shift in the next numerator bit
	sll		$a0,$a0,0x1
	sll		$v0,$v0,0x1
	bnez	$t0,DDIV_skip5			//remainder overflowed 32 bits, so always subtract
DDIV_skip4:							//the instruction below is also that delay slot
	sltu	$a1,$a3,$a2				//a1 = remainder < divisor
	bnez	$a1,DDIV_skip6			//too small to subtract, quotient bit stays 0
DDIV_skip5:							//the instruction below is also that delay slot
	addiu	$v1,$v1,-1				//one pass done (runs once per pass on every route)
	subu	$a3,$a3,$a2
	addiu	$v0,$v0,1				//quotient bit = 1
DDIV_skip6:
	bnez	$v1,DDIV_loop1
	srl		$a1,$a0,0x1f			//(delay slot) a1 = next numerator bit

DDIV_loop2:
	andi	$v1,$t2,0x1				//Need sign flip on quotient?
	beqz	$v1,DDIV_skip7
	lw		$t0,_Temp1($fp)			//(delay slot, runs on both paths) restore reg content

	negu	$v0,$v0					//64bit negate of t1:v0
	negu	$t1,$t1
	sltu	$v1,$zero,$v0			//borrow from the low word
	subu	$t1,$t1,$v1
DDIV_skip7:
	srl		$t2,$t2,1				//t2 = 1 when the numerator was negative
	negu	$v1,$t2					//v1 = all ones when the remainder must be negated
	xor		$a3,$a3,$v1
	addu	$a3,$a3,$t2				//a3 = numerator negative ? -a3 : a3
	sw		$v0,_MultLo+0($fp)		//Quot lo
	sw		$t1,_MultLo+4($fp)		//Quot hi
	sw		$a3,_MultHi+0($fp)		//Rem lo
	sra		$a3,$a3,31
	sw		$a3,_MultHi+4($fp)		//Rem hi = sign extension of Rem lo
	lw		$t1,_Temp2($fp)			//restore reg content
	jr		$ra
	lw		$t2,_Temp3($fp)			//(delay slot) restore reg content

DDIV_skip8:							//Do short 32bit division
	divu	$a0,$a2
	move	$t1,$zero				//hi part of quotient will be zero
	mflo	$v0
	b		DDIV_loop2
	mfhi	$a3						//(delay slot) a3 = remainder
|
|
|
|
#######################################################################################
|
|
|
|
.set pop
|