mirror of
https://github.com/emu-russia/pureikyubu.git
synced 2025-04-02 10:42:15 -04:00
612 lines
19 KiB
ArmAsm
612 lines
19 KiB
ArmAsm
# Project Dolwin (IPL Replacement): Bootstrap Stage (BS).
|
|
|
|
# Brief runflow of boot stage execution:
|
|
#
|
|
# - Init Flipper (ARAM, MI, reset DVD).
|
|
# - Init Gekko (enable cache, set Dolphin OS memory model, enable translation).
|
|
# - Probe AD16.
|
|
# - Test memory. Halt CPU, if test failed.
|
|
# - Load IPL into main memory and disable bootrom scrambler.
|
|
# - Jump to IPL's __start.
|
|
|
|
.text
|
|
.org 0 # Will be actually 0xFFF0_0100 later.
|
|
|
|
# boot: first code of the bootstrap stage. Loads fixed constants into HID0 and MSR.
boot:
|
|
|
|
# This will initialize Gekko implementation specifics.
|
|
lis r4, 0x0011
|
|
addi r4, r4, 0x0C64 # r4 = 0x00110C64
|
|
mtspr HID0, r4
|
|
|
|
# This will initialize CPU program model.
|
|
lis r4, 0x0000
|
|
addi r4, r4, 0x2000 # r4 = 0x00002000
|
|
mtmsr r4 # MSR = 0x00002000; the translation bits (0x0030) stay clear until they are set further down.
|
|
|
|
# Initialize auxiliary memory (ARAM).
# Two halfword stores, to 0x0C005012 and to 0x0C00501A (r4 + 8).
# Address translation has not been enabled yet at this point in the file.
|
|
lis r4, 0x0C00
|
|
addi r4, r4, 0x5012 # r4 = 0x0C005012
|
|
li r5, 67 # 67 = 0x43
|
|
sth r5, 0 (r4) # Set 0x5012 register value to 0x43.
|
|
li r5, 156 # 156 = 0x9C
|
|
sth r5, 8 (r4) # Set ARAM refresh value.
|
|
|
|
# Initialize Flipper memory interface.
|
|
lis r3, 0x0C00
|
|
ori r3, r3, 0x4000 # r3 = 0x0C004000
|
|
li r4, 64 # 64 = 0x40
|
|
sth r4, 38 (r3) # Set 0x4026 register value to 0x40.
|
|
nop # Two nops follow the store; the source gives no reason for them.
|
|
nop
|
|
|
|
# Enable data and instruction cache.
# Read-modify-write of HID0: the old value is kept in r3, the new one is r3 | 0xC000.
|
|
mfspr r3, HID0
|
|
ori r4, r3, 0xC000 # r4 = HID0 | 0xC000
|
|
mtspr HID0, r4
|
|
nop
|
|
nop
|
|
nop
|
|
isync
|
|
|
|
# Initialize CPU memory model. Clear BATs and segment registers.
# Only the upper word of each BAT is written here (with 0); the lower words are
# programmed in the next step. All 16 segment registers receive 0x80000000.
|
|
li r4, 0
|
|
mtspr DBAT0U, r4
|
|
mtspr DBAT1U, r4
|
|
mtspr DBAT2U, r4
|
|
mtspr DBAT3U, r4
|
|
mtspr IBAT0U, r4
|
|
mtspr IBAT1U, r4
|
|
mtspr IBAT2U, r4
|
|
mtspr IBAT3U, r4
|
|
isync
|
|
lis r4, 0x8000
|
|
addi r4, r4, 0 # Adds zero; r4 stays 0x80000000.
|
|
mtsr 0, r4
|
|
mtsr 1, r4
|
|
mtsr 2, r4
|
|
mtsr 3, r4
|
|
mtsr 4, r4
|
|
mtsr 5, r4
|
|
mtsr 6, r4
|
|
mtsr 7, r4
|
|
mtsr 8, r4
|
|
mtsr 9, r4
|
|
mtsr 10, r4
|
|
mtsr 11, r4
|
|
mtsr 12, r4
|
|
mtsr 13, r4
|
|
mtsr 14, r4
|
|
mtsr 15, r4
|
|
|
|
# Configure memory model:
|
|
#
|
|
# DBAT0: 80001FFF 00000002 Write-back cached main memory, 256MB block.
|
|
# DBAT1: C0001FFF 0000002A Cache inhibited, Guarded, 256MB block.
|
|
# DBAT2: 00000000 xxxxxxxx Dont care, reserved.
|
|
# DBAT3: FFF0001F FFF00001 Bootrom, 1MB block (temporary for BS)
|
|
#
|
|
# IBAT0: 80001FFF 00000002 Write-back cached main memory, 256MB block.
|
|
# IBAT1: 00000000 xxxxxxxx Dont care, reserved.
|
|
# IBAT2: 00000000 xxxxxxxx Dont care, reserved.
|
|
# IBAT3: FFF0001F FFF00001 Bootrom, 1MB block (temporary for BS)
# NOTE(review): the IBAT3 line used to read "Cache inhibited, Guarded", but the
# value written below is the same FFF00001 as DBAT3 and has none of the 0x28 bits
# that DBAT1 (0000002A) carries for that attribute - confirm the intended setting.
#
# In the table the first word is the upper register (xBATnU), the second the lower
# (xBATnL). Each pair is written lower first, then upper, then isync.
# DBAT2, IBAT1 and IBAT2 are not written here.
|
|
lis r4, 0x0000
|
|
addi r4, r4, 2 # r4 = 0x00000002 (lower word for DBAT0/IBAT0)
|
|
lis r3, 0x8000
|
|
addi r3, r3, 0x1FFF # r3 = 0x80001FFF (upper word for DBAT0/IBAT0)
|
|
mtspr DBAT0L, r4
|
|
mtspr DBAT0U, r3
|
|
isync
|
|
mtspr IBAT0L, r4
|
|
mtspr IBAT0U, r3
|
|
isync
|
|
lis r4, 0x0000
|
|
addi r4, r4, 42 # r4 = 0x0000002A (lower word for DBAT1)
|
|
lis r3, 0xC000
|
|
addi r3, r3, 0x1FFF # r3 = 0xC0001FFF (upper word for DBAT1)
|
|
mtspr DBAT1L, r4
|
|
mtspr DBAT1U, r3
|
|
isync
|
|
lis r4, 0xFFF0
|
|
addi r4, r4, 1 # r4 = 0xFFF00001 (lower word for DBAT3/IBAT3)
|
|
lis r3, 0xFFF0
|
|
addi r3, r3, 31 # r3 = 0xFFF0001F (upper word for DBAT3/IBAT3)
|
|
mtspr DBAT3L, r4
|
|
mtspr DBAT3U, r3
|
|
isync
|
|
mtspr IBAT3L, r4
|
|
mtspr IBAT3U, r3
|
|
isync
|
|
|
|
# Enable instruction and data translation.
# Read-modify-write of MSR: ORs 0x0030 into the current value, then isync.
|
|
mfmsr r4
|
|
ori r4, r4, 0x0030 # Enable address translation.
|
|
mtmsr r4
|
|
isync
|
|
|
|
# Write 0x0245248A to 0x3030 register. Meaning is unknown. Register is unknown.
# r3 is left holding 0xCC003000; the DVD reset code that follows uses it as base.
|
|
lis r3, 0xCC00
|
|
ori r3, r3, 0x3000 # r3 = 0xCC003000
|
|
lis r4, 0x0245
|
|
ori r4, r4, 0x248A # r4 = 0x0245248A
|
|
stw r4, 48 (r3) # Write 0x0245248A to 0x3030 register.
|
|
|
|
# Reset DVD, through PI reset register.
#
# r3 holds 0xCC003000 (loaded just above), so 36 (r3) addresses 0xCC003024.
# Sequence: write the register with bit 31 set and bit 29 cleared, wait 4388
# time-base ticks, then write it again with bits 31 and 29 both set.
# Bit numbers are big-endian PowerPC numbering: bit 31 = mask 0x1, bit 29 = mask 0x4.
#
# Fix: the masks now match the documented intent. The previous
# "rlwinm r4, r4, 0, 31, 28" also cleared bit 30, and the previous
# "ori r4, r4, 0x0003" set bit 30 instead of bit 29, so bit 29 stayed cleared.
# Clobbers: r4, r5, r6, r7, CR0.
    lwz     r4, 36 (r3)             # Read PI reset register.
    ori     r4, r4, 0x0001          # Set bit 31.
    rlwinm  r4, r4, 0, 30, 28       # Clear bit 29 only (mask keeps bits 30..31 and 0..28).
    stw     r4, 36 (r3)             # Write new value in reset register.
    mftb    r5, TBL                 # r5 = time base at the start of the delay.
WaitDVDReset:
    mftb    r6, TBL
    sub     r7, r6, r5              # r7 = elapsed time-base ticks.
    cmplwi  r7, 4388
    blt+    WaitDVDReset            # Wait ~9 us (with 486MHz clock)
    ori     r4, r4, 0x0005          # Set bit 31, set bit 29.
    stw     r4, 36 (r3)             # Write new value in reset register.
|
|
|
|
# Allow 32MHz EXI clock setting by CPU.
# r14 is left holding 0xCC006400; DisableScrambler uses it again near the end.
|
|
lis r14, 0xCC00
|
|
ori r14, r14, 0x6400 # r14 = 0xCC006400
|
|
li r4, 0
|
|
stw r4, 60 (r14) # SI EXICLK[LOCK] = 0 (the value stored is 0)
|
|
|
|
# To probe AD16 we must read its EXI ID. It should be 0x04120000. Place it to R20.
#
# Registers set up here and relied on later by AD16Write:
#   r2  = 0xCC006800 (base for the EXI register offsets used below)
#   r22 = 0x000000BA (value written to EXI2 CSR to select AD16)
#   r8  = 1          (mask tested in the wait loops)
#   r10 = 0          (value written to EXI2 CSR to deselect)
#   r21 = 0x04120000 (expected ID)
#   r20 = ID actually read back
# Scratch: r3, r7, r16.
|
|
lis r2, 0xCC00
|
|
ori r2, r2, 0x6800
|
|
lis r22, 0x0000
|
|
ori r22, r22, 0x00BA
|
|
li r8, 1
|
|
li r10, 0
|
|
lis r21, 0x0412
|
|
ori r21, r21, 0x0000
|
|
lis r3, 0x0000
|
|
ori r3, r3, 0x0000
|
|
lis r7, 0x0000
|
|
ori r7, r7, 0x0015
|
|
stw r3, 56 (r2) # EXI2 DATA = 0 (Get ID command)
|
|
stw r22, 40 (r2) # Select AD16, through EXI2 CSR.
|
|
lwz r16, 40 (r2)
|
|
stw r7, 52 (r2) # Write immediate 2 bytes from DATA.
|
|
WaitAd16_0:
|
|
lwz r16, 52 (r2) # |
|
|
and. r16, r16, r8 # | Wait until transfer complete.
|
|
bgt+ WaitAd16_0 # |
|
|
lis r7, 0x0000
|
|
ori r7, r7, 0x0031
|
|
stw r7, 52 (r2) # Read immediate 4 bytes to DATA (ID).
|
|
WaitAd16_1:
|
|
lwz r16, 52 (r2) # |
|
|
and. r16, r16, r8 # | Wait until transfer complete.
|
|
bgt+ WaitAd16_1 # |
|
|
stw r10, 40 (r2) # Deselect device.
|
|
lwz r16, 40 (r2) # Read EXI2 CSR twice. Why? No idea.
|
|
lwz r16, 40 (r2) # Maybe its deselect attribute..
|
|
lwz r20, 56 (r2) # r20 = DATA. It should contain ID.
|
|
b Jump_0
|
|
.word 0
|
|
.word 0
|
|
|
|
# Write "trace step" value to AD16. Only when probe was success (R20 = AD16 ID).
|
|
# Input value (trace step) must be in R15.
#
# Entry point is the AD16Write label at the end of this block; call it with bl.
# It compares r20 with r21 and returns immediately when they differ.
# Uses r2, r8, r10 and r22 as set up by the AD16 probe. Scratch: r3, r7, r16, CR0.
# Jump_0 .. Jump_4 belong to the linear jump chain and are not part of the
# routine; on the first pass they lead on to Trace_01.
|
|
Jump_0:
|
|
b Jump_1
|
|
DoAD16Write:
|
|
lis r3, 0xA000
|
|
ori r3, r3, 0x0000
|
|
lis r7, 0x0000
|
|
ori r7, r7, 0x0005
|
|
stw r3, 56 (r2) # EXI2 DATA = 0xA0000000 (Write AD16 command)
|
|
stw r22, 40 (r2) # Select AD16, through EXI2 CSR.
|
|
lwz r16, 40 (r2)
|
|
b AD16Write_0
|
|
Jump_1:
|
|
b Jump_2
|
|
AD16Write_0:
|
|
stw r7, 52 (r2) # Write immediate 1 byte from DATA.
|
|
WaitAd16Write_0:
|
|
lwz r16, 52 (r2) # |
|
|
and. r16, r16, r8 # | Wait until transfer complete.
|
|
bgt+ WaitAd16Write_0 # |
|
|
nop
|
|
nop
|
|
b AD16Write_1
|
|
Jump_2:
|
|
b Jump_3
|
|
AD16Write_1:
|
|
lis r7, 0x0000
|
|
ori r7, r7, 0x0035
|
|
stw r15, 56 (r2) # EXI2 DATA = trace step
|
|
stw r7, 52 (r2) # Write immediate 4 bytes from DATA.
|
|
WaitAd16Write_1:
|
|
lwz r16, 52 (r2) # |
|
|
and. r16, r16, r8 # | Wait until transfer complete.
|
|
b AD16Write_2 # |
|
|
Jump_3:
|
|
b Jump_4
|
|
AD16Write_2:
|
|
bgt+ WaitAd16Write_1 # Uses CR0 from the and. above; the unconditional branch in between leaves it intact.
|
|
stw r10, 40 (r2) # Deselect device.
|
|
lwz r16, 40 (r2)
|
|
lwz r16, 40 (r2)
|
|
blr
|
|
.word 0
|
|
Jump_4:
|
|
b Trace_01
|
|
AD16Write:
|
|
cmplw r20, r21 # If AD16 probe failed, then skip.
|
|
beq+ DoAD16Write
|
|
blr
|
|
|
|
# Trace step 0x01 - Nothing ?
# Reports 0x01000000 through AD16Write, then continues at Trace_02.
|
|
Trace_01:
|
|
lis r15, 0x0100 # AD16 = 0x01000000
|
|
bl AD16Write
|
|
nop
|
|
nop
|
|
nop
|
|
nop
|
|
nop
|
|
nop
|
|
nop
|
|
b Trace_02
|
|
.word 0
|
|
.word 0
|
|
|
|
# Trace step 0x02 - Nothing ?
# Reports 0x02000000 through AD16Write, then branches to Jump_5.
|
|
Trace_02:
|
|
nop
|
|
nop
|
|
nop
|
|
nop
|
|
nop
|
|
lis r15, 0x0200 # AD16 = 0x02000000
|
|
bl AD16Write
|
|
b Jump_5
|
|
|
|
# Memory self test with given pattern.
#
# TestMem fills 0x01800000 bytes starting at r25 with the word in r26 (eight
# word stores per loop pass), then reads every word back and records each
# word that differs from r26.
#
# In:     r25 = start address
#         r26 = 32-bit test pattern
# In/Out: r17, r18, r19 = failure bitmaps; for every bad word one bit is OR-ed
#                         into each, indexed from (address >> 18)
#         r27 = number of bad words whose address low nibble is 8..F
#         r28 = number of bad words whose address low nibble is 0..7
#         r29 = highest bad address seen on the r27 path
#         These are accumulated, not initialised here (Trace_03 clears them).
# Clobbers: r15, r16, r23, r24, CTR, CR0.
# Returns with blr; call with bl.
#
# Jump_5 .. Jump_13 belong to the linear jump chain described in the note at
# the end of the file and are not part of the routine. The instruction count
# of this block must stay unchanged so that their spacing is preserved.
#
# Fix: TestMem_6 used to do "bge- ReadErrorBank0", i.e. branch to the very
# next instruction, which left ReadErrorBank1 and TestMem_7 unreachable (r27
# was never incremented, r29 never updated). The branch now targets
# ReadErrorBank1.
Jump_5:
    b       Jump_6
TestMem:
    nop
    nop
    nop
    nop
    nop
    mr      r23, r25                # r23 = write pointer.
    lis     r24, 0x0180             # Main memory size (24MB)
    b       TestMem_0
Jump_6:
    b       Jump_7
TestMem_0:
    rlwinm  r24, r24, 27, 5, 31     # Fill memory (by 32 byte portions): r24 = size >> 5.
    mtctr   r24
FillMem:
    stw     r26, 0 (r23)
    stw     r26, 4 (r23)
    stw     r26, 8 (r23)
    stw     r26, 12 (r23)
    b       TestMem_1
Jump_7:
    b       Jump_8
TestMem_1:
    stw     r26, 16 (r23)
    stw     r26, 20 (r23)
    stw     r26, 24 (r23)
    stw     r26, 28 (r23)
    addi    r23, r23, 32
    bdnz+   FillMem
    b       TestMem_2
Jump_8:
    b       Jump_9
TestMem_2:
    mr      r23, r25                # r23 = read pointer, back at the start.
    lis     r24, 0x0180
    rlwinm  r24, r24, 30, 2, 31     # r24 = size >> 2 (number of words).
    mtctr   r24
TestLoop:
    lwz     r15, 0 (r23)            # Begin to test.
    cmplw   r15, r26
    b       TestMem_3
Jump_9:
    b       Jump_10
TestMem_3:
    beq-    NextIteration           # Word matches the pattern: nothing to record.
    rlwinm  r15, r23, 14, 18, 31    # r15 = (address >> 18) & 0x3FFF
    andi.   r15, r15, 0x001F
    li      r16, 1
    slw     r16, r16, r15
    or      r17, r17, r16           # Mark bit in first bitmap.
    b       TestMem_4
Jump_10:
    b       Jump_11
TestMem_4:
    rlwinm  r15, r23, 14, 18, 31
    subi    r15, r15, 32
    andi.   r15, r15, 0x001F
    li      r16, 1
    slw     r16, r16, r15
    or      r18, r18, r16           # Mark bit in second bitmap.
    b       TestMem_5
Jump_11:
    b       Jump_12
TestMem_5:
    rlwinm  r15, r23, 14, 18, 31
    subi    r15, r15, 64
    andi.   r15, r15, 0x001F
    li      r16, 1
    slw     r16, r16, r15
    or      r19, r19, r16           # Mark bit in third bitmap.
    b       TestMem_6
Jump_12:
    b       Jump_13
TestMem_6:
    rlwinm  r15, r23, 0, 28, 31     # r15 = low nibble of the bad address.
    cmplwi  r15, 8
    bge-    ReadErrorBank1          # Low nibble 8..F is counted in r27.
ReadErrorBank0:
    addi    r28, r28, 1             # Low nibble 0..7 is counted in r28.
    b       NextIteration           # NOTE(review): this path skips the r29 update in TestMem_7 - confirm that is intended.
ReadErrorBank1:
    addi    r27, r27, 1
    b       TestMem_7
Jump_13:
    b       Trace_03
TestMem_7:
    cmplw   r29, r23
    bge-    NextIteration
    mr      r29, r23                # r29 = max(r29, bad address), unsigned.
NextIteration:
    addi    r23, r23, 4
    bdnz+   TestLoop
    blr
|
|
|
|
# Clear registers for memory test (see next). Trace step 0x03 - Nothing ?
# Zeroes r17, r18, r19, r27, r28 and r29, reports 0x03000000 through AD16Write,
# then branches to Jump_14.
|
|
Trace_03:
|
|
li r17, 0
|
|
li r18, 0
|
|
li r19, 0
|
|
li r27, 0
|
|
li r28, 0
|
|
li r29, 0
|
|
lis r15, 0x0300 # AD16 = 0x03000000
|
|
bl AD16Write
|
|
b Jump_14
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
|
|
# Test memory by some patterns.
#
# MemorySelfTest runs TestMem twice from 0x80000000 (pattern 0xAAAAAAAA, then
# its complement), builds a result word in r15 and reports it through AD16Write:
#   r15 = 0x04000000 | (0x02000000 if r27 != 0) | (0x01000000 if r28 != 0) | (r29 >> 2)
# It ends with a branch to Halt and is entered by a conditional branch from
# Jump_17, not by bl. Jump_14 .. Jump_16 belong to the linear jump chain.
# NOTE(review): the label names ReadError_0_7 / ReadError_8_F look swapped relative
# to the comments at Halt (r27 = last digit 8-F, r28 = last digit 0-7) - confirm.
|
|
Jump_14:
|
|
b Jump_15
|
|
MemorySelfTest:
|
|
lis r25, 0x8000
|
|
lis r26, 0xAAAA
|
|
ori r26, r26, 0xAAAA
|
|
bl TestMem # Test memory with 0xAA pattern.
|
|
not r26, r26
|
|
bl TestMem # Test memory with 0x55 pattern.
|
|
nop
|
|
b MemorySelfTest_0
|
|
Jump_15:
|
|
b Jump_16
|
|
MemorySelfTest_0:
|
|
lis r15, 0x0400 # r15 = 0x04000000
|
|
mr. r16, r27
|
|
beq- ReadError_8_F # r27 == 0: skip the 0x02000000 flag.
|
|
ReadError_0_7:
|
|
oris r15, r15, 0x0200
|
|
ReadError_8_F:
|
|
mr. r16, r28
|
|
beq- MemorySelfTestWriteAd # r28 == 0: skip the 0x01000000 flag.
|
|
b MemorySelfTest_1
|
|
Jump_16:
|
|
b Jump_17
|
|
MemorySelfTest_1:
|
|
oris r15, r15, 0x0100
|
|
MemorySelfTestWriteAd:
|
|
rlwinm r29, r29, 30, 2, 31 # r29 = r29 >> 2
|
|
or r15, r15, r29
|
|
bl AD16Write # Set AD16 value.
|
|
nop
|
|
nop
|
|
b Halt
|
|
|
|
# Halt execution if memory test failed.
# Jump_17 is where the jump chain arrives. The memory test is only entered when
# r20 == r21 (the AD16 ID matched); otherwise execution falls straight into Halt.
# Halt spins forever while r27 or r28 is non-zero, and falls through otherwise.
|
|
Jump_17:
|
|
cmplw r20, r21
|
|
beq+ MemorySelfTest
|
|
Halt:
|
|
mr. r16, r27 # Bad address with last digit 8-F ?
|
|
bne+ Halt
|
|
mr. r16, r28 # Bad address with last digit 0-7 ?
|
|
bne+ Halt
|
|
|
|
# Prepare GPR registers for IPL loading.
#
# Values loaded here and consumed by TransferIPL:
#   r2  = 0xCC006800  r6  = 0x00000150  r7  = 0x00000035  r8  = 1
#   r9  = 0x00000003  r10 = 0           r11 = 0x00000400  r12 = 0x00010000
#   r3  = 0x00020000  r4  = 0x012FFFE0  r13 = 0x00170000
# r3 equals 0x800 << 6 and r12 equals 0x400 << 6, i.e. the offset word
# advances by one r11-sized chunk per step in the same shifted form.
|
|
lis r2, 0xCC00 # EXI registers base
|
|
ori r2, r2, 0x6800
|
|
lis r6, 0x0000 # EXI0 CSR setup: device 1, 32MHz
|
|
ori r6, r6, 0x0150
|
|
lis r7, 0x0000
|
|
ori r7, r7, 0x0035
|
|
li r8, 1
|
|
lis r9, 0x0000
|
|
ori r9, r9, 0x0003
|
|
li r10, 0
|
|
lis r11, 0x0000 # Max length of single transfer
|
|
ori r11, r11, 0x0400
|
|
lis r12, 0x0001
|
|
ori r12, r12, 0x0000
|
|
lis r3, 0x0002 # Bootrom starting offset (0x800)
|
|
ori r3, r3, 0x0000
|
|
lis r4, 0x012F # Main memory starting address
|
|
ori r4, r4, 0xFFE0
|
|
lis r13, 0x0017 # Transfer length
|
|
ori r13, r13, 0x0000
|
|
b Jump_18
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
Jump_18:
|
|
b Jump_19
|
|
|
|
# Transfer IPL to main memory. Bootrom scrambler is decrypting data on the fly.
|
|
# Starting memory address: 0x012FFFE0.
|
|
# Starting bootrom offset: 0x800.
|
|
# Common length of transfer: 0x170000 bytes.
#
# Loop state: r3 = bootrom offset word, r4 = memory address, r13 = bytes left,
# r5 = length of the current chunk = min(r13, r11). Each pass selects the device,
# sends r3 as an immediate word, starts a DMA of r5 bytes at r4, waits, deselects,
# then advances r3 by r12, r4 by r11 and reduces r13 by r5.
# When r13 reaches 0 the loop exits to SetupEntrypoint.
# Jump_19 .. Jump_22 belong to the linear jump chain.
|
|
TransferIPL:
|
|
cmpwi r13, 0 # All bytes transferred ?
|
|
beq- SetupEntrypoint
|
|
mr r5, r11
|
|
cmplw r13, r5
|
|
bgt- SelectEXI # More than one chunk left: keep r5 = r11.
|
|
mr r5, r13 # Last chunk: r5 = remaining bytes.
|
|
SelectEXI:
|
|
stw r6, 0 (r2) # Select bootrom, through EXI0 CSR.
|
|
b TransferIPL_0
|
|
Jump_19:
|
|
b Jump_20
|
|
TransferIPL_0:
|
|
stw r3, 16 (r2) # EXI0 DATA - offset in bootrom + write command
|
|
lwz r16, 0 (r2)
|
|
stw r7, 12 (r2) # Write immediate 4 bytes from DATA.
|
|
WaitTransferIPL_0:
|
|
lwz r16, 12 (r2) # |
|
|
and. r16, r16, r8 # | Wait until transfer complete.
|
|
bgt+ WaitTransferIPL_0 # |
|
|
b TransferIPL_1
|
|
Jump_20:
|
|
b Jump_21
|
|
TransferIPL_1:
|
|
stw r4, 4 (r2) # EXI0 MAR - DMA memory address.
|
|
lwz r4, 4 (r2) # Read MAR back into r4.
|
|
stw r5, 8 (r2) # EXI0 LEN - DMA transfer length.
|
|
lwz r5, 8 (r2) # Read LEN back into r5.
|
|
# NOTE(review): r9 is 0x3; the immediate transfers in this file use 0x35 for
# "write" and 0x31 for "read", so by that encoding this is the read direction
# (device to memory) - confirm the wording "write" below.
stw r9, 12 (r2) # Start EXI0 DMA write transfer.
|
|
WaitTransferIPL_1:
|
|
lwz r16, 12 (r2)
|
|
b TransferIPL_2
|
|
Jump_21:
|
|
b Jump_22
|
|
TransferIPL_2:
|
|
and. r16, r16, r8 # | Wait until transfer complete.
|
|
bgt+ WaitTransferIPL_1 # |
|
|
stw r10, 0 (r2) # Deselect device.
|
|
lwz r16, 0 (r2)
|
|
lwz r16, 0 (r2)
|
|
add r3, r3, r12 # Next bootrom offset word.
|
|
b TransferIPL_3
|
|
Jump_22:
|
|
b Jump_23
|
|
TransferIPL_3:
|
|
add r4, r4, r11 # Advance pointers.
|
|
sub r13, r13, r5 # r13 = bytes still to transfer.
|
|
b TransferIPL
|
|
|
|
# Set link register to IPL entrypoint.
# Reached from TransferIPL once r13 is 0. The final blr in StartExecuteIPL uses this LR.
|
|
SetupEntrypoint:
|
|
lis r4, 0x8130
|
|
ori r4, r4, 0x0000
|
|
mtlr r4 # LR = 0x81300000
|
|
b DisableScrambler
|
|
|
|
# Disable bootrom decryption logic and disallow 32MHz EXI clock setting by CPU.
# Uses r2 (EXI base) and r14 (0xCC006400, loaded earlier in the file).
# Jump_23 and Jump_24 belong to the linear jump chain.
|
|
Jump_23:
|
|
b Jump_24
|
|
DisableScrambler:
|
|
lis r6, 0x0000
|
|
ori r6, r6, 0x2000 # r6 = 0x00002000
|
|
stw r6, 0 (r2) # Set ROMDIS bit in EXI0 CSR.
|
|
li r4, 1
|
|
stw r4, 60 (r14) # SI EXICLK[LOCK] = 1
|
|
lwz r4, 60 (r14) # Read the register back.
|
|
b StartExecuteIPL
|
|
Jump_24:
|
|
b Jump_25
|
|
|
|
# Clear OS pointer to DVD BI2 location. Jump to IPL entrypoint.
# Stores 0 at 0x800000F4, then returns through LR (set in SetupEntrypoint).
|
|
StartExecuteIPL:
|
|
lis r4, 0x8000
|
|
li r3, 0
|
|
stw r3, 0x00F4 (r4)
|
|
blr # !! IPL START TO EXECUTE !!
|
|
.word 0
|
|
.word 0
|
|
|
|
# This is how BS may look when written from scratch. Actual code fills zeroed words.
# Tail of the jump chain: Jump_25 .. Jump_31, each followed by seven zero words.
# The last link branches to TransferIPL, which starts the IPL load loop.
|
|
Padding:
|
|
Jump_25:
|
|
b Jump_26
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
Jump_26:
|
|
b Jump_27
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
Jump_27:
|
|
b Jump_28
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
Jump_28:
|
|
b Jump_29
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
Jump_29:
|
|
b Jump_30
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
Jump_30:
|
|
b Jump_31
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
Jump_31:
|
|
b TransferIPL # End of the jump chain: enter the transfer loop.
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
.word 0
|
|
|
|
# Note by tmbinc:
|
|
# Why BS is jumping around? The jumping is because the way the instructions are
|
|
# fetched. They must be fetched in exact linear order, otherwise the scrambling
|
|
# goes out of sync. So in order to do any loops, BS enables the icache and,
|
|
# to fill the icache, it jumps to the first location in each icache line.
|
|
# That's why BS jumps in 0x20 byte steps.
|