pureikyubu/Docs/RE/boot.s
2020-05-01 15:51:27 +03:00

612 lines
19 KiB
ArmAsm

# Project Dolwin (IPL Replacement): Bootstrap Stage (BS).
# Brief run flow of the boot stage:
#
# - Init Flipper (ARAM, MI, reset DVD).
# - Init Gekko (enable cache, set Dolphin OS memory model, enable translation).
# - Probe AD16.
# - Test memory. Halt CPU if the test failed.
# - Load IPL into main memory and disable bootrom scrambler.
# - Jump to IPL's __start.
.text
.org 0 # Will be actually 0xFFF0_0100 later.
# boot: entry point of the bootstrap stage.
# Programs HID0 and MSR, writes two ARAM registers and one memory-interface
# register, then ORs the cache-enable bits into HID0.
# Scratch registers: r3, r4, r5. Address translation is still off here
# (MSR is loaded with 0x2000 only), so hardware registers are reached through
# 0x0C00xxxx addresses.
boot:
# This will initialize Gekko implementation specifics.
lis r4, 0x0011
addi r4, r4, 0x0C64
mtspr HID0, r4 # HID0 = 0x00110C64
# This will initialize CPU program model.
lis r4, 0x0000
addi r4, r4, 0x2000
mtmsr r4 # MSR = 0x00002000 (presumably only MSR[FP] set - confirm against the Gekko manual)
# Initialize auxiliary memory (ARAM).
lis r4, 0x0C00
addi r4, r4, 0x5012 # r4 = 0x0C005012
li r5, 67
sth r5, 0 (r4) # Set 0x5012 register value to 0x43.
li r5, 156
sth r5, 8 (r4) # Set ARAM refresh value: 0x501A = 0x9C.
# Initialize Flipper memory interface.
lis r3, 0x0C00
ori r3, r3, 0x4000 # r3 = 0x0C004000
li r4, 64
sth r4, 38 (r3) # Set 0x4026 register value to 0x40.
nop
nop
# Enable data and instruction cache.
mfspr r3, HID0
ori r4, r3, 0xC000 # Read-modify-write: add 0xC000 to the current HID0 value.
mtspr HID0, r4
nop
nop
nop
isync
# Initialize CPU memory model. Clear BATs and segment registers.
# Only the upper word of each BAT is zeroed here; the lower words are left as
# they are until the BATs that are actually used get programmed below.
li r4, 0
mtspr DBAT0U, r4
mtspr DBAT1U, r4
mtspr DBAT2U, r4
mtspr DBAT3U, r4
mtspr IBAT0U, r4
mtspr IBAT1U, r4
mtspr IBAT2U, r4
mtspr IBAT3U, r4
isync
# Load the same value, 0x80000000, into all 16 segment registers.
lis r4, 0x8000
addi r4, r4, 0
mtsr 0, r4
mtsr 1, r4
mtsr 2, r4
mtsr 3, r4
mtsr 4, r4
mtsr 5, r4
mtsr 6, r4
mtsr 7, r4
mtsr 8, r4
mtsr 9, r4
mtsr 10, r4
mtsr 11, r4
mtsr 12, r4
mtsr 13, r4
mtsr 14, r4
mtsr 15, r4
# Configure memory model (columns: upper word, lower word):
#
# DBAT0: 80001FFF 00000002 Write-back cached main memory, 256MB block.
# DBAT1: C0001FFF 0000002A Cache inhibited, Guarded, 256MB block.
# DBAT2: 00000000 xxxxxxxx Don't care, reserved.
# DBAT3: FFF0001F FFF00001 Bootrom, 1MB block (temporary for BS)
#
# IBAT0: 80001FFF 00000002 Write-back cached main memory, 256MB block.
# IBAT1: 00000000 xxxxxxxx Don't care, reserved.
# IBAT2: 00000000 xxxxxxxx Don't care, reserved.
# IBAT3: FFF0001F FFF00001 Bootrom, 1MB block (temporary for BS)
# NOTE(review): the original note described IBAT3 as "Cache inhibited, Guarded",
# but it is loaded with exactly the same r4/r3 pair as DBAT3 (lower word
# 0xFFF00001), not with the 0x2A attribute value used for DBAT1 - confirm.
#
# In every pair the lower word is written before the upper word, and each
# pair is followed by isync.
lis r4, 0x0000
addi r4, r4, 2 # r4 = 0x00000002 (lower word for BAT0)
lis r3, 0x8000
addi r3, r3, 0x1FFF # r3 = 0x80001FFF (upper word for BAT0)
mtspr DBAT0L, r4
mtspr DBAT0U, r3
isync
mtspr IBAT0L, r4
mtspr IBAT0U, r3
isync
lis r4, 0x0000
addi r4, r4, 42 # r4 = 0x0000002A (lower word for DBAT1)
lis r3, 0xC000
addi r3, r3, 0x1FFF # r3 = 0xC0001FFF (upper word for DBAT1)
mtspr DBAT1L, r4
mtspr DBAT1U, r3
isync
lis r4, 0xFFF0
addi r4, r4, 1 # r4 = 0xFFF00001 (lower word for BAT3)
lis r3, 0xFFF0
addi r3, r3, 31 # r3 = 0xFFF0001F (upper word for BAT3)
mtspr DBAT3L, r4
mtspr DBAT3U, r3
isync
mtspr IBAT3L, r4
mtspr IBAT3U, r3
isync
# Enable instruction and data translation.
mfmsr r4
ori r4, r4, 0x0030 # Enable address translation (OR 0x30 into MSR).
mtmsr r4
isync
# Write 0x0245248A to 0x3030 register. Meaning is unknown. Register is unknown.
# r3 = 0xCC003000 stays the base for the reset-register accesses below.
lis r3, 0xCC00
ori r3, r3, 0x3000
lis r4, 0x0245
ori r4, r4, 0x248A
stw r4, 48 (r3) # Write 0x0245248A to 0x3030 register.
# Reset DVD, through PI reset register (offset 36 = 0x3024).
# Bit numbers below are PowerPC style: bit 31 is the least significant bit.
lwz r4, 36 (r3) # Read PI reset register.
ori r4, r4, 0x0001 # Set bit 31 (mask 0x1).
rlwinm r4, r4, 0, 31, 28 # Keep bits 31 and 0..28, i.e. clear bits 29 and 30 (masks 0x4 and 0x2).
stw r4, 36 (r3) # Write new value in reset register.
mftb r5, TBL # r5 = time base at the start of the delay.
WaitDVDReset:
mftb r6, TBL
sub r7, r6, r5 # r7 = elapsed time-base ticks.
cmplwi r7, 4388
blt+ WaitDVDReset # Spin until 4388 ticks have elapsed. The original note says ~9 us at 486MHz;
# NOTE(review): that figure treats a time-base tick as one core clock - confirm the actual tick rate.
ori r4, r4, 0x0003 # Set bits 30 and 31 (masks 0x2 and 0x1).
# NOTE(review): the original comment said "set bit 31, set bit 29". Bit 29 is mask 0x4,
# which this immediate does not contain, so as listed bit 29 stays cleared.
# Verify the immediate against the ROM dump.
stw r4, 36 (r3) # Write new value in reset register.
# Allow 32MHz EXI clock setting by CPU.
# r14 = 0xCC006400 is kept for later: DisableScrambler writes the same
# register (offset 60) through r14 again.
lis r14, 0xCC00
ori r14, r14, 0x6400
li r4, 0
stw r4, 60 (r14) # SI EXICLK[LOCK] = 0 (the value stored is zero).
# To probe AD16 we must read its EXI ID. It should be 0x04120000. Place it to R20.
#
# Registers set up here and reused by the AD16Write routine:
#   r2  = 0xCC006800  EXI register base
#   r22 = 0x000000BA  value written to EXI2 CSR (offset 40) to select AD16
#   r8  = 1           mask for the "transfer in progress" bit polled at offset 52
#   r10 = 0           value written to EXI2 CSR to deselect
#   r21 = 0x04120000  expected AD16 ID
#   r20 = ID actually read back (result of this block)
# Scratch: r3, r7, r16.
lis r2, 0xCC00
ori r2, r2, 0x6800
lis r22, 0x0000
ori r22, r22, 0x00BA
li r8, 1
li r10, 0
lis r21, 0x0412
ori r21, r21, 0x0000
lis r3, 0x0000
ori r3, r3, 0x0000
lis r7, 0x0000
ori r7, r7, 0x0015
stw r3, 56 (r2) # EXI2 DATA = 0 (Get ID command)
stw r22, 40 (r2) # Select AD16, through EXI2 CSR.
lwz r16, 40 (r2) # Read the CSR back; the value is not used.
stw r7, 52 (r2) # Write immediate 2 bytes from DATA (control value 0x15).
WaitAd16_0:
lwz r16, 52 (r2) # |
and. r16, r16, r8 # | Wait until transfer complete (bit masked by r8 reads as zero).
bgt+ WaitAd16_0 # |
lis r7, 0x0000
ori r7, r7, 0x0031
stw r7, 52 (r2) # Read immediate 4 bytes to DATA (ID) (control value 0x31).
WaitAd16_1:
lwz r16, 52 (r2) # |
and. r16, r16, r8 # | Wait until transfer complete.
bgt+ WaitAd16_1 # |
stw r10, 40 (r2) # Deselect device.
lwz r16, 40 (r2) # Read EXI2 CSR twice. Why? No idea.
lwz r16, 40 (r2) # Maybe it is part of the deselect sequence.
lwz r20, 56 (r2) # r20 = DATA. It should contain ID.
b Jump_0 # Skip the two padding words below.
.word 0
.word 0
# Write "trace step" value to AD16. Only when probe was success (R20 = AD16 ID).
# Input value (trace step) must be in R15.
#
# Callable entry point: AD16Write (invoked with bl, returns with blr).
#   In:      r15 = value to send
#            r20 = probed ID, r21 = expected ID (routine is a no-op if they differ)
#            r2 = EXI base, r22 = select value, r8 = busy mask, r10 = 0
#   Clobbers: r3, r7, r16, CR0
#
# The routine body is split into pieces (DoAD16Write, AD16Write_0..2) that are
# interleaved with the Jump_N branch chain. On the first pass execution only
# follows Jump_0 -> Jump_1 -> ... -> Jump_4 -> Trace_01 and skips the pieces.
Jump_0:
b Jump_1
DoAD16Write:
lis r3, 0xA000
ori r3, r3, 0x0000
lis r7, 0x0000
ori r7, r7, 0x0005
stw r3, 56 (r2) # EXI2 DATA = 0xA0000000 (Write AD16 command)
stw r22, 40 (r2) # Select AD16, through EXI2 CSR.
lwz r16, 40 (r2) # Read the CSR back; the value is not used.
b AD16Write_0
Jump_1:
b Jump_2
AD16Write_0:
stw r7, 52 (r2) # Write immediate 1 byte from DATA (control value 0x05).
WaitAd16Write_0:
lwz r16, 52 (r2) # |
and. r16, r16, r8 # | Wait until transfer complete.
bgt+ WaitAd16Write_0 # |
nop
nop
b AD16Write_1
Jump_2:
b Jump_3
AD16Write_1:
lis r7, 0x0000
ori r7, r7, 0x0035
stw r15, 56 (r2) # EXI2 DATA = trace step
stw r7, 52 (r2) # Write immediate 4 bytes from DATA (control value 0x35).
WaitAd16Write_1:
lwz r16, 52 (r2) # |
and. r16, r16, r8 # | Wait until transfer complete.
b AD16Write_2 # | The loop's conditional branch sits after Jump_3.
Jump_3:
b Jump_4
AD16Write_2:
bgt+ WaitAd16Write_1 # Uses CR0 from the and. above; loops while the busy bit is set.
stw r10, 40 (r2) # Deselect device.
lwz r16, 40 (r2) # CSR is read back twice after deselect, as in the probe sequence.
lwz r16, 40 (r2)
blr # Return to the caller of AD16Write.
.word 0
Jump_4:
b Trace_01
AD16Write:
cmplw r20, r21 # If AD16 probe failed, then skip.
beq+ DoAD16Write # IDs match: do the write; DoAD16Write path ends in blr.
blr # IDs differ: return without touching EXI.
# Trace step 0x01 - Nothing ?
# Sends 0x01000000 through AD16Write; no other work is done in this step.
Trace_01:
lis r15, 0x0100 # AD16 = 0x01000000
bl AD16Write
nop
nop
nop
nop
nop
nop
nop
b Trace_02 # Skip the two padding words below.
.word 0
.word 0
# Trace step 0x02 - Nothing ?
# Sends 0x02000000 through AD16Write, then enters the Jump_5.. branch chain.
Trace_02:
nop
nop
nop
nop
nop
lis r15, 0x0200 # AD16 = 0x02000000
bl AD16Write
b Jump_5
# Memory self test with given pattern.
#
# Callable entry point: TestMem (invoked with bl, returns with blr).
#   In:      r25 = start address of the region to test
#            r26 = 32-bit fill pattern
#   In/out:  r17, r18, r19 = bit masks of failing regions (OR-accumulated)
#            r27 = count of mismatches at addresses whose low hex digit is 8-F
#            r28 = count of mismatches at addresses whose low hex digit is 0-7
#            r29 = highest mismatching address seen on the 8-F path
#   Clobbers: r15, r16, r23, r24, CTR, CR0
#
# The region size is fixed at 0x01800000 bytes (24MB). The region is first
# filled with the pattern 32 bytes per iteration, then read back one word at
# a time and compared.
#
# The routine body is split into pieces (TestMem_0..7) interleaved with the
# Jump_N branch chain. On the first pass execution only follows
# Jump_5 -> Jump_6 -> ... -> Jump_13 -> Trace_03 and skips the pieces.
Jump_5:
b Jump_6
TestMem:
nop
nop
nop
nop
nop
mr r23, r25 # r23 = running store pointer.
lis r24, 0x0180 # Main memory size (24MB)
b TestMem_0
Jump_6:
b Jump_7
TestMem_0:
rlwinm r24, r24, 27, 5, 31 # Fill memory (by 32 byte portions): r24 = size >> 5.
mtctr r24
FillMem:
stw r26, 0 (r23)
stw r26, 4 (r23)
stw r26, 8 (r23)
stw r26, 12 (r23)
b TestMem_1
Jump_7:
b Jump_8
TestMem_1:
stw r26, 16 (r23)
stw r26, 20 (r23)
stw r26, 24 (r23)
stw r26, 28 (r23)
addi r23, r23, 32
bdnz+ FillMem
b TestMem_2
Jump_8:
b Jump_9
TestMem_2:
mr r23, r25 # Rewind to the start of the region.
lis r24, 0x0180
rlwinm r24, r24, 30, 2, 31 # r24 = size >> 2 (number of words to check).
mtctr r24
TestLoop:
lwz r15, 0 (r23) # Begin to test.
cmplw r15, r26
b TestMem_3 # The conditional branch on this compare sits after Jump_9.
Jump_9:
b Jump_10
TestMem_3:
beq- NextIteration # Word matches the pattern: nothing to record.
rlwinm r15, r23, 14, 18, 31 # r15 = address >> 18.
andi. r15, r15, 0x001F
li r16, 1
slw r16, r16, r15
or r17, r17, r16 # r17 |= 1 << ((address >> 18) & 31)
b TestMem_4
Jump_10:
b Jump_11
TestMem_4:
rlwinm r15, r23, 14, 18, 31
subi r15, r15, 32 # Subtracting 32 does not change the low 5 bits kept below.
andi. r15, r15, 0x001F
li r16, 1
slw r16, r16, r15
or r18, r18, r16 # r18 receives the same bit index as r17.
b TestMem_5
Jump_11:
b Jump_12
TestMem_5:
rlwinm r15, r23, 14, 18, 31
subi r15, r15, 64 # Subtracting 64 does not change the low 5 bits either.
andi. r15, r15, 0x001F
li r16, 1
slw r16, r16, r15
or r19, r19, r16 # r19 receives the same bit index as r17.
b TestMem_6
Jump_12:
b Jump_13
TestMem_6:
rlwinm r15, r23, 0, 28, 31 # r15 = low hex digit of the failing address.
cmplwi r15, 8
# Fix: this branch used to target ReadErrorBank0, which is simply the next
# instruction, so ReadErrorBank1/TestMem_7 were unreachable and r27/r29 were
# never updated. Digits 8-F must go to ReadErrorBank1 (r27), matching the
# "last digit 8-F" meaning given to r27 at Halt.
bge- ReadErrorBank1
ReadErrorBank0:
addi r28, r28, 1 # Low digit 0-7.
b NextIteration
ReadErrorBank1:
addi r27, r27, 1 # Low digit 8-F.
b TestMem_7
Jump_13:
b Trace_03
TestMem_7:
cmplw r29, r23
bge- NextIteration
mr r29, r23 # r29 = max(r29, failing address), unsigned.
NextIteration:
addi r23, r23, 4
bdnz+ TestLoop
blr
# Clear registers for memory test (see next). Trace step 0x03 - Nothing ?
# Zeroes the accumulators that TestMem updates (r17, r18, r19, r27, r28, r29),
# then sends 0x03000000 through AD16Write.
Trace_03:
li r17, 0
li r18, 0
li r19, 0
li r27, 0
li r28, 0
li r29, 0
lis r15, 0x0300 # AD16 = 0x03000000
bl AD16Write
b Jump_14 # Skip the seven padding words below.
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
# Test memory by some patterns.
#
# Flow: the branch chain Jump_14 -> Jump_15 -> Jump_16 -> Jump_17 is followed
# first. At Jump_17 the probed AD16 ID (r20) is compared with the expected one
# (r21): on a match MemorySelfTest runs and then branches to Halt; otherwise
# execution falls straight into Halt with the counters still zero from
# Trace_03.
#
# MemorySelfTest calls TestMem twice (pattern 0xAAAAAAAA, then its complement)
# over the region starting at 0x80000000, then reports through AD16Write:
#   r15 = 0x04000000
#       | 0x02000000 if r27 != 0
#       | 0x01000000 if r28 != 0
#       | (r29 >> 2)
# NOTE(review): the label names below pair r27 with "0_7" and r28 with "8_F",
# while the comments at Halt say r27 is for last digit 8-F and r28 for 0-7.
# One of the two namings is swapped - confirm.
Jump_14:
b Jump_15
MemorySelfTest:
lis r25, 0x8000 # r25 = 0x80000000, TestMem start address.
lis r26, 0xAAAA
ori r26, r26, 0xAAAA
bl TestMem # Test memory with 0xAA pattern.
not r26, r26
bl TestMem # Test memory with 0x55 pattern.
nop
b MemorySelfTest_0
Jump_15:
b Jump_16
MemorySelfTest_0:
lis r15, 0x0400 # r15 = 0x04000000, base of the reported value.
mr. r16, r27
beq- ReadError_8_F # r27 == 0: skip setting the 0x0200 flag.
ReadError_0_7:
oris r15, r15, 0x0200
ReadError_8_F:
mr. r16, r28
beq- MemorySelfTestWriteAd # r28 == 0: skip setting the 0x0100 flag.
b MemorySelfTest_1
Jump_16:
b Jump_17
MemorySelfTest_1:
oris r15, r15, 0x0100
MemorySelfTestWriteAd:
rlwinm r29, r29, 30, 2, 31 # r29 = r29 >> 2.
or r15, r15, r29
bl AD16Write # Set AD16 value.
nop
nop
b Halt
# Halt execution if memory test failed.
Jump_17:
cmplw r20, r21 # Was the AD16 probe successful?
beq+ MemorySelfTest # Yes: run the memory test (which ends with b Halt).
Halt:
# Spins forever if either error counter is nonzero; otherwise falls through.
mr. r16, r27 # Bad address with last digit 8-F ?
bne+ Halt
mr. r16, r28 # Bad address with last digit 0-7 ?
bne+ Halt
# Prepare GPR registers for IPL loading.
#
# Register roles for the TransferIPL loop:
#   r2  = 0xCC006800  EXI register base
#   r6  = 0x00000150  EXI0 CSR value used to select the bootrom
#   r7  = 0x00000035  control value: write immediate 4 bytes
#   r8  = 1           busy-bit mask
#   r9  = 0x00000003  control value that starts the DMA transfer
#   r10 = 0           CSR value used to deselect
#   r11 = 0x00000400  maximum bytes per DMA transfer
#   r12 = 0x00010000  increment added to r3 after each transfer (0x400 << 6)
#   r3  = 0x00020000  value written to EXI0 DATA as the read command (0x800 << 6)
#   r4  = 0x012FFFE0  destination address in main memory
#   r13 = 0x00170000  bytes remaining
lis r2, 0xCC00 # EXI registers base
ori r2, r2, 0x6800
lis r6, 0x0000 # EXI0 CSR setup: device 1, 32MHz
ori r6, r6, 0x0150
lis r7, 0x0000
ori r7, r7, 0x0035
li r8, 1
lis r9, 0x0000
ori r9, r9, 0x0003
li r10, 0
lis r11, 0x0000 # Max length of single transfer
ori r11, r11, 0x0400
lis r12, 0x0001
ori r12, r12, 0x0000
lis r3, 0x0002 # Bootrom starting offset (0x800); the register holds 0x800 << 6.
ori r3, r3, 0x0000
lis r4, 0x012F # Main memory starting address
ori r4, r4, 0xFFE0
lis r13, 0x0017 # Transfer length
ori r13, r13, 0x0000
b Jump_18 # Skip the four padding words below.
.word 0
.word 0
.word 0
.word 0
Jump_18:
b Jump_19
# Transfer IPL to main memory. Bootrom scrambler is decrypting data on the fly.
# Starting memory address: 0x012FFFE0.
# Starting bootrom offset: 0x800.
# Total length of transfer: 0x170000 bytes.
#
# Loop, one DMA transfer of at most r11 (0x400) bytes per iteration:
#   r5 = min(r11, r13); select bootrom; send r3 as a 4-byte immediate command;
#   DMA r5 bytes to address r4; deselect; r3 += r12; r4 += r11; r13 -= r5.
# Exits to SetupEntrypoint when r13 reaches zero.
# Note that r4 is advanced by r11, not r5. The two are equal for every chunk
# here because 0x170000 is a multiple of 0x400.
#
# The loop body is split into pieces (TransferIPL_0..3) interleaved with the
# Jump_N branch chain. It is entered from the end of that chain, not by
# falling through from Jump_18.
TransferIPL:
cmpwi r13, 0 # All bytes transferred ?
beq- SetupEntrypoint
mr r5, r11 # Assume a full-size chunk.
cmplw r13, r5
bgt- SelectEXI # More than one chunk left: keep r5 = r11.
mr r5, r13 # Otherwise transfer only what remains.
SelectEXI:
stw r6, 0 (r2) # Select bootrom, through EXI0 CSR.
b TransferIPL_0
Jump_19:
b Jump_20
TransferIPL_0:
stw r3, 16 (r2) # EXI0 DATA - offset in bootrom + write command
lwz r16, 0 (r2) # Read the CSR back; the value is not used.
stw r7, 12 (r2) # Write immediate 4 bytes from DATA.
WaitTransferIPL_0:
lwz r16, 12 (r2) # |
and. r16, r16, r8 # | Wait until transfer complete.
bgt+ WaitTransferIPL_0 # |
b TransferIPL_1
Jump_20:
b Jump_21
TransferIPL_1:
stw r4, 4 (r2) # EXI0 MAR - DMA memory address.
lwz r4, 4 (r2) # r4 is reloaded with whatever the register reads back.
stw r5, 8 (r2) # EXI0 LEN - DMA transfer length.
lwz r5, 8 (r2) # r5 is reloaded with whatever the register reads back.
stw r9, 12 (r2) # Start EXI0 DMA transfer (control value 3).
# NOTE(review): the original comment called this a "DMA write transfer". The
# direction bits of 3 are the same as in 0x31 ("read immediate") and differ
# from 0x15/0x05/0x35 ("write immediate"), so this is presumably a read from
# the device into memory - confirm.
WaitTransferIPL_1:
lwz r16, 12 (r2)
b TransferIPL_2 # The rest of the wait loop sits after Jump_21.
Jump_21:
b Jump_22
TransferIPL_2:
and. r16, r16, r8 # | Wait until transfer complete.
bgt+ WaitTransferIPL_1 # |
stw r10, 0 (r2) # Deselect device.
lwz r16, 0 (r2) # CSR is read back twice after deselect.
lwz r16, 0 (r2)
add r3, r3, r12 # Advance the bootrom command/offset by one chunk.
b TransferIPL_3
Jump_22:
b Jump_23
TransferIPL_3:
add r4, r4, r11 # Advance pointers.
sub r13, r13, r5 # r13 = bytes still to transfer.
b TransferIPL
# Set link register to IPL entrypoint.
# Reached from TransferIPL once r13 == 0. The final blr in StartExecuteIPL
# jumps to this address.
SetupEntrypoint:
lis r4, 0x8130
ori r4, r4, 0x0000
mtlr r4 # LR = 0x81300000
b DisableScrambler
# Disable bootrom decryption logic and disallow 32MHz EXI clock setting by CPU.
# Uses r2 (EXI base) and r14 (0xCC006400), both set up earlier in the file.
Jump_23:
b Jump_24
DisableScrambler:
lis r6, 0x0000
ori r6, r6, 0x2000
stw r6, 0 (r2) # Set ROMDIS bit in EXI0 CSR (value 0x2000).
li r4, 1
stw r4, 60 (r14) # SI EXICLK[LOCK] = 1
lwz r4, 60 (r14) # Read the register back; the value is not used afterwards.
b StartExecuteIPL
Jump_24:
b Jump_25
# Clear OS pointer to DVD BI2 location. Jump to IPL entrypoint.
StartExecuteIPL:
lis r4, 0x8000
li r3, 0
stw r3, 0x00F4 (r4) # Store zero to 0x800000F4.
blr # !! IPL START TO EXECUTE !!
.word 0
.word 0
# This is how BS might look if written from scratch. In the actual code the
# zeroed words below are filled.
#
# Tail of the Jump_N branch chain: each Jump_N is followed by seven words, so
# consecutive Jump_N labels are 0x20 bytes apart. The last link, Jump_31,
# branches to TransferIPL, which is where the IPL load actually begins.
Padding:
Jump_25:
b Jump_26
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
Jump_26:
b Jump_27
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
Jump_27:
b Jump_28
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
Jump_28:
b Jump_29
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
Jump_29:
b Jump_30
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
Jump_30:
b Jump_31
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
Jump_31:
b TransferIPL # End of the chain: start loading the IPL.
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
# Note by tmbinc:
# Why does BS jump around? The jumping is because of the way the instructions
# are fetched. They must be fetched in exact linear order, otherwise the
# scrambling goes out of sync. So in order to do any loops, BS enables the
# icache and, to fill the icache, jumps to the first location in each icache
# line. That's why BS jumps in 0x20-byte steps.