pureikyubu/Bootstrap_Reversing.md
2015-03-21 23:03:42 -04:00

19 KiB

# Brief run flow of boot stage (BS) execution:
#
#   - Init Flipper (ARAM, MI, reset DVD).
#   - Init Gekko (enable caches, set Dolphin OS memory model, enable translation).
#   - Probe AD16.
#   - Test memory. Halt the CPU if the test failed.
#   - Load IPL into main memory and disable the bootrom scrambler.
#   - Jump to IPL's __start.

.text
.org 0              # Will be actually 0xFFF0_0100 later.

# boot: first code of the boot stage.
# Straight-line bring-up: loads fixed values into HID0 and MSR, writes two
# 16-bit Flipper registers (ARAM and memory interface), then sets two more
# bits in HID0 to enable the caches. r3-r5 are used as scratch.
# Address translation is still off in this part, so Flipper registers are
# addressed at 0x0C00xxxx here (the code after translation is enabled uses
# 0xCC00xxxx instead).
boot:

# This will initialize Gekko implementation specifics.
        lis         r4, 0x0011          # XXX: This must be proved somehow..
        addi        r4, r4, 0x0C64      # r4 = 0x00110C64
        mtspr       HID0, r4

# This will initialize CPU program model.
# MSR is replaced wholesale with 0x00002000 (every other MSR bit is cleared).
        lis         r4, 0x0000
        addi        r4, r4, 0x2000
        mtmsr       r4

# Initialize auxiliary memory (ARAM).
        lis         r4, 0x0C00
        addi        r4, r4, 0x5012      # r4 = 0x0C005012
        li          r5, 67              # 67 = 0x43
        sth         r5, 0 (r4)          # Set 0x5012 register value to 0x43.
        li          r5, 156             # 156 = 0x9C
        sth         r5, 8 (r4)          # Set ARAM refresh value (register 0x501A).

# Initialize Flipper memory interface.
        lis         r3, 0x0C00
        ori         r3, r3, 0x4000      # r3 = 0x0C004000
        li          r4, 64              # 64 = 0x40
        sth         r4, 38 (r3)         # Set 0x4026 register value to 0x40.
        nop
        nop

# Enable data and instruction cache.
# Read-modify-write: only the 0xC000 bits are added to the current HID0 value.
        mfspr       r3, HID0
        ori         r4, r3, 0xC000
        mtspr       HID0, r4
        nop
        nop
        nop
        isync

# Initialize CPU memory model. Clear BATs and segment registers.
# Step 1: write 0 to the upper word of every data and instruction BAT.
# Step 2: load all 16 segment registers with 0x80000000.
# Step 3: program DBAT0/IBAT0, DBAT1 and DBAT3/IBAT3 (table below).
# Step 4: set the 0x0030 bits in MSR to turn address translation on.
# r3 and r4 are used as scratch.
        li          r4, 0
        mtspr       DBAT0U, r4
        mtspr       DBAT1U, r4
        mtspr       DBAT2U, r4
        mtspr       DBAT3U, r4
        mtspr       IBAT0U, r4
        mtspr       IBAT1U, r4
        mtspr       IBAT2U, r4
        mtspr       IBAT3U, r4
        isync
        lis         r4, 0x8000
        addi        r4, r4, 0           # r4 = 0x80000000 (the addi adds nothing)
        mtsr        0, r4
        mtsr        1, r4
        mtsr        2, r4
        mtsr        3, r4
        mtsr        4, r4
        mtsr        5, r4
        mtsr        6, r4
        mtsr        7, r4
        mtsr        8, r4
        mtsr        9, r4
        mtsr        10, r4
        mtsr        11, r4
        mtsr        12, r4
        mtsr        13, r4
        mtsr        14, r4
        mtsr        15, r4

# Configure memory model:
#
# DBAT0: 80001FFF 00000002    Write-back cached main memory, 256MB block.
# DBAT1: C0001FFF 0000002A    Cache-inhibited, guarded main memory mirror,
#                             256MB block. (0x2A = I and G bits of WIMG plus
#                             PP = 10; the W bit, 0x40, is not set, so this is
#                             not a write-through mapping.)
# DBAT2: 00000000 xxxxxxxx    Dont care, reserved.
# DBAT3: FFF0001F FFF00001    Bootrom, 1MB block (temporary for BS)
#
# IBAT0: 80001FFF 00000002    Write-back cached main memory, 256MB block.
# IBAT1: 00000000 xxxxxxxx    Dont care, reserved.
# IBAT2: 00000000 xxxxxxxx    Dont care, reserved.
# IBAT3: FFF0001F FFF00001    Bootrom, 1MB block (temporary for BS)
#
# For each pair the lower word is written before the upper word, followed by
# isync. The same r3/r4 values are reused for the DBAT and IBAT of a pair.
        lis         r4, 0x0000
        addi        r4, r4, 2           # r4 = 0x00000002 (lower word)
        lis         r3, 0x8000
        addi        r3, r3, 0x1FFF      # r3 = 0x80001FFF (upper word)
        mtspr       DBAT0L, r4
        mtspr       DBAT0U, r3
        isync
        mtspr       IBAT0L, r4
        mtspr       IBAT0U, r3
        isync
        lis         r4, 0x0000
        addi        r4, r4, 42          # r4 = 0x0000002A (lower word)
        lis         r3, 0xC000
        addi        r3, r3, 0x1FFF      # r3 = 0xC0001FFF (upper word)
        mtspr       DBAT1L, r4
        mtspr       DBAT1U, r3
        isync
        lis         r4, 0xFFF0
        addi        r4, r4, 1           # r4 = 0xFFF00001 (lower word)
        lis         r3, 0xFFF0
        addi        r3, r3, 31          # r3 = 0xFFF0001F (upper word)
        mtspr       DBAT3L, r4
        mtspr       DBAT3U, r3
        isync
        mtspr       IBAT3L, r4
        mtspr       IBAT3U, r3
        isync

# Enable instruction and data translation.
        mfmsr       r4
        ori         r4, r4, 0x0030      # Enable address translation.
        mtmsr       r4
        isync

# Write 0x0245248A to 0x3030 register. Meaning is unknown. Register is unknown.
# r3 = 0xCC003000 stays live as the base for the reset register accesses below.
        lis         r3, 0xCC00
        ori         r3, r3, 0x3000
        lis         r4, 0x0245
        ori         r4, r4, 0x248A
        stw         r4, 48 (r3)         # Write 0x0245248A to 0x3030 register.

# Reset DVD, through PI reset register.
# Sequence: read register 0x3024, write back a modified value, busy-wait on
# the time base lower register until 4388 ticks have elapsed, then write a
# second modified value.
# NOTE(review): the instructions as listed do not match their comments.
# With MB=31 > ME=28 the rlwinm mask wraps around and keeps bits 31 and 0..28,
# so it clears bits 29 AND 30; the later "ori 0x0003" sets bits 30 and 31, so
# bit 29 is never set again. The comments describe "clear bit 29" / "set
# bit 29", which would be mask 30..28 and ori 0x0005 - verify against the
# ROM dump.
        lwz         r4, 36 (r3)         # Read PI reset register.
        ori         r4, r4, 0x0001
        rlwinm      r4, r4, 0, 31, 28   # Set bit 31, clear bit 29.
        stw         r4, 36 (r3)         # Write new value in reset register.
        mftb        r5, TBL             # r5 = start time
WaitDVDReset:
        mftb        r6, TBL
        sub         r7, r6, r5          # r7 = ticks elapsed since r5 was sampled
        cmplwi      r7, 4388
        # NOTE(review): "~9 us" assumes TBL counts at 486MHz - confirm the
        # time base rate.
        blt+        WaitDVDReset        # Wait ~9 us (with 486MHz clock)
        ori         r4, r4, 0x0003      # Set bit 31, set bit 29.
        stw         r4, 36 (r3)         # Write new value in reset register.

# Allow 32MHz EXI clock setting by CPU.
# r14 = 0xCC006400 is kept; DisableScrambler later stores 1 to the same
# register through r14.
        lis         r14, 0xCC00
        ori         r14, r14, 0x6400
        li          r4, 0
        stw         r4, 60 (r14)        # SI EXICLK[LOCK] = 0

# To probe AD16 we must read its EXI ID. It should be 0x04120000. Place it to R20.
#
# Registers set up here and left live for AD16Write:
#   r2  = 0xCC006800  EXI register base
#   r22 = 0x000000BA  value written to EXI2 CSR to select AD16
#   r8  = 1           mask for bit 0 of the register at offset 52 (busy flag)
#   r10 = 0           value written to EXI2 CSR to deselect
#   r21 = 0x04120000  expected AD16 ID
#   r20 = ID actually read back (compared with r21 by AD16Write and Jump_17)
# Scratch: r3, r7, r16.
# Offsets from r2 used below: 40 = EXI2 CSR, 52 = register that starts a
# transfer and is polled for completion, 56 = EXI2 DATA.
        lis         r2, 0xCC00
        ori         r2, r2, 0x6800
        lis         r22, 0x0000
        ori         r22, r22, 0x00BA
        li          r8, 1
        li          r10, 0
        lis         r21, 0x0412
        ori         r21, r21, 0x0000
        lis         r3, 0x0000
        ori         r3, r3, 0x0000
        lis         r7, 0x0000
        ori         r7, r7, 0x0015
        stw         r3, 56 (r2)         # EXI2 DATA = 0 (Get ID command)
        stw         r22, 40 (r2)        # Select AD16, through EXI2 CSR.
        lwz         r16, 40 (r2)        # Read CSR back; the value is not used.
        stw         r7, 52 (r2)         # Write immediate 2 bytes from DATA.
WaitAd16_0:
        lwz         r16, 52 (r2)        # |
        and.        r16, r16, r8        # | Wait until transfer complete.
        bgt+        WaitAd16_0          # | (loops while bit 0 is still set)
        lis         r7, 0x0000
        ori         r7, r7, 0x0031
        stw         r7, 52 (r2)         # Read immediate 4 bytes to DATA (ID).
WaitAd16_1:
        lwz         r16, 52 (r2)        # |
        and.        r16, r16, r8        # | Wait until transfer complete.
        bgt+        WaitAd16_1          # | (loops while bit 0 is still set)
        stw         r10, 40 (r2)        # Deselect device.
        lwz         r16, 40 (r2)        # Read EXI2 CSR twice. Why? No idea.
        lwz         r16, 40 (r2)        # Maybe its deselect attribute..
        lwz         r20, 56 (r2)        # r20 = DATA. It should contain ID.
        b           Jump_0
        .word       0                   # Zero filler words, never executed.
        .word       0

# Write "trace step" value to AD16. Only when probe was success (R20 = AD16 ID).
# Input value (trace step) must be in R15.
#
# AD16Write - callable with bl; the entry point is the AD16Write label near
# the end of this block.
#   In:   r15 = 32-bit value to send
#         r20 / r21 = probed ID / expected ID (write is skipped if they differ)
#         r2, r8, r10, r22 as left by the AD16 probe code
#   Uses: r3, r7, r16, CR0. Returns with blr.
# Sequence when r20 == r21: select the device, send the single command byte
# 0xA0, send the 4 bytes of r15, deselect.
#
# The routine body is split into pieces (DoAD16Write, AD16Write_0..2) that are
# chained with unconditional branches. The Jump_0..Jump_4 branches sitting
# between the pieces are a separate chain: it is entered from the probe code
# and ends at Trace_01. See the note by tmbinc at the end of the file for why
# the code is laid out like this.
Jump_0:
        b           Jump_1
DoAD16Write:
        lis         r3, 0xA000
        ori         r3, r3, 0x0000
        lis         r7, 0x0000
        ori         r7, r7, 0x0005
        stw         r3, 56 (r2)         # EXI2 DATA = 0xA0000000 (Write AD16 command)
        stw         r22, 40 (r2)        # Select AD16, through EXI2 CSR.
        lwz         r16, 40 (r2)        # Read CSR back; the value is not used.
        b           AD16Write_0
Jump_1:
        b           Jump_2
AD16Write_0:
        stw         r7, 52 (r2)         # Write immediate 1 byte from DATA.
WaitAd16Write_0:
        lwz         r16, 52 (r2)        # |
        and.        r16, r16, r8        # | Wait until transfer complete.
        bgt+        WaitAd16Write_0     # |
        nop
        nop
        b           AD16Write_1
Jump_2:
        b           Jump_3
AD16Write_1:
        lis         r7, 0x0000
        ori         r7, r7, 0x0035
        stw         r15, 56 (r2)        # EXI2 DATA = trace step
        stw         r7, 52 (r2)         # Write immediate 4 bytes from DATA.
WaitAd16Write_1:
        lwz         r16, 52 (r2)        # |
        and.        r16, r16, r8        # | Wait until transfer complete.
        b           AD16Write_2         # | (the conditional branch of this
                                        # |  loop is at AD16Write_2)
Jump_3:
        b           Jump_4
AD16Write_2:
        bgt+        WaitAd16Write_1     # Uses CR0 from the and. above; the
                                        # unconditional b does not alter CR0.
        stw         r10, 40 (r2)        # Deselect device.
        lwz         r16, 40 (r2)        # CSR is read back twice, as in the
        lwz         r16, 40 (r2)        # probe code; the values are not used.
        blr
        .word       0                   # Zero filler word, never executed.
Jump_4:
        b           Trace_01
AD16Write:
        cmplw       r20, r21            # If AD16 probe failed, then skip.
        beq+        DoAD16Write
        blr

# Trace step 0x01 - Nothing ?
# Sends 0x01000000 through AD16Write (a no-op when the AD16 probe failed),
# then continues at Trace_02. The nops do no work.
Trace_01:
        lis         r15, 0x0100         # AD16 = 0x01000000
        bl          AD16Write
        nop
        nop
        nop
        nop
        nop
        nop
        nop
        b           Trace_02
        .word       0                   # Zero filler words, never executed.
        .word       0

# Trace step 0x02 - Nothing ?
# Sends 0x02000000 through AD16Write, then enters the Jump_5 branch chain.
Trace_02:
        nop
        nop
        nop
        nop
        nop
        lis         r15, 0x0200         # AD16 = 0x02000000
        bl          AD16Write
        b           Jump_5

# Memory self test with given pattern.
#
# TestMem - fill main memory with a pattern, read it back word by word and
# record every mismatch. Callable with bl; returns with blr.
#   In:   r25 = start address
#         r26 = 32-bit pattern
#   Out (accumulated, never cleared here - the caller must zero them first):
#         r17, r18, r19 |= one bit per failing word (see TestMem_3..5)
#         r27 += 1 for each failing word whose address low nibble is 8..F
#         r28 += 1 for each failing word whose address low nibble is 0..7
#         r29  = highest failing address seen on the 8..F path (unsigned)
#   Uses: r15, r16, r23, r24, CTR, CR0.
# The tested size is fixed: 0x01800000 bytes (24MB).
#
# The routine is split into pieces (TestMem_0..TestMem_7) chained with
# unconditional branches. The Jump_5..Jump_13 branches between the pieces are
# a separate chain that is entered from Trace_02 and ends at Trace_03.
Jump_5:
        b           Jump_6
TestMem:
        nop
        nop
        nop
        nop
        nop
        mr          r23, r25            # r23 = write cursor
        lis         r24, 0x0180         # Main memory size (24MB)
        b           TestMem_0
Jump_6:
        b           Jump_7
TestMem_0:
        rlwinm      r24, r24, 27, 5, 31 # Fill memory (by 32 byte portions).
                                        # r24 = size >> 5 = number of portions
        mtctr       r24
FillMem:
        stw         r26, 0 (r23)
        stw         r26, 4 (r23)
        stw         r26, 8 (r23)
        stw         r26, 12 (r23)
        b           TestMem_1
Jump_7:
        b           Jump_8
TestMem_1:
        stw         r26, 16 (r23)
        stw         r26, 20 (r23)
        stw         r26, 24 (r23)
        stw         r26, 28 (r23)
        addi        r23, r23, 32
        bdnz+       FillMem
        b           TestMem_2
Jump_8:
        b           Jump_9
TestMem_2:
        mr          r23, r25            # r23 = read cursor, back at the start
        lis         r24, 0x0180
        rlwinm      r24, r24, 30, 2, 31 # r24 = size >> 2 = number of words
        mtctr       r24
TestLoop:
        lwz         r15, 0 (r23)        # Begin to test.
        cmplw       r15, r26
        b           TestMem_3
Jump_9:
        b           Jump_10
TestMem_3:
        beq-        NextIteration       # Word matches the pattern: no error.
        # Mismatch. r15 = (address >> 18) & 0x3FFF, reduced to its low 5 bits
        # and used as a bit index into r17.
        rlwinm      r15, r23, 14, 18, 31
        andi.       r15, r15, 0x001F
        li          r16, 1
        slw         r16, r16, r15
        or          r17, r17, r16
        b           TestMem_4
Jump_10:
        b           Jump_11
TestMem_4:
        # NOTE(review): (x - 32) & 0x1F equals x & 0x1F, so as listed this
        # sets the same bit in r18 that was just set in r17. Presumably
        # r17/r18/r19 were meant to cover three different address ranges -
        # verify against the ROM dump.
        rlwinm      r15, r23, 14, 18, 31
        subi        r15, r15, 32
        andi.       r15, r15, 0x001F
        li          r16, 1
        slw         r16, r16, r15
        or          r18, r18, r16
        b           TestMem_5
Jump_11:
        b           Jump_12
TestMem_5:
        # Same remark as for TestMem_4: (x - 64) & 0x1F equals x & 0x1F.
        rlwinm      r15, r23, 14, 18, 31
        subi        r15, r15, 64
        andi.       r15, r15, 0x001F
        li          r16, 1
        slw         r16, r16, r15
        or          r19, r19, r16
        b           TestMem_6
Jump_12:
        b           Jump_13
TestMem_6:
        rlwinm      r15, r23, 0, 28, 31 # r15 = address & 0xF
        cmplwi      r15, 8
        # Fix: the branch target used to be ReadErrorBank0, i.e. the very next
        # instruction, which made ReadErrorBank1 unreachable and left r27
        # always zero. Low nibble >= 8 now goes to ReadErrorBank1.
        bge-        ReadErrorBank1
ReadErrorBank0:
        addi        r28, r28, 1         # Low nibble 0..7: count in r28.
        b           NextIteration
ReadErrorBank1:
        addi        r27, r27, 1         # Low nibble 8..F: count in r27.
        b           TestMem_7
Jump_13:
        b           Trace_03
TestMem_7:
        cmplw       r29, r23            # r29 = max(r29, failing address)
        bge-        NextIteration
        mr          r29, r23
NextIteration:
        addi        r23, r23, 4
        bdnz+       TestLoop
        blr

# Clear registers for memory test (see next). Trace step 0x03 - Nothing ?
# Zeroes the registers that TestMem only ever ORs into or increments
# (r17, r18, r19, r27, r28) and the address register r29, then sends
# 0x03000000 through AD16Write and enters the Jump_14 branch chain.
Trace_03:
        li          r17, 0
        li          r18, 0
        li          r19, 0
        li          r27, 0
        li          r28, 0
        li          r29, 0
        lis         r15, 0x0300         # AD16 = 0x03000000
        bl          AD16Write
        b           Jump_14
        .word       0                   # Zero filler words, never executed.
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0

# Test memory by some patterns.
#
# MemorySelfTest - runs TestMem twice from address 0x80000000, first with
# pattern 0xAAAAAAAA and then with its complement, builds a result word in
# r15, sends it through AD16Write and continues at Halt.
# Result word: 0x04000000
#              | 0x02000000 if r27 != 0
#              | 0x01000000 if r28 != 0
#              | (r29 >> 2)
# It is reached only from Jump_17, and only when r20 == r21 (AD16 probe
# succeeded). The Jump_14..Jump_16 branches between the pieces are a separate
# chain that is entered from Trace_03 and ends at Jump_17.
Jump_14:
        b           Jump_15
MemorySelfTest:
        lis         r25, 0x8000         # r25 = 0x80000000, TestMem start address
        lis         r26, 0xAAAA
        ori         r26, r26, 0xAAAA    # r26 = 0xAAAAAAAA
        bl          TestMem             # Test memory with 0xAA pattern.
        not         r26, r26            # r26 = 0x55555555
        bl          TestMem             # Test memory with 0x55 pattern.
        nop
        b           MemorySelfTest_0
Jump_15:
        b           Jump_16
MemorySelfTest_0:
        lis         r15, 0x0400         # r15 = 0x04000000, base of result word
        mr.         r16, r27            # Copy r27 to r16 and test it for zero.
        beq-        ReadError_8_F       # r27 == 0: skip the 0x0200 flag.
ReadError_0_7:
        oris        r15, r15, 0x0200
ReadError_8_F:
        mr.         r16, r28            # Copy r28 to r16 and test it for zero.
        beq-        MemorySelfTestWriteAd   # r28 == 0: skip the 0x0100 flag.
        b           MemorySelfTest_1
Jump_16:
        b           Jump_17
MemorySelfTest_1:
        oris        r15, r15, 0x0100
MemorySelfTestWriteAd:
        rlwinm      r29, r29, 30, 2, 31 # r29 = r29 >> 2
        or          r15, r15, r29
        bl          AD16Write           # Set AD16 value.
        nop
        nop
        b           Halt

# Halt execution if memory test failed.
# Jump_17 is the end of the Jump_14 branch chain. The memory test is run only
# when r20 == r21 (AD16 probe succeeded); otherwise execution falls straight
# through to Halt with r27 and r28 still zero from Trace_03.
Jump_17:
        cmplw       r20, r21
        beq+        MemorySelfTest
# Halt: loops forever while r27 or r28 is non-zero. Nothing inside the loop
# changes either register.
Halt:
        mr.         r16, r27            # Bad address with last digit 8-F ?
        bne+        Halt
        mr.         r16, r28            # Bad address with last digit 0-7 ?
        bne+        Halt

# Prepare GPR registers for IPL loading.
# Register roles, as used by TransferIPL:
#   r2  = 0xCC006800  EXI register base
#   r6  = 0x00000150  value written to EXI0 CSR to select the bootrom
#   r7  = 0x00000035  start "write 4 bytes immediate"
#   r8  = 1           mask for the busy bit that is polled
#   r9  = 0x00000003  start DMA transfer
#   r10 = 0           value written to EXI0 CSR to deselect
#   r11 = 0x00000400  chunk size in bytes
#   r12 = 0x00010000  added to r3 after each chunk
#   r3  = 0x00020000  bootrom command word for the current chunk
#   r4  = 0x012FFFE0  destination address for the current chunk
#   r13 = 0x00170000  bytes remaining
# NOTE(review): r3 and r12 equal 0x800 << 6 and 0x400 << 6, so the bootrom
# offset is presumably carried shifted left by 6 in the command word -
# confirm against the EXI bootrom command format.
        lis         r2, 0xCC00          # EXI registers base
        ori         r2, r2, 0x6800
        lis         r6, 0x0000          # EXI0 CSR setup: device 1, 32MHz
        ori         r6, r6, 0x0150
        lis         r7, 0x0000
        ori         r7, r7, 0x0035
        li          r8, 1
        lis         r9, 0x0000
        ori         r9, r9, 0x0003
        li          r10, 0
        lis         r11, 0x0000         # Max length of single transfer
        ori         r11, r11, 0x0400
        lis         r12, 0x0001
        ori         r12, r12, 0x0000
        lis         r3, 0x0002          # Bootrom starting offset (0x800)
        ori         r3, r3, 0x0000
        lis         r4, 0x012F          # Main memory starting address
        ori         r4, r4, 0xFFE0
        lis         r13, 0x0017         # Transfer length
        ori         r13, r13, 0x0000
        b           Jump_18
        .word       0                   # Zero filler words, never executed.
        .word       0
        .word       0
        .word       0
Jump_18:
        b           Jump_19

# Transfer IPL to main memory. Bootrom scrambler is decrypting data on the fly.
# Starting memory address: 0x012FFFE0.
# Starting bootrom offset: 0x800.
# Total length of transfer: 0x170000 bytes.
#
# Loop, one chunk per iteration, until r13 (bytes remaining) reaches 0:
#   1. r5 = min(r11, r13), the length of this chunk.
#   2. Select the bootrom (EXI0 CSR = r6).
#   3. Send the 4-byte command word r3 as an immediate transfer and wait.
#   4. Program the DMA address (r4) and length (r5), start the DMA with r9
#      and wait.
#   5. Deselect (EXI0 CSR = r10).
#   6. r3 += r12, r4 += r11, r13 -= r5.
# When r13 == 0, control goes to SetupEntrypoint.
# Register roles are set up by the "Prepare GPR registers" code; r16 is
# scratch. Offsets from r2 used here: 0 = EXI0 CSR, 4 = MAR, 8 = LEN,
# 12 = register that starts a transfer and is polled, 16 = EXI0 DATA.
# The Jump_19..Jump_22 branches between the pieces belong to a separate branch
# chain (Jump_18 .. Jump_31) whose last branch enters TransferIPL.
TransferIPL:
        cmpwi       r13, 0              # All bytes transferred ?
        beq-        SetupEntrypoint
        mr          r5, r11             # r5 = chunk size
        cmplw       r13, r5
        bgt-        SelectEXI           # More than one chunk left: keep r5.
        mr          r5, r13             # Last chunk: r5 = bytes remaining.
SelectEXI:
        stw         r6, 0 (r2)          # Select bootrom, through EXI0 CSR.
        b           TransferIPL_0
Jump_19:
        b           Jump_20
TransferIPL_0:
        stw         r3, 16 (r2)         # EXI0 DATA - offset in bootrom + write command
        lwz         r16, 0 (r2)         # Read CSR back; the value is not used.
        stw         r7, 12 (r2)         # Write immediate 4 bytes from DATA.
WaitTransferIPL_0:
        lwz         r16, 12 (r2)        # |
        and.        r16, r16, r8        # | Wait until transfer complete.
        bgt+        WaitTransferIPL_0   # |
        b           TransferIPL_1
Jump_20:
        b           Jump_21
TransferIPL_1:
        stw         r4, 4 (r2)          # EXI0 MAR - DMA memory address.
        lwz         r4, 4 (r2)          # Read back: r4 now holds what MAR returns.
        stw         r5, 8 (r2)          # EXI0 LEN - DMA transfer length.
        lwz         r5, 8 (r2)          # Read back: r5 now holds what LEN returns.
        # NOTE(review): r9 = 3 has the read/write field (bits 2-3, which are
        # set in the "write" values 0x05/0x15/0x35 used elsewhere) clear, so
        # this looks like a DMA read from the device into memory; "write"
        # here presumably means "write to main memory" - confirm.
        stw         r9, 12 (r2)         # Start EXI0 DMA write transfer.
WaitTransferIPL_1:
        lwz         r16, 12 (r2)
        b           TransferIPL_2
Jump_21:
        b           Jump_22
TransferIPL_2:
        and.        r16, r16, r8        # | Wait until transfer complete.
        bgt+        WaitTransferIPL_1   # |
        stw         r10, 0 (r2)         # Deselect device.
        lwz         r16, 0 (r2)         # CSR is read back twice; the values
        lwz         r16, 0 (r2)         # are not used.
        add         r3, r3, r12         # Next bootrom command word.
        b           TransferIPL_3
Jump_22:
        b           Jump_23
TransferIPL_3:
        add         r4, r4, r11         # Advance pointers.
        sub         r13, r13, r5        # r13 = bytes remaining
        b           TransferIPL

# Set link register to IPL entrypoint.
# Reached from TransferIPL once r13 == 0. The blr in StartExecuteIPL later
# jumps to the address loaded here. Clobbers r4.
SetupEntrypoint:
        lis         r4, 0x8130
        ori         r4, r4, 0x0000
        mtlr        r4                  # LR = 0x81300000
        b           DisableScrambler

# Disable bootrom decryption logic and disallow 32MHz EXI clock setting by CPU.
# Expects r2 = EXI register base and r14 = 0xCC006400 (both loaded earlier in
# the boot stage). Clobbers r4 and r6. Jump_23 and Jump_24 are links of the
# Jump_18 .. Jump_31 branch chain, not part of this routine.
Jump_23:
        b           Jump_24
DisableScrambler:
        lis         r6, 0x0000
        ori         r6, r6, 0x2000      # r6 = 0x00002000
        stw         r6, 0 (r2)          # Set ROMDIS bit in EXI0 CSR.
        li          r4, 1
        stw         r4, 60 (r14)        # SI EXICLK[LOCK] = 1
        lwz         r4, 60 (r14)        # Read the register back; value unused.
        b           StartExecuteIPL
Jump_24:
        b           Jump_25

# Clear OS pointer to DVD BI2 location. Jump to IPL entrypoint.
# Stores 0 to address 0x800000F4, then returns through LR, which
# SetupEntrypoint loaded with 0x81300000.
StartExecuteIPL:
        lis         r4, 0x8000
        li          r3, 0
        stw         r3, 0x00F4 (r4)     # word at 0x800000F4 = 0
        blr                             # !! IPL START TO EXECUTE !!
        .word       0                   # Zero filler words, never executed.
        .word       0

# This how may look BS from the scratch.. Actual code fills zeroed words.
# Tail of the branch chain that starts at Jump_18: each Jump_N below is one
# branch followed by seven zero words (8 words = 0x20 bytes per link). The
# last link, Jump_31, branches to TransferIPL, which is how the IPL transfer
# loop is entered. The zero words are never executed.
Padding:
Jump_25:
        b           Jump_26
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
Jump_26:
        b           Jump_27
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
Jump_27:
        b           Jump_28
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
Jump_28:
        b           Jump_29
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
Jump_29:
        b           Jump_30
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
Jump_30:
        b           Jump_31
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
Jump_31:
        b           TransferIPL         # End of the chain: start the IPL transfer.
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0
        .word       0

# Note by tmbinc:
# Why does BS jump around? It is because of the way instructions are fetched:
# they must be fetched in exact linear order, otherwise the descrambling goes
# out of sync. So, in order to be able to loop at all, BS enables the icache
# and, to fill it, jumps to the first location of each icache line.
# That is why BS jumps in 0x20-byte steps.