mirror of
https://github.com/daniel5151/ANESE.git
synced 2025-04-02 10:32:00 -04:00
1540 lines
31 KiB
PHP
Executable file
Vendored
1540 lines
31 KiB
PHP
Executable file
Vendored
|
|
|
|
; ---------------------------------------------------------------------
; PPU register shadows and sprite bookkeeping.
; ---------------------------------------------------------------------
P_SYSCTRL	db 0		; byte stored by PPU_write for index 0
P_DISPCTRL	db 0		; byte stored by PPU_write for index 1
P_STATUS	db 0		; 0x80 = in vblank, 0x40 = sprite-0 hit, 0x20 = sprite overflow
PPU_READ_BUFFER	db 0		; one-read-delayed data latch used by PPU_read index 7

; LAYOUT-SENSITIVE PAIR: ShowScanline executes "les di, [P_SCANLINE]",
; which loads DI from P_SCANLINE and ES from the word right after it.
; vseg must therefore stay immediately behind P_SCANLINE.
P_SCANLINE	dw 0		; current scanline (-1 = pre-render line)
vseg		dw 0A000h	; VGA video segment

P_SPRINPOS	dw 0		; sprites examined so far on this line (low byte used)
P_SPRRENPOS	dw 0		; sprites whose pattern has been fetched for rendering
|
|
|
|
section .text

;-----------------------------------------------------------------------
; PPU_tick -- advance the PPU by one dot.
;
; In:    nothing (all state lives in memory / self-modified immediates)
; Out:   nothing
; Clobb: AX, BL here; PPU_RenderingTick, PPU_RenderPixel and
;        ShowScanline clobber further registers when they are called.
;
; Several "variables" are the immediate operand of an instruction and
; are named with "NAME EQU $-n" right behind it (P_VBL_STATE,
; P_NTSC_PHASE, P_SCANLINE_END, P_EVENODD).  Do not change the encoding
; of those instructions.
;-----------------------------------------------------------------------
PPU_tick:
	; --- vblank state counter: >0 counts down, <0 counts up, 0 = idle
	mov	al, 0
P_VBL_STATE EQU $-1
	test	al, al
	jz	.VBLstate0
	jl	.VBLminus

.VBLplus:
	cmp	al, 2
	jne	.VBLplus2
	or	byte [P_STATUS], 0x80	; raise the in-vblank flag
.VBLplus2:
	dec	ax			; only AL is stored back
	mov	[P_VBL_STATE], al
	jmp	.VBLstateDef

.VBLminus:
	cmp	al, -5
	jne	.VBLminus2
	mov	byte [P_STATUS], 0x00	; vblank ends: drop every status flag
.VBLminus2:
	inc	ax
	mov	[P_VBL_STATE], al
	jmp	.VBLstateDef

.VBLstate0:
	; NMI line = status & sysctrl & 0x80
	mov	al, [P_STATUS]
	and	al, [P_SYSCTRL]
	and	al, 80h
	mov	[C_NMI], al

.VBLstateDef:
	cmp	word [P_SCANLINE], 240	; signed: the pre-render line is -1
	jge	.NoRendering

	; if(ShowBG || ShowSP) rendering_tick();
	mov	al, [P_DISPCTRL]
	test	al, 3 << 3
	jz	.NoRenderingTick
	call	PPU_RenderingTick
.NoRenderingTick:
	; if(scanline >= 0 && x < 256) render_pixel();
	cmp	word [P_SCANLINE], 0
	jl	.DoneRendering
	cmp	byte [P_X+1], 0		; high byte of x non-zero => x >= 256
	jnz	.DoneRendering
	call	PPU_RenderPixel
.DoneRendering:
.NoRendering:
	inc	word [P_X]

	; The NTSC phase advances after every dot, rendered or not.
	mov	bl, 0
P_NTSC_PHASE EQU $-1
	call	NTSC_phase_inc_bl
	mov	[P_NTSC_PHASE], bl

	mov	ax, [P_X]
	cmp	ax, 341
P_SCANLINE_END EQU $-2			; dots in the current scanline
	jb	.ScanlineUnfinished

	; --- end of scanline
	mov	word [P_SCANLINE_END], 341
	mov	[P_X], word 0

	mov	ax, [P_SCANLINE]
	cmp	ax, 240			; unsigned: -1 (0xFFFF) skips the blit too
	jae	.DontShow
	call	ShowScanline
	mov	ax, [P_SCANLINE]	; AX was clobbered by the call
.DontShow:
	inc	ax
	mov	[P_SCANLINE], ax
	cmp	ax, 241
	je	.VBLbegin
	test	ax, ax
	jz	.EndOfPreRenderLine
	cmp	ax, 261
	jl	.ScanlineUnfinished

.VBLend:
	mov	[P_VBL_STATE], byte -5
	mov	[P_SCANLINE], word -1
	xor	[P_EVENODD], byte 1
%if 0
	; Disabled frame-skip counter.
	mov	ax, 0
P_FRAMECOUNT EQU $-2
	dec	ax
	jns	.NotReset
	mov	ax, 0
P_FRAME_SKIP EQU $-2
.NotReset:
	mov	[P_FRAMECOUNT], ax
%endif
	jmp	short .ScanlineUnfinished

.VBLbegin:
	mov	[P_VBL_STATE], byte 2
	jmp	short .ScanlineUnfinished

.EndOfPreRenderLine:
	; FIXME: this should happen at x=304 (small timing difference).
	; vaddr = taddr only if some part of rendering is enabled.
	test	byte [P_DISPCTRL], 15*2
	jz	.ScanlineUnfinished
	mov	ax, [TADDR_BUF]
	mov	[VADDR_BUF], ax

	; With rendering on, the next scanline is 341 - P_EVENODD dots long.
	; 341 = 0x155, so subtracting 0/1 from AL never borrows.
	mov	ax, word 341
	sub	al, byte 0
P_EVENODD EQU $-1			; toggled 0/1 in .VBLend
	mov	[P_SCANLINE_END], ax
.ScanlineUnfinished:
	ret
|
|
|
|
|
|
|
|
;-----------------------------------------------------------------------
; PPU_write -- CPU write to a PPU register.
;
; In:    AX = register index (0..7, already clamped)
;        DL = byte being written
; Out:   nothing
; Clobb: AX, DX (EAX for index 1); BX is preserved.
;
; P_OAMADDR and P_OFFSETTOGGLE are self-modified immediates defined here.
;-----------------------------------------------------------------------
PPU_write:
	mov	[PPU_OPEN_BUS], dl	; every write refreshes the open-bus latch

	; Binary dispatch on the index using both CF and ZF of each compare.
	cmp	ax, 1
	jb	.WriteSysCtrl		; 0
	je	.WriteDispCtrl		; 1
	cmp	ax, 3
	je	.WriteOAMaddr		; 3
	jb	.WriteNothing		; 2
	cmp	ax, 5
	jb	.WriteOAM		; 4
	je	.WriteScroll		; 5
	cmp	ax, 7
	je	.WriteMemory		; 7
	jb	.WriteMemoryPosition	; 6
.WriteNothing:
	ret

; ---- index 0 ---------------------------------------------------------
.WriteSysCtrl:
	mov	[P_SYSCTRL], dl
	; taddr bits 10-11 (base nametable) = value & 3
	mov	ax, [TADDR_BUF]
	and	ax, ~(3 << 10)
	and	dx, 3
	shl	dx, 10
	or	ax, dx
	mov	[TADDR_BUF], ax
	ret

; ---- index 1 ---------------------------------------------------------
.WriteDispCtrl:
	mov	[P_DISPCTRL], dl
	; Build the attenuation mask used by NTSC_synthesize from the three
	; top bits (7, 6, 5) of the value; each set bit ORs in one
	; phase-shifted copy of a 6-on/6-off pattern.
	xor	eax, eax
	shl	dl, 1			; CF = bit 7
	jnc	.Not4
	or	eax, 00111111000000111111000000111111b
.Not4:
	shl	dl, 1			; CF = bit 6
	jnc	.Not2
	or	eax, 11110000001111110000001111110000b
.Not2:
	shl	dl, 1			; CF = bit 5
	jnc	.Not1
	or	eax, 00000011111100000011111100000011b
.Not1:
	mov	[P_ATTENUATION_MASK], eax
	ret

; ---- index 3 ---------------------------------------------------------
.WriteOAMaddr:
	mov	[P_OAMADDR], dl		; low byte only; the high byte stays 0
	ret

; ---- index 4 ---------------------------------------------------------
.WriteOAM:
	push	bx
	mov	bx, word 0x0000
P_OAMADDR EQU $-2			; OAM address lives in this immediate
	mov	[OAM+bx], dl
	inc	byte [P_OAMADDR]	; byte increment: wraps at 256
	pop	bx
	ret

; ---- index 5 ---------------------------------------------------------
.WriteScroll:
	cmp	byte [P_OFFSETTOGGLE], 0
	mov	ax, [TADDR_BUF]		; MOV leaves the flags alone
	jnz	.SHi
.SLo:
	; First write: xcoarse = value >> 3, xfine = value & 7
	and	al, ~31
	mov	dh, dl
	shr	dh, 3
	or	al, dh
	and	dx, 7			; also clears DH
	mov	[XFINE], dx
	jmp	short .Sdone
.SHi:
	; Second write: yfine = value & 7 (bits 12-14), ycoarse = value >> 3 (bits 5-9)
	and	ax, ~((7 << 12) | (31 << 5))
	mov	dh, dl
	and	dh, 7
	shl	dh, 4
	or	ah, dh
	shl	dx, 2			; value bits 3-7 land on bits 5-9
	and	dx, 31<<5
	or	ax, dx
.Sdone:
	mov	[TADDR_BUF], ax
	jmp	short .DidWriteToggle

; ---- index 6 ---------------------------------------------------------
.WriteMemoryPosition:
	mov	al, byte 00h
P_OFFSETTOGGLE EQU $-1			; shared first/second-write toggle (0 or 0xFF)
	test	al, al
	jnz	.MLo
.MHi:
	; First write: high byte of taddr, limited to 6 bits
	and	dl, 0x3F
	mov	[TADDR_BUF+1], dl
	jmp	.DidWriteToggle
.MLo:
	; Second write: low byte of taddr, then vaddr = taddr
	mov	ah, [TADDR_BUF+1]
	mov	al, dl
	mov	[TADDR_BUF+0], al
	mov	[VADDR_BUF+0], ax
.DidWriteToggle:
	not	byte [P_OFFSETTOGGLE]
	ret

; ---- index 7 ---------------------------------------------------------
.WriteMemory:
	push	bx
	push	dx
	mov	ax, [VADDR_BUF]
	call	PPU_mmap		; BX = host address, CF = read-only area
	jc	.DoneWriteMemory	; 0000-1FFF is not writable; 2000-3FFF is
	cmp	ax, 0x3F00
	jae	.PaletteWrite
	mov	al, [PPU_OPEN_BUS]	; DL was stashed here on entry
	mov	[bx], al
	jmp	short .DoneWriteMemory
.PaletteWrite:
	mov	bx, ax
	test	al, 3
	jnz	.NotZero
	and	bx, 0x0F		; x10/x14/x18/x1C mirror x00/x04/x08/x0C
.NotZero:
	and	bx, 0x1F
	mov	al, [PPU_OPEN_BUS]
	and	al, 0x3F		; palette entries are stored as 6 bits
	mov	[PALETTE+bx], al
.DoneWriteMemory:
	call	PPU_IncAddr
	pop	dx
	pop	bx
	ret
|
|
|
|
;-----------------------------------------------------------------------
; PPU_read -- CPU read from a PPU register.
;
; In:    AX = register index (0..7, already clamped)
; Out:   AL = byte read (the open-bus latch for unhandled indices)
; Clobb: AH; BX and DX are preserved.
;
; PPU_OPEN_BUS is the immediate of the "mov al, imm8" at .DoneRead.
;-----------------------------------------------------------------------
PPU_read:
	cmp	ax, 2
	jb	.DoneRead		; 0, 1
	je	.ReadStatus		; 2
	cmp	ax, 4
	jb	.DoneRead		; 3
	je	.ReadOAM		; 4
	cmp	ax, 7
	je	.ReadMemory		; 7
	; 5 and 6 fall through
.DoneRead:
	mov	al, 0x00
PPU_OPEN_BUS EQU $-1
	ret

.ReadOAM:
	push	bx
	mov	bx, [P_OAMADDR]
	mov	al, [OAM+bx]
	mov	[PPU_OPEN_BUS], al	; FIXME: for addr%4 == 2, update only &0xE3
	pop	bx
	ret

.ReadStatus:
	; Low five bits come from the open bus, the rest from P_STATUS.
	mov	al, [PPU_OPEN_BUS]
	and	al, 0x1F
	or	al, [P_STATUS]
	mov	[PPU_OPEN_BUS], al
	and	byte [P_STATUS], 0x7F	; reading clears the in-vblank flag
	mov	byte [P_OFFSETTOGGLE], 0 ; and resets the write toggle
	cmp	[P_VBL_STATE], byte -5
	je	.DontResetVBLstate
	mov	[P_VBL_STATE], byte 0	; may also cancel a pending vblank flag
.DontResetVBLstate:
	ret

.ReadMemory:
	push	bx
	push	dx
	mov	ax, [VADDR_BUF]
	call	PPU_mmap		; BX = host address, AX = 14-bit PPU address
	; The read buffer is refilled even when the address is in palette
	; space; DL receives the buffer's previous contents.
	mov	dl, [bx]
	xchg	dl, [PPU_READ_BUFFER]
	cmp	ax, 0x3F00
	jae	short .PaletteRead
	mov	[PPU_OPEN_BUS], dl	; normal read returns the old buffer
	jmp	short .DoneReadMemory
.PaletteRead:
	mov	bx, PALETTE
	test	al, 3
	jnz	.NotZero
	and	al, 0x0F		; x10/x14/x18/x1C mirror x00/x04/x08/x0C
.NotZero:
	and	al, 0x1F
	xlatb				; al = PALETTE[al]
	mov	ah, [PPU_OPEN_BUS]
	and	ax, 0xC03F		; keep the top 2 open-bus bits, 6 palette bits
	or	al, ah
	mov	[PPU_OPEN_BUS], al
.DoneReadMemory:
	call	PPU_IncAddr
	pop	dx
	pop	bx
	jmp	.DoneRead		; return the latch in AL
|
|
|
|
|
|
;-----------------------------------------------------------------------
; PPU_mmap -- translate a PPU address into a host pointer.
;
; In:    AX = address in PPU memory
; Out:   BX = address of the backing byte in emulator memory
;        CF = 1 if the address is in 0000-1FFF (pattern ROM, not
;             writable), 0 if it is in 2000-3FFF (nametable RAM)
;        AX = input masked to 14 bits
; Clobb: flags; CX, DX, SI, DI are preserved.
;-----------------------------------------------------------------------
PPU_mmap:
	and	ax, 0x3FFF
	test	ax, 0x2000
	jz	.VBankRead

	; Nametable: C_NTA holds one base pointer per 1 KiB table.
	mov	bx, ax
	shr	bx, 10-1		; table number * 2 (word index)
	and	bx, 3*2
	mov	bx, [C_NTA + bx]
	push	ax
	and	ax, 0x3FF		; offset inside the table
	add	bx, ax
	pop	ax
	clc				; RAM
	ret

.VBankRead:
	; For now the pattern-bank granularity is 8k (CNROM).
	mov	bx, ax
	add	bx, [C_VROMPAGE]
	stc				; ROM
	ret
|
|
|
|
;-----------------------------------------------------------------------
; PPU_IncAddr -- step VADDR_BUF after an index-7 access.
;
; Adds 32 when bit 2 of P_SYSCTRL is set, otherwise 1.
; In/Out: none.  Clobb: flags only.
;-----------------------------------------------------------------------
PPU_IncAddr:
	test	byte [P_SYSCTRL], 4
	jz	.IncBy1
	add	word [VADDR_BUF], 32
	ret
.IncBy1:
	inc	word [VADDR_BUF]
	ret
|
|
|
|
|
|
;-----------------------------------------------------------------------
; PPU_RenderingTick -- per-dot fetch pipeline (background tiles, sprite
; pattern fetches, sprite evaluation).  Called by PPU_tick while
; rendering is enabled.
;
; In:    nothing in registers
; Out:   nothing
; Clobb: AX, BX, CX, EDX, SI, flags
;
; EDX is the "tile decode mode" flag for the whole x%8 dispatch:
; non-zero when x < 256 or 320 <= x < 336, zero otherwise.
;
; Self-modified immediates defined inside this routine: P_X, VADDR_BUF,
; P_IOADDR, TADDR_BUF, P_TILEPAT, P_TILEATTR, P_PATADDR, P_SPR_DATA,
; P_SPROUTPOS.  Their host instructions must keep their encoding.
;-----------------------------------------------------------------------
PPU_RenderingTick:
	mov	cx, 0x0000
P_X EQU $-2				; current dot within the scanline
	mov	bx, cx

	; tile_decode_mode = x<256 || (x >= 320 && x < 336)
	shr	cx, 4			; cl = x/16
	mov	edx, 1
	shl	edx, cl			; edx = 1 << (x/16)
	and	edx, 0x10FFFF		; bits 0-15: x<256, bit 20: 320..335

	and	bx, 7			; x % 8
	shl	bx, 1
	jmp	[.Mod8Table + bx]
.Mod8Table:
	dw .Mod8_0, .Mod8_1, .Mod8_2, .Mod8_3, .Mod8_4, .Mod8_5, .Mod8_6, .Mod8_7

; ---- x%8 == 2: point ioaddr at the attribute table ---------------------
.Mod8_2:
	; ioaddr = 0x23C0 + 0x400*basenta + 8*(ycoarse/4) + (xcoarse/4)
	mov	ax, [VADDR_BUF]
	mov	cx, ax
	mov	bx, ax
	shr	bx, 4
	and	bx, (7<<3)		; bx = (ycoarse/4)*8
	shr	ax, 2
	and	ax, 7			; ax = xcoarse/4
	and	cx, 0xC00		; cx = basenta*0x400
	lea	ax, [0x23C0 + ebx + eax] ; only the low 16 bits of the sum are kept
	add	ax, cx
	mov	[P_IOADDR], ax
	or	edx, edx
	jnz	.Mod8_break		; falls into .Mod8_0 outside tile mode

; ---- x%8 == 0: point ioaddr at the nametable ---------------------------
.Mod8_0:
	mov	ax, word 0x0000
VADDR_BUF EQU $-2			; current VRAM address register
	and	ax, 0xFFF
	or	ax, 0x2000
	mov	[P_IOADDR], ax
	; Reset sprite bookkeeping at x == 0 and x == 256.
	xor	ax, ax
	mov	bx, [P_X]
	or	bx, bx
	jz	.Mod8_0_0
	cmp	bx, 256
	jnz	.Mod8_break
.Mod8_0_256:
	mov	[P_SPRRENPOS], al
	jmp	.Mod8_break
.Mod8_0_0:
	mov	[P_SPRINPOS], al
	mov	[P_SPROUTPOS], al
	mov	[P_OAMADDR], byte 0x00
	jmp	.Mod8_break

; ---- x%8 == 1: nametable access ----------------------------------------
.Mod8_1:
	; pat_addr = 0x1000*BGaddr + 16*mmap(ioaddr) + yfine
	mov	ax, word 0x0000
P_IOADDR EQU $-2			; address latched by the previous step
	call	PPU_mmap
	mov	bl, [bx]
	mov	bh, 0
	shl	bx, 4			; bx = 16*tile number
	mov	al, [VADDR_BUF+1]
	shr	al, 4
	and	ax, 7			; ax = yfine
	add	bx, ax
	mov	al, 0
	mov	ah, [P_SYSCTRL]
	and	ah, 0x10		; ax = 0x1000 if sysctrl bit 4 is set
	add	ax, bx
	mov	[P_PATADDR], ax
	or	edx, edx
	jnz	.Mod8_1_continues
	; Not tile mode: the only special action is at x == 257.
	cmp	word [P_X], 257
	jne	.Mod8_break
	; Copy xcoarse and the horizontal nametable bit from taddr to vaddr.
	mov	ax, word 0x0000
TADDR_BUF EQU $-2			; temporary VRAM address register
	and	ax, ((1 << 10) | (31 << 0))
	and	word [VADDR_BUF], ~((1 << 10) | (31 << 0))
	or	word [VADDR_BUF], ax
	jmp	.Mod8_break
.Mod8_1_continues:
	; Push the current tile into the shift registers:
	;   bg_shift_pat  = (bg_shift_pat  >> 16) | (tilepat << 16)
	;   bg_shift_attr = (bg_shift_attr >> 16) | (AttrTable[tileattr] << 16)
	mov	eax, [P_BG_SHIFT_PAT]
	mov	ax, word 0xAAAA
P_TILEPAT EQU $-2			; 16-bit interleaved pattern of the fetched tile
	ror	eax, 16
	mov	[P_BG_SHIFT_PAT], eax

	mov	eax, [P_BG_SHIFT_ATTR]
	mov	bx, word 0xAAAA
P_TILEATTR EQU $-2			; 2-bit attribute of the fetched tile
	mov	si, bx			; bx+si = tileattr*2 (word index)
	mov	ax, [.AttrTable + bx+si]
	ror	eax, 16
	mov	[P_BG_SHIFT_ATTR], eax
	jmp	.Mod8_break

section .const
.AttrTable:
	; Expands a 2-bit value to 16 bits by repeating it 8 times.
	dw 0000000000000000b
	dw 0101010101010101b
	dw 1010101010101010b
	dw 1111111111111111b
section .text

; ---- x%8 == 3: attribute table access / sprite pattern address ---------
.Mod8_3:
	or	edx, edx
	jnz	.Mod8_3_tilemode
	cmp	word [P_X], 335
	jae	.Mod8_3_done
.Mod8_3_spritemode:
	; pat_addr = 0x1000 if sysctrl bit 3 is set
	mov	al, [P_SYSCTRL]
	and	ax, 8
	shl	ax, 9
	mov	[P_PATADDR], ax
	mov	bx, [P_SPRRENPOS]	; sno
	cmp	bl, [P_SPROUTPOS]
	jae	.Mod8_3_done
	; Copy the evaluated sprite (OAM2) into the render set (OAM3).
	mov	al, [OAM2_sprindex + bx]
	mov	ah, [OAM2_attr + bx]
	mov	cl, [OAM2_x + bx]
	mov	[OAM3_sprindex + bx], al
	mov	[OAM3_attr + bx], ah
	mov	[OAM3_x + bx], cl
	; y = scanline - OAM2_y[sno]
	mov	al, [OAM2_y + bx]
	mov	ah, 0
	sub	ax, [P_SCANLINE]
	neg	ax			; ax = y
	mov	cx, [OAM2_index + bx]	; word load; CH is masked off below
	test	byte [P_SYSCTRL], 32	; 16-tall sprites?
	jz	.Mod8_3_sprite_8
.Mod8_3_sprite_16:
	test	byte [OAM3_attr + bx], 0x80
	jz	.NoYflip_16
	xor	al, 15			; vertical flip
.NoYflip_16:
	shl	cx, 12			; index bit 0 selects the 0x1000 bank
	and	cx, 0x1000
	mov	[P_PATADDR], cx
	mov	cx, [OAM2_index + bx]
	and	cx, 0xFE
	jmp	short .ChoseSprite
.Mod8_3_sprite_8:
	test	byte [OAM3_attr + bx], 0x80
	jz	.NoYflip_8
	xor	al, 7			; vertical flip
.NoYflip_8:
	mov	ch, 0			; cx &= 0xFF
.ChoseSprite:
	shl	cx, 4
	add	[P_PATADDR], cx
.Mod8_3_sprite_done:
	mov	bx, ax			; bx = ax = y
	and	bx, 8
	shl	bx, 1			; bx = (y&8)*2
	and	ax, 7
	add	ax, bx			; ax = (y&7) + (y&8)*2
	add	[P_PATADDR], ax
	jmp	.Mod8_3_done

.Mod8_3_tilemode:
	; tileattr = (mmap(ioaddr) >> ((xcoarse&2) + 2*(ycoarse&2))) & 3
	mov	al, [VADDR_BUF]
	mov	cl, al
	and	cl, 2			; cl = xcoarse&2
	shr	al, 4			; vaddr bit 6 -> bit 2
	and	al, 2<<1		; al = (ycoarse&2)*2
	add	cl, al
	mov	ax, [P_IOADDR]
	call	PPU_mmap		; preserves CX
	mov	al, [bx]
	shr	al, cl
	and	ax, 3
	mov	[P_TILEATTR], ax

	; Next tile horizontally: ++xcoarse (bits 0-4); on wrap toggle the
	; horizontal nametable bit (bit 10).
	mov	ax, [VADDR_BUF]
	mov	cl, al
	inc	cx
	and	cl, 31
	jnz	.DidntWrapHoriz
	xor	ah, 1 << (10-8)
.DidntWrapHoriz:
	and	al, ~31
	or	al, cl

	cmp	word [P_X], 251
	jne	.Mod8_3_tilemode_done
	; At x == 251 step vertically as well: ++yfine (bits 12-14); on
	; wrap ++ycoarse (bits 5-9); when ycoarse reaches 30, zero it and
	; toggle the vertical nametable bit (bit 11).
	add	ah, 1 << (12-8)
	and	ah, 0x7F		; drop the carry out of yfine
	test	ah, 7 << (12-8)
	jnz	.Mod8_3_tilemode_done
	mov	cx, ax
	add	cx, 1 << 5
	and	cx, 31 << 5
	cmp	cx, 30 << 5
	jne	.DidntWrapVert
	xor	cx, cx
	xor	ah, 1 << (11-8)
.DidntWrapVert:
	and	ax, ~(31 << 5)
	or	ax, cx
.Mod8_3_tilemode_done:
	mov	[VADDR_BUF], ax
.Mod8_3_done:
	; ioaddr = pat_addr (the immediate below is P_PATADDR itself)
	mov	[P_IOADDR], word 0xAAAA
P_PATADDR EQU $-2
	jmp	.Mod8_break

; ---- x%8 == 5: first byte of the tile pattern --------------------------
.Mod8_5:
	mov	ax, [P_IOADDR]
	call	PPU_mmap
	mov	al, [bx]
	mov	[P_TILEPAT], al
	jmp	.Mod8_break

; ---- x%8 == 7: second byte of the tile pattern -------------------------
.Mod8_7:
	mov	ax, [P_IOADDR]
	or	ax, 8
	call	PPU_mmap
	mov	ah, [bx]		; high byte (just read)
	mov	al, [P_TILEPAT]		; low byte (read at x%8 == 5)

	; Interleave the bits of the two bytes in three swap passes.
	mov	bx, ax
	mov	cx, ax
	and	ax, 0xF00F		; AAAAbbbbccccAAAA  FEDCBA9876543210
	and	bx, 0x0F00		;   becomes
	and	cx, 0x00F0		; AAAAccccbbbbAAAA  FEDC7654BA983210
	shr	bx, 4
	shl	cx, 4
	or	ax, bx
	or	ax, cx

	mov	bx, ax
	mov	cx, ax
	and	ax, 0xC3C3		; AAbbccAAAAbbccAA  FEDC7654BA983210
	and	bx, 0x3030		;   becomes
	and	cx, 0x0C0C		; AAccbbAAAAccbbAA  FE76DC54BA329810
	shr	bx, 2
	shl	cx, 2
	or	ax, bx
	or	ax, cx

	mov	bx, ax
	mov	cx, ax
	and	ax, 0x9999		; AbcAAbcAAbcAAbcA  FE76DC54BA329810
	and	bx, 0x4444		;   becomes
	and	cx, 0x2222		; AcbAAcbAAcbAAcbA  F7E6D5C4B3A29180
	shr	bx, 1
	shl	cx, 1
	or	ax, bx
	or	ax, cx
	mov	[P_TILEPAT], ax		; save the 16-bit tile

	; When decoding sprites, store the pattern and move to the next sprite.
	or	edx, edx
	jnz	.Mod8_break
	mov	bx, [P_SPRRENPOS]
	cmp	bl, [P_SPROUTPOS]
	jae	.Mod8_break
	inc	bx
	mov	[P_SPRRENPOS], bx
	shl	bx, 1
	mov	[OAM3_pattern + bx-2], ax

.Mod8_6:
.Mod8_4:
.Mod8_break:
	; ---- sprite evaluation, active for 64 <= x < 256 -----------------
	; Simplified scheme: even dots read one OAM byte, odd dots consume
	; it.  The 9-sprite hardware malfunction is not reproduced.
	mov	ax, [P_X]
	cmp	ax, 64
	jb	.DoneRenderTick
	cmp	ax, 256
	jae	.DoneRenderTick

	mov	bx, [P_OAMADDR]
	test	ax, 1
	jz	.SpriteAccessOAM

	inc	byte [P_OAMADDR]
	and	ebx, 3			; pre-increment OAM address % 4
	mov	al, 0xAA
P_SPR_DATA EQU $-1			; byte read on the preceding even dot
	mov	si, word 0x0000
P_SPROUTPOS EQU $-2			; sprites accepted for the next line
	jmp	[.SpriteCases + ebx*2]

.SpriteAccessOAM:
	mov	bh, 0
	mov	al, [OAM+bx]
	mov	[P_SPR_DATA], al
	jmp	.DoneRenderTick

.SpriteCases:
	dw .SpriteCase0, .SpriteCase1, .SpriteCase2, .SpriteCase3

.SpriteCase0:				; byte 0: Y coordinate
	cmp	byte [P_SPRINPOS], 64
	jae	.SpriteDone
	inc	byte [P_SPRINPOS]	; next sprite
	cmp	si, 8
	jae	.Already8
	mov	[OAM2_y+si], al
	mov	ah, [P_OAMADDR]		; already-incremented OAM address
	mov	[OAM2_sprindex+si], ah
.Already8:
	; In range when y1 <= scanline < y2
	mov	dx, [P_SCANLINE]
	mov	ah, 0			; ax = y1
	cmp	dx, ax
	jl	.SpriteNotInRange
	mov	ah, [P_SYSCTRL]
	and	ah, 32			; 0 or 32
	shr	ah, 2			; 0 or 8
	add	ah, 8			; sprite height: 8 or 16
	add	al, ah			; y2 (8-bit add)
	mov	ah, 0
	cmp	dx, ax
	jl	short .DoneRenderTick	; in range: carry on with byte 1
.SpriteNotInRange:
	add	byte [P_OAMADDR], 3	; skip the rest of this sprite
	jmp	.SpriteCase3_cont

.SpriteCase1:				; byte 1: tile index
	cmp	si, 8
	jae	short .DoneRenderTick
	mov	[OAM2_index+si], al
	jmp	short .DoneRenderTick

.SpriteCase2:				; byte 2: attributes
	cmp	si, 8
	jae	short .DoneRenderTick
	mov	[OAM2_attr+ si], al
	jmp	short .DoneRenderTick

.SpriteCase3:				; byte 3: X coordinate
	cmp	si, 8
	jae	.SpriteOverflow
	mov	[OAM2_x+ si], al
	inc	word [P_SPROUTPOS]
	jmp	.SpriteCase3_cont
.SpriteOverflow:
	or	byte [P_STATUS], 0x20	; sprite overflow flag
.SpriteCase3_cont:
	cmp	byte [P_SPRINPOS], 2
	jne	short .DoneRenderTick
	mov	byte [P_OAMADDR], 8
.DoneRenderTick:
	ret

.SpriteDone:
	mov	byte [P_OAMADDR], 0
	ret
|
|
|
|
;-----------------------------------------------------------------------
; PPU_RenderPixel -- produce the pixel at (P_X, P_SCANLINE).
;
; Picks the background pixel from the shift registers, overlays the
; sprites in OAM3, maps the result through PALETTE and either stores the
; raw palette value or synthesizes 8 NTSC samples into NTSCline.
;
; In:    nothing in registers (PPU_tick calls this for x < 256, y >= 0)
; Out:   nothing
; Clobb: EAX, EBX, CX, EDX, ESI, EDI, BP, flags
; Note:  the NTSC path stores through ES:DI (stosd in NTSC_synthesize),
;        so ES is presumably expected to equal DS here -- confirm with
;        the caller's setup.
;
; Self-modified immediates defined here: XFINE, P_BG_SHIFT_PAT,
; P_BG_SHIFT_ATTR.  NTSC_SYNTHESIS_DISABLE marks a 2-byte short jump
; that is patched from outside; keep it a "jmp short".
;-----------------------------------------------------------------------
PPU_RenderPixel:
	mov	cx, [P_X]
	mov	dh, cl
	add	dh, 8			; dh = u8(x+8); < 16 for x in 0..7 and 248..255
	; xpos = ~((x&7) + xfine + ((x&7) ? 8 : 0)) & 15
	and	cx, 7			; x&7
	jz	.zero
	or	cl, 8			; (x&7) + 8
.zero:
	add	cx, word 0xAAAA		; + xfine (0..7)
XFINE EQU $-2
	not	cx
	and	cx, 15
	shl	cl, 1			; cl = xpos*2 (two bits per pixel)

	; Layer enables.  "edge" means dh < 16.
	;   showbg = ShowBG(8)  && (ShowBG8(2) || !edge)
	;   showsp = ShowSP(16) && (ShowSP8(4) || !edge)
	; The main enable bit is required; the "8" bit only lifts the edge
	; clipping.  (Previously the main bit alone bypassed the clipping
	; and the "8" bit alone switched the layer on.)
	mov	al, [P_DISPCTRL]
	mov	dl, 0
	test	al, 8			; layer off = deny
	jz	.Showbg_false
	test	al, 2			; edge columns enabled = allow everywhere
	jnz	.Showbg_true
	cmp	dh, 16			; clipped and inside the edge = deny
	jb	.Showbg_false
.Showbg_true:
	inc	dx			; dl&1 = showbg
.Showbg_false:
	test	al, 16
	jz	.Showsp_false
	test	al, 4
	jnz	.Showsp_true
	cmp	dh, 16
	jb	.Showsp_false
.Showsp_true:
	inc	dx
	inc	dx			; dl&2 = showsp
.Showsp_false:

	; ---- background -----------------------------------------------------
	xor	si, si			; si = pixel
	xor	di, di			; di = attr
	test	dl, 1
	jz	.BGdisabled
	mov	esi, dword 0xAAAAAAAA
P_BG_SHIFT_PAT EQU $-4			; background pattern shift register
	shr	esi, cl
	and	si, 3			; pixel
	jz	.BGchosen		; keep attr = 0 for a transparent pixel
	mov	edi, dword 0xAAAAAAAA
P_BG_SHIFT_ATTR EQU $-4			; background attribute shift register
	shr	edi, cl
	and	di, 3			; attr
	jmp	.BGchosen
.BGdisabled:
	; With every display bit off and vaddr inside 3Fxx, the palette
	; entry addressed by vaddr is shown instead of the backdrop.
	mov	ax, [VADDR_BUF]
	push	ax
	and	ax, 0x3F00
	cmp	ax, 0x3F00
	pop	ax			; POP leaves the flags alone
	jne	.BGchosen
	test	byte [P_DISPCTRL], 2+4+8+16
	jnz	.BGchosen
	mov	si, ax			; pixel (masked to 5 bits further down)
.BGchosen:

	; ---- sprites --------------------------------------------------------
	test	dl, 2
	jz	.DoneRenderingSprites
	xor	ebx, ebx
	not	bx			; ebx = 0x0000FFFF; the first INC makes it 0
.OverlaySpritesLoop:
	inc	bx
	cmp	bl, [P_SPRRENPOS]
	jae	.DoneRenderingSprites
	; Horizontally in range?  xdiff = x - OAM3_x[sno] must be 0..7
	mov	ax, [P_X]
	mov	ch, 0
	mov	cl, [OAM3_x + bx]
	sub	ax, cx
	cmp	ax, 8
	jae	.OverlaySpritesLoop
	test	[OAM3_attr+bx], byte 0x40
	jnz	.NoXflip
	xor	al, 7			; unflipped sprites read the pattern mirrored
.NoXflip:
	; spritepixel = (OAM3_pattern[sno] >> (xdiff*2)) & 3
	mov	cl, al
	shl	cl, 1
	mov	ax, [OAM3_pattern + ebx*2]
	shr	ax, cl
	and	ax, 3
	jz	.OverlaySpritesLoop	; pixel 0 is always transparent

	; Sprite-0 hit: x < 255, opaque background, sprite slot 0.
	cmp	word [P_X], 255
	jae	.NoSprite0hit
	test	si, si
	jz	.NoSprite0hit
	; sprindex holds the OAM address captured during evaluation;
	; values below 4 belong to the first OAM entry.
	cmp	byte [OAM3_sprindex + bx], 4
	jae	.NoSprite0hit
	or	byte [P_STATUS], 0x40
.NoSprite0hit:
	; Draw unless the sprite is behind an opaque background pixel.
	mov	cl, [OAM3_attr + bx]
	test	si, si
	jz	.DoRenderSpritePixel
	test	cl, 0x20
	jnz	.DoneRenderingSprites
.DoRenderSpritePixel:
	and	cx, 3			; CH is still 0 from above
	add	cx, 4
	mov	di, cx			; attr = (s.attr & 3) + 4
	mov	si, ax			; pixel = spritepixel
	; Only the first opaque sprite pixel is considered.
.DoneRenderingSprites:

	; ---- palette lookup -------------------------------------------------
	lea	di, [esi + edi*4]	; pixel + attr*4 (low 16 bits only)
	and	di, 0x1F
	mov	al, [PALETTE+di]
	test	byte [P_DISPCTRL], 1	; grayscale bit
	jz	.Notgrayscale
	and	al, 0x30
.Notgrayscale:

	; ---- output (al = palette value) ------------------------------------
	mov	di, [P_X]

NTSC_SYNTHESIS_DISABLE:
	jmp	short .DoNTSC		; replaced with 2*NOP when NTSC is off

	; No NTSC simulation: store the raw pixel.
	add	al,16
	mov	[NTSCline + di], al
.DontGenerate:
	ret

.DoNTSC:
%if 0
	cmp	word [P_FRAMECOUNT], 0
	jnz	.DontGenerate
%endif
	and	di, 0xFF		; guard against running past the buffer
	jnz	.DontMakeBorders

	; x == 0: also synthesize the borders.  NTSCline holds 282*8
	; samples: 15 border pixels, 256 picture pixels, 11 border pixels.
	push	di
	push	ax
	mov	al, [P_NTSC_PHASE]
	mov	bl, al
	cbw
	shl	ax, 2
	mov	[P_NTSC_PHASE_LINEBEGIN], ax ; phase*4, read by DecodeNTSCpixel

	; 15*8 samples is a multiple of 12, so the left border does not
	; shift the phase seen by pixel 0.
	mov	di, NTSCline
.LeftLoop:
	mov	al, [PALETTE+0]
	call	NTSC_synthesize
	cmp	di, NTSCline + 15*8*4
	jb	.LeftLoop

	mov	di, NTSCline + 15*8*4 + 256*8*4
	; Skip the phase over the 256 picture pixels: 256*8 mod 12 == 8.
	; The sum must be reduced modulo 12: NTSC_synthesize shifts a
	; 32-bit pattern by color+phase and needs 8 valid bits after the
	; shift, which a phase of 12..19 would not leave.
	call	NTSC_phase_inc_bl
.RightLoop:
	mov	al, [PALETTE+0]
	call	NTSC_synthesize
	cmp	di, NTSCline + 282*8*4
	jb	.RightLoop
	pop	ax
	pop	di

.DontMakeBorders:
	shl	di, 3+2			; 8 floats per pixel * 4 bytes
	add	di, NTSCline + 15*8*4
	mov	bl, [P_NTSC_PHASE]
	jmp	NTSC_synthesize		; tail call
|
|
|
|
;-----------------------------------------------------------------------
; NTSC_synthesize_with_offset -- entry that first derives DI from EBX
; (DI = NTSCline + EBX*4), then falls into NTSC_synthesize.
;
; NTSC_synthesize -- emit 8 float samples for one pixel.
; In:    AL = palette value (low nibble = color, next bits = level)
;        BL = NTSC phase
;        DI = destination inside NTSCline (written through ES:DI)
; Out:   DI advanced by 8*4, BL advanced by 8 modulo 12
; Clobb: AX, ECX, EDX, EBP, SI, flags; clears DF
;
; Falls through into NTSC_phase_inc_bl.
;-----------------------------------------------------------------------
NTSC_synthesize_with_offset:
	lea	di, [NTSCline + ebx*4]
	; fall through
NTSC_synthesize:
	movzx	dx, al			; level = al >> 4
	and	ax, 0x0F		; color
	shr	dl, 4
	cmp	al, 13
	jbe	.Not1415
	mov	dl, 1			; colors 14..15 force level 1
.Not1415:
	; ax = color, dx = level.  Only the low 16 bits of the address are
	; kept, so the stale upper half of EDX does not matter.
	lea	si, [NTSC_levels + edx*4]

	; Carrier on/off pattern per (color, phase):
	;      phase ->
	; c  0: 1 1 1 1 1 1 1 1 1 1 1 1
	; o  1: 1 1 1 1 1 0 0 0 0 0 0 1
	; l  2: 1 1 1 1 0 0 0 0 0 0 1 1
	; o  3: 1 1 1 0 0 0 0 0 0 1 1 1
	; r  4: 1 1 0 0 0 0 0 0 1 1 1 1
	;    5: 1 0 0 0 0 0 0 1 1 1 1 1
	;    6: 0 0 0 0 0 0 1 1 1 1 1 1
	;    7: 0 0 0 0 0 1 1 1 1 1 1 0
	;    8: 0 0 0 0 1 1 1 1 1 1 0 0
	;    9: 0 0 0 1 1 1 1 1 1 0 0 0
	;   10: 0 0 1 1 1 1 1 1 0 0 0 0
	;   11: 0 1 1 1 1 1 1 0 0 0 0 0
	;   12: 1 1 1 1 1 1 0 0 0 0 0 0
	;   13: 0 0 0 0 0 0 0 0 0 0 0 0
	;   14: 0 0 0 0 0 0 0 0 0 0 0 0
	;   15: 0 0 0 0 0 0 0 0 0 0 0 0
	xor	dx, dx
	cmp	ax, 12
	ja	.BeginNTSCloop		; colors 13..15: always low
	not	dx
	test	ax, ax
	jz	.BeginNTSCloop		; color 0: always high
	add	al, bl			; color + phase
	mov	cl, al
.mod12:
	; The 32-bit pattern repeats every 12 bits, so shifting by
	; color+phase replaces an explicit modulo 12.
	mov	edx, 00111111000000111111000000111111b
	shr	edx, cl
.BeginNTSCloop:
	mov	ebp, dword 0
P_ATTENUATION_MASK EQU $-4		; written by PPU_write index 1
	mov	cl, bl
	shr	ebp, cl
	mov	ecx, ebp		; cx = attenuation bits for this pixel

	cld
	; Unrolled with %rep so CX stays free for the attenuation bits.
%rep 8
	xor	bp, bp
	rcr	cx, 1			; CF = attenuate this sample?
	rcl	bp, 1			; bp = that bit (0x01)
	rcr	dx, 1			; CF = carrier high?
	rcl	bp, 5			; attenuate -> 0x20, high -> 0x10
	; bp selects one of four 16-byte rows of NTSC_levels, si the level
	; column.  [bp+si] addresses through SS by default.
	mov	eax, [bp+si]
	stosd
%endrep
	; fall through

;-----------------------------------------------------------------------
; NTSC_phase_inc_bl -- BL = (BL + 8) mod 12.
; Clobb: flags only (AX is saved and restored).
;-----------------------------------------------------------------------
NTSC_phase_inc_bl:
	add	bl, 8
	push	ax
	mov	ax, bx
	aam	12			; al = al mod 12
	mov	bl, al
	pop	ax
	ret
|
|
|
|
|
|
section .const

; Signal voltage levels, pre-normalized:
;   normalized = (volts - 0.518) / (1.962 - 0.518)
;   stored     = normalized * brightness / 12,  brightness = 1
; Layout expected by NTSC_synthesize: four rows of four floats, indexed
; by level (0..3) within a row; row offset +16 = carrier high,
; row offset +32 = attenuated.
NTSC_levels:
	; carrier low
	dd	-0.00969529		; 0.350
	dd	 0.00000000		; 0.518
	dd	 0.02562327		; 0.962
	dd	 0.05955679		; 1.550
	; carrier high
	dd	 0.03324100		; 1.094
	dd	 0.05701754		; 1.506
	dd	 0.08333333		; 1.962
	dd	 0.08333333		; 1.962
	; the same eight voltages attenuated by 0.746 before normalization
	dd	-0.01482572
	dd	-0.00759303
	dd	 0.01152193
	dd	 0.03683633
	dd	 0.01720476
	dd	 0.03494206
	dd	 0.05457364
	dd	 0.05457364

saturation:	dd 1.7			; chroma gain applied in NTSCdecodeIntoYUV

; 4x4 ordered-dither matrix; ShowScanline selects the row, YIQcalc the column.
bayer4x4:
	db	 0, 12,  3, 15
	db	 8,  4, 11,  7
	db	 2, 14,  1, 13
	db	10,  6,  9,  5

; YIQ -> RGB coefficients for I and Q.  The Y coefficient is 1.0 for all
; three channels and is applied as a plain add in NTSCdecodeMakeR/G/B.
i_r	dd	 0.946882
q_r	dd	 0.623357
i_g	dd	-0.274788
q_g	dd	-0.635691
i_b	dd	-1.108545
q_b	dd	 1.709007
|
|
|
|
section .text

VESA_Granularity_bytes dd 0		; VESA window granularity in bytes (set elsewhere)

;-----------------------------------------------------------------------
; ShowScanline -- decode NTSCline and write scanline P_SCANLINE to video
; memory.
;
; Two paths, chosen by the patchable short jump at
; MODEX_RENDERING_ENABLE:
;   * Mode-X (default): four passes, one per VGA plane, 8-bit pixels.
;   * True color: 32-bit pixels through a banked VESA window.
;
; In:    nothing in registers
; Out:   nothing
; Clobb: EAX, BX, CX, EDX, SI, EDI, EBP, FPU stack (via DoOnePix);
;        ES is saved and restored.
;
; "les di, [P_SCANLINE]" loads DI = scanline and ES = vseg in one go,
; which relies on vseg being stored right behind P_SCANLINE.
; Patch points: MODEX_RENDERING_ENABLE, .FirstLimit, .LastBank,
; SCREEN_MARGIN, RENDER_WIDTH.
;-----------------------------------------------------------------------
ShowScanline:
%if 0
	cmp	word [P_FRAMECOUNT], 0
	jz	.DoRender
	ret
.DoRender:
%endif
	push	es
	xor	cx, cx			; Mode-X plane index

MODEX_RENDERING_ENABLE:
	jmp	short .ModeXrendering

; ---- true-color path ---------------------------------------------------
.TrueColorRendering:
	xor	edi, edi
	xor	edx, edx
	les	di, [P_SCANLINE]

	shl	edi, 8
	lea	edi, [edi + edi*4]	; 1280*scanline = 320 pixels * 4 bytes

	mov	eax, edi
	mov	ebp, edi		; ebp = linear start of this scanline
	div	dword [VESA_Granularity_bytes]
	; eax = bank number, edx = offset inside that bank
	mov	di, dx
	call	.SetVESAbank

	; Pixels left before the next bank begins.
	; NOTE(review): assumes int 10h preserved the upper halves of EAX
	; and EBP -- confirm against the target BIOS.
	inc	ax
	mul	dword [VESA_Granularity_bytes]	; eax = start of the next bank
	xor	si, si
	cld
	sub	eax, ebp
	sar	eax, 2			; bytes -> pixels
	cmp	ax, 320
	jge	.ScanlineLoop2		; no seam inside this scanline

	mov	[.FirstLimit], ax	; stop the first loop at the seam
.ScanlineLoop:
	call	DoOnePix
	stosd
	cmp	si, 320
.FirstLimit EQU $-2
	jb	.ScanlineLoop

	mov	ax, [.LastBank]
	inc	ax
	call	.SetVESAbank
	xor	di, di			; continue at the start of the new bank
.ScanlineLoop2:
	call	DoOnePix
	stosd
	cmp	si, 320
	jb	.ScanlineLoop2

	pop	es
	ret

; Select VESA bank AX unless it is already selected.  Preserves AX.
.SetVESAbank:
	cmp	ax, 0xAAAA
.LastBank EQU $-2			; last bank passed to the BIOS
	je	.Done
	mov	[.LastBank], ax
	mov	dx, ax
	push	ax
	mov	ax, 0x4F05		; VBE window control, BX = 0
	xor	bx,bx
	int	10h
	pop	ax
.Done:
	ret

; ---- Mode-X path -------------------------------------------------------
; Memory position for (x,y) is (y*320+x)/4; the plane is chosen by
; writing (1 << plane) << 8 | 02h to port 3C4h.
.ModeXrendering:
AllPlanesLoop:
	les	di, [P_SCANLINE]
	; Pick the dither row for this scanline: bayer4x4 + (y&3)*4
	mov	ax, di
	and	ax, 3
	shl	ax, 2
	add	ax, bayer4x4
	mov	word [bayer_base], ax

	shl	di, 4
	lea	di, [edi + edi*4]	; di = scanline*80; same start for every plane
	add	di, byte 0		; margin offset, patched when NTSC is enabled
SCREEN_MARGIN EQU $-1

	mov	ax, 0x0102
	shl	ah, cl
	mov	dx, 0x3C4
	out	dx, ax			; select the plane
	mov	si, cx			; source pixel index starts at the plane number

OnePlaneLoop:
	; Four pixels of this plane are gathered in EAX, then written at once.
	call	DoOnePix
	call	DoOnePix
	call	DoOnePix
	call	DoOnePix
	stosd
	cmp	si, 320
RENDER_WIDTH EQU $-2
	jb	OnePlaneLoop

	inc	cx			; next plane
	cmp	cl, 3
	jbe	AllPlanesLoop

	pop	es
	ret
|
|
|
|
|
|
|
|
;-----------------------------------------------------------------------
; DoOnePix -- produce one output pixel for ShowScanline.
;
; In:    SI  = source pixel index
;        EAX = pixels gathered so far (Mode-X packs four per dword)
; Out:   paletted / raw path: the new pixel is rotated into the top
;          byte of EAX and SI advances by 4 (one plane step);
;        true-color path: EAX = (old EAX << 24) | R<<16 | G<<8 | B and
;          SI advances by 1.
; Clobb: BX (EBX on the true-color path), DX, BP, FPU stack, flags
;
; Patch points: NTSC_DECODE_DISABLE (the 2-byte short jump at the top)
; and MODEX_DECODE_ENABLE (short jump followed by two NOPs).
;-----------------------------------------------------------------------
DoOnePix:
	jmp	short DecodeNTSCpixel
NTSC_DECODE_DISABLE EQU $-2

	; NTSC decoding off: NTSCline holds one ready-made byte per pixel.
	mov	al, [NTSCline + si]
	jmp	DecodeNTSC_return

DecodeNTSCpixel:
	; Original note: translate si (0..292) into a sample window
	;   Begin = si*2048/292 - 2,  End = Begin + 12
	; The xbegins table provides Begin, already multiplied by 4.
	mov	bx, si			; bx+si = si*2 (word index)
	mov	bx, word [xbegins +bx+si]

	push	eax			; keep the pixels gathered so far

	; bp = sincos + 4*((Begin + phase + 28) mod 12)
	lea	ax, [bx + 24*4 + 4*4]
	add	ax, word 0x1111
P_NTSC_PHASE_LINEBEGIN EQU $-2		; phase*4 at the start of the line
	; NOTE(review): CWD sign-extends before an unsigned DIV; this is
	; only safe while AX stays below 0x8000 -- confirm the xbegins range.
	cwd
	mov	bp, 12*4
	div	bp			; dx = remainder
	add	bx, NTSCline
	lea	bp, [sincos + edx]	; low 16 bits only

	lea	dx, [bx + 12*4]		; End
	; Clamp Begin to the start of the buffer.
	; NOTE(review): this is a signed compare on addresses; it
	; presumably relies on NTSCline lying wholly below 0x8000 --
	; confirm the memory layout.
	cmp	bx, NTSCline
	jge	.NotZero
	mov	bx, NTSCline
.NotZero:
	; Accumulate Y, I, Q over [bx, dx).
	call	NTSCdecodeIntoYUV

MODEX_DECODE_ENABLE:
	jmp	short .MakePalettedVersion
	nop
	nop

.MakeTrueColorVersion:
	pop	ebx			; pixels gathered so far
	call	NTSCdecodeMakeR
	call	.TrueColorHelper
	call	NTSCdecodeMakeG
	call	.TrueColorHelper
	call	NTSCdecodeMakeB
	call	.TrueColorHelper
	xchg	ebx, eax
	inc	si			; one pixel ahead
	ret

.TrueColorHelper:
	; Convert st0 to 0..255 and shift it into EBX.  The six data bytes
	; after the call are arguments; the callee returns past them.
	call	FloatToPositiveIntWithClamp
	dd	255.49			; scale
	dw	255			; upper clamp
	shl	ebx, 8
	mov	bl, al
	ret

.MakePalettedVersion:
	; Build index = (r*7 + g)*9 + b with r in 0..3, g in 0..6, b in
	; 0..8.  Each channel is squared (gamma 2.0), scaled to 16 steps
	; per level and dithered by YIQcalc.
	xor	ax, ax

	call	NTSCdecodeMakeR
	call	NTSCdecodeMakeLinear
	dd	64.0			; 16*4 (inline argument)
	mov	bx, 4
	call	YIQcalc

	call	NTSCdecodeMakeG
	call	NTSCdecodeMakeLinear
	dd	112.0			; 16*7
	mov	bx, 7
	mul	bx
	call	YIQcalc

	call	NTSCdecodeMakeB
	call	NTSCdecodeMakeLinear
	dd	144.0			; 16*9
	mov	bx, 9
	mul	bx
	call	YIQcalc

.Bypass:
	add	al, 4			; the palette cube starts at index 4
	xchg	bx, ax
	pop	eax			; pixels gathered so far
	mov	al, bl

DecodeNTSC_return:
	add	si, 4			; four pixels ahead (next pixel of this plane)
	ror	eax, 8			; the new pixel moves to the top byte
	ret
|
|
|
|
;-----------------------------------------------------------------------
; NTSCdecodeIntoYUV -- accumulate Y, I and Q over a window of samples.
;
;   y = i = q = 0
;   for each sample v in [BX, DX):
;       y += v
;       v *= saturation
;       i += v * cos(phase)     ; cos(x) = sin(x+3) in pi/6 units
;       q += v * sin(phase)
;
; In:    BX = first sample, DX = end of window (exclusive, unsigned)
;        BP = pointer into the sincos table for the first sample
;             ([bp] addressing goes through SS by default)
; Out:   FPU stack: st0 = y, st1 = i (cos sum), st2 = q (sin sum)
;        BX = DX (or unchanged if the window was empty), BP advanced
; Clobb: flags
;-----------------------------------------------------------------------
NTSCdecodeIntoYUV:
	fld	dword [saturation]	; stays at the bottom during the loop
	fldz				; slot that accumulates the sin products (q)
	fldz				; slot that accumulates the cos products (i)
	fldz				; y
	jmp	.L2
.L3:
	; here: st0 = y, st1 = i, st2 = q, st3 = saturation
	fld	dword [bx]		; v
	add	bx, 4			; next sample
	fadd	to st1			; y += v
	fmul	st4			; v *= saturation
	fld	dword [bp + 12]		; cos
	fmul	st1			; cos * v
	faddp	st3			; i += cos*v
	fmul	dword [bp]		; v *= sin
	add	bp, 4			; next sincos cell
	faddp	st3			; q += sin*v
.L2:
	cmp	bx, dx
	jb	.L3
	; Drop the saturation slot, then rotate back to y, i, q order.
	fstp	st3			; st0 = i, st1 = q, st2 = y
	fxch	st1			; st0 = q, st1 = i, st2 = y
	fxch	st2			; st0 = y, st1 = i, st2 = q
	ret
|
|
|
|
;-----------------------------------------------------------------------
; NTSCdecodeMakeR / NTSCdecodeMakeG -- push one color channel.
; In:    FPU st0 = y, st1 = i, st2 = q
; Out:   FPU st0 = y + i*i_X + q*q_X, with y, i, q kept below it
;
; NTSCdecodeMakeB -- compute the last channel, consuming y, i and q.
; In:    FPU st0 = y, st1 = i, st2 = q
; Out:   FPU st0 = y + i*i_b + q*q_b (nothing below it from this set)
;
; No integer registers are touched.  The labels
; fmul_st4_faddp_st1_return and faddp_st1_return are kept as entry
; points with their original behavior.
;-----------------------------------------------------------------------
NTSCdecodeMakeR:
	fld	dword [i_r]
	fmul	st2			; i_r * i
	fadd	st1			; + y
	fld	dword [q_r]
	fmul	st4			; q_r * q
	faddp	st1
	ret

NTSCdecodeMakeG:
	fld	dword [i_g]
	fmul	st2			; i_g * i
	fadd	st1			; + y
	fld	dword [q_g]
fmul_st4_faddp_st1_return:
	fmul	st4			; q_g * q
	faddp	st1
	ret

NTSCdecodeMakeB:
	fxch	st1			; st0 = i, st1 = y
	fmul	dword [i_b]
	faddp	st1			; st0 = y + i*i_b, st1 = q
	fxch	st1			; st0 = q
	fmul	dword [q_b]
faddp_st1_return:
	faddp	st1
	ret
|
|
;-----------------------------------------------------------------------
; NTSCdecodeMakeLinear -- gamma-decode (gamma 2.0 assumed) and scale st0.
;
; Calling convention: the caller places a 4-byte float directly after
; the CALL instruction; this routine reads it through the return
; address and resumes execution behind it.
;
; In:    FPU st0 = gamma-corrected channel value
;        [return address] = float scale factor
; Out:   FPU st0 = st0^2 * scale if st0 compared greater than zero,
;        otherwise 0.0
; Clobb: BP, flags (AX and BX are swapped and swapped back)
;
; NTSC_DECODE_POWER2 labels the squaring instruction.
;-----------------------------------------------------------------------
NTSCdecodeMakeLinear:
	pop	bp			; bp -> inline scale factor
	ftst
	xchg	bx,ax			; park AX in BX around fnstsw
	fnstsw	ax
	test	ah, 69			; C3|C2|C0: all clear only when st0 > 0
	xchg	bx,ax			; XCHG does not disturb the flags
	je	.notzero
	fstp	st0			; not positive: replace the value with zero
	fldz
	add	bp, 4
	jmp	bp
.notzero:
NTSC_DECODE_POWER2:
	fmul	st0			; x^2
	fmul	dword [bp]		; * scale
	add	bp, 4			; skip the inline float
	jmp	bp
|
|
|
|
|
|
;-----------------------------------------------------------------------
; FloatToPositiveIntWithClamp -- pop st0 and convert it to a clamped
; unsigned integer.
;
; Calling convention: six bytes of inline data follow the CALL:
;       dd scale        ; float multiplier
;       dw limit        ; largest value returned
; Execution resumes behind those six bytes.
;
; In:    FPU st0 = value
; Out:   AX = 0 if st0 did not compare greater than zero, otherwise
;        min(unsigned(round(st0*scale)), limit); st0 is popped.
; Clobb: BP, flags
;
; .temp is the immediate of the "mov ax, imm16" below; fistp writes the
; converted value straight into it.
;-----------------------------------------------------------------------
FloatToPositiveIntWithClamp:
	pop	bp			; bp -> inline arguments
	ftst
	fnstsw	ax
	test	ah, 69			; C3|C2|C0: all clear only when st0 > 0
	jne	.zero
	fmul	dword [bp]
	fistp	word [.temp]
	fwait				; make sure the store has landed
	mov	ax, 0
.temp EQU $-2
	cmp	ax, [bp+4]		; unsigned compare against the limit
	jbe	short .truedone
	mov	ax, [bp+4]
	jmp	short .truedone
.zero:
	fstp	st0
	xor	ax, ax
.truedone:
	add	bp,6			; skip the inline arguments
	jmp	bp
|
|
|
|
|
|
;-----------------------------------------------------------------------
; YIQcalc -- quantize one color channel with ordered dithering and add
; it to the palette index being built.
;
; The video palette is not the NES palette, so dithering is used to
; compensate.
;
; In:    FPU st0 = channel value scaled to 16 steps per level (popped)
;        BX  = number of levels for this channel
;        AX  = index accumulated so far
;        SI  = pixel index (its low two bits pick the dither column)
; Out:   AX += min((int(st0) + dither) >> 4, BX-1)
; Clobb: EDX, BP, flags
;
; bayer_base is the 16-bit displacement of the dither load; ShowScanline
; rewrites it per scanline.  YIQ_temp is the 32-bit immediate of the
; ADD; fistp writes the converted value straight into it.
;-----------------------------------------------------------------------
YIQcalc:
	fistp	dword [YIQ_temp]
	xor	edx, edx
	mov	bp, si
	and	bp, 3
	mov	dl, [bayer4x4 + bp]	; [bp+disp] addresses through SS by default
bayer_base EQU $-2
	fwait				; make sure the store has landed
	add	edx, dword 0xAAAAAAAA
YIQ_temp equ $-4
	sar	dx, 4			; divide by 16
	cmp	dx, bx
	jb	.ok
	lea	dx, [bx-1]		; clamp to the top level
.ok:
	add	ax, dx
	ret
|
|
|