scummvm/backends/graphics/atari/atari-c2p-asm.S
Miro Kropacek b39fa830e9 BACKENDS: ATARI: Optimize 4bpp C2P routine
The AND'ing is not needed, as the pixels are guaranteed to be in range
0 - 15.
2024-11-13 21:41:01 +01:00

962 lines
21 KiB
ArmAsm

/* ScummVM - Graphic Adventure Engine
*
* ScummVM is the legal property of its developers, whose names
* are too numerous to list here. Please refer to the COPYRIGHT
* file distributed with this source distribution.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "../../platform/atari/symbols.h"
| C2P by Mikael Kalms (public domain)
| See https://github.com/Kalmalyzer/kalms-c2p
.globl SYM(asm_c2p1x1_8)
.globl SYM(asm_c2p1x1_8_tt)
.globl SYM(asm_c2p1x1_8_rect)
.globl SYM(asm_c2p1x1_4)
.globl SYM(asm_c2p1x1_4_rect)
.text
| void asm_c2p1x1_8(const byte *pChunky, const byte *pChunkyEnd, byte *pScreen);
|
| Chunky-to-planar conversion, 8 bits per pixel, contiguous source and
| contiguous destination.
|
| Every loop iteration reads 16 chunky bytes (one pixel per byte) and
| produces 16 planar bytes: eight 16-bit words, one per bitplane, written
| in the order plane 0, plane 1, ... plane 7.
|
| In:   pChunky    first chunky byte
|       pChunkyEnd one past the last chunky byte
|       pScreen    destination for the planar data
| Out:  nothing
| Regs: d2-d7/a2-a6 are saved and restored; d0-d1/a0-a1 are clobbered.
|
| The loop only stops when the read pointer is exactly equal to pChunkyEnd,
| and the first 16 bytes are read before any comparison is made, so the
| input length has to be a non-zero multiple of 16 bytes.
|
| Register roles inside the loop:
|       a0      chunky read pointer
|       a1      screen write pointer
|       a2      chunky end
|       a3-a6   converted longwords of the previous group, waiting to be
|               stored while the next group is being converted
|       d0-d3   the 16 pixels being converted
|       d4-d6   merge masks (4-bit, 8-bit and 1-bit steps)
|       d7      scratch
SYM(asm_c2p1x1_8):
#ifdef __FASTCALL__
| register-argument build: the first two pointers arrive in a0/a1 and the
| third one is read from the stack
| a0: chunky
move.l a1,d0 | chunky end
move.l 4(sp),a1 | screen
#else
| stack-argument build: all three arguments are read from the stack
move.l (4,sp),a0 | chunky
move.l (8,sp),d0 | chunky end
move.l (12,sp),a1 | screen
#endif
| the arguments were fetched before this push, so the offsets above are
| relative to the return address only
movem.l d2-d7/a2-a6,-(sp)
| a2 = chunky end (kept in d0 until a2 had been saved)
move.l d0,a2
| merge masks kept in registers for the whole run
move.l #0x0f0f0f0f,d4
move.l #0x00ff00ff,d5
move.l #0x55555555,d6
| prologue: fetch the first 16 pixels
move.l (a0)+,d0
move.l (a0)+,d1
move.l (a0)+,d2
move.l (a0)+,d3
| a7a6a5a4a3a2a1a0 b7b6b5b4b3b2b1b0 c7c6c5c4c3c2c1c0 d7d6d5d4d3d2d1d0
| e7e6e5e4e3e2e1e0 f7f6f5f4f3f2f1f0 g7g6g5g4g3g2g1g0 h7h6h5h4h3h2h1h0
| i7i6i5i4i3i2i1i0 j7j6j5j4j3j2j1j0 k7k6k5k4k3k2k1k0 l7l6l5l4l3l2l1l0
| m7m6m5m4m3m2m1m0 n7n6n5n4n3n2n1n0 o7o6o5o4o3o2o1o0 p7p6p5p4p3p2p1p0
| 4-bit merge: swap the low nibbles of d0 with the high nibbles of d1
move.l d1,d7
lsr.l #4,d7
eor.l d0,d7
and.l d4,d7
eor.l d7,d0
lsl.l #4,d7
eor.l d7,d1
| same 4-bit merge for the pair d2/d3
move.l d3,d7
lsr.l #4,d7
eor.l d2,d7
and.l d4,d7
eor.l d7,d2
lsl.l #4,d7
eor.l d7,d3
| a7a6a5a4e7e6e5e4 b7b6b5b4f7f6f5f4 c7c6c5c4g7g6g5g4 d7d6d5d4h7h6h5h4
| a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
| i7i6i5i4m7m6m5m4 j7j6j5j4n7n6n5n4 k7k6k5k4o7o6o5o4 l7l6l5l4p7p6p5p4
| i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
| 8-bit merge: swap bytes between d0 and d2
move.l d2,d7
lsr.l #8,d7
eor.l d0,d7
and.l d5,d7
eor.l d7,d0
lsl.l #8,d7
eor.l d7,d2
| same 8-bit merge for the pair d1/d3
move.l d3,d7
lsr.l #8,d7
eor.l d1,d7
and.l d5,d7
eor.l d7,d1
lsl.l #8,d7
eor.l d7,d3
| a7a6a5a4e7e6e5e4 i7i6i5i4m7m6m5m4 c7c6c5c4g7g6g5g4 k7k6k5k4o7o6o5o4
| a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
| b7b6b5b4f7f6f5f4 j7j6j5j4n7n6n5n4 d7d6d5d4h7h6h5h4 l7l6l5l4p7p6p5p4
| b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
| there is no previous group to store yet, so enter the loop in the middle
bra.s c2p1x1_8_start
| main loop: fetch the next 16 pixels and run the 4-bit and 8-bit merges
| again, this time with the four stores of the previous group (a3, a4, a5,
| a6) spread between the merge instructions.
| NOTE(review): the stores are presumably spread out so that memory writes
| overlap with register-only work - confirm before rescheduling anything.
c2p1x1_8_pix16:
move.l (a0)+,d0
move.l (a0)+,d1
move.l (a0)+,d2
move.l (a0)+,d3
| a7a6a5a4a3a2a1a0 b7b6b5b4b3b2b1b0 c7c6c5c4c3c2c1c0 d7d6d5d4d3d2d1d0
| e7e6e5e4e3e2e1e0 f7f6f5f4f3f2f1f0 g7g6g5g4g3g2g1g0 h7h6h5h4h3h2h1h0
| i7i6i5i4i3i2i1i0 j7j6j5j4j3j2j1j0 k7k6k5k4k3k2k1k0 l7l6l5l4l3l2l1l0
| m7m6m5m4m3m2m1m0 n7n6n5n4n3n2n1n0 o7o6o5o4o3o2o1o0 p7p6p5p4p3p2p1p0
| 4-bit merge d0/d1; the a3 store belongs to the previous group (planes 0-1)
move.l d1,d7
lsr.l #4,d7
move.l a3,(a1)+
eor.l d0,d7
and.l d4,d7
eor.l d7,d0
lsl.l #4,d7
eor.l d7,d1
| 4-bit merge d2/d3; the a4 store belongs to the previous group (planes 2-3)
move.l d3,d7
lsr.l #4,d7
eor.l d2,d7
and.l d4,d7
eor.l d7,d2
move.l a4,(a1)+
lsl.l #4,d7
eor.l d7,d3
| a7a6a5a4e7e6e5e4 b7b6b5b4f7f6f5f4 c7c6c5c4g7g6g5g4 d7d6d5d4h7h6h5h4
| a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
| i7i6i5i4m7m6m5m4 j7j6j5j4n7n6n5n4 k7k6k5k4o7o6o5o4 l7l6l5l4p7p6p5p4
| i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
| 8-bit merge d0/d2; the a5 store belongs to the previous group (planes 4-5)
move.l d2,d7
lsr.l #8,d7
eor.l d0,d7
and.l d5,d7
eor.l d7,d0
move.l a5,(a1)+
lsl.l #8,d7
eor.l d7,d2
| 8-bit merge d1/d3; the a6 store belongs to the previous group (planes 6-7)
move.l d3,d7
lsr.l #8,d7
eor.l d1,d7
and.l d5,d7
eor.l d7,d1
move.l a6,(a1)+
lsl.l #8,d7
eor.l d7,d3
| a7a6a5a4e7e6e5e4 i7i6i5i4m7m6m5m4 c7c6c5c4g7g6g5g4 k7k6k5k4o7o6o5o4
| a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
| b7b6b5b4f7f6f5f4 j7j6j5j4n7n6n5n4 d7d6d5d4h7h6h5h4 l7l6l5l4p7p6p5p4
| b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
| second half of the conversion, shared by the prologue and the loop
c2p1x1_8_start:
| 1-bit merge d0/d2 (add.l d7,d7 is the shift left by one)
move.l d2,d7
lsr.l #1,d7
eor.l d0,d7
and.l d6,d7
eor.l d7,d0
add.l d7,d7
eor.l d7,d2
| 1-bit merge d1/d3
move.l d3,d7
lsr.l #1,d7
eor.l d1,d7
and.l d6,d7
eor.l d7,d1
add.l d7,d7
eor.l d7,d3
| a7b7a5b5e7f7e5f5 i7j7i5j5m7n7m5n5 c7d7c5d5g7h7g5h5 k7l7k5l5o7p7o5p5
| a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1
| a6b6a4b4e6f6e4f4 i6j6i4j4m6n6m4n4 c6d6c4d4g6h6g4h4 k6l6k4l4o6p6o4p4
| a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
| 16-bit merge: the low word of d0 trades places with the high word of d2
move.w d2,d7
move.w d0,d2
swap d2
move.w d2,d0
move.w d7,d2
| 16-bit merge: the low word of d1 trades places with the high word of d3
move.w d3,d7
move.w d1,d3
swap d3
move.w d3,d1
move.w d7,d3
| a7b7a5b5e7f7e5f5 i7j7i5j5m7n7m5n5 a6b6a4b4e6f6e4f4 i6j6i4j4m6n6m4n4
| a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0
| c7d7c5d5g7h7g5h5 k7l7k5l5o7p7o5p5 c6d6c4d4g6h6g4h4 k6l6k4l4o6p6o4p4
| c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
| 2-bit merge d0/d2; this mask is an immediate because d4-d6 are taken
move.l d2,d7
lsr.l #2,d7
eor.l d0,d7
and.l #0x33333333,d7
eor.l d7,d0
lsl.l #2,d7
eor.l d7,d2
| 2-bit merge d1/d3
move.l d3,d7
lsr.l #2,d7
eor.l d1,d7
and.l #0x33333333,d7
eor.l d7,d1
lsl.l #2,d7
eor.l d7,d3
| a7b7c7d7e7f7g7h7 i7j7k7l7m7n7o7p7 a6b6c6d6e6f6g6h6 i6j6k6l6m6n6o6p6
| a3b3c3d3e3f3g3h3 i3j3k3l3m3n3o3p3 a2b2c2d2e2f2g2h2 i2j2k2l2m2n2o2p2
| a5b5c5d5e5f5g5h5 i5j5k5l5m5n5o5p5 a4b4c4d4e4f4g4h4 i4j4k4l4m4n4o4p4
| a1b1c1d1e1f1g1h1 i1j1k1l1m1n1o1p1 a0b0c0d0e0f0g0h0 i0j0k0l0m0n0o0p0
| swap the word halves so the lower numbered plane of each pair sits in
| the high word and therefore lands first in memory
swap d0
swap d1
swap d2
swap d3
| park the finished group in a3-a6 so d0-d3 are free for the next fetch:
| a3 = planes 0-1, a4 = planes 2-3, a5 = planes 4-5, a6 = planes 6-7
move.l d0,a6
move.l d2,a5
move.l d1,a4
move.l d3,a3
| more input left?
cmp.l a0,a2
bne c2p1x1_8_pix16
| epilogue: flush the last converted group
move.l a3,(a1)+
move.l a4,(a1)+
move.l a5,(a1)+
move.l a6,(a1)+
movem.l (sp)+,d2-d7/a2-a6
rts
| void asm_c2p1x1_8_tt(const byte *pChunky, const byte *pChunkyEnd, byte *pScreen, uint32 screenPitch);
|
| Chunky-to-planar conversion, 8 bits per pixel, contiguous source, with a
| destination that only fills the first half of every screenPitch bytes.
|
| The conversion itself is the same 16 pixels in / 16 planar bytes out
| pipeline as in asm_c2p1x1_8. In addition the write pointer is compared
| with an end-of-line pointer after every stored group; when they meet,
| the write pointer skips screenPitch / 2 bytes and the end-of-line pointer
| advances by screenPitch.
| NOTE(review): judging by the name and the double width remark this is
| presumably meant for an Atari TT mode where every second line is left
| untouched - confirm against the caller.
|
| In:   pChunky     first chunky byte
|       pChunkyEnd  one past the last chunky byte
|       pScreen     destination for the planar data
|       screenPitch distance in bytes between the starts of two written lines
| Out:  nothing
| Regs: d2-d7/a2-a6 are saved and restored; d0-d1/a0-a1 are clobbered.
|
| The loop stops only when the read pointer equals pChunkyEnd exactly, so
| the input length has to be a non-zero multiple of 16 bytes. The line end
| test is an equality test as well, so screenPitch / 2 has to be a multiple
| of 16 bytes.
|
| The stack pointer is saved in old_sp and a7 is then used as a plain
| address register until the end of the routine; the routine also keeps
| state in the shared variables screen_pitch and screen_offset, so it is
| not reentrant.
| NOTE(review): while a7 points into the screen, anything that pushes onto
| the active stack would write into screen memory - presumably exceptions
| use a separate stack in the mode this runs in; confirm.
|
| Register roles inside the loop:
|       a0      chunky read pointer
|       a1      screen write pointer
|       a2      chunky end
|       a7      end of the current destination line
|       a3-a6   converted longwords of the previous group
|       d0-d3   the 16 pixels being converted
|       d4-d6   merge masks (4-bit, 8-bit and 1-bit steps)
|       d7      scratch
SYM(asm_c2p1x1_8_tt):
| registers are pushed first, hence the 11*4 in every stack offset below
movem.l d2-d7/a2-a6,-(sp) | 6 + 5 = 11 longs
#ifdef __FASTCALL__
| register-argument build
| a0: chunky
move.l a1,a2 | a2: chunky end
move.l (11*4+4,sp),a1 | a1: screen
| d0.l: screen pitch (double width)
#else
| stack-argument build
move.l (11*4+4,sp),a0 | a0: chunky
move.l (11*4+8,sp),a2 | a2: chunky end
move.l (11*4+12,sp),a1 | a1: screen
move.l (11*4+16,sp),d0 | d0.l: screen pitch (double width)
#endif
| from here on a7 is a data pointer; the real stack pointer is kept in old_sp
move.l sp,old_sp
| screen_pitch = full pitch, used to advance the end-of-line pointer
move.l d0,screen_pitch
| half of the pitch is the number of bytes actually written per line
lsr.l #1,d0
lea (a1,d0.l),a7 | a7: end of first dst line
| screen_offset = the other half of the pitch, skipped after each line
move.l d0,screen_offset
| merge masks kept in registers for the whole run
move.l #0x0f0f0f0f,d4
move.l #0x00ff00ff,d5
move.l #0x55555555,d6
| prologue: fetch the first 16 pixels
move.l (a0)+,d0
move.l (a0)+,d1
move.l (a0)+,d2
move.l (a0)+,d3
| a7a6a5a4a3a2a1a0 b7b6b5b4b3b2b1b0 c7c6c5c4c3c2c1c0 d7d6d5d4d3d2d1d0
| e7e6e5e4e3e2e1e0 f7f6f5f4f3f2f1f0 g7g6g5g4g3g2g1g0 h7h6h5h4h3h2h1h0
| i7i6i5i4i3i2i1i0 j7j6j5j4j3j2j1j0 k7k6k5k4k3k2k1k0 l7l6l5l4l3l2l1l0
| m7m6m5m4m3m2m1m0 n7n6n5n4n3n2n1n0 o7o6o5o4o3o2o1o0 p7p6p5p4p3p2p1p0
| 4-bit merge: swap the low nibbles of d0 with the high nibbles of d1
move.l d1,d7
lsr.l #4,d7
eor.l d0,d7
and.l d4,d7
eor.l d7,d0
lsl.l #4,d7
eor.l d7,d1
| same 4-bit merge for the pair d2/d3
move.l d3,d7
lsr.l #4,d7
eor.l d2,d7
and.l d4,d7
eor.l d7,d2
lsl.l #4,d7
eor.l d7,d3
| a7a6a5a4e7e6e5e4 b7b6b5b4f7f6f5f4 c7c6c5c4g7g6g5g4 d7d6d5d4h7h6h5h4
| a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
| i7i6i5i4m7m6m5m4 j7j6j5j4n7n6n5n4 k7k6k5k4o7o6o5o4 l7l6l5l4p7p6p5p4
| i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
| 8-bit merge: swap bytes between d0 and d2
move.l d2,d7
lsr.l #8,d7
eor.l d0,d7
and.l d5,d7
eor.l d7,d0
lsl.l #8,d7
eor.l d7,d2
| same 8-bit merge for the pair d1/d3
move.l d3,d7
lsr.l #8,d7
eor.l d1,d7
and.l d5,d7
eor.l d7,d1
lsl.l #8,d7
eor.l d7,d3
| a7a6a5a4e7e6e5e4 i7i6i5i4m7m6m5m4 c7c6c5c4g7g6g5g4 k7k6k5k4o7o6o5o4
| a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
| b7b6b5b4f7f6f5f4 j7j6j5j4n7n6n5n4 d7d6d5d4h7h6h5h4 l7l6l5l4p7p6p5p4
| b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
| there is no previous group to store yet, so enter the loop in the middle
bra.s c2p1x1_8_tt_start
| main loop: fetch the next 16 pixels and run the 4-bit and 8-bit merges
| again, with the four stores of the previous group (a3, a4, a5, a6) spread
| between the merge instructions
c2p1x1_8_tt_pix16:
move.l (a0)+,d0
move.l (a0)+,d1
move.l (a0)+,d2
move.l (a0)+,d3
| a7a6a5a4a3a2a1a0 b7b6b5b4b3b2b1b0 c7c6c5c4c3c2c1c0 d7d6d5d4d3d2d1d0
| e7e6e5e4e3e2e1e0 f7f6f5f4f3f2f1f0 g7g6g5g4g3g2g1g0 h7h6h5h4h3h2h1h0
| i7i6i5i4i3i2i1i0 j7j6j5j4j3j2j1j0 k7k6k5k4k3k2k1k0 l7l6l5l4l3l2l1l0
| m7m6m5m4m3m2m1m0 n7n6n5n4n3n2n1n0 o7o6o5o4o3o2o1o0 p7p6p5p4p3p2p1p0
| 4-bit merge d0/d1; the a3 store belongs to the previous group (planes 0-1)
move.l d1,d7
lsr.l #4,d7
move.l a3,(a1)+
eor.l d0,d7
and.l d4,d7
eor.l d7,d0
lsl.l #4,d7
eor.l d7,d1
| 4-bit merge d2/d3; the a4 store belongs to the previous group (planes 2-3)
move.l d3,d7
lsr.l #4,d7
eor.l d2,d7
and.l d4,d7
eor.l d7,d2
move.l a4,(a1)+
lsl.l #4,d7
eor.l d7,d3
| a7a6a5a4e7e6e5e4 b7b6b5b4f7f6f5f4 c7c6c5c4g7g6g5g4 d7d6d5d4h7h6h5h4
| a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
| i7i6i5i4m7m6m5m4 j7j6j5j4n7n6n5n4 k7k6k5k4o7o6o5o4 l7l6l5l4p7p6p5p4
| i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
| 8-bit merge d0/d2; the a5 store belongs to the previous group (planes 4-5)
move.l d2,d7
lsr.l #8,d7
eor.l d0,d7
and.l d5,d7
eor.l d7,d0
move.l a5,(a1)+
lsl.l #8,d7
eor.l d7,d2
| 8-bit merge d1/d3; the a6 store belongs to the previous group (planes 6-7)
move.l d3,d7
lsr.l #8,d7
eor.l d1,d7
and.l d5,d7
eor.l d7,d1
move.l a6,(a1)+
lsl.l #8,d7
eor.l d7,d3
| a7a6a5a4e7e6e5e4 i7i6i5i4m7m6m5m4 c7c6c5c4g7g6g5g4 k7k6k5k4o7o6o5o4
| a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
| b7b6b5b4f7f6f5f4 j7j6j5j4n7n6n5n4 d7d6d5d4h7h6h5h4 l7l6l5l4p7p6p5p4
| b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
| the previous group is now fully stored: if that completed a destination
| line, skip the unused half of the pitch and move the line end forward
cmp.l a1,a7 | end of dst line?
bne.s c2p1x1_8_tt_start
add.l (screen_offset,pc),a1
add.l (screen_pitch,pc),a7
| second half of the conversion, shared by the prologue and the loop
c2p1x1_8_tt_start:
| 1-bit merge d0/d2 (add.l d7,d7 is the shift left by one)
move.l d2,d7
lsr.l #1,d7
eor.l d0,d7
and.l d6,d7
eor.l d7,d0
add.l d7,d7
eor.l d7,d2
| 1-bit merge d1/d3
move.l d3,d7
lsr.l #1,d7
eor.l d1,d7
and.l d6,d7
eor.l d7,d1
add.l d7,d7
eor.l d7,d3
| a7b7a5b5e7f7e5f5 i7j7i5j5m7n7m5n5 c7d7c5d5g7h7g5h5 k7l7k5l5o7p7o5p5
| a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1
| a6b6a4b4e6f6e4f4 i6j6i4j4m6n6m4n4 c6d6c4d4g6h6g4h4 k6l6k4l4o6p6o4p4
| a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
| 16-bit merge: the low word of d0 trades places with the high word of d2
move.w d2,d7
move.w d0,d2
swap d2
move.w d2,d0
move.w d7,d2
| 16-bit merge: the low word of d1 trades places with the high word of d3
move.w d3,d7
move.w d1,d3
swap d3
move.w d3,d1
move.w d7,d3
| a7b7a5b5e7f7e5f5 i7j7i5j5m7n7m5n5 a6b6a4b4e6f6e4f4 i6j6i4j4m6n6m4n4
| a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0
| c7d7c5d5g7h7g5h5 k7l7k5l5o7p7o5p5 c6d6c4d4g6h6g4h4 k6l6k4l4o6p6o4p4
| c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
| 2-bit merge d0/d2; this mask is an immediate because d4-d6 are taken
move.l d2,d7
lsr.l #2,d7
eor.l d0,d7
and.l #0x33333333,d7
eor.l d7,d0
lsl.l #2,d7
eor.l d7,d2
| 2-bit merge d1/d3
move.l d3,d7
lsr.l #2,d7
eor.l d1,d7
and.l #0x33333333,d7
eor.l d7,d1
lsl.l #2,d7
eor.l d7,d3
| a7b7c7d7e7f7g7h7 i7j7k7l7m7n7o7p7 a6b6c6d6e6f6g6h6 i6j6k6l6m6n6o6p6
| a3b3c3d3e3f3g3h3 i3j3k3l3m3n3o3p3 a2b2c2d2e2f2g2h2 i2j2k2l2m2n2o2p2
| a5b5c5d5e5f5g5h5 i5j5k5l5m5n5o5p5 a4b4c4d4e4f4g4h4 i4j4k4l4m4n4o4p4
| a1b1c1d1e1f1g1h1 i1j1k1l1m1n1o1p1 a0b0c0d0e0f0g0h0 i0j0k0l0m0n0o0p0
| swap the word halves so the lower numbered plane of each pair sits in
| the high word and therefore lands first in memory
swap d0
swap d1
swap d2
swap d3
| park the finished group in a3-a6 so d0-d3 are free for the next fetch:
| a3 = planes 0-1, a4 = planes 2-3, a5 = planes 4-5, a6 = planes 6-7
move.l d0,a6
move.l d2,a5
move.l d1,a4
move.l d3,a3
| more input left?
cmp.l a0,a2
bne c2p1x1_8_tt_pix16
| epilogue: flush the last converted group (no line end test is needed
| after the final store)
move.l a3,(a1)+
move.l a4,(a1)+
move.l a5,(a1)+
move.l a6,(a1)+
| give a7 back its stack pointer role before popping the saved registers
move.l old_sp,sp
movem.l (sp)+,d2-d7/a2-a6
rts
| void asm_c2p1x1_8_rect(const byte *pChunky, const byte *pChunkyEnd, uint32 chunkyWidth, uint32 chunkyPitch, byte *pScreen, uint32 screenPitch);
|
| Chunky-to-planar conversion, 8 bits per pixel, of a rectangle: both the
| source and the destination are walked line by line with their own pitch.
|
| The conversion itself is the same 16 pixels in / 16 planar bytes out
| pipeline as in asm_c2p1x1_8. A destination line is chunkyWidth bytes
| long as well, because 8 planes need one byte per pixel.
|
| In:   pChunky     first chunky byte of the first line
|       pChunkyEnd  end of the last source line (it is compared with the
|                   end-of-line pointer, so it must be the last line start
|                   plus chunkyWidth, not plus chunkyPitch)
|       chunkyWidth bytes to convert per line
|       chunkyPitch distance in bytes between two source line starts
|       pScreen     first destination byte
|       screenPitch distance in bytes between two destination line starts
| Out:  nothing
| Regs: d2-d7/a2-a6 are saved and restored; d0-d1/a0-a1 are clobbered.
|
| Both end-of-line tests are equality tests made once per 16-pixel group,
| so chunkyWidth has to be a non-zero multiple of 16.
|
| The stack pointer is saved in old_sp and a7 is then used as a plain
| address register until the end of the routine; the routine also keeps
| state in the shared variables chunky_end, chunky_pitch, chunky_offset,
| screen_pitch and screen_offset, so it is not reentrant.
| NOTE(review): while a7 points into the screen, anything that pushes onto
| the active stack would write into screen memory - presumably exceptions
| use a separate stack in the mode this runs in; confirm.
|
| Register roles inside the loop:
|       a0      chunky read pointer
|       a1      screen write pointer
|       a2      end of the current source line
|       a7      end of the current destination line
|       a3-a6   converted longwords of the previous group
|       d0-d3   the 16 pixels being converted
|       d4-d6   merge masks (4-bit, 8-bit and 1-bit steps)
|       d7      scratch
SYM(asm_c2p1x1_8_rect):
| registers are pushed first, hence the 11*4 in every stack offset below
movem.l d2-d7/a2-a6,-(sp) | 6 + 5 = 11 longs
#ifdef __FASTCALL__
| register-argument build
| a0: chunky
move.l a1,chunky_end
| d0.l: chunky width
move.l (11*4+4,sp),a1 | a1: screen
| the two pitches arrive in d1 and d2 in the opposite order to the one the
| common code below expects
exg d1,d2 | d2.l: chunky pitch
| d1.l: screen pitch
#else
| stack-argument build
move.l (11*4+4,sp),a0 | a0: chunky
move.l (11*4+8,sp),chunky_end
move.l (11*4+12,sp),d0 | d0.l: chunky width
move.l (11*4+16,sp),d2 | d2.l: chunky pitch
move.l (11*4+20,sp),a1 | a1: screen
move.l (11*4+24,sp),d1 | d1.l: screen pitch
#endif
| from here on a7 is a data pointer; the real stack pointer is kept in old_sp
move.l sp,old_sp
lea (a0,d0.l),a2 | a2: end of first src line
lea (a1,d0.l),a7 | a7: end of first dst line
| screen_pitch advances the line end, screen_offset (pitch - width) moves
| the write pointer from the end of one line to the start of the next
move.l d1,screen_pitch
sub.l d0,d1
move.l d1,screen_offset
| the same pair of values for the source side
move.l d2,chunky_pitch
sub.l d0,d2
move.l d2,chunky_offset
| merge masks kept in registers for the whole run
move.l #0x0f0f0f0f,d4
move.l #0x00ff00ff,d5
move.l #0x55555555,d6
| prologue: fetch the first 16 pixels
move.l (a0)+,d0
move.l (a0)+,d1
move.l (a0)+,d2
move.l (a0)+,d3
| a7a6a5a4a3a2a1a0 b7b6b5b4b3b2b1b0 c7c6c5c4c3c2c1c0 d7d6d5d4d3d2d1d0
| e7e6e5e4e3e2e1e0 f7f6f5f4f3f2f1f0 g7g6g5g4g3g2g1g0 h7h6h5h4h3h2h1h0
| i7i6i5i4i3i2i1i0 j7j6j5j4j3j2j1j0 k7k6k5k4k3k2k1k0 l7l6l5l4l3l2l1l0
| m7m6m5m4m3m2m1m0 n7n6n5n4n3n2n1n0 o7o6o5o4o3o2o1o0 p7p6p5p4p3p2p1p0
| 4-bit merge: swap the low nibbles of d0 with the high nibbles of d1
move.l d1,d7
lsr.l #4,d7
eor.l d0,d7
and.l d4,d7
eor.l d7,d0
lsl.l #4,d7
eor.l d7,d1
| same 4-bit merge for the pair d2/d3
move.l d3,d7
lsr.l #4,d7
eor.l d2,d7
and.l d4,d7
eor.l d7,d2
lsl.l #4,d7
eor.l d7,d3
| a7a6a5a4e7e6e5e4 b7b6b5b4f7f6f5f4 c7c6c5c4g7g6g5g4 d7d6d5d4h7h6h5h4
| a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
| i7i6i5i4m7m6m5m4 j7j6j5j4n7n6n5n4 k7k6k5k4o7o6o5o4 l7l6l5l4p7p6p5p4
| i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
| 8-bit merge: swap bytes between d0 and d2
move.l d2,d7
lsr.l #8,d7
eor.l d0,d7
and.l d5,d7
eor.l d7,d0
lsl.l #8,d7
eor.l d7,d2
| same 8-bit merge for the pair d1/d3
move.l d3,d7
lsr.l #8,d7
eor.l d1,d7
and.l d5,d7
eor.l d7,d1
lsl.l #8,d7
eor.l d7,d3
| a7a6a5a4e7e6e5e4 i7i6i5i4m7m6m5m4 c7c6c5c4g7g6g5g4 k7k6k5k4o7o6o5o4
| a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
| b7b6b5b4f7f6f5f4 j7j6j5j4n7n6n5n4 d7d6d5d4h7h6h5h4 l7l6l5l4p7p6p5p4
| b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
| there is no previous group to store yet, so enter the loop in the middle
bra.s c2p1x1_8_rect_start
| main loop: fetch the next 16 pixels and run the 4-bit and 8-bit merges
| again, with the four stores of the previous group (a3, a4, a5, a6) spread
| between the merge instructions
c2p1x1_8_rect_pix16:
move.l (a0)+,d0
move.l (a0)+,d1
move.l (a0)+,d2
move.l (a0)+,d3
| a7a6a5a4a3a2a1a0 b7b6b5b4b3b2b1b0 c7c6c5c4c3c2c1c0 d7d6d5d4d3d2d1d0
| e7e6e5e4e3e2e1e0 f7f6f5f4f3f2f1f0 g7g6g5g4g3g2g1g0 h7h6h5h4h3h2h1h0
| i7i6i5i4i3i2i1i0 j7j6j5j4j3j2j1j0 k7k6k5k4k3k2k1k0 l7l6l5l4l3l2l1l0
| m7m6m5m4m3m2m1m0 n7n6n5n4n3n2n1n0 o7o6o5o4o3o2o1o0 p7p6p5p4p3p2p1p0
| 4-bit merge d0/d1; the a3 store belongs to the previous group (planes 0-1)
move.l d1,d7
lsr.l #4,d7
move.l a3,(a1)+
eor.l d0,d7
and.l d4,d7
eor.l d7,d0
lsl.l #4,d7
eor.l d7,d1
| 4-bit merge d2/d3; the a4 store belongs to the previous group (planes 2-3)
move.l d3,d7
lsr.l #4,d7
eor.l d2,d7
and.l d4,d7
eor.l d7,d2
move.l a4,(a1)+
lsl.l #4,d7
eor.l d7,d3
| a7a6a5a4e7e6e5e4 b7b6b5b4f7f6f5f4 c7c6c5c4g7g6g5g4 d7d6d5d4h7h6h5h4
| a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
| i7i6i5i4m7m6m5m4 j7j6j5j4n7n6n5n4 k7k6k5k4o7o6o5o4 l7l6l5l4p7p6p5p4
| i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
| 8-bit merge d0/d2; the a5 store belongs to the previous group (planes 4-5)
move.l d2,d7
lsr.l #8,d7
eor.l d0,d7
and.l d5,d7
eor.l d7,d0
move.l a5,(a1)+
lsl.l #8,d7
eor.l d7,d2
| 8-bit merge d1/d3; the a6 store belongs to the previous group (planes 6-7)
move.l d3,d7
lsr.l #8,d7
eor.l d1,d7
and.l d5,d7
eor.l d7,d1
move.l a6,(a1)+
lsl.l #8,d7
eor.l d7,d3
| a7a6a5a4e7e6e5e4 i7i6i5i4m7m6m5m4 c7c6c5c4g7g6g5g4 k7k6k5k4o7o6o5o4
| a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
| b7b6b5b4f7f6f5f4 j7j6j5j4n7n6n5n4 d7d6d5d4h7h6h5h4 l7l6l5l4p7p6p5p4
| b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
| the previous group is now fully stored: if that completed a destination
| line, move the write pointer to the next line and the line end forward
cmp.l a1,a7 | end of dst line?
bne.s c2p1x1_8_rect_start
add.l (screen_offset,pc),a1
add.l (screen_pitch,pc),a7
| second half of the conversion, shared by the prologue and the loop
c2p1x1_8_rect_start:
| 1-bit merge d0/d2 (add.l d7,d7 is the shift left by one)
move.l d2,d7
lsr.l #1,d7
eor.l d0,d7
and.l d6,d7
eor.l d7,d0
add.l d7,d7
eor.l d7,d2
| 1-bit merge d1/d3
move.l d3,d7
lsr.l #1,d7
eor.l d1,d7
and.l d6,d7
eor.l d7,d1
add.l d7,d7
eor.l d7,d3
| a7b7a5b5e7f7e5f5 i7j7i5j5m7n7m5n5 c7d7c5d5g7h7g5h5 k7l7k5l5o7p7o5p5
| a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1
| a6b6a4b4e6f6e4f4 i6j6i4j4m6n6m4n4 c6d6c4d4g6h6g4h4 k6l6k4l4o6p6o4p4
| a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
| 16-bit merge: the low word of d0 trades places with the high word of d2
move.w d2,d7
move.w d0,d2
swap d2
move.w d2,d0
move.w d7,d2
| 16-bit merge: the low word of d1 trades places with the high word of d3
move.w d3,d7
move.w d1,d3
swap d3
move.w d3,d1
move.w d7,d3
| a7b7a5b5e7f7e5f5 i7j7i5j5m7n7m5n5 a6b6a4b4e6f6e4f4 i6j6i4j4m6n6m4n4
| a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0
| c7d7c5d5g7h7g5h5 k7l7k5l5o7p7o5p5 c6d6c4d4g6h6g4h4 k6l6k4l4o6p6o4p4
| c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
| 2-bit merge d0/d2; this mask is an immediate because d4-d6 are taken
move.l d2,d7
lsr.l #2,d7
eor.l d0,d7
and.l #0x33333333,d7
eor.l d7,d0
lsl.l #2,d7
eor.l d7,d2
| 2-bit merge d1/d3
move.l d3,d7
lsr.l #2,d7
eor.l d1,d7
and.l #0x33333333,d7
eor.l d7,d1
lsl.l #2,d7
eor.l d7,d3
| a7b7c7d7e7f7g7h7 i7j7k7l7m7n7o7p7 a6b6c6d6e6f6g6h6 i6j6k6l6m6n6o6p6
| a3b3c3d3e3f3g3h3 i3j3k3l3m3n3o3p3 a2b2c2d2e2f2g2h2 i2j2k2l2m2n2o2p2
| a5b5c5d5e5f5g5h5 i5j5k5l5m5n5o5p5 a4b4c4d4e4f4g4h4 i4j4k4l4m4n4o4p4
| a1b1c1d1e1f1g1h1 i1j1k1l1m1n1o1p1 a0b0c0d0e0f0g0h0 i0j0k0l0m0n0o0p0
| swap the word halves so the lower numbered plane of each pair sits in
| the high word and therefore lands first in memory
swap d0
swap d1
swap d2
swap d3
| park the finished group in a3-a6 so d0-d3 are free for the next fetch:
| a3 = planes 0-1, a4 = planes 2-3, a5 = planes 4-5, a6 = planes 6-7
move.l d0,a6
move.l d2,a5
move.l d1,a4
move.l d3,a3
cmp.l a0,a2 | end of src line?
bne c2p1x1_8_rect_pix16
| a source line is finished: stop if it was the last one, otherwise step
| the read pointer and the line end to the next source line
cmp.l (chunky_end,pc),a2
beq.s c2p1x1_8_rect_done
add.l (chunky_offset,pc),a0
add.l (chunky_pitch,pc),a2
bra c2p1x1_8_rect_pix16
c2p1x1_8_rect_done:
| epilogue: flush the last converted group
move.l a3,(a1)+
move.l a4,(a1)+
move.l a5,(a1)+
move.l a6,(a1)+
| give a7 back its stack pointer role before popping the saved registers
move.l old_sp,sp
movem.l (sp)+,d2-d7/a2-a6
rts
| void asm_c2p1x1_4(const byte *pChunky, const byte *pChunkyEnd, byte *pScreen);
|
| Chunky-to-planar conversion, 4 bits per pixel, contiguous source and
| contiguous destination.
|
| Every loop iteration reads 16 chunky bytes (one pixel per byte) and
| produces 8 planar bytes: four 16-bit words, written in the order
| plane 0, plane 1, plane 2, plane 3.
|
| In:   pChunky    first chunky byte
|       pChunkyEnd one past the last chunky byte
|       pScreen    destination for the planar data
| Out:  nothing
| Regs: d2-d7/a2-a6 are saved and restored; d0-d1/a0-a1 are clobbered.
|
| Two pixels are packed into one byte with a shift and an OR and nothing
| is masked first, so every chunky byte has to be in the range 0-15; a set
| upper bit would leak into the neighbouring pixel.
|
| The loop only stops when the read pointer is exactly equal to pChunkyEnd,
| and the first 16 bytes are read before any comparison is made, so the
| input length has to be a non-zero multiple of 16 bytes.
|
| Register roles inside the loop:
|       a0      chunky read pointer
|       a1      screen write pointer
|       a2      chunky end
|       a5-a6   converted longwords of the previous group
|       d0-d1   the 16 packed pixels being converted
|       d2-d3   second and fourth longword of the group before packing
|       d4-d6   merge masks (2-bit, 8-bit and 1-bit steps)
|       d7      scratch
SYM(asm_c2p1x1_4):
#ifdef __FASTCALL__
| register-argument build: the first two pointers arrive in a0/a1 and the
| third one is read from the stack
| a0: chunky
move.l a1,d0 | chunky end
move.l 4(sp),a1 | screen
#else
| stack-argument build: all three arguments are read from the stack
move.l (4,sp),a0 | chunky
move.l (8,sp),d0 | chunky end
move.l (12,sp),a1 | screen
#endif
| the arguments were fetched before this push, so the offsets above are
| relative to the return address only
movem.l d2-d7/a2-a6,-(sp)
| a2 = chunky end (kept in d0 until a2 had been saved)
move.l d0,a2
| merge masks kept in registers for the whole run
move.l #0x33333333,d4
move.l #0x00ff00ff,d5
move.l #0x55555555,d6
| prologue: fetch the first 16 pixels; note the register order d0, d2, d1,
| d3, which puts pixels a-d and i-l into d0/d1 and e-h and m-p into d2/d3
move.l (a0)+,d0
move.l (a0)+,d2
move.l (a0)+,d1
move.l (a0)+,d3
| pack two pixels per byte: pixels a-d and i-l go to the high nibbles,
| pixels e-h and m-p stay in the low nibbles
lsl.l #4,d0
lsl.l #4,d1
or.l d2,d0
or.l d3,d1
| there is no previous group to store yet, so enter the loop in the middle
bra.s c2p1x1_4_start
| main loop: fetch and pack the next 16 pixels, with the two stores of the
| previous group (a5, a6) placed between the packing instructions
c2p1x1_4_pix16:
move.l (a0)+,d0
move.l (a0)+,d2
move.l (a0)+,d1
move.l (a0)+,d3
lsl.l #4,d0
lsl.l #4,d1
| previous group, planes 0-1
move.l a5,(a1)+
or.l d2,d0
or.l d3,d1
| previous group, planes 2-3
move.l a6,(a1)+
| conversion proper, shared by the prologue and the loop
c2p1x1_4_start:
| a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
| i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
| 8-bit merge: swap bytes between d0 and d1
move.l d1,d7
lsr.l #8,d7
eor.l d0,d7
and.l d5,d7
eor.l d7,d0
lsl.l #8,d7
eor.l d7,d1
| a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
| b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
| 1-bit merge (add.l d7,d7 is the shift left by one)
move.l d1,d7
lsr.l #1,d7
eor.l d0,d7
and.l d6,d7
eor.l d7,d0
add.l d7,d7
eor.l d7,d1
| a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1
| a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
| 16-bit merge: the low word of d0 trades places with the high word of d1
move.w d1,d7
move.w d0,d1
swap d1
move.w d1,d0
move.w d7,d1
| a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0
| c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
| 2-bit merge
move.l d1,d7
lsr.l #2,d7
eor.l d0,d7
and.l d4,d7
eor.l d7,d0
lsl.l #2,d7
eor.l d7,d1
| a3b3c3d3e3f3g3h3 i3j3k3l3m3n3o3p3 a2b2c2d2e2f2g2h2 i2j2k2l2m2n2o2p2
| a1b1c1d1e1f1g1h1 i1j1k1l1m1n1o1p1 a0b0c0d0e0f0g0h0 i0j0k0l0m0n0o0p0
| swap the word halves so the lower numbered plane of each pair sits in
| the high word and therefore lands first in memory
swap d0
swap d1
| park the finished group so d0-d3 are free for the next fetch:
| a5 = planes 0-1, a6 = planes 2-3
move.l d1,a5
move.l d0,a6
| more input left?
cmp.l a0,a2
bne.s c2p1x1_4_pix16
| epilogue: flush the last converted group
move.l a5,(a1)+
move.l a6,(a1)+
movem.l (sp)+,d2-d7/a2-a6
rts
| void asm_c2p1x1_4_rect(const byte *pChunky, const byte *pChunkyEnd, uint32 chunkyWidth, uint32 chunkyPitch, byte *pScreen, uint32 screenPitch);
|
| Chunky-to-planar conversion, 4 bits per pixel, of a rectangle: both the
| source and the destination are walked line by line with their own pitch.
|
| The conversion itself is the same 16 pixels in / 8 planar bytes out
| pipeline as in asm_c2p1x1_4, so a destination line is chunkyWidth / 2
| bytes long.
|
| In:   pChunky     first chunky byte of the first line
|       pChunkyEnd  end of the last source line (it is compared with the
|                   end-of-line pointer, so it must be the last line start
|                   plus chunkyWidth, not plus chunkyPitch)
|       chunkyWidth bytes (pixels) to convert per line
|       chunkyPitch distance in bytes between two source line starts
|       pScreen     first destination byte
|       screenPitch distance in bytes between two destination line starts
| Out:  nothing
| Regs: d2-d7/a2-a6 are saved and restored; d0-d1/a0-a1 are clobbered.
|
| Two pixels are packed into one byte with a shift and an OR and nothing
| is masked first, so every chunky byte has to be in the range 0-15.
| Both end-of-line tests are equality tests made once per 16-pixel group,
| so chunkyWidth has to be a non-zero multiple of 16.
|
| The stack pointer is saved in old_sp and a7 is then used as a plain
| address register until the end of the routine; the routine also keeps
| state in the shared variables chunky_end, chunky_pitch, chunky_offset,
| screen_pitch and screen_offset, so it is not reentrant.
| NOTE(review): while a7 points into the screen, anything that pushes onto
| the active stack would write into screen memory - presumably exceptions
| use a separate stack in the mode this runs in; confirm.
|
| Register roles inside the loop:
|       a0      chunky read pointer
|       a1      screen write pointer
|       a2      end of the current source line
|       a7      end of the current destination line
|       a5-a6   converted longwords of the previous group
|       d0-d1   the 16 packed pixels being converted
|       d2-d3   second and fourth longword of the group before packing
|       d4-d6   merge masks (2-bit, 8-bit and 1-bit steps)
|       d7      scratch
SYM(asm_c2p1x1_4_rect):
| registers are pushed first, hence the 11*4 in every stack offset below
movem.l d2-d7/a2-a6,-(sp) | 6 + 5 = 11 longs
#ifdef __FASTCALL__
| register-argument build
| a0: chunky
move.l a1,chunky_end
| d0.l: chunky width
move.l (11*4+4,sp),a1 | a1: screen
| the two pitches arrive in d1 and d2 in the opposite order to the one the
| common code below expects
exg d1,d2 | d2.l: chunky pitch
| d1.l: screen pitch
#else
| stack-argument build
move.l (11*4+4,sp),a0 | a0: chunky
move.l (11*4+8,sp),chunky_end
move.l (11*4+12,sp),d0 | d0.l: chunky width
move.l (11*4+16,sp),d2 | d2.l: chunky pitch
move.l (11*4+20,sp),a1 | a1: screen
move.l (11*4+24,sp),d1 | d1.l: screen pitch
#endif
| from here on a7 is a data pointer; the real stack pointer is kept in old_sp
move.l sp,old_sp
| four planes need half a byte per pixel, so a destination line is half
| as long as a source line
move.l d0,d3 | d3.l: screen width
lsr.l #1,d3 |
lea (a0,d0.l),a2 | a2: end of first src line
lea (a1,d3.l),a7 | a7: end of first dst line
| screen_pitch advances the line end, screen_offset (pitch - line length)
| moves the write pointer from the end of one line to the start of the next
move.l d1,screen_pitch
sub.l d3,d1
move.l d1,screen_offset
| the same pair of values for the source side
move.l d2,chunky_pitch
sub.l d0,d2
move.l d2,chunky_offset
| merge masks kept in registers for the whole run
move.l #0x33333333,d4
move.l #0x00ff00ff,d5
move.l #0x55555555,d6
| prologue: fetch the first 16 pixels; note the register order d0, d2, d1,
| d3, which puts pixels a-d and i-l into d0/d1 and e-h and m-p into d2/d3
move.l (a0)+,d0
move.l (a0)+,d2
move.l (a0)+,d1
move.l (a0)+,d3
| pack two pixels per byte: pixels a-d and i-l go to the high nibbles,
| pixels e-h and m-p stay in the low nibbles
lsl.l #4,d0
lsl.l #4,d1
or.l d2,d0
or.l d3,d1
| there is no previous group to store yet, so enter the loop in the middle
bra.s c2p1x1_4_rect_start
| main loop: fetch and pack the next 16 pixels, with the two stores of the
| previous group (a5, a6) placed between the packing instructions
c2p1x1_4_rect_pix16:
move.l (a0)+,d0
move.l (a0)+,d2
move.l (a0)+,d1
move.l (a0)+,d3
lsl.l #4,d0
lsl.l #4,d1
| previous group, planes 0-1
move.l a5,(a1)+
or.l d2,d0
or.l d3,d1
| previous group, planes 2-3
move.l a6,(a1)+
| the previous group is now fully stored: if that completed a destination
| line, move the write pointer to the next line and the line end forward
cmp.l a1,a7 | end of dst line?
bne.s c2p1x1_4_rect_start
add.l (screen_offset,pc),a1
add.l (screen_pitch,pc),a7
| conversion proper, shared by the prologue and the loop
c2p1x1_4_rect_start:
| a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
| i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
| 8-bit merge: swap bytes between d0 and d1
move.l d1,d7
lsr.l #8,d7
eor.l d0,d7
and.l d5,d7
eor.l d7,d0
lsl.l #8,d7
eor.l d7,d1
| a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
| b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
| 1-bit merge (add.l d7,d7 is the shift left by one)
move.l d1,d7
lsr.l #1,d7
eor.l d0,d7
and.l d6,d7
eor.l d7,d0
add.l d7,d7
eor.l d7,d1
| a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1
| a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
| 16-bit merge: the low word of d0 trades places with the high word of d1
move.w d1,d7
move.w d0,d1
swap d1
move.w d1,d0
move.w d7,d1
| a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0
| c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
| 2-bit merge
move.l d1,d7
lsr.l #2,d7
eor.l d0,d7
and.l d4,d7
eor.l d7,d0
lsl.l #2,d7
eor.l d7,d1
| a3b3c3d3e3f3g3h3 i3j3k3l3m3n3o3p3 a2b2c2d2e2f2g2h2 i2j2k2l2m2n2o2p2
| a1b1c1d1e1f1g1h1 i1j1k1l1m1n1o1p1 a0b0c0d0e0f0g0h0 i0j0k0l0m0n0o0p0
| swap the word halves so the lower numbered plane of each pair sits in
| the high word and therefore lands first in memory
swap d0
swap d1
| park the finished group so d0-d3 are free for the next fetch:
| a5 = planes 0-1, a6 = planes 2-3
move.l d1,a5
move.l d0,a6
cmp.l a0,a2 | end of src line?
bne.s c2p1x1_4_rect_pix16
| a source line is finished: stop if it was the last one, otherwise step
| the read pointer and the line end to the next source line
cmp.l (chunky_end,pc),a2
beq.s c2p1x1_4_rect_done
add.l (chunky_offset,pc),a0
add.l (chunky_pitch,pc),a2
bra.s c2p1x1_4_rect_pix16
c2p1x1_4_rect_done:
| epilogue: flush the last converted group
move.l a5,(a1)+
move.l a6,(a1)+
| give a7 back its stack pointer role before popping the saved registers
move.l old_sp,sp
movem.l (sp)+,d2-d7/a2-a6
rts
| Scratch variables shared by the _tt and _rect routines above. They are
| written once in each routine prologue and read back inside the loops
| with PC-relative addressing, because every data and address register is
| already in use there.
| place it within reach of 32K (PC relative)
| (no section directive precedes these labels, so they are emitted right
| behind the code in the same section)
|
| distance in bytes between two destination line starts; added to the
| end-of-line pointer after every completed destination line
screen_pitch:
ds.l 1
| bytes to skip from the end of one destination line to the start of the
| next one; added to the write pointer after every completed line
screen_offset:
ds.l 1
| distance in bytes between two source line starts; added to the source
| end-of-line pointer after every completed source line
chunky_pitch:
ds.l 1
| bytes to skip from the end of one source line to the start of the next
| one; added to the read pointer after every completed source line
chunky_offset:
ds.l 1
| end of the last source line; reaching it terminates the _rect routines
chunky_end:
ds.l 1
.bss
.even
| stack pointer saved while a7 is used as a data pointer; it is accessed
| with absolute addressing only, so it does not need to sit near the code
old_sp: ds.l 1