Delay code

From NESdev Wiki
Revision as of 12:15, 26 March 2016 by Bisqwit (talk | contribs) (Notes on page wrapping in branches)
Jump to navigationJump to search

Delay code

Functions that cause a parametrised number of cycles of delay.

Note that all branch instructions are written assuming that no page wrap occurs. If you want to ensure this condition at compile time, use the bccnw/beqnw/etc. macros that are listed at Fixed dycle delay.

25..280 cycles of delay

;;;;;;;;;;;;;;;;;;;;;;;;
; Delays A clocks + overhead
; Preserved: X, Y
; Time: A+25 clocks (including JSR)
;;;;;;;;;;;;;;;;;;;;;;;;
:       sbc #7          ; carry set by CMP
delay_a_25_clocks:
	cmp #7
	bcs :-          ; do multiples of 7
	lsr a           ; bit 0
	bcs :+
                       ; A=clocks/2, either 0,1,2,3
	beq @zero       ; 0: 5
	lsr a
	beq :+          ; 1: 7
	bcc :+          ; 2: 9
@zero:  bne :+          ; 3: 11
:       rts             ; (thanks to dclxvi for the algorithm)

33..65568 cycles of delay

;;;;;;;;;;;;;;;;;;;;;;;;
; Delays A:X clocks+overhead
; Time: 256*A+X+33 clocks (including JSR)
; Clobbers A. Preserves X,Y. Has relocations.
;;;;;;;;;;;;;;;;;;;;;;;;
:	; do 256 cycles.	; 5 cycles done so far. Loop is 2+1+ 2+3+ 1 = 9 bytes.
	sbc #1			; 2 cycles - Carry was set from cmp
	pha			; 3 cycles
	 lda #(256-25-10-2-4)   ; +2
	 jsr delay_a_25_clocks
	pla                     ; 4 cycles
delay_256a_x_33_clocks:
	cmp #1			; +2; 2 cycles overhead
	bcs :-			; +2; 4 cycles overhead
	; 0-255 cycles remain, overhead = 4
	txa 			; +2; 6; +27 = 33
        ; 15 + JSR + RTS overhead for the code below. JSR=6, RTS=6. 15+12=27
        ;          ;    Cycles        Accumulator     Carry flag
        ;          ; 0  1  2  3  4       (hex)        0 1 2 3 4
        sec        ; 0  0  0  0  0   00 01 02 03 04   1 1 1 1 1
:       sbc #5     ; 2  2  2  2  2   FB FC FD FE FF   0 0 0 0 0
        bcs :-     ; 4  4  4  4  4   FB FC FD FE FF   0 0 0 0 0
        lsr a      ; 6  6  6  6  6   7D 7E 7E 7F 7F   1 0 1 0 1
        bcc :+     ; 8  8  8  8  8   7D 7E 7E 7F 7F   1 0 1 0 1
:       sbc #$7E   ;10 11 10 11 10   FF FF 00 00 01   0 0 1 1 1
        bcc :+     ;12 13 12 13 12   FF FF 00 00 01   0 0 1 1 1
        beq :+     ;      14 15 14         00 00 01       1 1 1
        bne :+     ;            16               01           1
:       rts        ;15 16 17 18 19   (thanks to dclxvi for the algorithm)

30..65565 cycles of delay

;;;;;;;;;;;;;;;;;;;;;;;;
; Delays X:A clocks+overhead
; Time: 256*X+A+30 clocks (including JSR)
; Clobbers A,X. Preserves Y. Has relocations.
;;;;;;;;;;;;;;;;;;;;;;;;
delay_256x_a_30_clocks:
	cpx #0			; +2
	beq delay_a_25_clocks	; +3  (25+5 = 30 cycles overhead)
	; do 256 cycles.        ;  4 cycles so far. Loop is 1+1+ 2+3+ 1+3 = 11 bytes.
	dex                     ;  2 cycles
	pha                     ;  3 cycles
	 lda #(256-25-9-2-7)    ; +2
	 jsr delay_a_25_clocks
	pla                        ; 4
	jmp delay_256x_a_30_clocks ; 3.

16..65296 cycles of delay

;;;;;;;;;;;;;;;;;;;;;;;;
; Delays A*256 clocks + overhead
; Preserved: X, Y
; Time: A*256+16 clocks (including JSR)
;;;;;;;;;;;;;;;;;;;;;;;;
delay_256a_16_clocks:
	cmp #0
	bne :+
	rts
delay_256a_11_clocks_:
:       pha
	 lda #256-19-22
	 jsr delay_a_25_clocks
	pla
	clc
	adc #-1&$FF
	bne :-
	rts

31..65566 cycles of delay

;;;;;;;;;;;;;;;;;;;;;;;;
; Delays A:X clocks+overhead
; Time: 256*A+X+31 clocks (including JSR)
; Clobbers A. Preserves X,Y. Has relocations.
;;;;;;;;;;;;;;;;;;;;;;;;
:	; do 256 cycles.	; 5 cycles done so far. Loop is 2+1+ 2+3+ 1 = 9 bytes.
	sbc #1			; 2 cycles - Carry was set from cmp
	pha			; 3 cycles
	 lda #(256-25-10-2-4)   ; +2
	 jsr delay_a_25_clocks
	pla                     ; 4 cycles
delay_256a_x_31_clocks:
	cmp #1			; +2; 2 cycles overhead
	bcs :-			; +2; 4 cycles overhead
	; 0-255 cycles remain, overhead = 4
	txa 			; +2; 6; +25 = 31
	;passthru
<<Place the function delay_a_25_clocks immediately following here>>

See also