Delay code

From NESdev Wiki
Revision as of 12:30, 26 March 2016 by Bisqwit (talk | contribs) (→‎33..65568 cycles of delay: Add alternatives that do not depend on delay_a_25_clocks)
Jump to navigationJump to search

Delay code

Functions that cause a parametrised number of cycles of delay.

Note that all branch instructions are written assuming that no page wrap occurs. If you want to ensure this condition at compile time, use the bccnw/beqnw/etc. macros that are listed at Fixed dycle delay.

25..280 cycles of delay

;;;;;;;;;;;;;;;;;;;;;;;;
; Delays A clocks + overhead
; Preserved: X, Y
; Time: A+25 clocks (including JSR)
;;;;;;;;;;;;;;;;;;;;;;;;
:       sbc #7          ; carry set by CMP
delay_a_25_clocks:
	cmp #7
	bcs :-          ; do multiples of 7
	lsr a           ; bit 0
	bcs :+
                       ; A=clocks/2, either 0,1,2,3
	beq @zero       ; 0: 5
	lsr a
	beq :+          ; 1: 7
	bcc :+          ; 2: 9
@zero:  bne :+          ; 3: 11
:       rts             ; (thanks to dclxvi for the algorithm)

33..65568 cycles of delay

;;;;;;;;;;;;;;;;;;;;;;;;
; Delays A:X clocks+overhead
; Time: 256*A+X+33 clocks (including JSR)
; Clobbers A. Preserves X,Y. Has relocations.
;;;;;;;;;;;;;;;;;;;;;;;;
:	; do 256 cycles.	; 5 cycles done so far. Loop is 2+1+ 2+3+ 1 = 9 bytes.
	sbc #1			; 2 cycles - Carry was set from cmp
	pha			; 3 cycles
	 lda #(256-25-10-2-4)   ; +2
	 jsr delay_a_25_clocks
	pla                     ; 4 cycles
delay_256a_x_33_clocks:
	cmp #1			; +2; 2 cycles overhead
	bcs :-			; +2; 4 cycles overhead
	; 0-255 cycles remain, overhead = 4
	txa 			; +2; 6; +27 = 33
        ; 15 + JSR + RTS overhead for the code below. JSR=6, RTS=6. 15+12=27
        ;          ;    Cycles        Accumulator     Carry flag
        ;          ; 0  1  2  3  4       (hex)        0 1 2 3 4
        sec        ; 0  0  0  0  0   00 01 02 03 04   1 1 1 1 1
:       sbc #5     ; 2  2  2  2  2   FB FC FD FE FF   0 0 0 0 0
        bcs :-     ; 4  4  4  4  4   FB FC FD FE FF   0 0 0 0 0
        lsr a      ; 6  6  6  6  6   7D 7E 7E 7F 7F   1 0 1 0 1
        bcc :+     ; 8  8  8  8  8   7D 7E 7E 7F 7F   1 0 1 0 1
:       sbc #$7E   ;10 11 10 11 10   FF FF 00 00 01   0 0 1 1 1
        bcc :+     ;12 13 12 13 12   FF FF 00 00 01   0 0 1 1 1
        beq :+     ;      14 15 14         00 00 01       1 1 1
        bne :+     ;            16               01           1
:       rts        ;15 16 17 18 19   (thanks to dclxvi for the algorithm)


;;;;;;;;;;;;;;;;;;;;;;;;
; Delays A:X clocks+overhead
; Time: 256*A+X+33 clocks (including JSR)
; Clobbers A,Y. Preserves X. No relocations.
; Does not depend on delay_a_25_clocks.
;;;;;;;;;;;;;;;;;;;;;;;;
:	; do 256 cycles.	; 5 cycles done so far. Loop is 2+1+ 2+3+ 1 = 9 bytes.
	sbc #1			; 2 cycles - Carry was set from cmp
        ldy #48  ;\
        dey      ; |- Clobbers Y; 246 cycles, 253 total
        bpl *-1  ;/
        ldy $A4  ;              ; 3 cycles, 256 total
delay_256a_x_33_clocks_b:
	cmp #1			; +2; 2 cycles overhead
	bcs :-			; +2; 4 cycles overhead
	; 0-255 cycles remain, overhead = 4
	txa 			; +2; 6; +27 = 33
        ; 15 + JSR + RTS overhead for the code below. JSR=6, RTS=6. 15+12=27
        ;          ;    Cycles        Accumulator     Carry flag
        ;          ; 0  1  2  3  4       (hex)        0 1 2 3 4
        sec        ; 0  0  0  0  0   00 01 02 03 04   1 1 1 1 1
:       sbc #5     ; 2  2  2  2  2   FB FC FD FE FF   0 0 0 0 0
        bcs :-     ; 4  4  4  4  4   FB FC FD FE FF   0 0 0 0 0
        lsr a      ; 6  6  6  6  6   7D 7E 7E 7F 7F   1 0 1 0 1
        bcc :+     ; 8  8  8  8  8   7D 7E 7E 7F 7F   1 0 1 0 1
:       sbc #$7E   ;10 11 10 11 10   FF FF 00 00 01   0 0 1 1 1
        bcc :+     ;12 13 12 13 12   FF FF 00 00 01   0 0 1 1 1
        beq :+     ;      14 15 14         00 00 01       1 1 1
        bne :+     ;            16               01           1
:       rts        ;15 16 17 18 19   (thanks to dclxvi for the algorithm)
;;;;;;;;;;;;;;;;;;;;;;;;
; Delays A:X clocks+overhead
; Time: 256*A+X+33 clocks (including JSR)
; Clobbers A. Preserves X,Y. No relocations.
; Does not depend on delay_a_25_clocks.
;;;;;;;;;;;;;;;;;;;;;;;;
:	; do 256 cycles.	; 5 cycles done so far. Loop is 2+1+ 2+3+ 1 = 9 bytes.
	sbc #1			; 2 cycles - Carry was set from cmp
        pha       ;\
         txa      ; |
         ldx #46  ; |
         dex      ; |-          ; 247 cycles, 254 total
         bpl *-1  ; |
         tax      ; |
        pla       ;/
        nop                     ; 2 cycles; 256 cycles total
delay_256a_x_33_clocks_c:
	cmp #1			; +2; 2 cycles overhead
	bcs :-			; +2; 4 cycles overhead
	; 0-255 cycles remain, overhead = 4
	txa 			; +2; 6; +27 = 33
        ; 15 + JSR + RTS overhead for the code below. JSR=6, RTS=6. 15+12=27
        ;          ;    Cycles        Accumulator     Carry flag
        ;          ; 0  1  2  3  4       (hex)        0 1 2 3 4
        sec        ; 0  0  0  0  0   00 01 02 03 04   1 1 1 1 1
:       sbc #5     ; 2  2  2  2  2   FB FC FD FE FF   0 0 0 0 0
        bcs :-     ; 4  4  4  4  4   FB FC FD FE FF   0 0 0 0 0
        lsr a      ; 6  6  6  6  6   7D 7E 7E 7F 7F   1 0 1 0 1
        bcc :+     ; 8  8  8  8  8   7D 7E 7E 7F 7F   1 0 1 0 1
:       sbc #$7E   ;10 11 10 11 10   FF FF 00 00 01   0 0 1 1 1
        bcc :+     ;12 13 12 13 12   FF FF 00 00 01   0 0 1 1 1
        beq :+     ;      14 15 14         00 00 01       1 1 1
        bne :+     ;            16               01           1
:       rts        ;15 16 17 18 19   (thanks to dclxvi for the algorithm)

30..65565 cycles of delay

;;;;;;;;;;;;;;;;;;;;;;;;
; Delays X:A clocks+overhead
; Time: 256*X+A+30 clocks (including JSR)
; Clobbers A,X. Preserves Y. Has relocations.
;;;;;;;;;;;;;;;;;;;;;;;;
delay_256x_a_30_clocks:
	cpx #0			; +2
	beq delay_a_25_clocks	; +3  (25+5 = 30 cycles overhead)
	; do 256 cycles.        ;  4 cycles so far. Loop is 1+1+ 2+3+ 1+3 = 11 bytes.
	dex                     ;  2 cycles
	pha                     ;  3 cycles
	 lda #(256-25-9-2-7)    ; +2
	 jsr delay_a_25_clocks
	pla                        ; 4
	jmp delay_256x_a_30_clocks ; 3.

16..65296 cycles of delay

;;;;;;;;;;;;;;;;;;;;;;;;
; Delays A*256 clocks + overhead
; Preserved: X, Y
; Time: A*256+16 clocks (including JSR)
;;;;;;;;;;;;;;;;;;;;;;;;
delay_256a_16_clocks:
	cmp #0
	bne :+
	rts
delay_256a_11_clocks_:
:       pha
	 lda #256-19-22
	 jsr delay_a_25_clocks
	pla
	clc
	adc #-1&$FF
	bne :-
	rts

31..65566 cycles of delay

;;;;;;;;;;;;;;;;;;;;;;;;
; Delays A:X clocks+overhead
; Time: 256*A+X+31 clocks (including JSR)
; Clobbers A. Preserves X,Y. Has relocations.
;;;;;;;;;;;;;;;;;;;;;;;;
:	; do 256 cycles.	; 5 cycles done so far. Loop is 2+1+ 2+3+ 1 = 9 bytes.
	sbc #1			; 2 cycles - Carry was set from cmp
	pha			; 3 cycles
	 lda #(256-25-10-2-4)   ; +2
	 jsr delay_a_25_clocks
	pla                     ; 4 cycles
delay_256a_x_31_clocks:
	cmp #1			; +2; 2 cycles overhead
	bcs :-			; +2; 4 cycles overhead
	; 0-255 cycles remain, overhead = 4
	txa 			; +2; 6; +25 = 31
	;passthru
<<Place the function delay_a_25_clocks immediately following here>>

See also