;; Here is a list of all possible "fast" code-threading techniques
;; (the kind of code that is worth programming in assembly).
;; (For other kinds of code, you had better use a portable "C" implementation!)

; Assembler setup, followed by a three-instruction stub in the default
; code segment.
ideal			; select Ideal-mode syntax
model tiny		; tiny memory model
p386			; enable the 386 instruction set
codeseg			; open the model's default code segment
; Stub: push two values, then `ret`, which pops the most recently pushed
; value (0) and transfers control there.
; NOTE(review): the purpose of `push 1234` is not evident from this file -- confirm.
push 1234
push 0
ret


; 32-bit, paragraph-aligned code segment that holds every snippet below.
segment zap para use32 'code'

; Legend for the three numbers written after each instruction:
; value:		; x, y, z
;			  |  |  |
;			  |  |  `- clock cycles on a 386 (optimistic approx.)
;			  |  `---- clock cycles on a 486 (optimistic approx.)
;			  `------- size in bytes (exact evaluation)
; NOTE(review): the "+m" suffix used on 386 jump/call/ret timings is not
; defined in this file; presumably it is the Intel 386 manual's "m" (number
; of components of the next instruction executed) -- confirm.


; ABSOLUTE CALL
; Call a routine at a fixed address: load the address into a register,
; then call through that register.
mov eax,555555		; 5, 1, 2
call eax		; 2, 5, 7+m

; same sequence with EBX as the scratch register
mov ebx,555555		; 5, 1, 2
call ebx		; 2, 5, 7+m

; ABSOLUTE JUMP
; variant 1: through a register
mov eax,555555		; 5, 1, 2
jmp eax			; 2, 5, 7+m

; variant 2: push the target and "return" to it (no scratch register used)
push 555555		; 5, 1, 2
ret			; 1, 5, 10+m



; IMPLEMENTATIONS OF 'NEXT'

;BYTECODE, CODE ARRAY
;--------------------
; ESI is the bytecode instruction pointer. The opcode byte is fetched,
; multiplied by the slot size (32 with shl 5, 64 with shl 6) and used
; directly as the jump target; no base address is added.
movzx eax,[byte esi]	; 3, 3, 6  ;\  fetch opcode, zero-extended to 32 bits
inc esi			; 1, 1, 2  ;-`-  was lodsb; movzx eax,al ;(4,8,8)
shl eax,5 ; or 6	; 3, 2, 3
jmp eax			; 2, 5, 7+m
;TOTAL:			; 9,11,18+m
; The 386 figure for the memory-form movzx is 6 (the same instruction is
; listed with 6 in the indirect-threaded variant), hence 18+m in total.


;BYTECODE, INDIRECT THREADED
;---------------------------
; ESI is the bytecode instruction pointer. The opcode byte indexes a table
; of dword code addresses at INSTRTBL, and control jumps through the entry.
INSTRTBL EQU DWORD PTR 666666

movzx ebx,[byte esi]	; 3, 3, 6
inc esi			; 1, 1, 2  was lodsb; movzx eax,al ;(4,8)
jmp [INSTRTBL+ebx*4]	; 7, 5,10+m
;TOTAL:			;11, 9,18+m
; or alternatively
xor ebx,ebx		; 2, 1, 2
mov bl,[esi]		; 2, 1, 4
inc esi			; 1, 1, 2 ; instead of lodsb
jmp [INSTRTBL+ebx*4]	; 7, 5,10+m
; TOTAL:		 12, 8,18+m
; The table jump encodes as opcode + ModRM + SIB + disp32 = 7 bytes in both
; variants; the size totals are the sums of the rows.
; NOTE(review): the 386 timing of the table jump was given as 11+m in one
; variant and 10+m in the other; 10+m is used here because it matches both
; 386 totals -- confirm against the 386 timing tables.

;; using "ret" for threading
;; -> ESP is then no longer available as the (data) stack pointer!
;; -> easy debugging, provided one patches the control instructions
;; that would act on the pointer of [sentence left unfinished in the original]
;;
;_SP in ESI ->
;_IP in ESP -> ret
;_RP in EDI ->
;_IP in debug mode in EBP -> push $ ; add EBP, 4; jmp [EBP]
;_

;task-related data in GS: ?


;; data access with _SP=ESP (the hardware stack serves as the data stack)
; PUSH
push eax		; 1, 1, 2
push 0DEADBEEFh		; 5, 1, 2
; POP
pop eax			; 1, 4, 5
; or (drop the top cell without reading it)
add esp,4		; 3, 1, 2
; MOV (store to / load from: top cell, second cell, cell selected by EBX)
; NOTE(review): the byte counts in this group look one too high (for
; example `mov [esp],eax` is opcode + ModRM + SIB = 3 bytes) -- confirm
; against an assembler listing.
mov [esp],eax		; 4, 1, 2
mov eax,[esp]		; 4, 1, 4
mov [esp+4],eax		; 5, 1, 2
mov eax,[esp+4]		; 5, 1, 4
mov [esp+4*ebx-4],eax	; 5, 2, 3
mov eax,[esp+4*ebx-4]	; 5, 2, 5

;; data access with _SP=ESI
;; The stack grows downward: a push decrements ESI before storing, so a pop
;; must load first and then increment ESI.
; PUSH (register)
sub esi,4		; 3, 1, 2  ;\
mov [esi],eax		; 2, 1, 2  ;/  pair: 5, 2, 4
; PUSH (immediate)
sub esi,4		; 3, 1, 2  ;\
mov [dword esi],0DEADBEEFh ; 6, 1, 2  ;/  pair: 9, 2, 4
; POP
lodsd			; 1, 5, 5		(assumes cld)
; or
mov eax,[esi]		; 2, 1, 4  ;\
add esi,4		; 3, 1, 2  ;/  pair: 5, 2, 6
; or (drop the top cell without reading it)
add esi,4		; 3, 1, 2
; MOV (store to / load from: top cell, second cell, cell selected by EBX)
; NOTE(review): the byte counts in this group look one too high (the PUSH
; pair above counts `mov [esi],eax` as 2 bytes) -- confirm against an
; assembler listing.
mov [esi],eax		; 3, 1, 2
mov eax,[esi]		; 3, 1, 4
mov [esi+4],eax		; 4, 1, 2
mov eax,[esi+4]		; 4, 1, 4
mov [esi+4*ebx-4],eax	; 5, 2, 3
mov eax,[esi+4*ebx-4]	; 5, 2, 5

;INDIRECT-THREADED
;-----------------
; ESI is the thread pointer. Each thread cell holds the address of a memory
; cell that in turn holds the code address, so the jump is made through
; memory.
lodsd			; 1, 5, 5	; eax = [esi], esi advances by 4 (needs cld)
jmp [dword eax]		; 2, 5,10+m
; TOTAL			  3,10,15+m
; alternatively (larger, but fewer cycles on a 486)
mov ebx,[esi]		; 2, 1, 4
add esi,4		; 3, 1, 2
jmp [ebx]		; 2, 5,10+m
; TOTAL			  7, 7,16+m

;DIRECT-THREADED, SP as index
;----------------------------
; ESP is the thread pointer: `ret` pops the next code address and jumps to
; it, so the whole NEXT is a single one-byte instruction.
ret			; 1, 5,10+m
;TOTAL:			  1, 5,10+m

;DIRECT-THREADED, SI as index
;----------------------------
; ESI is the thread pointer; each thread cell holds a 32-bit code address.
lodsd			; 1, 5, 5	; Beware: requires cld
jmp eax			; 2, 5, 7+m
;TOTAL:			  3,10,12+m
;alternatively:
;...
; pre-increment form: ESI is advanced first, then the jump goes through the
; cell it now points to
add esi,4		; 3, 1, 2
jmp [dword esi]		; 2, 5,10+m
;TOTAL:			  5, 6,12+m ; Beware: ++esi instead of esi++
;alternatively:
; push the target cell, advance ESI, then "return" to the pushed address
push [dword esi]	; 2, 4, 5
add esi,4		; 3, 1, 2
ret			; 1, 5,10+m
;TOTAL:			; 6,10,17+m

;DIRECT-THREADED, SI as index, 64KB asm code segment
;---------------------------------------------------
; Same schemes as the 32-bit direct-threaded versions, but each thread cell
; is a 16-bit code address, so ESI advances by 2.
lodsw			; 2, 5, 5
jmp ax			; 3, 5, 7+m
;TOTAL:			  5,10,12+m
;alternatively:
add esi,2		; 3, 1, 2	; or inc esi; inc esi 2, 2
jmp [word esi]		; 3, 5,10+m	; beware: ++esi instead of esi++
;TOTAL:			  6, 6,12+m

;alternatively:
; NOTE(review): this pushes a 16-bit value but ends with a plain `ret`; in
; this use32 segment a plain `ret` pops 32 bits, so the operand sizes look
; mismatched -- confirm which segment size this variant is meant for.
push [word esi]		; 3, 4, 5
add esi,2		; 3, 1, 2	; or inc esi;inc esi (2,2)
ret			; 1, 5,10+m
;TOTAL:			; 7,10,17+m 	; or 6,11

ends			; close segment `zap`

end			; end of the assembly source; text after this point is not assembled

if 0

Summary
THREADING	notes

endif