PROWARE technologies
PROWARE technologies

wcstok in x86 Assembly

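The procedure wcstok_asm treats the input string as a sequence of text tokens separated by one or more delimiter characters. Each call returns the next token and advances the caller's pointer (*string) past it, so repeated calls work through the input string until no tokens remain, at which point a NULL pointer is returned. The input string is modified: the delimiter that ends each token is overwritten with a null character. The procedure keeps no internal state between calls, so it is safe in multithreaded applications as long as the string and the pointer to it are not shared between threads (for example, not global or static).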

Parameters:

wchar_t **string
The address of a pointer to the wchar_t string to tokenize (a pointer to a pointer). The pointer is advanced past each token that is returned.
 
wchar_t *delimiters
A null-terminated string of the characters to use as delimiters.

Returns a pointer to the next token in string. Returns NULL when no more tokens remain. string is modified.

TITLE 'extern "C" wchar_t *wcstok_asm(wchar_t **string, const wchar_t *delimiters);'
.686

.model FLAT

PUBLIC	_wcstok_asm

_WCSTOK	SEGMENT
;-----------------------------------------------------------------------
; wchar_t *wcstok_asm(wchar_t **string, const wchar_t *delimiters)
;
; Returns the next token of *string and advances *string past it.
;
; ABI:   32-bit, arguments on the stack, caller cleans the stack (ret 0)
; In:    [esp+4] = string      address of the caller's cursor into the text
;        [esp+8] = delimiters  zero-terminated list of delimiter characters
; Out:   eax     = start of the token, or NULL when no token remains or
;                  when string, *string or delimiters is NULL
;        *string = the character after the delimiter that ended the token,
;                  or the terminating zero if the end of the text was
;                  reached; left untouched when an argument is NULL
; Clobb: ecx, edx, flags (ebx, esi and edi are saved and restored)
; Note:  characters are read and written as 16-bit words
;-----------------------------------------------------------------------
_wcstok_asm PROC NEAR

	push ebx
	push esi
	push edi

; the three pushes moved the arguments to [esp+16] and [esp+20]

	xor  eax, eax ; return NULL if any argument is NULL
	mov  edx, DWORD PTR [esp+16] ; string
	test edx, edx
	jz   SHORT done
	mov  esi, DWORD PTR [edx] ; esi = cursor = *string
	test esi, esi
	jz   SHORT done
	mov  edi, DWORD PTR [esp+20] ; delimiters
	test edi, edi
	jz   SHORT done

; skip over leading delimiters

skip_next:
	movzx ecx, WORD PTR [esi] ; ecx = current character
	test ecx, ecx
	jz   SHORT no_token ; end of string before any token started
	mov  edx, edi ; rescan the delimiter list from its start
skip_scan:
	movzx eax, WORD PTR [edx] ; eax = candidate delimiter
	test eax, eax
	jz   SHORT token_start ; list exhausted: current character begins a token
	add  edx, 2
	cmp  eax, ecx
	jne  SHORT skip_scan
	add  esi, 2 ; current character is a delimiter, step over it
	jmp  SHORT skip_next

; find the end of the token and if it is not the end of the string then terminate with NULL character

token_start:
	mov  ebx, esi ; ebx = start of the token (the return value)
token_next:
	movzx ecx, WORD PTR [esi] ; ecx = current character
	test ecx, ecx
	jz   SHORT finish ; end of string: cursor stays on the terminating zero
	mov  edx, edi
token_scan:
	movzx eax, WORD PTR [edx] ; eax = candidate delimiter
	test eax, eax
	jz   SHORT token_advance ; not a delimiter, still inside the token
	add  edx, 2
	cmp  eax, ecx
	jne  SHORT token_scan
	mov  WORD PTR [esi], 0 ; terminate string with NULL character
	add  esi, 2 ; increment pointer past the NULL character
	jmp  SHORT finish
token_advance:
	add  esi, 2
	jmp  SHORT token_next

no_token:
	xor  ebx, ebx ; nothing left to return

finish:
	mov  edx, DWORD PTR [esp+16] ; string
	mov  DWORD PTR [edx], esi ; *string = cursor, the next call resumes here
	mov  eax, ebx ; token start, or NULL

done:
	pop  edi
	pop  esi
	pop  ebx

	ret  0

_wcstok_asm ENDP
_WCSTOK	ENDS
END

Example usage: parsing tab-delimited data

#include <stdio.h>

extern "C" wchar_t *wcstok_asm(wchar_t **string, const wchar_t *delimiters);

// Splits a block of tab-delimited text into rows and then into fields,
// printing each field on its own line with a blank line after each row.
int main()
{
	// Two records; fields are separated by tabs, records end with CR/LF.
	wchar_t s[100] = L"812\t7022\t1477478\t9038\r\nabc\txyc\tlmn\tedf\r\n";
	wchar_t *sp = s;	// cursor into s, advanced by wcstok_asm
	wchar_t *row;

	// %ls always means a wide string; %s in wprintf is only treated as
	// wide by some C runtimes, so %ls is used for every wchar_t* argument.
	wprintf(L"parsing:\r\n%ls\r\n", s);

	// Outer pass: cut s into rows at CR/LF. Each row comes back already
	// null-terminated, so the inner pass cannot run into the next row.
	while ((row = wcstok_asm(&sp, L"\r\n")) != NULL) {
		// Inner pass: row doubles as the cursor for the field tokenizer.
		while (wchar_t *tok = wcstok_asm(&row, L"\t"))
			wprintf(L"%ls\r\n", tok);
		wprintf(L"\r\n");
	}
	// s is modified after using wcstok_asm
	return 0;
}

Another example would be to use this function in a programming language parser.