strtok in x86 Assembly

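The procedure strtok_asm treats string as a sequence of text tokens separated by one or more delimiting characters. Each call returns the next token and advances the caller's pointer past it, so subsequent calls work through the input string until no tokens remain. The input string is modified: the delimiter that ends each token is overwritten with a NUL terminator. When no tokens remain, a NULL pointer is returned. Because all parsing state is kept in the caller-supplied pointer, this procedure is safe in multithreaded applications so long as string is not shared between threads (for example, not global or static).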

Parameters:

char **string
Address of the pointer to the string to tokenize (a pointer to a pointer). The pointer it refers to is advanced past each token that is returned.
 
char *delimiters
characters to use as delimiters

Returns a pointer to the next token in string. Returns NULL (0) when no more tokens remain. string is modified.

TITLE 'extern "C" char *strtok_asm(char **string, const char *delimiters);'
.686

.model FLAT

PUBLIC  _strtok_asm

_STRTOK  SEGMENT
;-----------------------------------------------------------------------
; char *strtok_asm(char **string, const char *delimiters)
;
; Returns the next token of *string and advances *string past it.
; A token is a maximal run of characters that do not occur in
; delimiters. The delimiter that ends the token is overwritten with 0.
;
; In:    [esp+4] = string      address of the caller's cursor pointer
;        [esp+8] = delimiters  0-terminated set of delimiter characters
; Out:   eax = start of the token, or 0 (NULL) when no token remains.
;        *string = first character after the token's terminator, or the
;        string's own terminating 0 when the end of the string was reached.
;        If string, *string or delimiters is NULL, 0 is returned and
;        nothing is written.
; Stack: arguments are read from the stack and left for the caller to
;        remove (ret 0).
; Saves: ebx, esi, edi (pushed/popped).  Clobbers: ecx, edx, flags.
;
; Register roles inside the body:
;        eax = result (token start, or 0)
;        esi = cursor into the string
;        edi = start of the delimiter set
;        ebx = scan pointer into the delimiter set
;        dl  = current string character, cl = current delimiter character
;-----------------------------------------------------------------------
_strtok_asm PROC NEAR

	push ebx
	push esi
	push edi

	xor  eax, eax                   ; result stays NULL until a token is found
	mov  ecx, DWORD PTR [esp+16]    ; string (3 pushes + return address = 16)
	mov  edi, DWORD PTR [esp+20]    ; delimiters
	test ecx, ecx
	je   SHORT tok_done             ; string == NULL
	test edi, edi
	je   SHORT tok_done             ; delimiters == NULL
	mov  esi, DWORD PTR [ecx]       ; cursor = *string
	test esi, esi
	je   SHORT tok_done             ; *string == NULL

; Phase 1: skip leading delimiters.
tok_skip:
	mov  dl, BYTE PTR [esi]
	test dl, dl
	je   SHORT tok_store            ; end of string, no token: return NULL
	mov  ebx, edi
tok_skip_scan:
	mov  cl, BYTE PTR [ebx]
	test cl, cl
	je   SHORT tok_begin            ; set exhausted: dl is not a delimiter
	inc  ebx
	cmp  cl, dl
	jne  SHORT tok_skip_scan
	inc  esi                        ; dl is a delimiter: step over it
	jmp  SHORT tok_skip

; Phase 2: esi points at a non-delimiter, non-zero character.
tok_begin:
	mov  eax, esi                   ; token starts here

tok_next:
	inc  esi                        ; previous character belongs to the token
	mov  dl, BYTE PTR [esi]
	test dl, dl
	je   SHORT tok_store            ; token runs to the end of the string
	mov  ebx, edi
tok_scan:
	mov  cl, BYTE PTR [ebx]
	test cl, cl
	je   SHORT tok_next             ; set exhausted: dl is part of the token
	inc  ebx
	cmp  cl, dl
	jne  SHORT tok_scan
	mov  BYTE PTR [esi], 0          ; terminate the token in place
	inc  esi                        ; resume after the terminator next call

tok_store:
	mov  ecx, DWORD PTR [esp+16]    ; cl was used as scratch: reload string
	mov  DWORD PTR [ecx], esi       ; *string = cursor

tok_done:
	pop  edi
	pop  esi
	pop  ebx

	ret  0

_strtok_asm ENDP
_STRTOK  ENDS
END

Example usage: parse tab delimited data

#include <stdio.h>

extern "C" char *strtok_asm(char **string, const char *delimiters);

// Demo: split a buffer into CR/LF-terminated rows, then split each row into
// tab-separated fields, printing one field per line and a blank line per row.
int main()
{
	char buffer[100] = "812\t7022\t1477478\t9038\r\nabc\txyc\tlmn\tedf\r\n";
	char *cursor = buffer;

	printf("parsing:\r\n%s\r\n", buffer);

	// Outer pass walks the rows; strtok_asm advances cursor on every call.
	for (char *line = strtok_asm(&cursor, "\r\n");
	     line != NULL;
	     line = strtok_asm(&cursor, "\r\n")) {
		// Inner pass walks the fields of this row, using line as its cursor.
		for (char *field = strtok_asm(&line, "\t");
		     field != NULL;
		     field = strtok_asm(&line, "\t"))
			printf("%s\r\n", field);
		printf("\r\n");
	}

	// buffer has been modified by strtok_asm at this point
	return 0;
}

Another example would be to use this function in a programming language parser.