###############################################################################
# XML parser                                                                  #
###############################################################################
#   Author: Ben Vanik - ben@vanik.net                                         #
#     Date: 2004-01-08                                                        #
# See-also:                                                                   #
###############################################################################
# Simple XML parser and display engine                                        #
# Set sz_xml_source to your input                                             #
###############################################################################

###############################################################################
# Data                                                                        #
###############################################################################
.data

# Character constants
# Each is a one-character, NUL-terminated string. The code uses them two ways:
# as print_string arguments (la + syscall 4) and as single-byte comparison
# values (lb from the label).
sz_newline:			.asciiz		"\n"
sz_ws:				.asciiz		" "
sz_lt:				.asciiz		"<"
sz_gt:				.asciiz		">"
sz_eq:				.asciiz		"="
sz_qt:				.asciiz		"\""
sz_fs:				.asciiz		"/"
sz_tab:				.asciiz		"\t"

# XML to parse
# __start passes the address of this string to r_ProcessElement.
sz_xml_source:		.asciiz		"<xml><test id=\"0\"><entry name=\"foo\" type=\"bar\" /></test></xml>"

# String table
# Final status messages printed by __start, selected by the error code
# returned from r_ProcessElement.
sz_msg_done:		.asciiz		"\n\nXML parsing complete\n"
sz_msg_badxml:		.asciiz		"\n\nBad XML format, parsing aborted\n"
sz_msg_memory:		.asciiz		"\n\nMemory error - not enough?\n"
sz_msg_unknown:		.asciiz		"\n\nUnknown error, parsing aborted\n"
sz_a:				.asciiz		"abc"			# Input for the p_StringCopy call at the top of __start

# Variable table
v_depth:			.byte		0				# Indent depth
v_pointer:			.word		0				# Pointer to current location in string
v_error:			.byte		0				# Error message
												# 0 = ok
												# 1 = bad xml format
												# 2 = memory error
												# 3+ = unknown
												# (not read or written by any code in this file;
												#  the error code travels in $v0 instead)
v_buffer:			.space		255				# String buffer
												# Shared scratch area: holds the element name,
												# then each attribute name and value in turn.
												# The readers that fill it do no bounds checking.

###############################################################################
# Program code                                                                #
###############################################################################
.text
#------------------------------------------------------------------------------
# Program entry point
#------------------------------------------------------------------------------
# Parses sz_xml_source starting at its root element, prints exactly one
# status message chosen by the returned error code, then exits.
#   0     -> sz_msg_done
#   1     -> sz_msg_badxml
#   2     -> sz_msg_memory
#   other -> sz_msg_unknown
#------------------------------------------------------------------------------
__start:

	# Exercise p_StringCopy: copy "abc" into the scratch buffer.
	# The parser overwrites v_buffer afterwards, so this has no effect
	# on the output.
	la		$a0, sz_a
	la		$a1, v_buffer
	jal		p_StringCopy

	# Begin parse
	la		$t0, sz_xml_source			# Get source address
	sw		$t0, v_pointer				# Save current pointer in XML string
	move	$a0, $t0					# Arg1: Address in string
	jal		r_ProcessElement			# Start parse on root element
	move	$s0, $v0					# Save error code ($v0 is reused for syscalls)

	# Ending message: dispatch on the error code
	beq		$s0, $0, startdone
	ori		$t0, $0, 1
	beq		$s0, $t0, startbadxml
	ori		$t0, $0, 2
	beq		$s0, $t0, startmemory

	# Any other code: unknown error
	ori		$v0, $0, 4					# print_string
	la		$a0, sz_msg_unknown			# Unknown error message
	syscall
	j		startexit					# Do not fall through into the bad-XML message
startbadxml:
	ori		$v0, $0, 4					# print_string
	la		$a0, sz_msg_badxml			# Bad XML
	syscall
	j		startexit
startmemory:
	ori		$v0, $0, 4					# print_string
	la		$a0, sz_msg_memory			# Memory
	syscall
	j		startexit
startdone:
	ori		$v0, $0, 4					# print_string
	la		$a0, sz_msg_done			# Done - good!
	syscall

	# Exit
startexit:
	ori		$v0, $0, 10					# exit
	syscall
	
#------------------------------------------------------------------------------
# rec: Process element and its children
#------------------------------------------------------------------------------
#     in: $a0 = address in source string (must point at '<')
#    out: $v0 = error return code
#                 0 = ok
#                 1 = bad xml format
#                 2 = element name too long for the frame's name slot
#         v_pointer = address just past the opening tag's '>' on success
# unsafe: $t0-$t4, $t9, $a0, $a1, $v1, v_buffer
#
# Prints the element as ">name" at the current indent, then lets
# p_ReadAttributes print the attributes one level deeper. v_depth is
# restored to its entry value before returning, on every path.
#
# NOTE: child elements and the closing tag are not parsed yet. After an
# opening tag that is not self-closing, the saved element name is echoed
# once (leftover test output) and the routine returns.
#
# Frame (72 bytes):
#   16($sp)      saved $fp
#   20($sp)      saved $ra
#   24($sp)      v_depth on entry
#   32..71($sp)  copy of the element name (39 chars + NUL)
#------------------------------------------------------------------------------
r_ProcessElement:

	# Stack grow
	subu	$sp, $sp, 72				# Grow stack
	sw		$ra, 20($sp)				# Save return address
	sw		$fp, 16($sp)				# Save frame pointer
	addiu	$fp, $sp, 68				# Set frame pointer (top word of frame)
	lb		$t4, v_depth				# Remember entry depth so every exit
	sw		$t4, 24($sp)				#   path can put it back

	# Setup
	move	$t0, $a0					# $t0 is now the pointer in the string

	# Do check (first char must be '<')
	lb		$t1, ($t0)					# Get character at location
	lb		$t2, sz_lt					# '<'
	bne		$t1, $t2, r_pe_badxml		# If not '<', then error
	addi	$t0, $t0, 1					# Move to next char

	# Read element name (until first ws, /, or >)
	la		$t1, v_buffer				# Get buffer address
	ori		$t4, $0, 39					# Chars that still fit in the frame's name slot
r_pe_readelm:
	lb		$t2, ($t0)					# Get character at location
	beq		$t2, $0, r_pe_badxml		# Source ended inside the tag
	lb		$t3, sz_ws					# ' '
	beq		$t2, $t3, r_pe_afterelm		# If ' ', done with element name
	lb		$t3, sz_gt					# '>'
	beq		$t2, $t3, r_pe_afterelm		# If '>', done with element name
	lb		$t3, sz_fs					# '/'
	beq		$t2, $t3, r_pe_afterelm		# If '/', done with element name
	beq		$t4, $0, r_pe_memory		# Name would overflow the stack copy
	sb		$t2, ($t1)					# Save char to buffer
	addi	$t1, $t1, 1					# Move to next char (in buffer)
	addi	$t0, $t0, 1					# Move to next char (in source)
	addi	$t4, $t4, -1				# One less free byte in the name slot
	j		r_pe_readelm				# Loop back

r_pe_afterelm:
	sw		$t0, v_pointer				# Save position; p_Indent clobbers $t0
	sb		$0, ($t1)					# Add terminating 0 to string buffer
	lb		$a0, v_depth				# Arg1: Indent depth
	jal		p_Indent					# Draw indent whitespace
	ori		$v0, $0, 4					# print_string
	la		$a0, sz_gt					# '>'
	syscall								# Print '>'
	ori		$v0, $0, 4					# print_string
	la		$a0, v_buffer				# String buffer
	syscall								# Print element name
	ori		$v0, $0, 4					# print_string
	la		$a0, sz_newline				# Linefeed
	syscall								# Print newline
	lb		$t4, v_depth				# Get indent depth
	addi	$t4, $t4, 1					# Attributes print one level deeper
	sb		$t4, v_depth				# Save indent depth

	# Save element name in our frame (v_buffer is reused for attributes)
	la		$a0, v_buffer				# Arg1: source
	addi	$a1, $sp, 32				# Arg2: name slot in frame
	jal		p_StringCopy

	# Read attributes (until / or >)
	lw		$a0, v_pointer				# Arg1: Source string address (offset)
	jal		p_ReadAttributes			# Read all attributes
	ori		$t1, $0, 1					# Bad XML error
	beq		$v0, $t1, r_pe_badxml		# Check return to see if there was bad XML
	move	$t0, $v1					# Update string position
	sw		$t0, v_pointer				# Save pointer

	# The tag must now end with either "/>" (no children) or ">"
	lb		$t1, ($t0)					# Get char at location
	lb		$t2, sz_fs					# '/'
	beq		$t1, $t2, r_pe_nochildren	# Self-closing element
	lb		$t2, sz_gt					# '>'
	bne		$t1, $t2, r_pe_badxml		# Anything else is malformed
	addi	$t0, $t0, 1					# Skip '>'
	sw		$t0, v_pointer				# Save pointer

	# test: echo the element name saved in the frame
	ori		$v0, $0, 4					# print_string
	addi	$a0, $sp, 32				# Saved element name
	syscall
	j		r_pe_ok

r_pe_nochildren:
	addi	$t0, $t0, 1					# Skip '/'
	lb		$t1, ($t0)					# Char after '/'
	lb		$t2, sz_gt					# '>'
	bne		$t1, $t2, r_pe_badxml		# '/' must be followed by '>'
	addi	$t0, $t0, 1					# Skip '>'
	sw		$t0, v_pointer				# Save pointer

r_pe_ok:
	move	$v0, $0						# Success ($v0 was clobbered by the prints)
	j		r_pe_done
r_pe_badxml:
	ori		$v0, $0, 1					# Set error code for bad xml
	j		r_pe_done
r_pe_memory:
	ori		$v0, $0, 2					# Set error code for memory
r_pe_done:
	# Stack shrink / return
	lw		$t4, 24($sp)				# Entry depth
	sb		$t4, v_depth				# Undo our indent change
	lw		$ra, 20($sp)				# Restore return address
	lw		$fp, 16($sp)				# Restore frame pointer
	addiu	$sp, $sp, 72				# Shrink stack
	jr		$ra
	
#------------------------------------------------------------------------------
# Read attribute listing
#------------------------------------------------------------------------------
#     in: $a0 = source string (offset to read address); expected to point
#               at the first character after the element name
#    out: $v0 = error (0 = ok, 1 = bad xml)
#         $v1 = new position in source string (valid only when $v0 = 0);
#               points at the first character that does not start another
#               attribute, normally '/' or '>'
# unsafe: $t0-$t3, $t9, $a0, v_buffer   ($s0 is saved and restored)
#
# Each attribute must look like: <space>name="value". It is printed at the
# current v_depth as: name, tab, "= ", value, newline. The name is captured
# starting at the separating space, so it is printed with that leading space.
# A space followed by '/' or '>' before any '=' ends the list.
# Names and values are staged in v_buffer without a length check.
#------------------------------------------------------------------------------
p_ReadAttributes:

	# Stack grow
	subu	$sp, $sp, 32				# Grow stack
	sw		$ra, 20($sp)				# Save return address
	sw		$fp, 16($sp)				# Save frame pointer
	sw		$s0, 24($sp)				# Save $s0 (callee-saved; used below)
	addiu	$fp, $sp, 28				# Set frame pointer

	# Setup
	move	$t0, $a0					# $t0 is now the pointer in the string

p_ra_readblock:

	# Check
	lb		$t1, ($t0)					# Get character at location
	beq		$t1, $0, p_ra_badxml		# Source ended inside the tag
	lb		$t2, sz_ws					# ' '
	bne		$t1, $t2, p_ra_afterblock	# If next char not ' ' then no more attributes

	# Read name
	la		$t1, v_buffer				# Get buffer address
p_ra_readname:
	lb		$t2, ($t0)					# Get character at location
	beq		$t2, $0, p_ra_badxml		# Source ended inside the tag
	lb		$t3, sz_eq					# '='
	beq		$t2, $t3, p_ra_aftername	# If '=', done with attrib name
	lb		$t3, sz_fs					# '/'
	beq		$t2, $t3, p_ra_afterblock	# If next char is '/' then not an attribute!
	lb		$t3, sz_gt					# '>'
	beq		$t2, $t3, p_ra_afterblock	# Tag ends here: do not scan past '>'
	sb		$t2, ($t1)					# Save char to buffer
	addi	$t1, $t1, 1					# Move to next char (in buffer)
	addi	$t0, $t0, 1					# Move to next char (in source)
	j		p_ra_readname				# Loop back

p_ra_aftername:
	sb		$0, ($t1)					# Add terminating 0 to string buffer
	move	$s0, $t0					# Keep source pointer across p_Indent / syscalls
	lb		$a0, v_depth				# Arg1: Indent depth
	jal		p_Indent					# Draw indent whitespace
	ori		$v0, $0, 4					# print_string
	la		$a0, v_buffer				# String buffer
	syscall								# Print attribute name
	ori		$v0, $0, 4					# print_string
	la		$a0, sz_tab					# Tab character
	syscall								# Print tab
	ori		$v0, $0, 4					# print_string
	la		$a0, sz_eq					# '='
	syscall								# Print '='
	ori		$v0, $0, 4					# print_string
	la		$a0, sz_ws					# ' '
	syscall								# Print ' '

	# Read value
	move	$t0, $s0					# Restore source pointer (at '=')
	lb		$t2, 1($t0)					# Char after '='
	lb		$t3, sz_qt					# '"'
	bne		$t2, $t3, p_ra_badxml		# Value must open with '"'
	la		$t1, v_buffer				# Get buffer address
	addi	$t0, $t0, 2					# Skip '="'
p_ra_readvalue:
	lb		$t2, ($t0)					# Get character at location
	beq		$t2, $0, p_ra_badxml		# Source ended inside the value
	lb		$t3, sz_qt					# '"'
	beq		$t2, $t3, p_ra_aftervalue	# If '"', done with attrib value
	sb		$t2, ($t1)					# Save char to buffer
	addi	$t1, $t1, 1					# Move to next char (in buffer)
	addi	$t0, $t0, 1					# Move to next char (in source)
	j		p_ra_readvalue				# Loop back

p_ra_aftervalue:
	sb		$0, ($t1)					# Add terminating 0 to string buffer
	ori		$v0, $0, 4					# print_string
	la		$a0, v_buffer				# String buffer
	syscall								# Print attribute value
	ori		$v0, $0, 4					# print_string
	la		$a0, sz_newline				# Linefeed
	syscall								# Print newline

	addi	$t0, $t0, 1					# Skip the '"'
	j		p_ra_readblock				# Loop back to next attribute

p_ra_afterblock:
	move	$v1, $t0					# Save new position in string source
	move	$v0, $0						# Success ($v0 was clobbered by the prints)
	j		p_ra_done					# Done!

p_ra_badxml:
	ori		$v0, $0, 1					# Set error code for bad xml
p_ra_done:
	# Stack shrink / return
	lw		$s0, 24($sp)				# Restore $s0
	lw		$ra, 20($sp)				# Restore return address
	lw		$fp, 16($sp)				# Restore frame pointer
	addiu	$sp, $sp, 32				# Shrink stack
	jr		$ra							# Return
	
#------------------------------------------------------------------------------
# Indent by the given number of characters
#------------------------------------------------------------------------------
#     in: $a0 = number of characters to indent (zero or negative prints
#               nothing)
#    out: -
# unsafe: $t0, $t9, $v0   ($a0 is restored before returning)
#------------------------------------------------------------------------------
p_Indent:
	move	$t9, $a0					# Save a0 (reused as the print argument)
	move	$t0, $a0					# Setup counter
	blez	$t0, p_indent_2				# Nothing to print for count <= 0
p_indent_1:
	la		$a0, sz_ws					# Load space character
	ori		$v0, $0, 4					# print_string
	syscall
	addi	$t0, $t0, -1				# Decrement counter
	bgtz	$t0, p_indent_1				# As long as counter > 0, loop
p_indent_2:
	move	$a0, $t9					# Restore a0
	jr		$ra							# Return
	
#------------------------------------------------------------------------------
# Copy a NUL-terminated string, terminator included
#------------------------------------------------------------------------------
#     in: $a0 = source string, $a1 = dest buffer
#    out: -
# unsafe: $t0, $t1, $t2
#
# Bytes are copied one at a time until the 0 byte has been written. The
# destination size is not known here, so the caller must provide enough room.
#------------------------------------------------------------------------------
p_StringCopy:
	move	$t0, $a0					# Read cursor
	move	$t1, $a1					# Write cursor
	j		p_sc_loop					# First byte: copy before advancing
p_sc_advance:
	addi	$t0, $t0, 1					# Step read cursor
	addi	$t1, $t1, 1					# Step write cursor
p_sc_loop:
	lb		$t2, ($t0)					# Fetch next byte
	sb		$t2, ($t1)					# Store it (this also writes the terminator)
	bnez	$t2, p_sc_advance			# Keep going until the 0 byte was copied

p_sc_done:
	jr		$ra							# Return

###############################################################################
# EOF                                                                         #
###############################################################################
