# UTF-8 byte and character I/O helpers built on C stdio.
#
# Syntax: GNU as, AT&T operand order (op src,dst).
# Target: 32-bit x86, cdecl: arguments on the stack (pushed right to left),
#         result in %eax, %eax/%ecx/%edx are scratch, %ebp is preserved.
# Every function keeps an %ebp frame and restores %esp from %ebp on exit.

        .extern fread, fwrite

        .text

#########################
# int readbyte(FILE *f) #
#########################
# Reads a single byte from f with fread(buf, 1, 1, f).
#
# In:     8(%ebp) = f
# Out:    %eax = the byte, zero-extended (0..255), or -1 if fread returned 0
# Locals: -4(%ebp) = 4-byte buffer; it is zeroed first so that the three
#         bytes fread does not touch read back as 0
        .globl  readbyte
readbyte:
        pushl   %ebp
        movl    %esp,%ebp
        subl    $8,%esp                 # reserve space that covers -4(%ebp)
        andl    $-16,%esp               # the 4 pushes below total 16 bytes, so
                                        # %esp is 16-byte aligned at the call

        movl    $0,-4(%ebp)             # buffer = 0
        leal    -4(%ebp),%eax           # eax = &buffer

        pushl   8(%ebp)                 # FILE *stream
        pushl   $1                      # size_t nmemb
        pushl   $1                      # size_t size
        pushl   %eax                    # void *ptr
        call    fread

        testl   %eax,%eax               # 0 items read?
        jz      rb_e
        movl    -4(%ebp),%eax           # return the byte
        jmp     rb_x

rb_e:
        movl    $-1,%eax                # return -1

rb_x:
        movl    %ebp,%esp               # also discards the pushed arguments
        popl    %ebp
        ret

##############################
# long readutf8char(FILE *f) #
##############################
# Reads one UTF-8 encoded character from f and returns its code point.
#
# In:     8(%ebp) = f
# Out:    %eax = decoded value, or -1 if readbyte returned -1 for the first
#         byte or for any continuation byte
# Locals: -4(%ebp)  = code point accumulated so far
#         -8(%ebp)  = current byte
#         -12(%ebp) = mask selecting the payload bits of the current byte
#         -16(%ebp) = number of payload bits still to come after this byte
#
# The sequence length is chosen from the first byte only:
#   below 0xC0 -> returned unchanged, 0xC0.. -> 2 bytes,
#   0xE0..     -> 3 bytes,            0xF0.. -> 4 bytes.
# Continuation bytes are masked with 0x3F; their top two bits are not checked.
        .globl  readutf8char
readutf8char:
        pushl   %ebp
        movl    %esp,%ebp
        subl    $16,%esp                # the four locals listed above

        pushl   8(%ebp)                 # FILE *f
        call    readbyte
        addl    $4,%esp                 # drop the argument

        # Signed compare on purpose: -1 from readbyte is also "less than 0xC0"
        # and is returned unchanged, like any single-byte value.
        cmpl    $0xC0,%eax
        jl      ru8_x

        movl    $0,-4(%ebp)             # code point = 0
        movl    %eax,-8(%ebp)           # current byte = first byte

        cmpl    $0xF0,%eax              # first byte >= 0xF0: 4 bytes
        jge     ru8_4bytes
        cmpl    $0xE0,%eax              # first byte >= 0xE0: 3 bytes
        jge     ru8_3bytes
                                        # otherwise 2 bytes (fall through)
ru8_2bytes:
        movl    $6,-16(%ebp)            # 6 bits follow the first byte
        movl    $0b00011111,-12(%ebp)   # first byte carries 5 payload bits
        jmp     ru8_loop

ru8_3bytes:
        movl    $12,-16(%ebp)           # 12 bits follow the first byte
        movl    $0b00001111,-12(%ebp)   # first byte carries 4 payload bits
        jmp     ru8_loop

ru8_4bytes:
        movl    $18,-16(%ebp)           # 18 bits follow the first byte
        movl    $0b00000111,-12(%ebp)   # first byte carries 3 payload bits

ru8_loop:
        # code point |= (current byte & mask) << bits left
        movl    -8(%ebp),%eax
        andl    -12(%ebp),%eax
        movl    -16(%ebp),%ecx          # shll takes its count from %cl
        shll    %cl,%eax
        orl     %eax,-4(%ebp)

        cmpl    $0,-16(%ebp)            # no bits left: that was the last byte
        je      ru8_done

        movl    $63,-12(%ebp)           # continuation bytes carry 6 payload bits
        subl    $6,-16(%ebp)

        pushl   8(%ebp)                 # FILE *f
        call    readbyte
        addl    $4,%esp                 # drop the argument

        cmpl    $-1,%eax                # input ended inside the sequence:
        je      ru8_x                   # return -1 (already in %eax)

        movl    %eax,-8(%ebp)           # current byte = byte just read
        jmp     ru8_loop

ru8_done:
        movl    -4(%ebp),%eax           # return the code point

ru8_x:
        movl    %ebp,%esp
        popl    %ebp
        ret

############################################
# void writebyte(FILE *f, unsigned char b) #
############################################
# Writes the single byte b to f with fwrite(&b, 1, 1, f).
#
# In:  8(%ebp)  = f
#      12(%ebp) = b; the byte at the lowest address of this argument slot is
#                 the one written (x86 is little-endian, so that is the low byte)
# Out: nothing is defined; %eax is whatever fwrite returned
        .globl  writebyte
writebyte:
        pushl   %ebp
        movl    %esp,%ebp
        andl    $-16,%esp               # the 4 pushes below total 16 bytes, so
                                        # %esp is 16-byte aligned at the call

        leal    12(%ebp),%eax           # eax = &b

        pushl   8(%ebp)                 # FILE *stream
        pushl   $1                      # size_t nmemb
        pushl   $1                      # size_t size
        pushl   %eax                    # const void *ptr
        call    fwrite

        movl    %ebp,%esp               # also discards the pushed arguments
        popl    %ebp
        ret

################################################
# void writeutf8char(FILE *f, unsigned long u) #
################################################
# Writes u to f as a UTF-8 sequence, one byte at a time through writebyte.
#
# In:     8(%ebp)  = f
#         12(%ebp) = u
# Locals: -4(%ebp)  = marker bits ORed into the byte being written
#         -8(%ebp)  = right-shift that brings this byte's payload to bit 0
#         -12(%ebp) = mask selecting this byte's payload bits
#
# Each byte written is:  marker | ((u >> shift) & mask)
#
# The sequence length is chosen by unsigned comparison:
#   u < 0x80 -> 1 byte, u < 0x800 -> 2 bytes, u < 0x10000 -> 3 bytes,
#   otherwise 4 bytes. The 4-byte form carries 21 bits; higher bits of u are
#   dropped by the mask, and u is not otherwise validated.
        .globl  writeutf8char
writeutf8char:
        pushl   %ebp
        movl    %esp,%ebp
        subl    $12,%esp                # the three locals listed above

        movl    12(%ebp),%ecx           # ecx = u
        cmpl    $0x0080,%ecx            # u is unsigned, hence jae below
        jae     wu8_main

        # u fits in 7 bits: it is written as a single byte, unchanged
        pushl   %ecx                    # unsigned char b
        pushl   8(%ebp)                 # FILE *f
        call    writebyte
        jmp     wu8_x                   # the epilogue resets %esp

# wu8_main picks the marker, shift and mask for the first byte of the sequence.
wu8_main:
        cmpl    $0x10000,%ecx           # u >= 0x10000 needs 4 bytes
        jae     wu8_4bytes
        cmpl    $0x0800,%ecx            # u >= 0x0800 needs 3 bytes
        jae     wu8_3bytes
                                        # otherwise 2 bytes (fall through)
wu8_2bytes:
        movl    $6,-8(%ebp)             # 6 bits follow the first byte
        movl    $0b11000000,-4(%ebp)    # first byte starts with 110
        movl    $0b00011111,-12(%ebp)   # and carries 5 payload bits
        jmp     wu8_loop

wu8_3bytes:
        movl    $12,-8(%ebp)            # 12 bits follow the first byte
        movl    $0b11100000,-4(%ebp)    # first byte starts with 1110
        movl    $0b00001111,-12(%ebp)   # and carries 4 payload bits
        jmp     wu8_loop

wu8_4bytes:
        movl    $18,-8(%ebp)            # 18 bits follow the first byte
        movl    $0b11110000,-4(%ebp)    # first byte starts with 11110
        movl    $0b00000111,-12(%ebp)   # and carries 3 payload bits

wu8_loop:
        # eax = marker | ((u >> shift) & mask)
        movl    12(%ebp),%eax
        movl    -8(%ebp),%ecx           # shrl takes its count from %cl
        shrl    %cl,%eax
        andl    -12(%ebp),%eax          # keep only this byte's payload bits
        orl     -4(%ebp),%eax           # add the marker bits

        pushl   %eax                    # unsigned char b
        pushl   8(%ebp)                 # FILE *f
        call    writebyte
        addl    $8,%esp                 # drop both arguments

        cmpl    $0,-8(%ebp)             # shift was 0: that was the last byte
        je      wu8_x

        movl    $63,-12(%ebp)           # continuation bytes carry 6 payload bits
        movl    $128,-4(%ebp)           # and start with 10
        subl    $6,-8(%ebp)             # next 6 bits
        jmp     wu8_loop

wu8_x:
        movl    %ebp,%esp
        popl    %ebp
        ret