# UTF-8 byte and character I/O helpers built on stdio's fread()/fwrite().
#
# Syntax: GNU as, AT&T operand order (source first, destination last).
# Target: 32-bit x86. Arguments are passed on the stack and results are
#         returned in %eax, so the routines can be called from C.
# External libc routines used by this file. GNU as treats every undefined
# symbol as external, so these lines serve as documentation of the imports.
        .extern fread
        .extern fwrite

# Everything below is code.
        .text

#-----------------------------------------------------------------------
# int readbyte(FILE *f)
#
# Reads a single byte from f with fread().
#
# In:     8(%ebp)  = f
# Out:    %eax     = the byte, zero-extended (0..255), or
#                    -1 if fread() returned 0 (nothing was read)
# Clobb:  %eax, flags, and whatever fread() clobbers (%ecx/%edx are
#         caller-saved under cdecl)
# Stack:  8 bytes of locals (1 byte used, the rest is padding) plus
#         16 bytes of outgoing arguments. With a caller that keeps the
#         stack 16-byte aligned, %esp is 16-byte aligned at the call.
#-----------------------------------------------------------------------
        .globl  readbyte
readbyte:
        pushl   %ebp
        movl    %esp, %ebp
        subl    $8, %esp                # -4(%ebp) = one-byte read buffer

        leal    -4(%ebp), %eax          # eax = &buffer
        pushl   8(%ebp)                 # arg 4: FILE *stream
        pushl   $1                      # arg 3: nmemb
        pushl   $1                      # arg 2: size
        pushl   %eax                    # arg 1: void *ptr
        call    fread                   # eax = number of items read

        testl   %eax, %eax
        jz      .Lrb_fail               # nothing read -> report -1
        movzbl  -4(%ebp), %eax          # return the byte, upper bits cleared
        jmp     .Lrb_exit

.Lrb_fail:
        movl    $-1, %eax

.Lrb_exit:
        leave                           # also discards the pushed arguments
        ret

#-----------------------------------------------------------------------
# long readutf8char(FILE *f)
#
# Reads one UTF-8 encoded character from f, one byte at a time through
# readbyte(), and returns the decoded code point.
#
# In:     8(%ebp)  = f
# Out:    %eax     = decoded code point, or
#                    -1 if readbyte() returned -1 for the first byte or
#                    for any continuation byte (truncated sequence)
# Clobb:  %eax, %ecx, flags, and whatever readbyte() clobbers
#
# Decoding rules, as implemented:
#   first byte <  0xC0  -> returned unchanged (this includes 0x80..0xBF)
#   first byte <  0xE0  -> 2-byte sequence, 5 payload bits in the lead
#   first byte <  0xF0  -> 3-byte sequence, 4 payload bits in the lead
#   first byte >= 0xF0  -> 4-byte sequence, 3 payload bits in the lead
# Continuation bytes are not validated; only their low 6 bits are used.
#
# Stack:  24 bytes = 8 bytes of locals, 12 bytes of padding and one
#         outgoing argument slot at (%esp); keeps %esp 16-byte aligned
#         at every call when the caller's stack was aligned.
#-----------------------------------------------------------------------
        .equ    RU8_ACC,   -4           # code point assembled so far
        .equ    RU8_SHIFT, -8           # bit position of the last payload placed

        .globl  readutf8char
readutf8char:
        pushl   %ebp
        movl    %esp, %ebp
        subl    $24, %esp

        movl    8(%ebp), %eax
        movl    %eax, (%esp)            # outgoing arg: f
        call    readbyte                # eax = first byte, or -1

        cmpl    $0xC0, %eax             # signed compare, so -1 also lands below
        jl      .Lru8_exit              # single byte or -1: return it as is

        # eax = lead byte of a multi-byte sequence. Keep only its payload
        # bits and pick the bit position they occupy in the code point.
        cmpl    $0xF0, %eax
        jge     .Lru8_lead4
        cmpl    $0xE0, %eax
        jge     .Lru8_lead3

        andl    $0x1F, %eax             # 2 bytes: 5 payload bits
        movl    $6, %ecx
        jmp     .Lru8_lead_done

.Lru8_lead3:
        andl    $0x0F, %eax             # 3 bytes: 4 payload bits
        movl    $12, %ecx
        jmp     .Lru8_lead_done

.Lru8_lead4:
        andl    $0x07, %eax             # 4 bytes: 3 payload bits
        movl    $18, %ecx

.Lru8_lead_done:
        shll    %cl, %eax
        movl    %eax, RU8_ACC(%ebp)
        movl    %ecx, RU8_SHIFT(%ebp)

.Lru8_next:
        # Invariant: RU8_SHIFT > 0 here, i.e. at least one continuation
        # byte is still missing. Each one contributes 6 bits.
        subl    $6, RU8_SHIFT(%ebp)

        movl    8(%ebp), %eax
        movl    %eax, (%esp)            # re-store f: the callee owns its arg slot
        call    readbyte
        testl   %eax, %eax
        js      .Lru8_exit              # input ended mid-sequence: return -1

        andl    $0x3F, %eax             # payload of a continuation byte
        movl    RU8_SHIFT(%ebp), %ecx
        shll    %cl, %eax
        orl     %eax, RU8_ACC(%ebp)

        testl   %ecx, %ecx              # shift 0 means that was the last byte
        jnz     .Lru8_next

        movl    RU8_ACC(%ebp), %eax     # return the assembled code point

.Lru8_exit:
        leave
        ret

#-----------------------------------------------------------------------
# void writebyte(FILE *f, unsigned char b)
#
# Writes the single byte b to f with fwrite().
#
# In:     8(%ebp)  = f
#         12(%ebp) = b; the argument's own stack slot is used as the
#                    source buffer, and only its first (lowest-addressed)
#                    byte is written
# Out:    nothing is declared; %eax is left holding fwrite()'s result
# Clobb:  %eax, flags, and whatever fwrite() clobbers
# Stack:  8 bytes of padding plus 16 bytes of outgoing arguments, so
#         %esp is 16-byte aligned at the call when the caller's stack
#         was aligned.
#-----------------------------------------------------------------------
        .globl  writebyte
writebyte:
        pushl   %ebp
        movl    %esp, %ebp
        subl    $8, %esp                # alignment padding only

        leal    12(%ebp), %eax          # eax = &b
        pushl   8(%ebp)                 # arg 4: FILE *stream
        pushl   $1                      # arg 3: nmemb
        pushl   $1                      # arg 2: size
        pushl   %eax                    # arg 1: const void *ptr
        call    fwrite

        leave                           # also discards the pushed arguments
        ret

#-----------------------------------------------------------------------
# void writeutf8char(FILE *f, unsigned long u)
#
# Encodes the code point u as UTF-8 and writes it to f, one byte at a
# time through writebyte().
#
# In:     8(%ebp)  = f
#         12(%ebp) = u
# Out:    nothing is declared
# Clobb:  %eax, %ecx, flags, and whatever writebyte() clobbers
#
# Encoded length, chosen with unsigned comparisons because u is unsigned:
#   u <  0x80     -> 1 byte, written unchanged
#   u <  0x800    -> 2 bytes
#   u <  0x10000  -> 3 bytes
#   otherwise     -> 4 bytes
# No range check is performed: for u > 0x1FFFFF only the low 21 bits
# end up in the 4-byte form.
#
# Every emitted byte of a multi-byte form is
#       prefix | ((u >> shift) & mask)
# with the three values kept in the locals named below.
#
# Stack:  16 bytes (12 for locals, 4 padding). Together with the two
#         pushed arguments this keeps %esp 16-byte aligned at every call
#         when the caller's stack was aligned.
#-----------------------------------------------------------------------
        .equ    WU8_PREFIX, -4          # marker bits ORed into the byte
        .equ    WU8_SHIFT,  -8          # right shift that exposes the payload
        .equ    WU8_MASK,   -12         # payload bits kept after the shift

        .globl  writeutf8char
writeutf8char:
        pushl   %ebp
        movl    %esp, %ebp
        subl    $16, %esp

        movl    12(%ebp), %ecx          # ecx = u
        cmpl    $0x80, %ecx
        jae     .Lwu8_multi             # needs more than one byte

        pushl   %ecx                    # arg 2: b = u
        pushl   8(%ebp)                 # arg 1: f
        call    writebyte
        jmp     .Lwu8_exit

.Lwu8_multi:
        cmpl    $0x10000, %ecx
        jae     .Lwu8_len4
        cmpl    $0x800, %ecx
        jae     .Lwu8_len3

        # 2 bytes: 110xxxxx 10xxxxxx
        movl    $6, WU8_SHIFT(%ebp)
        movl    $0xC0, WU8_PREFIX(%ebp)
        movl    $0x1F, WU8_MASK(%ebp)   # 5 payload bits in the lead byte
        jmp     .Lwu8_emit

.Lwu8_len3:
        # 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        movl    $12, WU8_SHIFT(%ebp)
        movl    $0xE0, WU8_PREFIX(%ebp)
        movl    $0x0F, WU8_MASK(%ebp)   # 4 payload bits in the lead byte
        jmp     .Lwu8_emit

.Lwu8_len4:
        # 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        movl    $18, WU8_SHIFT(%ebp)
        movl    $0xF0, WU8_PREFIX(%ebp)
        movl    $0x07, WU8_MASK(%ebp)   # 3 payload bits in the lead byte

.Lwu8_emit:
        movl    12(%ebp), %eax          # eax = u
        movl    WU8_SHIFT(%ebp), %ecx   # shrl takes its count in %cl
        shrl    %cl, %eax
        andl    WU8_MASK(%ebp), %eax    # keep this byte's payload bits
        orl     WU8_PREFIX(%ebp), %eax  # add the lead/continuation marker

        pushl   %eax                    # arg 2: b
        pushl   8(%ebp)                 # arg 1: f
        call    writebyte
        addl    $8, %esp                # drop the two arguments

        cmpl    $0, WU8_SHIFT(%ebp)
        je      .Lwu8_exit              # shift 0: the last byte is out

        # Every remaining byte is a continuation byte: 10xxxxxx.
        movl    $0x3F, WU8_MASK(%ebp)
        movl    $0x80, WU8_PREFIX(%ebp)
        subl    $6, WU8_SHIFT(%ebp)     # 6 payload bits per continuation byte
        jmp     .Lwu8_emit

.Lwu8_exit:
        leave
        ret