University stuff.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

oppgave.s 5.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. .extern fread, fwrite
  2. .text
  #######################################################################
  # int readbyte(FILE *f)
  #
  # Reads one byte from f with fread(buf, 1, 1, f).
  #
  # ABI:   32-bit x86, arguments on the stack (AT&T syntax).
  # In:    8(%ebp) = f
  # Out:   %eax = the byte, zero-extended (0..255), or -1 when fread
  #        stored no item (end of file or read error).
  # Clobb: whatever fread clobbers (caller-saved registers), flags.
  # Stack: 8 bytes of locals; only -4(%ebp) is used, the other 4 bytes are
  #        padding so that %esp is 16-byte aligned at the call to fread,
  #        provided our caller was aligned at its own call site.
  #######################################################################
  .globl readbyte
  readbyte:
          pushl   %ebp
          movl    %esp, %ebp
          subl    $8, %esp                # buffer at -4(%ebp) + 4 bytes padding

          movl    $0, -4(%ebp)            # clear buffer so the upper 3 bytes read back as 0
          leal    -4(%ebp), %eax          # eax = &buffer

          pushl   8(%ebp)                 # FILE *stream
          pushl   $1                      # size_t nmemb
          pushl   $1                      # size_t size
          pushl   %eax                    # void *ptr
          call    fread                   # eax = number of items read (0 or 1)

          testl   %eax, %eax
          jz      .Lrb_eof                # nothing read -> report -1
          movl    -4(%ebp), %eax          # return the byte, zero-extended
          jmp     .Lrb_done
  .Lrb_eof:
          movl    $-1, %eax
  .Lrb_done:
          leave                           # also discards the pushed fread arguments
          ret
  #######################################################################
  # long readutf8char(FILE *f)
  #
  # Reads one UTF-8 encoded character from f, one byte at a time through
  # readbyte, and returns its code point.
  #
  # Classification of the first byte (signed compare, so -1 is included):
  #   <  0xC0 : returned unchanged. This covers plain one-byte characters,
  #             the -1 end-of-file marker from readbyte, and stray
  #             continuation bytes 0x80..0xBF, which are not rejected.
  #   >= 0xF0 : 4-byte sequence, 3 payload bits in the first byte
  #   >= 0xE0 : 3-byte sequence, 4 payload bits in the first byte
  #   else    : 2-byte sequence, 5 payload bits in the first byte
  # Each following byte contributes its low 6 bits; its top two bits are
  # not validated.
  #
  # ABI:   32-bit x86, arguments on the stack (AT&T syntax).
  # In:    8(%ebp) = f
  # Out:   %eax = code point, or -1 if readbyte reports -1 for the first
  #        byte or for any later byte of a multi-byte sequence.
  # Clobb: %eax, %ecx, %edx, flags, plus whatever readbyte clobbers.
  # Stack: 24 bytes:
  #          -4(%ebp)  code point accumulated so far
  #          -8(%ebp)  shift = number of payload bits still to be read
  #          (%esp)    outgoing argument slot for readbyte
  #        The size keeps %esp 16-byte aligned at every call, provided our
  #        caller was aligned at its own call site.
  #######################################################################
  .globl readutf8char
  readutf8char:
          pushl   %ebp
          movl    %esp, %ebp
          subl    $24, %esp

          movl    8(%ebp), %eax
          movl    %eax, (%esp)            # readbyte(f)
          call    readbyte

          cmpl    $0xC0, %eax
          jl      .Lru8_return            # single byte or -1: return as is

          # Select payload mask (edx) and initial shift (ecx) from the first byte.
          cmpl    $0xF0, %eax
          jge     .Lru8_lead4
          cmpl    $0xE0, %eax
          jge     .Lru8_lead3
          movl    $0x1F, %edx             # 2 bytes: 5 bits here, 6 more to read
          movl    $6, %ecx
          jmp     .Lru8_start
  .Lru8_lead4:
          movl    $0x07, %edx             # 4 bytes: 3 bits here, 18 more to read
          movl    $18, %ecx
          jmp     .Lru8_start
  .Lru8_lead3:
          movl    $0x0F, %edx             # 3 bytes: 4 bits here, 12 more to read
          movl    $12, %ecx

  .Lru8_start:
          andl    %edx, %eax              # keep only the payload bits of the first byte
          shll    %cl, %eax               # move them above the bits still to come
          movl    %eax, -4(%ebp)
          movl    %ecx, -8(%ebp)

  .Lru8_next:
          # Invariant: -8(%ebp) > 0, i.e. at least one more byte is expected.
          movl    8(%ebp), %eax
          movl    %eax, (%esp)            # rewrite the slot: the callee owns it during a call
          call    readbyte
          testl   %eax, %eax
          js      .Lru8_eof               # input ended inside the sequence

          andl    $0x3F, %eax             # low 6 bits carry the payload
          movl    -8(%ebp), %ecx
          subl    $6, %ecx                # this byte fills the next 6 bits down
          movl    %ecx, -8(%ebp)
          shll    %cl, %eax
          orl     %eax, -4(%ebp)
          testl   %ecx, %ecx
          jnz     .Lru8_next              # more payload bits outstanding

          movl    -4(%ebp), %eax          # complete code point
          jmp     .Lru8_return
  .Lru8_eof:
          movl    $-1, %eax               # truncated sequence: report end of input
  .Lru8_return:
          leave
          ret
  #######################################################################
  # void writebyte(FILE *f, unsigned char b)
  #
  # Writes the single byte b to f with fwrite(&b, 1, 1, f).
  #
  # ABI:   32-bit x86, arguments on the stack (AT&T syntax).
  # In:    8(%ebp)  = f
  #        12(%ebp) = b (only the lowest-addressed byte of the slot is
  #                   written, which on little-endian x86 is the low byte)
  # Out:   none declared; %eax is left holding fwrite's return value.
  # Clobb: whatever fwrite clobbers (caller-saved registers), flags.
  # Stack: 8 bytes of padding so that, together with the 16 bytes of
  #        arguments, %esp is 16-byte aligned at the call to fwrite,
  #        provided our caller was aligned at its own call site.
  #######################################################################
  .globl writebyte
  writebyte:
          pushl   %ebp
          movl    %esp, %ebp
          subl    $8, %esp                # alignment padding only

          leal    12(%ebp), %eax          # eax = &b, read in place from our argument slot

          pushl   8(%ebp)                 # FILE *stream
          pushl   $1                      # size_t nmemb
          pushl   $1                      # size_t size
          pushl   %eax                    # const void *ptr
          call    fwrite

          leave                           # also discards the pushed fwrite arguments
          ret
  #######################################################################
  # void writeutf8char(FILE *f, unsigned long u)
  #
  # Writes the code point u to f as UTF-8, one byte at a time through
  # writebyte.
  #
  # Length selection (unsigned compares, since u is unsigned):
  #   u <  0x80     : 1 byte, u itself
  #   u <  0x800    : 2 bytes, lead 110xxxxx
  #   u <  0x10000  : 3 bytes, lead 1110xxxx
  #   otherwise     : 4 bytes, lead 11110xxx
  # Every byte after the first is 10xxxxxx with the next 6 bits of u.
  # Bits of u above bit 20 are discarded by the 4-byte form; u is not
  # checked against the Unicode range.
  #
  # Each output byte is computed as
  #   marker | ((u >> shift) & mask)
  #
  # ABI:   32-bit x86, arguments on the stack (AT&T syntax).
  # In:    8(%ebp) = f, 12(%ebp) = u
  # Out:   none
  # Clobb: %eax, %ecx, flags, plus whatever writebyte clobbers.
  # Stack: 24 bytes:
  #          -4(%ebp)   marker: high bits OR-ed into the byte
  #          -8(%ebp)   shift:  position in u of the bits for this byte
  #          -12(%ebp)  mask:   payload bits kept for this byte
  #          (%esp), 4(%esp)  outgoing argument slots for writebyte
  #        The size keeps %esp 16-byte aligned at every call, provided our
  #        caller was aligned at its own call site.
  #######################################################################
  .globl writeutf8char
  writeutf8char:
          pushl   %ebp
          movl    %esp, %ebp
          subl    $24, %esp

          movl    12(%ebp), %eax          # eax = u
          cmpl    $0x80, %eax
          jae     .Lwu8_multi             # needs more than one byte

          # One-byte form: the byte is u itself.
          movl    %eax, 4(%esp)           # b
          movl    8(%ebp), %eax
          movl    %eax, (%esp)            # f
          call    writebyte
          jmp     .Lwu8_done

  .Lwu8_multi:
          cmpl    $0x10000, %eax
          jae     .Lwu8_len4
          cmpl    $0x800, %eax
          jae     .Lwu8_len3

          movl    $6, -8(%ebp)            # 2 bytes: first byte holds bits 10..6
          movl    $0xC0, -4(%ebp)         # 110xxxxx
          movl    $0x1F, -12(%ebp)        # 5 payload bits
          jmp     .Lwu8_emit
  .Lwu8_len4:
          movl    $18, -8(%ebp)           # 4 bytes: first byte holds bits 20..18
          movl    $0xF0, -4(%ebp)         # 11110xxx
          movl    $0x07, -12(%ebp)        # 3 payload bits
          jmp     .Lwu8_emit
  .Lwu8_len3:
          movl    $12, -8(%ebp)           # 3 bytes: first byte holds bits 15..12
          movl    $0xE0, -4(%ebp)         # 1110xxxx
          movl    $0x0F, -12(%ebp)        # 4 payload bits

  .Lwu8_emit:
          movl    12(%ebp), %eax          # reload u: registers do not survive writebyte
          movl    -8(%ebp), %ecx
          shrl    %cl, %eax               # bring this byte's bits down to bit 0
          andl    -12(%ebp), %eax         # keep only the payload bits
          orl     -4(%ebp), %eax          # add the lead / continuation marker
          movl    %eax, 4(%esp)           # b
          movl    8(%ebp), %eax
          movl    %eax, (%esp)            # f (rewritten: the callee owns the slots during a call)
          call    writebyte

          cmpl    $0, -8(%ebp)
          je      .Lwu8_done              # shift 0 means the last byte was just written

          movl    $0x3F, -12(%ebp)        # continuation bytes carry 6 payload bits
          movl    $0x80, -4(%ebp)         # and are marked 10xxxxxx
          subl    $6, -8(%ebp)            # move on to the next lower 6 bits
          jmp     .Lwu8_emit

  .Lwu8_done:
          leave
          ret