| @@ -0,0 +1,18 @@ | |||
# Build and run the UTF-8 read/write tests as a 32-bit x86 program.
CFLAGS = -g -m32

# Scratch files written by the test program (one per test case).
TESTFILES = \
    test1.txt test2.txt test3.txt \
    test4.txt test5.txt test6.txt \
    test7.txt test8.txt

# Link the C test driver together with the assembly implementation.
test: test-oblig3.c oppgave.s
	gcc $(CFLAGS) -o test $^

# Run the tests under valgrind, then remove the scratch files they wrote.
run-test: test
	valgrind ./test
	rm -f $(TESTFILES)

# Remove the binary and exactly the scratch files listed above. A recursive
# `find . -name 'test*.txt'` would also delete unrelated files in
# subdirectories, so the explicit list is used here as in run-test.
clean:
	rm -f test
	rm -f $(TESTFILES)

.PHONY: run-test clean
| @@ -0,0 +1,112 @@ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| typedef unsigned char byte; | |||
| typedef unsigned long unicode; | |||
| // The minimum value of the first `char` to indicate n bytes | |||
| #define C4bytes 0xF0 | |||
| #define C3bytes 0xE0 | |||
| #define C2bytes 0xC0 | |||
// The minimum code point value whose UTF-8 encoding needs n bytes
| #define U4bytes 0x10000 | |||
| #define U3bytes 0x0800 | |||
| #define U2bytes 0x0080 | |||
| void writebyte(FILE *f, byte b) | |||
| { | |||
| fwrite(&b, 1, 1, f); | |||
| } | |||
| void writeutf8char(FILE *f, unicode u) | |||
| { | |||
| if (u < U2bytes) | |||
| { | |||
| writebyte(f, (byte)u); | |||
| return; | |||
| } | |||
| int left; | |||
| int a; | |||
| int b; | |||
| if (u >= U4bytes) | |||
| { | |||
| left = 18; | |||
| a = 0b11110000; | |||
| b = 0b00000111; | |||
| } | |||
| else if (u >= U3bytes) | |||
| { | |||
| left = 12; | |||
| a = 0b11100000; | |||
| b = 0b00001111; | |||
| } | |||
| else | |||
| { | |||
| left = 6; | |||
| a = 0b11000000; | |||
| b = 0b00011111; | |||
| } | |||
| while (1) | |||
| { | |||
| writebyte(f, a | ((u >> left) & b)); | |||
| if (left == 0) | |||
| return; | |||
| a = 0b10000000; | |||
| b = 0b00111111; | |||
| left -= 6; | |||
| } | |||
| } | |||
| int readbyte(FILE *f) | |||
| { | |||
| int status; | |||
| byte c; | |||
| status = fread(&c, 1, 1, f); | |||
| if (status <= 0) return -1; | |||
| return (int)c; | |||
| } | |||
| long readutf8char(FILE *f) | |||
| { | |||
| unicode u = 0; | |||
| unicode ch = readbyte(f); | |||
| if (ch == -1) | |||
| return -1; | |||
| if (ch < C2bytes) | |||
| return ch; | |||
| int left; | |||
| int mask; | |||
| if (ch >= C4bytes) | |||
| { | |||
| left = 18; | |||
| mask = 0b00000111; | |||
| } | |||
| else if (ch >= C3bytes) | |||
| { | |||
| left = 12; | |||
| mask = 0b00001111; | |||
| } | |||
| else | |||
| { | |||
| left = 6; | |||
| mask = 0b00011111; | |||
| } | |||
| while (1) | |||
| { | |||
| u |= (ch & mask) << left; | |||
| if (left == 0) | |||
| return u; | |||
| left -= 6; | |||
| mask = 0b00111111; | |||
| ch = readbyte(f); | |||
| } | |||
| } | |||
| @@ -0,0 +1,185 @@ | |||
| .file "fasit.c" | |||
| .text | |||
	.globl writebyte
	.type writebyte, @function
# writebyte -- compiler-generated code (32-bit x86, AT&T syntax).
# Stack arguments: 8(%ebp) is passed on to fwrite as the stream,
# 12(%ebp) holds the byte to write (only its low 8 bits are used).
# Effect: calls fwrite(&local, 1, 1, <8(%ebp)>); fwrite's result is ignored.
writebyte:
.LFB5:
	.cfi_startproc
	pushl %ebp
	.cfi_def_cfa_offset 8
	.cfi_offset 5, -8
	movl %esp, %ebp
	.cfi_def_cfa_register 5
	subl $24, %esp
	movl 12(%ebp), %eax		# fetch the byte argument ...
	movb %al, -12(%ebp)		# ... and keep its low byte in a local so it has an address
	pushl 8(%ebp)			# fwrite arg 4: stream
	pushl $1			# fwrite arg 3: nmemb
	pushl $1			# fwrite arg 2: size
	leal -12(%ebp), %eax
	pushl %eax			# fwrite arg 1: address of the local byte
	call fwrite
	addl $16, %esp			# pop the four arguments
	nop
	leave
	.cfi_restore 5
	.cfi_def_cfa 4, 4
	ret
	.cfi_endproc
.LFE5:
	.size writebyte, .-writebyte
	.globl writeutf8char
	.type writeutf8char, @function
# writeutf8char -- compiler-generated code (32-bit x86, AT&T syntax).
# Stack arguments: 8(%ebp) is forwarded to writebyte as its first argument,
# 12(%ebp) is the value u to encode (compared unsigned).
# Local: -12(%ebp) holds the current shift count.
# u <= 127 is written as one byte; otherwise a lead byte is written
# (4-, 3- or 2-byte form) followed by continuation bytes in the loop at .L6.
writeutf8char:
.LFB6:
	.cfi_startproc
	pushl %ebp
	.cfi_def_cfa_offset 8
	.cfi_offset 5, -8
	movl %esp, %ebp
	.cfi_def_cfa_register 5
	subl $24, %esp
	cmpl $127, 12(%ebp)
	ja .L3				# u > 127: needs more than one byte
	movl 12(%ebp), %eax
	movzbl %al, %eax		# truncate u to its low byte
	subl $8, %esp
	pushl %eax
	pushl 8(%ebp)
	call writebyte
	addl $16, %esp
	jmp .L2				# done
.L3:
	cmpl $65535, 12(%ebp)
	jbe .L5				# u <= 0xFFFF: not the 4-byte form
	movl $18, -12(%ebp)		# 4-byte form: lead byte carries bits 18 and up
	movl -12(%ebp), %eax
	movl 12(%ebp), %edx
	movl %eax, %ecx
	shrl %cl, %edx			# u >> 18
	movl %edx, %eax
	andl $7, %eax			# keep 3 payload bits
	orl $-16, %eax			# set tag bits 11110xxx (low byte of -16 is 0xF0)
	movzbl %al, %eax
	subl $8, %esp
	pushl %eax
	pushl 8(%ebp)
	call writebyte
	addl $16, %esp
	jmp .L6
.L5:
	cmpl $2047, 12(%ebp)
	jbe .L7				# u <= 0x7FF: 2-byte form
	movl $12, -12(%ebp)		# 3-byte form: lead byte carries bits 12 and up
	movl -12(%ebp), %eax
	movl 12(%ebp), %edx
	movl %eax, %ecx
	shrl %cl, %edx			# u >> 12
	movl %edx, %eax
	andl $15, %eax			# keep 4 payload bits
	orl $-32, %eax			# set tag bits 1110xxxx (low byte of -32 is 0xE0)
	movzbl %al, %eax
	subl $8, %esp
	pushl %eax
	pushl 8(%ebp)
	call writebyte
	addl $16, %esp
	jmp .L6
.L7:
	movl $6, -12(%ebp)		# 2-byte form: lead byte carries bits 6 and up
	movl -12(%ebp), %eax
	movl 12(%ebp), %edx
	movl %eax, %ecx
	shrl %cl, %edx			# u >> 6
	movl %edx, %eax
	andl $31, %eax			# keep 5 payload bits
	orl $-64, %eax			# set tag bits 110xxxxx (low byte of -64 is 0xC0)
	movzbl %al, %eax
	subl $8, %esp
	pushl %eax
	pushl 8(%ebp)
	call writebyte
	addl $16, %esp
.L6:
	# Continuation bytes: lower the shift by 6, emit 10xxxxxx, repeat while shift > 0.
	subl $6, -12(%ebp)
	movl -12(%ebp), %eax
	movl 12(%ebp), %edx
	movl %eax, %ecx
	shrl %cl, %edx
	movl %edx, %eax
	andl $63, %eax			# keep 6 payload bits
	orl $-128, %eax			# set tag bits 10xxxxxx (low byte of -128 is 0x80)
	movzbl %al, %eax
	subl $8, %esp
	pushl %eax
	pushl 8(%ebp)
	call writebyte
	addl $16, %esp
	cmpl $0, -12(%ebp)
	jg .L6				# more 6-bit groups remain
.L2:
	leave
	.cfi_restore 5
	.cfi_def_cfa 4, 4
	ret
	.cfi_endproc
.LFE6:
	.size writeutf8char, .-writeutf8char
	.globl readbyte
	.type readbyte, @function
# readbyte -- compiler-generated code (32-bit x86, AT&T syntax).
# Stack argument: 8(%ebp) is passed to fread as the stream.
# Locals: -13(%ebp) one-byte read buffer, -12(%ebp) fread's return value.
# Returns in %eax the byte read (zero-extended), or -1 if fread's result
# is not greater than 0.
readbyte:
.LFB7:
	.cfi_startproc
	pushl %ebp
	.cfi_def_cfa_offset 8
	.cfi_offset 5, -8
	movl %esp, %ebp
	.cfi_def_cfa_register 5
	subl $24, %esp
	pushl 8(%ebp)			# fread arg 4: stream
	pushl $1			# fread arg 3: nmemb
	pushl $1			# fread arg 2: size
	leal -13(%ebp), %eax
	pushl %eax			# fread arg 1: address of the byte buffer
	call fread
	addl $16, %esp			# pop the four arguments
	movl %eax, -12(%ebp)		# save the item count returned by fread
	cmpl $0, -12(%ebp)
	jg .L9				# something was read
	movl $-1, %eax			# nothing read: return -1
	jmp .L11
.L9:
	movzbl -13(%ebp), %eax		# return the byte as a non-negative int
	movzbl %al, %eax
.L11:
	leave
	.cfi_restore 5
	.cfi_def_cfa 4, 4
	ret
	.cfi_endproc
.LFE7:
	.size readbyte, .-readbyte
	.globl readutf8char
	.type readutf8char, @function
# readutf8char -- compiler-generated code (32-bit x86, AT&T syntax).
# Stack argument: 8(%ebp) is forwarded to readbyte.
# Returns in %eax whatever readbyte returned.
# NOTE(review): as shown, this routine performs no multi-byte UTF-8
# decoding; it only forwards a single readbyte call. Presumably generated
# from an unfinished version of the C source -- confirm before relying on it.
readutf8char:
.LFB8:
	.cfi_startproc
	pushl %ebp
	.cfi_def_cfa_offset 8
	.cfi_offset 5, -8
	movl %esp, %ebp
	.cfi_def_cfa_register 5
	subl $8, %esp
	subl $12, %esp
	pushl 8(%ebp)			# readbyte's only argument
	call readbyte
	addl $16, %esp			# undo the 12 bytes reserved above plus the argument
	leave
	.cfi_restore 5
	.cfi_def_cfa 4, 4
	ret
	.cfi_endproc
.LFE8:
	.size readutf8char, .-readutf8char
| .ident "GCC: (GNU) 6.3.1 20170306" | |||
| .section .note.GNU-stack,"",@progbits | |||
| @@ -0,0 +1,54 @@ | |||
| .extern fread, fwrite | |||
| .text | |||
	.globl readbyte
# Name: readbyte
# Synopsis: Reads a byte from a binary file.
# C signature: int readbyte (FILE *f)
# Registers:
# NOTE: skeleton only -- there is no code between the prologue and the
# epilogue yet, so %eax is returned unmodified.
readbyte:
	pushl %ebp # Standard function prologue
	movl %esp,%ebp #
rb_x: popl %ebp # Standard
	ret # return.
	.globl readutf8char
# Name: readutf8char
# Synopsis: Reads a Unicode character from a binary file.
# C signature: long readutf8char (FILE *f)
# Registers:
# NOTE: skeleton only -- there is no code between the prologue and the
# epilogue yet, so %eax is returned unmodified.
readutf8char:
	pushl %ebp # Standard function prologue
	movl %esp,%ebp #
	popl %ebp # Standard
	ret # return.
	.globl writebyte
# Name: writebyte
# Synopsis: Writes a byte to a binary file.
# C signature: void writebyte (FILE *f, unsigned char b)
# Registers:
# NOTE: skeleton only -- there is no code between the prologue and the
# epilogue yet, so nothing is written.
writebyte:
	pushl %ebp # Standard function prologue
	movl %esp,%ebp #
	popl %ebp # Standard
	ret # return.
	.globl writeutf8char
# Name: writeutf8char
# Synopsis: Writes a character encoded as UTF-8 to a binary file.
# C signature: void writeutf8char (FILE *f, unsigned long u)
# Registers:
# NOTE: skeleton only -- there is no code between the prologue and the
# epilogue yet, so nothing is written.
writeutf8char:
	pushl %ebp # Standard function prologue
	movl %esp,%ebp #
wu8_x: popl %ebp # Standard
	ret # return.
| @@ -0,0 +1,86 @@ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| typedef unsigned char byte; | |||
| typedef unsigned long unicode; | |||
| // The minimum value of the first `char` to indicate n bytes | |||
| #define C4bytes 0xF0 | |||
| #define C3bytes 0xE0 | |||
| #define C2bytes 0xC0 | |||
// The minimum code point value whose UTF-8 encoding needs n bytes
| #define U4bytes 0x10000 | |||
| #define U3bytes 0x0800 | |||
| #define U2bytes 0x0080 | |||
| void writebyte(FILE *f, byte b) | |||
| { | |||
| fwrite(&b, 1, 1, f); | |||
| } | |||
| void writeutf8char(FILE *f, unicode u) | |||
| { | |||
| if (u >= U4bytes) | |||
| { | |||
| writebyte(f, 0b11110000 | ((u >> 18) & 0b00000111)); | |||
| writebyte(f, 0b10000000 | ((u >> 12) & 0b00111111)); | |||
| writebyte(f, 0b10000000 | ((u >> 6) & 0b00111111)); | |||
| writebyte(f, 0b10000000 | (u & 0b00111111)); | |||
| } | |||
| else if (u >= U3bytes) | |||
| { | |||
| writebyte(f, 0b11100000 | ((u >> 12) & 0b00001111)); | |||
| writebyte(f, 0b10000000 | ((u >> 6) & 0b00111111)); | |||
| writebyte(f, 0b10000000 | (u & 0b00111111)); | |||
| } | |||
| else if (u >= U2bytes) | |||
| { | |||
| writebyte(f, 0b11000000 | ((u >> 6) & 0b00011111)); | |||
| writebyte(f, 0b10000000 | (u & 0b00111111)); | |||
| } | |||
| else | |||
| { | |||
| writebyte(f, u); | |||
| } | |||
| } | |||
| int readbyte(FILE *f) | |||
| { | |||
| int status; | |||
| byte c; | |||
| status = fread(&c, 1, 1, f); | |||
| if (status <= 0) return -1; | |||
| return (int)c; | |||
| } | |||
| long readutf8char(FILE *f) | |||
| { | |||
| byte first = readbyte(f); | |||
| unicode u = 0; | |||
| if (first >= C4bytes) | |||
| { | |||
| u |= (first & 0b00000111) << 18; | |||
| u |= (readbyte(f) & 0b00111111) << 12; | |||
| u |= (readbyte(f) & 0b00111111) << 6; | |||
| u |= readbyte(f) & 0b00111111; | |||
| } | |||
| else if (first >= C3bytes) | |||
| { | |||
| u |= (first & 0b00001111) << 12; | |||
| u |= (readbyte(f) & 0b00111111) << 6; | |||
| u |= readbyte(f) & 0b00111111; | |||
| } | |||
| else if (first >= C2bytes) | |||
| { | |||
| u |= (first & 0b00011111) << 6; | |||
| u |= readbyte(f) & 0b00111111; | |||
| } | |||
| else | |||
| { | |||
| u |= first; | |||
| } | |||
| return u; | |||
| } | |||
| @@ -0,0 +1,210 @@ | |||
| .extern fread, fwrite | |||
| .text | |||
#########################
# int readbyte(FILE *f) #
#########################
# Reads one byte from f via fread(&buf, 1, 1, f).
# Returns the byte (0..255) in %eax, or -1 if fread returned 0.
#
# Stack frame:
#   8(%ebp):  f
#  -4(%ebp):  int-sized read buffer, zeroed first so that only the low byte
#             is non-zero after fread stores into it (x86 is little-endian)
#
# %esp is rounded down to a multiple of 16 before the four argument pushes
# (16 bytes), so it is 16-byte aligned at the call as the i386 System V ABI
# used by GCC/glibc expects, regardless of how the caller left the stack.
	.globl	readbyte
readbyte:
	pushl	%ebp
	movl	%esp,%ebp
	subl	$4,%esp			# reserve the read buffer
	movl	$0,-4(%ebp)		# clear all four bytes of it
	andl	$-16,%esp		# align; the buffer stays above %esp
	leal	-4(%ebp),%eax		# address of the buffer
	pushl	8(%ebp)			# FILE *stream
	pushl	$1			# nmemb
	pushl	$1			# size
	pushl	%eax			# void *ptr
	call	fread
	cmpl	$0,%eax			# fread returns the number of items read
	je	rb_e			# 0 items: end of file or error
	movl	-4(%ebp),%eax		# return the byte, upper bits are zero
	jmp	rb_x
rb_e:	movl	$-1,%eax		# return -1
rb_x:	movl	%ebp,%esp		# drop locals, arguments and alignment padding
	popl	%ebp
	ret
##############################
# long readutf8char(FILE *f) #
##############################
# Reads one UTF-8 encoded character from f and returns its code point in
# %eax. Returns -1 when readbyte reports end of input, on the first byte or
# in the middle of a multi-byte sequence (previously a -1 from a
# continuation read was masked with 0x3F and merged into the result).
# A first byte below 0xC0 is returned unchanged, including stray
# continuation bytes 0x80..0xBF.
#
# Stack frame:
#   8(%ebp):  f
#  -4(%ebp):  code point being assembled
#  -8(%ebp):  current byte
# -12(%ebp):  mask selecting the payload bits of the current byte
# -16(%ebp):  shift to apply to the current byte's payload
# -20(%ebp):  padding: with the one pushed argument, %esp is 16-byte aligned
#             at each call, assuming the caller called us with an aligned stack
	.globl	readutf8char
readutf8char:
	pushl	%ebp
	movl	%esp,%ebp
	subl	$20,%esp		# four locals + alignment padding

	# Read the first byte into %eax.
	pushl	8(%ebp)			# f
	call	readbyte
	addl	$4,%esp			# pop the argument

	# Anything below 0xC0 is returned as is. The compare is signed,
	# so the EOF marker -1 takes this path as well.
	cmpl	$0xC0,%eax
	jl	ru8_x

	movl	$0,-4(%ebp)		# start with an empty code point
	movl	%eax,-8(%ebp)		# current byte = lead byte
	cmpl	$0xF0,%eax		# lead byte >= 0xF0: 4-byte sequence
	jge	ru8_4bytes
	cmpl	$0xE0,%eax		# lead byte >= 0xE0: 3-byte sequence
	jge	ru8_3bytes
	jmp	ru8_2bytes		# otherwise 2 bytes

ru8_4bytes:
	movl	$18,-16(%ebp)		# lead payload goes to bits 18..20
	movl	$0b00000111,-12(%ebp)	# lead byte carries 3 payload bits
	jmp	ru8_loop
ru8_3bytes:
	movl	$12,-16(%ebp)		# lead payload goes to bits 12..15
	movl	$0b00001111,-12(%ebp)	# lead byte carries 4 payload bits
	jmp	ru8_loop
ru8_2bytes:
	movl	$6,-16(%ebp)		# lead payload goes to bits 6..10
	movl	$0b00011111,-12(%ebp)	# lead byte carries 5 payload bits

ru8_loop:
	movl	-8(%ebp),%eax		# current byte
	andl	-12(%ebp),%eax		# keep its payload bits
	movl	-16(%ebp),%ecx		# shift count must be in %cl
	shll	%cl,%eax		# move the payload into position
	orl	%eax,-4(%ebp)		# merge into the code point
	cmpl	$0,-16(%ebp)		# shift 0 means that was the last byte
	je	ru8_done
	movl	$63,-12(%ebp)		# continuation bytes carry 6 payload bits
	subl	$6,-16(%ebp)		# next byte sits 6 bits lower

	pushl	8(%ebp)			# f
	call	readbyte
	addl	$4,%esp			# pop the argument
	cmpl	$-1,%eax		# input ended inside the sequence?
	je	ru8_x			# yes: return -1 (already in %eax)
	movl	%eax,-8(%ebp)		# becomes the current byte
	jmp	ru8_loop

ru8_done:
	movl	-4(%ebp),%eax		# return the assembled code point
ru8_x:	movl	%ebp,%esp
	popl	%ebp
	ret
############################################
# void writebyte(FILE *f, unsigned char b) #
############################################
# Writes the byte b to f via fwrite(&b, 1, 1, f). fwrite's result is ignored.
#
# Stack frame:
#   8(%ebp):  f
#  12(%ebp):  b; it occupies the low byte of its 4-byte argument slot
#             (little-endian), so the slot's address points at the byte
#
# Arguments are addressed through %ebp, and %esp is rounded down to a
# multiple of 16 before the four pushes (16 bytes), so it is 16-byte aligned
# at the call as the i386 System V ABI used by GCC/glibc expects.
	.globl	writebyte
writebyte:
	pushl	%ebp
	movl	%esp,%ebp
	andl	$-16,%esp		# align the stack for the call below
	leal	12(%ebp),%eax		# address of b
	pushl	8(%ebp)			# FILE *stream
	pushl	$1			# nmemb
	pushl	$1			# size
	pushl	%eax			# const void *ptr
	call	fwrite
	movl	%ebp,%esp		# drop arguments and alignment padding
	popl	%ebp
	ret
################################################
# void writeutf8char(FILE *f, unsigned long u) #
################################################
# Writes u to f encoded as UTF-8 (1 to 4 bytes) by calling writebyte once
# per output byte. Each byte is computed as
#     tag | ((u >> shift) & mask)
# u is unsigned, so all range checks use unsigned jumps (jae); a signed jge
# would send values with the top bit set down the single-byte path.
#
# Stack frame:
#   8(%ebp):  f
#  12(%ebp):  u
#  -4(%ebp):  tag  (fixed high bits of the byte being written)
#  -8(%ebp):  shift (bit position of the byte's payload within u)
# -12(%ebp):  mask (payload bits of the byte being written)
# -16(%ebp):  padding: with the two pushed arguments, %esp is 16-byte aligned
#             at each call, assuming the caller called us with an aligned stack
	.globl	writeutf8char
writeutf8char:
	pushl	%ebp
	movl	%esp,%ebp
	subl	$16,%esp		# three locals + alignment padding

	movl	12(%ebp),%ecx		# u
	cmpl	$0x0080,%ecx		# u >= 0x80 does not fit in one byte
	jae	wu8_main

	pushl	%ecx			# unsigned char b (low byte of u)
	pushl	8(%ebp)			# FILE *f
	call	writebyte
	jmp	wu8_x			# the epilogue resets %esp

# Pick the encoding length from the magnitude of u.
wu8_main:
	cmpl	$0x10000,%ecx		# u >= 0x10000 needs 4 bytes
	jae	wu8_4bytes
	cmpl	$0x0800,%ecx		# u >= 0x0800 needs 3 bytes
	jae	wu8_3bytes
	jmp	wu8_2bytes		# remaining values (>= 0x0080) need 2 bytes

wu8_4bytes:
	movl	$18,-8(%ebp)		# lead byte carries bits 18..20
	movl	$0b11110000,-4(%ebp)	# lead tag 11110xxx
	movl	$0b00000111,-12(%ebp)	# 3 payload bits
	jmp	wu8_loop
wu8_3bytes:
	movl	$12,-8(%ebp)		# lead byte carries bits 12..15
	movl	$0b11100000,-4(%ebp)	# lead tag 1110xxxx
	movl	$0b00001111,-12(%ebp)	# 4 payload bits
	jmp	wu8_loop
wu8_2bytes:
	movl	$6,-8(%ebp)		# lead byte carries bits 6..10
	movl	$0b11000000,-4(%ebp)	# lead tag 110xxxxx
	movl	$0b00011111,-12(%ebp)	# 5 payload bits

wu8_loop:
	movl	12(%ebp),%eax		# u
	movl	-8(%ebp),%ecx		# shift count must be in %cl
	shrl	%cl,%eax		# u >> shift
	andl	-12(%ebp),%eax		# keep the payload bits (mask)
	orl	-4(%ebp),%eax		# add the tag bits
	pushl	%eax			# unsigned char b
	pushl	8(%ebp)			# FILE *f
	call	writebyte
	addl	$8,%esp			# pop both arguments

	cmpl	$0,-8(%ebp)		# shift 0 means the last byte was written
	je	wu8_x
	movl	$63,-12(%ebp)		# continuation bytes: 6 payload bits
	movl	$128,-4(%ebp)		# continuation tag 10xxxxxx
	subl	$6,-8(%ebp)		# next byte takes the next lower 6 bits
	jmp	wu8_loop

wu8_x:	movl	%ebp,%esp
	popl	%ebp
	ret
| @@ -0,0 +1,303 @@ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include <wchar.h> | |||
| #define FALSE 0 | |||
| #define TRUE 1 | |||
| typedef unsigned char byte; | |||
| typedef unsigned long unicode; | |||
| extern int readbyte (FILE *f); | |||
| extern long readutf8char (FILE *f); | |||
| extern void writebyte (FILE *f, byte b); | |||
| extern void writeutf8char (FILE *f, unicode u); | |||
/* Print message as an error on standard output and terminate with status 1. */
void error (char *message)
{
  fprintf(stdout, "\nERROR: %s\n", message);
  exit(1);
}
| void dump_byte_seq (byte b[], int n_b) | |||
| { | |||
| int i; | |||
| printf("%d bytes {", n_b); | |||
| for (i = 0; i < n_b; i++) { | |||
| if (i > 0) printf(", "); | |||
| printf("0x%02x", b[i]); | |||
| } | |||
| printf("}"); | |||
| } | |||
| void dump_unicode_seq (unicode u[], int n_u) | |||
| { | |||
| int i; | |||
| printf("%d chars {", n_u); | |||
| for (i = 0; i < n_u; i++) { | |||
| if (i > 0) printf(", "); | |||
| printf("0x%lx", u[i]); | |||
| } | |||
| printf("}"); | |||
| } | |||
| void compare_byte_seqs (byte a[], int n_a, byte b[], int n_b) | |||
| { | |||
| int ok = TRUE; | |||
| if (n_a != n_b) { | |||
| ok = FALSE; | |||
| } else { | |||
| int i; | |||
| for (i = 0; i < n_a; i++) | |||
| if (a[i] != b[i]) ok = FALSE; | |||
| } | |||
| if (ok) { | |||
| printf("OK\n"); | |||
| } else { | |||
| printf("\n Error: Result is "); dump_byte_seq(a, n_a); | |||
| printf("\n but should be "); dump_byte_seq(b, n_b); printf("\n"); | |||
| } | |||
| } | |||
| void compare_unicode_seqs (unicode a[], int n_a, unicode b[], int n_b) | |||
| { | |||
| int ok = TRUE; | |||
| if (n_a != n_b) { | |||
| ok = FALSE; | |||
| } else { | |||
| int i; | |||
| for (i = 0; i < n_a; i++) | |||
| if (a[i] != b[i]) ok = FALSE; | |||
| } | |||
| if (ok) { | |||
| printf("OK\n"); | |||
| } else { | |||
| printf("\n Error: Result is "); dump_unicode_seq(a, n_a); | |||
| printf("\n but should be "); dump_unicode_seq(b, n_b); printf("\n"); | |||
| } | |||
| } | |||
| int read_test_byte (FILE *f) | |||
| { | |||
| int status; | |||
| byte c; | |||
| status = fread(&c, 1, 1, f); | |||
| if (status <= 0) return -1; | |||
| return (int)c; | |||
| } | |||
| void test_byte_file (char *f_name, byte data[], int n_data) | |||
| { | |||
| byte file_bytes[200]; | |||
| int n_file_bytes; | |||
| FILE *f = fopen(f_name, "rb"); | |||
| if (f == NULL) error("Could not open file!"); | |||
| for (n_file_bytes = 0; n_file_bytes < 200; n_file_bytes++) { | |||
| int b = read_test_byte(f); | |||
| if (b < 0) break; | |||
| file_bytes[n_file_bytes] = b; | |||
| } | |||
| fclose(f); | |||
| compare_byte_seqs(file_bytes, n_file_bytes, data, n_data); | |||
| } | |||
| void create_byte_file (char *f_name, byte b_seq[], int n_b_seq) | |||
| { | |||
| FILE *f = fopen(f_name, "wb"); | |||
| if (f == NULL) error("Could not create file!"); | |||
| fwrite(b_seq, n_b_seq, 1, f); | |||
| fclose(f); | |||
| } | |||
| /* Test #1 */ | |||
| byte b_seq_1[] = { 4, 0, 255, 17, 200 }; | |||
| void test_1 (void) | |||
| { | |||
| int n_bytes = sizeof(b_seq_1)/sizeof(b_seq_1[0]); | |||
| int i; | |||
| FILE *f = fopen("test1.txt", "wb"); | |||
| if (f == NULL) error("Could not create test1.txt!"); | |||
| for (i = 0; i < n_bytes; i++) | |||
| writebyte(f, b_seq_1[i]); | |||
| fclose(f); | |||
| test_byte_file("test1.txt", b_seq_1, n_bytes); | |||
| } | |||
| /* Test #2 */ | |||
| unicode u_seq_2[] = { 0x24, 0x20, 0x41, 0x3d, 0x32, 0x78 }; /* "$ A=2x" */ | |||
| byte b_seq_2[] = { '$', ' ', 'A', '=', '2', 'x' }; | |||
| void test_2 (void) | |||
| { | |||
| int n_u = sizeof(u_seq_2)/sizeof(u_seq_2[0]); | |||
| int n_b = sizeof(b_seq_2)/sizeof(b_seq_2[0]); | |||
| int i; | |||
| FILE *f = fopen("test2.txt", "wb"); | |||
| if (f == NULL) error("Could not create test2.txt!"); | |||
| for (i = 0; i < n_u; i++) | |||
| writeutf8char(f, u_seq_2[i]); | |||
| fclose(f); | |||
| test_byte_file("test2.txt", b_seq_2, n_b); | |||
| } | |||
| /* Test #3 */ | |||
| unicode u_seq_3[] = { 0x35, 0xa2, 0x20, 0x429, 0x3c9 }; /* "5¢ Щω" */ | |||
| byte b_seq_3[] = { '5', 0xc2, 0xa2, ' ', 0xd0, 0xa9, 0xcf, 0x89 }; | |||
| void test_3 (void) | |||
| { | |||
| int n_u = sizeof(u_seq_3)/sizeof(u_seq_3[0]); | |||
| int n_b = sizeof(b_seq_3)/sizeof(b_seq_3[0]); | |||
| int i; | |||
| FILE *f = fopen("test3.txt", "wb"); | |||
| if (f == NULL) error("Could not create test3.txt!"); | |||
| for (i = 0; i < n_u; i++) | |||
| writeutf8char(f, u_seq_3[i]); | |||
| fclose(f); | |||
| test_byte_file("test3.txt", b_seq_3, n_b); | |||
| } | |||
| /* Test #4 */ | |||
| unicode u_seq_4[] = { 0x20ac, 0x3d, 0x10348, 0x2658 }; /* "€=𐍈♘" */ | |||
| byte b_seq_4[] = { 0xe2, 0x82, 0xac, '=', 0xf0, 0x90, 0x8d, 0x88, | |||
| 0xe2, 0x99, 0x98}; | |||
| void test_4 (void) | |||
| { | |||
| int n_u = sizeof(u_seq_4)/sizeof(u_seq_4[0]); | |||
| int n_b = sizeof(b_seq_4)/sizeof(b_seq_4[0]); | |||
| int i; | |||
| FILE *f = fopen("test4.txt", "wb"); | |||
| if (f == NULL) error("Could not create test4.txt!"); | |||
| for (i = 0; i < n_u; i++) | |||
| writeutf8char(f, u_seq_4[i]); | |||
| fclose(f); | |||
| test_byte_file("test4.txt", b_seq_4, n_b); | |||
| } | |||
| /* Test #5 */ | |||
| void test_5 (void) | |||
| { | |||
| byte data[200]; | |||
| int n_data = 0; | |||
| int n_b_seq_1 = sizeof(b_seq_1)/sizeof(b_seq_1[0]); | |||
| FILE *f; | |||
| create_byte_file ("test5.txt", b_seq_1, n_b_seq_1); | |||
| f = fopen("test5.txt", "rb"); | |||
| if (f == NULL) error("Could not read test5.txt!"); | |||
| while (n_data < 200) { | |||
| int b = readbyte(f); | |||
| if (b < 0) break; | |||
| data[n_data++] = (byte)b; | |||
| } | |||
| fclose(f); | |||
| compare_byte_seqs(data, n_data, b_seq_1, n_b_seq_1); | |||
| } | |||
| /* Test #6 */ | |||
| void test_6 (void) | |||
| { | |||
| unicode data[200]; | |||
| int n_data = 0; | |||
| int n_b_seq_2 = sizeof(b_seq_2)/sizeof(b_seq_2[0]); | |||
| int n_u_seq_2 = sizeof(u_seq_2)/sizeof(u_seq_2[0]); | |||
| FILE *f; | |||
| create_byte_file ("test6.txt", b_seq_2, n_b_seq_2); | |||
| f = fopen("test6.txt", "rb"); | |||
| if (f == NULL) error("Could not read test6.txt!"); | |||
| while (n_data < 200) { | |||
| long u = readutf8char(f); | |||
| if (u < 0) break; | |||
| data[n_data++] = (unicode)u; | |||
| } | |||
| fclose(f); | |||
| compare_unicode_seqs(data, n_data, u_seq_2, n_u_seq_2); | |||
| } | |||
| /* Test #7 */ | |||
| void test_7 (void) | |||
| { | |||
| unicode data[200]; | |||
| int n_data = 0; | |||
| int n_b_seq_3 = sizeof(b_seq_3)/sizeof(b_seq_3[0]); | |||
| int n_u_seq_3 = sizeof(u_seq_3)/sizeof(u_seq_3[0]); | |||
| FILE *f; | |||
| create_byte_file ("test7.txt", b_seq_3, n_b_seq_3); | |||
| f = fopen("test7.txt", "rb"); | |||
| if (f == NULL) error("Could not read test7.txt!"); | |||
| while (n_data < 200) { | |||
| long u = readutf8char(f); | |||
| if (u < 0) break; | |||
| data[n_data++] = (unicode)u; | |||
| } | |||
| fclose(f); | |||
| compare_unicode_seqs(data, n_data, u_seq_3, n_u_seq_3); | |||
| } | |||
| /* Test #8 */ | |||
| void test_8 (void) | |||
| { | |||
| unicode data[200]; | |||
| int n_data = 0; | |||
| int n_b_seq_4 = sizeof(b_seq_4)/sizeof(b_seq_4[0]); | |||
| int n_u_seq_4 = sizeof(u_seq_4)/sizeof(u_seq_4[0]); | |||
| FILE *f; | |||
| create_byte_file ("test8.txt", b_seq_4, n_b_seq_4); | |||
| f = fopen("test8.txt", "rb"); | |||
| if (f == NULL) error("Could not read test8.txt!"); | |||
| while (n_data < 200) { | |||
| long u = readutf8char(f); | |||
| if (u < 0) break; | |||
| data[n_data++] = (unicode)u; | |||
| } | |||
| fclose(f); | |||
| compare_unicode_seqs(data, n_data, u_seq_4, n_u_seq_4); | |||
| } | |||
| /* Main program */ | |||
| int main (void) | |||
| { | |||
| printf("Test 1 (write a byte): "); test_1(); | |||
| printf("Test 2 (write 1-byte utf-8): "); test_2(); | |||
| printf("Test 3 (write 2-byte utf-8): "); test_3(); | |||
| printf("Test 4 (write 3+4-byte utf-8): "); test_4(); | |||
| printf("Test 5 (read a byte): "); test_5(); | |||
| printf("Test 6 (read 1-byte utf-8): "); test_6(); | |||
| printf("Test 7 (read 2-byte utf-8): "); test_7(); | |||
| printf("Test 8 (read 3+4-byte utf-8): "); test_8(); | |||
| return 0; | |||
| } | |||