# Builds and runs the UTF-8 read/write test program as a 32-bit binary.
#
#   make            build ./test from the C driver and the assembly solution
#   make run-test   run ./test under valgrind, then delete its scratch files
#   make clean      delete the binary and the scratch files

CC = gcc
CFLAGS = -g -m32

# Scratch files written by the test program (one per test case).
TESTFILES = \
	test1.txt test2.txt test3.txt \
	test4.txt test5.txt test6.txt \
	test7.txt test8.txt

# Must stay the first target so that a plain `make` builds the binary.
test: test-oblig3.c oppgave.s
	$(CC) $(CFLAGS) -o $@ $^

run-test: test
	valgrind ./test
	$(RM) $(TESTFILES)

# Only the files listed in TESTFILES are removed; a recursive
# `find . -name 'test*.txt'` would also delete unrelated files in
# subdirectories.
clean:
	$(RM) test $(TESTFILES)

.PHONY: run-test clean
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
typedef unsigned char byte; | |||||
typedef unsigned long unicode; | |||||
// The minimum value of the first `char` to indicate n bytes | |||||
#define C4bytes 0xF0 | |||||
#define C3bytes 0xE0 | |||||
#define C2bytes 0xC0 | |||||
// The minimum code point value whose UTF-8 encoding needs n bytes
#define U4bytes 0x10000 | |||||
#define U3bytes 0x0800 | |||||
#define U2bytes 0x0080 | |||||
/* Write the single byte `b` to the stream `f`.
 * The result of fwrite is not checked. */
void writebyte(FILE *f, byte b)
{
    const byte out[1] = { b };

    fwrite(out, sizeof out[0], 1, f);
}
/* Encode the code point `u` as UTF-8 and write it to `f`.
 *
 * Values below U2bytes are written as one byte, below U3bytes as two,
 * below U4bytes as three, and everything else as four bytes (only the
 * low 21 bits of `u` end up in the output). */
void writeutf8char(FILE *f, unicode u)
{
    int shift;    /* bit position of the payload carried by the next byte */
    int marker;   /* fixed high bits of the lead byte */
    int payload;  /* mask selecting the data bits of the lead byte */

    if (u < U2bytes) {
        writebyte(f, (byte)u);
        return;
    }

    if (u < U3bytes) {
        shift = 6;
        marker = 0xC0;
        payload = 0x1F;
    } else if (u < U4bytes) {
        shift = 12;
        marker = 0xE0;
        payload = 0x0F;
    } else {
        shift = 18;
        marker = 0xF0;
        payload = 0x07;
    }

    /* Lead byte first, then one continuation byte per remaining 6 bits. */
    writebyte(f, marker | ((u >> shift) & payload));
    while (shift != 0) {
        shift -= 6;
        writebyte(f, 0x80 | ((u >> shift) & 0x3F));
    }
}
int readbyte(FILE *f) | |||||
{ | |||||
int status; | |||||
byte c; | |||||
status = fread(&c, 1, 1, f); | |||||
if (status <= 0) return -1; | |||||
return (int)c; | |||||
} | |||||
long readutf8char(FILE *f) | |||||
{ | |||||
unicode u = 0; | |||||
unicode ch = readbyte(f); | |||||
if (ch == -1) | |||||
return -1; | |||||
if (ch < C2bytes) | |||||
return ch; | |||||
int left; | |||||
int mask; | |||||
if (ch >= C4bytes) | |||||
{ | |||||
left = 18; | |||||
mask = 0b00000111; | |||||
} | |||||
else if (ch >= C3bytes) | |||||
{ | |||||
left = 12; | |||||
mask = 0b00001111; | |||||
} | |||||
else | |||||
{ | |||||
left = 6; | |||||
mask = 0b00011111; | |||||
} | |||||
while (1) | |||||
{ | |||||
u |= (ch & mask) << left; | |||||
if (left == 0) | |||||
return u; | |||||
left -= 6; | |||||
mask = 0b00111111; | |||||
ch = readbyte(f); | |||||
} | |||||
} |
.file "fasit.c" | |||||
.text | |||||
# void writebyte(FILE *f, byte b) -- compiler-generated 32-bit listing.
# Copies the low byte of the second argument into a stack local and calls
# fwrite(&local, 1, 1, f). The value returned by fwrite is not inspected.
.globl writebyte | |||||
.type writebyte, @function | |||||
writebyte: | |||||
.LFB5: | |||||
.cfi_startproc | |||||
pushl %ebp | |||||
.cfi_def_cfa_offset 8 | |||||
.cfi_offset 5, -8 | |||||
movl %esp, %ebp | |||||
.cfi_def_cfa_register 5 | |||||
subl $24, %esp | |||||
# Second argument -> %eax; only %al is stored, at -12(%ebp).
movl 12(%ebp), %eax | |||||
movb %al, -12(%ebp) | |||||
# Arguments are pushed right to left: f, 1, 1, address of the local byte.
pushl 8(%ebp) | |||||
pushl $1 | |||||
pushl $1 | |||||
leal -12(%ebp), %eax | |||||
pushl %eax | |||||
call fwrite | |||||
# Release the four 4-byte arguments.
addl $16, %esp | |||||
nop | |||||
leave | |||||
.cfi_restore 5 | |||||
.cfi_def_cfa 4, 4 | |||||
ret | |||||
.cfi_endproc | |||||
.LFE5: | |||||
.size writebyte, .-writebyte | |||||
# void writeutf8char(FILE *f, unicode u) -- compiler-generated 32-bit listing.
# f is at 8(%ebp), u at 12(%ebp); -12(%ebp) holds the current shift count.
# u <= 127 is written as a single byte. Otherwise a lead byte is written
# (4-, 3- or 2-byte form chosen by unsigned comparisons against 65535 and
# 2047) and the loop at .L6 writes one continuation byte per 6 remaining bits.
.globl writeutf8char | |||||
.type writeutf8char, @function | |||||
writeutf8char: | |||||
.LFB6: | |||||
.cfi_startproc | |||||
pushl %ebp | |||||
.cfi_def_cfa_offset 8 | |||||
.cfi_offset 5, -8 | |||||
movl %esp, %ebp | |||||
.cfi_def_cfa_register 5 | |||||
subl $24, %esp | |||||
# One-byte case: u <= 127 (unsigned) -> writebyte(f, low byte of u), then exit.
cmpl $127, 12(%ebp) | |||||
ja .L3 | |||||
movl 12(%ebp), %eax | |||||
movzbl %al, %eax | |||||
subl $8, %esp | |||||
pushl %eax | |||||
pushl 8(%ebp) | |||||
call writebyte | |||||
addl $16, %esp | |||||
jmp .L2 | |||||
.L3: | |||||
# Four-byte case: u > 65535. Lead byte = 0xF0 | ((u >> 18) & 7).
cmpl $65535, 12(%ebp) | |||||
jbe .L5 | |||||
movl $18, -12(%ebp) | |||||
movl -12(%ebp), %eax | |||||
movl 12(%ebp), %edx | |||||
movl %eax, %ecx | |||||
shrl %cl, %edx | |||||
movl %edx, %eax | |||||
andl $7, %eax | |||||
# $-16 has 0xF0 in its low byte; movzbl keeps only that byte.
orl $-16, %eax | |||||
movzbl %al, %eax | |||||
subl $8, %esp | |||||
pushl %eax | |||||
pushl 8(%ebp) | |||||
call writebyte | |||||
addl $16, %esp | |||||
jmp .L6 | |||||
.L5: | |||||
# Three-byte case: u > 2047. Lead byte = 0xE0 | ((u >> 12) & 15).
cmpl $2047, 12(%ebp) | |||||
jbe .L7 | |||||
movl $12, -12(%ebp) | |||||
movl -12(%ebp), %eax | |||||
movl 12(%ebp), %edx | |||||
movl %eax, %ecx | |||||
shrl %cl, %edx | |||||
movl %edx, %eax | |||||
andl $15, %eax | |||||
# $-32 has 0xE0 in its low byte.
orl $-32, %eax | |||||
movzbl %al, %eax | |||||
subl $8, %esp | |||||
pushl %eax | |||||
pushl 8(%ebp) | |||||
call writebyte | |||||
addl $16, %esp | |||||
jmp .L6 | |||||
.L7: | |||||
# Two-byte case. Lead byte = 0xC0 | ((u >> 6) & 31); falls through to .L6.
movl $6, -12(%ebp) | |||||
movl -12(%ebp), %eax | |||||
movl 12(%ebp), %edx | |||||
movl %eax, %ecx | |||||
shrl %cl, %edx | |||||
movl %edx, %eax | |||||
andl $31, %eax | |||||
# $-64 has 0xC0 in its low byte.
orl $-64, %eax | |||||
movzbl %al, %eax | |||||
subl $8, %esp | |||||
pushl %eax | |||||
pushl 8(%ebp) | |||||
call writebyte | |||||
addl $16, %esp | |||||
.L6: | |||||
# Continuation loop: shift -= 6, write 0x80 | ((u >> shift) & 63),
# repeat while the shift count is still greater than zero.
subl $6, -12(%ebp) | |||||
movl -12(%ebp), %eax | |||||
movl 12(%ebp), %edx | |||||
movl %eax, %ecx | |||||
shrl %cl, %edx | |||||
movl %edx, %eax | |||||
andl $63, %eax | |||||
# $-128 has 0x80 in its low byte.
orl $-128, %eax | |||||
movzbl %al, %eax | |||||
subl $8, %esp | |||||
pushl %eax | |||||
pushl 8(%ebp) | |||||
call writebyte | |||||
addl $16, %esp | |||||
cmpl $0, -12(%ebp) | |||||
jg .L6 | |||||
.L2: | |||||
leave | |||||
.cfi_restore 5 | |||||
.cfi_def_cfa 4, 4 | |||||
ret | |||||
.cfi_endproc | |||||
.LFE6: | |||||
.size writeutf8char, .-writeutf8char | |||||
# int readbyte(FILE *f) -- compiler-generated 32-bit listing.
# Calls fread(&c, 1, 1, f) with c at -13(%ebp) and keeps the result at
# -12(%ebp). Returns the byte zero-extended in %eax when the result is
# greater than zero, otherwise -1.
.globl readbyte | |||||
.type readbyte, @function | |||||
readbyte: | |||||
.LFB7: | |||||
.cfi_startproc | |||||
pushl %ebp | |||||
.cfi_def_cfa_offset 8 | |||||
.cfi_offset 5, -8 | |||||
movl %esp, %ebp | |||||
.cfi_def_cfa_register 5 | |||||
subl $24, %esp | |||||
# Arguments are pushed right to left: f, 1, 1, address of the byte buffer.
pushl 8(%ebp) | |||||
pushl $1 | |||||
pushl $1 | |||||
leal -13(%ebp), %eax | |||||
pushl %eax | |||||
call fread | |||||
addl $16, %esp | |||||
# Store the fread result and branch on whether it is positive.
movl %eax, -12(%ebp) | |||||
cmpl $0, -12(%ebp) | |||||
jg .L9 | |||||
# Nothing was read: return -1.
movl $-1, %eax | |||||
jmp .L11 | |||||
.L9: | |||||
# Return the byte that was read, zero-extended to 32 bits.
movzbl -13(%ebp), %eax | |||||
movzbl %al, %eax | |||||
.L11: | |||||
leave | |||||
.cfi_restore 5 | |||||
.cfi_def_cfa 4, 4 | |||||
ret | |||||
.cfi_endproc | |||||
.LFE7: | |||||
.size readbyte, .-readbyte | |||||
# long readutf8char(FILE *f) -- compiler-generated 32-bit listing.
# In this listing the function only calls readbyte(f) and returns whatever
# readbyte left in %eax; no multi-byte decoding is performed here.
.globl readutf8char | |||||
.type readutf8char, @function | |||||
readutf8char: | |||||
.LFB8: | |||||
.cfi_startproc | |||||
pushl %ebp | |||||
.cfi_def_cfa_offset 8 | |||||
.cfi_offset 5, -8 | |||||
movl %esp, %ebp | |||||
.cfi_def_cfa_register 5 | |||||
subl $8, %esp | |||||
# 12 bytes of padding plus the 4-byte argument are released by addl $16.
subl $12, %esp | |||||
pushl 8(%ebp) | |||||
call readbyte | |||||
addl $16, %esp | |||||
# %eax is not touched after the call, so readbyte's result is returned as is.
leave | |||||
.cfi_restore 5 | |||||
.cfi_def_cfa 4, 4 | |||||
ret | |||||
.cfi_endproc | |||||
.LFE8: | |||||
.size readutf8char, .-readutf8char | |||||
.ident "GCC: (GNU) 6.3.1 20170306" | |||||
.section .note.GNU-stack,"",@progbits |
.extern fread, fwrite | |||||
.text | |||||
.globl readbyte
# Name:        readbyte
# Synopsis:    Reads one byte from a binary file.
# C signature: int readbyte (FILE *f)
# Registers:   none yet -- the body is an empty stub (frame setup/teardown only).
readbyte:
        pushl   %ebp            # Standard function prologue:
        movl    %esp,%ebp       # establish the frame pointer.
rb_x:   leave                   # Standard epilogue (restores %ebp)
        ret                     # and return.
.globl readutf8char
# Name:        readutf8char
# Synopsis:    Reads one Unicode character from a binary file.
# C signature: long readutf8char (FILE *f)
# Registers:   none yet -- the body is an empty stub (frame setup/teardown only).
readutf8char:
        pushl   %ebp            # Standard function prologue:
        movl    %esp,%ebp       # establish the frame pointer.
        leave                   # Standard epilogue (restores %ebp)
        ret                     # and return.
.globl writebyte
# Name:        writebyte
# Synopsis:    Writes one byte to a binary file.
# C signature: void writebyte (FILE *f, unsigned char b)
# Registers:   none yet -- the body is an empty stub (frame setup/teardown only).
writebyte:
        pushl   %ebp            # Standard function prologue:
        movl    %esp,%ebp       # establish the frame pointer.
        leave                   # Standard epilogue (restores %ebp)
        ret                     # and return.
.globl writeutf8char
# Name:        writeutf8char
# Synopsis:    Writes one character, encoded as UTF-8, to a binary file.
# C signature: void writeutf8char (FILE *f, unsigned long u)
# Registers:   none yet -- the body is an empty stub (frame setup/teardown only).
writeutf8char:
        pushl   %ebp            # Standard function prologue:
        movl    %esp,%ebp       # establish the frame pointer.
wu8_x:  leave                   # Standard epilogue (restores %ebp)
        ret                     # and return.
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
typedef unsigned char byte; | |||||
typedef unsigned long unicode; | |||||
// The minimum value of the first `char` to indicate n bytes | |||||
#define C4bytes 0xF0 | |||||
#define C3bytes 0xE0 | |||||
#define C2bytes 0xC0 | |||||
// The minimum code point value whose UTF-8 encoding needs n bytes
#define U4bytes 0x10000 | |||||
#define U3bytes 0x0800 | |||||
#define U2bytes 0x0080 | |||||
/* Write the single byte `b` to the stream `f`.
 * The result of fwrite is not checked. */
void writebyte(FILE *f, byte b)
{
    byte *src = &b;

    fwrite(src, sizeof *src, 1, f);
}
/* Encode the code point `u` as UTF-8 and write it to `f`.
 *
 * The ranges are tested smallest first: below U2bytes one byte is written,
 * below U3bytes two, below U4bytes three, otherwise four (only the low
 * 21 bits of `u` end up in the output). */
void writeutf8char(FILE *f, unicode u)
{
    if (u < U2bytes) {
        writebyte(f, u);
        return;
    }

    if (u < U3bytes) {
        writebyte(f, 0xC0 | ((u >> 6) & 0x1F));
        writebyte(f, 0x80 | (u & 0x3F));
        return;
    }

    if (u < U4bytes) {
        writebyte(f, 0xE0 | ((u >> 12) & 0x0F));
        writebyte(f, 0x80 | ((u >> 6) & 0x3F));
        writebyte(f, 0x80 | (u & 0x3F));
        return;
    }

    writebyte(f, 0xF0 | ((u >> 18) & 0x07));
    writebyte(f, 0x80 | ((u >> 12) & 0x3F));
    writebyte(f, 0x80 | ((u >> 6) & 0x3F));
    writebyte(f, 0x80 | (u & 0x3F));
}
int readbyte(FILE *f) | |||||
{ | |||||
int status; | |||||
byte c; | |||||
status = fread(&c, 1, 1, f); | |||||
if (status <= 0) return -1; | |||||
return (int)c; | |||||
} | |||||
long readutf8char(FILE *f) | |||||
{ | |||||
byte first = readbyte(f); | |||||
unicode u = 0; | |||||
if (first >= C4bytes) | |||||
{ | |||||
u |= (first & 0b00000111) << 18; | |||||
u |= (readbyte(f) & 0b00111111) << 12; | |||||
u |= (readbyte(f) & 0b00111111) << 6; | |||||
u |= readbyte(f) & 0b00111111; | |||||
} | |||||
else if (first >= C3bytes) | |||||
{ | |||||
u |= (first & 0b00001111) << 12; | |||||
u |= (readbyte(f) & 0b00111111) << 6; | |||||
u |= readbyte(f) & 0b00111111; | |||||
} | |||||
else if (first >= C2bytes) | |||||
{ | |||||
u |= (first & 0b00011111) << 6; | |||||
u |= readbyte(f) & 0b00111111; | |||||
} | |||||
else | |||||
{ | |||||
u |= first; | |||||
} | |||||
return u; | |||||
} |
.extern fread, fwrite | |||||
.text | |||||
#########################
# int readbyte(FILE *f) #
#########################
# Reads one byte from f with fread(&buf, 1, 1, f).
# Returns the byte (0..255) in %eax, or -1 if fread did not deliver one item.
#
# Stack frame:
#   8(%ebp)   f
#  -4(%ebp)   4-byte buffer, zeroed first so the upper 24 bits stay 0
#  -8(%ebp)   padding, so %esp is 16-byte aligned at the call as the
#             i386 System V ABI requires
        .globl  readbyte
readbyte:
        pushl   %ebp
        movl    %esp,%ebp
        subl    $8,%esp                 # buffer + alignment padding
        movl    $0,-4(%ebp)             # initialize buffer with 0
        leal    -4(%ebp),%eax           # address of the buffer
        pushl   8(%ebp)                 # FILE *stream
        pushl   $1                      # nmemb
        pushl   $1                      # size
        pushl   %eax                    # void *ptr
        call    fread
        cmpl    $1,%eax                 # exactly one item read?
        jne     rb_e                    # no: report failure
        movl    -4(%ebp),%eax           # yes: return the byte
        jmp     rb_x
rb_e:   movl    $-1,%eax                # return -1
rb_x:   movl    %ebp,%esp               # drops locals and pushed arguments
        popl    %ebp
        ret
##############################
# long readutf8char(FILE *f) #
##############################
# Reads one UTF-8 encoded character from f and returns its code point in
# %eax. Returns -1 at end of file, including end of file in the middle of
# a multi-byte sequence. A first byte below 0xC0 is returned unchanged.
#
# Stack frame:
#   8(%ebp)   f
#  -4(%ebp)   code point assembled so far
#  -8(%ebp)   current byte
# -12(%ebp)   payload mask for the current byte
# -16(%ebp)   bit position where the current payload is placed
# The frame is 24 bytes and every call pushes 12 bytes of padding plus one
# argument, so %esp is 16-byte aligned at each call (i386 System V ABI).
        .globl  readutf8char
readutf8char:
        pushl   %ebp
        movl    %esp,%ebp
        subl    $24,%esp
        # Read the first byte into %eax.
        subl    $12,%esp                # alignment padding
        pushl   8(%ebp)                 # f
        call    readbyte
        addl    $16,%esp
        # Signed compare: anything below 0xC0 is a single byte and is
        # returned as is. This also covers the -1 end-of-file result.
        cmpl    $0xC0,%eax
        jl      ru8_x
        movl    $0,-4(%ebp)             # zero out the code point
        movl    %eax,-8(%ebp)           # store the current byte
        cmpl    $0xF0,%eax              # first byte >= 0xF0 means 4 bytes
        jge     ru8_4bytes
        cmpl    $0xE0,%eax              # >= 0xE0 means 3 bytes
        jge     ru8_3bytes
        # else 2 bytes (1 byte was handled above)
ru8_2bytes:
        movl    $6,-16(%ebp)            # lead payload goes to bit 6
        movl    $0x1F,-12(%ebp)         # lead byte carries its lowest 5 bits
        jmp     ru8_loop
ru8_3bytes:
        movl    $12,-16(%ebp)           # lead payload goes to bit 12
        movl    $0x0F,-12(%ebp)         # lead byte carries its lowest 4 bits
        jmp     ru8_loop
ru8_4bytes:
        movl    $18,-16(%ebp)           # lead payload goes to bit 18
        movl    $0x07,-12(%ebp)         # lead byte carries its lowest 3 bits
ru8_loop:
        movl    -8(%ebp),%eax           # current byte
        andl    -12(%ebp),%eax          # keep only its payload bits
        movl    -16(%ebp),%ecx          # shift count must be in %cl
        shll    %cl,%eax                # move payload into position
        orl     %eax,-4(%ebp)           # merge into the code point
        cmpl    $0,-16(%ebp)            # position 0 was just filled:
        je      ru8_done                # no more bytes to read
        movl    $0x3F,-12(%ebp)         # continuation bytes carry 6 bits
        subl    $6,-16(%ebp)            # next payload sits 6 bits lower
        # Read the next byte into -8(%ebp).
        subl    $12,%esp                # alignment padding
        pushl   8(%ebp)                 # f
        call    readbyte
        addl    $16,%esp
        cmpl    $0,%eax                 # end of file inside a sequence:
        jl      ru8_x                   # return the -1 already in %eax
        movl    %eax,-8(%ebp)
        jmp     ru8_loop
ru8_done:
        movl    -4(%ebp),%eax           # return the assembled code point
ru8_x:  movl    %ebp,%esp
        popl    %ebp
        ret
############################################
# void writebyte(FILE *f, unsigned char b) #
############################################
# Writes the byte b to f with fwrite(&b, 1, 1, f). The address passed is
# that of the argument slot itself; on little-endian x86 its first byte is
# b. The result of fwrite is not checked.
#
# Arguments are addressed through %ebp so that they do not depend on the
# current value of %esp. 8 bytes of padding keep %esp 16-byte aligned at
# the call, as the i386 System V ABI requires.
        .globl  writebyte
writebyte:
        pushl   %ebp
        movl    %esp,%ebp
        subl    $8,%esp                 # alignment padding
        leal    12(%ebp),%eax           # address of b
        pushl   8(%ebp)                 # FILE *stream
        pushl   $1                      # nmemb
        pushl   $1                      # size
        pushl   %eax                    # void *ptr
        call    fwrite
        movl    %ebp,%esp               # drops padding and pushed arguments
        popl    %ebp
        ret
################################################
# void writeutf8char(FILE *f, unsigned long u) #
################################################
# Encodes the code point u as UTF-8 and writes it to f with writebyte.
# u < 0x80 is written as one byte, u < 0x800 as two, u < 0x10000 as three
# and everything else as four bytes. u is unsigned, so all range checks
# use unsigned branches (jae).
#
# Stack frame:
#   8(%ebp)   f
#  12(%ebp)   u
#  -4(%ebp)   marker bits OR-ed into the byte being written
#  -8(%ebp)   shift count for the byte being written
# -12(%ebp)   payload mask for the byte being written
# Each byte written is:  -4(%ebp) | ((u >> -8(%ebp)) & -12(%ebp))
# The frame is 24 bytes and every call pushes 8 bytes of padding plus two
# arguments, so %esp is 16-byte aligned at each call (i386 System V ABI).
        .globl  writeutf8char
writeutf8char:
        pushl   %ebp
        movl    %esp,%ebp
        subl    $24,%esp
        movl    12(%ebp),%eax           # u
        cmpl    $0x0080,%eax            # needs more than one byte?
        jae     wu8_main
        subl    $8,%esp                 # alignment padding
        pushl   %eax                    # unsigned char b
        pushl   8(%ebp)                 # FILE *f
        call    writebyte
        jmp     wu8_x
        # wu8_main selects the lead-byte parameters from the size of u.
wu8_main:
        cmpl    $0x10000,%eax           # u >= 0x10000 needs 4 bytes
        jae     wu8_4bytes
        cmpl    $0x0800,%eax            # u >= 0x0800 needs 3 bytes
        jae     wu8_3bytes
        # otherwise u >= 0x0080 and needs 2 bytes
wu8_2bytes:
        movl    $6,-8(%ebp)             # lead byte carries bits 6 and up
        movl    $0xC0,-4(%ebp)          # lead marker 110xxxxx
        movl    $0x1F,-12(%ebp)         # 5 payload bits
        jmp     wu8_loop
wu8_3bytes:
        movl    $12,-8(%ebp)            # lead byte carries bits 12 and up
        movl    $0xE0,-4(%ebp)          # lead marker 1110xxxx
        movl    $0x0F,-12(%ebp)         # 4 payload bits
        jmp     wu8_loop
wu8_4bytes:
        movl    $18,-8(%ebp)            # lead byte carries bits 18 and up
        movl    $0xF0,-4(%ebp)          # lead marker 11110xxx
        movl    $0x07,-12(%ebp)         # 3 payload bits
wu8_loop:
        movl    12(%ebp),%eax           # u
        movl    -8(%ebp),%ecx           # shift count must be in %cl
        shrl    %cl,%eax                # bring the wanted bits down
        andl    -12(%ebp),%eax          # keep only the payload bits
        orl     -4(%ebp),%eax           # add the marker bits
        subl    $8,%esp                 # alignment padding
        pushl   %eax                    # unsigned char b
        pushl   8(%ebp)                 # FILE *f
        call    writebyte
        addl    $16,%esp                # drop padding and arguments
        cmpl    $0,-8(%ebp)             # shift 0 means the last byte
        je      wu8_x                   # has just been written
        movl    $0x3F,-12(%ebp)         # continuation bytes: 6 payload bits
        movl    $0x80,-4(%ebp)          # continuation marker 10xxxxxx
        subl    $6,-8(%ebp)             # next byte carries the next 6 bits
        jmp     wu8_loop
wu8_x:  movl    %ebp,%esp
        popl    %ebp
        ret
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#include <wchar.h> | |||||
#define FALSE 0 | |||||
#define TRUE 1 | |||||
typedef unsigned char byte; | |||||
typedef unsigned long unicode; | |||||
extern int readbyte (FILE *f); | |||||
extern long readutf8char (FILE *f); | |||||
extern void writebyte (FILE *f, byte b); | |||||
extern void writeutf8char (FILE *f, unicode u); | |||||
/* Print `message` as an error line on standard output and terminate the
 * program with exit status 1. Does not return. */
void error (char *message)
{
    fprintf(stdout, "\nERROR: %s\n", message);
    exit(1);
}
/* Print the `n_b` bytes of `b` on standard output as a count followed by
 * a brace-enclosed, comma-separated list of two-digit hex values. */
void dump_byte_seq (byte b[], int n_b)
{
    int pos = 0;

    printf("%d bytes {", n_b);
    while (pos < n_b) {
        if (pos != 0)
            fputs(", ", stdout);
        printf("0x%02x", b[pos]);
        pos++;
    }
    fputs("}", stdout);
}
/* Print the `n_u` code points of `u` on standard output as a count
 * followed by a brace-enclosed, comma-separated list of hex values. */
void dump_unicode_seq (unicode u[], int n_u)
{
    int pos = 0;

    printf("%d chars {", n_u);
    while (pos < n_u) {
        if (pos != 0)
            fputs(", ", stdout);
        printf("0x%lx", u[pos]);
        pos++;
    }
    fputs("}", stdout);
}
/* Compare the result sequence `a` (length n_a) with the expected sequence
 * `b` (length n_b). Prints "OK" when lengths and contents agree;
 * otherwise prints both sequences. */
void compare_byte_seqs (byte a[], int n_a, byte b[], int n_b)
{
    int same = (n_a == n_b);
    int k;

    /* Contents are only compared when the lengths agree. */
    for (k = 0; same && k < n_a; k++)
        same = (a[k] == b[k]);

    if (!same) {
        printf("\n Error: Result is ");
        dump_byte_seq(a, n_a);
        printf("\n but should be ");
        dump_byte_seq(b, n_b);
        printf("\n");
        return;
    }
    printf("OK\n");
}
/* Compare the result sequence `a` (length n_a) with the expected sequence
 * `b` (length n_b). Prints "OK" when lengths and contents agree;
 * otherwise prints both sequences. */
void compare_unicode_seqs (unicode a[], int n_a, unicode b[], int n_b)
{
    int same = (n_a == n_b);
    int k;

    /* Contents are only compared when the lengths agree. */
    for (k = 0; same && k < n_a; k++)
        same = (a[k] == b[k]);

    if (!same) {
        printf("\n Error: Result is ");
        dump_unicode_seq(a, n_a);
        printf("\n but should be ");
        dump_unicode_seq(b, n_b);
        printf("\n");
        return;
    }
    printf("OK\n");
}
int read_test_byte (FILE *f) | |||||
{ | |||||
int status; | |||||
byte c; | |||||
status = fread(&c, 1, 1, f); | |||||
if (status <= 0) return -1; | |||||
return (int)c; | |||||
} | |||||
/* Read up to 200 bytes from the file `f_name` and compare them with the
 * expected bytes `data` (length n_data); the verdict is printed by
 * compare_byte_seqs. Calls error() if the file cannot be opened. */
void test_byte_file (char *f_name, byte data[], int n_data)
{
    byte file_bytes[200];
    int n_file_bytes = 0;
    FILE *in = fopen(f_name, "rb");

    if (in == NULL) error("Could not open file!");

    while (n_file_bytes < 200) {
        int b = read_test_byte(in);
        if (b < 0) break;
        file_bytes[n_file_bytes] = b;
        n_file_bytes++;
    }
    fclose(in);

    compare_byte_seqs(file_bytes, n_file_bytes, data, n_data);
}
/* Create (or truncate) the file `f_name` and fill it with the `n_b_seq`
 * bytes of `b_seq`, written as one item. Calls error() if the file cannot
 * be created; the fwrite result is not checked. */
void create_byte_file (char *f_name, byte b_seq[], int n_b_seq)
{
    FILE *out = fopen(f_name, "wb");

    if (out == NULL) error("Could not create file!");

    fwrite(b_seq, n_b_seq, 1, out);
    fclose(out);
}
/* Test #1 */ | |||||
byte b_seq_1[] = { 4, 0, 255, 17, 200 }; | |||||
void test_1 (void) | |||||
{ | |||||
int n_bytes = sizeof(b_seq_1)/sizeof(b_seq_1[0]); | |||||
int i; | |||||
FILE *f = fopen("test1.txt", "wb"); | |||||
if (f == NULL) error("Could not create test1.txt!"); | |||||
for (i = 0; i < n_bytes; i++) | |||||
writebyte(f, b_seq_1[i]); | |||||
fclose(f); | |||||
test_byte_file("test1.txt", b_seq_1, n_bytes); | |||||
} | |||||
/* Test #2 */ | |||||
unicode u_seq_2[] = { 0x24, 0x20, 0x41, 0x3d, 0x32, 0x78 }; /* "$ A=2x" */ | |||||
byte b_seq_2[] = { '$', ' ', 'A', '=', '2', 'x' }; | |||||
void test_2 (void) | |||||
{ | |||||
int n_u = sizeof(u_seq_2)/sizeof(u_seq_2[0]); | |||||
int n_b = sizeof(b_seq_2)/sizeof(b_seq_2[0]); | |||||
int i; | |||||
FILE *f = fopen("test2.txt", "wb"); | |||||
if (f == NULL) error("Could not create test2.txt!"); | |||||
for (i = 0; i < n_u; i++) | |||||
writeutf8char(f, u_seq_2[i]); | |||||
fclose(f); | |||||
test_byte_file("test2.txt", b_seq_2, n_b); | |||||
} | |||||
/* Test #3 */ | |||||
unicode u_seq_3[] = { 0x35, 0xa2, 0x20, 0x429, 0x3c9 }; /* "5¢ Щω" */ | |||||
byte b_seq_3[] = { '5', 0xc2, 0xa2, ' ', 0xd0, 0xa9, 0xcf, 0x89 }; | |||||
void test_3 (void) | |||||
{ | |||||
int n_u = sizeof(u_seq_3)/sizeof(u_seq_3[0]); | |||||
int n_b = sizeof(b_seq_3)/sizeof(b_seq_3[0]); | |||||
int i; | |||||
FILE *f = fopen("test3.txt", "wb"); | |||||
if (f == NULL) error("Could not create test3.txt!"); | |||||
for (i = 0; i < n_u; i++) | |||||
writeutf8char(f, u_seq_3[i]); | |||||
fclose(f); | |||||
test_byte_file("test3.txt", b_seq_3, n_b); | |||||
} | |||||
/* Test #4 */ | |||||
unicode u_seq_4[] = { 0x20ac, 0x3d, 0x10348, 0x2658 }; /* "€=𐍈♘" */ | |||||
byte b_seq_4[] = { 0xe2, 0x82, 0xac, '=', 0xf0, 0x90, 0x8d, 0x88, | |||||
0xe2, 0x99, 0x98}; | |||||
void test_4 (void) | |||||
{ | |||||
int n_u = sizeof(u_seq_4)/sizeof(u_seq_4[0]); | |||||
int n_b = sizeof(b_seq_4)/sizeof(b_seq_4[0]); | |||||
int i; | |||||
FILE *f = fopen("test4.txt", "wb"); | |||||
if (f == NULL) error("Could not create test4.txt!"); | |||||
for (i = 0; i < n_u; i++) | |||||
writeutf8char(f, u_seq_4[i]); | |||||
fclose(f); | |||||
test_byte_file("test4.txt", b_seq_4, n_b); | |||||
} | |||||
/* Test #5 */ | |||||
void test_5 (void) | |||||
{ | |||||
byte data[200]; | |||||
int n_data = 0; | |||||
int n_b_seq_1 = sizeof(b_seq_1)/sizeof(b_seq_1[0]); | |||||
FILE *f; | |||||
create_byte_file ("test5.txt", b_seq_1, n_b_seq_1); | |||||
f = fopen("test5.txt", "rb"); | |||||
if (f == NULL) error("Could not read test5.txt!"); | |||||
while (n_data < 200) { | |||||
int b = readbyte(f); | |||||
if (b < 0) break; | |||||
data[n_data++] = (byte)b; | |||||
} | |||||
fclose(f); | |||||
compare_byte_seqs(data, n_data, b_seq_1, n_b_seq_1); | |||||
} | |||||
/* Test #6 */ | |||||
void test_6 (void) | |||||
{ | |||||
unicode data[200]; | |||||
int n_data = 0; | |||||
int n_b_seq_2 = sizeof(b_seq_2)/sizeof(b_seq_2[0]); | |||||
int n_u_seq_2 = sizeof(u_seq_2)/sizeof(u_seq_2[0]); | |||||
FILE *f; | |||||
create_byte_file ("test6.txt", b_seq_2, n_b_seq_2); | |||||
f = fopen("test6.txt", "rb"); | |||||
if (f == NULL) error("Could not read test6.txt!"); | |||||
while (n_data < 200) { | |||||
long u = readutf8char(f); | |||||
if (u < 0) break; | |||||
data[n_data++] = (unicode)u; | |||||
} | |||||
fclose(f); | |||||
compare_unicode_seqs(data, n_data, u_seq_2, n_u_seq_2); | |||||
} | |||||
/* Test #7 */ | |||||
void test_7 (void) | |||||
{ | |||||
unicode data[200]; | |||||
int n_data = 0; | |||||
int n_b_seq_3 = sizeof(b_seq_3)/sizeof(b_seq_3[0]); | |||||
int n_u_seq_3 = sizeof(u_seq_3)/sizeof(u_seq_3[0]); | |||||
FILE *f; | |||||
create_byte_file ("test7.txt", b_seq_3, n_b_seq_3); | |||||
f = fopen("test7.txt", "rb"); | |||||
if (f == NULL) error("Could not read test7.txt!"); | |||||
while (n_data < 200) { | |||||
long u = readutf8char(f); | |||||
if (u < 0) break; | |||||
data[n_data++] = (unicode)u; | |||||
} | |||||
fclose(f); | |||||
compare_unicode_seqs(data, n_data, u_seq_3, n_u_seq_3); | |||||
} | |||||
/* Test #8 */ | |||||
void test_8 (void) | |||||
{ | |||||
unicode data[200]; | |||||
int n_data = 0; | |||||
int n_b_seq_4 = sizeof(b_seq_4)/sizeof(b_seq_4[0]); | |||||
int n_u_seq_4 = sizeof(u_seq_4)/sizeof(u_seq_4[0]); | |||||
FILE *f; | |||||
create_byte_file ("test8.txt", b_seq_4, n_b_seq_4); | |||||
f = fopen("test8.txt", "rb"); | |||||
if (f == NULL) error("Could not read test8.txt!"); | |||||
while (n_data < 200) { | |||||
long u = readutf8char(f); | |||||
if (u < 0) break; | |||||
data[n_data++] = (unicode)u; | |||||
} | |||||
fclose(f); | |||||
compare_unicode_seqs(data, n_data, u_seq_4, n_u_seq_4); | |||||
} | |||||
/* Main program */ | |||||
int main (void) | |||||
{ | |||||
printf("Test 1 (write a byte): "); test_1(); | |||||
printf("Test 2 (write 1-byte utf-8): "); test_2(); | |||||
printf("Test 3 (write 2-byte utf-8): "); test_3(); | |||||
printf("Test 4 (write 3+4-byte utf-8): "); test_4(); | |||||
printf("Test 5 (read a byte): "); test_5(); | |||||
printf("Test 6 (read 1-byte utf-8): "); test_6(); | |||||
printf("Test 7 (read 2-byte utf-8): "); test_7(); | |||||
printf("Test 8 (read 3+4-byte utf-8): "); test_8(); | |||||
return 0; | |||||
} |