@@ -0,0 +1,18 @@ | |||
# Builds the test driver together with the assembly implementation as a
# 32-bit binary and runs it under valgrind.
CC=gcc
CFLAGS=-g -m32

# Scratch files created by the test driver (test1.txt .. test8.txt).
TESTFILES=\
	test1.txt test2.txt test3.txt \
	test4.txt test5.txt test6.txt \
	test7.txt test8.txt

# Compile and link the C test driver with the assembly source.
test: test-oblig3.c oppgave.s
	$(CC) $(CFLAGS) -o test $^

# Run the tests, then remove the scratch files they produced.
run-test: test
	valgrind ./test
	rm -f $(TESTFILES)

# Remove the binary and exactly the scratch files listed in TESTFILES.
# (A recursive `find . -name 'test*.txt'` would also delete unrelated files
# in subdirectories and mis-split names containing spaces.)
clean:
	rm -f test
	rm -f $(TESTFILES)

.PHONY: run-test clean
@@ -0,0 +1,112 @@ | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
// A raw octet as it is stored in the file.
typedef unsigned char byte;
// A single Unicode code point.
typedef unsigned long unicode;

// Smallest code point whose UTF-8 form needs 2, 3 and 4 bytes respectively.
#define U2bytes 0x0080
#define U3bytes 0x0800
#define U4bytes 0x10000

// Smallest value of the first byte that announces a 2-, 3- and 4-byte
// sequence respectively.
#define C2bytes 0xC0
#define C3bytes 0xE0
#define C4bytes 0xF0
// Append the single octet `b` to the stream `f`.
// The result of fwrite is not inspected, so a failed write goes unreported.
void writebyte(FILE *f, byte b)
{
    const byte octet = b;

    fwrite(&octet, sizeof octet, 1, f);
}
// Write the code point `u` to `f` as a UTF-8 sequence of 1 to 4 bytes.
//
// Code points below U2bytes are written as one byte. Otherwise a lead byte
// carrying the topmost data bits is written, followed by one continuation
// byte (10xxxxxx) for every remaining group of 6 bits. Bits above bit 20 of
// `u` are not written.
void writeutf8char(FILE *f, unicode u)
{
    int shift;      // position of the bit group currently being written
    int marker;     // fixed high bits of the lead byte
    int payload;    // mask for the data bits that fit in the lead byte

    if (u < U2bytes) {
        writebyte(f, (byte)u);
        return;
    }

    if (u >= U4bytes) {
        shift = 18;
        marker = 0xF0;
        payload = 0x07;
    } else if (u >= U3bytes) {
        shift = 12;
        marker = 0xE0;
        payload = 0x0F;
    } else {
        shift = 6;
        marker = 0xC0;
        payload = 0x1F;
    }

    // Lead byte first, then continuation bytes until the lowest group is out.
    writebyte(f, marker | ((u >> shift) & payload));
    while (shift != 0) {
        shift -= 6;
        writebyte(f, 0x80 | ((u >> shift) & 0x3F));
    }
}
int readbyte(FILE *f) | |||
{ | |||
int status; | |||
byte c; | |||
status = fread(&c, 1, 1, f); | |||
if (status <= 0) return -1; | |||
return (int)c; | |||
} | |||
long readutf8char(FILE *f) | |||
{ | |||
unicode u = 0; | |||
unicode ch = readbyte(f); | |||
if (ch == -1) | |||
return -1; | |||
if (ch < C2bytes) | |||
return ch; | |||
int left; | |||
int mask; | |||
if (ch >= C4bytes) | |||
{ | |||
left = 18; | |||
mask = 0b00000111; | |||
} | |||
else if (ch >= C3bytes) | |||
{ | |||
left = 12; | |||
mask = 0b00001111; | |||
} | |||
else | |||
{ | |||
left = 6; | |||
mask = 0b00011111; | |||
} | |||
while (1) | |||
{ | |||
u |= (ch & mask) << left; | |||
if (left == 0) | |||
return u; | |||
left -= 6; | |||
mask = 0b00111111; | |||
ch = readbyte(f); | |||
} | |||
} |
@@ -0,0 +1,185 @@ | |||
.file "fasit.c" | |||
.text | |||
# writebyte (compiler-generated 32-bit x86 listing).
# Stores the low byte of the second stack argument in a local and passes the
# address of that local to fwrite with size 1, count 1 and the first stack
# argument as the stream. fwrite's return value is not examined.
.globl writebyte | |||
.type writebyte, @function | |||
writebyte: | |||
.LFB5: | |||
.cfi_startproc | |||
pushl %ebp | |||
.cfi_def_cfa_offset 8 | |||
.cfi_offset 5, -8 | |||
movl %esp, %ebp | |||
.cfi_def_cfa_register 5 | |||
subl $24, %esp | |||
# Copy the low byte of the second argument into the local at -12(%ebp).
movl 12(%ebp), %eax | |||
movb %al, -12(%ebp) | |||
# Push fwrite's arguments last-to-first: stream, 1, 1, address of the local.
pushl 8(%ebp) | |||
pushl $1 | |||
pushl $1 | |||
leal -12(%ebp), %eax | |||
pushl %eax | |||
call fwrite | |||
# Drop the four pushed arguments.
addl $16, %esp | |||
nop | |||
leave | |||
.cfi_restore 5 | |||
.cfi_def_cfa 4, 4 | |||
ret | |||
.cfi_endproc | |||
.LFE5: | |||
.size writebyte, .-writebyte | |||
# writeutf8char (compiler-generated 32-bit x86 listing).
# 8(%ebp) is passed through to writebyte as its first argument; 12(%ebp) is
# the value being encoded and is compared as an unsigned number (ja/jbe).
# -12(%ebp) holds the current shift count.
#   value <= 127           : one byte, the low 8 bits of the value
#   value >  65535         : lead byte 0xF0 | ((value >> 18) & 7)
#   2047 < value <= 65535  : lead byte 0xE0 | ((value >> 12) & 15)
#   127  < value <= 2047   : lead byte 0xC0 | ((value >>  6) & 31)
# After the lead byte, .L6 writes 0x80 | ((value >> shift) & 63) for each
# shift obtained by subtracting 6, until the shift is no longer positive.
.globl writeutf8char | |||
.type writeutf8char, @function | |||
writeutf8char: | |||
.LFB6: | |||
.cfi_startproc | |||
pushl %ebp | |||
.cfi_def_cfa_offset 8 | |||
.cfi_offset 5, -8 | |||
movl %esp, %ebp | |||
.cfi_def_cfa_register 5 | |||
subl $24, %esp | |||
# Values above 127 take the multi-byte path at .L3.
cmpl $127, 12(%ebp) | |||
ja .L3 | |||
# Single-byte case: pass the zero-extended low byte to writebyte, then return.
movl 12(%ebp), %eax | |||
movzbl %al, %eax | |||
# esp is lowered by 8 before the two pushes; the addl $16 after the call
# undoes both the adjustment and the pushes.
subl $8, %esp | |||
pushl %eax | |||
pushl 8(%ebp) | |||
call writebyte | |||
addl $16, %esp | |||
jmp .L2 | |||
.L3: | |||
# Values up to 65535 are handled at .L5; larger ones get a 4-byte sequence.
cmpl $65535, 12(%ebp) | |||
jbe .L5 | |||
# 4-byte lead: shift 18, keep 3 bits, OR with -16 (low byte 0xF0).
movl $18, -12(%ebp) | |||
movl -12(%ebp), %eax | |||
movl 12(%ebp), %edx | |||
movl %eax, %ecx | |||
shrl %cl, %edx | |||
movl %edx, %eax | |||
andl $7, %eax | |||
orl $-16, %eax | |||
movzbl %al, %eax | |||
subl $8, %esp | |||
pushl %eax | |||
pushl 8(%ebp) | |||
call writebyte | |||
addl $16, %esp | |||
jmp .L6 | |||
.L5: | |||
# Values up to 2047 are handled at .L7; larger ones get a 3-byte sequence.
cmpl $2047, 12(%ebp) | |||
jbe .L7 | |||
# 3-byte lead: shift 12, keep 4 bits, OR with -32 (low byte 0xE0).
movl $12, -12(%ebp) | |||
movl -12(%ebp), %eax | |||
movl 12(%ebp), %edx | |||
movl %eax, %ecx | |||
shrl %cl, %edx | |||
movl %edx, %eax | |||
andl $15, %eax | |||
orl $-32, %eax | |||
movzbl %al, %eax | |||
subl $8, %esp | |||
pushl %eax | |||
pushl 8(%ebp) | |||
call writebyte | |||
addl $16, %esp | |||
jmp .L6 | |||
.L7: | |||
# 2-byte lead: shift 6, keep 5 bits, OR with -64 (low byte 0xC0).
movl $6, -12(%ebp) | |||
movl -12(%ebp), %eax | |||
movl 12(%ebp), %edx | |||
movl %eax, %ecx | |||
shrl %cl, %edx | |||
movl %edx, %eax | |||
andl $31, %eax | |||
orl $-64, %eax | |||
movzbl %al, %eax | |||
subl $8, %esp | |||
pushl %eax | |||
pushl 8(%ebp) | |||
call writebyte | |||
addl $16, %esp | |||
.L6: | |||
# Continuation bytes: lower the shift by 6, keep 6 bits, OR with -128
# (low byte 0x80), write, and repeat while the shift is still positive.
subl $6, -12(%ebp) | |||
movl -12(%ebp), %eax | |||
movl 12(%ebp), %edx | |||
movl %eax, %ecx | |||
shrl %cl, %edx | |||
movl %edx, %eax | |||
andl $63, %eax | |||
orl $-128, %eax | |||
movzbl %al, %eax | |||
subl $8, %esp | |||
pushl %eax | |||
pushl 8(%ebp) | |||
call writebyte | |||
addl $16, %esp | |||
cmpl $0, -12(%ebp) | |||
jg .L6 | |||
.L2: | |||
leave | |||
.cfi_restore 5 | |||
.cfi_def_cfa 4, 4 | |||
ret | |||
.cfi_endproc | |||
.LFE6: | |||
.size writeutf8char, .-writeutf8char | |||
# readbyte (compiler-generated 32-bit x86 listing).
# Calls fread with a one-byte buffer at -13(%ebp), size 1, count 1 and the
# stream taken from 8(%ebp). Returns the byte zero-extended in %eax when
# fread's result is greater than zero, otherwise -1.
.globl readbyte | |||
.type readbyte, @function | |||
readbyte: | |||
.LFB7: | |||
.cfi_startproc | |||
pushl %ebp | |||
.cfi_def_cfa_offset 8 | |||
.cfi_offset 5, -8 | |||
movl %esp, %ebp | |||
.cfi_def_cfa_register 5 | |||
subl $24, %esp | |||
# Push fread's arguments last-to-first: stream, 1, 1, address of the buffer.
pushl 8(%ebp) | |||
pushl $1 | |||
pushl $1 | |||
leal -13(%ebp), %eax | |||
pushl %eax | |||
call fread | |||
addl $16, %esp | |||
# Keep fread's result in -12(%ebp) and test it as a signed value.
movl %eax, -12(%ebp) | |||
cmpl $0, -12(%ebp) | |||
jg .L9 | |||
# Nothing was read: return -1.
movl $-1, %eax | |||
jmp .L11 | |||
.L9: | |||
# One byte was read: return it zero-extended.
movzbl -13(%ebp), %eax | |||
movzbl %al, %eax | |||
.L11: | |||
leave | |||
.cfi_restore 5 | |||
.cfi_def_cfa 4, 4 | |||
ret | |||
.cfi_endproc | |||
.LFE7: | |||
.size readbyte, .-readbyte | |||
# readutf8char (compiler-generated 32-bit x86 listing).
# In this listing the function only forwards its argument to readbyte and
# returns whatever readbyte left in %eax; no multi-byte decoding is present.
.globl readutf8char | |||
.type readutf8char, @function | |||
readutf8char: | |||
.LFB8: | |||
.cfi_startproc | |||
pushl %ebp | |||
.cfi_def_cfa_offset 8 | |||
.cfi_offset 5, -8 | |||
movl %esp, %ebp | |||
.cfi_def_cfa_register 5 | |||
subl $8, %esp | |||
subl $12, %esp | |||
# Pass the argument at 8(%ebp) on to readbyte.
pushl 8(%ebp) | |||
call readbyte | |||
addl $16, %esp | |||
# %eax is not modified after the call, so readbyte's result is the return value.
leave | |||
.cfi_restore 5 | |||
.cfi_def_cfa 4, 4 | |||
ret | |||
.cfi_endproc | |||
.LFE8: | |||
.size readutf8char, .-readutf8char | |||
.ident "GCC: (GNU) 6.3.1 20170306" | |||
.section .note.GNU-stack,"",@progbits |
@@ -0,0 +1,54 @@ | |||
.extern fread, fwrite | |||
.text | |||
.globl readbyte | |||
# Name: readbyte
# Synopsis: Reads one byte from a binary file.
# C signature: int readbyte (FILE *f)
# Registers:
# NOTE: skeleton only -- the body below just sets up and tears down the
# stack frame; nothing is read and %eax is never assigned.
readbyte: | |||
pushl %ebp # Standard function prologue
movl %esp,%ebp #
rb_x: popl %ebp # Standard
ret # return.
.globl readutf8char | |||
# Name: readutf8char
# Synopsis: Reads one Unicode character from a binary file.
# C signature: long readutf8char (FILE *f)
# Registers:
# NOTE: skeleton only -- the body below just sets up and tears down the
# stack frame; nothing is read and %eax is never assigned.
readutf8char: | |||
pushl %ebp # Standard function prologue
movl %esp,%ebp #
popl %ebp # Standard
ret # return.
.globl writebyte | |||
# Name: writebyte
# Synopsis: Writes one byte to a binary file.
# C signature: void writebyte (FILE *f, unsigned char b)
# Registers:
# NOTE: skeleton only -- the body below just sets up and tears down the
# stack frame; nothing is written.
writebyte: | |||
pushl %ebp # Standard function prologue
movl %esp,%ebp #
popl %ebp # Standard
ret # return.
.globl writeutf8char | |||
# Name: writeutf8char
# Synopsis: Writes one character, encoded as UTF-8, to a binary file.
# C signature: void writeutf8char (FILE *f, unsigned long u)
# Registers:
# NOTE: skeleton only -- the body below just sets up and tears down the
# stack frame; nothing is written.
writeutf8char: | |||
pushl %ebp # Standard function prologue
movl %esp,%ebp #
wu8_x: popl %ebp # Standard
ret # return.
@@ -0,0 +1,86 @@ | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
// One octet of file data.
typedef unsigned char byte;
// One Unicode code point.
typedef unsigned long unicode;

// Code-point thresholds: the smallest value that needs a 2-, 3- or 4-byte
// UTF-8 sequence.
#define U2bytes 0x0080
#define U3bytes 0x0800
#define U4bytes 0x10000

// First-byte thresholds: the smallest first byte that starts a 2-, 3- or
// 4-byte UTF-8 sequence.
#define C2bytes 0xC0
#define C3bytes 0xE0
#define C4bytes 0xF0
// Write the octet `b` to `f`. fwrite's result is ignored, so the caller is
// not told about a failed write.
void writebyte(FILE *f, byte b)
{
    byte buf[1] = { b };

    fwrite(buf, 1, sizeof buf, f);
}
// Write the code point `u` to `f` in UTF-8.
//
// The length of the sequence is chosen from the thresholds U2bytes, U3bytes
// and U4bytes; each case emits its lead byte followed by 10xxxxxx
// continuation bytes, most significant bits first. Bits above bit 20 of `u`
// are not written.
void writeutf8char(FILE *f, unicode u)
{
    // 1 byte: 0xxxxxxx
    if (u < U2bytes) {
        writebyte(f, u);
        return;
    }

    // 2 bytes: 110xxxxx 10xxxxxx
    if (u < U3bytes) {
        writebyte(f, 0xC0 | ((u >> 6) & 0x1F));
        writebyte(f, 0x80 | (u & 0x3F));
        return;
    }

    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if (u < U4bytes) {
        writebyte(f, 0xE0 | ((u >> 12) & 0x0F));
        writebyte(f, 0x80 | ((u >> 6) & 0x3F));
        writebyte(f, 0x80 | (u & 0x3F));
        return;
    }

    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    writebyte(f, 0xF0 | ((u >> 18) & 0x07));
    writebyte(f, 0x80 | ((u >> 12) & 0x3F));
    writebyte(f, 0x80 | ((u >> 6) & 0x3F));
    writebyte(f, 0x80 | (u & 0x3F));
}
int readbyte(FILE *f) | |||
{ | |||
int status; | |||
byte c; | |||
status = fread(&c, 1, 1, f); | |||
if (status <= 0) return -1; | |||
return (int)c; | |||
} | |||
long readutf8char(FILE *f) | |||
{ | |||
byte first = readbyte(f); | |||
unicode u = 0; | |||
if (first >= C4bytes) | |||
{ | |||
u |= (first & 0b00000111) << 18; | |||
u |= (readbyte(f) & 0b00111111) << 12; | |||
u |= (readbyte(f) & 0b00111111) << 6; | |||
u |= readbyte(f) & 0b00111111; | |||
} | |||
else if (first >= C3bytes) | |||
{ | |||
u |= (first & 0b00001111) << 12; | |||
u |= (readbyte(f) & 0b00111111) << 6; | |||
u |= readbyte(f) & 0b00111111; | |||
} | |||
else if (first >= C2bytes) | |||
{ | |||
u |= (first & 0b00011111) << 6; | |||
u |= readbyte(f) & 0b00111111; | |||
} | |||
else | |||
{ | |||
u |= first; | |||
} | |||
return u; | |||
} |
@@ -0,0 +1,210 @@ | |||
.extern fread, fwrite | |||
.text | |||
#########################
# int readbyte(FILE *f) #
#########################
# Reads one byte from f with fread(&buf, 1, 1, f).
# Returns the byte (0..255) in %eax, or -1 when fread returns 0.
#
# The 4-byte local at -4(%ebp) is cleared first; fread stores the byte at its
# lowest address, so loading the whole word afterwards yields the byte
# zero-extended.
	.globl	readbyte
readbyte:
	pushl	%ebp
	movl	%esp,%ebp
	subl	$4,%esp			# one 4-byte local: the read buffer
	movl	$0,-4(%ebp)		# clear all four bytes of the buffer
	leal	-4(%ebp),%edx		# edx = address of the buffer
	pushl	8(%ebp)			# FILE *stream
	pushl	$1			# nmemb
	pushl	$1			# size
	pushl	%edx			# void *ptr
	call	fread
	testl	%eax,%eax		# fread returned 0?
	jz	rb_e			# then report -1
	movl	-4(%ebp),%eax		# otherwise return the byte
	jmp	rb_x
rb_e:	movl	$-1,%eax		# nothing was read
rb_x:	leave				# restore esp and ebp (also drops the pushed arguments)
	ret
##############################
# long readutf8char(FILE *f) #
##############################
# Decodes one UTF-8 sequence from f using readbyte and returns the code point
# in %eax. Returns -1 when readbyte reports -1, whether that happens on the
# first byte or on a continuation byte (previously a -1 received for a
# continuation byte was masked and merged into the result as data).
# A first byte below 0xC0 is returned unchanged.
#
# Locals, valid from ru8_main on:
#    -4(%ebp): code point being assembled
#    -8(%ebp): current byte
#   -12(%ebp): mask selecting the data bits of the current byte
#  -16(%ebp): shift to apply to those data bits
	.globl	readutf8char
readutf8char:
	pushl	%ebp
	movl	%esp,%ebp

	# Read the first byte into eax
	pushl	8(%ebp)			# f
	call	readbyte
	addl	$4,%esp			# drop the argument

	# Signed comparison: -1 and every byte below 0xC0 are returned as is.
	cmpl	$0xC0,%eax
	jge	ru8_main
	jmp	ru8_x

ru8_main:
	subl	$16,%esp		# allocate the 4 locals
	movl	$0,-4(%ebp)		# zero out the code point
	movl	%eax,-8(%ebp)		# store the current byte

	cmpl	$0xF0,%eax		# first byte >= 0xF0 means 4 bytes
	jge	ru8_4bytes
	cmpl	$0xE0,%eax		# >= 0xE0 means 3 bytes
	jge	ru8_3bytes
	jmp	ru8_2bytes		# else 2 bytes (1 byte was handled above)

ru8_4bytes:
	movl	$18,-16(%ebp)		# first byte's bits go to bit 18 and up
	movl	$0x07,-12(%ebp)		# first byte carries 3 data bits
	jmp	ru8_loop

ru8_3bytes:
	movl	$12,-16(%ebp)		# first byte's bits go to bit 12 and up
	movl	$0x0F,-12(%ebp)		# first byte carries 4 data bits
	jmp	ru8_loop

ru8_2bytes:
	movl	$6,-16(%ebp)		# first byte's bits go to bit 6 and up
	movl	$0x1F,-12(%ebp)		# first byte carries 5 data bits

ru8_loop:
	movl	-8(%ebp),%eax		# current byte
	andl	-12(%ebp),%eax		# keep only its data bits
	movl	-16(%ebp),%ecx		# shift count must be in cl for shll
	shll	%cl,%eax		# move the bits into position
	orl	%eax,-4(%ebp)		# merge them into the code point

	cmpl	$0,-16(%ebp)		# shift 0 means the last byte was just merged
	je	ru8_done

	movl	$0x3F,-12(%ebp)		# continuation bytes carry 6 data bits
	subl	$6,-16(%ebp)		# next group sits 6 bits lower

	# Read the next byte
	pushl	8(%ebp)			# f
	call	readbyte
	addl	$4,%esp			# drop the argument
	cmpl	$0,%eax			# -1: the input ended inside the sequence,
	jl	ru8_x			# so return that -1 instead of decoding it
	movl	%eax,-8(%ebp)		# otherwise it becomes the current byte
	jmp	ru8_loop

ru8_done:
	movl	-4(%ebp),%eax		# return the assembled code point

ru8_x:	movl	%ebp,%esp
	popl	%ebp
	ret
############################################
# void writebyte(FILE *f, unsigned char b) #
############################################
# Writes the byte b to f with fwrite(&b, 1, 1, f).
# The pointer handed to fwrite is the address of the argument slot itself;
# the byte sits at the lowest address of that slot. fwrite's return value is
# not examined.
	.globl	writebyte
writebyte:
	pushl	%ebp
	movl	%esp,%ebp
	leal	12(%ebp),%edx		# edx = address of the argument b
	pushl	8(%ebp)			# FILE *stream
	pushl	$1			# nmemb
	pushl	$1			# size
	pushl	%edx			# void *ptr
	call	fwrite
	leave				# restore esp and ebp (also drops the pushed arguments)
	ret
################################################
# void writeutf8char(FILE *f, unsigned long u) #
################################################
# Writes the code point u to f as a UTF-8 sequence of 1 to 4 bytes, one
# writebyte call per byte.
#
# u is unsigned, so all range checks use unsigned branches (jae). With the
# signed jge used before, a value with bit 31 set compared as negative and
# was written as a single byte.
#
# Locals, valid from wu8_main on; each byte written is
#     -4(%ebp) | ((u >> -8(%ebp)) & -12(%ebp))
#    -4(%ebp): fixed marker bits of the byte
#    -8(%ebp): shift selecting the bit group to write
#   -12(%ebp): mask for the data bits of the byte
	.globl	writeutf8char
writeutf8char:
	pushl	%ebp
	movl	%esp,%ebp

	movl	12(%ebp),%ecx		# put u in ecx
	cmpl	$0x0080,%ecx		# u >= 0x80 does not fit in one byte:
	jae	wu8_main		# skip the early-return version

	pushl	%ecx			# unsigned char b (low byte of u)
	pushl	8(%ebp)			# FILE *f
	call	writebyte
	jmp	wu8_x			# exit

# wu8_main picks the sequence length from the size of u.
wu8_main:
	subl	$12,%esp		# allocate the 3 locals
	cmpl	$0x10000,%ecx		# u >= 0x10000 needs 4 bytes
	jae	wu8_4bytes
	cmpl	$0x0800,%ecx		# u >= 0x0800 needs 3 bytes
	jae	wu8_3bytes
	jmp	wu8_2bytes		# u >= 0x0080 needs 2 bytes

wu8_4bytes:
	movl	$18,-8(%ebp)		# lead byte takes the bits from bit 18 up
	movl	$0xF0,-4(%ebp)		# lead marker 11110000
	movl	$0x07,-12(%ebp)		# lead byte carries 3 data bits
	jmp	wu8_loop

wu8_3bytes:
	movl	$12,-8(%ebp)		# lead byte takes the bits from bit 12 up
	movl	$0xE0,-4(%ebp)		# lead marker 11100000
	movl	$0x0F,-12(%ebp)		# lead byte carries 4 data bits
	jmp	wu8_loop

wu8_2bytes:
	movl	$6,-8(%ebp)		# lead byte takes the bits from bit 6 up
	movl	$0xC0,-4(%ebp)		# lead marker 11000000
	movl	$0x1F,-12(%ebp)		# lead byte carries 5 data bits

wu8_loop:
	movl	12(%ebp),%eax		# eax = u
	movl	-8(%ebp),%ecx		# shift count must be in cl for shrl
	shrl	%cl,%eax		# bring the current bit group down to bit 0
	andl	-12(%ebp),%eax		# keep only the data bits of this byte
	orl	-4(%ebp),%eax		# add the marker bits

	pushl	%eax			# unsigned char b
	pushl	8(%ebp)			# FILE *f
	call	writebyte
	addl	$8,%esp			# drop the two arguments

	cmpl	$0,-8(%ebp)		# shift 0 means the last byte was just written
	je	wu8_x

	movl	$0x3F,-12(%ebp)		# continuation bytes carry 6 data bits
	movl	$0x80,-4(%ebp)		# continuation marker 10000000
	subl	$6,-8(%ebp)		# next group sits 6 bits lower
	jmp	wu8_loop

wu8_x:	movl	%ebp,%esp
	popl	%ebp
	ret
@@ -0,0 +1,303 @@ | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <wchar.h> | |||
/* Truth values used by the comparison helpers. */
#define TRUE 1
#define FALSE 0

typedef unsigned long unicode;  /* one Unicode code point */
typedef unsigned char byte;     /* one octet of file data */

/* The four functions under test; they are only declared here. */
extern void writeutf8char (FILE *f, unicode u);
extern void writebyte (FILE *f, byte b);
extern long readutf8char (FILE *f);
extern int readbyte (FILE *f);
/* Print message, prefixed with "ERROR: " and framed by newlines, on
   standard output and terminate the program with exit status 1. */
void error (char *message)
{
  fprintf(stdout, "\nERROR: %s\n", message);
  exit(1);
}
/* Print the n_b bytes of b on standard output as
   "<n_b> bytes {0x.., 0x.., ...}" without a trailing newline. */
void dump_byte_seq (byte b[], int n_b)
{
  int k = 0;

  printf("%d bytes {", n_b);
  while (k < n_b) {
    if (k != 0)
      printf(", ");
    printf("0x%02x", b[k]);
    k++;
  }
  printf("}");
}
/* Print the n_u code points of u on standard output as
   "<n_u> chars {0x.., 0x.., ...}" without a trailing newline. */
void dump_unicode_seq (unicode u[], int n_u)
{
  int k = 0;

  printf("%d chars {", n_u);
  while (k < n_u) {
    if (k != 0)
      printf(", ");
    printf("0x%lx", u[k]);
    k++;
  }
  printf("}");
}
/* Compare the result a (n_a bytes) with the expected b (n_b bytes).
   Prints "OK" when length and contents agree; otherwise prints both
   sequences. Nothing is returned and the program keeps running. */
void compare_byte_seqs (byte a[], int n_a, byte b[], int n_b)
{
  int same = (n_a == n_b) ? TRUE : FALSE;
  int k;

  /* Contents are only compared when the lengths already agree. */
  for (k = 0; same && k < n_a; k++) {
    if (a[k] != b[k])
      same = FALSE;
  }

  if (!same) {
    printf("\n Error: Result is ");
    dump_byte_seq(a, n_a);
    printf("\n but should be ");
    dump_byte_seq(b, n_b);
    printf("\n");
    return;
  }
  printf("OK\n");
}
/* Compare the result a (n_a code points) with the expected b (n_b code
   points). Prints "OK" when length and contents agree; otherwise prints
   both sequences. Nothing is returned and the program keeps running. */
void compare_unicode_seqs (unicode a[], int n_a, unicode b[], int n_b)
{
  int same = (n_a == n_b) ? TRUE : FALSE;
  int k;

  /* Contents are only compared when the lengths already agree. */
  for (k = 0; same && k < n_a; k++) {
    if (a[k] != b[k])
      same = FALSE;
  }

  if (!same) {
    printf("\n Error: Result is ");
    dump_unicode_seq(a, n_a);
    printf("\n but should be ");
    dump_unicode_seq(b, n_b);
    printf("\n");
    return;
  }
  printf("OK\n");
}
int read_test_byte (FILE *f) | |||
{ | |||
int status; | |||
byte c; | |||
status = fread(&c, 1, 1, f); | |||
if (status <= 0) return -1; | |||
return (int)c; | |||
} | |||
/* Read at most 200 bytes from the file f_name and compare them with the
   n_data expected bytes in data, printing the verdict. Terminates the
   program through error() if the file cannot be opened. */
void test_byte_file (char *f_name, byte data[], int n_data)
{
  byte file_bytes[200];
  int n_file_bytes = 0;
  int b;
  FILE *in = fopen(f_name, "rb");

  if (in == NULL) error("Could not open file!");

  /* Stop at end of file or when the buffer is full, whichever comes first. */
  while (n_file_bytes < 200 && (b = read_test_byte(in)) >= 0)
    file_bytes[n_file_bytes++] = b;
  fclose(in);

  compare_byte_seqs(file_bytes, n_file_bytes, data, n_data);
}
/* Create (or overwrite) the file f_name with the n_b_seq bytes of b_seq.
   Terminates the program through error() if the file cannot be created;
   the result of fwrite is not checked. */
void create_byte_file (char *f_name, byte b_seq[], int n_b_seq)
{
  FILE *out = fopen(f_name, "wb");

  if (!out) error("Could not create file!");
  fwrite(b_seq, n_b_seq, 1, out);
  fclose(out);
}
/* Test #1 */ | |||
byte b_seq_1[] = { 4, 0, 255, 17, 200 }; | |||
void test_1 (void) | |||
{ | |||
int n_bytes = sizeof(b_seq_1)/sizeof(b_seq_1[0]); | |||
int i; | |||
FILE *f = fopen("test1.txt", "wb"); | |||
if (f == NULL) error("Could not create test1.txt!"); | |||
for (i = 0; i < n_bytes; i++) | |||
writebyte(f, b_seq_1[i]); | |||
fclose(f); | |||
test_byte_file("test1.txt", b_seq_1, n_bytes); | |||
} | |||
/* Test #2 */ | |||
unicode u_seq_2[] = { 0x24, 0x20, 0x41, 0x3d, 0x32, 0x78 }; /* "$ A=2x" */ | |||
byte b_seq_2[] = { '$', ' ', 'A', '=', '2', 'x' }; | |||
void test_2 (void) | |||
{ | |||
int n_u = sizeof(u_seq_2)/sizeof(u_seq_2[0]); | |||
int n_b = sizeof(b_seq_2)/sizeof(b_seq_2[0]); | |||
int i; | |||
FILE *f = fopen("test2.txt", "wb"); | |||
if (f == NULL) error("Could not create test2.txt!"); | |||
for (i = 0; i < n_u; i++) | |||
writeutf8char(f, u_seq_2[i]); | |||
fclose(f); | |||
test_byte_file("test2.txt", b_seq_2, n_b); | |||
} | |||
/* Test #3 */ | |||
unicode u_seq_3[] = { 0x35, 0xa2, 0x20, 0x429, 0x3c9 }; /* "5¢ Щω" */ | |||
byte b_seq_3[] = { '5', 0xc2, 0xa2, ' ', 0xd0, 0xa9, 0xcf, 0x89 }; | |||
void test_3 (void) | |||
{ | |||
int n_u = sizeof(u_seq_3)/sizeof(u_seq_3[0]); | |||
int n_b = sizeof(b_seq_3)/sizeof(b_seq_3[0]); | |||
int i; | |||
FILE *f = fopen("test3.txt", "wb"); | |||
if (f == NULL) error("Could not create test3.txt!"); | |||
for (i = 0; i < n_u; i++) | |||
writeutf8char(f, u_seq_3[i]); | |||
fclose(f); | |||
test_byte_file("test3.txt", b_seq_3, n_b); | |||
} | |||
/* Test #4 */ | |||
unicode u_seq_4[] = { 0x20ac, 0x3d, 0x10348, 0x2658 }; /* "€=𐍈♘" */ | |||
byte b_seq_4[] = { 0xe2, 0x82, 0xac, '=', 0xf0, 0x90, 0x8d, 0x88, | |||
0xe2, 0x99, 0x98}; | |||
void test_4 (void) | |||
{ | |||
int n_u = sizeof(u_seq_4)/sizeof(u_seq_4[0]); | |||
int n_b = sizeof(b_seq_4)/sizeof(b_seq_4[0]); | |||
int i; | |||
FILE *f = fopen("test4.txt", "wb"); | |||
if (f == NULL) error("Could not create test4.txt!"); | |||
for (i = 0; i < n_u; i++) | |||
writeutf8char(f, u_seq_4[i]); | |||
fclose(f); | |||
test_byte_file("test4.txt", b_seq_4, n_b); | |||
} | |||
/* Test #5 */ | |||
void test_5 (void) | |||
{ | |||
byte data[200]; | |||
int n_data = 0; | |||
int n_b_seq_1 = sizeof(b_seq_1)/sizeof(b_seq_1[0]); | |||
FILE *f; | |||
create_byte_file ("test5.txt", b_seq_1, n_b_seq_1); | |||
f = fopen("test5.txt", "rb"); | |||
if (f == NULL) error("Could not read test5.txt!"); | |||
while (n_data < 200) { | |||
int b = readbyte(f); | |||
if (b < 0) break; | |||
data[n_data++] = (byte)b; | |||
} | |||
fclose(f); | |||
compare_byte_seqs(data, n_data, b_seq_1, n_b_seq_1); | |||
} | |||
/* Test #6 */ | |||
void test_6 (void) | |||
{ | |||
unicode data[200]; | |||
int n_data = 0; | |||
int n_b_seq_2 = sizeof(b_seq_2)/sizeof(b_seq_2[0]); | |||
int n_u_seq_2 = sizeof(u_seq_2)/sizeof(u_seq_2[0]); | |||
FILE *f; | |||
create_byte_file ("test6.txt", b_seq_2, n_b_seq_2); | |||
f = fopen("test6.txt", "rb"); | |||
if (f == NULL) error("Could not read test6.txt!"); | |||
while (n_data < 200) { | |||
long u = readutf8char(f); | |||
if (u < 0) break; | |||
data[n_data++] = (unicode)u; | |||
} | |||
fclose(f); | |||
compare_unicode_seqs(data, n_data, u_seq_2, n_u_seq_2); | |||
} | |||
/* Test #7 */ | |||
void test_7 (void) | |||
{ | |||
unicode data[200]; | |||
int n_data = 0; | |||
int n_b_seq_3 = sizeof(b_seq_3)/sizeof(b_seq_3[0]); | |||
int n_u_seq_3 = sizeof(u_seq_3)/sizeof(u_seq_3[0]); | |||
FILE *f; | |||
create_byte_file ("test7.txt", b_seq_3, n_b_seq_3); | |||
f = fopen("test7.txt", "rb"); | |||
if (f == NULL) error("Could not read test7.txt!"); | |||
while (n_data < 200) { | |||
long u = readutf8char(f); | |||
if (u < 0) break; | |||
data[n_data++] = (unicode)u; | |||
} | |||
fclose(f); | |||
compare_unicode_seqs(data, n_data, u_seq_3, n_u_seq_3); | |||
} | |||
/* Test #8 */ | |||
void test_8 (void) | |||
{ | |||
unicode data[200]; | |||
int n_data = 0; | |||
int n_b_seq_4 = sizeof(b_seq_4)/sizeof(b_seq_4[0]); | |||
int n_u_seq_4 = sizeof(u_seq_4)/sizeof(u_seq_4[0]); | |||
FILE *f; | |||
create_byte_file ("test8.txt", b_seq_4, n_b_seq_4); | |||
f = fopen("test8.txt", "rb"); | |||
if (f == NULL) error("Could not read test8.txt!"); | |||
while (n_data < 200) { | |||
long u = readutf8char(f); | |||
if (u < 0) break; | |||
data[n_data++] = (unicode)u; | |||
} | |||
fclose(f); | |||
compare_unicode_seqs(data, n_data, u_seq_4, n_u_seq_4); | |||
} | |||
/* Main program: announce and run the eight tests in order.
   Always returns 0; failures are only reported in the printed output
   (or end the program through error()). */

int main (void)
{
  printf("Test 1 (write a byte): ");
  test_1();

  printf("Test 2 (write 1-byte utf-8): ");
  test_2();

  printf("Test 3 (write 2-byte utf-8): ");
  test_3();

  printf("Test 4 (write 3+4-byte utf-8): ");
  test_4();

  printf("Test 5 (read a byte): ");
  test_5();

  printf("Test 6 (read 1-byte utf-8): ");
  test_6();

  printf("Test 7 (read 2-byte utf-8): ");
  test_7();

  printf("Test 8 (read 3+4-byte utf-8): ");
  test_8();

  return 0;
}