Browse Source

reading and writing UTF-8 character works

master
mortie 7 years ago
parent
commit
396ee00b33
7 changed files with 968 additions and 0 deletions
  1. 18
    0
      inf2270/hw3/Makefile
  2. 112
    0
      inf2270/hw3/fasit.c
  3. 185
    0
      inf2270/hw3/fasit.s
  4. 54
    0
      inf2270/hw3/oblig3-basis.s
  5. 86
    0
      inf2270/hw3/oppgave.c
  6. 210
    0
      inf2270/hw3/oppgave.s
  7. 303
    0
      inf2270/hw3/test-oblig3.c

+ 18
- 0
inf2270/hw3/Makefile View File

@@ -0,0 +1,18 @@
# Builds the test driver together with the hand-written assembly and runs it.
# -g adds debug information; -m32 makes gcc produce 32-bit code.
CFLAGS=-g -m32
# Scratch files that the test program creates in the working directory.
TESTFILES=\
test1.txt test2.txt test3.txt \
test4.txt test5.txt test6.txt \
test7.txt test8.txt

# Compile and link the C driver with oppgave.s ($^ expands to all prerequisites).
test: test-oblig3.c oppgave.s
gcc $(CFLAGS) -o test $^

# Run the test binary under valgrind, then delete the files it produced.
run-test: test
valgrind ./test
rm -f $(TESTFILES)

# Remove the test binary and every test*.txt found below the current directory.
clean:
rm -f test
rm -f $(shell find . -name 'test*.txt')

# run-test and clean are actions, not files to be built.
.PHONY: run-test clean

+ 112
- 0
inf2270/hw3/fasit.c View File

@@ -0,0 +1,112 @@
#include <stdio.h>
#include <stdlib.h>

// One raw octet as read from / written to a file.
typedef unsigned char byte;
// One character value before UTF-8 encoding / after decoding.
typedef unsigned long unicode;

// Smallest value of the first byte of a sequence that announces
// an n-byte UTF-8 sequence.
#define C4bytes 0xF0
#define C3bytes 0xE0
#define C2bytes 0xC0

// Smallest character value whose UTF-8 encoding needs n bytes.
#define U4bytes 0x10000
#define U3bytes 0x0800
#define U2bytes 0x0080

/* Write the single byte `b` to the stream `f`.
   The result of fwrite is not inspected, so a failed write goes unreported. */
void writebyte(FILE *f, byte b)
{
    const byte out = b;

    fwrite(&out, sizeof out, 1, f);
}

/* Encode `u` as UTF-8 and write the resulting bytes to `f` via writebyte().
 *
 * Values below U2bytes are written as one byte. Otherwise the first byte
 * carries a length marker plus the topmost data bits, and every following
 * byte carries the marker 10xxxxxx plus the next six data bits.
 */
void writeutf8char(FILE *f, unicode u)
{
    int shift;    /* position of the data bits that go into the next byte */
    int marker;   /* fixed high bits of the next byte */
    int payload;  /* mask selecting the data bits of the next byte */

    if (u < U2bytes) {
        writebyte(f, (byte)u);
        return;
    }

    if (u >= U4bytes) {
        shift = 18; marker = 0xF0; payload = 0x07;
    } else if (u >= U3bytes) {
        shift = 12; marker = 0xE0; payload = 0x0F;
    } else {
        shift = 6;  marker = 0xC0; payload = 0x1F;
    }

    for (;;) {
        writebyte(f, marker | ((u >> shift) & payload));
        if (shift == 0)
            break;

        /* Every byte after the first is a continuation byte. */
        marker = 0x80;
        payload = 0x3F;
        shift -= 6;
    }
}

/* Read one byte from `f`.
   Returns its value (0..255), or -1 when fread delivers nothing. */
int readbyte(FILE *f)
{
    byte c;
    int got = fread(&c, 1, 1, f);

    return (got > 0) ? (int)c : -1;
}

long readutf8char(FILE *f)
{
unicode u = 0;
unicode ch = readbyte(f);

if (ch == -1)
return -1;

if (ch < C2bytes)
return ch;

int left;
int mask;
if (ch >= C4bytes)
{
left = 18;
mask = 0b00000111;
}
else if (ch >= C3bytes)
{
left = 12;
mask = 0b00001111;
}
else
{
left = 6;
mask = 0b00011111;
}

while (1)
{
u |= (ch & mask) << left;

if (left == 0)
return u;

left -= 6;
mask = 0b00111111;
ch = readbyte(f);
}
}

+ 185
- 0
inf2270/hw3/fasit.s View File

@@ -0,0 +1,185 @@
.file "fasit.c"
.text
# writebyte -- compiler output (see the .file and .ident directives of this listing).
# Stores the low byte of the second stack argument in a local and calls
# fwrite(&local, 1, 1, first_argument). Nothing is loaded into %eax for the
# caller afterwards.
.globl writebyte
.type writebyte, @function
writebyte:
.LFB5:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
subl $24, %esp
movl 12(%ebp), %eax # eax = second argument
movb %al, -12(%ebp) # keep only its low byte, in the local at -12(%ebp)
pushl 8(%ebp) # fwrite argument 4: first argument of writebyte
pushl $1 # fwrite argument 3
pushl $1 # fwrite argument 2
leal -12(%ebp), %eax
pushl %eax # fwrite argument 1: address of the stored byte
call fwrite
addl $16, %esp # drop the four pushed arguments
nop
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE5:
.size writebyte, .-writebyte
# writeutf8char -- compiler output.
# 8(%ebp) is passed on unchanged to every writebyte call; 12(%ebp) is the
# value being encoded; -12(%ebp) holds the current shift count.
# Flow: values <= 127 are written as one byte. Otherwise a first byte is
# written by one of three branches (shift 18, 12 or 6), and the loop at .L6
# then writes one byte per step of 6 bits until the shift count reaches 0.
# NOTE(review): the branch-then-do/while shape differs from the single loop in
# the fasit.c shown with this listing -- presumably generated from another
# revision; confirm before relying on it.
.globl writeutf8char
.type writeutf8char, @function
writeutf8char:
.LFB6:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
subl $24, %esp
cmpl $127, 12(%ebp)
ja .L3 # unsigned: value > 127 needs more than one byte
movl 12(%ebp), %eax
movzbl %al, %eax # low byte of the value, zero-extended
subl $8, %esp
pushl %eax
pushl 8(%ebp)
call writebyte
addl $16, %esp # undo the subl $8 and the two pushes
jmp .L2
.L3:
cmpl $65535, 12(%ebp)
jbe .L5 # unsigned: value <= 0xFFFF
movl $18, -12(%ebp) # shift count = 18
movl -12(%ebp), %eax
movl 12(%ebp), %edx
movl %eax, %ecx
shrl %cl, %edx # edx = value >> 18
movl %edx, %eax
andl $7, %eax # keep 3 bits
orl $-16, %eax # low byte of -16 is 0xF0
movzbl %al, %eax
subl $8, %esp
pushl %eax
pushl 8(%ebp)
call writebyte
addl $16, %esp
jmp .L6
.L5:
cmpl $2047, 12(%ebp)
jbe .L7 # unsigned: value <= 0x7FF
movl $12, -12(%ebp) # shift count = 12
movl -12(%ebp), %eax
movl 12(%ebp), %edx
movl %eax, %ecx
shrl %cl, %edx # edx = value >> 12
movl %edx, %eax
andl $15, %eax # keep 4 bits
orl $-32, %eax # low byte of -32 is 0xE0
movzbl %al, %eax
subl $8, %esp
pushl %eax
pushl 8(%ebp)
call writebyte
addl $16, %esp
jmp .L6
.L7:
movl $6, -12(%ebp) # shift count = 6
movl -12(%ebp), %eax
movl 12(%ebp), %edx
movl %eax, %ecx
shrl %cl, %edx # edx = value >> 6
movl %edx, %eax
andl $31, %eax # keep 5 bits
orl $-64, %eax # low byte of -64 is 0xC0
movzbl %al, %eax
subl $8, %esp
pushl %eax
pushl 8(%ebp)
call writebyte
addl $16, %esp
.L6:
subl $6, -12(%ebp) # advance to the next group of 6 bits
movl -12(%ebp), %eax
movl 12(%ebp), %edx
movl %eax, %ecx
shrl %cl, %edx
movl %edx, %eax
andl $63, %eax # keep 6 bits
orl $-128, %eax # low byte of -128 is 0x80
movzbl %al, %eax
subl $8, %esp
pushl %eax
pushl 8(%ebp)
call writebyte
addl $16, %esp
cmpl $0, -12(%ebp)
jg .L6 # repeat while the shift count is still positive
.L2:
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE6:
.size writeutf8char, .-writeutf8char
# readbyte -- compiler output.
# Calls fread(&local_byte, 1, 1, first_argument). Returns the byte
# zero-extended in %eax when fread's result is greater than 0, otherwise -1.
.globl readbyte
.type readbyte, @function
readbyte:
.LFB7:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
subl $24, %esp
pushl 8(%ebp) # fread argument 4: first argument of readbyte
pushl $1 # fread argument 3
pushl $1 # fread argument 2
leal -13(%ebp), %eax
pushl %eax # fread argument 1: address of the one-byte local
call fread
addl $16, %esp # drop the four pushed arguments
movl %eax, -12(%ebp) # keep fread's result in the local at -12(%ebp)
cmpl $0, -12(%ebp)
jg .L9 # signed: result > 0 means a byte was delivered
movl $-1, %eax
jmp .L11
.L9:
movzbl -13(%ebp), %eax # return the byte, zero-extended
movzbl %al, %eax
.L11:
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE7:
.size readbyte, .-readbyte
# readutf8char -- compiler output.
# This body only forwards its argument to readbyte and returns whatever
# readbyte left in %eax; there is no multi-byte decoding here.
# NOTE(review): this does not match the decoding loop in the fasit.c shown
# with this listing -- presumably generated from another revision; confirm.
.globl readutf8char
.type readutf8char, @function
readutf8char:
.LFB8:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
subl $8, %esp
subl $12, %esp
pushl 8(%ebp) # the only argument of readbyte
call readbyte
addl $16, %esp # undo the subl $12 and the push
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE8:
.size readutf8char, .-readutf8char
.ident "GCC: (GNU) 6.3.1 20170306"
.section .note.GNU-stack,"",@progbits

+ 54
- 0
inf2270/hw3/oblig3-basis.s View File

@@ -0,0 +1,54 @@
.extern fread, fwrite

.text
.globl readbyte
# Name: readbyte
# Synopsis: Reads one byte from a binary file.
# C signature: int readbyte (FILE *f)
# Registers:
# Skeleton only: nothing sits between prologue and epilogue yet, so %eax is
# never set before returning.
readbyte:
pushl %ebp # Standard function prologue
movl %esp,%ebp #

rb_x: popl %ebp # Standard
ret # return.

.globl readutf8char
# Name: readutf8char
# Synopsis: Reads one Unicode character from a binary file.
# C signature: long readutf8char (FILE *f)
# Registers:
# Skeleton only: nothing sits between prologue and epilogue yet, so %eax is
# never set before returning.
readutf8char:
pushl %ebp # Standard function prologue
movl %esp,%ebp #

popl %ebp # Standard
ret # return.

.globl writebyte
# Name: writebyte
# Synopsis: Writes one byte to a binary file.
# C signature: void writebyte (FILE *f, unsigned char b)
# Registers:
# Skeleton only: nothing sits between prologue and epilogue yet.
writebyte:
pushl %ebp # Standard function prologue
movl %esp,%ebp #

popl %ebp # Standard
ret # return.

.globl writeutf8char
# Name: writeutf8char
# Synopsis: Writes one character, encoded as UTF-8, to a binary file.
# C signature: void writeutf8char (FILE *f, unsigned long u)
# Registers:
# Skeleton only: nothing sits between prologue and epilogue yet.
writeutf8char:
pushl %ebp # Standard function prologue
movl %esp,%ebp #

wu8_x: popl %ebp # Standard
ret # return.

+ 86
- 0
inf2270/hw3/oppgave.c View File

@@ -0,0 +1,86 @@
#include <stdio.h>
#include <stdlib.h>

// One raw octet as read from / written to a file.
typedef unsigned char byte;
// One character value before UTF-8 encoding / after decoding.
typedef unsigned long unicode;

// Smallest value of the first byte of a sequence that announces
// an n-byte UTF-8 sequence.
#define C4bytes 0xF0
#define C3bytes 0xE0
#define C2bytes 0xC0

// Smallest character value whose UTF-8 encoding needs n bytes.
#define U4bytes 0x10000
#define U3bytes 0x0800
#define U2bytes 0x0080

/* Emit exactly one byte, `b`, on stream `f`. Write errors are not reported. */
void writebyte(FILE *f, byte b)
{
    fwrite(&b, sizeof b, 1, f);
}

/* Write `u` to `f` as a UTF-8 sequence of one to four bytes.
 *
 * The length is chosen from the U2bytes/U3bytes/U4bytes thresholds. The first
 * byte holds the length marker and the topmost data bits; every later byte is
 * 10xxxxxx with the next six data bits.
 */
void writeutf8char(FILE *f, unicode u)
{
    const unicode cont_marker = 0x80;  /* 10xxxxxx */
    const unicode cont_bits = 0x3F;    /* six data bits per continuation byte */

    if (u < U2bytes) {
        writebyte(f, u);
        return;
    }

    if (u < U3bytes) {
        writebyte(f, 0xC0 | ((u >> 6) & 0x1F));
        writebyte(f, cont_marker | (u & cont_bits));
        return;
    }

    if (u < U4bytes) {
        writebyte(f, 0xE0 | ((u >> 12) & 0x0F));
        writebyte(f, cont_marker | ((u >> 6) & cont_bits));
        writebyte(f, cont_marker | (u & cont_bits));
        return;
    }

    writebyte(f, 0xF0 | ((u >> 18) & 0x07));
    writebyte(f, cont_marker | ((u >> 12) & cont_bits));
    writebyte(f, cont_marker | ((u >> 6) & cont_bits));
    writebyte(f, cont_marker | (u & cont_bits));
}

/* Fetch the next byte of `f`.
   Yields 0..255 on success and -1 when fread hands back no item. */
int readbyte(FILE *f)
{
    byte c;
    int status = fread(&c, 1, 1, f);

    if (status > 0) {
        return (int)c;
    }
    return -1;
}

long readutf8char(FILE *f)
{
byte first = readbyte(f);
unicode u = 0;

if (first >= C4bytes)
{
u |= (first & 0b00000111) << 18;
u |= (readbyte(f) & 0b00111111) << 12;
u |= (readbyte(f) & 0b00111111) << 6;
u |= readbyte(f) & 0b00111111;
}
else if (first >= C3bytes)
{
u |= (first & 0b00001111) << 12;
u |= (readbyte(f) & 0b00111111) << 6;
u |= readbyte(f) & 0b00111111;
}
else if (first >= C2bytes)
{
u |= (first & 0b00011111) << 6;
u |= readbyte(f) & 0b00111111;
}
else
{
u |= first;
}

return u;
}

+ 210
- 0
inf2270/hw3/oppgave.s View File

@@ -0,0 +1,210 @@
.extern fread, fwrite

.text

#########################
# int readbyte(FILE *f) #
#########################
# Reads one byte from f with fread.
# ABI:   i386 System V (cdecl), AT&T syntax
# In:    8(%ebp) = f
# Out:   %eax = byte value 0..255, or -1 if fread delivered no item
# Clobb: %eax, %ecx, %edx (caller-saved; fread may overwrite them), flags
# Frame: -4(%ebp) read buffer, -8(%ebp) padding
        .globl  readbyte
readbyte:
        pushl   %ebp
        movl    %esp,%ebp

        # 4 bytes of buffer + 4 bytes of padding: together with the saved
        # %ebp and the 16 bytes of arguments pushed below, this leaves %esp
        # 16-byte aligned at the call, as the ABI expects.
        subl    $8,%esp
        movl    $0,-4(%ebp)             # fread fills only the low byte; clear the rest
        leal    -4(%ebp),%eax           # eax = &buffer

        pushl   8(%ebp)                 # FILE *stream
        pushl   $1                      # nmemb
        pushl   $1                      # size
        pushl   %eax                    # void *ptr
        call    fread
        addl    $16,%esp                # drop the four arguments

        testl   %eax,%eax               # fread returns the number of items read
        jz      rb_e                    # 0 items: end of file or error
        movl    -4(%ebp),%eax           # return the byte, upper bits already 0
        jmp     rb_x

rb_e:   movl    $-1,%eax                # return -1

rb_x:   movl    %ebp,%esp
        popl    %ebp
        ret

##############################
# long readutf8char(FILE *f) #
##############################
# Reads one UTF-8 encoded character from f, one readbyte call per byte.
# ABI:   i386 System V (cdecl), AT&T syntax
# In:    8(%ebp) = f
# Out:   %eax = decoded character; a first byte below 0xC0 is returned as is;
#        -1 if readbyte reports end of input (at the first byte or, new in
#        this version, inside a multi-byte sequence)
# Clobb: %eax, %ecx, %edx (caller-saved; callees may overwrite them), flags
# Frame: -4(%ebp)  character assembled so far
#        -8(%ebp)  current byte
#        -12(%ebp) mask selecting the data bits of the current byte
#        -16(%ebp) shift: position of the current byte's bits in the result
#        -20(%ebp) padding, so %esp is 16-byte aligned at each call
# The frame is allocated before anything is pushed, so no local shares a slot
# with an outgoing argument, and every pushed argument is popped again.
        .globl  readutf8char
readutf8char:
        pushl   %ebp
        movl    %esp,%ebp
        subl    $20,%esp                # 4 locals + 4 bytes padding

        # Read the first byte into eax.
        pushl   8(%ebp)                 # f
        call    readbyte
        addl    $4,%esp

        # Signed compare on purpose: it sends both plain one-byte values and
        # the -1 end-of-input marker straight to the exit.
        cmpl    $0xC0,%eax
        jge     ru8_main
        jmp     ru8_x

ru8_main:
        movl    $0,-4(%ebp)             # nothing assembled yet
        movl    %eax,-8(%ebp)           # current byte = first byte

        cmpl    $0xF0,%eax              # first byte >= 0xF0: 4 bytes
        jge     ru8_4bytes
        cmpl    $0xE0,%eax              # first byte >= 0xE0: 3 bytes
        jge     ru8_3bytes
        jmp     ru8_2bytes              # otherwise 2 bytes

ru8_4bytes:
        movl    $18,-16(%ebp)           # first byte supplies bits 18 and up
        movl    $0b00000111,-12(%ebp)   # 3 data bits in the first byte
        jmp     ru8_loop

ru8_3bytes:
        movl    $12,-16(%ebp)           # first byte supplies bits 12 and up
        movl    $0b00001111,-12(%ebp)   # 4 data bits in the first byte
        jmp     ru8_loop

ru8_2bytes:
        movl    $6,-16(%ebp)            # first byte supplies bits 6 and up
        movl    $0b00011111,-12(%ebp)   # 5 data bits in the first byte

ru8_loop:
        movl    -8(%ebp),%eax           # eax = current byte
        andl    -12(%ebp),%eax          # keep its data bits
        movl    -16(%ebp),%ecx          # shll takes its count from %cl
        shll    %cl,%eax                # move the bits into position
        orl     %eax,-4(%ebp)           # merge into the result

        cmpl    $0,-16(%ebp)            # shift 0: that was the last byte
        je      ru8_done

        movl    $63,-12(%ebp)           # continuation bytes carry 6 data bits
        subl    $6,-16(%ebp)            # the next byte sits 6 bits lower

        pushl   8(%ebp)                 # f
        call    readbyte
        addl    $4,%esp
        testl   %eax,%eax
        js      ru8_x                   # -1: input ended mid-sequence, return -1
        movl    %eax,-8(%ebp)           # current byte = byte just read

        jmp     ru8_loop

ru8_done:
        movl    -4(%ebp),%eax           # return the assembled character

ru8_x:  movl    %ebp,%esp
        popl    %ebp
        ret

############################################
# void writebyte(FILE *f, unsigned char b) #
############################################
# Writes the single byte b to f with fwrite.
# ABI:   i386 System V (cdecl), AT&T syntax
# In:    8(%ebp) = f, 12(%ebp) = b (low byte of the argument slot)
# Out:   none (fwrite's result is not inspected)
# Clobb: %eax, %ecx, %edx (caller-saved; fwrite may overwrite them), flags
        .globl  writebyte
writebyte:
        pushl   %ebp
        movl    %esp,%ebp

        # 8 bytes of padding: together with the saved %ebp and the 16 bytes
        # of arguments pushed below, this leaves %esp 16-byte aligned at the
        # call, as the ABI expects.
        subl    $8,%esp

        # The argument slot itself serves as the buffer: x86 is little-endian,
        # so the byte at address 12(%ebp) is the low byte of the argument.
        # Arguments are addressed through %ebp so that the pushes below
        # cannot shift the offsets.
        leal    12(%ebp),%eax           # eax = &b
        pushl   8(%ebp)                 # FILE *stream
        pushl   $1                      # nmemb
        pushl   $1                      # size
        pushl   %eax                    # void *ptr
        call    fwrite

        movl    %ebp,%esp               # also discards padding and arguments
        popl    %ebp
        ret

################################################
# void writeutf8char(FILE *f, unsigned long u) #
################################################
# Writes u to f encoded as UTF-8, one writebyte call per output byte.
# ABI:   i386 System V (cdecl), AT&T syntax
# In:    8(%ebp) = f, 12(%ebp) = u
# Out:   none
# Clobb: %eax, %ecx, %edx (caller-saved; callees may overwrite them), flags
# Frame: -4(%ebp)  marker bits OR-ed into the next byte
#        -8(%ebp)  shift: position in u of the bits for the next byte
#        -12(%ebp) mask selecting how many data bits the next byte carries
#        -16(%ebp) padding, so %esp is 16-byte aligned at each call
# Each byte written is: marker | ((u >> shift) & mask)
# u is unsigned, so the size tests use jae. The previous signed jge sent
# values with the top bit set down the one-byte path.
# Bits of u above bit 20 do not fit in four bytes and are dropped.
        .globl  writeutf8char
writeutf8char:
        pushl   %ebp
        movl    %esp,%ebp
        subl    $16,%esp                # 3 locals + 4 bytes padding

        movl    12(%ebp),%ecx           # ecx = u

        cmpl    $0x0080,%ecx            # u >= 0x80 needs more than one byte
        jae     wu8_main

        pushl   %ecx                    # b = u (writebyte uses the low byte)
        pushl   8(%ebp)                 # FILE *f
        call    writebyte
        jmp     wu8_x                   # the epilogue resets %esp

# Pick the sequence length from the size of u.
wu8_main:
        cmpl    $0x10000,%ecx           # u >= 0x10000 needs 4 bytes
        jae     wu8_4bytes
        cmpl    $0x0800,%ecx            # u >= 0x0800 needs 3 bytes
        jae     wu8_3bytes
        jmp     wu8_2bytes              # everything else here needs 2 bytes

wu8_4bytes:
        movl    $18,-8(%ebp)            # first byte takes bits 18 and up
        movl    $0b11110000,-4(%ebp)    # marker 11110xxx
        movl    $0b00000111,-12(%ebp)   # 3 data bits
        jmp     wu8_loop

wu8_3bytes:
        movl    $12,-8(%ebp)            # first byte takes bits 12 and up
        movl    $0b11100000,-4(%ebp)    # marker 1110xxxx
        movl    $0b00001111,-12(%ebp)   # 4 data bits
        jmp     wu8_loop

wu8_2bytes:
        movl    $6,-8(%ebp)             # first byte takes bits 6 and up
        movl    $0b11000000,-4(%ebp)    # marker 110xxxxx
        movl    $0b00011111,-12(%ebp)   # 5 data bits

wu8_loop:
        movl    12(%ebp),%eax           # eax = u
        movl    -8(%ebp),%ecx           # shrl takes its count from %cl
        shrl    %cl,%eax                # bring the wanted bits down to bit 0
        andl    -12(%ebp),%eax          # keep only the data bits
        orl     -4(%ebp),%eax           # add the marker bits

        pushl   %eax                    # b
        pushl   8(%ebp)                 # FILE *f
        call    writebyte
        addl    $8,%esp                 # drop the two arguments

        cmpl    $0,-8(%ebp)             # shift 0: the last byte has been written
        je      wu8_x

        movl    $63,-12(%ebp)           # continuation bytes carry 6 data bits
        movl    $128,-4(%ebp)           # continuation marker 10xxxxxx
        subl    $6,-8(%ebp)             # the next byte takes the next 6 bits

        jmp     wu8_loop

wu8_x:  movl    %ebp,%esp
        popl    %ebp
        ret

+ 303
- 0
inf2270/hw3/test-oblig3.c View File

@@ -0,0 +1,303 @@
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>

/* Boolean values used by the comparison helpers below. */
#define FALSE 0
#define TRUE 1

/* One raw octet. */
typedef unsigned char byte;
/* One character value, as passed to writeutf8char / returned by readutf8char. */
typedef unsigned long unicode;

/* The four functions under test; they are defined outside this file. */
extern int readbyte (FILE *f);
extern long readutf8char (FILE *f);
extern void writebyte (FILE *f, byte b);
extern void writeutf8char (FILE *f, unicode u);

/* Print `message` as an error line on stdout and end the program with
   exit status 1. Does not return. */
void error (char *message)
{
    printf("\nERROR: %s\n", message);

    exit(1);
}

/* Print the `n_b` bytes of `b` on stdout as "N bytes {0x.., 0x..}",
   without a trailing newline. */
void dump_byte_seq (byte b[], int n_b)
{
    int pos = 0;

    printf("%d bytes {", n_b);
    while (pos < n_b) {
        /* Separator goes before every element except the first. */
        if (pos != 0) {
            printf(", ");
        }
        printf("0x%02x", b[pos]);
        pos++;
    }
    printf("}");
}

/* Print the `n_u` values of `u` on stdout as "N chars {0x.., 0x..}",
   without a trailing newline. */
void dump_unicode_seq (unicode u[], int n_u)
{
    int pos = 0;

    printf("%d chars {", n_u);
    while (pos < n_u) {
        /* Separator goes before every element except the first. */
        if (pos != 0) {
            printf(", ");
        }
        printf("0x%lx", u[pos]);
        pos++;
    }
    printf("}");
}

/* Compare the result `a` (n_a bytes) with the expectation `b` (n_b bytes).
   Prints "OK" when length and contents agree; otherwise prints both
   sequences. */
void compare_byte_seqs (byte a[], int n_a, byte b[], int n_b)
{
    int same = (n_a == n_b) ? TRUE : FALSE;
    int k;

    /* Contents are only compared when the lengths already agree. */
    for (k = 0; same && k < n_a; k++) {
        if (a[k] != b[k]) {
            same = FALSE;
        }
    }

    if (!same) {
        printf("\n Error: Result is "); dump_byte_seq(a, n_a);
        printf("\n but should be "); dump_byte_seq(b, n_b); printf("\n");
        return;
    }

    printf("OK\n");
}

/* Compare the result `a` (n_a values) with the expectation `b` (n_b values).
   Prints "OK" when length and contents agree; otherwise prints both
   sequences. */
void compare_unicode_seqs (unicode a[], int n_a, unicode b[], int n_b)
{
    int same = (n_a == n_b) ? TRUE : FALSE;
    int k;

    /* Contents are only compared when the lengths already agree. */
    for (k = 0; same && k < n_a; k++) {
        if (a[k] != b[k]) {
            same = FALSE;
        }
    }

    if (!same) {
        printf("\n Error: Result is "); dump_unicode_seq(a, n_a);
        printf("\n but should be "); dump_unicode_seq(b, n_b); printf("\n");
        return;
    }

    printf("OK\n");
}

/* Reference byte reader used by the harness itself, independent of the
   readbyte under test. Returns the byte (0..255), or -1 when fread
   delivers nothing. */
int read_test_byte (FILE *f)
{
    byte c;
    int status = fread(&c, 1, 1, f);

    return (status > 0) ? (int)c : -1;
}

/* Read back at most 200 bytes of the file `f_name` and compare them with the
   expected `data` (n_data bytes). Aborts via error() if the file cannot be
   opened. */
void test_byte_file (char *f_name, byte data[], int n_data)
{
    byte file_bytes[200];
    int n_file_bytes = 0;
    FILE *f;

    f = fopen(f_name, "rb");
    if (f == NULL) error("Could not open file!");

    while (n_file_bytes < 200) {
        int b = read_test_byte(f);
        if (b < 0) {
            break;              /* end of file */
        }
        file_bytes[n_file_bytes] = b;
        n_file_bytes++;
    }
    fclose(f);

    compare_byte_seqs(file_bytes, n_file_bytes, data, n_data);
}

/* Create (or overwrite) the file `f_name` with the `n_b_seq` bytes of
   `b_seq`, written as a single item. Aborts via error() if the file cannot be
   created; the result of the write itself is not checked. */
void create_byte_file (char *f_name, byte b_seq[], int n_b_seq)
{
    FILE *out = fopen(f_name, "wb");

    if (out == NULL) {
        error("Could not create file!");
    }

    fwrite(b_seq, n_b_seq, 1, out);
    fclose(out);
}


/* Test #1: writebyte must store each byte unchanged. */
byte b_seq_1[] = { 4, 0, 255, 17, 200 };

void test_1 (void)
{
    const int n_bytes = sizeof(b_seq_1)/sizeof(b_seq_1[0]);
    int k = 0;
    FILE *f;

    f = fopen("test1.txt", "wb");
    if (f == NULL) error("Could not create test1.txt!");

    while (k < n_bytes) {
        writebyte(f, b_seq_1[k]);
        k++;
    }
    fclose(f);

    /* The file must now hold exactly the bytes that were written. */
    test_byte_file("test1.txt", b_seq_1, n_bytes);
}


/* Test #2: writeutf8char on values that encode as a single byte each. */
unicode u_seq_2[] = { 0x24, 0x20, 0x41, 0x3d, 0x32, 0x78 }; /* "$ A=2x" */
byte b_seq_2[] = { '$', ' ', 'A', '=', '2', 'x' };

void test_2 (void)
{
    const int n_u = sizeof(u_seq_2)/sizeof(u_seq_2[0]);
    const int n_b = sizeof(b_seq_2)/sizeof(b_seq_2[0]);
    int k = 0;
    FILE *f;

    f = fopen("test2.txt", "wb");
    if (f == NULL) error("Could not create test2.txt!");

    while (k < n_u) {
        writeutf8char(f, u_seq_2[k]);
        k++;
    }
    fclose(f);

    /* Compare the bytes on disk with the expected encoding. */
    test_byte_file("test2.txt", b_seq_2, n_b);
}


/* Test #3: writeutf8char on a mix of one- and two-byte encodings. */
unicode u_seq_3[] = { 0x35, 0xa2, 0x20, 0x429, 0x3c9 }; /* "5¢ Щω" */
byte b_seq_3[] = { '5', 0xc2, 0xa2, ' ', 0xd0, 0xa9, 0xcf, 0x89 };

void test_3 (void)
{
    const int n_u = sizeof(u_seq_3)/sizeof(u_seq_3[0]);
    const int n_b = sizeof(b_seq_3)/sizeof(b_seq_3[0]);
    int k = 0;
    FILE *f;

    f = fopen("test3.txt", "wb");
    if (f == NULL) error("Could not create test3.txt!");

    while (k < n_u) {
        writeutf8char(f, u_seq_3[k]);
        k++;
    }
    fclose(f);

    /* Compare the bytes on disk with the expected encoding. */
    test_byte_file("test3.txt", b_seq_3, n_b);
}


/* Test #4: writeutf8char on three- and four-byte encodings. */
unicode u_seq_4[] = { 0x20ac, 0x3d, 0x10348, 0x2658 }; /* "€=𐍈♘" */
byte b_seq_4[] = { 0xe2, 0x82, 0xac, '=', 0xf0, 0x90, 0x8d, 0x88,
                   0xe2, 0x99, 0x98};
void test_4 (void)
{
    const int n_u = sizeof(u_seq_4)/sizeof(u_seq_4[0]);
    const int n_b = sizeof(b_seq_4)/sizeof(b_seq_4[0]);
    int k = 0;
    FILE *f;

    f = fopen("test4.txt", "wb");
    if (f == NULL) error("Could not create test4.txt!");

    while (k < n_u) {
        writeutf8char(f, u_seq_4[k]);
        k++;
    }
    fclose(f);

    /* Compare the bytes on disk with the expected encoding. */
    test_byte_file("test4.txt", b_seq_4, n_b);
}


/* Test #5: readbyte must return every byte of a file, then a negative value. */
void test_5 (void)
{
    byte data[200];
    int n_data;
    const int n_b_seq_1 = sizeof(b_seq_1)/sizeof(b_seq_1[0]);
    FILE *f;

    create_byte_file ("test5.txt", b_seq_1, n_b_seq_1);

    f = fopen("test5.txt", "rb");
    if (f == NULL) error("Could not read test5.txt!");

    /* Collect at most 200 bytes; a negative result ends the loop. */
    for (n_data = 0; n_data < 200; n_data++) {
        int b = readbyte(f);
        if (b < 0) break;
        data[n_data] = (byte)b;
    }
    fclose(f);

    compare_byte_seqs(data, n_data, b_seq_1, n_b_seq_1);
}


/* Test #6: readutf8char on a file of one-byte encodings. */
void test_6 (void)
{
    unicode data[200];
    int n_data;
    const int n_b_seq_2 = sizeof(b_seq_2)/sizeof(b_seq_2[0]);
    const int n_u_seq_2 = sizeof(u_seq_2)/sizeof(u_seq_2[0]);
    FILE *f;

    create_byte_file ("test6.txt", b_seq_2, n_b_seq_2);

    f = fopen("test6.txt", "rb");
    if (f == NULL) error("Could not read test6.txt!");

    /* Collect at most 200 characters; a negative result ends the loop. */
    for (n_data = 0; n_data < 200; n_data++) {
        long u = readutf8char(f);
        if (u < 0) break;
        data[n_data] = (unicode)u;
    }
    fclose(f);

    compare_unicode_seqs(data, n_data, u_seq_2, n_u_seq_2);
}


/* Test #7: readutf8char on a file mixing one- and two-byte encodings. */
void test_7 (void)
{
    unicode data[200];
    int n_data;
    const int n_b_seq_3 = sizeof(b_seq_3)/sizeof(b_seq_3[0]);
    const int n_u_seq_3 = sizeof(u_seq_3)/sizeof(u_seq_3[0]);
    FILE *f;

    create_byte_file ("test7.txt", b_seq_3, n_b_seq_3);

    f = fopen("test7.txt", "rb");
    if (f == NULL) error("Could not read test7.txt!");

    /* Collect at most 200 characters; a negative result ends the loop. */
    for (n_data = 0; n_data < 200; n_data++) {
        long u = readutf8char(f);
        if (u < 0) break;
        data[n_data] = (unicode)u;
    }
    fclose(f);

    compare_unicode_seqs(data, n_data, u_seq_3, n_u_seq_3);
}


/* Test #8: readutf8char on a file with three- and four-byte encodings. */
void test_8 (void)
{
    unicode data[200];
    int n_data;
    const int n_b_seq_4 = sizeof(b_seq_4)/sizeof(b_seq_4[0]);
    const int n_u_seq_4 = sizeof(u_seq_4)/sizeof(u_seq_4[0]);
    FILE *f;

    create_byte_file ("test8.txt", b_seq_4, n_b_seq_4);

    f = fopen("test8.txt", "rb");
    if (f == NULL) error("Could not read test8.txt!");

    /* Collect at most 200 characters; a negative result ends the loop. */
    for (n_data = 0; n_data < 200; n_data++) {
        long u = readutf8char(f);
        if (u < 0) break;
        data[n_data] = (unicode)u;
    }
    fclose(f);

    compare_unicode_seqs(data, n_data, u_seq_4, n_u_seq_4);
}

/* Main program */
int main (void)
{
printf("Test 1 (write a byte): "); test_1();
printf("Test 2 (write 1-byte utf-8): "); test_2();
printf("Test 3 (write 2-byte utf-8): "); test_3();
printf("Test 4 (write 3+4-byte utf-8): "); test_4();
printf("Test 5 (read a byte): "); test_5();
printf("Test 6 (read 1-byte utf-8): "); test_6();
printf("Test 7 (read 2-byte utf-8): "); test_7();
printf("Test 8 (read 3+4-byte utf-8): "); test_8();
return 0;
}

Loading…
Cancel
Save