1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586 |
- #include <stdio.h>
- #include <stdlib.h>
-
- typedef unsigned char byte;
- typedef unsigned long unicode;
-
- // The minimum value of the first (lead) byte of a UTF-8 sequence that is n bytes long
- #define C4bytes 0xF0
- #define C3bytes 0xE0
- #define C2bytes 0xC0
-
- // The minimum value of a code point (vm_var_char) whose UTF-8 encoding is n bytes long
- #define U4bytes 0x10000
- #define U3bytes 0x0800
- #define U2bytes 0x0080
-
- void writebyte(FILE *f, byte b)
- {
- fwrite(&b, 1, 1, f);
- }
-
- void writeutf8char(FILE *f, unicode u)
- {
- if (u >= U4bytes)
- {
- writebyte(f, 0b11110000 | ((u >> 18) & 0b00000111));
- writebyte(f, 0b10000000 | ((u >> 12) & 0b00111111));
- writebyte(f, 0b10000000 | ((u >> 6) & 0b00111111));
- writebyte(f, 0b10000000 | (u & 0b00111111));
- }
- else if (u >= U3bytes)
- {
- writebyte(f, 0b11100000 | ((u >> 12) & 0b00001111));
- writebyte(f, 0b10000000 | ((u >> 6) & 0b00111111));
- writebyte(f, 0b10000000 | (u & 0b00111111));
- }
- else if (u >= U2bytes)
- {
- writebyte(f, 0b11000000 | ((u >> 6) & 0b00011111));
- writebyte(f, 0b10000000 | (u & 0b00111111));
- }
- else
- {
- writebyte(f, u);
- }
- }
-
// Reads one byte from the stream `f`.
// Returns the byte's value (0..255), or -1 when no byte could be read
// (end of file or read error).
int readbyte(FILE *f)
{
    int c = fgetc(f);
    return (c == EOF) ? -1 : c;
}
-
- long readutf8char(FILE *f)
- {
- byte first = readbyte(f);
- unicode u = 0;
-
- if (first >= C4bytes)
- {
- u |= (first & 0b00000111) << 18;
- u |= (readbyte(f) & 0b00111111) << 12;
- u |= (readbyte(f) & 0b00111111) << 6;
- u |= readbyte(f) & 0b00111111;
- }
- else if (first >= C3bytes)
- {
- u |= (first & 0b00001111) << 12;
- u |= (readbyte(f) & 0b00111111) << 6;
- u |= readbyte(f) & 0b00111111;
- }
- else if (first >= C2bytes)
- {
- u |= (first & 0b00011111) << 6;
- u |= readbyte(f) & 0b00111111;
- }
- else
- {
- u |= first;
- }
-
- return u;
- }
|