University stuff.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

fasit.c 1.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. typedef unsigned char byte;
  4. typedef unsigned long unicode;
  5. // The minimum value of a sequence's first byte that indicates the sequence is n bytes long
  6. #define C4bytes 0xF0
  7. #define C3bytes 0xE0
  8. #define C2bytes 0xC0
  9. // The minimum value of a vm_var_char for its UTF-8 encoding to be n bytes long
  10. #define U4bytes 0x10000
  11. #define U3bytes 0x0800
  12. #define U2bytes 0x0080
  13. void writebyte(FILE *f, byte b)
  14. {
  15. fwrite(&b, 1, 1, f);
  16. }
  17. void writeutf8char(FILE *f, unicode u)
  18. {
  19. if (u < U2bytes)
  20. {
  21. writebyte(f, (byte)u);
  22. return;
  23. }
  24. int left;
  25. int a;
  26. int b;
  27. if (u >= U4bytes)
  28. {
  29. left = 18;
  30. a = 0b11110000;
  31. b = 0b00000111;
  32. }
  33. else if (u >= U3bytes)
  34. {
  35. left = 12;
  36. a = 0b11100000;
  37. b = 0b00001111;
  38. }
  39. else
  40. {
  41. left = 6;
  42. a = 0b11000000;
  43. b = 0b00011111;
  44. }
  45. while (1)
  46. {
  47. writebyte(f, a | ((u >> left) & b));
  48. if (left == 0)
  49. return;
  50. a = 0b10000000;
  51. b = 0b00111111;
  52. left -= 6;
  53. }
  54. }
  // Read one byte from the stream `f`.
  // Returns the byte as a value in the range 0..255, or -1 when no byte could
  // be read (end of file or read error).
  int readbyte(FILE *f)
  {
      int c = fgetc(f);

      if (c == EOF)
      {
          return -1;
      }
      return c;
  }
  63. long readutf8char(FILE *f)
  64. {
  65. unicode u = 0;
  66. unicode ch = readbyte(f);
  67. if (ch == -1)
  68. return -1;
  69. if (ch < C2bytes)
  70. return ch;
  71. int left;
  72. int mask;
  73. if (ch >= C4bytes)
  74. {
  75. left = 18;
  76. mask = 0b00000111;
  77. }
  78. else if (ch >= C3bytes)
  79. {
  80. left = 12;
  81. mask = 0b00001111;
  82. }
  83. else
  84. {
  85. left = 6;
  86. mask = 0b00011111;
  87. }
  88. while (1)
  89. {
  90. u |= (ch & mask) << left;
  91. if (left == 0)
  92. return u;
  93. left -= 6;
  94. mask = 0b00111111;
  95. ch = readbyte(f);
  96. }
  97. }