University stuff.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. import java.util.ArrayList;
  2. import java.util.Arrays;
  3. import java.nio.file.Files;
  4. import java.nio.file.Paths;
  5. import java.nio.charset.StandardCharsets;
  6. import java.io.IOException;
  7. class Main {
  8. static class Match {
  9. int start = -1;
  10. int end = -1;
  11. String text = "";
  12. Match(int start, int end, byte[] haystack) {
  13. this.start = start;
  14. this.end = end;
  15. this.text = getText(haystack);
  16. }
  17. private String getText(byte[] haystack) {
  18. byte[] str = new byte[end - start + 1];
  19. for (int i = start; i <= end; ++i)
  20. str[i - start] = haystack[i];
  21. return new String(str, StandardCharsets.UTF_8);
  22. }
  23. public String toString() {
  24. return ""+start+"-"+end+":\t"+text;
  25. }
  26. }
  27. static ArrayList<Match> search(byte[] needle, byte[] haystack) {
  28. ArrayList<Match> matches = new ArrayList<>();
  29. if (needle.length > haystack.length)
  30. return matches;
  31. /*
  32. * Preprocess bad character shift
  33. */
  34. int[] bcs = new int[256];
  35. for (int i = 0; i < bcs.length; ++i) {
  36. bcs[i] = needle.length;
  37. }
  38. int offset = 0, scan = 0;
  39. int last = needle.length - 1;
  40. int maxoffset = haystack.length - needle.length;
  41. int minOffset = needle.length;
  42. boolean hasWildcard = false;
  43. for (int i = 0; i < last; ++i) {
  44. bcs[needle[i]] = last - i;
  45. }
  46. for (int i = needle.length; i > 0; --i) {
  47. if (needle[i - 1] == '_') {
  48. minOffset = needle.length - i - 1;
  49. if (minOffset <= 0)
  50. minOffset = 1;
  51. hasWildcard = true;
  52. break;
  53. }
  54. }
  55. /*
  56. * Search
  57. */
  58. while (offset <= maxoffset) {
  59. int end = last + offset;
  60. for (
  61. scan = last;
  62. needle[scan] == haystack[scan + offset] || needle[scan] == '_';
  63. scan -= 1) {
  64. if (scan == 0) {
  65. Match m = new Match(offset, end, haystack);
  66. matches.add(m);
  67. break;
  68. }
  69. }
  70. int toShift = bcs[haystack[offset + last]];
  71. if (hasWildcard && toShift > minOffset)
  72. offset += minOffset;
  73. else
  74. offset += toShift;
  75. }
  76. return matches;
  77. }
  78. static byte[] readFile(String path) throws IOException {
  79. byte[] bytes = Files.readAllBytes(Paths.get(path));
  80. // We don't want the last newline if it exists
  81. if (bytes.length > 0 && bytes[bytes.length - 1] == '\n')
  82. return Arrays.copyOfRange(bytes, 0, bytes.length - 1);
  83. else
  84. return bytes;
  85. }
  86. public static void main(String[] args) {
  87. if (args.length < 2) {
  88. System.out.println("Usage: java Main <needle> <haystack>");
  89. System.exit(1);
  90. }
  91. byte[] needle, haystack;
  92. try {
  93. needle = readFile(args[0]);
  94. haystack = readFile(args[1]);
  95. } catch (IOException ex) {
  96. System.out.println(ex.getMessage());
  97. System.exit(1);
  98. return;
  99. }
  100. if (needle.length == 0) {
  101. System.out.println("Empty needle");
  102. System.exit(1);
  103. }
  104. if (haystack.length == 0) {
  105. System.out.println("Empty haystack");
  106. System.exit(1);
  107. }
  108. ArrayList<Match> matches = search(needle, haystack);
  109. for (Match m: matches)
  110. System.out.println(m.toString());
  111. }
  112. }