123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139 |
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.nio.file.Files;
- import java.nio.file.Paths;
- import java.nio.charset.StandardCharsets;
- import java.io.IOException;
-
- class Main {
- static class Match {
- int start = -1;
- int end = -1;
- String text = "";
-
- Match(int start, int end, byte[] haystack) {
- this.start = start;
- this.end = end;
- this.text = getText(haystack);
- }
-
- private String getText(byte[] haystack) {
- byte[] str = new byte[end - start + 1];
-
- for (int i = start; i <= end; ++i)
- str[i - start] = haystack[i];
-
- return new String(str, StandardCharsets.UTF_8);
- }
-
- public String toString() {
- return ""+start+"-"+end+":\t"+text;
- }
- }
-
- static ArrayList<Match> search(byte[] needle, byte[] haystack) {
- ArrayList<Match> matches = new ArrayList<>();
-
- if (needle.length > haystack.length)
- return matches;
-
- /*
- * Preprocess bad character shift
- */
-
- int[] bcs = new int[256];
-
- for (int i = 0; i < bcs.length; ++i) {
- bcs[i] = needle.length;
- }
-
- int offset = 0, scan = 0;
- int last = needle.length - 1;
- int maxoffset = haystack.length - needle.length;
- int minOffset = needle.length;
- boolean hasWildcard = false;
-
- for (int i = 0; i < last; ++i) {
- bcs[needle[i]] = last - i;
- }
-
- for (int i = needle.length; i > 0; --i) {
- if (needle[i - 1] == '_') {
- minOffset = needle.length - i - 1;
-
- if (minOffset <= 0)
- minOffset = 1;
-
- hasWildcard = true;
- break;
- }
- }
-
- /*
- * Search
- */
-
- while (offset <= maxoffset) {
- int end = last + offset;
- for (
- scan = last;
- needle[scan] == haystack[scan + offset] || needle[scan] == '_';
- scan -= 1) {
-
- if (scan == 0) {
- Match m = new Match(offset, end, haystack);
- matches.add(m);
- break;
- }
- }
- int toShift = bcs[haystack[offset + last]];
- if (hasWildcard && toShift > minOffset)
- offset += minOffset;
- else
- offset += toShift;
- }
-
- return matches;
- }
-
- static byte[] readFile(String path) throws IOException {
- byte[] bytes = Files.readAllBytes(Paths.get(path));
-
- // We don't want the last newline if it exists
- if (bytes.length > 0 && bytes[bytes.length - 1] == '\n')
- return Arrays.copyOfRange(bytes, 0, bytes.length - 1);
- else
- return bytes;
- }
-
- public static void main(String[] args) {
- if (args.length < 2) {
- System.out.println("Usage: java Main <needle> <haystack>");
- System.exit(1);
- }
-
- byte[] needle, haystack;
- try {
- needle = readFile(args[0]);
- haystack = readFile(args[1]);
- } catch (IOException ex) {
- System.out.println(ex.getMessage());
- System.exit(1);
- return;
- }
-
- if (needle.length == 0) {
- System.out.println("Empty needle");
- System.exit(1);
- }
- if (haystack.length == 0) {
- System.out.println("Empty haystack");
- System.exit(1);
- }
-
- ArrayList<Match> matches = search(needle, haystack);
-
- for (Match m: matches)
- System.out.println(m.toString());
- }
- }
|