/*
 * Decompiled with CFR 0.152.
 */
package edu.mit.simile.vicino;

import com.wcohen.ss.api.Token;
import com.wcohen.ss.api.Tokenizer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Tokenizer that splits a string into overlapping character n-grams.
 *
 * <p>Input is normalized first (control and punctuation characters removed,
 * whitespace runs collapsed to a single space, surrounding whitespace dropped,
 * lower-cased). Every distinct n-gram is then mapped to a single shared
 * {@link Token} whose index is assigned in order of first appearance,
 * starting at 1.
 *
 * <p>The token table is plain mutable state and no synchronization is
 * performed, so an instance must not be shared between threads without
 * external locking.
 */
public class NGramTokenizer
implements Tokenizer {
    /** Number of characters in every emitted n-gram; always at least 1. */
    private final int ngram_size;
    /** Characters deleted outright during normalization. */
    static final Pattern extra = Pattern.compile("\\p{Cntrl}|\\p{Punct}");
    /** Whitespace runs that are collapsed to a single space during normalization. */
    static final Pattern whitespace = Pattern.compile("\\p{Space}+");
    /** Index handed to the most recently created token; 0 while no token exists. */
    private int nextId = 0;
    /** Lower-cased token text mapped to its unique token, kept in sorted key order. */
    private final Map<String, Token> tokMap = new TreeMap<String, Token>();

    /**
     * Creates a tokenizer that emits n-grams of the given length.
     *
     * @param ngram_size number of characters per n-gram; must be at least 1
     * @throws IllegalArgumentException if {@code ngram_size} is less than 1
     */
    public NGramTokenizer(int ngram_size) {
        // Fail fast: a size of 0 would emit only empty tokens and a negative
        // size would blow up later inside substring().
        if (ngram_size < 1) {
            throw new IllegalArgumentException("ngram_size must be at least 1, got " + ngram_size);
        }
        this.ngram_size = ngram_size;
    }

    /**
     * Normalizes {@code str} and returns one token per n-gram, in order of
     * position. A normalized string shorter than the n-gram size yields an
     * empty array.
     *
     * @param str the text to tokenize; must not be {@code null}
     * @return the tokens for every n-gram of the normalized text
     */
    public Token[] tokenize(String str) {
        String normalized = this.normalize(str);
        // Last valid start offset; negative when the text is shorter than one
        // n-gram. Computed by subtraction so that a very large n-gram size
        // cannot overflow the way "start + size" would.
        int lastStart = normalized.length() - this.ngram_size;
        ArrayList<Token> tokens = new ArrayList<Token>(Math.max(lastStart + 1, 0));
        for (int start = 0; start <= lastStart; ++start) {
            tokens.add(this.intern(normalized.substring(start, start + this.ngram_size)));
        }
        // Allocate Token[] (not BasicToken[]) so that an overriding intern()
        // may return any Token implementation without an ArrayStoreException.
        return tokens.toArray(new Token[tokens.size()]);
    }

    /**
     * Produces the canonical form of {@code s} used for n-gram extraction.
     *
     * <p>Note that tab, newline and the other ASCII control characters match
     * {@code \p{Cntrl}} and are therefore deleted by the first pass rather
     * than being turned into a space by the second.
     */
    private String normalize(String s) {
        s = extra.matcher(s).replaceAll("");
        s = whitespace.matcher(s).replaceAll(" ");
        // Trim only after stripping: removing leading/trailing punctuation can
        // expose whitespace (e.g. "hello ." -> "hello ") that would otherwise
        // leak into the first or last n-grams.
        s = s.trim();
        // Locale.ROOT keeps the result independent of the JVM default locale.
        return s.toLowerCase(Locale.ROOT);
    }

    /**
     * Returns the unique token for {@code s}, creating it on first use. The
     * lookup is case-insensitive because the text is lower-cased first.
     *
     * @param s the token text; must not be {@code null}
     * @return the shared token whose value is the lower-cased form of {@code s}
     */
    public Token intern(String s) {
        String key = s.toLowerCase(Locale.ROOT).intern();
        Token tok = this.tokMap.get(key);
        if (tok == null) {
            tok = new BasicToken(++this.nextId, key);
            this.tokMap.put(key, tok);
        }
        return tok;
    }

    /**
     * Returns an iterator over every token created so far, ordered by token
     * text (the natural ordering of the map's string keys).
     */
    public Iterator<Token> tokenIterator() {
        return this.tokMap.values().iterator();
    }

    /**
     * Returns the highest index assigned to any token so far, or 0 if no
     * token has been created yet.
     */
    public int maxTokenIndex() {
        return this.nextId;
    }

    /**
     * Token holding an n-gram's text together with the index assigned to it
     * by the enclosing tokenizer.
     *
     * <p>{@code equals} is not overridden, so equality is object identity;
     * {@code hashCode} is derived from the text only.
     */
    public class BasicToken
    implements Token,
    Comparable<Token> {
        private final int index;
        private final String value;

        BasicToken(int index, String value) {
            this.index = index;
            this.value = value;
        }

        /** Returns the token text. */
        public String getValue() {
            return this.value;
        }

        /** Returns the index assigned to this token. */
        public int getIndex() {
            return this.index;
        }

        /**
         * Orders tokens by ascending index.
         *
         * @return a negative value, zero, or a positive value as this token's
         *         index is less than, equal to, or greater than that of {@code t}
         */
        @Override
        public int compareTo(Token t) {
            int other = t.getIndex();
            // Explicit comparison instead of subtraction, which can overflow
            // when the two indices have opposite signs.
            return this.index < other ? -1 : (this.index == other ? 0 : 1);
        }

        @Override
        public int hashCode() {
            return this.value.hashCode();
        }

        @Override
        public String toString() {
            return "[token#" + this.getIndex() + ":" + this.getValue() + "]";
        }
    }
}

