项目:lams
文件:FuzzyTermsEnum.java
/**
 * Initializes the Levenshtein DFAs for every edit distance up to
 * {@code maxdistance}, if possible.
 *
 * <p>Automata already present in the list returned by {@code dfaAtt.automata()}
 * are reused; only the missing distances are built and appended to that same
 * list. Nothing is built when {@code maxdistance} exceeds
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxdistance highest edit distance an automaton is wanted for
 * @return the automata list, indexed by edit distance; it can hold fewer than
 *     {@code maxdistance + 1} entries when the distance is not supported
 */
private List<CompiledAutomaton> initAutomata(int maxdistance) {
  final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
  if (runAutomata.size() <= maxdistance
      && maxdistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    // The fuzzy part is everything after the constant prefix.
    LevenshteinAutomata builder =
        new LevenshteinAutomata(
            UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength),
            transpositions);
    // The constant prefix is the first realPrefixLength code points, so the
    // slice starts at offset 0.
    String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
    for (int i = runAutomata.size(); i <= maxdistance; i++) {
      Automaton a = builder.toAutomaton(i, prefix);
      runAutomata.add(new CompiledAutomaton(a, true, false));
    }
  }
  return runAutomata;
}
项目:lams
文件:MultiTerms.java
/**
 * Intersects every sub-Terms with the compiled automaton and merges the
 * non-null results into a single enum.
 *
 * @param compiled automaton handed to each sub's {@code intersect}
 * @param startTerm start term handed to each sub's {@code intersect}; may be null
 * @return a MultiTermsEnum over the subs that produced an enum, or
 *     {@code TermsEnum.EMPTY} when none did
 */
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  final List<MultiTermsEnum.TermsEnumIndex> matches = new ArrayList<>();
  for (int subIndex = 0; subIndex < subs.length; subIndex++) {
    final TermsEnum subEnum = subs[subIndex].intersect(compiled, startTerm);
    if (subEnum == null) {
      continue;
    }
    // Remember which sub the enum came from.
    matches.add(new MultiTermsEnum.TermsEnumIndex(subEnum, subIndex));
  }
  if (matches.isEmpty()) {
    return TermsEnum.EMPTY;
  }
  final MultiTermsEnum merged = new MultiTermsEnum(subSlices);
  return merged.reset(matches.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
}
项目:lams
文件:Terms.java
/**
 * Returns a TermsEnum that iterates over all terms that are accepted by the
 * provided {@link CompiledAutomaton}. If the <code>startTerm</code> is provided
 * then the returned enum will only accept terms &gt; <code>startTerm</code>, but
 * you still must call next() first to get to the first term. Note that the
 * provided <code>startTerm</code> must be accepted by the automaton.
 *
 * <p><b>NOTE</b>: the returned TermsEnum cannot seek.</p>
 *
 * @param compiled the automaton; its type must be {@code NORMAL}
 * @param startTerm optional start term, or null to start from the beginning
 * @return an enum over the accepted terms
 * @throws IllegalArgumentException if {@code compiled.type} is not {@code NORMAL}
 */
public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException {
  // TODO: eventually we could support seekCeil/Exact on the returned enum,
  // instead of only being able to seek at the start
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }
  if (startTerm == null) {
    return new AutomatonTermsEnum(iterator(null), compiled);
  } else {
    return new AutomatonTermsEnum(iterator(null), compiled) {
      @Override
      protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
        // A null term is replaced by startTerm before delegating to the
        // superclass.
        if (term == null) {
          term = startTerm;
        }
        return super.nextSeekTerm(term);
      }
    };
  }
}
项目:search
文件:FuzzyTermsEnum.java
/** initialize levenshtein DFAs up to maxdistance,false));
}
}
return runAutomata;
}
项目:search
文件:MultiTerms.java
@Override
public TermsEnum intersect(CompiledAutomaton compiled,i));
}
}
if (termsEnums.size() > 0) {
return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
} else {
return TermsEnum.EMPTY;
}
}
项目:search
文件:Terms.java
/** Returns a TermsEnum that iterates over all terms that
* are accepted by the provided {@link
* CompiledAutomaton}. If the <code>startTerm</code> is
* provided then the returned enum will only accept terms
* > <code>startTerm</code>,compiled) {
@Override
protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
if (term == null) {
term = startTerm;
}
return super.nextSeekTerm(term);
}
};
}
}
项目:NYBC
文件:FuzzyTermsEnum.java
/** initialize levenshtein DFAs up to maxdistance,if possible */
private List<CompiledAutomaton> initAutomata(int maxdistance) {
final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
//System.out.println("cached automata size: " + runAutomata.size());
if (runAutomata.size() <= maxdistance &&
maxdistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_disTANCE) {
LevenshteinAutomata builder =
new LevenshteinAutomata(UnicodeUtil.newString(termText,transpositions);
for (int i = runAutomata.size(); i <= maxdistance; i++) {
Automaton a = builder.toAutomaton(i);
//System.out.println("compute automaton n=" + i);
// constant prefix
if (realPrefixLength > 0) {
Automaton prefix = BasicAutomata.makeString(
UnicodeUtil.newString(termText,realPrefixLength));
a = Basicoperations.concatenate(prefix,a);
}
runAutomata.add(new CompiledAutomaton(a,false));
}
}
return runAutomata;
}
项目:NYBC
文件:MultiTerms.java
@Override
public TermsEnum intersect(CompiledAutomaton compiled,BytesRef startTerm) throws IOException {
final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<MultiTermsEnum.TermsEnumIndex>();
for(int i=0;i<subs.length;i++) {
final TermsEnum termsEnum = subs[i].intersect(compiled,i));
}
}
if (termsEnums.size() > 0) {
return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
} else {
return TermsEnum.EMPTY;
}
}
项目:NYBC
文件:Terms.java
/** Returns a TermsEnum that iterates over all terms that
* are accepted by the provided {@link
* CompiledAutomaton}. If the <code>startTerm</code> is
* provided then the returned enum will only accept terms
* > <code>startTerm</code>,compiled) {
@Override
protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
if (term == null) {
term = startTerm;
}
return super.nextSeekTerm(term);
}
};
}
}
项目:read-open-source-code
文件:FuzzyTermsEnum.java
/** initialize levenshtein DFAs up to maxdistance,false));
}
}
return runAutomata;
}
项目:read-open-source-code
文件:MultiTerms.java
@Override
public TermsEnum intersect(CompiledAutomaton compiled,i));
}
}
if (termsEnums.size() > 0) {
return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
} else {
return TermsEnum.EMPTY;
}
}
项目:read-open-source-code
文件:Terms.java
/** Returns a TermsEnum that iterates over all terms that
* are accepted by the provided {@link
* CompiledAutomaton}. If the <code>startTerm</code> is
* provided then the returned enum will only accept terms
* > <code>startTerm</code>,i));
}
}
if (termsEnums.size() > 0) {
return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
} else {
return TermsEnum.EMPTY;
}
}
项目:Maskana-Gestor-de-Conocimiento
文件:FuzzyTermsEnum.java
/** initialize levenshtein DFAs up to maxdistance,false));
}
}
return runAutomata;
}
项目:Maskana-Gestor-de-Conocimiento
文件:MultiTerms.java
@Override
public TermsEnum intersect(CompiledAutomaton compiled,i));
}
}
if (termsEnums.size() > 0) {
return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
} else {
return TermsEnum.EMPTY;
}
}
项目:Maskana-Gestor-de-Conocimiento
文件:Terms.java
/** Returns a TermsEnum that iterates over all terms that
* are accepted by the provided {@link
* CompiledAutomaton}. If the <code>startTerm</code> is
* provided then the returned enum will only accept terms
* > <code>startTerm</code>,compiled) {
@Override
protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
if (term == null) {
term = startTerm;
}
return super.nextSeekTerm(term);
}
};
}
}
项目:lams
文件:FieldReader.java
/**
 * Returns an enum over the terms of this field that the compiled automaton
 * accepts.
 *
 * @param compiled the automaton; its type must be {@code NORMAL}
 * @param startTerm start term forwarded to the enum; may be null
 * @return a new {@code IntersectTermsEnum} built from this reader
 * @throws IllegalArgumentException if {@code compiled.type} is not {@code NORMAL}
 */
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }
  return new IntersectTermsEnum(this, compiled, startTerm);
}
项目:lams
文件:FuzzyTermsEnum.java
/**
 * Builds an automaton-backed enum that matches terms within
 * {@code editdistance} edits, positioned relative to {@code lastTerm}.
 *
 * @param editdistance edit distance whose automaton should drive the enum
 * @param lastTerm term to continue from, or null to start from the beginning
 * @return the enum, or null when {@code initAutomata} produced no automaton
 *     for {@code editdistance}
 */
protected TermsEnum getAutomatonEnum(int editdistance, BytesRef lastTerm)
    throws IOException {
  final List<CompiledAutomaton> automata = initAutomata(editdistance);
  if (editdistance >= automata.size()) {
    return null;
  }
  final CompiledAutomaton target = automata.get(editdistance);
  // With a last term, the intersection starts from the automaton's floor of it.
  final BytesRef startTerm =
      (lastTerm == null) ? null : target.floor(lastTerm, new BytesRefBuilder());
  final TermsEnum accepted = terms.intersect(target, startTerm);
  // Hand over the automata for distances 0..editdistance inclusive.
  final int count = editdistance + 1;
  final CompiledAutomaton[] upToDistance =
      automata.subList(0, count).toArray(new CompiledAutomaton[count]);
  return new AutomatonFuzzyTermsEnum(accepted, upToDistance);
}
项目:lams
文件:FuzzyTermsEnum.java
public AutomatonFuzzyTermsEnum(TermsEnum tenum,CompiledAutomaton compiled[]) {
super(tenum,false);
this.matchers = new ByteRunAutomaton[compiled.length];
for (int i = 0; i < compiled.length; i++)
this.matchers[i] = compiled[i].runAutomaton;
termRef = new BytesRef(term.text());
}
项目:lams
文件:FuzzyTermsEnum.java
/**
 * Replaces the automata held by {@code target} with the ones held here.
 *
 * @param target attribute to overwrite; cast to {@code LevenshteinAutomataAttribute}
 */
@Override
public void copyTo(AttributeImpl target) {
  final LevenshteinAutomataAttribute destination = (LevenshteinAutomataAttribute) target;
  final List<CompiledAutomaton> destinationAutomata = destination.automata();
  // Drop whatever the target held before copying ours in.
  destinationAutomata.clear();
  destinationAutomata.addAll(automata);
}
项目:lams
文件:AutomatonTermsEnum.java
/**
* Construct an enumerator based upon an automaton,enumerating the specified
* field,working on a supplied TermsEnum
* <p>
* @lucene.experimental
* <p>
* @param compiled CompiledAutomaton
*/
public AutomatonTermsEnum(TermsEnum tenum,CompiledAutomaton compiled) {
super(tenum);
this.finite = compiled.finite;
this.runAutomaton = compiled.runAutomaton;
assert this.runAutomaton != null;
this.commonSuffixRef = compiled.commonSuffixRef;
this.automaton = compiled.automaton;
// used for path tracking,where each bit is a numbered state.
visited = new long[runAutomaton.getSize()];
termComp = getComparator();
}
项目:meltwater-elasticsearch-queries
文件:WildcardTermsProducer.java
/**
 * Builds an enum over the terms of {@code term}'s field, driven by the
 * automaton of the wildcard term.
 *
 * @param reader index reader to fetch the field's terms from
 * @return the enum, or null when the reader has no terms for the field
 */
private TermsEnum wildcardEnumeration(final IndexReader reader)
    throws IOException {
  final Terms fieldTerms = MultiFields.getTerms(reader, term.field());
  if (fieldTerms == null) {
    return null;
  }
  final TermsEnum allTerms = fieldTerms.iterator();
  final CompiledAutomaton wildcard =
      new CompiledAutomaton(WildcardQuery.toAutomaton(term), false, false);
  return new AutomatonTermsEnum(allTerms, wildcard);
}
项目:search
文件:FSTTermsReader.java
/**
 * Sets up the enum's FST and automaton state, loads the initial frames, and
 * positions the enum at the start or at {@code startTerm}.
 *
 * @param compiled automaton whose {@code runAutomaton} drives the intersection
 * @param startTerm term to seek to first, or null to start from the beginning
 * @throws IOException declared by the constructor; presumably raised while
 *     reading frames (confirm against the frame loaders)
 */
IntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  super();
  this.fst = dict;
  this.fstReader = fst.getBytesReader();
  this.fstOutputs = dict.outputs;
  this.fsa = compiled.runAutomaton;
  this.level = -1;
  // Pre-allocate the frame stack.
  this.stack = new Frame[16];
  for (int i = 0; i < stack.length; i++) {
    this.stack[i] = new Frame();
  }

  Frame frame;
  // The frame returned here is overwritten below; the call is kept for
  // whatever state it sets up -- NOTE(review): confirm.
  frame = loadVirtualFrame(newFrame());
  // Advance the same 'level' field initialised above.
  this.level++;
  frame = loadFirstFrame(newFrame());
  pushFrame(frame);

  // NOTE(review): 'Meta' / 'MetaUpto' break the lower-camel-case naming of the
  // other fields assigned here -- confirm against the field declarations.
  this.Meta = null;
  this.MetaUpto = 1;
  this.decoded = false;
  this.pending = false;

  if (startTerm == null) {
    pending = isAccept(topFrame());
  } else {
    doSeekCeil(startTerm);
    // Pending only when the seek did not land exactly on startTerm and the top
    // frame is both valid and accepting.
    pending = (term == null || !startTerm.equals(term.get()))
        && isValid(topFrame())
        && isAccept(topFrame());
  }
}
项目:search
文件:FSTOrdTermsReader.java
/**
 * Sets up the enum's FST and automaton state, loads the initial frames, and
 * positions the enum at the start or at {@code startTerm}.
 *
 * @param compiled automaton whose {@code runAutomaton} drives the intersection
 * @param startTerm term to seek to first, or null to start from the beginning
 * @throws IOException declared by the constructor; presumably raised while
 *     reading frames (confirm against the frame loaders)
 */
IntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  this.fst = index;
  this.fstReader = fst.getBytesReader();
  this.fstOutputs = index.outputs;
  this.fsa = compiled.runAutomaton;
  this.level = -1;
  // Pre-allocate the frame stack.
  this.stack = new Frame[16];
  for (int i = 0; i < stack.length; i++) {
    this.stack[i] = new Frame();
  }

  Frame frame;
  // The frame returned here is overwritten below; the call is kept for
  // whatever state it sets up -- NOTE(review): confirm.
  frame = loadVirtualFrame(newFrame());
  // Advance the same 'level' field initialised above.
  this.level++;
  frame = loadFirstFrame(newFrame());
  pushFrame(frame);

  this.decoded = false;
  this.pending = false;

  if (startTerm == null) {
    pending = isAccept(topFrame());
  } else {
    doSeekCeil(startTerm);
    // Pending only when the seek did not land exactly on startTerm and the top
    // frame is both valid and accepting.
    pending = (term == null || !startTerm.equals(term.get()))
        && isValid(topFrame())
        && isAccept(topFrame());
  }
}
项目:search
文件:OrdsFieldReader.java
/**
 * Returns an enum over the terms of this field that the compiled automaton
 * accepts.
 *
 * @param compiled the automaton; its type must be {@code NORMAL}
 * @param startTerm start term forwarded to the enum; may be null
 * @return a new {@code OrdsIntersectTermsEnum} built from this reader
 * @throws IllegalArgumentException if {@code compiled.type} is not {@code NORMAL}
 */
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }
  // The automaton has to reach the enum; otherwise it is only ever type-checked.
  return new OrdsIntersectTermsEnum(this, compiled, startTerm);
}
项目:search
文件:AssertingAtomicReader.java
/**
 * Delegates to the wrapped instance and wraps the result in an
 * {@code AssertingTermsEnum}.
 *
 * <p>With assertions enabled, checks that the delegate returned a non-null
 * enum and that a non-null {@code bytes} is valid.
 *
 * @param automaton automaton forwarded to the delegate
 * @param bytes start term forwarded to the delegate; may be null
 * @return the delegate's enum wrapped in an {@code AssertingTermsEnum}
 */
@Override
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws IOException {
  final TermsEnum delegateEnum = in.intersect(automaton, bytes);
  assert delegateEnum != null;
  assert bytes == null || bytes.isValid();
  final TermsEnum checked = new AssertingTermsEnum(delegateEnum);
  return checked;
}
项目:search
文件:LuceneTestCase.java
/**
 * Terms api equivalency.
 *
 * <p>When either side is null, both must be null. Otherwise the statistics,
 * the offsets/positions/payloads flags, a full enumeration and seeking are
 * compared. With {@code deep} set, intersections with at least three random
 * regular expressions are compared as well.
 *
 * @param info message prefix passed to the individual assertions
 * @param leftReader reader passed along to the enum comparisons
 * @param leftTerms first Terms to compare; may be null
 * @param rightTerms second Terms to compare; may be null
 * @param deep whether to also compare random automaton intersections
 */
public void assertTermsEquals(String info, IndexReader leftReader, Terms leftTerms, Terms rightTerms, boolean deep) throws IOException {
  if (leftTerms == null || rightTerms == null) {
    assertNull(info, leftTerms);
    assertNull(info, rightTerms);
    return;
  }
  assertTermsStatisticsEquals(info, leftTerms, rightTerms);
  assertEquals(leftTerms.hasOffsets(), rightTerms.hasOffsets());
  assertEquals(leftTerms.hasPositions(), rightTerms.hasPositions());
  assertEquals(leftTerms.hasPayloads(), rightTerms.hasPayloads());

  TermsEnum leftTermsEnum = leftTerms.iterator(null);
  TermsEnum rightTermsEnum = rightTerms.iterator(null);
  assertTermsEnumEquals(info, leftReader, leftTermsEnum, rightTermsEnum, true);

  // Seeking is compared on both sides, so both Terms are handed over.
  assertTermsSeekingEquals(info, leftTerms, rightTerms);

  if (deep) {
    int numIntersections = atLeast(3);
    for (int i = 0; i < numIntersections; i++) {
      String re = AutomatonTestUtil.randomRegexp(random());
      CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
      // Only NORMAL automata are intersected here.
      if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
        // TODO: test start term too
        TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
        TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
        // Same argument list as the full-enumeration comparison above.
        assertTermsEnumEquals(info, leftReader, leftIntersection, rightIntersection, rarely());
      }
    }
  }
}
项目:search
文件:FieldReader.java
@Override
public TermsEnum intersect(CompiledAutomaton compiled,startTerm);
}
项目:search
文件:FuzzyTermsEnum.java
项目:search
文件:FuzzyTermsEnum.java
public AutomatonFuzzyTermsEnum(TermsEnum tenum,false);
this.matchers = new ByteRunAutomaton[compiled.length];
for (int i = 0; i < compiled.length; i++)
this.matchers[i] = compiled[i].runAutomaton;
termRef = new BytesRef(term.text());
}
项目:search
文件:FuzzyTermsEnum.java
/**
 * Copies this attribute's automata into {@code target}, discarding whatever
 * the target held before.
 *
 * @param target attribute to overwrite; cast to {@code LevenshteinAutomataAttribute}
 */
@Override
public void copyTo(AttributeImpl target) {
  final LevenshteinAutomataAttribute other = (LevenshteinAutomataAttribute) target;
  final List<CompiledAutomaton> receiving = other.automata();
  receiving.clear();
  receiving.addAll(automata);
}
项目:search
文件:AutomatonTermsEnum.java
/**
* Construct an enumerator based upon an automaton,where each bit is a numbered state.
visited = new long[runAutomaton.getSize()];
termComp = getComparator();
}
项目:search
文件:TestBlockPostingsFormat3.java
public void assertTerms(Terms leftTerms,boolean deep) throws Exception {
if (leftTerms == null || rightTerms == null) {
assertNull(leftTerms);
assertNull(rightTerms);
return;
}
assertTermsstatistics(leftTerms,rightTerms);
// NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different
TermsEnum leftTermsEnum = leftTerms.iterator(null);
TermsEnum rightTermsEnum = rightTerms.iterator(null);
assertTermsEnum(leftTermsEnum,true);
assertTermsSeeking(leftTerms,null);
assertTermsEnum(leftIntersection,rarely());
}
}
}
}
项目:search
文件:TestTermsEnum.java
/**
 * Runs the bytes of {@code b} through the run automaton of {@code c} and
 * reports whether the final state is accepting.
 *
 * <p>Before each step, asserts that the current state is not -1.
 *
 * @param c compiled automaton whose {@code runAutomaton} is stepped
 * @param b bytes to feed, read from {@code b.offset} for {@code b.length} bytes
 * @return true if the state reached after the last byte is accepting
 */
private boolean accepts(CompiledAutomaton c, BytesRef b) {
  int state = c.runAutomaton.getInitialState();
  for (int idx = 0; idx < b.length; idx++) {
    assertTrue(state != -1);
    // Mask to an unsigned value; Java bytes are signed.
    state = c.runAutomaton.step(state, b.bytes[b.offset + idx] & 0xff);
  }
  return c.runAutomaton.isAccept(state);
}
项目:NYBC
文件:AssertingAtomicReader.java
/**
 * Calls the superclass implementation and wraps the result in an
 * {@code AssertingTermsEnum}.
 *
 * <p>With assertions enabled, checks that the superclass returned a non-null
 * enum and that a non-null {@code bytes} is valid.
 *
 * @param automaton automaton forwarded to the superclass
 * @param bytes start term forwarded to the superclass; may be null
 * @return the superclass's enum wrapped in an {@code AssertingTermsEnum}
 */
@Override
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws IOException {
  final TermsEnum inherited = super.intersect(automaton, bytes);
  assert inherited != null;
  assert bytes == null || bytes.isValid();
  final TermsEnum checked = new AssertingTermsEnum(inherited);
  return checked;
}