项目:elasticsearch_my
文件:XAnalyzingSuggester.java
/**
 * Hands every buffered surface form of the current analyzed term to the FST builder and then
 * resets the per-term buffers ({@code seenSurfaceForms}, {@code count}).
 *
 * <p>Before the loop, {@code analyzed} is extended via {@code append((byte) 0)} followed by
 * {@code setLength(length + 1)}; inside the loop the last byte is overwritten with the position
 * of the entry being added, so entries sharing the same analyzed form get distinct inputs.
 *
 * @param defaultWeight weight used for entries whose own weight is {@code -1}; it is capped at
 *     {@link Integer#MAX_VALUE} and passed through {@code encodeWeight}
 * @throws IOException propagated from the underlying calls
 */
public void finishTerm(long defaultWeight) throws IOException {
  ArrayUtil.timsort(surfaceFormsAndPayload, count);
  analyzed.append((byte) 0);
  analyzed.setLength(analyzed.length() + 1);
  analyzed.grow(analyzed.length());
  for (int slot = 0; slot < count; slot++) {
    // The trailing byte doubles as a de-duplication counter: it simply tracks the entry position.
    analyzed.setByteAt(analyzed.length() - 1, (byte) slot);
    Util.toIntsRef(analyzed.get(), scratchInts);
    final SurfaceFormAndPayload entry = surfaceFormsAndPayload[slot];
    final long cost;
    if (entry.weight == -1) {
      cost = encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight));
    } else {
      cost = entry.weight;
    }
    builder.add(scratchInts.get(), outputs.newPair(cost, entry.payload));
  }
  seenSurfaceForms.clear();
  count = 0;
}
/**
 * Compiles all pending match/replacement pairs into an FST and wraps it in a new
 * normalizeCharMap. Call this once you are done calling {@link #add}; the pending pairs are
 * cleared after the FST has been built.
 */
public normalizeCharMap build() {
  try {
    final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
        new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, charOutputs);
    final IntsRefBuilder keyScratch = new IntsRefBuilder();
    for (Map.Entry<String,String> pair : pendingPairs.entrySet()) {
      final String match = pair.getKey();
      final String replacement = pair.getValue();
      fstBuilder.add(Util.toUTF16(match, keyScratch), new CharsRef(replacement));
    }
    final FST<CharsRef> compiled = fstBuilder.finish();
    pendingPairs.clear();
    return new normalizeCharMap(compiled);
  } catch (IOException ioe) {
    // The FST API declares IOException; the original author notes it will never happen here.
    throw new RuntimeException(ioe);
  }
}
项目:lams
文件:Dictionary.java
/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST that maps
 * each line's second whitespace-separated field to its third field.
 *
 * @param reader source of the conversion lines; its line number is reported in parse errors
 * @param num number of lines to consume
 * @return the FST produced by the builder's {@code finish()} call
 * @throws IOException if reading from {@code reader} fails
 * @throws ParseException if the input ends before {@code num} lines were read, or a line does
 *     not split into exactly three fields
 * @throws IllegalStateException if the same key is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  // TreeMap iterates keys in sorted order; presumably the FST builder needs sorted input.
  Map<String,String> mappings = new TreeMap<>();
  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // readLine() returns null at end of stream; report it instead of failing with an NPE.
      throw new ParseException(
          "unexpected end of input: expected " + num + " conversion lines, found " + i,
          reader.getLineNumber());
    }
    String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid Syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }
  return builder.finish();
}
项目:Elasticsearch
文件:XAnalyzingSuggester.java
public void finishTerm(long defaultWeight) throws IOException {
ArrayUtil.timsort(surfaceFormsAndPayload,candiate.payload));
}
seenSurfaceForms.clear();
count = 0;
}
项目:search
文件:FSTTermsWriter.java
/**
 * Records the metadata of a finished term and adds the term to the FST builder.
 *
 * <p>The term's doc frequency and total term frequency are copied into both a fresh postings
 * term state and a fresh {@code TermData}; after the postings writer has encoded the term, any
 * bytes reported by {@code MetaWriter.getFilePointer()} are moved into the term data and the
 * writer is reset.
 *
 * @param text the term that was just finished
 * @param stats frequency statistics of that term
 * @throws IOException propagated from the postings writer or the builder
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  final BlockTermState termState = postingsWriter.newTermState();
  final FSTTermOutputs.TermData termMeta = new FSTTermOutputs.TermData();
  termMeta.longs = new long[longsSize];
  termMeta.bytes = null;

  termState.docFreq = stats.docFreq;
  termMeta.docFreq = termState.docFreq;
  termState.totalTermFreq = stats.totalTermFreq;
  termMeta.totalTermFreq = termState.totalTermFreq;

  postingsWriter.finishTerm(termState);
  postingsWriter.encodeTerm(termMeta.longs, MetaWriter, fieldInfo, termState, true);

  final int pendingBytes = (int) MetaWriter.getFilePointer();
  if (pendingBytes > 0) {
    termMeta.bytes = new byte[pendingBytes];
    MetaWriter.writeto(termMeta.bytes, 0);
    MetaWriter.reset();
  }

  builder.add(Util.toIntsRef(text, scratchTerm), termMeta);
  numTerms++;
}
项目:search
文件:MemoryDocValuesConsumer.java
private void writeFST(FieldInfo field,Iterable<BytesRef> values) throws IOException {
Meta.writeVInt(field.number);
Meta.writeByte(FST);
Meta.writeLong(data.getFilePointer());
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<>(INPUT_TYPE.BYTE1,outputs);
IntsRefBuilder scratch = new IntsRefBuilder();
long ord = 0;
for (BytesRef v : values) {
builder.add(Util.toIntsRef(v,ord);
ord++;
}
FST<Long> fst = builder.finish();
if (fst != null) {
fst.save(data);
}
Meta.writeVLong(ord);
}
/** Builds the normalizeCharMap; call this once you
* are done calling {@link #add}. */
public normalizeCharMap build() {
final FST<CharsRef> map;
try {
final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2,new CharsRef(ent.getValue()));
}
map = builder.finish();
pendingPairs.clear();
} catch (IOException ioe) {
// Bogus FST IOExceptions!! (will never happen)
throw new RuntimeException(ioe);
}
return new normalizeCharMap(map);
}
项目:search
文件:Dictionary.java
private FST<CharsRef> parseConversions(LineNumberReader reader,new CharsRef(entry.getValue()));
}
return builder.finish();
}
项目:search
文件:Lucene42DocValuesConsumer.java
private void writeFST(FieldInfo field,ord);
ord++;
}
FST<Long> fst = builder.finish();
if (fst != null) {
fst.save(data);
}
Meta.writeVLong(ord);
}
项目:NYBC
文件:WFSTCompletionLookup.java
/**
 * Builds the suggester FST from the given terms.
 *
 * <p>The iterator is wrapped in a {@code WFSTTermFreqIteratorWrapper}; each term it yields is
 * added to the builder with the weight reported by the wrapper, except that a term equal to the
 * one added immediately before it is skipped.
 *
 * @param iterator source of terms and weights
 * @throws IOException propagated from the iterator or the builder
 */
@Override
public void build(TermFreqIterator iterator) throws IOException {
  BytesRef term = new BytesRef();
  TermFreqIterator wrapped = new WFSTTermFreqIteratorWrapper(iterator);
  IntsRef termInts = new IntsRef();
  BytesRef lastAdded = null;
  PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
  Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
  while ((term = wrapped.next()) != null) {
    long cost = wrapped.weight();
    if (lastAdded != null && term.equals(lastAdded)) {
      // for duplicate suggestions, the best weight is actually added
      continue;
    }
    if (lastAdded == null) {
      lastAdded = new BytesRef();
    }
    Util.toIntsRef(term, termInts);
    fstBuilder.add(termInts, cost);
    lastAdded.copyBytes(term);
  }
  fst = fstBuilder.finish();
}
/** Builds the normalizeCharMap; call this once you
* are done calling {@link #add}. */
public normalizeCharMap build() {
final FST<CharsRef> map;
try {
final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2,outputs);
final IntsRef scratch = new IntsRef();
for(Map.Entry<String,new CharsRef(ent.getValue()));
}
map = builder.finish();
pendingPairs.clear();
} catch (IOException ioe) {
// Bogus FST IOExceptions!! (will never happen)
throw new RuntimeException(ioe);
}
return new normalizeCharMap(map);
}
项目:NYBC
文件:Lucene42DocValuesConsumer.java
private void writeFST(FieldInfo field,Iterable<BytesRef> values) throws IOException {
Meta.writeVInt(field.number);
Meta.writeByte(FST);
Meta.writeLong(data.getFilePointer());
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1,outputs);
IntsRef scratch = new IntsRef();
long ord = 0;
for (BytesRef v : values) {
builder.add(Util.toIntsRef(v,ord);
ord++;
}
FST<Long> fst = builder.finish();
if (fst != null) {
fst.save(data);
}
Meta.writeVLong(ord);
}
项目:read-open-source-code
文件:FSTTermsWriter.java
@Override
public void finishTerm(BytesRef text,Meta);
numTerms++;
}
项目:read-open-source-code
文件:MemoryDocValuesConsumer.java
private void writeFST(FieldInfo field,Iterable<BytesRef> values) throws IOException {
Meta.writeVInt(field.number);
Meta.writeByte(FST);
Meta.writeLong(data.getFilePointer());
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1,ord);
ord++;
}
FST<Long> fst = builder.finish();
if (fst != null) {
fst.save(data);
}
Meta.writeVLong(ord);
}
/** Builds the normalizeCharMap; call this once you
* are done calling {@link #add}. */
public normalizeCharMap build() {
final FST<CharsRef> map;
try {
final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2,new CharsRef(ent.getValue()));
}
map = builder.finish();
pendingPairs.clear();
} catch (IOException ioe) {
// Bogus FST IOExceptions!! (will never happen)
throw new RuntimeException(ioe);
}
return new normalizeCharMap(map);
}
项目:read-open-source-code
文件:MemoryDocValuesConsumer.java
private void writeFST(FieldInfo field,ord);
ord++;
}
FST<Long> fst = builder.finish();
if (fst != null) {
fst.save(data);
}
Meta.writeVLong(ord);
}
/** Builds the normalizeCharMap; call this once you
* are done calling {@link #add}. */
public normalizeCharMap build() {
final FST<CharsRef> map;
try {
final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2,new CharsRef(ent.getValue()));
}
map = builder.finish();
pendingPairs.clear();
} catch (IOException ioe) {
// Bogus FST IOExceptions!! (will never happen)
throw new RuntimeException(ioe);
}
return new normalizeCharMap(map);
}
项目:read-open-source-code
文件:Dictionary.java
private FST<CharsRef> parseConversions(LineNumberReader reader,new CharsRef(entry.getValue()));
}
return builder.finish();
}
项目:Maskana-Gestor-de-Conocimiento
文件:MemoryDocValuesConsumer.java
private void writeFST(FieldInfo field,ord);
ord++;
}
FST<Long> fst = builder.finish();
if (fst != null) {
fst.save(data);
}
Meta.writeVLong(ord);
}
项目:Maskana-Gestor-de-Conocimiento
文件:WFSTCompletionLookup.java
@Override
public void build(InputIterator iterator) throws IOException {
if (iterator.hasPayloads()) {
throw new IllegalArgumentException("this suggester doesn't support payloads");
}
BytesRef scratch = new BytesRef();
InputIterator iter = new WFSTInputIterator(iterator);
IntsRef scratchInts = new IntsRef();
BytesRef prevIoUs = null;
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1,cost);
prevIoUs.copyBytes(scratch);
}
fst = builder.finish();
}
/** Builds the normalizeCharMap; call this once you
* are done calling {@link #add}. */
public normalizeCharMap build() {
final FST<CharsRef> map;
try {
final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2,new CharsRef(ent.getValue()));
}
map = builder.finish();
pendingPairs.clear();
} catch (IOException ioe) {
// Bogus FST IOExceptions!! (will never happen)
throw new RuntimeException(ioe);
}
return new normalizeCharMap(map);
}
项目:Maskana-Gestor-de-Conocimiento
文件:Lucene42DocValuesConsumer.java
private void writeFST(FieldInfo field,ord);
ord++;
}
FST<Long> fst = builder.finish();
if (fst != null) {
fst.save(data);
}
Meta.writeVLong(ord);
}
项目:lams
文件:BlockTreeTermsWriter.java
/**
 * Copies every input/output pair of {@code subIndex} into {@code builder}, in the order the
 * sub-index enumerates them.
 *
 * @param builder destination FST builder
 * @param subIndex FST whose entries are re-added
 * @param scratchIntsRef reusable buffer for converting each input to ints
 * @throws IOException propagated from the enumeration or the builder
 */
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<BytesRef> entries = new BytesRefFSTEnum<>(subIndex);
  for (BytesRefFSTEnum.InputOutput<BytesRef> entry = entries.next();
      entry != null;
      entry = entries.next()) {
    builder.add(Util.toIntsRef(entry.input, scratchIntsRef), entry.output);
  }
}
项目:lams
文件:Lucene42DocValuesProducer.java
/**
 * Seeks the wrapped enum {@code in} to the term that the FST associates with {@code ord}.
 *
 * <p>The lookup result is converted to bytes through a fresh {@code BytesRefBuilder} and handed
 * to {@code in.seekExact}.
 *
 * @param ord output value to look up in the FST
 * @throws IOException propagated from the FST lookup or the wrapped enum
 */
@Override
public void seekExact(long ord) throws IOException {
  // TODO: would be better to make this simpler and faster,
  // but we don't want to introduce a bug that corrupts our enum state!
  bytesReader.setPosition(0);
  fst.getFirstArc(firstArc);
  final IntsRef termInts =
      Util.getByOutput(fst, ord, bytesReader, firstArc, scratchArc, scratchInts);
  final BytesRefBuilder termBytes = new BytesRefBuilder();
  termBytes.clear();
  Util.toBytesRef(termInts, termBytes);
  // TODO: we could do this lazily; better to try to push into FSTEnum though?
  in.seekExact(termBytes.get());
}
项目:lams
文件:Dictionary.java
/**
 * Compiles the given map into an FST whose inputs are the map keys (as UTF-32 code points) and
 * whose outputs are the corresponding integer lists, in the map's iteration order.
 *
 * @param affixes key to integer-list mapping to compile
 * @return the FST produced by the builder's {@code finish()} call
 * @throws IOException propagated from the builder
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  final IntSequenceOutputs intOutputs = IntSequenceOutputs.getSingleton();
  final Builder<IntsRef> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE4, intOutputs);
  final IntsRefBuilder keyScratch = new IntsRefBuilder();
  for (Map.Entry<String,List<Integer>> affix : affixes.entrySet()) {
    Util.toUTF32(affix.getKey(), keyScratch);
    final List<Integer> values = affix.getValue();
    // Pack the boxed list into an IntsRef sized to hold exactly its elements.
    final IntsRef packed = new IntsRef(values.size());
    for (Integer value : values) {
      packed.ints[packed.length] = value;
      packed.length++;
    }
    fstBuilder.add(keyScratch.get(), packed);
  }
  return fstBuilder.finish();
}
项目:Elasticsearch
文件:CompletionTokenStream.java
/**
 * Emits the next finite string of the input as a token.
 *
 * <p>On the first call the input is expanded into its set of finite strings; the size of that
 * set becomes the position increment of the first emitted token, and all later tokens get an
 * increment of 0.
 *
 * @return {@code true} if a token was emitted, {@code false} once the finite strings are used up
 * @throws IllegalArgumentException if the input expands to more than {@code MAX_PATHS} strings
 * @throws IOException propagated from the expansion of the input
 */
@Override
public boolean incrementToken() throws IOException {
  clearattributes();
  if (finiteStrings == null) {
    final Set<IntsRef> paths = toFiniteStrings.toFiniteStrings(input);
    final int pathCount = paths.size();
    if (pathCount > MAX_PATHS) {
      throw new IllegalArgumentException("TokenStream expanded to " + pathCount + " finite strings. Only <= " + MAX_PATHS
          + " finite strings are supported");
    }
    posInc = pathCount;
    finiteStrings = paths.iterator();
  }

  if (!finiteStrings.hasNext()) {
    return false;
  }

  posAttr.setPositionIncrement(posInc);
  /*
   * this posInc encodes the number of paths that this surface form
   * produced. Multi Fields have the same surface form and therefore sum up
   */
  posInc = 0;
  Util.toBytesRef(finiteStrings.next(), bytesAtt.builder()); // now we have UTF-8
  if (charTermAttribute != null) {
    charTermAttribute.setLength(0);
    charTermAttribute.append(bytesAtt.toUTF16());
  }
  if (payload != null) {
    payloadAttr.setPayload(this.payload);
  }
  return true;
}
项目:search
文件:MemoryDocValuesProducer.java
/**
 * Seeks the wrapped enum {@code in} to the term obtained by converting the result of
 * {@code Util.getByOutput} into a {@code BytesRef}.
 *
 * @param ord requested ordinal. NOTE(review): as written, {@code ord} is never passed to
 *     {@code Util.getByOutput}; the argument list on that call looks truncated — confirm
 *     against the upstream source before relying on this snippet.
 * @throws IOException declared by this method; presumably raised by the FST lookup or by
 *     {@code in.seekExact}
 */
@Override
public void seekExact(long ord) throws IOException {
// Todo: would be better to make this simpler and faster.
// but we dont want to introduce a bug that corrupts our enum state!
// Reset the FST byte reader to position 0 and fetch the FST's first arc before the lookup.
bytesReader.setPosition(0);
fst.getFirstArc(firstArc);
IntsRef output = Util.getByOutput(fst,scratchInts);
// Todo: we Could do this lazily,better to try to push into FSTEnum though?
// A fresh BytesRefBuilder is allocated on every call to hold the converted term bytes.
in.seekExact(Util.toBytesRef(output,new BytesRefBuilder()));
}
项目:search
文件:FSTTermsReader.java
/**
 * Load frame for target arc(node) on fst, so that
 * arc.label >= label and !fsa.reject(arc.label).
 *
 * <p>Returns {@code null} when {@code Util.readCeilArc} finds no arc. When the automaton step
 * for the found arc's label yields {@code -1}, the result of {@code loadNextFrame(top, frame)}
 * is returned instead of {@code frame}.
 */
Frame loadCeilFrame(int label, Frame top, Frame frame) throws IOException {
  final FST.Arc<FSTTermOutputs.TermData> ceilArc =
      Util.readCeilArc(label, fst, top.fstArc, frame.fstArc, fstReader);
  if (ceilArc == null) {
    return null;
  }
  frame.fsaState = fsa.step(top.fsaState, ceilArc.label);
  //if (TEST) System.out.println(" loadCeil frame="+frame);
  final boolean rejected = frame.fsaState == -1;
  return rejected ? loadNextFrame(top, frame) : frame;
}
项目:search
文件:FSTOrdTermsWriter.java
/**
 * Records the statistics and metadata of a finished term and registers the term in the builder.
 *
 * <p>Statistics written to {@code statsOut}: when {@code totalTermFreq > 0}, {@code docFreq} is
 * shifted left by one and the low bit is 1 if {@code totalTermFreq == docFreq}; otherwise the
 * low bit is 0 and the difference {@code totalTermFreq - docFreq} follows as a VLong. When
 * {@code totalTermFreq <= 0}, {@code docFreq} is written unshifted.
 *
 * <p>NOTE(review): this snippet appears truncated by extraction — the {@code builder.add} call
 * near the end has unbalanced parentheses, and {@code state} is not passed to
 * {@code encodeTerm}. Restore both calls from the upstream source before compiling.
 *
 * @param text the term that was just finished
 * @param stats frequency statistics of that term
 * @throws IOException declared by this method; presumably raised by the underlying outputs
 */
@Override
public void finishTerm(BytesRef text,TermStats stats) throws IOException {
// Call bufferSkip() before every SKIP_INTERVAL-th term, except before the very first one.
if (numTerms > 0 && numTerms % SKIP_INTERVAL == 0) {
bufferSkip();
}
// write term Meta data into fst
final long longs[] = new long[longsSize];
final long delta = stats.totalTermFreq - stats.docFreq;
if (stats.totalTermFreq > 0) {
if (delta == 0) {
// Low bit 1: totalTermFreq equals docFreq, so no extra value is written.
statsOut.writeVInt(stats.docFreq<<1|1);
} else {
// Low bit 0: the difference totalTermFreq - docFreq follows.
statsOut.writeVInt(stats.docFreq<<1|0);
statsOut.writeVLong(stats.totalTermFreq-stats.docFreq);
}
} else {
statsOut.writeVInt(stats.docFreq);
}
BlockTermState state = postingsWriter.newTermState();
state.docFreq = stats.docFreq;
state.totalTermFreq = stats.totalTermFreq;
postingsWriter.finishTerm(state);
postingsWriter.encodeTerm(longs,MetaBytesOut,true);
// Each long is written as the difference from the value recorded for the previous term.
for (int i = 0; i < longsSize; i++) {
MetaLongsOut.writeVLong(longs[i] - lastLongs[i]);
lastLongs[i] = longs[i];
}
// Distance the MetaBytesOut file pointer advanced since the previous term.
MetaLongsOut.writeVLong(MetaBytesOut.getFilePointer() - lastMetaBytesFP);
builder.add(Util.toIntsRef(text,numTerms);
numTerms++;
lastMetaBytesFP = MetaBytesOut.getFilePointer();
}
项目:search
文件:FSTOrdTermsReader.java
/** Load frame for target arc(node) on fst,Frame frame) throws IOException {
FST.Arc<Long> arc = frame.arc;
arc = Util.readCeilArc(label,top.arc,fstReader);
if (arc == null) {
return null;
}
frame.state = fsa.step(top.state,arc.label);
//if (TEST) System.out.println(" loadCeil frame="+frame);
if (frame.state == -1) {
return loadNextFrame(top,frame);
}
return frame;
}
项目:search
文件:OrdsBlockTreeTermsWriter.java
private void append(Builder<Output> builder,FST<Output> subIndex,long termOrdOffset,IntsRefBuilder scratchIntsRef) throws IOException {
final BytesRefFSTEnum<Output> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
BytesRefFSTEnum.InputOutput<Output> indexEnt;
while ((indexEnt = subIndexEnum.next()) != null) {
//if (DEBUG) {
// System.out.println(" add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
//}
Output output = indexEnt.output;
long blockTermCount = output.endOrd - output.startOrd + 1;
Output newOutput = FST_OUTPUTS.newOutput(output.bytes,termOrdOffset+output.startOrd,output.endOrd-termOrdOffset);
//System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output + " termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput=" + newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
builder.add(Util.toIntsRef(indexEnt.input,newOutput);
}
}
项目:search
文件:VariableGapTermsIndexReader.java
private void loadTermsIndex() throws IOException {
if (fst == null) {
IndexInput clone = in.clone();
clone.seek(indexStart);
fst = new FST<>(clone,fstOutputs);
clone.close();
/*
final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
Util.todot(fst,w,false,false);
System.out.println("FST INDEX: SAVED to " + dotFileName);
w.close();
*/
if (indexDivisor > 1) {
// subsample
final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1,outputs);
final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
BytesRefFSTEnum.InputOutput<Long> result;
int count = indexDivisor;
while((result = fstEnum.next()) != null) {
if (count == indexDivisor) {
builder.add(Util.toIntsRef(result.input,result.output);
count = 0;
}
count++;
}
fst = builder.finish();
}
}
}
项目:search
文件:VersionBlockTreeTermsWriter.java
private void append(Builder<Pair<BytesRef,Long>> builder,FST<Pair<BytesRef,Long>> subIndex,IntsRefBuilder scratchIntsRef) throws IOException {
final BytesRefFSTEnum<Pair<BytesRef,Long>> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
BytesRefFSTEnum.InputOutput<Pair<BytesRef,Long>> indexEnt;
while((indexEnt = subIndexEnum.next()) != null) {
//if (DEBUG) {
// System.out.println(" add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
//}
builder.add(Util.toIntsRef(indexEnt.input,indexEnt.output);
}
}
项目:search
文件:Dictionary.java
private FST<IntsRef> affixFST(TreeMap<String,output);
}
return builder.finish();
}
项目:search
文件:BlockTreeTermsWriter.java
private void append(Builder<BytesRef> builder,indexEnt.output);
}
}
项目:search
文件:Lucene42DocValuesProducer.java
/**
 * Seeks the wrapped enum {@code in} to the bytes held by {@code scratchBytes}.
 *
 * <p>NOTE(review): this snippet is corrupted by extraction — the {@code Util.getByOutput} call
 * is fused with the tail of a comment, so its arguments and the statements that fill
 * {@code scratchBytes} are missing. Restore them from the upstream source before compiling.
 *
 * @param ord requested ordinal; not referenced by any intact statement in this snippet
 * @throws IOException declared by this method
 */
@Override
public void seekExact(long ord) throws IOException {
// Todo: would be better to make this simpler and faster.
// but we dont want to introduce a bug that corrupts our enum state!
// Reset the FST byte reader to position 0 and fetch the FST's first arc before the lookup.
bytesReader.setPosition(0);
fst.getFirstArc(firstArc);
IntsRef output = Util.getByOutput(fst,better to try to push into FSTEnum though?
in.seekExact(scratchBytes.get());
}