org.apache.lucene.util.fst.Util的实例源码

项目:elasticsearch_my    文件XAnalyzingSuggester.java   
/**
 * Flushes the surface forms buffered for the current analyzed term into the FST builder
 * and resets the per-term buffers.
 *
 * <p>The analyzed bytes are extended with a 0 byte plus one extra trailing byte; that
 * trailing byte is overwritten with a running counter for every buffered entry before
 * the entry is added.
 *
 * @param defaultWeight weight applied (capped at {@link Integer#MAX_VALUE} and passed
 *                      through {@code encodeWeight}) to entries whose weight is {@code -1}
 * @throws IOException propagated from the FST builder calls
 */
public void finishTerm(long defaultWeight) throws IOException {
    ArrayUtil.timsort(surfaceFormsAndPayload, count);

    // Append the 0 byte, then make room for the trailing counter byte.
    analyzed.append((byte) 0);
    analyzed.setLength(analyzed.length() + 1);
    analyzed.grow(analyzed.length());

    for (int slot = 0; slot < count; slot++) {
        // The loop index doubles as the counter written into the last byte.
        analyzed.setByteAt(analyzed.length() - 1, (byte) slot);
        Util.toIntsRef(analyzed.get(), scratchInts);

        final SurfaceFormAndPayload entry = surfaceFormsAndPayload[slot];
        final long cost;
        if (entry.weight == -1) {
            cost = encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight));
        } else {
            cost = entry.weight;
        }
        builder.add(scratchInts.get(), outputs.newPair(cost, entry.payload));
    }

    seenSurfaceForms.clear();
    count = 0;
}
项目:lams    文件normalizeCharMap.java   
/**
 * Compiles every pending pair into an FST and wraps it in a normalizeCharMap.
 * Call this once you are done calling {@link #add}; the pending pairs are
 * cleared after the FST has been finished.
 */
public normalizeCharMap build() {
  try {
    final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
        new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, charOutputs);
    final IntsRefBuilder keyInts = new IntsRefBuilder();

    for (Map.Entry<String,String> pair : pendingPairs.entrySet()) {
      final String match = pair.getKey();
      final String replacement = pair.getValue();
      fstBuilder.add(Util.toUTF16(match, keyInts), new CharsRef(replacement));
    }

    final FST<CharsRef> compiled = fstBuilder.finish();
    pendingPairs.clear();
    return new normalizeCharMap(compiled);
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }
}
项目:lams    文件Dictionary.java   
/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST
 * that maps the second whitespace-separated column of each line to the third.
 *
 * @param reader source of the conversion lines; its line number is reported in parse errors
 * @param num    number of lines to consume
 * @return the finished FST, as returned by {@code Builder.finish()}
 * @throws IOException           if reading from {@code reader} fails
 * @throws ParseException        if the input ends before {@code num} lines were read, or a
 *                               line does not consist of exactly three columns
 * @throws IllegalStateException if the same input string is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader,int num) throws IOException,ParseException {
  // Sorted map: the entries are fed to the FST builder in key order below.
  final Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    final String line = reader.readLine();
    if (line == null) {
      // readLine() returns null at end of stream; surface that as a parse error, not an NPE.
      throw new ParseException("unexpected end of input: expected " + num + " conversion lines but found " + i,reader.getLineNumber());
    }
    final String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid Syntax: " + line,reader.getLineNumber());
    }
    if (mappings.put(parts[1],parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  final Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2,outputs);
  final IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(),scratchInts);
    builder.add(scratchInts.get(),new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
项目:Elasticsearch    文件XAnalyzingSuggester.java   
public void finishTerm(long defaultWeight) throws IOException {
    ArrayUtil.timsort(surfaceFormsAndPayload,candiate.payload));
    }
    seenSurfaceForms.clear();
    count = 0;
}
项目:search    文件FSTTermsWriter.java   
/**
 * Captures the metadata of a finished term and adds the term, with that metadata as
 * its output, to the FST builder.
 *
 * @param text  the term bytes
 * @param stats supplies the docFreq and totalTermFreq stored for the term
 * @throws IOException propagated from the postings writer or the FST builder
 */
@Override
public void finishTerm(BytesRef text,TermStats stats) throws IOException {
  final BlockTermState termState = postingsWriter.newTermState();
  termState.docFreq = stats.docFreq;
  termState.totalTermFreq = stats.totalTermFreq;

  final FSTTermOutputs.TermData termData = new FSTTermOutputs.TermData();
  termData.longs = new long[longsSize];
  termData.bytes = null;
  termData.docFreq = termState.docFreq;
  termData.totalTermFreq = termState.totalTermFreq;

  postingsWriter.finishTerm(termState);
  postingsWriter.encodeTerm(termData.longs, MetaWriter, fieldInfo, termState, true);

  // Whatever encodeTerm left in MetaWriter is copied out and the writer reset.
  final int encodedSize = (int) MetaWriter.getFilePointer();
  if (encodedSize > 0) {
    final byte[] encoded = new byte[encodedSize];
    MetaWriter.writeto(encoded, 0);
    MetaWriter.reset();
    termData.bytes = encoded;
  }

  builder.add(Util.toIntsRef(text, scratchTerm), termData);
  numTerms++;
}
项目:search    文件MemoryDocValuesConsumer.java   
private void writeFST(FieldInfo field,Iterable<BytesRef> values) throws IOException {
  Meta.writeVInt(field.number);
  Meta.writeByte(FST);
  Meta.writeLong(data.getFilePointer());
  PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  Builder<Long> builder = new Builder<>(INPUT_TYPE.BYTE1,outputs);
  IntsRefBuilder scratch = new IntsRefBuilder();
  long ord = 0;
  for (BytesRef v : values) {
    builder.add(Util.toIntsRef(v,ord);
    ord++;
  }
  FST<Long> fst = builder.finish();
  if (fst != null) {
    fst.save(data);
  }
  Meta.writeVLong(ord);
}
项目:search    文件normalizeCharMap.java   
/** Builds the normalizeCharMap; call this once you
 *  are done calling {@link #add}. */
public normalizeCharMap build() {

  final FST<CharsRef> map;
  try {
    final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2,new CharsRef(ent.getValue()));
    }
    map = builder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }

  return new normalizeCharMap(map);
}
项目:search    文件Dictionary.java   
private FST<CharsRef> parseConversions(LineNumberReader reader,new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
项目:search    文件Lucene42DocValuesConsumer.java   
private void writeFST(FieldInfo field,ord);
    ord++;
  }
  FST<Long> fst = builder.finish();
  if (fst != null) {
    fst.save(data);
  }
  Meta.writeVLong(ord);
}
项目:NYBC    文件WFSTCompletionLookup.java   
/**
 * Builds the FST from the given iterator, adding each distinct term with the weight
 * reported for its first occurrence; a term equal to the previously added one is skipped.
 *
 * @param iterator source of terms and weights, wrapped in a WFSTTermFreqIteratorWrapper
 * @throws IOException propagated from the iterator or the FST builder
 */
@Override
public void build(TermFreqIterator iterator) throws IOException {
  final TermFreqIterator wrapped = new WFSTTermFreqIteratorWrapper(iterator);
  final IntsRef termInts = new IntsRef();
  final PositiveIntOutputs weightOutputs = PositiveIntOutputs.getSingleton(true);
  final Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, weightOutputs);

  BytesRef lastAdded = null;
  for (BytesRef term = wrapped.next(); term != null; term = wrapped.next()) {
    final long cost = wrapped.weight();

    if (lastAdded != null && term.equals(lastAdded)) {
      // for duplicate suggestions, the best weight is actually added
      continue;
    }
    if (lastAdded == null) {
      lastAdded = new BytesRef();
    }

    Util.toIntsRef(term, termInts);
    fstBuilder.add(termInts, cost);
    lastAdded.copyBytes(term);
  }
  fst = fstBuilder.finish();
}
项目:NYBC    文件normalizeCharMap.java   
/** Builds the normalizeCharMap; call this once you
 *  are done calling {@link #add}. */
public normalizeCharMap build() {

  final FST<CharsRef> map;
  try {
    final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2,outputs);
    final IntsRef scratch = new IntsRef();
    for(Map.Entry<String,new CharsRef(ent.getValue()));
    }
    map = builder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }

  return new normalizeCharMap(map);
}
项目:NYBC    文件Lucene42DocValuesConsumer.java   
private void writeFST(FieldInfo field,Iterable<BytesRef> values) throws IOException {
  Meta.writeVInt(field.number);
  Meta.writeByte(FST);
  Meta.writeLong(data.getFilePointer());
  PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
  Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1,outputs);
  IntsRef scratch = new IntsRef();
  long ord = 0;
  for (BytesRef v : values) {
    builder.add(Util.toIntsRef(v,ord);
    ord++;
  }
  FST<Long> fst = builder.finish();
  if (fst != null) {
    fst.save(data);
  }
  Meta.writeVLong(ord);
}
项目:read-open-source-code    文件FSTTermsWriter.java   
@Override
public void finishTerm(BytesRef text,Meta);
  numTerms++;
}
项目:read-open-source-code    文件MemoryDocValuesConsumer.java   
private void writeFST(FieldInfo field,Iterable<BytesRef> values) throws IOException {
  Meta.writeVInt(field.number);
  Meta.writeByte(FST);
  Meta.writeLong(data.getFilePointer());
  PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1,ord);
    ord++;
  }
  FST<Long> fst = builder.finish();
  if (fst != null) {
    fst.save(data);
  }
  Meta.writeVLong(ord);
}
项目:read-open-source-code    文件normalizeCharMap.java   
/** Builds the normalizeCharMap; call this once you
 *  are done calling {@link #add}. */
public normalizeCharMap build() {

  final FST<CharsRef> map;
  try {
    final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2,new CharsRef(ent.getValue()));
    }
    map = builder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }

  return new normalizeCharMap(map);
}
项目:read-open-source-code    文件MemoryDocValuesConsumer.java   
private void writeFST(FieldInfo field,ord);
    ord++;
  }
  FST<Long> fst = builder.finish();
  if (fst != null) {
    fst.save(data);
  }
  Meta.writeVLong(ord);
}
项目:read-open-source-code    文件normalizeCharMap.java   
/** Builds the normalizeCharMap; call this once you
 *  are done calling {@link #add}. */
public normalizeCharMap build() {

  final FST<CharsRef> map;
  try {
    final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2,new CharsRef(ent.getValue()));
    }
    map = builder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }

  return new normalizeCharMap(map);
}
项目:read-open-source-code    文件Dictionary.java   
private FST<CharsRef> parseConversions(LineNumberReader reader,new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
项目:Maskana-Gestor-de-Conocimiento    文件MemoryDocValuesConsumer.java   
private void writeFST(FieldInfo field,ord);
    ord++;
  }
  FST<Long> fst = builder.finish();
  if (fst != null) {
    fst.save(data);
  }
  Meta.writeVLong(ord);
}
项目:Maskana-Gestor-de-Conocimiento    文件WFSTCompletionLookup.java   
@Override
public void build(InputIterator iterator) throws IOException {
  if (iterator.hasPayloads()) {
    throw new IllegalArgumentException("this suggester doesn't support payloads");
  }
  BytesRef scratch = new BytesRef();
  InputIterator iter = new WFSTInputIterator(iterator);
  IntsRef scratchInts = new IntsRef();
  BytesRef prevIoUs = null;
  PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1,cost);
    prevIoUs.copyBytes(scratch);
  }
  fst = builder.finish();
}
项目:Maskana-Gestor-de-Conocimiento    文件normalizeCharMap.java   
/** Builds the normalizeCharMap; call this once you
 *  are done calling {@link #add}. */
public normalizeCharMap build() {

  final FST<CharsRef> map;
  try {
    final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2,new CharsRef(ent.getValue()));
    }
    map = builder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }

  return new normalizeCharMap(map);
}
项目:Maskana-Gestor-de-Conocimiento    文件Lucene42DocValuesConsumer.java   
private void writeFST(FieldInfo field,ord);
    ord++;
  }
  FST<Long> fst = builder.finish();
  if (fst != null) {
    fst.save(data);
  }
  Meta.writeVLong(ord);
}
项目:lams    文件BlockTreeTermsWriter.java   
/**
 * Copies every input/output pair of {@code subIndex} into {@code builder}.
 *
 * @param builder        destination FST builder
 * @param subIndex       FST whose entries are enumerated and re-added
 * @param scratchIntsRef reusable buffer for converting each input to ints
 * @throws IOException propagated from the enumeration or the builder
 */
private void append(Builder<BytesRef> builder,FST<BytesRef> subIndex,IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<BytesRef> entries = new BytesRefFSTEnum<>(subIndex);
  for (BytesRefFSTEnum.InputOutput<BytesRef> entry = entries.next();
       entry != null;
       entry = entries.next()) {
    builder.add(Util.toIntsRef(entry.input, scratchIntsRef), entry.output);
  }
}
项目:lams    文件Lucene42DocValuesProducer.java   
/**
 * Positions the wrapped enum {@code in} on the term whose FST output equals {@code ord}.
 *
 * @param ord output value to look up in the FST
 * @throws IOException propagated from the FST lookup or the enum seek
 */
@Override
public void seekExact(long ord) throws IOException {
  // TODO: would be better to make this simpler and faster,
  // but we don't want to introduce a bug that corrupts our enum state!
  bytesReader.setPosition(0);
  fst.getFirstArc(firstArc);

  final IntsRef termInts = Util.getByOutput(fst, ord, bytesReader, firstArc, scratchArc, scratchInts);

  final BytesRefBuilder termBytes = new BytesRefBuilder();
  termBytes.clear();
  Util.toBytesRef(termInts, termBytes);

  // TODO: we could do this lazily; better to try to push into FSTEnum though?
  in.seekExact(termBytes.get());
}
项目:lams    文件Dictionary.java   
/**
 * Builds an FST that maps each key of {@code affixes} (converted with
 * {@code Util.toUTF32}) to the int sequence formed by its list of integers.
 *
 * @param affixes sorted map from affix string to the integers stored as its output
 * @return the finished FST, as returned by {@code Builder.finish()}
 * @throws IOException propagated from the FST builder
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  final IntSequenceOutputs seqOutputs = IntSequenceOutputs.getSingleton();
  final Builder<IntsRef> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE4, seqOutputs);
  final IntsRefBuilder keyInts = new IntsRefBuilder();

  for (Map.Entry<String,List<Integer>> affix : affixes.entrySet()) {
    Util.toUTF32(affix.getKey(), keyInts);

    // Pack the list into an IntsRef sized to hold exactly its elements.
    final List<Integer> ids = affix.getValue();
    final IntsRef packed = new IntsRef(ids.size());
    for (Integer id : ids) {
      packed.ints[packed.length] = id;
      packed.length++;
    }

    fstBuilder.add(keyInts.get(), packed);
  }
  return fstBuilder.finish();
}
项目:Elasticsearch    文件CompletionTokenStream.java   
/**
 * Emits the next finite string of the wrapped input as a token.
 *
 * <p>On the first call the finite strings are computed from {@code input}; the first
 * emitted token carries their count as its position increment and every later token
 * carries 0.
 *
 * @return {@code true} if a token was emitted, {@code false} once all finite strings
 *         have been consumed
 * @throws IllegalArgumentException if the input expands to more than {@code MAX_PATHS}
 *                                  finite strings
 * @throws IOException propagated from computing the finite strings
 */
@Override
public boolean incrementToken() throws IOException {
    clearattributes();

    if (finiteStrings == null) {
        final Set<IntsRef> paths = toFiniteStrings.toFiniteStrings(input);
        if (paths.size() > MAX_PATHS) {
            throw new IllegalArgumentException("TokenStream expanded to " + paths.size() + " finite strings. Only <= " + MAX_PATHS
                    + " finite strings are supported");
        }
        posInc = paths.size();
        finiteStrings = paths.iterator();
    }

    if (!finiteStrings.hasNext()) {
        return false;
    }

    /*
     * posInc encodes the number of paths that this surface form produced.
     * Multi fields share the same surface form and therefore sum up.
     */
    posAttr.setPositionIncrement(posInc);
    posInc = 0;

    Util.toBytesRef(finiteStrings.next(), bytesAtt.builder()); // now we have UTF-8
    if (charTermAttribute != null) {
        charTermAttribute.setLength(0);
        charTermAttribute.append(bytesAtt.toUTF16());
    }
    if (payload != null) {
        payloadAttr.setPayload(this.payload);
    }
    return true;
}
项目:search    文件MemoryDocValuesProducer.java   
/**
 * Resets the FST reader state, looks a term up in the FST and seeks the wrapped
 * enum {@code in} to the resulting bytes.
 *
 * @param ord ordinal requested by the caller
 * @throws IOException propagated from the FST lookup or the enum seek
 */
@Override
public void seekExact(long ord) throws IOException {
  // TODO: would be better to make this simpler and faster,
  // but we don't want to introduce a bug that corrupts our enum state!
  bytesReader.setPosition(0);
  fst.getFirstArc(firstArc);
  // NOTE(review): `ord` is never passed on in this excerpt, and the reader/arc prepared
  // above are not handed to getByOutput either -- the argument list looks truncated;
  // confirm against the upstream source before relying on this call.
  IntsRef output = Util.getByOutput(fst,scratchInts);
  // TODO: we could do this lazily; better to try to push into FSTEnum though?
  in.seekExact(Util.toBytesRef(output,new BytesRefBuilder()));
}
项目:search    文件FSTTermsReader.java   
/**
 * Loads {@code frame} with the ceiling arc for {@code label} below {@code top}
 * (the arc found by {@code Util.readCeilArc}) and steps the automaton over it.
 *
 * @return {@code null} if no ceiling arc exists; the result of
 *         {@code loadNextFrame(top, frame)} if the automaton has no transition for
 *         the arc's label (state {@code -1}); otherwise {@code frame}
 */
Frame loadCeilFrame(int label,Frame top,Frame frame) throws IOException {
  final FST.Arc<FSTTermOutputs.TermData> ceilArc =
      Util.readCeilArc(label, fst, top.fstArc, frame.fstArc, fstReader);
  if (ceilArc == null) {
    return null;
  }

  frame.fsaState = fsa.step(top.fsaState, ceilArc.label);
  //if (TEST) System.out.println(" loadCeil frame="+frame);
  return frame.fsaState == -1 ? loadNextFrame(top, frame) : frame;
}
项目:search    文件FSTOrdTermsWriter.java   
@Override
public void finishTerm(BytesRef text,TermStats stats) throws IOException {
  if (numTerms > 0 && numTerms % SKIP_INTERVAL == 0) {
    bufferSkip();
  }
  // write term Meta data into fst
  final long longs[] = new long[longsSize];
  final long delta = stats.totalTermFreq - stats.docFreq;
  if (stats.totalTermFreq > 0) {
    if (delta == 0) {
      statsOut.writeVInt(stats.docFreq<<1|1);
    } else {
      statsOut.writeVInt(stats.docFreq<<1|0);
      statsOut.writeVLong(stats.totalTermFreq-stats.docFreq);
    }
  } else {
    statsOut.writeVInt(stats.docFreq);
  }
  BlockTermState state = postingsWriter.newTermState();
  state.docFreq = stats.docFreq;
  state.totalTermFreq = stats.totalTermFreq;
  postingsWriter.finishTerm(state);
  postingsWriter.encodeTerm(longs,MetaBytesOut,true);
  for (int i = 0; i < longsSize; i++) {
    MetaLongsOut.writeVLong(longs[i] - lastLongs[i]);
    lastLongs[i] = longs[i];
  }
  MetaLongsOut.writeVLong(MetaBytesOut.getFilePointer() - lastMetaBytesFP);

  builder.add(Util.toIntsRef(text,numTerms);
  numTerms++;

  lastMetaBytesFP = MetaBytesOut.getFilePointer();
}
项目:search    文件FSTOrdTermsReader.java   
/** Load frame for target arc(node) on fst,Frame frame) throws IOException {
  FST.Arc<Long> arc = frame.arc;
  arc = Util.readCeilArc(label,top.arc,fstReader);
  if (arc == null) {
    return null;
  }
  frame.state = fsa.step(top.state,arc.label);
  //if (TEST) System.out.println(" loadCeil frame="+frame);
  if (frame.state == -1) {
    return loadNextFrame(top,frame);
  }
  return frame;
}
项目:search    文件OrdsBlockTreeTermsWriter.java   
private void append(Builder<Output> builder,FST<Output> subIndex,long termOrdOffset,IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<Output> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
  BytesRefFSTEnum.InputOutput<Output> indexEnt;
  while ((indexEnt = subIndexEnum.next()) != null) {
    //if (DEBUG) {
    //  System.out.println("      add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
    //}
    Output output = indexEnt.output;
    long blockTermCount = output.endOrd - output.startOrd + 1;
    Output newOutput = FST_OUTPUTS.newOutput(output.bytes,termOrdOffset+output.startOrd,output.endOrd-termOrdOffset);
    //System.out.println("  append sub=" + indexEnt.input + " output=" + indexEnt.output + " termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount  + " newOutput=" + newOutput  + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
    builder.add(Util.toIntsRef(indexEnt.input,newOutput);
  }
}
项目:search    文件VariableGapTermsIndexReader.java   
private void loadTermsIndex() throws IOException {
  if (fst == null) {
    IndexInput clone = in.clone();
    clone.seek(indexStart);
    fst = new FST<>(clone,fstOutputs);
    clone.close();

    /*
    final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
    Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
    Util.todot(fst,w,false,false);
    System.out.println("FST INDEX: SAVED to " + dotFileName);
    w.close();
    */

    if (indexDivisor > 1) {
      // subsample
      final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
      final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1,outputs);
      final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
      BytesRefFSTEnum.InputOutput<Long> result;
      int count = indexDivisor;
      while((result = fstEnum.next()) != null) {
        if (count == indexDivisor) {
          builder.add(Util.toIntsRef(result.input,result.output);
          count = 0;
        }
        count++;
      }
      fst = builder.finish();
    }
  }
}
项目:search    文件VersionBlockTreeTermsWriter.java   
private void append(Builder<Pair<BytesRef,Long>> builder,FST<Pair<BytesRef,Long>> subIndex,IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<Pair<BytesRef,Long>> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
  BytesRefFSTEnum.InputOutput<Pair<BytesRef,Long>> indexEnt;
  while((indexEnt = subIndexEnum.next()) != null) {
    //if (DEBUG) {
    //  System.out.println("      add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
    //}
    builder.add(Util.toIntsRef(indexEnt.input,indexEnt.output);
  }
}
项目:search    文件Dictionary.java   
private FST<IntsRef> affixFST(TreeMap<String,output);
  }
  return builder.finish();
}
项目:search    文件BlockTreeTermsWriter.java   
private void append(Builder<BytesRef> builder,indexEnt.output);
  }
}
项目:search    文件Lucene42DocValuesProducer.java   
@Override
public void seekExact(long ord) throws IOException {
  // Todo: would be better to make this simpler and faster.
  // but we dont want to introduce a bug that corrupts our enum state!
  bytesReader.setPosition(0);
  fst.getFirstArc(firstArc);
  IntsRef output = Util.getByOutput(fst,better to try to push into FSTEnum though?
  in.seekExact(scratchBytes.get());
}

相关文章

买水果
比较全面的redis工具类
gson 反序列化到多态子类
java 版本的 mb_strwidth
JAVA 反转字符串的最快方法,大概比StringBuffer.reverse()性...
com.google.gson.internal.bind.ArrayTypeAdapter的实例源码...