问题描述
我有一批文档,每篇文档包含标题、作者和类型。我按如下方式为这些文档建立了索引,但不确定是否应该为此使用多值(multi-valued)和分层(hierarchical)分面(facet)。
# Imports
import lucene
from org.apache.lucene.index import IndexWriter,IndexWriterConfig,DirectoryReader
from org.apache.lucene.store import SimpleFSDirectory
from java.nio.file import Paths
from org.apache.lucene.analysis.en import EnglishAnalyzer
from org.apache.lucene.facet import Facets,FacetField,FacetResult,FacetsConfig,FacetsCollector
from org.apache.lucene.facet.taxonomy import FastTaxonomyFacetCounts
from org.apache.lucene.facet.taxonomy.directory import DirectoryTaxonomyWriter,DirectoryTaxonomyReader
from org.apache.lucene.facet import DrillSideways,DrillDownQuery
from org.apache.lucene.document import Document,Field,TextField
from org.apache.lucene.search import BooleanClause,IndexSearcher
from org.apache.lucene.queryparser.classic import QueryParser
# Indexing
# Builds a small search index plus a side-car taxonomy index in which every
# document carries one or more "author_type" facet paths of the form
# [author, type].
index_path = "../index_try"
taxo_path = "../taxo_try"

# Start the embedded JVM before touching any Lucene class.
lucene.initVM()

config = IndexWriterConfig(EnglishAnalyzer())
# The Java method is setOpenMode (capital "O"); CREATE starts a fresh index.
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)

# "author_type" is hierarchical (author -> type) and multi-valued (a document
# may carry several author/type paths).
facets_config = FacetsConfig()
facets_config.setHierarchical("author_type", True)
facets_config.setMultiValued("author_type", True)

titles = ["the quick fox", "the quick brown", "the quick brown fox"]
# One entry per title; each entry is the list of [author, type] paths that
# are attached to that document.
author_types = [
    [["a", "book"], ["b", "book"]],
    [["b", "cd"], ["c", "cd"]],
    [["c", "dvd"], ["d", "dvd"]],
]

taxo_store = SimpleFSDirectory(Paths.get(taxo_path))
taxo_writer = DirectoryTaxonomyWriter(taxo_store, IndexWriterConfig.OpenMode.CREATE)
index_store = SimpleFSDirectory(Paths.get(index_path))
index_writer = IndexWriter(index_store, config)

for title, doc_author_types in zip(titles, author_types):
    doc = Document()
    doc.add(Field("title", title, TextField.TYPE_STORED))
    for author_type in doc_author_types:
        # author_type is the facet path [author, type] under the dimension.
        doc.add(FacetField("author_type", author_type))
    # facets_config.build() returns the document that is actually indexed;
    # it is handed the taxonomy writer so the facet paths are recorded there.
    index_writer.addDocument(facets_config.build(taxo_writer, doc))

taxo_writer.close()
taxo_store.close()
index_writer.close()
index_store.close()
当我使用查询进行搜索时,我想查看每位作者按类型划分的标题数量。
# Searching
# Runs a title query against the index and collects facet counts for the
# "author_type" dimension from the taxonomy index.
taxo_store = SimpleFSDirectory(Paths.get(taxo_path))
taxo_reader = DirectoryTaxonomyReader(taxo_store)
index_store = SimpleFSDirectory(Paths.get(index_path))
# Open the reader on index_store (the original referenced an undefined
# name `store`) and search through that reader (originally the undefined
# name `reader`).
index_reader = DirectoryReader.open(index_store)
searcher = IndexSearcher(index_reader)

query = QueryParser("title", EnglishAnalyzer())
parsed_query = query.parse("""(title: quick)""")
drill_down_query = DrillDownQuery(facets_config, parsed_query)

facets_collector = FacetsCollector()
FacetsCollector.search(searcher, drill_down_query, 100, facets_collector)
facets = FastTaxonomyFacetCounts(taxo_reader, facets_config, facets_collector)

# With no path given, this returns the first level under "author_type"
# (the authors) and their counts.
# NOTE(review): to split each author's count by type, the children of each
# author path would have to be requested as well (getTopChildren accepts a
# path after the dimension) -- confirm how PyLucene expects that argument.
results = facets.getTopChildren(1000, "author_type")

# Release readers and directories, mirroring the cleanup done after indexing.
index_reader.close()
taxo_reader.close()
index_store.close()
taxo_store.close()

results
包含以下内容。
<FacetResult: dim=author_type path=[] value=-1 childCount=4
b (2)
c (2)
a (1)
d (1)
>
我希望看到 b (2) 实际上由一个 book 和一个 cd 组成,而 c (2) 由一个 cd 和一个 dvd 组成。
我如何实现这一目标?任何帮助都会很棒,谢谢!
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)