问题描述
情况是我将 df 加载到 es 中,df 有两列:'url' & 'text'。
我用名为“forbidden_words”的列表中的值查询“text”。
我希望 res 的结果也能包含两列:一列是“url”,另一列是在“text”中搜索到的“forbidden_words”。
如果有任何帮助,将非常感谢!
# Question snippet (its indentation was lost in this paste): builds a `match`
# query on the `text` field for each entry of forbidden_words.
for i in forbidden_words:
dsll = {
'query': {
'match': {
'text': i
}
},"_source": {
# NOTE(review): `includes` is given the forbidden_words list itself. The answer
# further down says this option expects field names rather than field values,
# which is consistent with the empty `_source` dicts in the result shown below.
"includes": forbidden_words,# "excludes": []
}
}
# Runs the query body built above against the 'test' index.
res = es.search(index='test',body=dsll)
res 的结果:
{'took': 25,'timed_out': False,'_shards': {'total': 5,'successful': 5,'skipped': 0,'failed': 0},'hits': {'total': 26,'max_score': 3.211111,'hits': [{'_index': 'test','_type': 'test','_id': 'ml5utHcBcazm5fCndKUY','_score': 3.211111,'_source': {}},....
{'_index': 'test','_id': 'oV5utHcBcazm5fCndKUY','_score': 1.2800283,'_source': {}}]}}
解决方法
// Writes a placeholder PDF named TEST.pdf into the directory returned by
// context.getExternalFilesDir(null), then asks the system to open it with a
// viewer through a FileProvider content URI.
String fileName1 = "TEST" + ".pdf";
Document document = new Document();

// Location to save
String dest = context.getExternalFilesDir(null) + "/";
File dir = new File(dest);
if (!dir.exists()) {
    dir.mkdirs();
}

// One File handle is used for both writing and viewing so the two steps cannot
// disagree on the path (the original wrote to an undeclared `fileName` and then
// looked for dest + "/" + fileName1).
File pdfFile = new File(dir, fileName1);

try {
    // FileOutputStream creates the file when it is missing, so no separate
    // createNewFile() call is needed.
    FileOutputStream fOut = new FileOutputStream(pdfFile, false);
    PdfWriter.getInstance(document, fOut);

    // Open to write
    document.open();
    document.add(new Paragraph(""));
    document.add(new Chunk(""));
} catch (DocumentException | IOException e) {
    // FileNotFoundException is a subclass of IOException, so it is covered here.
    Log.e("PdfError", e.toString(), e);
} finally {
    // Always close the document, as the original did unconditionally.
    document.close();
}

// Nothing is created here when the PDF is missing: the original called
// pdfFile.mkdir(), which produced a *directory* named TEST.pdf and made the
// existence check succeed for something that is not a PDF. isFile() also
// rejects such a leftover directory.
if (pdfFile.isFile()) {
    Intent intent = new Intent(Intent.ACTION_VIEW);
    String authority = context.getApplicationContext().getPackageName() + ".provider";
    Uri mURI = FileProvider.getUriForFile(context, authority, pdfFile);
    intent.setDataAndType(mURI, "application/pdf");
    intent.addFlags(Intent.FLAG_ACTIVITY_NO_HISTORY | Intent.FLAG_GRANT_READ_URI_PERMISSION);
    try {
        context.startActivity(intent);
    } catch (Exception e) {
        // Best effort, as in the original: a failure to launch a viewer is
        // logged and otherwise ignored.
        Log.e("PdfError", e.toString(), e);
    }
} else {
    Toast.makeText(context, "The file not exists! ", Toast.LENGTH_SHORT).show();
}
_source 的 includes 需要的是字段名称,而不是字段值。所以你只能这样写:
{
  "query": {
    "match": {
      "text": "xyz"
    }
  },
  "_source": {
    "includes": ["text", "url"]    <--
  }
}
如果您只想返回与查询匹配的实际单词,请查看 highlighting:
{
  "query": {
    "match": {
      "text": "xyz"
    }
  },
  "_source": {
    "includes": ["text", "url"]
  },
  "highlight": {
    "fields": {
      "text": {}
    }
  }
}
请注意,突出显示的值将不再位于 _source 内,而是位于每个命中结果的 highlight 内。
# Collect one (forbidden word, url, text) tuple per hit returned for each
# forbidden word. `es`, `forbidden_words` and `sizee` are not defined in this
# snippet and are presumably set up earlier.
result = []
for word in forbidden_words:
    # One match_phrase query against the `text` field per forbidden word.
    phrase_clause = {'match_phrase': {'text': word}}
    dsl = {'query': phrase_clause}
    res = es.search(index='cn_web_crawler_test_linux', body=dsl, size=sizee)
    for hit in res["hits"]["hits"]:
        # Skip empty hit entries, mirroring the original len(...) > 0 guard.
        if not hit:
            continue
        source = hit['_source']
        result.append((word, source['url'], source['text']))