问题描述
我正在尝试使用核苷酸序列进行 BLAST 搜索并打印最佳匹配结果,但不确定应该使用哪个选项/命令。有 max_hpsp
和 best_hit_overhang
之类的选项。我不知道他们的差异,我只想打印 1 个命中。 (最匹配的一个)我应该使用 max_hpsp 1
吗?
我写了这段代码,但它仍然没有用。如果您能告诉我,我错在哪里以及应该怎么做,我将不胜感激:) 谢谢!
from Bio.Blast import NCBIWWW
seq = Seq("GTTGA......CT")
def best_matching_hit(seq):
try:
result_handle = NCBIWWW.qblast("blastn","nt",seq)
except:
print('BLAST run Failed!')
return None
blast_record = NCBIXML.read(result_handle)
for hit in blast_record.alignments:
for hsp in hit.hsps:
if hsp.expect == max_hsps 1:
print(hit.title)
print(hsp.sbjct)
best_matching_hit(seq)
解决方法
这仅返回一个 hit ,我想是第一个,根据
Limiting the number of hits in a Biopython NCBIWWW Search 在映泰:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 7 15:28:11 2021
@author: Pietro
https://stackoverflow.com/questions/67872118/how-to-print-the-best-matching-hit-in-the-blast-search-biopython
"""
from Bio.Blast import NCBIWWW
from Bio.Seq import Seq
seq = Seq("ATGGCGTGGAATGAGCCTGGAAATAACAACGGCAACAATGGCCGCGATAATGACCCTTGGGGTAATAA\
TAATCGTGGTGGCCAGCGTCCTGGTGGCCGAGATCAAGGTCCGCCAGATTTAGATGAAGTGTTCAACAA\
ACTGAGTCAAAAGCTGGGTGGCAAGTTTGGTAAAAAAGGCGGCGGTGGTTCCTCTATCGGCGGTGGCGG\
TGGTGCAATTGGCTTTGGTGTCATTGCGATCATTGCAATTGCGGTGTGGATTTTCGCTGGTTTTTACAC\
CATCGGTGAAGCAGAGCGTGGTGTTGTACTGCGTTTAGGTAAATACGATCGTATCGTAGACCCAGGCCT\
TAACTGGCGTCCTCGTTTTATTGATGAATACGAAGCGGTTAACGTACAAGCGATTCGCTCACTACGTGC\
ATCTGGTCTAATGCTGACGAAAGATGAAAACGTAGTAACGGTTGCAATGGACGTTCAATACCGAGTTGC\
TGACCCATACAAATACCTATACCGCGTGACCAATGCAGATGATAGCTTGCGTCAAGCAACAGACTCTGC\
GCTACGTGCGGTAATTGGTGATTCACTAATGGATAGCATTCTAACCAGTGGTCGTCAGCAAATTCGTCA\
AAGCACTCAAGAAACACTAAACCAAATCATCGATAGCTATGATATGGGTCTGGTGATTGTTGACGTGAA\
CTTCCAGTCTGCACGTCCGCCAGAGCAAGTAAAAGATGCGTTTGATGACGCGATTGCTGCGCGTGAGGA\
TGAAGAGCGTTTCATCCGTGAAGCAGAAGCTTACAAGAACGAAATCTTGCCGAAGGCAACGGGTCGTGC\
TGAACGTTTGAAGAAGGAAGCTCAAGGTTACAACGAGCGTGTAACTAACGAAGCATTAGGTCAAGTAGC\
ACAGTTTGAAAAACTACTACCTGAATACCAAGCGGCTCCTGGCGTAACACGTGACCGTCTGTACATTGA\
CGCGATGGAAGAGGTTTACACCAACACATCTAAAGTGTTGATTGACTCTGAATCAAGCGGCAACCTTTT\
GTACCTACCAATCGATAAATTGGCAGGTCAAGAAGGCCAAACAGACACTAAACGTAAATCGAAATCTTC\
TTCAACCTACGATCACATTCAACTAGAGTCTGAGCGTACACAAGAAGAAACATCGAACACGCAGTCTCG\
TTCAACAGGTACACGTCAAGGGAGATACTAA")
def best_matching_hit(seq):
try:
result_handle = NCBIWWW.qblast("blastn","nt",seq,hitlist_size=1)
except:
print('BLAST run failed!')
return None
blast_record = result_handle.read()
print(blast_record)
best_matching_hit(seq)