问题描述
我目前正在尝试CS50 pset6。在我的代码中,我试图读取文本并计算文本中某个DNA或STR链的最长运行时间,以便可以在CSV文件中找到匹配的DNA。谁能告诉我为什么在CS50 IDE上运行程序时,它只返回闪烁的光标?我认为自己的计算功能有问题,但是我不确定为什么。任何建议将不胜感激。
john/some-random-stuff
这是tmp的DNA序列:
AAGGTAAGTTTAGAATATAAAAAAATGTGAGTTAAATAGAATAGGTTAAAATTAAAGGAGATCAGATCAGATCAGATCTATCTATCTATCTATCTATCTATCAGAAAAGAGTAAATAGTTAAAGAGTAAGATATTGAATTAATGGAAAATATTGTTGGGGAAAAAAGGAGGGATAGAAGG
这是大CSV文件的10行:
from sys import argv,exit
import csv
def getstring(argv):
with open(argv[2]) as f:
str = f.read()
return str
def compute(tmp):
aatg = gata = tatc = gaaa = tctg = agatc = tctag = ttttttct = 0
#Check if DNA Sequence is AATG,GATA,TATC,GAAA,TCTG
i = 0
j = 5
length = len(tmp)
while (j <= (length - 5)):
AAT = GAT = TAT = GAA = TCT = 0
if tmp[i:j] == "AATG":
AAT += 1
i = i + 5
j = i + 5
elif tmp[i:j] != "AATG":
i = i - 5
j = i + 5
if AAT > aatg:
aatg = AAT
if tmp[i:j] == "GATA":
GAT += 1
i = i + 5
j = i + 5
if tmp[i:j] != "GATA":
i = i - 5
j = i + 5
if GAT > gata:
gata = GAT
if tmp[i:j] == "TATC":
TAT += 1
i = i + 5
j = i + 5
if tmp[i:j] != "TATC":
i = i - 5
j = i + 5
if TAT > tatc:
tatc = TAT
if tmp[i:j] == "GAAA":
GAA += 1
i = i + 5
j = i + 5
if tmp[i:j] != "GAAA":
i = i - 5
j = i + 5
if GAA > gaaa:
gaaa = GAA
if tmp[i:j] == "TCTG":
TCT += 1
i = i + 5
j = i + 5
if tmp[i:j] != "TCTG":
i = i - 4
j = i + 5
if TCT > tctg:
tctg = TCT
#Check if DNA Sequence is AGATC or TCTAG
b = 6
while (b <= (length - 6)):
AGA = TCTA = 0
if tmp[i:b] == "AGATC":
AGA += 1
i = i + 6
b = i + 6
elif tmp[i:b] != "AGATC":
i = i - 6
b = i + 6
if AGA > agatc:
agatc = AGA
if tmp[i:b] == "TCTAG":
TCTA += 1
i = i + 6
b = i + 6
if tmp[i:b] != "TCTAG":
i = i - 5
b = i + 6
if TCTA > tctag:
tctag = TCTA
#Check if DNA Sequence is TTTTTTCT
d = 9
while (d <= (length - 9)):
TTT = 0
if tmp[i:d] == "TTTTTTCT":
TTT += 1
i = i + 9
d = i + 9
elif tmp[i:d] != "TTTTTTCT":
i = i - 8
d = i + 9
if TTT > ttttttct:
ttttttct = TTT
return aatg,gata,tatc,gaaa,tctg,agatc,tctag,ttttttct
def main():
if len(argv) != 3:
print("Usage: python dna.py data.csv sequence.txt")
exit(1)
tmp = getstring(argv)
AATG,TCTG,AGATC,TCTAG,TTTTTTCT = compute(tmp)
print(f"{AATG},{GATA},{TATC}") #willdeletelater
#Store STRCOUNT in dictionary
STRCOUNT = {
'AGATC' : AGATC,'TTTTTTCT' : TTTTTTCT,'AATG' : AATG,'TCTAG' : TCTAG,'GATA' : GATA,'TATC' : TATC,'GAAA' : GAAA,'TCTG' : TCTG
}
#Only large csv has all the STR dna sequence,small csv only has AGATC,AATG,TATC
with open(argv[1],"r") as CSVfile:
read = csv.reader(CSVfile)
c=0
field={}
for row in read:
field[c] = row
c += 1
ncolumns = len (field[0])
CSVfile.seek(0)
reader = csv.DictReader(CSVfile)
for row in reader:
if ncolumns == 4:
if row['AGATC'] == STRCOUNT['AGATC'] and row['AATG'] == STRCOUNT['AATG'] and row['TATC'] == STRCOUNT['TATC']:
print(f"{row['name']}")
exit (0)
elif ncolumns != 4:
if row['AGATC'] == STRCOUNT['AGATC'] and row['TTTTTTCT'] == STRCOUNT['TTTTTTCT'] and row['AATG'] == STRCOUNT['AATG'] and row['TCTAG'] == STRCOUNT['TCTAG'] and row['GATA'] == STRCOUNT['GATA'] and row['TATC'] == STRCOUNT['TATC'] and row['GAAA'] == STRCOUNT['GAAA'] and row['TCTG'] == STRCOUNT['TCTG']:
print(f"{row['name']}")
exit (0)
print("No match")
exit (1)
main ()
这里是PSET6 DNA规格的链接: https://cs50.harvard.edu/x/2020/psets/6/dna/#:~:text=python%20dna.py%20databases/small.csv%20sequences/1.txt
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)