MySQL Connector + Python的内存泄漏

问题描述

我在Windows 10(i7-9700K cpu 3.60 GHz)ram 16 Go上将MysqL 5.7.24与Laragon一起使用。 通过Anaconda + MysqL Connector 8.0.18的Python 3.7。 我正在对数据库进行一些查询和更新,该数据库有1200万行用于最大的表。

我创建了一个循环(带有缓冲的和准备好的游标),该循环当时占用1000行,然后进行计算和更新,然后关闭连接。最后,如果数据库中仍有行要更新(我为此有一个布尔值),则循环将调用自身,每个循环都将关闭每个游标和连接,并使用新的连接重新启动。

但是,内存永远不会释放,并且每次4到5个小时后我的PC都会冻结。 我已经阅读了有关该主题的一些信息,但是尚不清楚是错误还是其他原因。

您有什么建议?谢谢

编辑:这是我的代码之一,它可以清除数据库中的一些马名。

import MysqL.connector
from MysqL.connector import errorcode


def main():
    CONNECTION_STRING = {
      'user': 'user','passwd': 'pass','host': 'localhost','database': 'my_database','raise_on_warnings': True
    }
    
    try:
        cnx = MysqL.connector.connect(**CONNECTION_STRING)
    except MysqL.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            print("Something is wrong with your user name or password")
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            print("Database does not exist")
        else:
            print(err)
    else:
        print("Vous êtes connecté")
        curGet = cnx.cursor(buffered=True)
        curU = cnx.cursor(prepared=True)
        curS2 = cnx.cursor()
        curS3 = cnx.cursor()
    
    # queries
        get_cheval = ('''
                SELECT cheval.id as cId,cheval.nom_pt as nPt
                FROM cheval
                WHERE nom_pt IS NOT NULL
                AND nom IS NULL 
                LIMIT 1000
        ;''')
    
        update_cheval = ('''
                    UPDATE cheval
                    SET nom = %s,pays_id = %s
                    WHERE id = %s     
        ;''')
    
        select_contry_alpah2 = ('''
            SELECT id FROM pays
            WHERE alpha2_pt = %s
        ;''')
    
        select_contry_alpha3 = ('''
            SELECT id pId FROM pays
            WHERE alpha3_pt = %s
        ;''')
    
        curGet.execute(get_cheval)
        list_alpha2_ignore = ['II','IV','VI','JR',]
        list_alpha3_ignore = ['VII','III',"J'R",'XYZ']
        is_there_row = False  # for the loop
        for (cId,nPt) in curGet:
            is_there_row = True
            split_nom_pt = nPt.strip().split(' ')
            print(split_nom_pt)
            contry = split_nom_pt[-1].replace('(','').replace(')','')
    
            if contry.isupper() and len(contry) == 3 and contry not in list_alpha3_ignore:
                curS3.execute(select_contry_alpha3,(contry,))
                pays_id_tmp = curS3.fetchone()
                if pays_id_tmp is not None:
                    pays_id = pays_id_tmp[0]
                else:
                    pays_id = None
                if pays_id is not None:
                    nom = ' '.join(split_nom_pt[:-1])
                else:
                    nom = None
    
            elif contry.isupper() and len(contry) == 2 and contry not in list_alpha2_ignore:
                curS2.execute(select_contry_alpah2,))
                pays_id_tmp = curS2.fetchone()
                if pays_id_tmp is not None:
                    pays_id = pays_id_tmp[0]
                else:
                    pays_id = None
                if pays_id is not None:
                    nom = ' '.join(split_nom_pt[:-1])
                else:
                    nom = None
    
            else:
                pays_id = 75
                nom = nPt.strip()
    
            curU.execute(update_cheval,(nom,pays_id,cId))
            cnx.commit()
            print('updated:' + str(nom) + '; ' + str(pays_id))
    
        curGet.close()
        curS2.close()
        curS3.close()
        curU.close()
        cnx.close()

        # LOOP
        if is_there_row:
            is_there_row = False
            main()
        else:
            print('End')


if __name__ == '__main__':
    main()

解决方法

正如我在评论中说的那样,代码正在泄漏内存,因为您只能在所有调用返回后才完成第一个main,考虑到无法从内存中删除函数变量。

import mysql.connector
from mysql.connector import errorcode

# First suggestion: Move constants outside of the function
# this way they'll be created only once
connection_string = {
      'user': 'user','passwd': 'pass','host': 'localhost','database': 'my_database','raise_on_warnings': True
    }
# queries
get_cheval = ('SELECT cheval.id as cId,cheval.nom_pt as nPt'
              'FROM cheval'
              'WHERE nom_pt IS NOT NULL'
              'AND nom IS NULL'
              'LIMIT 1000;') # Maybe you're missing an offset here? This query should always return the same first 1000 results. (Maybe you're updating the horses so that they don't match the query thought) 

update_cheval = ('UPDATE cheval'
                 'SET nom = %s,pays_id = %s'
                 'WHERE id = %s;')

select_contry_alpah2 = ('SELECT id FROM pays'
                        'WHERE alpha2_pt = %s;')

select_contry_alpha3 = ('SELECT id pId FROM pays'
                        'WHERE alpha3_pt = %s;')


def main():
    done=False
    while not done:
        try:
            cnx = mysql.connector.connect(**connection_string)
        except mysql.connector.Error as err:
            if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
                print("Something is wrong with your user name or password")
            elif err.errno == errorcode.ER_BAD_DB_ERROR:
                print("Database does not exist")
            else:
                print(err)
            continue # Go back and try again
            
        print("Vous êtes connecté")
        curGet = cnx.cursor(buffered=True)
        curU = cnx.cursor(prepared=True)
        curS2 = cnx.cursor()
        curS3 = cnx.cursor()
        
        while cnx.is_connected():
            curGet.execute(get_cheval)
            result = curGet.fetchone()
            if result is None:
                done = True 
                break;
            list_alpha2_ignore = ['II','IV','VI','JR',]
            list_alpha3_ignore = ['VII','III',"J'R",'XYZ']
            for (cId,nPt) in curGet:
                split_nom_pt = nPt.strip().split(' ')
                print(split_nom_pt)
                contry = split_nom_pt[-1].replace('(','').replace(')','')
        
                if contry.isupper() and len(contry) == 3 and contry not in list_alpha3_ignore:
                    curS3.execute(select_contry_alpha3,(contry,))
                    pays_id_tmp = curS3.fetchone()
                    if pays_id_tmp is not None:
                        pays_id = pays_id_tmp[0]
                    else:
                        pays_id = None
                    if pays_id is not None:
                        nom = ' '.join(split_nom_pt[:-1])
                    else:
                        nom = None
        
                elif contry.isupper() and len(contry) == 2 and contry not in list_alpha2_ignore:
                    curS2.execute(select_contry_alpah2,))
                    pays_id_tmp = curS2.fetchone()
                    if pays_id_tmp is not None:
                        pays_id = pays_id_tmp[0]
                    else:
                        pays_id = None
                    if pays_id is not None:
                        nom = ' '.join(split_nom_pt[:-1])
                    else:
                        nom = None
        
                else:
                    pays_id = 75
                    nom = nPt.strip()
          
                curU.execute(update_cheval,(nom,pays_id,cId))
                cnx.commit()
                print('updated:' + str(nom) + '; ' + str(pays_id))     
        
        curGet.close()
        curS2.close()
        curS3.close()
        curU.close()
        cnx.close()


if __name__ == '__main__':
    main()