Unable to transfer a file from FTP to AWS S3 using ECS (conn_error: Failed to connect FTP Server)

Problem description

I need to transfer files from an FTP server to my AWS S3 bucket. I have a Python script that works when I run it on my local machine, but when I run the same script on Amazon ECS it gives me the error "Failed to connect FTP Server!".

Note that whitelisting is handled on the firewall, so the URL does not need to be whitelisted again for the Python code running on ECS. I also have a requirement.txt containing the required modules and dependencies, all of which install successfully. I have built an image from it and used that image to run the task definition.
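The networking details of the ECS task were not shared, so the following is only a hypothetical sketch (reusing the FTP_HOST and FTP_PORT constants from the script below, with an arbitrary 10-second timeout) of how raw TCP reachability could be checked from inside the running container before paramiko gets involved:

import socket

FTP_HOST = "dataneeded.abc.com"
FTP_PORT = 21

# Attempt a plain TCP connection to the FTP host and port with a 10-second timeout.
try:
    with socket.create_connection((FTP_HOST, FTP_PORT), timeout=10):
        print("TCP connection to {}:{} succeeded".format(FTP_HOST, FTP_PORT))
except OSError as exc:
    print("TCP connection to {}:{} failed: {}".format(FTP_HOST, FTP_PORT, exc))

If this check already fails inside the ECS task, the problem is at the networking layer (security groups, NAT, routing) rather than in the Python code itself.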

The script is as follows:

import math
import time
import io
import boto3
import paramiko


S3_BUCKET_NAME = "mybucket.dev"

FTP_HOST = "dataneeded.abc.com"
FTP_PORT = 21
FTP_USERNAME = "*****"
FTP_PASSWORD = "**********"
FTP_FILE_PATH = "/path1/path2/path3/file.zip"
S3_FILE_PATH = "subfolder/file.zip"

CHUNK_SIZE = 6291456
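# 6291456 bytes = 6 MiB per part; S3 multipart uploads require every part
# except the last to be at least 5 MiB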


def open_ftp_connection(ftp_host,ftp_port,ftp_username,ftp_password):
    """
    Opens ftp connection and returns connection object
    """
    client = paramiko.SSHClient()
    client.load_system_host_keys()
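    # Note: this SSHClient is not used further; the SFTP session below is built
    # directly on a paramiko Transport, so the connection made here is SFTP (SSH),
    # not plain FTP.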
    try:
        transport = paramiko.Transport((ftp_host,ftp_port))  # (host, port) must be passed as a single tuple
    except Exception as e:
        return "conn_error"
    try:
        transport.connect(username=ftp_username,password=ftp_password)
    except Exception as identifier:
        return "auth_error"
    ftp_connection = paramiko.SFTPClient.from_transport(transport)
    return ftp_connection


def transfer_chunk_from_ftp_to_s3(
    ftp_file,s3_connection,multipart_upload,bucket_name,
    ftp_file_path,s3_file_path,part_number,chunk_size,
):
    start_time = time.time()
    chunk = ftp_file.read(int(chunk_size))
    part = s3_connection.upload_part(
        Bucket=bucket_name,Key=s3_file_path,PartNumber=part_number,
        UploadId=multipart_upload["UploadId"],Body=chunk,
    )
    end_time = time.time()
    total_seconds = end_time - start_time
    print(
        "speed is {} kb/s total seconds taken {}".format(
            math.ceil((int(chunk_size) / 1024) / total_seconds),total_seconds
        )
    )
    part_output = {"PartNumber": part_number,"ETag": part["ETag"]}
    return part_output


def transfer_file_from_ftp_to_s3(
    bucket_name,
    ftp_file_path,  # path from the root directory of the FTP server to the file, including the file name
    s3_file_path,  # path starting from the root of the S3 bucket, including the file name
    ftp_username,ftp_password,chunk_size
):
    ftp_connection = open_ftp_connection(
        FTP_HOST,int(FTP_PORT),ftp_username,ftp_password
    )
    ftp_file = ftp_connection.file(ftp_file_path,"r")
    s3_connection = boto3.client("s3")
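    # _get_size() is paramiko's SFTPFile helper for the remote file size
    # (equivalent to ftp_file.stat().st_size)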
    ftp_file_size = ftp_file._get_size()
    
    #avoid duplicate
    try:
        s3_file = s3_connection.head_object(Bucket=bucket_name,Key=s3_file_path)
        if s3_file["ContentLength"] == ftp_file_size:
            print("File Already Exists in S3 bucket")
            ftp_file.close()
            return
    except Exception as e:
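        # head_object raises if the key does not exist yet; fall through and upload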
        pass
    if ftp_file_size <= int(chunk_size):
        # upload file in one go
        print("Transferring complete File from FTP to S3...")
        ftp_file_data = ftp_file.read()
        ftp_file_data_bytes = io.BytesIO(ftp_file_data)
        s3_connection.upload_fileobj(ftp_file_data_bytes,bucket_name,s3_file_path)
        print("Successfully Transferred file from FTP to S3!")
        ftp_file.close()

    else:
        print("Transferring File from FTP to S3 in chunks...")
        # upload file in chunks
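        # S3 multipart flow: create_multipart_upload, then upload_part for each
        # chunk (collecting PartNumber/ETag), then complete_multipart_upload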
        chunk_count = int(math.ceil(ftp_file_size / float(chunk_size)))
        multipart_upload = s3_connection.create_multipart_upload(
            Bucket=bucket_name,Key=s3_file_path
        )
        parts = []
        for i in range(chunk_count):
            print("Transferring chunk {}...".format(i + 1))
            part = transfer_chunk_from_ftp_to_s3(
                ftp_file,s3_connection,multipart_upload,bucket_name,
                ftp_file_path,s3_file_path,i + 1,chunk_size,
            )
            parts.append(part)
            print("Chunk {} Transferred Successfully!".format(i + 1))

        part_info = {"Parts": parts}
        s3_connection.complete_multipart_upload(
            Bucket=bucket_name,Key=s3_file_path,
            UploadId=multipart_upload["UploadId"],MultipartUpload=part_info,
        )
        print("All chunks Transferred to S3 bucket! File Transfer successful!")
        ftp_file.close()


if __name__ == "__main__":
    ftp_username = FTP_USERNAME
    ftp_password = FTP_PASSWORD
    ftp_file_path = FTP_FILE_PATH #str(input("Enter file path located on FTP server: "))
    s3_file_path = S3_FILE_PATH #str(input("Enter file path to upload to s3: "))
    ftp_connection = open_ftp_connection(
        FTP_HOST,int(FTP_PORT),ftp_username,ftp_password
    )
    if ftp_connection == "conn_error":
        print("Failed to connect FTP Server!")
    elif ftp_connection == "auth_error":
        print("Incorrect username or password!")
    else:
        try:
            ftp_file = ftp_connection.file(ftp_file_path,"r")
        except Exception as e:
            print("File does not exist on FTP Server!")
        else:
            ftp_file.close()
            transfer_file_from_ftp_to_s3(
                S3_BUCKET_NAME,ftp_file_path,s3_file_path,ftp_username,ftp_password,CHUNK_SIZE,)

Solution

No effective solution to this problem has been found yet.
