问题描述
我有一个Lambda函数,用于解压缩tgz文件、替换其中的内容和文件名,然后将其重新打包为新的tgz文件。如果我从S3下载新文件并在本地解压缩(在Windows上使用7z,或在Ubuntu上使用tar xvzf <new_file_name>.tgz),
则一切正常。但是,如果我通过SFTP将新的tgz文件发送给客户,他们在解压缩时会收到utime错误。他们向我发送了如下日志:
cd /usr/.../; tar xvzf </usr/.../new_file_name>.tgz;
./
./A/
./A/a.jpg
./A/b.jpg
./A/c.jpg
./something.xml
tar: .: Cannot utime: Operation not permitted
tar: Exiting with failure status due to previous errors
我想知道是否是额外的@PaxHeader文件引起了utime错误,因为从日志中可以看到归档内容实际上已经解压缩出来了。
import os
import sys
import tarfile
import shutil
import boto3
from botocore.exceptions import ClientError
# Shared S3 client, created once at import time and used by lambda_handler.
s3_client = boto3.client('s3')
# Date string that processed archives must carry; read from the environment
# (raises KeyError at import time if "Acceptable_Date" is not set).
accepted_date = os.environ["Acceptable_Date"]
# Scratch directory for the downloaded and re-packed archives.
work_dir = '/tmp' # os.getcwd() returns "/var/task"
# Directory the archive is extracted into and re-packed from.
new_dir = '/tmp/new'
# Module-level placeholder; the functions below assign their own local
# `current_date`, so this value is never updated by them.
current_date = ''
def lambda_handler(event, context):
    """Re-date every archive referenced by an S3 event notification.

    For each record in ``event['Records']`` the object is downloaded into
    ``work_dir``, re-packed by ``process`` with ``accepted_date`` substituted
    for the date found in its name, uploaded back to the same bucket under
    the re-dated key, and the scratch directory is cleaned up.  Objects whose
    (slash-stripped) key already contains ``accepted_date`` are skipped.

    Args:
        event: S3 notification payload; must contain a ``Records`` list.
        context: Lambda context object (unused).
    """
    # Function-scope import so this block stays self-contained.
    from urllib.parse import unquote_plus

    for record in event['Records']:
        bucket = record['s3']['bucket']['name']
        # S3 event notifications URL-encode the object key (a space arrives
        # as '+'), so it must be decoded before it is used as a real key.
        key = unquote_plus(record['s3']['object']['key'])
        # Flatten the key so it can be used as a single file name in /tmp.
        tmpkey = key.replace('/', '')
        if accepted_date in tmpkey:
            print('Already has the correct date. No processing')
        else:
            download_path = '{}/{}'.format(work_dir, tmpkey)
            current_date = get_current_date(tmpkey)
            newkey = tmpkey.replace(current_date, accepted_date)
            # The local output file uses the .tar.gz extension; the uploaded
            # key keeps the original extension.
            upload_path = '{}/{}'.format(
                work_dir, newkey.replace('tgz', 'tar.gz'))
            print(work_dir)
            s3_client.download_file(bucket, key, download_path)
            process(upload_path)
            s3_client.upload_file(upload_path, bucket, newkey)
            cleanup(download_path)
            # Debug listing of whatever is left in the scratch directory.
            print('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
            for entry in os.scandir(work_dir):
                print(entry.name)
            print('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb')
def process(upload_path):
    """Re-pack every ``.tgz`` file found in ``work_dir``.

    Each matching archive is extracted into ``new_dir``, its XML content and
    file names are re-dated by ``replace_content``, and the result is written
    to *upload_path* by ``compress2``.

    Args:
        upload_path: Path of the archive to create.
    """
    for entry in os.scandir(work_dir):
        # is_file must be *called*: the bare bound method is always truthy,
        # which would let a directory named "*.tgz" through.
        if not (entry.name.endswith(".tgz") and entry.is_file()):
            continue
        print(entry.path)
        current_date = get_current_date(entry.name)
        decompress(entry.path)
        replace_content(current_date)
        compress2(upload_path)
def get_current_date(file_name):
    """Return the second underscore-separated field of *file_name*.

    Raises:
        IndexError: If *file_name* contains no underscore.
    """
    # Only the first two separators matter for extracting field 1.
    fields = file_name.split('_', 2)
    return fields[1]
def decompress(file_name):
    """Extract the gzip-compressed tar archive *file_name* into ``new_dir``.

    Args:
        file_name: Path of the ``.tgz`` archive to extract.
    """
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(file_name, "r:gz") as tar:
        # NOTE(security): extractall trusts the member paths stored in the
        # archive; if archives can come from untrusted uploaders, validate
        # the members (or pass an extraction filter where the runtime
        # supports it) before extracting.
        tar.extractall(new_dir)
def compress2(output_filename):
    """Pack the contents of ``new_dir`` into a gzip-compressed tar archive.

    The archive is written in GNU tar format.  Since Python 3.8 ``tarfile``
    defaults to PAX format, whose extended headers can show up as extra
    ``@PaxHeader`` entries in tools that do not understand them.

    Args:
        output_filename: Path of the archive to create.
    """
    # if using tgz as file extension,you cannot decompress it using 7z on Windows.
    # Do not add entry by entry. Add the source folder instead.
    for entry in os.scandir(new_dir):
        print('Included: '+entry.path)
    with tarfile.open(output_filename, "w:gz",
                      format=tarfile.GNU_FORMAT) as tar:
        # NOTE(review): adding the folder itself stores a "./" root entry;
        # presumably tar restores that entry's mtime on extraction, which
        # would fail with "Cannot utime" when the extracting user does not
        # own the target directory -- confirm with the receiving side.
        tar.add(new_dir, arcname='.')
def cleanup(output_filename):
    """Delete everything inside ``work_dir``.

    Args:
        output_filename: Unused; kept so existing callers keep working.
    """
    # Snapshot the listing first so entries are not deleted while the
    # directory iterator is still open.
    with os.scandir(work_dir) as listing:
        entries = list(listing)
    for entry in entries:
        # The directory check must actually be *called*: the original tested
        # the bare ``entry.is_file`` method, which is always truthy and sent
        # directories to os.remove.  Symlinks are not followed, so a link is
        # unlinked rather than having its target tree removed.
        remove(entry.path, entry.is_dir(follow_symlinks=False))
def replace_content(current_date):
    """Re-date every ``.XML`` file directly inside ``new_dir``.

    Each matching file is rewritten with every occurrence of *current_date*
    replaced by ``accepted_date``, in its content and in its file name.

    Args:
        current_date: Date string to replace.

    Returns:
        The path of the last rewritten file with ``'XML'`` replaced by
        ``'tgz'``, or ``None`` when no ``.XML`` file was found (the original
        raised ``UnboundLocalError`` in that case).
    """
    new_path = None
    # Snapshot the matches up front: files are created and removed below,
    # and a directory must not be modified while it is being scanned.
    # NOTE(review): the extension match is case-sensitive, so a lowercase
    # ".xml" file is skipped -- confirm that this is intended.
    with os.scandir(new_dir) as listing:
        xml_entries = [e for e in listing
                       if e.name.endswith(".XML") and e.is_file()]
    for entry in xml_entries:
        print(entry.path)
        # Rename only the file name, never the directory part of the path.
        new_path = os.path.join(
            new_dir, entry.name.replace(current_date, accepted_date))
        # Write to a temporary sibling first: when the name does not contain
        # current_date, new_path equals entry.path and writing in place
        # would truncate the file before it is read.
        tmp_path = new_path + '.tmp'
        with open(entry.path, "rt") as old, open(tmp_path, "wt") as new:
            for line in old:
                new.write(line.replace(current_date, accepted_date))
        os.replace(tmp_path, new_path)
        # Only delete the source when it is a different file from the result.
        if new_path != entry.path:
            remove(entry.path, False)
    if new_path is None:
        return None
    return new_path.replace('XML', 'tgz')
def remove(path, is_dir):
    """Delete *path* if it exists and report the outcome on stdout.

    Args:
        path: File or directory to delete.
        is_dir: True to delete a directory tree, False to delete a file.
    """
    # Guard clause: nothing to delete, just report it.
    if not os.path.exists(path):
        print("The file does not exist: "+path)
        return
    delete = shutil.rmtree if is_dir else os.remove
    delete(path)
    print('Removed: '+path)
知道我做错了什么吗?
PS
所有内容文件和tgz文件的文件权限均为-rwxrwxrwx,
如果我在本地下载并解压缩,则文件所有者是我自己。
解决方法
可以试试这个解决方案:创建归档时显式指定GNU格式(自Python 3.8起,tarfile默认使用PAX格式,会写入额外的pax扩展头):
# Request GNU tar format explicitly instead of relying on tarfile's default.
tar = tarfile.open('sample.tar.gz','w:gz',format=tarfile.GNU_FORMAT)