以特定格式提取 Excel 数据作为 python 文件的输入

问题描述

大家好,在一个项目中,我承担了一项任务:需要从一个 Excel 文件中提取数据,并将这些数据作为另一个 Python 脚本的输入。问题是,提取的数据必须采用特定格式才能使脚本正常工作。以下是到目前为止我在 Python 脚本中所做的工作:

import tkinter as tk
from tkinter import filedialog
import pandas as pd

# Main application window.
root = tk.Tk()

# 400x400 drawing surface that hosts the import button.
canvas1 = tk.Canvas(
    root,
    width=400,
    height=400,
    bg='lightsteelblue',
)
canvas1.pack()

def getfile():
    """Ask the user for an Excel file and load its rows into ``df``.

    Opens a file-selection dialog, reads the chosen workbook with pandas,
    replaces missing cells with empty strings and stores the result in the
    module-level ``df`` as a list of rows (each row itself a list of cell
    values). The rows are also printed.

    If the dialog is cancelled, nothing is read and ``df`` is left untouched.
    """
    global df
    import_file_path = filedialog.askopenfilename()
    # The dialog yields an empty value when the user cancels; passing that to
    # read_excel would raise, so bail out early instead.
    if not import_file_path:
        return
    df = pd.read_excel(import_file_path).fillna("").values.tolist()
    print(df)


# Button that triggers the Excel import; placed at the centre of the canvas.
browseButton_Excel = tk.Button(
    text='Import Excel File',
    command=getfile,
    bg='green',
    fg='white',
    font=('helvetica', 12, 'bold'),
)
canvas1.create_window(200, 200, window=browseButton_Excel)

# Hand control to Tk's event loop (blocks until the window is closed).
root.mainloop()

excel 文件如下所示: raw_data

我想从这个excel文件提取以下格式的数据:

jd = [  
    [(2,3304),(4,2942)],[(2,1119),987)],1025),(2,859),961)],1094),916),1025)],576),488),552)],450),384),431)],[(0,2665),(1,1281),(3,3670)],1690),826),2321)],2230),1078),3068)],1495),(5,1220)]  
]

这是我第一次在这里提问,如果遗漏了什么,请见谅。提前致谢。

~更新~ 我想提取的数据将作为 Google OR-Tools 中给出的调度问题里 “jobs_data” 变量的输入。它有以下代码:

import collections

from ortools.sat.python import cp_model


def MinimalJobshopSat():
    """Solve a small job-shop scheduling problem with CP-SAT and print it.

    Builds one interval variable per task, forbids overlapping intervals on
    the same machine, forces the tasks of a job to run in the listed order,
    and minimizes the makespan (the latest end time over all jobs). When the
    solver reports an optimal solution, the per-machine schedule and the
    optimal schedule length are printed; otherwise a short notice is printed.
    """
    model = cp_model.CpModel()

    # Each job is a list of tasks; each task is a (machine_id, duration) pair.
    jobs_data = [
        [(2, 1220)],
    ]

    machines_count = 1 + max(task[0] for job in jobs_data for task in job)
    all_machines = range(machines_count)

    # Upper bound for every start/end time: all tasks executed back to back.
    horizon = sum(task[1] for job in jobs_data for task in job)

    task_type = collections.namedtuple('task_type', 'start end interval')

    assigned_task_type = collections.namedtuple(
        'assigned_task_type', 'start job index duration')

    all_tasks = {}
    machine_to_intervals = collections.defaultdict(list)

    # Create the start/end/interval variables of every task.
    for job_id, job in enumerate(jobs_data):
        for task_id, task in enumerate(job):
            machine = task[0]
            duration = task[1]
            suffix = '_%i_%i' % (job_id, task_id)
            start_var = model.NewIntVar(0, horizon, 'start' + suffix)
            end_var = model.NewIntVar(0, horizon, 'end' + suffix)
            interval_var = model.NewIntervalVar(
                start_var, duration, end_var, 'interval' + suffix)
            all_tasks[job_id, task_id] = task_type(
                start=start_var, end=end_var, interval=interval_var)
            machine_to_intervals[machine].append(interval_var)

    # A machine can process only one task at a time.
    for machine in all_machines:
        model.AddNoOverlap(machine_to_intervals[machine])

    # Within a job, a task may start only after the previous one has ended.
    for job_id, job in enumerate(jobs_data):
        for task_id in range(len(job) - 1):
            model.Add(all_tasks[job_id, task_id + 1].start
                      >= all_tasks[job_id, task_id].end)

    # Objective: minimize the end time of the last task of any job.
    obj_var = model.NewIntVar(0, horizon, 'makespan')
    model.AddMaxEquality(obj_var, [
        all_tasks[job_id, len(job) - 1].end
        for job_id, job in enumerate(jobs_data)
    ])
    model.Minimize(obj_var)

    solver = cp_model.CpSolver()
    status = solver.Solve(model)

    if status == cp_model.OPTIMAL:
        # Group the solved tasks by the machine they run on.
        assigned_jobs = collections.defaultdict(list)
        for job_id, job in enumerate(jobs_data):
            for task_id, task in enumerate(job):
                machine = task[0]
                assigned_jobs[machine].append(
                    assigned_task_type(
                        start=solver.Value(all_tasks[job_id, task_id].start),
                        job=job_id,
                        index=task_id,
                        duration=task[1]))

        output = ''
        for machine in all_machines:
            # Tuples compare field by field, so this orders tasks by start.
            assigned_jobs[machine].sort()
            sol_line_tasks = 'Machine ' + str(machine) + ': '
            sol_line = '           '

            for assigned_task in assigned_jobs[machine]:
                name = 'job_%i_%i' % (assigned_task.job, assigned_task.index)
                sol_line_tasks += '%-10s' % name

                start = assigned_task.start
                duration = assigned_task.duration
                sol_tmp = '[%i,%i]' % (start, start + duration)
                sol_line += '%-10s' % sol_tmp

            sol_line += '\n'
            sol_line_tasks += '\n'
            output += sol_line_tasks
            output += sol_line

        print('Optimal Schedule Length: %i' % solver.ObjectiveValue())
        print(output)
    else:
        # Make a failed solve visible instead of exiting silently.
        print('No optimal solution found.')
    
MinimalJobshopSat()

解决方法

也许您应该尝试将 excel 文件导出为 .csv(逗号分隔值)文件。然后您可以用 python 读取文件:

# Read the whole exported CSV into memory; the context manager guarantees the
# file handle is closed even if reading fails.
with open("excel.csv", "r") as f:
    data = f.read()

之后,您可以按行和分号拆分值并创建多维数组。