在全新的Python环境中,以编程方式从Python内部执行Python文件

问题描述

假设我有一个文件script.py,位于path = "foo/bar/script.py"。我正在寻找一种方法,通过函数execute_script()在我的主Python程序中以编程方式执行script.py。但是,我有一些要求,它们似乎阻止我采用涉及importlib或exec()的朴素方法:

  • script.py应该在“全新”的Python环境中执行,就像它是通过$ python script.py运行的一样。也就是说,应该相应地设置所有相关的全局状态,例如__name__、__file__、sys.modules、sys.path和工作目录,并且从我的主程序泄漏到该文件执行过程中的信息应尽可能少。(不过,如果script.py可以通过inspect模块发现自己不是直接通过$ python script.py执行的,那也没关系。)

  • 我需要访问执行结果,即execute_script()应该返回由script.py给出的模块,连同其所有变量、函数和类。(这排除了在子进程中启动新的Python解释器的做法。)

  • execute_script()必须在内部使用open()来读入script.py。这样一来,我就可以在单元测试期间使用pyfakefs包来模拟文件系统。(这排除了涉及importlib的简单解决方案。)

  • execute_script()不得(永久)修改我的主程序中的任何全局状态,例如sys.path或sys.modules。

  • 如果可能,script.py不应能够影响我的主程序的全局状态。(至少它不应能够影响我的主程序中的sys.path和sys.modules。)

  • 我需要能够修改script.py所看到的sys.path。因此,execute_script()应接受一个可选的系统路径列表作为参数。

  • 堆栈跟踪以及对script.py执行期间发生的错误的处理应照常工作。(这使得涉及exec()的解决方案变得困难。)

  • 解决方案应尽可能地面向未来,并且不依赖于Python解释器的实现细节。

对于任何想法,我将不胜感激!

解决方法

我刚刚发现exec()也接受代码对象(例如可以从compile()获得),并据此提出了一种似乎可以满足几乎所有要求的方法。之所以说“几乎”,是因为除了sys.path和sys.modules之外,脚本仍然可以影响主程序的全局状态。此外,脚本还可以看到在调用execute_script()之前已导入的所有模块。目前我对此已经很满意了。

这是包含测试的完整代码:

import os
import sys
from typing import List


# Alias for the type shared by all module objects. It is obtained from an
# already imported module and serves both as a constructor and as a type
# annotation further down.
module = type(os)


def create_module(name: str, file: str) -> module:
    """Create an empty module object that resembles a freshly started main module.

    Args:
        name: Value for the module's ``__name__`` (e.g. ``"__main__"``).
        file: Value for the module's ``__file__``.

    Returns:
        A new module object with ``__annotations__``, ``__builtins__`` and
        ``__file__`` set in addition to the attributes every module
        object is created with.
    """
    # Imported locally so that this function does not rely on the file's
    # import block.
    import builtins

    mod = module(name)
    # Instances of `module` automatically come with the attributes
    # __doc__, __loader__, __name__, __package__ and __spec__. Let's add
    # some more attributes that main modules usually come with:
    mod.__annotations__ = {}
    # Inside a real __main__ module, __builtins__ is the `builtins`
    # module itself, whereas the __builtins__ global of an imported
    # module is the builtins *dict*. Referencing the `builtins` module
    # directly (instead of this file's own __builtins__ global) gives
    # the same result no matter whether this file is run as a script or
    # imported.
    mod.__builtins__ = builtins
    mod.__file__ = file

    return mod


def exec_script(path: str, working_dir: str, syspath: List[str] = None) -> module:
    """
    Execute a Python script as if it were executed using `$ python
    <path>` from inside the given working directory. `path` can either
    be an absolute path or a path relative to `working_dir`.

    If `syspath` is provided, a copy of it will be used as `sys.path`
    during execution. Otherwise, `sys.path` will be set to
    `sys.path[1:]` which – assuming that `sys.path` has not been
    modified so far – removes the working directory from the time when
    the current Python program was started. Either way, the directory
    containing the script at `path` will always be added at position 0
    in `sys.path` afterwards, so as to simulate execution via `$ python
    <path>`.

    The script file is read through `open()`. While the script runs,
    `sys.modules`, `sys.path` and the current working directory are
    replaced; all three are restored before this function returns or
    propagates an exception.

    Returns:
        The module object the script was executed in; its attributes
        are the script's global variables, functions and classes.

    Raises:
        OSError: If the script cannot be read or `working_dir` cannot
            be entered.
        SyntaxError: If the script cannot be compiled.
        BaseException: Whatever the script itself raises is propagated
            unchanged (after the global state has been restored).
    """

    if os.path.isabs(path):
        abs_path = path
    else:
        abs_path = os.path.join(os.path.abspath(working_dir), path)

    # Read the raw bytes and leave decoding to compile(): like the
    # interpreter itself, it honours a BOM or an encoding declaration
    # and otherwise assumes UTF-8, independent of the locale encoding.
    with open(abs_path, "rb") as f:
        source = f.read()

    if sys.version_info < (3, 9):
        # Prior to Python 3.9, the __file__ variable inside the main
        # module always contained the path exactly as it was given to
        # `$ python`, no matter whether it is relative or absolute
        # and/or a symlink.
        the__file__ = path
    else:
        # Starting from Python 3.9, __file__ inside the main module is
        # always an absolute path.
        the__file__ = abs_path

    # The filename passed to compile() will be used in stack traces and
    # error messages. Normally it agrees with __file__.
    # dont_inherit=True keeps `from __future__` statements of *this*
    # file from silently applying to the script.
    code = compile(
        source, filename=the__file__, mode="exec", dont_inherit=True
    )

    the_module = create_module(name="__main__", file=the__file__)

    # According to
    # https://docs.python.org/3/tutorial/modules.html#the-module-search-path
    # if the script is a symlink, the symlink is followed before the
    # directory containing the script is added to sys.path.
    # os.path.realpath() also copes with relative and chained link
    # targets, which a bare os.readlink() does not.
    if os.path.islink(abs_path):
        sys_path_dir = os.path.dirname(os.path.realpath(abs_path))
    else:
        sys_path_dir = os.path.dirname(abs_path)

    if syspath is None:
        syspath = sys.path[1:]

    sysmodules_backup = sys.modules
    syspath_backup = sys.path
    cwd_backup = os.getcwd()
    try:
        # NOTE(review): the `sys` documentation cautions that replacing
        # the sys.modules dictionary "will not necessarily work as
        # expected"; modules imported by the script may still end up in
        # the caller's dictionary.
        sys.modules = sysmodules_backup.copy()
        sys.modules["__main__"] = the_module
        # Building a new list leaves the caller's `syspath` untouched.
        sys.path = [sys_path_dir, *syspath]
        os.chdir(working_dir)

        # For code inside a module, global and local variables are
        # given by the *same* dictionary.
        namespace = the_module.__dict__
        exec(code, namespace, namespace)
    finally:
        # Undo every global change, even if the script raised or
        # os.chdir(working_dir) failed.
        sys.modules = sysmodules_backup
        sys.path = syspath_backup
        os.chdir(cwd_backup)

    return the_module


#################
##### Tests #####
#################

# Make sure to install pyfakefs via pip!

import unittest

import pyfakefs
import pyfakefs.fake_filesystem_unittest


class Test_exec_script(pyfakefs.fake_filesystem_unittest.TestCase):
    """Tests for exec_script(), run against a pyfakefs fake file system."""

    def setUp(self):
        """Create /folder/script.py and a symlink to it at /folder2/symlink.py."""
        self.setUpPyfakefs()
        self.fs.create_file(
            "/folder/script.py",
            contents="\n".join(
                [
                    "import os",
                    "import sys",
                    "",
                    "cwd = os.getcwd()",
                    "sysmodules = sys.modules",
                    "syspath = sys.path",
                    "sys.modules['test_module'] = 'bar'",
                    "sys.path.append('/some/path')",
                ]
            ),
        )
        self.fs.create_symlink("/folder2/symlink.py", "/folder/script.py")

    @staticmethod
    def _expected__file__(as_given: str, absolute: str) -> str:
        """Pick the `__file__` value expected on the running Python version.

        exec_script() sets `__file__` to the path exactly as given before
        Python 3.9 and to the absolute path from 3.9 on.
        """
        return as_given if sys.version_info < (3, 9) else absolute

    #
    # __name__
    #
    def test__name__is_set_correctly(self):
        result = exec_script("script.py", "/folder")

        self.assertEqual(result.__name__, "__main__")

    #
    # __file__
    #
    def test_relative_path_works_and__file__shows_it(self):
        result = exec_script("script.py", "/folder")

        expected = self._expected__file__("script.py", "/folder/script.py")
        self.assertEqual(result.__file__, expected)

    def test_absolute_path_works_and__file__shows_it(self):
        result = exec_script("/folder/script.py", "/folder")

        self.assertEqual(result.__file__, "/folder/script.py")

    def test__file__doesnt_follow_symlink(self):
        result = exec_script("symlink.py", "/folder2")

        # Relative or absolute, __file__ must name the symlink, not its
        # target.
        expected = self._expected__file__("symlink.py", "/folder2/symlink.py")
        self.assertEqual(result.__file__, expected)

    #
    # working dir
    #
    def test_working_directory_is_set_and_reset_correctly(self):
        os.chdir("/")

        result = exec_script("/folder/script.py", "/folder")

        self.assertEqual(result.cwd, "/folder")
        self.assertEqual(os.getcwd(), "/")

    #
    # sys.modules
    #
    def test__main__module_is_set_correctly(self):
        result = exec_script("/folder/script.py", "/folder")

        self.assertIs(result.sysmodules["__main__"], result)

    def test_script_cannot_modify_our_sys_modules(self):
        sysmodules_backup = sys.modules.copy()

        exec_script("/folder/script.py", "/folder")

        # assertTrue instead of assertEqual: a failure should not try to
        # render a diff of the entire sys.modules dictionary.
        self.assertTrue(
            sys.modules == sysmodules_backup,
            "sys.modules was modified by exec_script()",
        )

    #
    # sys.path
    #
    def test_script_cannot_modify_our_sys_path(self):
        syspath_backup = sys.path.copy()

        exec_script("/folder/script.py", "/folder")

        self.assertEqual(sys.path, syspath_backup)

    def test_sys_path_is_set_up_correctly(self):
        syspath_backup = sys.path[:]
        result = exec_script("/folder/script.py", "/folder")

        self.assertEqual(result.syspath[0], "/folder")
        self.assertEqual(
            result.syspath[1:], syspath_backup[1:] + ["/some/path"]
        )

    def test_symlink_is_followed_before_adding_base_dir_to_sys_path(self):
        result = exec_script("symlink.py", "/folder2")

        self.assertEqual(result.syspath[0], "/folder")


# Run the test suite above when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()