为什么函数指针的性能要比虚方法好

问题描述

我使用这段代码进行了分析

@H_404_3@#include "Timer.h"
#include <iostream>

enum class BackendAPI {
    B_API_NONE,B_API_VULKAN,B_API_DIRECTX_12,B_API_WEB_GPU,};

namespace Functional
{
    typedef void* VertexBufferHandle;

    namespace Vulkan
    {
        struct VulkanVertexBuffer {};

        VertexBufferHandle CreateVertexBuffer(size_t size)
        {
            return nullptr;
        }

        __forceinline void Hello() {}
        __forceinline void Bello() {}
        __forceinline void Mello() {}
    }

    class RenderBackend {
    public:
        RenderBackend() {}
        ~RenderBackend() {}

        void SetupBackendMethods(BackendAPI api)
        {
            switch (api)
            {
            case BackendAPI::B_API_VULKAN:
            {
                CreateVertexBuffer = Vulkan::CreateVertexBuffer;
                Hello = Vulkan::Hello;
                Bello = Vulkan::Bello;
                Mello = Vulkan::Mello;
            }
            break;
            case BackendAPI::B_API_DIRECTX_12:
                break;
            case BackendAPI::B_API_WEB_GPU:
                break;
            default:
                break;
            }
        }

        VertexBufferHandle(*CreateVertexBuffer)(size_t size) = nullptr;
        void (*Hello)() = nullptr;
        void (*Bello)() = nullptr;
        void (*Mello)() = nullptr;
    };
}

namespace ObjectOriented
{
    struct VertexBuffer {};

    class RenderBackend {
    public:
        RenderBackend() {}
        virtual ~RenderBackend() {}

        virtual VertexBuffer* CreateVertexBuffer(size_t size) = 0;
        virtual void Hello() = 0;
        virtual void Bello() = 0;
        virtual void Mello() = 0;
    };

    class VulkanBackend final : public RenderBackend {
        struct VulkanVertexBuffer : public VertexBuffer {};

    public:
        VulkanBackend() {}
        ~VulkanBackend() {}

        __forceinline virtual VertexBuffer* CreateVertexBuffer(size_t size) override
        {
            return nullptr;
        }

        __forceinline virtual void Hello() override {}
        __forceinline virtual void Bello() override {}
        __forceinline virtual void Mello() override {}
    };

    RenderBackend* CreateBackend(BackendAPI api)
    {
        switch (api)
        {
        case BackendAPI::B_API_VULKAN:
            return new VulkanBackend;
            break;
        case BackendAPI::B_API_DIRECTX_12:
            break;
        case BackendAPI::B_API_WEB_GPU:
            break;
        default:
            break;
        }

        return nullptr;
    }
}

int main()
{
    constexpr int maxItr = 1000000;

    for (int i = 0; i < 100; i++)
    {
        int counter = maxItr;
        Timer t;

        auto pBackend = ObjectOriented::CreateBackend(BackendAPI::B_API_VULKAN);
        while (counter--)
        {
            pBackend->Hello();
            pBackend->Bello();
            pBackend->Mello();

            auto pRef = pBackend->CreateVertexBuffer(100);
        }

        delete pBackend;
    }

    std::cout << "\n";

    for (int i = 0; i < 100; i++)
    {
        int counter = maxItr;
        Timer t;

        {
            Functional::RenderBackend backend;
            backend.SetupBackendMethods(BackendAPI::B_API_VULKAN);
            while (counter--)
            {
                backend.Hello();
                backend.Bello();
                backend.Mello();

                auto pRef = backend.CreateVertexBuffer(100);
            }
        }
    }
}

其中的“ #include“ Timer.h”是

@H_404_3@#pragma once
#include <chrono>

/**
 * Timer class.
 * This calculates the total time taken from creation till the termination of the object.
 */
class Timer {
public:
    /**
     * Default contructor.
     */
    Timer()
    {
        // Set the time point at the creation of the object.
        startPoint = std::chrono::high_resolution_clock::Now();
    }

    /**
     * Default destructor.
     */
    ~Timer()
    {
        // Get the time point of the time of the object's termination.
        auto endPoint = std::chrono::high_resolution_clock::Now();

        // Convert time points.
        long long start = std::chrono::time_point_cast<std::chrono::microseconds>(startPoint).time_since_epoch().count();
        long long end = std::chrono::time_point_cast<std::chrono::microseconds>(endPoint).time_since_epoch().count();

        // Print the time to the console.
        printf("Time taken: %15I64d\n",static_cast<__int64>(end - start));
    }

private:
    std::chrono::time_point<std::chrono::high_resolution_clock> startPoint; // The start time point.
};

在图形中的输出（在Visual Studio 2019中使用@H_404_3@Release配置编译）之后，结果如下，

注意：上面的代码用于分析在构建大型库时功能性和面向对象方法的性能差异。通过运行应用程序5次并重新编译源代码来完成性能分析。每次运行有100次迭代。两种方法都进行了测试（首先是面向对象，第二是功能，反之亦然），但是性能结果大致相同。

我知道继承有点慢，因为继承必须在运行时从V表解析函数指针。但是我不了解的部分是，如果我是对的，函数指针也会在运行时解析。这意味着程序需要在执行之前获取功能代码。

我的问题是

为什么函数指针的性能要比虚拟方法好一些？
为什么虚方法在某些时候性能会下降，但函数指针却有些稳定？

谢谢！

解决方法

每次调用方法时（基本）都需要访问虚拟方法查找表。它将为每个调用添加另一个间接。

初始化后端，然后保存函数指针时，实际上就删除了这个额外的间接寻址，并在开始时对其进行了预先计算。

因此，直接函数指针带来的少量性能提升并不奇怪。

c++function-pointers performance profiling virtual-functions