使用迭代器按元素数而不是键从 std::map 获取子映射

问题描述

我有一个 std::map<std::string,std::vector<std::string>>,我需要把这个 map 划分为若干个子 map,并将每个子 map 交给一个线程,从而对它执行多线程任务。

使用 std::vector<T> 我可以很容易地得到一个子向量,这样做:

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

// Demonstrates slicing a std::vector by element count: vector iterators are
// random-access, so `begin + offset` directly addresses the wanted window.
int main(void)
{
    size_t off = 0;
    size_t num_elms = 100; // Made up value
    // Placeholder contents; the source must hold at least off + num_elms
    // elements, otherwise the iterator arithmetic below is undefined behaviour.
    std::vector<uint8_t> full(1000);
    // Copy the window [off, off + num_elms) into its own vector.
    std::vector<uint8_t> sub(std::begin(full) + off, std::begin(full) + off + num_elms);
    // Advance to the start of the next window.
    off += num_elms;
}

但是,对 std::map<T1,T2> 执行相同操作会导致编译错误

#include <vector>
#include <map>
#include <string>

// Intentionally ill-formed: this is the same slicing idiom as the vector
// version, applied to std::map to show the compile error being asked about.
int main(void)
{
    size_t off = 0; 
    size_t num_elms = 100; 
    
    std::map<std::string,std::vector<std::string>> full; 
    // Does not compile: std::map iterators are bidirectional, not random-access,
    // so `iterator + integer` is not defined for them.
    std::map<std::string,std::vector<std::string>> sub(std::begin(full) + off,std::begin(full) + off + num_elms); 
    off = off + num_elms;
}

其他 std::map “类型”也是如此。据我了解,这是由迭代器的类别决定的。

一种可行的做法是先提取出所有的键,然后执行类似下面这个方案的操作:

#include <map>
#include <vector>
#include <string>

#include <iostream>

/// Writes every entry of `_map` to std::cout: a "key: <key>" line, a "values"
/// line, then each element of the mapped vector on its own tab-indented line.
void print_map(const std::map<std::string,std::vector<std::string>>& _map)
{
    for (auto entry = _map.cbegin(); entry != _map.cend(); ++entry)
    {
        std::cout << "key: " << entry->first << "\nvalues\n";
        for (const std::string& item : entry->second)
        {
            std::cout << "\t" << item << "\n";
        }
    }
}

/// Writes a "keys: " header to std::cout followed by each key on its own line.
void print_keys(const std::vector<std::string>& keys)
{
    std::cout << "keys: \n";
    for (auto current = keys.cbegin(); current != keys.cend(); ++current)
    {
        std::cout << *current << "\n";
    }
}

// Splits `full` into two sub-maps of num_elms entries each by first collecting
// the keys into a vector and then translating key positions back into iterators.
int main(void)
{
    std::map<std::string,std::vector<std::string>> full;

    full["aa"] = {"aa","aaaa","aabb"};
    full["bb"] = {"bb","bbbbb","bbaa"};
    full["cc"] = {"cc","cccc","ccbb"};
    full["dd"] = {"dd","dd","ddcc"};

    print_map(full);

    // std::map offers no positional access, so build a position -> key table.
    std::vector<std::string> keys;
    keys.reserve(full.size());

    for (const auto& entry : full)
    {
        keys.emplace_back(entry.first);
    }

    print_keys(keys);

    // Translates a position in `keys` into the matching iterator of `full`.
    // A position at or past the end maps to full.end(), so the last chunk keeps
    // its final element and no out-of-range key lookup is ever attempted.
    const auto iter_at = [&full, &keys](size_t index)
    {
        return index < keys.size() ? full.find(keys[index]) : full.end();
    };

    size_t off = 0;
    const size_t num_elms = 2;

    // Each sub-map copies the half-open range [off, off + num_elms).
    std::map<std::string,std::vector<std::string>> sub1(iter_at(off), iter_at(off + num_elms));
    off += num_elms;
    std::map<std::string,std::vector<std::string>> sub2(iter_at(off), iter_at(off + num_elms));

    std::cout << "sub1:\n";
    print_map(sub1);
    std::cout << "sub2:\n";
    print_map(sub2);
}

然而,这样做的效率可能极低,因为 map 可能非常大(超过 10k 个元素)。

那么,有没有更好的方法,可以在 std::map 上复现 std::vector 的这种做法?

解决方法

一种稍微不同的方法是使用 C++17 中添加的执行策略之一,例如 std::execution::parallel_policy,其预定义实例为 std::execution::par(例如,可以将它作为第一个参数传给 std::for_each 等标准算法)。

稍加修改,您就可以相当轻松地把一个范围传递给 print_map,并通过对迭代器调用 std::next 来划分您的 map。

// Lightweight iterator pair exposing begin()/end(), which is all a range-based
// for loop needs. It stores copies of the two iterators only, not the elements.
template <typename Iter>
struct Range {
    Iter b;  // start of the sequence
    Iter e;  // one past the last element of the sequence

    Range(Iter first, Iter last)
        : b(first), e(last)
    {
    }

    Iter begin() const
    {
        return b;
    }

    Iter end() const
    {
        return e;
    }
};

// Shorter aliases: Map is the container type used by the example, and MapView
// is a Range over that container's const iterators.
using Map = std::map<std::string,std::vector<std::string>>;
using MapView = Range<Map::const_iterator>;

// Prints the entries covered by `map`, which may be any slice of a Map rather
// than the whole container: a "key: <key>" line, a "values" line, then each
// element of the mapped vector on its own tab-indented line.
void print_map(MapView map) {
    for (auto entry = map.begin(); entry != map.end(); ++entry)
    {
        std::cout << "key: " << entry->first << "\nvalues\n";
        for (const std::string& item : entry->second)
        {
            std::cout << "\t" << item << "\n";
        }
    }
}

int main(void)
{
    Map full;

    full["aa"] = {"aa","aaaa","aabb"};
    full["bb"] = {"bb","bbbbb","bbaa"};
    full["cc"] = {"cc","cccc","ccbb"};
    full["dd"] = {"dd","dd","ddcc"};

    // can still print the whole map
    print_map({ map.begin(),map.end() });

    size_t num_elms = 2;
    size_t num_full_views = full.size() / num_elms;
    
    std::vector<MapView> views;

    auto it = full.begin();
    for (size_t i = 0; i < num_full_views; ++i) {
        auto next = std::next(it,num_elms);
        views.emplace_back(it,next);
        it = next;
    }

    if (it != full.end()) {
        views.emplace_back(it,full.end());
    }

    for (auto view : views) {
        print_map(view);
    }
}

在 C++20(或其他范围库)中,这可以通过 std::ranges::drop_view / std::ranges::take_view 简化。

using MapView = decltype(std::declval<Map>() | std::ranges::views::drop(0) | std::ranges::views::take(0));


for (size_t i = 0; i < map.size(); i += num_elms) {
    views.push_back(map | std::ranges::views::drop(i) | std::ranges::views::take(num_elms));
}