问题描述
我想比较下面两个向量的字符串部分,并尽快从 v1 中删除与 v2 重复的项。
using namespace std;
// v1: plain strings; this is the vector the duplicates should be removed from.
vector<string> v1;
// v2: (string, int) pairs; only the string half (.first) is compared against v1.
vector<pair<string,int>> v2;
我尝试了 find_if 加 lambda 的写法,它似乎比嵌套的 for 循环快一点。
示例数据:
+--------+------------+
| index | v1 |
+--------+------------+
| 0 | apple |
+--------+------------+
| 1 | watermelon |
+--------+------------+
| 2 | cherry |
+--------+------------+
| 3 | tomato |
+--------+------------+
| 4 | cucumber |
+--------+------------+
| . | . |
+--------+------------+
| . | . |
+--------+------------+
| 419776 | lettuce |
+--------+------------+
+--------+---------------------+
| index | v2 |
+--------+------------+--------+
| | first | second |
+--------+------------+--------+
| 0 | pear | 345 |
+--------+------------+--------+
| 1 | apple | 85 |
+--------+------------+--------+
| 2 | strawBerry | 1912 |
+--------+------------+--------+
| 3 | grape | 54 |
+--------+------------+--------+
| 4 | peach | 90 |
+--------+------------+--------+
| . | . | . |
+--------+------------+--------+
| . | . | . |
+--------+------------+--------+
| 21803 | pineapple | 100 |
+--------+------------+--------+
所需结果:
如您所见,apple 在两个向量中都完全匹配。所以我想从 v1 中删除这样的重复项。
经过测试的方法:
// Attempt 1: for every string in v1, linearly scan all of v2 and erase the
// v1 element when a pair with an equal .first is found.
// NOTE(review): v1.erase(itr1) invalidates itr1, and its return value (the
// iterator to continue from) is discarded. The inner loop does not break, so
// it dereferences itr1 again, and the outer loop then increments it; both are
// undefined behavior after the erase.
for (auto itr1 = v1.begin(); itr1 != v1.end(); ++itr1)
for (auto itr2 = v2.begin(); itr2 != v2.end(); ++itr2)
if (*itr1 == itr2->first)
v1.erase(itr1);
嵌套 for 循环花费了大约 35 秒。
// Attempt 2: visit each string of v1 with for_each and use find_if to test
// whether it occurs as a .first in v2; erase from v1 on a match.
// NOTE(review): itr starts at v1.begin() and is incremented before the test,
// so on a match it refers to the element after s (v1.end() for the last
// element), not to s itself.
// NOTE(review): erasing from v1 while for_each is still iterating over it
// invalidates the iterators for_each is using (and itr); undefined behavior.
auto itr = v1.begin();
// v2 is captured by value, so the closure holds its own copy of the vector.
for_each(v1.begin(),v1.end(),[&itr,&v1,v2](const auto& s)
{
++itr;
// The inner lambda captures s by value, copying the string for every v1 element.
if (find_if(v2.begin(),v2.end(),[s](const auto& sV) { return s == sV.first; }) != v2.end())
v1.erase(itr);
});
这个花了大约31秒。
解决方法
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>
#include <algorithm>
using namespace std;
// Sample input: the strings to filter. "apple" also appears in v2 below.
vector<string> v1{ "apple","watermelon","cherry","cucumber" };
// Sample input: (string, int) pairs; main() only reads the string half (.first).
vector<pair<string,int>> v2{ {"pear",345},{"apple",85},{"strawberry",1912},{"grape",54},{"peach",90} };
// Hash functor for (string, index) pairs. Only the string half is hashed, so
// two pairs with the same string produce the same hash value whatever their
// index is.
struct HASH {
    std::size_t operator()(const std::pair<std::string, long long>& entry) const {
        const std::hash<std::string> stringHasher{};
        return stringHasher(entry.first);
    }
};
// Equality functor for (string, index) pairs. Two pairs are considered equal
// when their strings match; the index half is ignored.
struct EQUAL {
    bool operator()(const std::pair<std::string, long long>& lhs,
                    const std::pair<std::string, long long>& rhs) const {
        return lhs.first == rhs.first;
    }
};
// Removes from v1 every string that also appears as a .first in v2 (and any
// repeated string inside v1 itself, keeping its first occurrence), then prints
// the surviving strings one per line in their original v1 order.
//
// One hash-set lookup per element gives expected linear time in
// v1.size() + v2.size(). No sorting is needed, and the result does not depend
// on v2 being free of repeated strings.
int main()
{
    // Strings that must not be kept: seeded with every key of v2, and extended
    // below with each v1 string as soon as it has been kept once.
    std::unordered_set<std::string> seen;
    seen.reserve(v1.size() + v2.size());
    for (const auto& entry : v2)
        seen.insert(entry.first);

    // Compact v1 in place: survivors are swapped towards the front and the
    // tail is erased once at the end, so no iterator is used after being
    // invalidated and no element is shifted more than once.
    auto keep = v1.begin();
    for (auto it = v1.begin(); it != v1.end(); ++it) {
        // insert() reports false when the string is already known, i.e. it is
        // present in v2 or was kept earlier from v1.
        if (!seen.insert(*it).second)
            continue;
        if (keep != it)
            keep->swap(*it);
        ++keep;
    }
    v1.erase(keep, v1.end());

    // '\n' instead of endl: avoids flushing the stream after every line.
    for (const auto& survivor : v1)
        std::cout << survivor << '\n';
    return 0;
}
我认为这是最快的方法。