如何使用 C++ 计算列表中的词频?

问题描述

所以我在一个文件中有一些词。我将它们读到一个列表中,然后我试图找到每个单词的频率。我的问题是我必须遵循一个不太灵活的列表的特定实现。 这是 List 类:

// Maximum number of elements a List can hold.
const int maxListSize = 50;

// Fixed-capacity, append-only list with a single built-in read cursor.
//
// Elements are stored by value in insertion order. Reading is done by calling
// first() to reset the cursor and then next() repeatedly; both copy the
// current element into the caller's variable, so callers never get a
// reference to the stored element.
//
// Standard-library names are fully qualified so the class does not depend on
// a `using namespace std;` directive being in effect where it is defined.
template<class T>
class List {
private:
    int numberOfElements;   // number of valid entries at the front of `data`
    int currentPosition;    // cursor index; -1 until first() has succeeded
    T data[maxListSize];
public:
    // Creates an empty list whose cursor is not yet positioned.
    List() : numberOfElements(0), currentPosition(-1) {
    }

    // Appends a copy of `element`. When the list already holds maxListSize
    // elements, prints "List is Full" and leaves the list unchanged.
    // Taking the argument by const reference avoids copying it twice.
    void insert(const T &element) {
        if (numberOfElements >= maxListSize) {
            std::cout << "List is Full" << std::endl;
            return;
        }
        data[numberOfElements] = element;
        numberOfElements++;
    }

    // Moves the cursor to the first element and copies it into `element`.
    // Returns false (after printing "List is Empty", leaving `element`
    // untouched) when the list has no elements.
    bool first(T &element) {
        if (numberOfElements == 0) {
            std::cout << "List is Empty" << std::endl;
            return false;
        }
        currentPosition = 0;
        element = data[currentPosition];
        return true;
    }

    // Advances the cursor and copies the new current element into `element`.
    // Returns false, leaving `element` untouched, when first() has not been
    // called successfully yet (a message is printed) or when the cursor is
    // already on the last element (silently, since that is the normal way
    // for an iteration to end).
    bool next(T &element) {
        if (currentPosition < 0) {
            std::cout << "Please call the first function before calling the next" << std::endl;
            return false;
        }
        if (currentPosition >= numberOfElements - 1) {
            return false;
        }
        currentPosition++;
        element = data[currentPosition];
        return true;
    }
};

假设我的列表名为 names。如何获取其中每个单词的出现频率?

解决方法

列表中允许存放重复项吗?如果允许,可以在 List 类中添加如下成员函数,通过遍历列表来统计某个元素出现的次数。

int count(T &element) {
    int numberOfDuplicates = 0;
    for (int i = 0; i < numberOfElements; i++) {
        if (data[i] == element) {
            numberOfDuplicates++;
        }
    }
    return numberOfDuplicates;
}
---

鉴于这个不太理想的 List 接口,我会这样做。

最初我想使用 List<pair<string,int>>,但 first 和 next 函数返回的是元素的副本,因此无法就地修改计数,所以改为在列表中存储指针!

这会泄漏内存。如果不泄漏对您很重要,那么您可以使用智能指针或尝试释放内存。我觉得简单可能更好。

#include <iostream>
#include <string>

// Upper bound on the number of elements a List can store.
const int maxListSize = 50;

// Fixed-capacity, append-only container with one built-in read cursor.
// first() rewinds the cursor and next() advances it; both hand the current
// element back as a copy through their reference parameter.
template<class T>
class List
{
public:
    // Starts out empty with the cursor unset.
    List() : numberOfElements(0), currentPosition(-1) {}

    // Appends `element`; the call is silently ignored once the list is full.
    void insert(T element)
    {
        if (numberOfElements < maxListSize)
        {
            data[numberOfElements] = element;
            ++numberOfElements;
        }
    }

    // Rewinds to the first element and copies it into `element`.
    // Returns false, leaving `element` untouched, if the list is empty.
    bool first(T &element)
    {
        if (numberOfElements == 0)
            return false;

        currentPosition = 0;
        element = data[0];
        return true;
    }

    // Steps to the following element and copies it into `element`.
    // Returns false, leaving `element` untouched, if first() has not yet
    // succeeded or if the cursor already sits on the last element.
    bool next(T &element)
    {
        const bool cursorStarted = currentPosition >= 0;
        if (!cursorStarted)
            return false;

        const bool hasSuccessor = currentPosition + 1 < numberOfElements;
        if (!hasSuccessor)
            return false;

        ++currentPosition;
        element = data[currentPosition];
        return true;
    }

private:
    int numberOfElements;   // count of valid entries in `data`
    int currentPosition;    // cursor index, -1 while unset
    T data[maxListSize];
};

// One frequency entry: `first` is the word, `second` is its occurrence count.
using WordPair = std::pair<std::string,int>;
// The list holds pointers to the entries rather than the entries themselves:
// List::first/next copy the stored element out, so a pair stored by value
// could not have its count updated in place, whereas a copied pointer still
// refers to the original entry.
using WordList = List<WordPair*>;

// Records one occurrence of the word `s` in `wl`.
//
// If an entry for `s` already exists its count is incremented; otherwise a
// new heap-allocated entry with count 1 is appended. Entries that were stored
// are owned by the list's contents and are not freed by this function.
//
// wl: frequency list to update; its read cursor is moved by this call.
// s:  the word to count.
void incrementCount(WordList &wl,const std::string& s)
{
    // Single cursor loop: first() starts the scan, next() continues it.
    WordPair* item = nullptr;
    for (bool more = wl.first(item); more; more = wl.next(item))
    {
        if (item->first == s)
        {
            ++(item->second);
            return;
        }
    }

    WordPair* const fresh = new WordPair { s,1 };
    wl.insert(fresh);

    // List::insert reports nothing and silently ignores the element when the
    // list is full. In that case the new entry would be unreachable, so walk
    // to the last stored element and free the entry if it did not land there.
    WordPair* last = nullptr;
    bool more = wl.first(last);
    while (more)
    {
        more = wl.next(last);
    }
    if (last != fresh)
    {
        delete fresh;
    }
}

// Writes every entry of `wl` to standard output, one per line, formatted as
// "<word> : <count>", in the order the entries are stored. Prints nothing for
// an empty list. The list's read cursor is moved by this call.
void printList(WordList &wl)
{
    WordPair *entry = nullptr;
    for (bool more = wl.first(entry); more; more = wl.next(entry))
    {
        std::cout << entry->first << " : " << entry->second << "\n";
    }
}

// Demo: counts the occurrences of a few sample words and prints the result.
int main()
{
    // The array length is deduced from the initializer. With a fixed length
    // larger than the number of initializers, the remaining slots would be
    // empty strings and would be counted as a word of their own.
    const std::string words[] = { "one","two","three","four","one","three" };
    WordList wl;
    for (const std::string& word : words)
    {
        incrementCount(wl,word);
    }
    printList(wl);
}

示例:https://ideone.com/W4Slyq