如何计算AVL树中字符串的出现次数?

问题描述

我有一个AVL树程序,它读取一个文本文件,把其中的单词作为字符串存入树中,并通过中序遍历按顺序输出。这部分可以按预期工作,代码如下所示:

// Name of the input file; main() fills it in from what the user types.
std::string fileName;
// Stream that main() opens on fileName and reads tokens from.
std::fstream readFile;
// Most recently extracted whitespace-delimited token from readFile.
std::string storeFile;

// One node of the AVL tree of words.
// Every member has a default so that a Node is in a valid "leaf" state no
// matter how it is created (`Node n;`, `new Node`, `new Node()`).
struct Node
{
    std::string key;       // word stored in this node
    int height = 1;        // height of the subtree rooted here; a lone node has height 1
    Node *left = nullptr;  // subtree of keys ordered before `key`
    Node *right = nullptr; // subtree of keys ordered after `key`
};

// Forward declaration of the integer max helper that is defined later in this file.
int max(int a,int b);

// Returns the height recorded in node N; an empty subtree (NULL) has height 0.
int height(Node *N)
{
    return (N != NULL) ? N->height : 0;
}

// Returns the larger of the two integers (either one when they are equal).
int max(int lower,int upper)
{
    if (lower > upper)
        return lower;
    return upper;
}

// Allocates a leaf node for `key`: no children and height 1.
// The node is created with `new`; this function does not release it.
Node *newNode(std::string key)
{
    Node *leaf = new Node();
    leaf->height = 1;
    leaf->left = NULL;
    leaf->right = NULL;
    leaf->key = key;
    return leaf;
}

// Rotates the subtree rooted at y to the right and returns the new subtree
// root, which is y's former left child. y->left is dereferenced, so it must
// not be NULL.
Node *rightRotation(Node *y)
{
    Node *pivot = y->left;
    Node *moved = pivot->right; // subtree that is handed over from pivot to y

    y->left = moved;
    pivot->right = y;

    // y is now a child of pivot, so its height has to be refreshed first.
    y->height = 1 + max(height(y->left),height(y->right));
    pivot->height = 1 + max(height(pivot->left),height(pivot->right));
    return pivot;
}

// Rotates the subtree rooted at x to the left and returns the new subtree
// root, which is x's former right child. x->right is dereferenced, so it must
// not be NULL.
Node *leftRotation(Node *x)
{
    Node *pivot = x->right;
    Node *moved = pivot->left; // subtree that is handed over from pivot to x

    x->right = moved;
    pivot->left = x;

    // x is now a child of pivot, so its height has to be refreshed first.
    x->height = 1 + max(height(x->left),height(x->right));
    pivot->height = 1 + max(height(pivot->left),height(pivot->right));
    return pivot;
}

// Balance factor of N: height of its left subtree minus height of its right
// subtree. An empty node (NULL) has balance 0.
int getBalance(Node *N)
{
    if (N != NULL)
    {
        const int leftHeight = height(N->left);
        const int rightHeight = height(N->right);
        return leftHeight - rightHeight;
    }
    return 0;
}

// Inserts `key` into the AVL subtree rooted at `node` and returns the root of
// the (possibly rotated) subtree. A key that is already present leaves the
// tree unchanged.
Node *insertnewNode(Node *node,std::string key)
{
    // Empty spot reached: the key becomes a new leaf here.
    if (node == NULL)
        return newNode(key);

    // Duplicate key: nothing to insert.
    if (key == node->key)
        return node;

    if (key < node->key)
        node->left = insertnewNode(node->left,key);
    else
        node->right = insertnewNode(node->right,key);

    node->height = max(height(node->left),height(node->right)) + 1;
    const int skew = getBalance(node);

    if (skew > 1)
    {
        // Left side is too tall.
        if (key < node->left->key)
            return rightRotation(node); // left-left case
        if (key > node->left->key)
        {
            // left-right case: straighten the child first, then rotate.
            node->left = leftRotation(node->left);
            return rightRotation(node);
        }
    }
    else if (skew < -1)
    {
        // Right side is too tall.
        if (key > node->right->key)
            return leftRotation(node); // right-right case
        if (key < node->right->key)
        {
            // right-left case: straighten the child first, then rotate.
            node->right = rightRotation(node->right);
            return leftRotation(node);
        }
    }
    return node;
}

// Prints every key of the subtree rooted at `root` to std::cout, one per line:
// left subtree first, then the node itself, then the right subtree.
void Inorder(Node *root)
{
    if (root != NULL)
    {
        Inorder(root->left);
        std::cout << root->key << std::endl;
        Inorder(root->right);
    }
}

bool wordCount(const Node &node1,const Node &node2)
{

 

}

// Prompts for a file name, reads the file token by token, normalises each
// token (lower-case, punctuation removed), inserts it into the AVL tree and
// finally prints the distinct words in sorted order.
// Returns 0 on success and 1 when the file cannot be opened.
int main(int argc,char *argv[])
{
    // The command line is not used; the file name is read from standard input.
    static_cast<void>(argc);
    static_cast<void>(argv);

    Node *root = NULL; // root of the AVL tree; NULL means the tree is empty

    std::cout << "Please enter the name of the file: " << std::endl; //prompts user for the filename
    // Read straight into the std::string. argv[0] is a fixed-size buffer owned
    // by the runtime, so extracting user input into it can overflow.
    std::cin >> fileName;

    std::cout << "Attempting to read file " << fileName << std::endl;

    // Open for reading only: the default fstream mode (in|out) fails on files
    // the user is not allowed to write to.
    readFile.open(fileName,std::ios::in);

    if (!readFile)
    {
        std::cerr << "ERROR: Failed to open file " << std::endl; //if the file cannot be opened an error is displayed
        return 1;                                                //non-zero status so the caller can detect the failure
    }
    std::cerr << "File successfully opened" << std::endl;

    while (readFile >> storeFile)
    {
        // The <cctype> functions need a value representable as unsigned char;
        // passing a negative char is undefined, hence the unsigned parameter.
        std::transform(storeFile.begin(),storeFile.end(),storeFile.begin(),
                       [](unsigned char c) { return static_cast<char>(::tolower(c)); });

        // Strip all punctuation in one erase-remove pass. operator>> already
        // splits on whitespace, so a token never contains spaces.
        storeFile.erase(std::remove_if(storeFile.begin(),storeFile.end(),
                                       [](unsigned char c) { return ::ispunct(c) != 0; }),
                        storeFile.end());

        // A token made only of punctuation (e.g. "--") is not a word.
        if (storeFile.empty())
            continue;

        root = insertnewNode(root,storeFile);
    }

    Inorder(root);
    readFile.close();
    return 0;
}

我目前正在努力实现的功能是统计每个单词的出现次数。例如下面的输出,左边是单词,右边是它的出现次数:

adams: 2
apple: 5
as: 20

我尝试写了一个返回bool的函数wordCount(见上面的代码),我认为它需要参数(在这里是两个节点,用来比较并判断它们是否匹配)。但是我不确定该如何实现它。

谢谢

解决方法

因为这个问题还没有人回答,下面是我想到的一个解决方案的思路。您已经把文件中的输入转换成了小写,因此可以认为同一个单词的所有字符串大小写一致:

// Ordered map from a word to the number of times it has been counted.
using omap = std::map<std::string,uint32_t>;
// File-scope table of word counts.
omap occurrences;

void printNumOccur( const omap& m )
{
    for ( omap::it = m.begin(); it != m.end(); ++it )
    {
        std::cout << it->first << ": " << it->second << std::endl;
    }
}

// Inserts `key` into the AVL subtree rooted at `node`, records one occurrence
// of `key` in the global `occurrences` map, and returns the root of the
// (possibly rotated) subtree.
//
// The count is updated only at the two places where the recursion ends (a new
// leaf is created, or the key is found), so each top-level call adds exactly
// one occurrence. Counting on entry would add one per recursion level and
// would miss keys that end up on an empty spot.
Node *insertnewNode(Node *node,std::string key)
{
    if (node == NULL)
    {
        // First time this key is stored; operator[] starts the count at 0.
        ++occurrences[key];
        return (newNode(key));
    }

    if (key < node->key)
        node->left = insertnewNode(node->left,key);
    else if (key > node->key)
        node->right = insertnewNode(node->right,key);
    else
    {
        // Key already in the tree: count the repeat, tree stays unchanged.
        ++occurrences[key];
        return node;
    }

    node->height = 1 + max(height(node->left),height(node->right));
    int balance = getBalance(node);
    // left-left case
    if (balance > 1 && key < node->left->key)
        return rightRotation(node);
    // right-right case
    if (balance < -1 && key > node->right->key)
        return leftRotation(node);
    // left-right case
    if (balance > 1 && key > node->left->key)
    {
        node->left = leftRotation(node->left);
        return rightRotation(node);
    }
    // right-left case
    if (balance < -1 && key < node->right->key)
    {
        node->right = rightRotation(node->right);
        return leftRotation(node);
    }
    return node;
}
,

似乎最简单的方法是维护节点中每个字符串的计数,并在每次找到匹配的节点时对其进行递增。

// One node of the AVL tree of words, carrying its own occurrence counter.
// Every member has a default so that a Node is in a valid "leaf seen once"
// state no matter how it is created (`Node n;`, `new Node`, `new Node()`).
struct Node
{
    std::string key;       // word stored in this node
    int height = 1;        // height of the subtree rooted here; a lone node has height 1
    Node *left = nullptr;  // subtree of keys ordered before `key`
    Node *right = nullptr; // subtree of keys ordered after `key`
    int count = 1;         // occurrences of `key`; a node exists only once its word was seen
};

// Allocates a leaf node for `key`: no children, height 1, occurrence count 1.
// The node is created with `new`; this function does not release it.
Node* newNode(std::string const& key)
{
    Node *leaf = new Node();
    leaf->count = 1;
    leaf->height = 1;
    leaf->left = NULL;
    leaf->right = NULL;
    leaf->key = key;
    return leaf;
}

// Excerpt (not a complete function): this appears to be the key-comparison
// step of insertnewNode(), changed so that a duplicate key is counted.
if (key < node->key)
    // Smaller key: continue in the left subtree.
    node->left = insertnewNode(node->left,key);
else if (key > node->key)
    // Larger key: continue in the right subtree.
    node->right = insertnewNode(node->right,key);
else
{
    // Key already stored in this node: bump its counter and leave the tree as is.
    node->count++;
    return node;
}

一旦树被组装,您将需要迭代所有节点。您可以通过两种方式执行此操作:

  1. 每创建一个新节点,就在向量中添加一个指向它的指针
  2. 编写一个“TreeBrowser”类来遍历整棵树并输出结果。如果希望按顺序输出,则需要在树中上下移动,并记住每个分支是从哪一层开始的。