问题描述
我有一个AVL树程序,它通过中序遍历对文本文件中的单词(以字符串形式存储)进行排序并输出。该程序可以按预期工作,代码如下:
// Name of the input file; main() fills this in from the user's answer to the prompt.
std::string fileName;
// Stream that main() opens on fileName and extracts words from.
std::fstream readFile;
// Holds the whitespace-delimited token most recently extracted from readFile.
std::string storeFile;
// One node of the AVL tree; each node stores one word.
// Every member has a default so that a Node is never observed with indeterminate
// pointers or height, however it is created.
struct Node
{
    std::string key;        // the word held by this node
    int height = 1;         // height of the subtree rooted here; a lone leaf counts as 1
    Node *left = nullptr;   // left child, or null when there is none
    Node *right = nullptr;  // right child, or null when there is none
};
// Forward declaration of the two-int max helper; its definition appears further down in this file.
int max(int a,int b);
// Returns the height stored in N, treating a null pointer (an empty subtree) as height 0.
int height(Node *N)
{
    return (N != NULL) ? N->height : 0;
}
// Returns the larger of the two ints; when they are equal, that shared value is returned.
int max(int lower,int upper)
{
    if (lower > upper)
    {
        return lower;
    }
    return upper;
}
// Allocates a leaf node holding `key`: no children, height 1.
// The caller receives the raw pointer returned by `new`.
Node *newNode(std::string key)
{
    Node *fresh = new Node();
    fresh->height = 1;
    fresh->left = NULL;
    fresh->right = NULL;
    fresh->key = key;
    return fresh;
}
// Rotates the subtree rooted at y to the right and returns the new subtree root,
// which is y's former left child. y->left is dereferenced, so it must not be null.
Node *rightRotation(Node *y)
{
    Node *pivot = y->left;
    Node *moved = pivot->right;

    // Re-link: y adopts the pivot's old right subtree and becomes the pivot's right child.
    y->left = moved;
    pivot->right = y;

    // y now sits below the pivot, so its height has to be refreshed first.
    y->height = 1 + max(height(y->left),height(y->right));
    pivot->height = 1 + max(height(pivot->left),height(pivot->right));
    return pivot;
}
// Rotates the subtree rooted at x to the left and returns the new subtree root,
// which is x's former right child. x->right is dereferenced, so it must not be null.
Node *leftRotation(Node *x)
{
    Node *pivot = x->right;
    Node *moved = pivot->left;

    // Re-link: x adopts the pivot's old left subtree and becomes the pivot's left child.
    x->right = moved;
    pivot->left = x;

    // x now sits below the pivot, so its height has to be refreshed first.
    x->height = 1 + max(height(x->left),height(x->right));
    pivot->height = 1 + max(height(pivot->left),height(pivot->right));
    return pivot;
}
// Balance factor of N: height of its left subtree minus height of its right subtree.
// A null node has balance 0.
int getBalance(Node *N)
{
    return N ? height(N->left) - height(N->right) : 0;
}
// Inserts `key` into the AVL subtree rooted at `node` and returns the (possibly new) root
// of that subtree. A key that is already present is ignored and the subtree is returned as is.
// The key is taken by const reference so the recursion does not copy the string at every level;
// existing callers that pass a std::string are unaffected.
Node *insertnewNode(Node *node,const std::string &key)
{
    // Empty spot reached: the new leaf becomes this subtree.
    if (node == NULL)
        return newNode(key);

    if (key < node->key)
        node->left = insertnewNode(node->left,key);
    else if (key > node->key)
        node->right = insertnewNode(node->right,key);
    else
        return node; // duplicate key: nothing to insert, nothing to rebalance

    // The subtree below may have grown, so refresh this node's height and balance factor.
    node->height = 1 + max(height(node->left),height(node->right));
    const int balance = getBalance(node);

    // Left-left case: a single right rotation restores balance.
    if (balance > 1 && key < node->left->key)
        return rightRotation(node);

    // Right-right case: a single left rotation restores balance.
    if (balance < -1 && key > node->right->key)
        return leftRotation(node);

    // Left-right case: rotate the left child left, then this node right.
    if (balance > 1 && key > node->left->key)
    {
        node->left = leftRotation(node->left);
        return rightRotation(node);
    }

    // Right-left case: rotate the right child right, then this node left.
    if (balance < -1 && key < node->right->key)
    {
        node->right = rightRotation(node->right);
        return leftRotation(node);
    }

    return node;
}
// In-order traversal of the tree rooted at `root`: prints the left subtree, then the
// node's own key on its own line, then the right subtree. A null root prints nothing.
void Inorder(Node *root)
{
    if (root != NULL)
    {
        Inorder(root->left);
        std::cout << root->key << std::endl;
        Inorder(root->right);
    }
}
bool wordCount(const Node &node1,const Node &node2)
{
}
// Prompts for a file name, reads that file word by word, lower-cases each word and strips its
// punctuation, inserts the result into the AVL tree, and finally prints the tree in order.
// Returns 0 on success and 1 when the file cannot be opened.
int main(int argc,char *argv[])
{
    // The file name is read interactively, so the command-line arguments are not used.
    (void)argc;
    (void)argv;

    Node *root = NULL; // root of the AVL tree; NULL means the tree is empty

    std::cout << "Please enter the name of the file: " << std::endl;
    // Read straight into the std::string: extracting into argv[0] would write an
    // arbitrarily long name into a buffer this program does not own or size.
    std::cin >> fileName;
    std::cout << "Attempting to read file " << fileName << std::endl;

    readFile.open(fileName);
    if (!readFile)
    {
        std::cerr << "ERROR: Failed to open file " << std::endl;
        return 1; // non-zero status so the failure is visible to the caller
    }
    std::cerr << "File successfully opened" << std::endl;

    while (readFile >> storeFile)
    {
        // Go through unsigned char: the <cctype> functions are undefined for negative char values.
        std::transform(storeFile.begin(),storeFile.end(),storeFile.begin(),
                       [](unsigned char c) { return static_cast<char>(::tolower(c)); });

        // One erase-remove pass over the whole token. Erasing inside an index loop with a
        // cached length would keep indexing past the end of the shortened string.
        storeFile.erase(std::remove_if(storeFile.begin(),storeFile.end(),
                                       [](unsigned char c) { return ::ispunct(c) || ::isspace(c); }),
                        storeFile.end());

        // A token made only of punctuation leaves nothing worth storing.
        if (storeFile.empty())
            continue;

        root = insertnewNode(root,storeFile);
    }

    Inorder(root);
    readFile.close();
    return 0;
}
我目前想要实现的功能是统计每个单词出现的次数。例如在下面的示例中,单词在左边,出现次数在右边:
adams: 2
apple: 5
as: 20
我尝试编写一个名为 wordCount 的 bool 函数,我认为它需要接收参数(在这里是两个节点,用来比较它们是否为同一个单词)。但我不确定具体该如何实现。
谢谢
解决方法
由于这个问题还没有人回答,下面给出我的解决思路。您已经把文件中的输入全部转换为小写,因此可以认为同一个单词的每次出现都是完全相同的字符串:
// Ordered map from a word to the number of times it has been recorded.
using omap = std::map<std::string,uint32_t>;
// File-wide tally of word occurrences, keyed by word.
omap occurrences;
void printNumOccur( const omap& m )
{
for ( omap::it = m.begin(); it != m.end(); ++it )
{
std::cout << it->first << ": " << it->second << std::endl;
}
}
// Recursive AVL insertion of `key` into the subtree rooted at `node`; returns the (possibly
// new) subtree root. A key that is already present leaves the subtree unchanged.
// This helper deliberately does not touch the occurrence counts.
static Node *insertBalanced(Node *node,const std::string &key)
{
    // Empty spot reached: the new leaf becomes this subtree.
    if (node == NULL)
        return newNode(key);

    if (key < node->key)
        node->left = insertBalanced(node->left,key);
    else if (key > node->key)
        node->right = insertBalanced(node->right,key);
    else
        return node; // duplicate key: the tree keeps a single node per word

    // The subtree below may have grown, so refresh this node's height and balance factor.
    node->height = 1 + max(height(node->left),height(node->right));
    const int balance = getBalance(node);

    // Left-left case.
    if (balance > 1 && key < node->left->key)
        return rightRotation(node);

    // Right-right case.
    if (balance < -1 && key > node->right->key)
        return leftRotation(node);

    // Left-right case.
    if (balance > 1 && key > node->left->key)
    {
        node->left = leftRotation(node->left);
        return rightRotation(node);
    }

    // Right-left case.
    if (balance < -1 && key < node->right->key)
    {
        node->right = rightRotation(node->right);
        return leftRotation(node);
    }

    return node;
}

// Records one occurrence of `key` in `occurrences`, then inserts it into the AVL tree rooted
// at `node` and returns the (possibly new) root.
Node *insertnewNode(Node *node,std::string key)
{
    // Count exactly once per call, before any recursion. Counting inside the recursive body
    // added one per tree level visited and skipped the very first word, because the
    // empty-tree case returned before the counting code ran.
    // operator[] creates a missing entry with value 0, so a new word ends up at 1.
    ++occurrences[key];
    return insertBalanced(node,key);
}
,
似乎最简单的方法是维护节点中每个字符串的计数,并在每次找到匹配的节点时对其进行递增。
// One node of the AVL tree; each node stores one word plus a per-node counter.
// Every member has a default so that a Node is never observed with indeterminate
// pointers, height or count, however it is created.
struct Node
{
    std::string key;        // the word held by this node
    int height = 1;         // height of the subtree rooted here; a lone leaf counts as 1
    Node *left = nullptr;   // left child, or null when there is none
    Node *right = nullptr;  // right child, or null when there is none
    int count = 1;          // per-word counter; a freshly created node starts at 1
};
// Allocates a leaf node holding `key`: no children, height 1, count 1.
// The caller receives the raw pointer returned by `new`.
Node* newNode(std::string const& key)
{
    Node *fresh = new Node();
    fresh->count = 1;
    fresh->height = 1;
    fresh->left = NULL;
    fresh->right = NULL;
    fresh->key = key;
    return fresh;
}
// Comparison chain of the recursive insert: descend into the left or right subtree, or,
// when the key equals this node's key, increment the node's counter and return it.
if (key < node->key)
node->left = insertnewNode(node->left,key);
else if (key > node->key)
node->right = insertnewNode(node->right,key);
else
{
// Exact match: no new node is created, only this node's count changes.
node->count++;
return node;
}
一旦树被组装,您将需要迭代所有节点。您可以通过两种方式执行此操作:
- 每创建一个新节点,就在向量中添加一个指向它的指针
- 编写一个“TreeBrowser”类来遍历整棵树并输出结果。如果希望按顺序输出,则需要在树中上下移动,并记住每个分支是从哪一层开始的。