问题描述
这里我需要使用一个链表数组来存储从文本中获取的单词。我已经完成了整个结构的实现:定义了数组和链表的节点(原帖此处附有代码截图)。
为了从文本中提取单词,我使用了 strtok 函数,这是我的代码(原帖此处附有代码截图)。
我现在的问题是:如何根据单词的首字母创建包含这些单词的链表。注意每个链表都由 T[i] 指向,其中 i 的取值范围是 0 到 25。
解决方法
首先,Yanis,欢迎来到社区!
其次,请使用 StackOverflow 的代码片段功能提供所有代码,并在文本和标签中指定您使用的编程语言。
现在,我从你的问题推断,你需要一个用 C 语言(?)实现的双向链表:每个链表表示字符串中的一个标记(token),这些链表存放在一个数组中,该数组即代表标记化之后的字符串。
现在,这是我基于对问题的理解的实现,如果我错了,请澄清。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Upper bound on the number of tokens main() keeps; it sizes the array of list heads
#define MAX_TOKENS 25
// One element of a doubly linked list holding a single character
struct node
{
    char val;           // the character kept in this element
    int line;           // line number recorded for the character
    int pos;            // position recorded for the character within its line
    struct node *next;  // element that follows, NULL when this is the last one
    struct node *prev;  // element that precedes, NULL when this is the first one
};
// Shorthand for "pointer to a list element"
typedef struct node *p_node;
// Utility function to convert a string into a Node
p_node strToNode(char* str,int* pos,int* line)
{
// First node of the list
p_node fnode = (p_node) malloc(sizeof(struct node));
fnode->val = str[0];
fnode->line = *line;
fnode->pos = *pos;
fnode->prev = NULL;
// Stores the previous node in the list so that it can be set for the next one
p_node pnode = fnode;
++(*pos);
int i;
for (i = 1; i < strlen(str); ++i)
{
// Ignore new lines,update the line and position pointers
if (str[i] == '\n')
{
++(*line);
*pos = 0;
}
// Generate new node
else
{
p_node nnode = (p_node) malloc(sizeof(struct node));
nnode->val = str[i];
nnode->line = *line;
nnode->pos = *pos;
nnode->prev = pnode;
nnode->next = NULL;
++(*pos);
// Update previous node and move forward
pnode->next = nnode;
pnode = nnode;
}
}
return fnode;
}
// Demo driver: splits a hard-coded string into tokens, turns every token into
// a character list with strToNode(), prints each list and releases it again.
// Command-line arguments are not used. Always returns 0.
int main(int argc,char** argv)
{
    (void) argc;
    (void) argv;
    p_node nodes[MAX_TOKENS]; // Array of pointers to the first node of a double linked list
    char delimiters[] = " ;-'()1234567890:!%?.,+-";
    char str[] = " Hello wor\nld ";
    // Lower-case the text in place. Done by hand because strlwr() is not part
    // of standard C and is unavailable outside Windows C runtimes.
    char *c;
    for (c = str; *c != '\0'; ++c)
    {
        if (*c >= 'A' && *c <= 'Z')
        {
            *c = (char) (*c - 'A' + 'a');
        }
    }
    int line = 0;
    int pos = 0;
    int n_tokens = 0;
    // '\n' is deliberately not a delimiter: strToNode() consumes it to track lines
    char *token = strtok(str,delimiters);
    while (token != NULL && n_tokens < MAX_TOKENS)
    {
        nodes[n_tokens++] = strToNode(token,&pos,&line);
        token = strtok(NULL,delimiters);
    }
    // Testing the implementation
    int i;
    for (i = 0; i < n_tokens; ++i)
    {
        printf("# Token %d\n",(i+1));
        p_node node = nodes[i];
        while (node != NULL)
        {
            printf("\t[%d:%d] %c\n",node->line,node->pos,node->val);
            node = node->next;
        }
        printf("\n");
    }
    // Freeing the used memory: every node was allocated by strToNode()
    for (i = 0; i < n_tokens; ++i)
    {
        p_node node = nodes[i];
        while (node != NULL)
        {
            p_node following = node->next;
            free(node);
            node = following;
        }
        nodes[i] = NULL;
    }
    return 0;
}
更新 #1
OP 发表评论之后,我对代码的需求更清楚了。与我最初的猜测不同,数组中的每个链表并不是表示单个单词,而是存放以同一个字母开头的所有单词。
因此,这是代码的更新版本。 OP 已经澄清他的输入理论上是一个文件。我也调整了代码以反映这一点。
我也添加了一些内存管理,但我没有检查内存泄漏。
对于文件读取,我使用了 Antti Haapala 实现的 getline 函数,因为 Windows 系统上不存在 POSIX 标准版本。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
// Adapted from https://stackoverflow.com/questions/735126/are-there-alternate-implementations-of-gnu-getline-interface/
// ssize_t is declared here for toolchains that do not provide it (e.g. MSVC)
typedef intptr_t ssize_t;

// Reads one line from stream into the heap buffer *lineptr, whose capacity is
// tracked in *n. The buffer is allocated (128 bytes) when *lineptr is NULL and
// is enlarged by a quarter of its size (never below 128 bytes) whenever the
// next character plus the terminator would not fit. The stored line keeps its
// trailing '\n' when one was read and is always NUL-terminated on success.
//
// Returns the number of characters stored (terminator excluded), or -1 when
// an argument is NULL (errno is set to EINVAL), when the stream is already at
// end-of-file, or when an allocation fails.
ssize_t getline(char **lineptr,size_t *n,FILE *stream) {
    const size_t min_capacity = 128;

    if (!lineptr || !stream || !n) {
        errno = EINVAL;
        return -1;
    }

    // Nothing left to read: report it before touching the buffer
    int ch = getc(stream);
    if (ch == EOF) {
        return -1;
    }

    if (!*lineptr) {
        char *fresh = malloc(min_capacity);
        if (!fresh) {
            return -1;
        }
        *lineptr = fresh;
        *n = min_capacity;
    }

    size_t used = 0;
    do {
        // Keep one byte in reserve for the terminating NUL
        if (used + 1 >= *n) {
            size_t grown = *n + (*n / 4);
            if (grown < min_capacity) {
                grown = min_capacity;
            }
            char *bigger = realloc(*lineptr,grown);
            if (!bigger) {
                return -1;
            }
            *lineptr = bigger;
            *n = grown;
        }
        unsigned char *bytes = (unsigned char *)(*lineptr);
        bytes[used++] = (unsigned char) ch;
        if (ch == '\n') {
            break;
        }
        ch = getc(stream);
    } while (ch != EOF);

    (*lineptr)[used] = '\0';
    return (ssize_t) used;
}
// Number of list heads kept by main(): one slot per letter, indexed by (first letter - 'a')
#define MAX_TOKENS 26
// Name of the text file that main() opens for reading
#define FILE_NAME "test_file.txt"
// One element of a doubly linked list holding a string
struct node
{
    char* val;          // the string kept in this element
    int line;           // line value recorded for the string
    int pos;            // position value recorded for the string
    struct node *next;  // element that follows, NULL when this is the last one
    struct node *prev;  // element that precedes, NULL when this is the first one
};
// Shorthand for "pointer to a list element"
typedef struct node *p_node;
// Appends a new node holding a private copy of str to the end of a list.
//
// Parameters:
//   list - any node of the target list from which the tail can be reached by
//          following next pointers (normally the head), or NULL to start a
//          new list
//   str  - NUL-terminated string to store; it is copied, so the caller keeps
//          ownership of its own buffer
//   line - pointer to the line value to record in the node
//   pos  - pointer to the position value to record in the node
//
// Returns the newly created node (which is the head when list was NULL), or
// NULL when str, line or pos is NULL or when memory cannot be allocated; the
// list is left untouched in that case. The node and its string copy must be
// released with free() by the caller.
p_node insert(p_node list,char* str,int* line,int* pos)
{
    if (str == NULL || line == NULL || pos == NULL)
    {
        return NULL;
    }
    // Creating the new node
    p_node nnode = malloc(sizeof *nnode);
    if (nnode == NULL)
    {
        return NULL;
    }
    // Creating a copy of the string to save it. Copied by hand because
    // strdup() is not part of ISO C11 and may be undeclared in strict builds.
    size_t size = strlen(str) + 1;
    nnode->val = malloc(size);
    if (nnode->val == NULL)
    {
        free(nnode);
        return NULL;
    }
    memcpy(nnode->val,str,size);
    nnode->line = *line;
    nnode->pos = *pos;
    nnode->next = NULL;
    // Reach the end of the list only once the node is fully built, so a
    // failed allocation never leaves the list half-modified
    p_node lnode = list;
    if (lnode != NULL)
    {
        while (lnode->next != NULL)
        {
            lnode = lnode->next;
        }
        lnode->next = nnode;
    }
    nnode->prev = lnode;
    return nnode;
}
// Reads FILE_NAME line by line, lower-cases each line, splits it into words
// and appends every word to the list selected by its first letter
// (nodes[0] = 'a' ... nodes[25] = 'z'). Afterwards prints all non-empty lists
// and releases every allocation.
//
// Words that do not start with an ASCII letter are skipped. Note that pos is
// a running total of the lengths of all words seen so far; it is not reset at
// the start of a line.
//
// Command-line arguments are not used. Returns 0 on success; terminates with
// EXIT_FAILURE when the file cannot be opened or memory runs out.
int main(int argc,char** argv)
{
    (void) argc;
    (void) argv;
    p_node nodes[MAX_TOKENS]; // Array of pointers to the first node of a double linked list
    // Initializing all nodes to NULL
    int i;
    for (i = 0; i < MAX_TOKENS; ++i)
    {
        nodes[i] = NULL;
    }
    char delimiters[] = " ;-'()1234567890:!%?.,+-\t\r\n";
    size_t len = 0;
    char* str = NULL;
    FILE* fp = fopen(FILE_NAME,"r");
    if (fp == NULL)
    {
        perror(FILE_NAME);
        exit(EXIT_FAILURE);
    }
    int status = EXIT_SUCCESS;
    int line = 0;
    int pos = 0;
    while (status == EXIT_SUCCESS && getline(&str,&len,fp) != -1)
    {
        // Lower-case the line in place. Done by hand because strlwr() is not
        // part of standard C and is unavailable outside Windows C runtimes.
        char* c;
        for (c = str; *c != '\0'; ++c)
        {
            if (*c >= 'A' && *c <= 'Z')
            {
                *c = (char) (*c - 'A' + 'a');
            }
        }
        char* token;
        for (token = strtok(str,delimiters); token != NULL; token = strtok(NULL,delimiters))
        {
            int index = token[0] - 'a';
            // Only words starting with a..z have a slot; anything else
            // (underscore, quotes, non-ASCII bytes, ...) would index out of bounds
            if (index >= 0 && index < MAX_TOKENS)
            {
                p_node added = insert(nodes[index],token,&line,&pos);
                if (added == NULL)
                {
                    fprintf(stderr,"Out of memory while storing a word\n");
                    status = EXIT_FAILURE;
                    break;
                }
                // The first word for a letter becomes the head of its list
                if (nodes[index] == NULL)
                {
                    nodes[index] = added;
                }
            }
            pos += (int) strlen(token);
        }
        ++line;
    }
    fclose(fp);
    // Testing the implementation
    if (status == EXIT_SUCCESS)
    {
        for (i = 0; i < MAX_TOKENS; ++i)
        {
            if (nodes[i] != NULL)
            {
                printf("# Tokens starting with %c\n",(i+'a'));
                p_node node = nodes[i];
                while (node != NULL)
                {
                    printf("\t[%d:%d] %s\n",node->line,node->pos,node->val);
                    node = node->next;
                }
                printf("\n");
            }
        }
    }
    // Freeing the used memory
    for (i = 0; i < MAX_TOKENS; ++i)
    {
        p_node node = nodes[i];
        while (node != NULL)
        {
            p_node tmp = node;
            node = node->next;
            free(tmp->val);
            free(tmp);
        }
        nodes[i] = NULL;
    }
    // Tokens point into str and are not separate allocations, so only the
    // line buffer itself is released (free(NULL) is harmless)
    free(str);
    return status;
}
我使用的测试文件(在 test 之前还有一个 \t 制表符):
Hello world
This is A
test
输出:
# Tokens starting with a
[2:16] a
# Tokens starting with h
[0:0] hello
# Tokens starting with i
[2:14] is
# Tokens starting with t
[2:10] this
[3:17] test
# Tokens starting with w
[0:5] world