统计单词个数的简单程序

xiaoxiao2021-02-28  161

背景

闲来无事,写了一个统计每个单词出现次数的简单程序,特此记录。

代码

/*
 * Word-frequency counter.
 *
 * Reads a text file line by line, splits every line into words on the
 * characters '.', ',', '!', ' ' and '\n', counts how often each distinct
 * word occurs, and prints the words in ascending order of their count.
 *
 * Usage: <program> filename
 */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

#define MAX_LINE_SIZE 256                   /* fgets() buffer size            */
#define MAX_WORD_SIZE 32                    /* word storage incl. terminator  */
#define MAX_FILE_SIZE (1024 * 1024 * 1024)  /* largest accepted input, bytes  */

/* One distinct word and the number of times it has been seen. */
struct word_node {
    char word[MAX_WORD_SIZE];
    int counter;
    struct word_node *next;
} *word_list = NULL;                        /* head of the global word list   */

typedef struct word_node word_list_t;

/*
 * Return the size of file_name in bytes.
 *
 * Returns -1 when stat() fails. Sizes that do not fit in an int are
 * reported as INT_MAX so that they still compare greater than
 * MAX_FILE_SIZE instead of wrapping around.
 */
int get_file_size(const char *file_name)
{
    struct stat s;

    if (stat(file_name, &s) != 0) {
        return -1;
    }
    if (s.st_size > (off_t)INT_MAX) {
        return INT_MAX;
    }
    return (int)s.st_size;
}

/*
 * Look word up in the global list.
 *
 * Side effect: when the word is found its counter is incremented.
 * Returns 1 if the word was found (and counted), 0 otherwise.
 */
int is_existed(const char *word)
{
    word_list_t *ptr = NULL;

    for (ptr = word_list; ptr; ptr = ptr->next) {
        if (strcmp(word, ptr->word) == 0) {
            ptr->counter++;
            return 1;
        }
    }
    return 0;
}

/*
 * Append a new node for word, with counter 1, to the end of the global list.
 *
 * Returns 0 on success, -1 if the word does not fit into MAX_WORD_SIZE
 * bytes (terminator included) or if memory allocation fails.
 */
int create_new_word(const char *word)
{
    word_list_t *new_node = NULL;
    word_list_t *last = NULL;
    size_t len = strlen(word);

    /* Refuse anything that would overflow new_node->word. */
    if (len >= MAX_WORD_SIZE) {
        return -1;
    }

    new_node = malloc(sizeof *new_node);
    if (new_node == NULL) {
        printf("malloc error");
        return -1;
    }

    memcpy(new_node->word, word, len + 1);
    new_node->counter = 1;
    new_node->next = NULL;   /* malloc() memory is indeterminate */

    if (word_list == NULL) {
        word_list = new_node;
    } else {
        for (last = word_list; last->next; last = last->next) {
            ;
        }
        last->next = new_node;
    }
    return 0;
}

/*
 * Sort, by ascending counter, the nodes strictly between head and tail.
 *
 * Both ends are exclusive: head is a sentinel whose ->next is the first
 * node to sort (head itself is never moved or compared), and tail is the
 * node following the last one to sort (NULL for "until end of list").
 * To sort a whole list, pass a dummy node whose ->next is the list head
 * and read the new list head back from that dummy's ->next.
 *
 * Always returns 0.
 */
int quick_sort(word_list_t *head, word_list_t *tail)
{
    int pivot;
    word_list_t *mid = NULL;
    word_list_t *p = NULL;
    word_list_t *q = NULL;
    word_list_t *t = NULL;

    /* Ranges with zero or one node are already sorted. */
    while (head != NULL && head->next != tail && head->next->next != tail) {
        mid = head->next;       /* the first node is the pivot               */
        p = head;               /* end of the "less than pivot" chain        */
        q = mid;                /* end of the "pivot and not less" chain     */
        pivot = mid->counter;

        for (t = mid->next; t != tail; t = t->next) {
            /* t->next is untouched by the relinking, so the walk is safe. */
            if (t->counter < pivot) {
                p = p->next = t;
            } else {
                q = q->next = t;
            }
        }
        p->next = mid;          /* smaller nodes, then the pivot ...         */
        q->next = tail;         /* ... then the rest, re-attached to tail    */

        quick_sort(head, mid);  /* recurse into the left part                */
        head = mid;             /* iterate over the right part: no deep      */
                                /* recursion when many counters are equal    */
    }
    return 0;
}

/* Print every word of the global list together with its counter. */
int show_words()
{
    word_list_t *ptr = NULL;

    printf("the words list:\n");
    printf("word counters\n");
    printf("----------------------------------------\n");
    for (ptr = word_list; ptr; ptr = ptr->next) {
        printf("%-32s %4d\n", ptr->word, ptr->counter);
    }
    return 0;
}

/* Release every node of the global list and leave it empty. */
static void free_words(void)
{
    word_list_t *next = NULL;

    while (word_list != NULL) {
        next = word_list->next;
        free(word_list);
        word_list = next;
    }
}

/*
 * Split line (modified in place) into words on delims and count each one.
 *
 * Returns 0 on success, -1 if a word is too long to store or a node
 * could not be created.
 */
static int count_words_in_line(char *line, const char *delims)
{
    char *cursor = line;
    char *token = NULL;
    size_t len = 0;

    for (;;) {
        cursor += strspn(cursor, delims);      /* skip leading delimiters */
        if (*cursor == '\0') {
            return 0;
        }

        token = cursor;
        len = strcspn(token, delims);
        cursor = token + len;
        if (*cursor != '\0') {
            *cursor++ = '\0';                  /* terminate the token */
        }

        /* The terminator must fit too, hence >= rather than >. */
        if (len >= MAX_WORD_SIZE) {
            printf("the word[%s] is too long.\n", token);
            return -1;
        }

        /* is_existed() bumps the counter itself when the word is known. */
        if (!is_existed(token) && create_new_word(token) != 0) {
            return -1;
        }
    }
}

int main(int argc, char **argv)
{
    static const char delims[] = ".,! \n";
    word_list_t sentinel = { "", 0, NULL };
    char line[MAX_LINE_SIZE];
    char *file_name = NULL;
    FILE *fp = NULL;
    int size = 0;
    int rc = -1;

    /* check args */
    if (argc != 2) {
        printf("Usage: %s filename\n", argv[0]);
        return -1;
    }

    /* check size of file */
    file_name = argv[1];
    size = get_file_size(file_name);
    if (size < 0) {
        printf("open file error.\n");
        return -1;
    }
    if (size > MAX_FILE_SIZE) {
        printf("the size of file too large.\n");
        return -1;
    }

    /* open file */
    fp = fopen(file_name, "r");
    if (fp == NULL) {
        printf("open file error.\n");
        return -1;
    }

    /*
     * Read words line by line. '\n' is one of the delimiters, so the line
     * terminator needs no separate stripping (blindly chopping the last
     * character would eat a letter whenever a line has no trailing '\n').
     * NOTE: a line longer than MAX_LINE_SIZE - 1 characters is read in
     * pieces, so a word straddling such a boundary is counted as two words.
     */
    while (fgets(line, sizeof line, fp) != NULL) {
        if (count_words_in_line(line, delims) != 0) {
            goto out;
        }
    }
    if (ferror(fp)) {
        printf("read file error.\n");
        goto out;
    }

    /*
     * Sort by ascending order. quick_sort() treats its first argument as an
     * exclusive sentinel, so hang the list off a dummy node; passing
     * word_list itself would leave the first word unsorted and would
     * dereference NULL for an empty file.
     */
    sentinel.next = word_list;
    quick_sort(&sentinel, NULL);
    word_list = sentinel.next;

    /* show words info */
    show_words();
    rc = 0;

out:
    fclose(fp);
    free_words();
    return rc;
}
转载请注明原文地址: https://www.6miu.com/read-50529.html

最新回复(0)