作业链接 https://edu.cnblogs.com/campus/fzu/FZUSoftwareEngineering1816W/homework/2160
结对成员
我的队友博客地址:http://www.cnblogs.com/52wu244/ 分工:负责项目各个模块功能的实现
我的博客地址:https://www.cnblogs.com/wang371091997/p/9781183.html 分工:负责爬取部分、附加题部分,一起Debug,解决问题
GitHub地址:https://github.com/xiaozhirensan/PairProject-C
Command类用于解析用户输入的命令行,将自定义参数设置为bool型变量,同时给各参数赋予初值,第一次写的代码纯粹为一个函数,各种变量纷杂,可读性很差。经过改进之后,此程序可移植性高,结构清晰,且封装较好。
词组频数统计函数 void WordList::wordCount(string fileName, WordList &wordList, int m, bool _w) { char word[MAX_WORD_LENGTH] = { 0 }; char wordStr[2000] = { 0 }; string str; ifstream inFile; inFile >> noskipws; inFile.open(fileName); int wordposition = 0; int wordPosition = 0; char c; int delta = 'a' - 'A'; int i = 0, j = 0; //记录字符当前位置 int n = m; while (getline(inFile, str)) { if (str[0] != 'T'&&str[0] != 'A') continue; c = str[0]; while (c != '\0') { c = str[i]; if (c <= 'Z'&&c >= 'A') c += delta; bool separator1 = (c >= 'a'&&c <= 'z'); bool separator2 = (c >= '0'&&c <= '9'); if (separator1) { wordposition++; wordStr[wordPosition] = c; wordPosition++; } if (separator2) { if (wordposition < 4) { memset(wordStr, '\0', sizeof(wordStr)); wordposition = 0; wordPosition = 0; } else { wordStr[wordPosition] = c; wordPosition++; } } if (!separator1 && !separator2 && wordposition < 4) { memset(wordStr, '\0', sizeof(wordStr)); memset(word, '\0', sizeof(word)); n = m; wordPosition = 0; wordposition = 0; j = i; } if (c==':' && wordposition >= 4 && strcmp(wordStr, "title") == 0) { state = 1; wordPosition = 0; wordposition = 0; memset(wordStr, '\0', sizeof(wordStr)); } if (c == ':' && wordposition >= 4 && strcmp(wordStr, "abstract") == 0) { state = 2; wordPosition = 0; wordposition = 0; memset(wordStr, '\0', sizeof(wordStr)); } if (!separator1 && !separator2 && wordposition >= 4 && n >= 1) { if (n > 1) { wordStr[wordPosition] = c; } strcat_s(word, wordStr); memset(wordStr, '\0', sizeof(wordStr)); if (n == m) j = i; if (n == 1) { wordList.addWord(word, _w); memset(word, '\0', sizeof(word)); n = m+1; i = j; } wordPosition = 0; wordposition = 0; n--; } i++; } memset(word, '\0', sizeof(word)); memset(wordStr, '\0', sizeof(wordStr)); i = 0; } inFile.close(); }此函数为单词处理类WordList中的一个函数,功能为抽取单词,并将文本字符串转换为词组存储进入链表,这部分花费了较多时间并进行了多次改进。
统计2018年热度最高的十个名词并且制作成饼图展示数据
饼图链接:http://myecharts.applinzi.com/bingtu.html
结果展示
可视化代码
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>ECharts</title>
    <!-- Load echarts.js -->
    <script src="js/echarts.js"></script>
</head>
<body>
    <!-- Container with an explicit width and height for ECharts to draw into -->
    <div id="main" style="width: 1200px;height:500px;"></div>
    <script type="text/javascript">
        // Initialise the ECharts instance on the prepared DOM node.
        var myChart = echarts.init(document.getElementById('main'));

        // Chart configuration and data. Declared with var so it is not an
        // implicit global.
        var option = {
            title : {
                text: 'CVPR论文热词',
                subtext: '2018年',
                x:'center'
            },
            tooltip : {
                trigger: 'item',
                formatter: "{a} <br/>{b} : {c} ({d}%)"
            },
            legend: {
                orient : 'vertical',
                x : 'left',
                // Each legend entry must equal the name of a data item in the
                // series below, otherwise the entry does not map to a slice.
                data:['image','network','learning','model','method','deep','data','approach','video','paper']
            },
            toolbox: {
                show : true,
                feature : {
                    mark : {show: true},
                    dataView : {show: true, readOnly: false},
                    magicType : {
                        show: true,
                        type: ['pie', 'funnel'],
                        option: {
                            funnel: {
                                x: '25%',
                                width: '50%',
                                funnelAlign: 'left',
                                max: 1548
                            }
                        }
                    },
                    restore : {show: true},
                    saveAsImage : {show: true}
                }
            },
            calculable : true,
            series : [
                {
                    // Shown as {a} in the tooltip.
                    name:'热词频数',
                    type:'pie',
                    radius : '55%',
                    center: ['50%', '60%'],
                    data:[
                        {value:1224, name:'image'},
                        {value:973, name:'network'},
                        {value:971, name:'learning'},
                        {value:762, name:'model'},
                        {value:662, name:'method'},
                        {value:640, name:'deep'},
                        {value:521, name:'data'},
                        {value:500, name:'approach'},
                        {value:475, name:'video'},
                        {value:474, name:'paper'}
                    ]
                }
            ]
        };

        // Render the chart with the configuration defined above.
        myChart.setOption(option);
    </script>
</body>
</html>
改进思路可归纳为三点
第一,将命令行解析函数抽象为一个类,类包括属性和方法,变量抽象为属性,字符串解析函数为方法。
第二,将单词加入链表时,启用hash函数形成单词词表,将每次添加入链表的单词利用其ASCII码值抽象为一个数值,这样可以快速找到所需单词。
第三,将逐个字符读取文件转换为逐行读取文件,这样处理字符串会更加方便,减少了代码量。
展示性能分析图和程序中消耗最大的函数
void WordList::wordCount(string fileName, WordList &wordList, int m, bool _w) { char word[MAX_WORD_LENGTH] = { 0 }; char wordStr[2000] = { 0 }; string str; ifstream inFile; inFile >> noskipws; inFile.open(fileName); int wordposition = 0; int wordPosition = 0; char c; int delta = 'a' - 'A'; int i = 0, j = 0; //记录字符当前位置 int n = m; while (getline(inFile, str)) { if (str[0] != 'T'&&str[0] != 'A') continue; c = str[0]; while (c != '\0') { c = str[i]; if (c <= 'Z'&&c >= 'A') c += delta; bool separator1 = (c >= 'a'&&c <= 'z'); bool separator2 = (c >= '0'&&c <= '9'); if (separator1) { wordposition++; wordStr[wordPosition] = c; wordPosition++; } if (separator2) { if (wordposition < 4) { memset(wordStr, '\0', sizeof(wordStr)); wordposition = 0; wordPosition = 0; } else { wordStr[wordPosition] = c; wordPosition++; } } if (!separator1 && !separator2 && wordposition < 4) { memset(wordStr, '\0', sizeof(wordStr)); memset(word, '\0', sizeof(word)); n = m; wordPosition = 0; wordposition = 0; j = i; } if (c==':' && wordposition >= 4 && strcmp(wordStr, "title") == 0) { state = 1; wordPosition = 0; wordposition = 0; memset(wordStr, '\0', sizeof(wordStr)); } if (c == ':' && wordposition >= 4 && strcmp(wordStr, "abstract") == 0) { state = 2; wordPosition = 0; wordposition = 0; memset(wordStr, '\0', sizeof(wordStr)); } if (!separator1 && !separator2 && wordposition >= 4 && n >= 1) { if (n > 1) { wordStr[wordPosition] = c; } strcat_s(word, wordStr); memset(wordStr, '\0', sizeof(wordStr)); if (n == m) j = i; if (n == 1) { wordList.addWord(word, _w); memset(word, '\0', sizeof(word)); n = m+1; i = j; } wordPosition = 0; wordposition = 0; n--; } i++; } memset(word, '\0', sizeof(word)); memset(wordStr, '\0', sizeof(wordStr)); i = 0; } inFile.close(); }此函数为功能为抽取合法词组,是最消耗时间的函数段。
void WordList::addWord(char word[],bool _w) { //将word这个单词添加到词频统计表中(或者词频+1) int p_index = Hash(word); WordIndex* pIndex = index[p_index]; while (pIndex != nullptr) { Word *pWord = pIndex->pWord; if (!strcmp(word, pWord->word)) { if (_w == true && state == 1) pWord->num += 10; else pWord->num++; Word *qWord = pWord->previous; while (qWord->num < pWord->num) { if (qWord == pWordHead) return; shiftWord(pWord); qWord = pWord->previous; } while (strcmp(qWord->word, pWord->word) > 0) { if (qWord->num > pWord->num) return; shiftWord(pWord); qWord = pWord->previous; } return; } pIndex = pIndex->next; } Word *pWord; if (_w == true && state == 1) pWord = new Word(word, 10); else pWord = new Word(word, 1); pWord->previous = pWordTail->previous; pWord->next = pWordTail; pWordTail->previous->next = pWord; pWordTail->previous = pWord; pIndex = new WordIndex(pWord, index[p_index]); index[p_index] = pIndex; Word *qWord = pWord->previous; while (strcmp(qWord->word, pWord->word) > 0) { if (qWord->num > pWord->num) return; shiftWord(pWord); qWord = pWord->previous; } }此函数功能为开启链表,存储词组,是空间消耗最大的函数。
问题描述
问题1:无法识别标识符
问题2:执行之后某个文件无法使用
问题3:执行大文本文件之后不出现越界终端
做过哪些尝试
问题1尝试:卸载了vs…然后重装,I am sorry,我就是这么特立独行的蠢,然后在网络上查找资料
问题2尝试:不断地查找错误,修改代码,更换使用的函数,在网络上查找解决方法
问题3尝试:改变读取方法,将字符数组改成字符串
是否解决
问题1:已解决,是因为无法识别头文件,将头文件路径更新即可
问题2:已解决,是因为…我们删除并且改变使用的函数之后解决了
问题3:已解决,是因为我们开的数组不够大
有何收获
解决问题的过程中我又get到一种排查错误的方法,增强了自己的抗压能力,明白了程序员的真正意义。我们笑谈:“我一直把头藏在帽子里是为了遮挡我头上的鸟窝,并且思考着我的代码”,这个梗只有我和队友get到…