关于C/C++代码注释统计问题_图形编程forc吧

一、代码注释统计算是学习过程中常见的一个题目，cnblogs上也有网友写了一段C++的统计代码，详见：https://www.cnblogs.com/nchar/p/3915889.html。我拿来编译了一下，gcc和vc都可以编译，但在vc的debug模式下无法运行，会抛出越界异常。其实博主对这个问题的分析写得不错，该考虑的地方基本都考虑到了，但不幸的是，用它统计其它源代码时，结果明显是错的。
二、又顺手把以前的vckbase代码仓库里的一个代码统计和网上以前下的一个叫源代码智能统计专家拿来测试了一下，随手从7-zip和libpng老外的源代码中选了几个行数较多的源代码，测试结果让人大跌眼镜，拿notepad++手动数了一下注释和空行，vckbase里的除了空行统计有一行的小误差外，其它是正常的，但由于使用的是vc6的MFC+第三方MFC类做的界面，多最小化几次，界面就会出现bug，按钮居然画出到桌面上去了，晕，而另一个软件统计专家统计出来的结果是错误的，虽然界面没有问题。
三、我又仔细读了一下上面那篇博客上的分析，最后写了一段统计代码，经过测试，统计结果终于是正确的了。由于代码使用了以前写的文件内存映射代码，采用特殊的行读取方式，所以整个统计过程超快，老外的5900多行的代码统计基本不耗时，爽啊。
一段典型的可能出现各种各样注释的测试文件：（花样百出，但可以通过编译）
样本：
//dam it */
/**//* this is muti comment * /" // ;
blank in comment
//
/*
*/
# define BB "gss"
// bug line
//
/**/
// single comment
/* single " comment */
/* this is second muti commnet
hello keitty
*/
#include <stdio.h> //this can be?
#include<windows.h>
int main()
{
char s[] = /* abc */ "http:://http://www.hellovfp.com \" \
/ can you see me\
// \
/*bug here*/ "; /*// */
char ch = '"';
char chs = '\"';
/*bug*/ char ss[] /*bug*/ = BB; /* inline
muti comment
*/
puts("/*");
char ds[] = "f\
"
;
puts("*/");
char gs[] = "..//happy line\
//"; //
char bs[][15] = {
"//bug1", "//* bug3 */", //here "
"/*bug2*/"
};
puts(s); puts(ds); // "inline comment" // // ;
return 1; /* inline
"can you see me?" "disp
commnet */
HWND hwnd = CreateWindowA("edit", //class name
"//have bug", //window name";
WS_CHILD, /* "style" */
0, 0, 100, 100, /**/
NULL, NULL, NULL, NULL // /*
);//shell
return 0; /*/
"can you see me?"
commnet */
return 0; /*two*/ /*
"can you see me?"
commnet */
}
你也可以自己写一段代码来测试一下上面这个样本文件，正确的统计结果是：总行数71，代码行34，注释行38，空行15。（代码行、注释行、空行三者之和大于总行数，是因为既有代码又有注释的行会同时计入代码行和注释行。）
我的统计代码如下：
// linux 文本行结尾是0x0a, windows是 0x0d, 0x0a
// 还有一个文本转换问题
// 单行读取方式和整块判断。
// 使用特殊的行读取方式，line_start, line_end两个指针返回一行，去首尾空格，制表符等也是。
#include <stdio.h>
#include <time.h>
#include "File.h"
#include "Comm_func.h"
#include "FileFinder.h"
// A view of one line inside a NUL-terminated text buffer; no text is copied.
// read_line() advances it from line to line.
struct TextLine
{
char *begin; // first character of the current line
char *end; // terminator of the current line: read_line() leaves it on the '\n' (0x0a) or on the closing NUL
};
// Per-file counters plus the scanner state that analyse() carries from one
// line to the next.
struct Count
{
int total; // total number of lines
int comment; // lines that hold only comment text (no code)
int marks; // lines that hold both code and a comment
int blanks; // blank lines
bool flag1; // scanner state: true while inside a /* ... */ block comment
bool flag2; // scanner state: true while inside a "..." string literal
};
// Advances `tl` to the next line of a NUL-terminated text buffer.
//
// Protocol: before the first call, tl.begin points at the start of the buffer
// and tl.end is 0. After every successful call, [tl.begin, tl.end) is the line
// content and tl.end rests on the line terminator ('\n', 0x0a) or on the
// closing NUL. A '\r' that precedes the '\n' stays inside the line.
//
// Returns false when there is no further line: either the previous line ended
// on the closing NUL, or there is no buffer at all (tl.begin is null).
// Note that text ending in '\n' yields one final empty line.
bool read_line(TextLine & tl)
{
    // No buffer (for example a file that could not be read): report
    // "no more lines" instead of dereferencing a null pointer.
    if (tl.begin == 0)
        return false;

    if (tl.end == 0) {
        // First call: start scanning at the beginning of the buffer.
        tl.end = tl.begin;
    } else {
        // The previous line stopped on the closing NUL: the buffer is exhausted.
        if (*tl.end == 0)
            return false;
        // Step over the '\n' that terminated the previous line.
        tl.begin = ++tl.end;
    }

    // Run forward to the next line terminator or the end of the buffer.
    while (*tl.end && *tl.end != 0xa)
        ++tl.end;
    return true;
}
// Debug helper: writes one line, as delimited by read_line(), to stdout.
//
// The content [l.begin, l.end) is printed, followed by the terminator that
// l.end points at when it is a real character (the '\n'). The closing NUL of
// the buffer is never written, so the last line of a file does not emit a
// stray zero byte. An empty or inverted TextLine prints nothing.
void put_line(TextLine l)
{
    if (l.begin == 0 || l.end == 0 || l.begin > l.end)
        return;

    while (l.begin < l.end)
        putchar(*l.begin++);

    // l.end rests on either '\n' or the buffer's closing NUL; only the former
    // is output.
    if (*l.end != 0)
        putchar(*l.end);
}
// Tells whether `ch` is one of the four whitespace characters this tool
// recognises: space, horizontal tab, carriage return or line feed.
template<typename T>
inline bool is_space(T ch)
{
    if (ch == ' ' || ch == '\t')
        return true;
    return ch == '\r' || ch == '\n';
}
// Tells whether the character `ch` occurs in the half-open range [beg, end).
//
// Every element of the range is examined and `*end` itself is never read.
// An empty range (beg == end) yields false.
template<typename T>
bool have_ch(const T* beg, const T* end, T ch)
{
    for (; beg < end; ++beg) {
        if (*beg == ch)
            return true;
    }
    return false;
}
// Quick pre-filter: reports whether [beg, end) contains anything the comment
// scanner cares about, i.e. a "//", "/*" or "*/" pair, or a double quote.
//
// The second character of a pair is read through a one-character lookahead,
// so for the last position of the range the character at `end` is inspected;
// the buffer must therefore be readable at `end`.
bool have_key(char *beg, char *end)
{
    for (char *cur = beg; cur < end; ++cur) {
        switch (*cur) {
        case '"':
            return true;
        case '/':
            if (cur[1] == '/' || cur[1] == '*')
                return true;
            break;
        case '*':
            if (cur[1] == '/')
                return true;
            break;
        default:
            break;
        }
    }
    return false;
}
// Classifies one line and updates the counters and scanner state in `cnt`.
//
// `line` is a line as produced by read_line(): [line.begin, line.end) is the
// content and *line.end is readable (the line terminator or the closing NUL);
// the one-character lookaheads below rely on that.
//
// cnt.flag1 (inside a /* */ comment) and cnt.flag2 (inside a string literal)
// are read on entry and left describing the state at the end of the line, so
// multi-line comments and backslash-continued strings carry over.
//
// Returns:
//   0 - blank line                      (cnt.blanks incremented)
//   1 - line holds only comment text    (cnt.comment incremented)
//   2 - line holds code and a comment   (cnt.marks incremented)
//   4 - line holds only code            (no counter touched)
int analyse(TextLine line, Count& cnt)
{
    char *beg = line.begin, *end = line.end;

    // Skip leading whitespace.
    while (beg < end && is_space(*beg))
        ++beg;

    // Blank line; blank lines inside a /* */ comment are counted here as well.
    if (beg == end) {
        ++cnt.blanks;
        return 0;
    }

    // Inside a block comment that does not close on this line.
    // NOTE(review): find() comes from a project header; it is used here as
    // "yields a false value when the text is absent" - confirm.
    if (cnt.flag1 && !find(beg, end, "*/")) {
        ++cnt.comment;
        return 1;
    }

    // A line that starts with "//" while not inside a comment or a string.
    if (!cnt.flag1 && !cnt.flag2 && *beg == '/' && beg[1] == '/') {
        ++cnt.comment;
        return 1;
    }

    // Fast path for plain code: either a string-continuation line without any
    // quote, or a line with no comment marker and no quote at all.
    if ((cnt.flag2 && !have_ch(beg, end, '"')) || !have_key(beg, end)) {
        return 4;
    }

    // Character scan. state bit 1: a comment was seen; bit 2: code was seen.
    char state = 0;
    for (; beg < end; ++beg)
    {
        if (!cnt.flag1) {
            // A backslash escapes the next character, and the opening quote of
            // the char literal '"' hides the double quote that follows it.
            // Neither rule applies inside a block comment (where a backslash
            // has no meaning, so "\*/" must still close the comment), and the
            // char-literal rule does not apply inside a string literal (where
            // the '"' after an apostrophe is the closing quote).
            const bool escape = (*beg == '\\');
            const bool quote_literal = !cnt.flag2 && *beg == '\'' && beg[1] == '"';
            if (escape || quote_literal) {
                state |= 2;   // these characters are code
                ++beg;        // step over the protected character
                continue;
            }
        }

        if (!cnt.flag2) {   // comment markers only count outside string literals
            const bool opens  = !cnt.flag1 && *beg == '/' && beg[1] == '*';
            const bool closes =  cnt.flag1 && *beg == '*' && beg[1] == '/';
            if (opens || closes) {
                cnt.flag1 = !cnt.flag1;
                state |= 1;
                ++beg;      // consume the second character of the marker
                continue;
            }
            if (!cnt.flag1 && *beg == '/' && beg[1] == '/') {
                // The rest of the line is a "//" comment.
                state |= 1;
                break;
            }
        }

        if (!cnt.flag1 && *beg == '"') {
            // Opening or closing quote of a string literal. The quote itself
            // is code, so a line such as `"" // x` is code plus comment.
            cnt.flag2 = !cnt.flag2;
            state |= 2;
            continue;
        }

        // Any other non-blank character outside a block comment is code.
        if (!cnt.flag1 && !is_space(*beg))
            state |= 2;
    }

    if (state & 1) {
        if (state & 2) {
            ++cnt.marks;
            return 2;
        }
        ++cnt.comment;
        return 1;
    }
    return 4;
}
// Walks the entries matched by "*" through FileFinder, and for every .c / .cpp
// file (extension compared with istrcmp) prints the line statistics gathered
// by analyse(): total lines, blank lines, comment-only lines and lines with an
// inline comment, followed by a code / comment / blank summary.
//
// Returns 1 when the initial find fails, 0 otherwise.
int main()
{
    FileFinder finder;
    if (!finder.find(L"*"))
        return 1;

    do {
        if (finder.is_dir())
            continue;

        // Locate the extension. Names without a '.' are skipped rather than
        // handing a possibly null pointer to istrcmp.
        utf16 *p = rfind(finder.name, finder.name + diy::strlen(finder.name), L'.');
        if (p == 0)
            continue;

        if (istrcmp(p, L".c") == 0 || istrcmp(p, L".cpp") == 0) {
            sys::File file;
            file.open(finder.name);   // the file has to be opened before read()
            if (!file.is_open())
                continue;

            TextLine line = {file.read()};
            Count cnt = {0};

            clock_t start = clock();
            while (read_line(line))
            {
                ++cnt.total;
                switch (analyse(line, cnt))
                {
                case 1:   // comment-only line
                    //put_line(line);
                    break;
                case 2:   // code line with an inline comment
                    //put_line(line);
                    break;
                }
            }
            // clock() ticks are converted with CLOCKS_PER_SEC; the tick rate
            // is 1000 on some platforms only.
            const double elapsed = static_cast<double>(clock() - start) / CLOCKS_PER_SEC;

            // "%ls" is the standard conversion for a wide string, and using
            // printf throughout avoids mixing wide and byte output on stdout.
            printf("%ls\n", finder.name);
            printf("run time is %.3f\n", elapsed);
            printf("all line is %d, blank=%d, comment=%d, marks=%d\n",
                   cnt.total, cnt.blanks, cnt.comment, cnt.marks);
            // A line with an inline comment counts as code AND as comment, so
            // code + comm + blank may exceed the total.
            printf("all=%d, code=%d, comm=%d, blank=%d\n",
                   cnt.total,
                   cnt.total - cnt.comment - cnt.blanks,
                   cnt.comment + cnt.marks,
                   cnt.blanks);
            puts("-----------------------------------");
        }
    } while (finder.next());

    return 0; // this is the basic comment line
}
运行结果：

整个操作就是两指针在内存中一阵操作，然后结果就出来了，主分析代码函数只有69行，爽吧？

后续思考：还想更快？(其实运行速度已经很快了)，有一个改进的地方就是bool read_line(TextLine & tl)行读取代码，可以在读取一行的时候，对关键字进行一次初步判定，后面的分析过程中对非注释行进行的二次扫描就会减少到一次，毕竟源代码中代码行是占多数的。

大佬帮忙看看谢谢

日	一	二	三	四	五	六

关于C/C++代码注释统计问题

扫二维码下载贴吧客户端