#include <iostream>
#include <fstream>
#include <string>
using namespace std;
int main(int argc, char **argv[])
{
ifstream in("easy.html");
if(!in) {
cout<<"文件打开失败"; }
char buf[1500];//若缓冲数组不够,可以加长
string src = ""; while(!in.eof()) {
in.getline(buf,sizeof(buf));
if(strlen(buf) == 0) {
continue; }
src+=buf; } //可写入文件中
ofstream out("link.txt");
if(!out) {
cout<<"文件打开失败"<<endl;
system("pause");
return 0; }
in.close();
int b = 0;
int pos2 = 0;
while(true) {
int pos1 = src.find("href=\"");
if(pos1<0) break;
b = pos1;
int pos2 = src.find("\"",pos1+6);
if (pos2<0) break;
string sub = src.substr(pos1+6,pos2-pos1-6);
cout<<sub<<endl;
src.erase(src.begin()+pos1,src.begin()+pos2+1);
if(sub[0] != 'h'&&sub[1] != 't'&&sub[0] != 't'&&sub[0] != 'p') {
continue; }
else out<<sub<<endl; }
system("pause");
return 0; }
我就是想把这个功能用到MFC里,读取html文档里的链接。find能够用Find,substr能够用Mid,就是src.erase(src.begin()+pos1,src.begin()+pos2+1); 不知道能怎么换,删除掉前面已经读取过的那个链接的字符串
#include <fstream>
#include <string>
using namespace std;
int main(int argc, char **argv[])
{
ifstream in("easy.html");
if(!in) {
cout<<"文件打开失败"; }
char buf[1500];//若缓冲数组不够,可以加长
string src = ""; while(!in.eof()) {
in.getline(buf,sizeof(buf));
if(strlen(buf) == 0) {
continue; }
src+=buf; } //可写入文件中
ofstream out("link.txt");
if(!out) {
cout<<"文件打开失败"<<endl;
system("pause");
return 0; }
in.close();
int b = 0;
int pos2 = 0;
while(true) {
int pos1 = src.find("href=\"");
if(pos1<0) break;
b = pos1;
int pos2 = src.find("\"",pos1+6);
if (pos2<0) break;
string sub = src.substr(pos1+6,pos2-pos1-6);
cout<<sub<<endl;
src.erase(src.begin()+pos1,src.begin()+pos2+1);
if(sub[0] != 'h'&&sub[1] != 't'&&sub[0] != 't'&&sub[0] != 'p') {
continue; }
else out<<sub<<endl; }
system("pause");
return 0; }
我就是想把这个功能用到MFC里,读取html文档里的链接。find能够用Find,substr能够用Mid,就是src.erase(src.begin()+pos1,src.begin()+pos2+1); 不知道能怎么换,删除掉前面已经读取过的那个链接的字符串