WinInet获取网页HTML内容-Code
2007-06-05 12:49
218 查看
bool loadHtmlFile(const CString& strUrl, CString& strHtml)
{
bool bRet = false;
CInternetSession sess;//建立会话
sess.SetOption(INTERNET_OPTION_CONNECT_TIME, 10*1000, NULL);//似乎不起作用,微软SDK的一个bug
CHttpFile* fileGet = 0;
InternetParam param;
param.pSession = &sess;
param.pHttpFile = &fileGet;
param.strUrl = strUrl;
HANDLE hThread = ::CreateThread(NULL, 0, openUrlProc, (LPVOID)¶m, NULL, NULL);//自己开线程处理超时
DWORD dwTimeOut = 10*1000;
if (::WaitForSingleObject(hThread, dwTimeOut) == WAIT_TIMEOUT)
{
sess.Close();
return false;
}
fileGet = *(param.pHttpFile);
if(fileGet)
{
DWORD dwStatus;
DWORD dwBuffLen = sizeof(dwStatus);
BOOL bSuccess = fileGet->QueryInfo(
HTTP_QUERY_STATUS_CODE|HTTP_QUERY_FLAG_NUMBER,
&dwStatus, &dwBuffLen);
if( bSuccess && dwStatus>= 200 && dwStatus<300 )
{
const int ContentSize = 1024000;//maximin size of text form html file.
char* mbbuf = new char[ContentSize];
int nRead = fileGet->Read(mbbuf, ContentSize-1);
if(nRead > 0 && nRead < ContentSize)
{
mbbuf[nRead] = 0;
int nWordCount = MultiByteToWideChar(CP_ACP, NULL, mbbuf, nRead, NULL, NULL);
if(nWordCount > 0)
{
wchar_t* wstrUnicode = new wchar_t[nWordCount+1];
wstrUnicode[nWordCount] = 0;
MultiByteToWideChar(CP_ACP, NULL, mbbuf, nRead, wstrUnicode, nWordCount+1);
strHtml = wstrUnicode;
delete[] wstrUnicode;
bRet = true;
}
}
delete[] mbbuf;
}
fileGet->Close();
delete fileGet;
}
sess.Close();
return bRet;
}
UNIX主机和WINDOWS主机的回车问题
/**
 * Converts UNIX line endings to Windows line endings.
 *
 * Every '\n' that is not already preceded by '\r' gets a '\r' inserted in
 * front of it; all other characters are copied unchanged.
 *
 * @param strTxt Text to convert.
 * @return The converted text, or an empty CString when strTxt is empty.
 */
CString unixRetToWindowRet(const CString& strTxt)
{
    const int nLen = strTxt.GetLength();
    if (nLen <= 0)
        return CString();

    CString strOut;
    // Worst case every character is a bare '\n', which doubles the length.
    // Writing straight into the result's buffer avoids a heap array that
    // would have to be freed after the return value is built.
    LPTSTR pDst = strOut.GetBuffer(nLen*2);
    LPCTSTR pSrc = strTxt;
    int nCount = 0;
    for (int i = 0; i < nLen; i++)
    {
        if (pSrc[i] == '\n' && (i == 0 || pSrc[i-1] != '\r'))
            pDst[nCount++] = '\r';
        pDst[nCount++] = pSrc[i];
    }
    strOut.ReleaseBuffer(nCount);
    return strOut;
}
{
bool bRet = false;
CInternetSession sess;//建立会话
sess.SetOption(INTERNET_OPTION_CONNECT_TIME, 10*1000, NULL);//似乎不起作用,微软SDK的一个bug
CHttpFile* fileGet = 0;
InternetParam param;
param.pSession = &sess;
param.pHttpFile = &fileGet;
param.strUrl = strUrl;
HANDLE hThread = ::CreateThread(NULL, 0, openUrlProc, (LPVOID)¶m, NULL, NULL);//自己开线程处理超时
DWORD dwTimeOut = 10*1000;
if (::WaitForSingleObject(hThread, dwTimeOut) == WAIT_TIMEOUT)
{
sess.Close();
return false;
}
fileGet = *(param.pHttpFile);
if(fileGet)
{
DWORD dwStatus;
DWORD dwBuffLen = sizeof(dwStatus);
BOOL bSuccess = fileGet->QueryInfo(
HTTP_QUERY_STATUS_CODE|HTTP_QUERY_FLAG_NUMBER,
&dwStatus, &dwBuffLen);
if( bSuccess && dwStatus>= 200 && dwStatus<300 )
{
const int ContentSize = 1024000;//maximin size of text form html file.
char* mbbuf = new char[ContentSize];
int nRead = fileGet->Read(mbbuf, ContentSize-1);
if(nRead > 0 && nRead < ContentSize)
{
mbbuf[nRead] = 0;
int nWordCount = MultiByteToWideChar(CP_ACP, NULL, mbbuf, nRead, NULL, NULL);
if(nWordCount > 0)
{
wchar_t* wstrUnicode = new wchar_t[nWordCount+1];
wstrUnicode[nWordCount] = 0;
MultiByteToWideChar(CP_ACP, NULL, mbbuf, nRead, wstrUnicode, nWordCount+1);
strHtml = wstrUnicode;
delete[] wstrUnicode;
bRet = true;
}
}
delete[] mbbuf;
}
fileGet->Close();
delete fileGet;
}
sess.Close();
return bRet;
}
UNIX主机和WINDOWS主机的回车问题
/**
 * Converts UNIX line endings to Windows line endings.
 *
 * Every '\n' that is not already preceded by '\r' gets a '\r' inserted in
 * front of it; all other characters are copied unchanged.
 *
 * @param strTxt Text to convert.
 * @return The converted text, or an empty CString when strTxt is empty.
 */
CString unixRetToWindowRet(const CString& strTxt)
{
    const int nLen = strTxt.GetLength();
    if (nLen <= 0)
        return CString();

    CString strOut;
    // Worst case every character is a bare '\n', which doubles the length.
    // Writing straight into the result's buffer avoids a heap array that
    // would have to be freed after the return value is built.
    LPTSTR pDst = strOut.GetBuffer(nLen*2);
    LPCTSTR pSrc = strTxt;
    int nCount = 0;
    for (int i = 0; i < nLen; i++)
    {
        if (pSrc[i] == '\n' && (i == 0 || pSrc[i-1] != '\r'))
            pDst[nCount++] = '\r';
        pDst[nCount++] = pSrc[i];
    }
    strOut.ReleaseBuffer(nCount);
    return strOut;
}
相关文章推荐
- telnet建立http连接获取网页HTML内容
- php获取网页标题和内容函数(不包含html标签)
- php获取网页标题和内容函数(不包含html标签)
- telnet建立http连接获取网页HTML内容
- 获取HTML网页内容SgmlReader
- 获取网页 HTML 中 <Title>内容的代码
- C# WebBrowser 获取 AJAX 后的网页HTML 内容
- php获取网页标题和内容函数(不包含html标签)
- 【C#】获取网页内容及HTML解析器HtmlAgilityPack的使用
- gprs连接成功以后,如何通过socket获取网页html内容(C#)
- Python获取网页内容、使用BeautifulSoup库分析html
- php获取网页标题和内容函数(不包含html标签)
- C# 获取指定HTML网页中的标签内容
- HTML to Image in C#指定网页地址,获取内容为图片
- PHP获取HTML网页内容的多种方法(精)
- C# 获取HTML网页内容SgmlReader
- telnet建立http连接获取网页HTML内容
- 获取WebView加载HTML时网页中的内容
- 通过网页地址获取网站HTML内容
- 使用XPath解析HTML获取网页内容