[vb]提取网页中表格内容
2007-07-21 19:14
246 查看
在网页html代码
<html>
<table width="200" border="0" cellpadding="0" cellspacing="0" id="xyf">
<tr>
<td >姓名</td>
<td>学历</td>
<td>籍贯</td>
</tr>
<tr>
<td id=xiaoxing1>小红</td>
<td id=xiaoxing2>研究生</td>
<td id=xiaoxing3>北京</td>
</tr>
<tr>
<td id=xiaofeng1>小奉</td>
<td id=xiaofeng2>本科</td>
<td id=xiaofeng3>上海</td>
</tr>
</table>
</html>
中提取出如下内容:
姓名 学历 籍贯
小红 研究生 北京
小奉 本科 上海
'两textbox,text1是html代码,text2是输出
'代码如下
Option Explicit
Private Sub Command1_Click()
Dim asTable() As String
Dim lRow As Long
Dim lColumn As Long
Dim i As Long
Dim j As Long
Call ReadHtmlTable(Text1.Text, asTable, lRow, lColumn)
For i = 0 To lRow
For j = 1 To lColumn
Text2.SelStart = 65535
Text2.SelText = asTable(i * lColumn + j - 1) & vbTab
'Debug.Print "asTable("; i * lColumn + j - 1; ")"
Next
Text2.SelStart = 65535
Text2.SelText = vbCrLf
Next
End Sub
Private Sub ReadHtmlTable(ByRef sHtml As String, _
ByRef asTable() As String, _
ByRef lRow As Long, _
ByRef lColumn As Long)
Dim lTablePos As Long
Dim lEndTablePos As Long
Dim lTRPos1 As Long
Dim lTRPos2 As Long
Dim lEndTRPos As Long
Dim sTDContent As String
Dim asTD() As String
Dim lCount As Long
Dim bContinue As Boolean
Dim i As Long
lTablePos = InStr(1, sHtml, "<table", vbTextCompare)
If lTablePos <= 0 Then Exit Sub
lTablePos = InStr(lTablePos, sHtml, ">", vbTextCompare)
lEndTablePos = InStr(lTablePos, sHtml, "</table>", vbTextCompare)
lTRPos1 = InStr(lTablePos, sHtml, "<tr", vbTextCompare)
lTRPos2 = InStr(lTRPos1 + 1, sHtml, ">", vbTextCompare)
lEndTRPos = InStr(lTablePos, sHtml, "</tr>", vbTextCompare)
If lTRPos1 <= 0 Then Exit Sub
bContinue = True
While bContinue
bContinue = True
If lTRPos1 < lTablePos Or lTRPos1 > lEndTablePos Then bContinue = False
If lEndTRPos < lTablePos Or lEndTRPos > lEndTablePos Then bContinue = False
If bContinue Then
sTDContent = Mid(sHtml, lTRPos2 + 1, lEndTRPos - lTRPos2 - 1)
Call ReadHtmlTD(sTDContent, asTD, lCount)
lColumn = lCount + 1
For i = 0 To lCount
ReDim Preserve asTable(lRow * lColumn + lColumn)
asTable(lRow * lColumn + i) = asTD(i)
Debug.Print "asTable("; lRow * lColumn + i; ") = asTD("; i; ")"
'Debug.Print asTD(i),
Next
lRow = lRow + 1
Debug.Print
lTRPos1 = InStr(lTRPos1 + 1, sHtml, "<tr", vbTextCompare)
lTRPos2 = InStr(lTRPos1 + 1, sHtml, ">", vbTextCompare)
lEndTRPos = InStr(lTRPos2 + 2, sHtml, "</tr>", vbTextCompare)
End If
Wend
lRow = lRow - 1
End Sub
Private Sub ReadHtmlTD(ByRef sHtml As String, _
ByRef asTD() As String, _
ByRef lCount As Long)
Dim lTDPos1 As Long
Dim lTDPos2 As Long
Dim lEndTDPos As Long
Dim lLen As Long
lLen = Len(sHtml)
Dim bContinue As Boolean
lCount = 0
lTDPos1 = InStr(1, sHtml, "<td", vbTextCompare)
lEndTDPos = InStr(lTDPos1 + 1, sHtml, "</td>", vbTextCompare)
lTDPos2 = InStr(lTDPos1 + 1, sHtml, ">", vbTextCompare)
If lTDPos1 <= 0 Then Exit Sub
bContinue = True
While (bContinue)
bContinue = True
If lTDPos1 <= 0 Or lTDPos1 > lLen Then bContinue = False
If lEndTDPos <= 0 Or lEndTDPos > lLen Then bContinue = False
'Debug.Print Mid(sHtml, lTDPos2 + 1, lEndTDPos - lTDPos2 - 1)
If bContinue Then
lCount = lCount + 1
lTDPos1 = InStr(lTDPos1 + 1, sHtml, "<td", vbTextCompare)
lTDPos2 = InStr(lTDPos1 + 1, sHtml, ">", vbTextCompare)
lEndTDPos = InStr(lTDPos1 + 1, sHtml, "</td>", vbTextCompare)
End If
Wend
ReDim asTD(lCount + 1) As String
lCount = 0
bContinue = True
lTDPos1 = InStr(1, sHtml, "<td", vbTextCompare)
lEndTDPos = InStr(lTDPos1 + 1, sHtml, "</td>", vbTextCompare)
lTDPos2 = InStr(lTDPos1 + 1, sHtml, ">", vbTextCompare)
While (bContinue)
bContinue = True
If lTDPos1 <= 0 Or lTDPos1 > lLen Then bContinue = False
If lEndTDPos <= 0 Or lEndTDPos > lLen Then bContinue = False
asTD(lCount) = Mid(sHtml, lTDPos2 + 1, lEndTDPos - lTDPos2 - 1)
If bContinue Then
lCount = lCount + 1
lTDPos1 = InStr(lTDPos1 + 1, sHtml, "<td", vbTextCompare)
lTDPos2 = InStr(lTDPos1 + 1, sHtml, ">", vbTextCompare)
lEndTDPos = InStr(lTDPos1 + 1, sHtml, "</td>", vbTextCompare)
End If
Wend
lCount = lCount - 1
End Sub
<html>
<table width="200" border="0" cellpadding="0" cellspacing="0" id="xyf">
<tr>
<td >姓名</td>
<td>学历</td>
<td>籍贯</td>
</tr>
<tr>
<td id=xiaoxing1>小红</td>
<td id=xiaoxing2>研究生</td>
<td id=xiaoxing3>北京</td>
</tr>
<tr>
<td id=xiaofeng1>小奉</td>
<td id=xiaofeng2>本科</td>
<td id=xiaofeng3>上海</td>
</tr>
</table>
</html>
中提取出如下内容:
姓名 学历 籍贯
小红 研究生 北京
小奉 本科 上海
'两textbox,text1是html代码,text2是输出
'代码如下
Option Explicit
Private Sub Command1_Click()
Dim asTable() As String
Dim lRow As Long
Dim lColumn As Long
Dim i As Long
Dim j As Long
Call ReadHtmlTable(Text1.Text, asTable, lRow, lColumn)
For i = 0 To lRow
For j = 1 To lColumn
Text2.SelStart = 65535
Text2.SelText = asTable(i * lColumn + j - 1) & vbTab
'Debug.Print "asTable("; i * lColumn + j - 1; ")"
Next
Text2.SelStart = 65535
Text2.SelText = vbCrLf
Next
End Sub
Private Sub ReadHtmlTable(ByRef sHtml As String, _
ByRef asTable() As String, _
ByRef lRow As Long, _
ByRef lColumn As Long)
Dim lTablePos As Long
Dim lEndTablePos As Long
Dim lTRPos1 As Long
Dim lTRPos2 As Long
Dim lEndTRPos As Long
Dim sTDContent As String
Dim asTD() As String
Dim lCount As Long
Dim bContinue As Boolean
Dim i As Long
lTablePos = InStr(1, sHtml, "<table", vbTextCompare)
If lTablePos <= 0 Then Exit Sub
lTablePos = InStr(lTablePos, sHtml, ">", vbTextCompare)
lEndTablePos = InStr(lTablePos, sHtml, "</table>", vbTextCompare)
lTRPos1 = InStr(lTablePos, sHtml, "<tr", vbTextCompare)
lTRPos2 = InStr(lTRPos1 + 1, sHtml, ">", vbTextCompare)
lEndTRPos = InStr(lTablePos, sHtml, "</tr>", vbTextCompare)
If lTRPos1 <= 0 Then Exit Sub
bContinue = True
While bContinue
bContinue = True
If lTRPos1 < lTablePos Or lTRPos1 > lEndTablePos Then bContinue = False
If lEndTRPos < lTablePos Or lEndTRPos > lEndTablePos Then bContinue = False
If bContinue Then
sTDContent = Mid(sHtml, lTRPos2 + 1, lEndTRPos - lTRPos2 - 1)
Call ReadHtmlTD(sTDContent, asTD, lCount)
lColumn = lCount + 1
For i = 0 To lCount
ReDim Preserve asTable(lRow * lColumn + lColumn)
asTable(lRow * lColumn + i) = asTD(i)
Debug.Print "asTable("; lRow * lColumn + i; ") = asTD("; i; ")"
'Debug.Print asTD(i),
Next
lRow = lRow + 1
Debug.Print
lTRPos1 = InStr(lTRPos1 + 1, sHtml, "<tr", vbTextCompare)
lTRPos2 = InStr(lTRPos1 + 1, sHtml, ">", vbTextCompare)
lEndTRPos = InStr(lTRPos2 + 2, sHtml, "</tr>", vbTextCompare)
End If
Wend
lRow = lRow - 1
End Sub
Private Sub ReadHtmlTD(ByRef sHtml As String, _
ByRef asTD() As String, _
ByRef lCount As Long)
Dim lTDPos1 As Long
Dim lTDPos2 As Long
Dim lEndTDPos As Long
Dim lLen As Long
lLen = Len(sHtml)
Dim bContinue As Boolean
lCount = 0
lTDPos1 = InStr(1, sHtml, "<td", vbTextCompare)
lEndTDPos = InStr(lTDPos1 + 1, sHtml, "</td>", vbTextCompare)
lTDPos2 = InStr(lTDPos1 + 1, sHtml, ">", vbTextCompare)
If lTDPos1 <= 0 Then Exit Sub
bContinue = True
While (bContinue)
bContinue = True
If lTDPos1 <= 0 Or lTDPos1 > lLen Then bContinue = False
If lEndTDPos <= 0 Or lEndTDPos > lLen Then bContinue = False
'Debug.Print Mid(sHtml, lTDPos2 + 1, lEndTDPos - lTDPos2 - 1)
If bContinue Then
lCount = lCount + 1
lTDPos1 = InStr(lTDPos1 + 1, sHtml, "<td", vbTextCompare)
lTDPos2 = InStr(lTDPos1 + 1, sHtml, ">", vbTextCompare)
lEndTDPos = InStr(lTDPos1 + 1, sHtml, "</td>", vbTextCompare)
End If
Wend
ReDim asTD(lCount + 1) As String
lCount = 0
bContinue = True
lTDPos1 = InStr(1, sHtml, "<td", vbTextCompare)
lEndTDPos = InStr(lTDPos1 + 1, sHtml, "</td>", vbTextCompare)
lTDPos2 = InStr(lTDPos1 + 1, sHtml, ">", vbTextCompare)
While (bContinue)
bContinue = True
If lTDPos1 <= 0 Or lTDPos1 > lLen Then bContinue = False
If lEndTDPos <= 0 Or lEndTDPos > lLen Then bContinue = False
asTD(lCount) = Mid(sHtml, lTDPos2 + 1, lEndTDPos - lTDPos2 - 1)
If bContinue Then
lCount = lCount + 1
lTDPos1 = InStr(lTDPos1 + 1, sHtml, "<td", vbTextCompare)
lTDPos2 = InStr(lTDPos1 + 1, sHtml, ">", vbTextCompare)
lEndTDPos = InStr(lTDPos1 + 1, sHtml, "</td>", vbTextCompare)
End If
Wend
lCount = lCount - 1
End Sub
相关文章推荐
- 将网页表格的内容提取出来
- 用正则表达式提取网页上表格的内容
- 提取网页中的内容(提取纯文字VB)
- JS提取网页中表格内容,将特定列内的html文本中id,href,onclick属性提取出来
- C# 正则提取网页内容
- 用xpath方法提取网页内容保存为json格式
- JSP提取表格内容
- 在提取网页内容时,请问匹配UTF8的全部内容,正则如何匹配 繁体、全角数字、标点 等字符
- 定时抓取网页连接,提取网页内容,存入数据库
- 根据表格内容,自动调整VB.NET中Datagrid控件单元格宽度
- nodejs提取网页内容
- VB使用xmlhttp获取远程网页内容
- [VB.NET]怎样提取查询结果中每个子段的内容
- 1分钟快速生成用于网页内容提取的xslt
- c#简单实现提取网页内容
- 【教程】如何在C#,VB.NET中提取消息内容
- [VB.NET]高分求关于网页内容分析的问题(自动翻页)
- [VB.NET]高手帮忙,水晶报表显示在网页上的内容不更新,在线等,只有40分了
- 网页内容爬取:如何提取正文内容
- 提取动态网页内容