您的位置:首页 > 编程语言 > VB

VB网页外链接抓取分析软件UrlX

2020-02-17 04:58 274 查看

  UrlX是一个我使用windows xp操作系统工作的时候开发的一个抓取网页外链接的简单软件,其可以无障碍无弹窗的浏览大部分的网页,并且分析抓取网页之上可以利用的外链接。虽然使用的是VB,实际之上VB我根本没有系统的学习过。所以说很多的功能都是牵强编写出来的。

 

MainForm.frm 文件代码

VERSION 5.00
Object = "{EAB22AC0-30C1-11CF-A7EB-0000C05BAE0B}#1.1#0"; "shdocvw.dll"
Begin VB.Form MainForm
ClientHeight    =   9900
ClientLeft      =   60
ClientTop       =   345
ClientWidth     =   9375
Icon            =   "MainForm.frx":0000
LinkTopic       =   "Form1"
MaxButton       =   0   'False
ScaleHeight     =   9900
ScaleWidth      =   9375
StartUpPosition =   2  '屏幕中心
Begin VB.Frame Frame4
Caption         =   "历史链接"
Height          =   2535
Left            =   0
TabIndex        =   12
Top             =   7320
Width           =   9375
Begin VB.ListBox HisUrls
Height          =   2220
Left            =   120
TabIndex        =   13
Top             =   240
Width           =   9135
End
End
Begin VB.Frame Frame3
Caption         =   "获取链接"
Height          =   3135
Left            =   0
TabIndex        =   8
Top             =   4080
Width           =   9375
Begin VB.ListBox GetUrls
Height          =   2760
Left            =   120
TabIndex        =   9
Top             =   240
Width           =   9135
End
End
Begin VB.Frame Frame2
Caption         =   "工作页面"
Height          =   3255
Left            =   0
TabIndex        =   7
Top             =   720
Width           =   9375
Begin SHDocVwCtl.WebBrowser WorkPage
Height          =   2895
Left            =   120
TabIndex        =   10
Top             =   240
Width           =   9135
ExtentX         =   16113
ExtentY         =   5106
ViewMode        =   0
Offline         =   0
Silent          =   0
RegisterAsBrowser=   0
RegisterAsDropTarget=   1
AutoArrange     =   0   'False
NoClientEdge    =   0   'False
AlignLeft       =   0   'False
NoWebView       =   0   'False
HideFileNames   =   0   'False
SingleClick     =   0   'False
SingleSelection =   0   'False
NoFolders       =   0   'False
Transparent     =   0   'False
ViewID          =   "{0057D0E0-3573-11CF-AE69-08002B2E1262}"
Location        =   "http:///"
End
End
Begin VB.Frame Frame1
Height          =   615
Left            =   0
TabIndex        =   1
Top             =   0
Width           =   9375
Begin VB.CommandButton Command6
Caption         =   "H"
Height          =   255
Left            =   120
TabIndex        =   11
ToolTipText     =   "主页"
Top             =   240
Width           =   375
End
Begin VB.CommandButton Command5
Caption         =   "S"
Height          =   255
Left            =   8880
TabIndex        =   6
ToolTipText     =   "设置"
20000

Top             =   240
Width           =   375
End
Begin VB.CommandButton Command4
Caption         =   "X"
Height          =   255
Left            =   8520
TabIndex        =   5
ToolTipText     =   "停止"
Top             =   240
Width           =   375
End
Begin VB.CommandButton Command3
Caption         =   "R"
Height          =   255
Left            =   8160
TabIndex        =   4
ToolTipText     =   "刷新"
Top             =   240
Width           =   375
End
Begin VB.CommandButton Command2
Caption         =   ">"
Height          =   255
Left            =   840
TabIndex        =   3
ToolTipText     =   "前进"
Top             =   240
Width           =   375
End
Begin VB.CommandButton Command1
Caption         =   "<"
Height          =   255
Left            =   480
TabIndex        =   2
ToolTipText     =   "后退"
Top             =   240
Width           =   375
End
Begin VB.TextBox UrlText
Height          =   270
Left            =   1320
TabIndex        =   0
Top             =   240
Width           =   6735
End
End
End
Attribute VB_Name = "MainForm"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = False
Attribute VB_PredeclaredId = True
Attribute VB_Exposed = False
Private Sub Command1_Click()
On Error Resume Next
WorkPage.GoBack
End Sub
Private Sub Command2_Click()
On Error Resume Next
WorkPage.GoForward
End Sub

Private Sub Command3_Click()
If UrlText.Text <> "" Then
WorkPage.Navigate2 UrlText.Text
End If
End Sub
Private Sub Command4_Click()
WorkPage.Stop
Me.Caption = "停止工作"
End Sub
Private Sub Command5_Click()
ConfigForm.Show
End Sub
Private Sub Command6_Click()
On Error Resume Next
If Dir(App.Path & "\HomePage.inf") = "" Then
OutHomePage ("http://www.baidu.com/")
End If
WorkPage.Navigate2 InHomePage()
End Sub
Private Sub Form_Load()
Command6_Click
End Sub

Private Sub GetUrls_DblClick()
On Error Resume Next
Dim c As String
c = GetUrls.List(GetUrls.ListIndex)
Clipboard.Clear
Clipboard.SetText c
Dim WebSite As String
WebSite = GetWebSite(UrlText.Text)
If Dir(App.Path & "\History\" & WebSite & ".txt") = "" Then
Open App.Path & "\History\" & WebSite & ".txt" For Output As #1
Close #1
End If
Dim s As String
Dim b As Boolean
b = True
Open App.Path & "\History\" & WebSite & ".txt" For Input As #1
While Not EOF(1)
Input #1, s
If s = c Then
b = False
GoTo p
End If
Wend
p:  Close #1
If b Then
Open App.Path & "\History\" & WebSite & ".txt" For Append As #1
Print #1, c
Close #1
End If
End Sub
Private Sub HisUrls_DblClick()
Dim c As String
c = HisUrls.List(HisUrls.ListIndex)
Clipboard.Clear
Clipboard.SetText c
End Sub

Private Sub UrlText_KeyDown(KeyCode As Integer, Shift As Integer)
If KeyCode = 13 Then
Command3_Click
End If
End Sub
Private Sub WorkPage_BeforeNavigate2(ByVal pDisp As Object, Url As Variant, Flags As Variant, TargetFrameName As Variant, PostData As Variant, Headers As Variant, Cancel As Boolean)
On Error Resume Next
Me.Caption = "正在打开页面"
WorkPage.Silent = True
Dim WebSite As String
If (pDisp Is WorkPage.Object) Then
UrlText.Text = Url
WebSite = GetWebSite(UrlText.Text)
If Dir(App.Path & "\History\" & WebSite & ".txt") = "" Then
Open App.Path & "\History\" & WebSite & ".txt" For Output As #1
Close #1
End If
Dim s As String
HisUrls.Clear
Open App.Path & "\History\" & WebSite & ".txt" For Input As #1
While Not EOF(1)
Input #1, s
HisUrls.AddItem s
Wend
Close #1
End If
End Sub
Private Sub WorkPage_DocumentComplete(ByVal pDisp As Object, Url As Variant)
On Error Resume Next
Me.Caption = "页面下载完成,正在解析页面"
If (pDisp Is WorkPage.Object) Then
GetUrls.Clear
Dim n As Integer
Dim UrlX As String
Dim KeyWord As String
For n = 0 To WorkPage.Document.links.Length
UrlX = WorkPage.Document.links.Item(n)
KeyWord = WorkPage.Document.links.Item(n).innertext
If Len(KeyWord) > 16 Then
GetUrls.AddItem "【" & KeyWord & "】" & UrlX
End If
Next n
End If
Me.Caption = "页面解析完成"
End Sub
Private Sub WorkPage_NewWindow2(ppDisp As Object, Cancel As Boolean)
On Error Resume Next
Cancel = True
WorkPage.Navigate2 WorkPage.Document.activeElement.href
End Sub

 

ConfigForm.frm 文件代码

VERSION 5.00
Object = "{831FDD16-0C5C-11D2-A9FC-0000F8754DA1}#2.0#0"; "mscomctl.ocx"
Begin VB.Form ConfigForm
Caption         =   "UrlX设置"
ClientHeight    =   5235
ClientLeft      =   60
ClientTop       =   345
ClientWidth     =   4680
Icon            =   "ConfigForm.frx":0000
LinkTopic       =   "Form1"
MaxButton       =   0   'False
ScaleHeight     =   5235
ScaleWidth      =   4680
StartUpPosition =   2  '屏幕中心
Begin VB.Frame Frame3
Caption         =   "锚文本规则"
Height          =   2175
Left            =   0
TabIndex        =   4
Top             =   3000
Width           =   4695
Begin MSComctlLib.Slider Slider1
Height          =   255
Left            =   120
TabIndex        =   5
Top             =   480
Width           =   4455
_ExtentX        =   7858
_ExtentY        =   450
_Version        =   393216
Max             =   60
End
Begin VB.Label Label1
Caption         =   "文本最小长度"
Height          =   255
Left            =   240
TabIndex        =   6
Top             =   240
Width           =   1215
End
Begin VB.Line Line1
BorderColor     =   &H80000003&
X1              =   0
X2              =   4680
Y1              =   840
Y2              =   840
End
End
Begin VB.Frame Frame2
Caption         =   "链接规则"
Height          =   2055
Left            =   0
TabIndex        =   3
Top             =   840
Width           =   4695
End
Begin VB.Frame Frame1
Caption         =   "主页"
Height          =   615
Left            =   0
TabIndex        =   0
Top             =   120
Width           =   4695
Begin VB.CommandButton Command1
Caption         =   "C"
Height          =   255
Left            =   4200
TabIndex        =   2
Top             =   240
Width           =   375
End
Begin VB.TextBox HomeText
Height          =   270
Left            =   120
TabIndex        =   1
Top             =   240
Width           =   3975
End
End
End
Attribute VB_Name = "ConfigForm"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = False
Attribute VB_PredeclaredId = True
Attribute VB_Exposed = False
Private Sub Check1_Click(Index As Integer)

End Sub

Private Sub Command1_Click()
OutHomePage (HomeText.Text)
MsgBox HomeText.Text & " 已经成功设置成为主页!", vbOKOnly, "系统提示"
End Sub

Private Sub Form_Load()
If Dir(App.Path & "\HomePage.inf") = "" Then
OutHomePage ("http://www.baidu.com/")
End If
HomeText.Text = InHomePage()
End Sub

Function.bas 文件代码

Attribute VB_Name = "Function"
Public Sub OutHomePage(ByRef Url As String)
Open App.Path & "\HomePage.inf" For Output As #1
Print #1, Url
Close #1
End Sub
Public Function InHomePage() As String
Dim Url As String
Open App.Path & "\HomePage.inf" For Input As #1
Input #1, Url
Close #1
InHomePage = Url
End Function
Public Function GetWebSite(ByVal Url As String) As String
Dim ReUrl As String
ReUrl = ""
Dim i As Integer
i = 1
While Not (Mid(Url, i, 1) = "/" And Mid(Url, i + 1, 1) <> "/")
i = i + 1
Wend
i = i + 1
While Mid(Url, i, 1) <> "/"
ReUrl = ReUrl & Mid(Url, i, 1)
i = i + 1
Wend
GetWebSite = ReUrl
End Function

软件运行需要在当前目录新建一个History目录,因为其具有链接记忆功能(没有学习过数据库,仅仅是使用文件存储信息)。软件有很多的Bug,仅仅是一个半成品,基本上仅仅我自己会使用,其他用户根本都是不知道如何使用。

转载于:https://www.cnblogs.com/wrule/archive/2013/04/08/3008581.html

  • 点赞
  • 收藏
  • 分享
  • 文章举报
davidhunter1987 发布了0 篇原创文章 · 获赞 0 · 访问量 17 私信 关注
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: