Excel clawer
2015-09-17 17:21
260 查看
版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/bby12345/article/details/84742114
Sub clawData()
Dim companies() As String
Dim rowNum As Long
Dim keywords As String
Dim ff As Long
companyNum = Sheets("Sheet1").Range("g65536").End(xlUp).Row
keywordsGeneral = getGeneralKeyWordsGeneral()
keywordsEnvironment = getGeneralKeyWordsEnvironment()
keywordsSocial = getGeneralKeyWordsSocial()
keywordsGovernance = getGeneralKeyWordsGovernance()
companies() = getCompanyList()
For ff = 0 To companyNum Step 1
If companies()(ff) <> "" Then
keywordsGeneral = clawResult(CStr(keywordsGeneral), "General", CStr(companies()(ff)), ff * 4)
keywordsGeneral = clawResult(CStr(keywordsEnvironment), "Environment", CStr(companies()(ff)), ff * 4 + 1)
keywordsGeneral = clawResult(CStr(keywordsSocial), "Social", CStr(companies()(ff)), ff * 4 + 2)
keywordsGeneral = clawResult(CStr(keywordsGovernance), "Governance", CStr(companies()(ff)), ff * 4 + 3)
End If
Next
Shell ("taskkill /f /im IEXPLORE.exe")
4
End Sub
Function GetChs(strInput As String) As String
Dim regEx As Object
Set regEx = CreateObject("VBSCRIPT.REGEXP")
regEx.Pattern = "[^\u4e00-\u9fa5]"
regEx.IgnoreCase = True
regEx.Global = True
GetChs = regEx.Replace(strInput, "")
Set regEx = Nothing
End Function
Function getGeneralKeyWordsGeneral() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("h65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 8))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsGeneral = strs
End Function
Function getGeneralKeyWordsEnvironment() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("i65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 9))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsEnvironment = strs
End Function
Function getGeneralKeyWordsSocial() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("j65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 10))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsSocial = strs
End Function
Function getGeneralKeyWordsGovernance() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("k65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 11))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsGovernance = strs
End Function
Function getCompanyList() As String()
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1(100) As String
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("g65536").End(xlUp).Row Step 1
'strT = GetChs(CStr(Sheets("Sheet1").Cells(i, 2)))
strT = CStr(Sheets("Sheet1").Cells(i, 7))
If strT <> "" Then
array1(i - 2) = strT
End If
Next
getCompanyList = array1()
End Function
Function urlVerify(url As String) As Long
Dim result As Long
result = 1
IFind = InStr(url, ".pdf")
IFind2 = InStr(url, ".doc")
IFind3 = InStr(url, ".xls")
IFind4 = InStr(url, ".xlsx")
IFind5 = InStr(url, ".ppt")
If IFind = 0 And IFind2 = 0 And IFind3 = 0 And IFind4 = 0 And IFind5 = 0 Then
result = 0
End If
urlVerify = result
End Function
Function clawResult(keywords As String, keyWordsType As String, companyName As String, companyLine As Long) As String
Dim ie, dmt, tb, i&, j&, a&, strx2 As String, ie2, dmt2, tb2, i2&
For a = 0 To 4
Set ie = CreateObject("InternetExplorer.Application")
With ie
.Visible = False
.navigate "https://www.google.com.hk/search?q=" + keywords + "+%22+" + companyName + "%22&lr=lang_ja&newwindow=1&safe=strict&hl=zh-CN&as_qdr=all&tbs=lr:lang_1ja&ei=1LhIVKeUFc3W7Qb_oIGABQ&start=" + CStr(a) + "0&sa=N&biw=1920&bih=1016" '??§????3???§??3??
Do Until .ReadyState = 4
DoEvents
Loop
Set dmt = .document
If TypeName(dmt) <> "AcroPDF" Then
Set tb = dmt.all.tags("h3")
For i = 0 To tb.Length - 1
strx = Split(tb.Item(i).innerHTML, "href=")
strx2 = Split(strx(1), """")(1)
Cells(companyLine * 50 + a * 10 + 2 + i, 1) = strx2
Cells(companyLine * 50 + a * 10 + 2 + i, 2) = companyName
Cells(companyLine * 50 + a * 10 + 2 + i, 3) = tb.Item(i).innertext
Cells(companyLine * 50 + a * 10 + 2 + i, 4) = keyWordsType
IFind = urlVerify(strx2)
If IFind = 0 Then
Set ie2 = CreateObject("InternetExplorer.Application")
With ie2
.Visible = False
.navigate strx2
Do Until .ReadyState = 4 Or .busy = False
DoEvents
Loop
Set dmt2 = .document
If TypeName(dmt2) <> "AcroPDF" Then
Set tb2 = dmt2.all.tags("p")
For i2 = 0 To tb2.Length - 1
strs2 = strs2 & vbCrLf & tb2.Item(i2).innertext
Next
Cells(companyLine * 50 + a * 10 + 2 + i, 5) = strs2
strs2 = ""
End If
End With
End If
Next
End If
End With
Next
Shell ("taskkill /f /im IEXPLORE.exe")
newHour = Hour(Now())
newMinute = Minute(Now())
newSecond = Second(Now()) + 5
waitTime = TimeSerial(newHour, newMinute, newSecond)
Application.Wait waitTime
clawResult = ""
End Function
Dim companies() As String
Dim rowNum As Long
Dim keywords As String
Dim ff As Long
companyNum = Sheets("Sheet1").Range("g65536").End(xlUp).Row
keywordsGeneral = getGeneralKeyWordsGeneral()
keywordsEnvironment = getGeneralKeyWordsEnvironment()
keywordsSocial = getGeneralKeyWordsSocial()
keywordsGovernance = getGeneralKeyWordsGovernance()
companies() = getCompanyList()
For ff = 0 To companyNum Step 1
If companies()(ff) <> "" Then
keywordsGeneral = clawResult(CStr(keywordsGeneral), "General", CStr(companies()(ff)), ff * 4)
keywordsGeneral = clawResult(CStr(keywordsEnvironment), "Environment", CStr(companies()(ff)), ff * 4 + 1)
keywordsGeneral = clawResult(CStr(keywordsSocial), "Social", CStr(companies()(ff)), ff * 4 + 2)
keywordsGeneral = clawResult(CStr(keywordsGovernance), "Governance", CStr(companies()(ff)), ff * 4 + 3)
End If
Next
Shell ("taskkill /f /im IEXPLORE.exe")
4
End Sub
Function GetChs(strInput As String) As String
Dim regEx As Object
Set regEx = CreateObject("VBSCRIPT.REGEXP")
regEx.Pattern = "[^\u4e00-\u9fa5]"
regEx.IgnoreCase = True
regEx.Global = True
GetChs = regEx.Replace(strInput, "")
Set regEx = Nothing
End Function
Function getGeneralKeyWordsGeneral() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("h65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 8))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsGeneral = strs
End Function
Function getGeneralKeyWordsEnvironment() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("i65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 9))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsEnvironment = strs
End Function
Function getGeneralKeyWordsSocial() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("j65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 10))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsSocial = strs
End Function
Function getGeneralKeyWordsGovernance() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("k65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 11))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsGovernance = strs
End Function
Function getCompanyList() As String()
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1(100) As String
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("g65536").End(xlUp).Row Step 1
'strT = GetChs(CStr(Sheets("Sheet1").Cells(i, 2)))
strT = CStr(Sheets("Sheet1").Cells(i, 7))
If strT <> "" Then
array1(i - 2) = strT
End If
Next
getCompanyList = array1()
End Function
Function urlVerify(url As String) As Long
Dim result As Long
result = 1
IFind = InStr(url, ".pdf")
IFind2 = InStr(url, ".doc")
IFind3 = InStr(url, ".xls")
IFind4 = InStr(url, ".xlsx")
IFind5 = InStr(url, ".ppt")
If IFind = 0 And IFind2 = 0 And IFind3 = 0 And IFind4 = 0 And IFind5 = 0 Then
result = 0
End If
urlVerify = result
End Function
Function clawResult(keywords As String, keyWordsType As String, companyName As String, companyLine As Long) As String
Dim ie, dmt, tb, i&, j&, a&, strx2 As String, ie2, dmt2, tb2, i2&
For a = 0 To 4
Set ie = CreateObject("InternetExplorer.Application")
With ie
.Visible = False
.navigate "https://www.google.com.hk/search?q=" + keywords + "+%22+" + companyName + "%22&lr=lang_ja&newwindow=1&safe=strict&hl=zh-CN&as_qdr=all&tbs=lr:lang_1ja&ei=1LhIVKeUFc3W7Qb_oIGABQ&start=" + CStr(a) + "0&sa=N&biw=1920&bih=1016" '??§????3???§??3??
Do Until .ReadyState = 4
DoEvents
Loop
Set dmt = .document
If TypeName(dmt) <> "AcroPDF" Then
Set tb = dmt.all.tags("h3")
For i = 0 To tb.Length - 1
strx = Split(tb.Item(i).innerHTML, "href=")
strx2 = Split(strx(1), """")(1)
Cells(companyLine * 50 + a * 10 + 2 + i, 1) = strx2
Cells(companyLine * 50 + a * 10 + 2 + i, 2) = companyName
Cells(companyLine * 50 + a * 10 + 2 + i, 3) = tb.Item(i).innertext
Cells(companyLine * 50 + a * 10 + 2 + i, 4) = keyWordsType
IFind = urlVerify(strx2)
If IFind = 0 Then
Set ie2 = CreateObject("InternetExplorer.Application")
With ie2
.Visible = False
.navigate strx2
Do Until .ReadyState = 4 Or .busy = False
DoEvents
Loop
Set dmt2 = .document
If TypeName(dmt2) <> "AcroPDF" Then
Set tb2 = dmt2.all.tags("p")
For i2 = 0 To tb2.Length - 1
strs2 = strs2 & vbCrLf & tb2.Item(i2).innertext
Next
Cells(companyLine * 50 + a * 10 + 2 + i, 5) = strs2
strs2 = ""
End If
End With
End If
Next
End If
End With
Next
Shell ("taskkill /f /im IEXPLORE.exe")
newHour = Hour(Now())
newMinute = Minute(Now())
newSecond = Second(Now()) + 5
waitTime = TimeSerial(newHour, newMinute, newSecond)
Application.Wait waitTime
clawResult = ""
End Function
相关文章推荐
- Struts 2 execAndWait interceptor example
- ray trace 第三章 翻译
- Android ViewGroup.setDescendantFocusability函数
- 【bzoj1059】【zjoi2007】【矩阵游戏】【二分图匹配】
- 测试中的网络监听
- 数据库的最简单实现【转】
- url
- 矩阵键盘 数码管
- php linux 创建文件夹权限问题
- Javascript学习笔记【第三章】2
- cloudera manager 5.3完整卸载脚本
- iOS 9 关于http 改成https 协议
- IIS7环境下实现svg/woff/woff2字体正常显示
- 设计模式:装饰者模式
- #征文再续#Android基于XMPP Smack Openfire开发IM即时聊天(二)
- 网络爬虫~【转】
- web前端优化
- 设计模式:装饰者模式
- 我的Android进阶之旅------>百度地图学习:BDLocation.getLocType ( )值分析
- [jQuery插件] fancyBox:适合移动端以及PC端的弹层+照片浏览插件