您的位置:首页 > 其它

Excel clawer

2015-09-17 17:21 260 查看
版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。 本文链接:https://blog.csdn.net/bby12345/article/details/84742114 Sub clawData()
Dim companies() As String
Dim rowNum As Long
Dim keywords As String
Dim ff As Long

companyNum = Sheets("Sheet1").Range("g65536").End(xlUp).Row
keywordsGeneral = getGeneralKeyWordsGeneral()
keywordsEnvironment = getGeneralKeyWordsEnvironment()
keywordsSocial = getGeneralKeyWordsSocial()
keywordsGovernance = getGeneralKeyWordsGovernance()

companies() = getCompanyList()
For ff = 0 To companyNum Step 1
If companies()(ff) <> "" Then
keywordsGeneral = clawResult(CStr(keywordsGeneral), "General", CStr(companies()(ff)), ff * 4)
keywordsGeneral = clawResult(CStr(keywordsEnvironment), "Environment", CStr(companies()(ff)), ff * 4 + 1)
keywordsGeneral = clawResult(CStr(keywordsSocial), "Social", CStr(companies()(ff)), ff * 4 + 2)
keywordsGeneral = clawResult(CStr(keywordsGovernance), "Governance", CStr(companies()(ff)), ff * 4 + 3)
End If
Next

Shell ("taskkill /f /im IEXPLORE.exe")
4
End Sub

Function GetChs(strInput As String) As String
Dim regEx As Object
Set regEx = CreateObject("VBSCRIPT.REGEXP")

regEx.Pattern = "[^\u4e00-\u9fa5]"
regEx.IgnoreCase = True
regEx.Global = True
GetChs = regEx.Replace(strInput, "")
Set regEx = Nothing
End Function

Function getGeneralKeyWordsGeneral() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("h65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 8))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next

getGeneralKeyWordsGeneral = strs


End Function

Function getGeneralKeyWordsEnvironment() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("i65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 9))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next

getGeneralKeyWordsEnvironment = strs


End Function

Function getGeneralKeyWordsSocial() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("j65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 10))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next

getGeneralKeyWordsSocial = strs


End Function
Function getGeneralKeyWordsGovernance() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("k65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 11))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next

getGeneralKeyWordsGovernance = strs


End Function

Function getCompanyList() As String()
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1(100) As String
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("g65536").End(xlUp).Row Step 1
'strT = GetChs(CStr(Sheets("Sheet1").Cells(i, 2)))
strT = CStr(Sheets("Sheet1").Cells(i, 7))
If strT <> "" Then
array1(i - 2) = strT
End If
Next


getCompanyList = array1()


End Function


Function urlVerify(url As String) As Long
Dim result As Long
result = 1
IFind = InStr(url, ".pdf")
IFind2 = InStr(url, ".doc")
IFind3 = InStr(url, ".xls")
IFind4 = InStr(url, ".xlsx")
IFind5 = InStr(url, ".ppt")
If IFind = 0 And IFind2 = 0 And IFind3 = 0 And IFind4 = 0 And IFind5 = 0 Then
result = 0
End If
urlVerify = result
End Function


Function clawResult(keywords As String, keyWordsType As String, companyName As String, companyLine As Long) As String
Dim ie, dmt, tb, i&, j&, a&, strx2 As String, ie2, dmt2, tb2, i2&

For a = 0 To 4

Set ie = CreateObject("InternetExplorer.Application")
With ie
.Visible = False
.navigate "https://www.google.com.hk/search?q=" + keywords + "+%22+" + companyName + "%22&lr=lang_ja&newwindow=1&safe=strict&hl=zh-CN&as_qdr=all&tbs=lr:lang_1ja&ei=1LhIVKeUFc3W7Qb_oIGABQ&start=" + CStr(a) + "0&sa=N&biw=1920&bih=1016" '??§????3???§??3??

Do Until .ReadyState = 4
DoEvents
Loop
Set dmt = .document
If TypeName(dmt) <> "AcroPDF" Then
Set tb = dmt.all.tags("h3")

For i = 0 To tb.Length - 1
strx = Split(tb.Item(i).innerHTML, "href=")
strx2 = Split(strx(1), """")(1)
Cells(companyLine * 50 + a * 10 + 2 + i, 1) = strx2
Cells(companyLine * 50 + a * 10 + 2 + i, 2) = companyName
Cells(companyLine * 50 + a * 10 + 2 + i, 3) = tb.Item(i).innertext
Cells(companyLine * 50 + a * 10 + 2 + i, 4) = keyWordsType
IFind = urlVerify(strx2)
If IFind = 0 Then

Set ie2 = CreateObject("InternetExplorer.Application")
With ie2
.Visible = False
.navigate strx2
Do Until .ReadyState = 4 Or .busy = False
DoEvents
Loop

Set dmt2 = .document
If TypeName(dmt2) <> "AcroPDF" Then
Set tb2 = dmt2.all.tags("p")

For i2 = 0 To tb2.Length - 1
strs2 = strs2 & vbCrLf & tb2.Item(i2).innertext
Next
Cells(companyLine * 50 + a * 10 + 2 + i, 5) = strs2
strs2 = ""
End If
End With

End If

Next
End If
End With

Next

Shell ("taskkill /f /im IEXPLORE.exe")

newHour = Hour(Now())
newMinute = Minute(Now())
newSecond = Second(Now()) + 5
waitTime = TimeSerial(newHour, newMinute, newSecond)
Application.Wait waitTime
clawResult = ""



End Function
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: