Excel中基于vba网页抓取

2018-08-08  本文已影响0人  RiverStar

使用公式"=TITLE(A1)"即可获取网站的标题。

创建自定义函数 进入VBA编辑器 粘贴代码

代码附录:

Option Explicit

Function Title(ByVal url As String) As String
    Dim res As String
    On Error Resume Next
    '    url = "http://" & Replace(url, "http://", "")
    res   = GetHtml(url)
    Title = Split(Split(res, "<title>")(1), "</title>")(0)

End Function

Private Function GetHtml(url As String)
    Dim xmlHttp As Object
    Set xmlHttp = CreateObject("Microsoft.XMLHTTP")
    xmlHttp.Open "GET", url, True
    xmlHttp.send (Null)
    While xmlHttp.ReadyState <> 4
    DoEvents
    Wend
    GetHtml = BytesToBstr(xmlHttp.responseBody)
End Function

Private Function BytesToBstr(Bytes)
    Dim Unicode As String
    If IsUTF8(Bytes) Then '如果不是UTF-8编码则按照GB2312来处理
    Unicode = "UTF-8"
Else
    Unicode = "GB2312"
End If

Dim objstream As Object
Set objstream = CreateObject("ADODB.Stream")

With objstream
    .Type        = 1
    .Mode        = 3
    .Open
    .Write Bytes
    .Position   = 0
    .Type       = 2
    .Charset    = Unicode
    BytesToBstr = .ReadText
    .Close
End With

End Function

'判断网页编码函数

Private Function IsUTF8(Bytes) As Boolean
Dim i As Long
Dim AscN As Long
Dim Length As Long
Length = UBound(Bytes) + 1

If Length < 3 Then
    IsUTF8 = False
    Exit Function
ElseIf Bytes(0) =  & HEF And Bytes(1) =  & HBB And Bytes(2) =  & HBF Then
    IsUTF8 = True
    Exit Function
End If

Do While i <= Length - 1

    If Bytes(i) < 128 Then
        i    = i + 1
        AscN = AscN + 1
    ElseIf (Bytes(i) And & HE0) =  & HC0 And (Bytes(i + 1) And & HC0) =  & H80 Then
        i    = i + 2

    ElseIf i + 2 < Length Then

        If (Bytes(i) And & HF0) =  & HE0 And (Bytes(i + 1) And & HC0) =  & H80 And (Bytes(i + 2) And & HC0) =  & H80 Then
            i      = i + 3
        Else
            IsUTF8 = False
            Exit Function
        End If

    Else
        IsUTF8 = False
        Exit Function
    End If

Loop

If AscN = Length Then
    IsUTF8 = False
Else
    IsUTF8 = True
End If

End Function

上一篇下一篇

猜你喜欢

热点阅读