VBA获取href值

时间:2015-09-20 09:41:43

标签: vba web web-scraping href extract

我正在编写宏来从网站中提取href值,这里的示例是提取值:< / listedco/listconews/SEHK/2015/0429/LTN201504291355_C.pdf'从下面的HTML代码。 href是html标签的一个属性' a',我添加了代码getElementbyTagName' a'但它没有用,我的问题是如何将该href值提取到L列。任何人都可以提供帮助吗?提前谢谢!

  <a id="ctl00_gvMain_ctl03_hlTitle" class="news" href="/listedco/listconews/SEHK/2015/0429/LTN201504291355_C.pdf" target="_blank">二零一四年年報</a>

Sub Download_From_HKEX()
    Dim internetdata As Object
    Dim div_result As Object
    Dim header_links As Object
    Dim link As Object
    Dim URL As String
    Dim IE As Object
    Dim i As Object
    Dim ieDoc As Object
    Dim selectItems As Variant
    Dim h As Variant

    Dim LocalFileName As String
    Dim B As Boolean
    Dim ErrorText As String
    Dim x As Variant

    'Key Ratios
    For x = 1 To 1579
        Set IE = New InternetExplorerMedium
        IE.Visible = True
        URL = "http://www.hkexnews.hk/listedco/listconews/advancedsearch/search_active_main_c.aspx"
        IE.navigate URL
        Do
            DoEvents
        Loop Until IE.readyState = 4
        Application.Wait (Now + TimeValue("0:00:05"))
        Call IE.Document.getElementById("ctl00_txt_stock_code").setAttribute("value", Worksheets("Stocks").Cells(x, 1).Value)

        Set selectItems = IE.Document.getElementsByName("ctl00$sel_tier_1")
        For Each i In selectItems
            i.Value = "4"
            i.FireEvent ("onchange")
        Next i

        Set selectItems = IE.Document.getElementsByName("ctl00$sel_tier_2")
        For Each i In selectItems
            i.Value = "159"
            i.FireEvent ("onchange")
        Next i

        Set selectItems = IE.Document.getElementsByName("ctl00$sel_DateOfReleaseFrom_d")
        For Each i In selectItems
            i.Value = "01"
            i.FireEvent ("onchange")
        Next i

        Set selectItems = IE.Document.getElementsByName("ctl00$sel_DateOfReleaseFrom_m")
        For Each i In selectItems
            i.Value = "04"
            i.FireEvent ("onchange")
        Next i

        Set selectItems = IE.Document.getElementsByName("ctl00$sel_DateOfReleaseFrom_y")
        For Each i In selectItems
            i.Value = "1999"
            i.FireEvent ("onchange")
        Next i

        Application.Wait (Now + TimeValue("0:00:02"))
        Set ieDoc = IE.Document
        With ieDoc.forms(0)
            Call IE.Document.parentWindow.execScript("document.forms[0].submit()", "JavaScript")
            .submit
        End With
        Application.Wait (Now + TimeValue("0:00:03"))

        'Start here to extract the href value.
        Set internetdata = IE.Document
        Set div_result = internetdata.getElementById("ctl00_gvMain_ctl03_hlTitle")
        Set header_links = div_result.getElementsByTagName("a")
        For Each h In header_links
            Set link = h.ChildNodes.Item(0)
            Worksheets("Stocks").Cells(Range("L" & Rows.Count).End(xlUp).Row + 1, 12) = link.href
        Next
    Next x
End Sub

3 个答案:

答案 0 :(得分:1)

For Each h In header_links
     Worksheets("Stocks").Cells(Range("L" & Rows.Count).End(xlUp).Row + 1, 12) = h.href
Next

编辑:id属性在文档中应该是唯一的:应该只有一个具有任何给定id的元素。所以

IE.Document.getElementById("ctl00_gvMain_ctl03_hlTitle").href

应该有用。

答案 1 :(得分:1)

WB.Document.GetElementById("ctl00_gvMain_ctl04_hlTitle").GetAttribute("href").ToString

答案 2 :(得分:1)

使用CSS选择器获取元素,然后访问其href属性。

#ctl00_gvMain_ctl03_hlTitle

以上是带有id ctl00_gvMain_ctl03_hlTitle的元素。 "#"表示ID。

Debug.Print IE.document.querySelector("#ctl00_gvMain_ctl03_hlTitle").getAttribute("href").innerText