从网站上的搜索字词中抓取总页数

时间:2014-10-22 21:15:48

标签: vb.net winforms pagination

我正在抓取https://thepiratebay.se中的内容,并且我想知道如何获取搜索结果的总页数。

这是我的程序: thepiratebay scraper

正如您所看到的,左侧显示搜索结果,右侧是所选结果的评论(在这个例子中是排在最前面的结果)。在程序的底部,您可以看到它显示了结果的数量,而其右侧是页码。

我想以{current page}/{total pages}的格式显示结果的页数。我其实不需要别人帮忙获取current page,但我确实想知道如何得到total pages。

我还希望评论(右边的列表)也具有这个功能。

这可能有点太多了,但它不应该那么难,因为结果很容易。无法在Google上找到任何内容,因此非常感谢您的帮助。

编辑: 抓取搜索结果的代码:

' Runs a search against thepiratebay.se for the text in TextBox1, parses the
' result rows out of the returned HTML and lists them in TreeView1.
' Reads: TextBox1, CheckBox1..CheckBox7. Writes: Source, Link, TreeView1, LabelResults.
Dim ResultCount, I As Integer
    ' Parallel lists: entry N of every list describes search result N.
    Dim filenamelist, fileurllist, fileseeders, fileleechers, filemagneturl, filesize As New List(Of String)

    TreeView1.Nodes.Clear()

    ' Collect the category code of every ticked filter box (same box-to-code
    ' mapping and order as before).
    Dim categoryCodes As New List(Of String)
    If CheckBox6.Checked Then categoryCodes.Add("600")
    If CheckBox5.Checked Then categoryCodes.Add("500")
    If CheckBox7.Checked Then categoryCodes.Add("400")
    If CheckBox4.Checked Then categoryCodes.Add("300")
    If CheckBox3.Checked Then categoryCodes.Add("200")
    If CheckBox2.Checked Then categoryCodes.Add("100")
    If CheckBox1.Checked Then categoryCodes.Add("0")

    ' Build the complete request URL BEFORE downloading. The codes used to be
    ' appended to the already-downloaded HTML, so the filters never reached the
    ' server. EscapeDataString also encodes reserved characters other than spaces.
    ' NOTE(review): a comma-separated category list is assumed from the site's
    ' search URL scheme - confirm against the live site.
    Dim searchUrl As String = "http://thepiratebay.se/search/" &
        System.Uri.EscapeDataString(TextBox1.Text) & "/0/7/" &
        String.Join(",", categoryCodes.ToArray())

    Using WC As New System.Net.WebClient
        Source = WC.DownloadString(searchUrl)
        ' NOTE(review): Link receives the page HTML here, exactly as before; the
        ' name suggests a URL was intended - confirm what its consumers expect.
        Link = Source

        If Source.Contains("No hits. Try adding an asterisk in you search phrase.") Then
            MessageBox.Show("Search returned 0 results.", "Error", MessageBoxButtons.OK, MessageBoxIcon.Stop)
        Else
            ' InstanceCount is defined elsewhere; presumably it counts the
            ' occurrences of the "vertTh" marker, one per result row - verify.
            ResultCount = InstanceCount(Source, "vertTh")
            Dim Filename, FileUrl, Filemagnet, FS As String
            Dim FileSeed, FileLeech As Integer

            For I = 0 To ResultCount - 1
                Filename = GetBetween(Source, "title=" & Chr(34) & "Details for ", Chr(34) & ">", I)
                FileUrl = "http://thepiratebay.se/torrent/" & GetBetween(Source, "<a href=" & Chr(34) & "/torrent/", Chr(34) & " class=" & Chr(34) & "detLink" & Chr(34), I)
                ' Seeders and leechers are read from the same right-aligned cell
                ' markup: occurrence 2*I is taken as seeders, 2*I+1 as leechers.
                FileSeed = CInt(GetBetween(Source, "<td align=" & Chr(34) & "right" & Chr(34) & ">", "</td>", 2 * I))
                FileLeech = CInt(GetBetween(Source, "<td align=" & Chr(34) & "right" & Chr(34) & ">", "</td>", 2 * I + 1))
                Filemagnet = "magnet:" & GetBetween(Source, "<a href=" & Chr(34) & "magnet:", Chr(34) & " title=" & Chr(34) & "Download this torrent using magnet", I)
                FS = GetBetween(Source, ", Size", ", ULed by ", I).Replace(" ", "").Replace("&nbsp;", " ")

                filemagneturl.Add(Filemagnet)
                filenamelist.Add(Filename)
                fileurllist.Add(FileUrl)
                fileseeders.Add(CStr(FileSeed))
                fileleechers.Add(CStr(FileLeech))
                filesize.Add(FS)
            Next
        End If
    End Using

    ' One root node per result, with its details as child nodes.
    For I = 0 To filenamelist.Count - 1
        Dim rootNode = TreeView1.Nodes.Add(filenamelist(I))
        rootNode.Nodes.Add("Seeders: " & fileseeders(I))
        rootNode.Nodes.Add("Leechers: " & fileleechers(I))
        rootNode.Nodes.Add(fileurllist(I))
        rootNode.Nodes.Add(filemagneturl(I))
        rootNode.Nodes.Add(filesize(I))
    Next

    ' Set the label once, also for an empty result set, so it never shows the
    ' count of a previous search.
    LabelResults.Text = "Results: " & filenamelist.Count

抓取所选结果的评论的代码:

' Downloads the page at Link, cuts out the comments section and lists every
' comment (user, date, text) in TreeView2 and in the three parallel lists
' UsernameArr / postdatearr / PostArr. Failures are logged, not thrown.
Try
        ' Reset ALL per-page state. postdatearr and the tree were previously
        ' left untouched, so the lists went out of sync and old comments stayed.
        UsernameArr.Clear()
        postdatearr.Clear()
        PostArr.Clear()
        TreeView2.Nodes.Clear()

        Using WC As New System.Net.WebClient
            Dim Source As String = WC.DownloadString(Link)
            ' Keep only the markup between the comments container and the ad banner.
            Source = GetBetween(Source, "<div id=" & Chr(34) & "comments" & Chr(34) & ">", "<div class=" & Chr(34) & "ads" & Chr(34) & " id=" & Chr(34) & "sky-banner" & Chr(34) & ">", 0)
            ' InstanceCount is defined elsewhere; presumably one "byline" per comment - verify.
            Dim CommentCount As Integer = InstanceCount(Source, "byline")
            CommentCounter = CommentCount
            If CommentCount < 1 Then
                MessageBox.Show("This torrent has no comments")
            Else
                For commentIndex As Integer = 0 To CommentCount - 1
                    Dim User As String = GetBetween(Source, "<a href=" & Chr(34) & "/user/", "/" & Chr(34) & " title=" & Chr(34) & "Browse ", commentIndex)
                    Dim Post As String = GetBetween(Source, "comment" & Chr(34) & ">", "</div>", commentIndex).Replace("<br />", " ")
                    Dim DateStr As String = GetBetween(Source, "</a> at ", " CET:", commentIndex)

                    UsernameArr.Add(User)
                    postdatearr.Add(DateStr)
                    PostArr.Add(Post)

                    Dim rootNode = TreeView2.Nodes.Add(User)
                    rootNode.Nodes.Add(DateStr)
                    rootNode.Nodes.Add(Post)
                Next
            End If
        End Using

    Catch ex As Exception
        ' Still best-effort (a failed scrape must not crash the UI), but leave a
        ' trace instead of discarding the error silently.
        System.Diagnostics.Debug.WriteLine("Comment scrape failed: " & ex.ToString())
    End Try

在字符串之间获取文字:

''' <summary>
''' Returns the text found between the (index + 1)-th occurrence of
''' <paramref name="str1"/> and the next occurrence of <paramref name="str2"/>.
''' Both delimiters are matched literally.
''' </summary>
''' <param name="input">Text to search.</param>
''' <param name="str1">Literal opening delimiter.</param>
''' <param name="str2">Literal closing delimiter.</param>
''' <param name="index">Zero-based occurrence of <paramref name="str1"/> to use.</param>
''' <returns>
''' The text after the chosen opening delimiter, up to the next closing
''' delimiter, or up to the end of the next opening delimiter/input when
''' <paramref name="str2"/> does not occur in that segment.
''' </returns>
''' <exception cref="System.IndexOutOfRangeException">
''' <paramref name="str1"/> occurs fewer than index + 1 times in <paramref name="input"/>.
''' </exception>
Private Function GetBetween(ByVal input As String, ByVal str1 As String, ByVal str2 As String, ByVal index As Integer) As String
    ' Regex.Split treats its second argument as a pattern. Escape the delimiters
    ' so characters such as . ( ) ? [ are matched literally instead of being
    ' interpreted as regex syntax (wrong splits or an ArgumentException).
    Dim temp As String = Regex.Split(input, Regex.Escape(str1))(index + 1)
    Return Regex.Split(temp, Regex.Escape(str2))(0)
End Function

0 个答案:

没有答案
相关问题