我正在使用一个简单的线程池加载网页,同时从文件中动态读取网址。但是这个小程序会逐渐占用越来越多的内存,直到耗尽服务器的全部内存、被 OOM killer 终止。看起来 resp.Body.Close() 并没有释放响应正文占用的内存(内存占用 ≈ 已下载页面数 × 平均页面大小)。如何强制 Go 释放为正文 HTML 文本分配的内存?
package main
import (
"bufio"
"fmt"
"io/ioutil"
"net/http"
"os"
"strings"
"sync"
)
// worker receives URLs from linkChan until the channel is closed. Each URL is
// fetched with the package-level default HTTP client, the response body is
// read in full, and one line is printed per URL: "Fail url" when the request
// or the read fails, otherwise "Done url" followed by whether the body
// contains the conversion.js marker. wg.Done is called when the worker exits.
func worker(linkChan chan string, wg *sync.WaitGroup) {
	defer wg.Done()

	// Substring searched for in every downloaded page.
	const marker = "googleadservices.com/pagead/conversion.js"

	for {
		link, open := <-linkChan
		if !open {
			// Channel closed and drained: nothing left to process.
			return
		}

		// Download the page.
		resp, getErr := http.Get(link)
		if getErr != nil {
			fmt.Printf("Fail url: %s\n", link)
			continue
		}

		// Read the whole body, then close it regardless of the read outcome.
		page, readErr := ioutil.ReadAll(resp.Body)
		resp.Body.Close()
		if readErr != nil {
			fmt.Printf("Fail url: %s\n", link)
			continue
		}

		// Report whether the marker occurs anywhere in the page.
		found := strings.Contains(string(page), marker)
		fmt.Printf("Done url: %s\t%t\n", link, found)
	}
}
// main opens ./tmp/new.csv, starts a fixed pool of workers, sends every line
// of the file to them as a URL, and waits until all workers have finished.
// It panics if the file cannot be opened and reports a read error on stderr.
func main() {
	const numWorkers = 30

	// Opening file with urls. The error is checked before the Close is
	// deferred, and before any goroutine is started, so a missing file fails
	// fast without leaving workers behind.
	file, err := os.Open("./tmp/new.csv")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	// Creating worker pool
	lCh := make(chan string, numWorkers)
	wg := new(sync.WaitGroup)
	for i := 0; i < numWorkers; i++ {
		wg.Add(1)
		go worker(lCh, wg)
	}

	// Processing urls. A Scanner yields whole lines; bufio.Reader.ReadLine
	// returns long lines in fragments (isPrefix), which would be sent to the
	// workers as separate, bogus URLs.
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		lCh <- scanner.Text()
	}

	// Let the workers drain what was queued before reporting a read problem.
	close(lCh)
	wg.Wait()

	// Scan stops on the first read error (or on a line longer than the
	// scanner's token limit); surface it instead of ending silently.
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "Reading urls: %v\n", err)
	}
}
以下是pprof工具的一些输出:
flat flat% sum% cum cum%
34.63MB 29.39% 29.39% 34.63MB 29.39% bufio.NewReaderSize
30MB 25.46% 54.84% 30MB 25.46% net/http.(*Transport).getIdleConnCh
23.09MB 19.59% 74.44% 23.09MB 19.59% bufio.NewWriter
11.63MB 9.87% 84.30% 11.63MB 9.87% net/http.(*Transport).putIdleConn
6.50MB 5.52% 89.82% 6.50MB 5.52% main.main
这看起来像 this issue 中描述的问题,但该问题早在 2 年前就已修复。
答案 0 :(得分:4)
I found the answer in this thread on golang-nuts. http.Transport keeps connections open for future reuse in case of another request to the same host, which caused the memory bloat in my case (hundreds of thousands of different hosts). Disabling keep-alives completely solves the problem.
Working code:
// worker receives URLs from linkChan until the channel is closed. Each URL is
// fetched with a client whose transport has keep-alives disabled, so no idle
// connection is retained per host. One line is printed per URL: "Fail url"
// when the request or the body read fails, otherwise "Done url" followed by
// whether the page contains the conversion.js marker. wg.Done is called when
// the worker exits.
func worker(linkChan chan string, wg *sync.WaitGroup) {
	defer wg.Done()

	// One client per worker, reused for every URL this worker handles.
	c := &http.Client{Transport: newNoKeepAliveTransport()}
	for rawURL := range linkChan {
		hasRemCode, err := pageHasConversionScript(c, rawURL)
		if err != nil {
			fmt.Printf("Fail url: %s\n", rawURL)
			continue
		}
		fmt.Printf("Done url: %s\t%t\n", rawURL, hasRemCode)
	}
}

// newNoKeepAliveTransport returns a transport with keep-alives disabled.
// It starts from a copy of http.DefaultTransport so that the default dial and
// TLS-handshake timeouts and the environment proxy settings are kept; a bare
// &http.Transport{} has none of them. If the default transport has been
// replaced by another implementation, a plain transport is used instead.
//
// NOTE: no overall request deadline is set here; a server that accepts the
// connection but never answers can still block a worker.
func newNoKeepAliveTransport() *http.Transport {
	if base, ok := http.DefaultTransport.(*http.Transport); ok {
		tr := base.Clone()
		tr.DisableKeepAlives = true
		return tr
	}
	return &http.Transport{DisableKeepAlives: true}
}

// pageHasConversionScript downloads rawURL with c and reports whether the
// response body contains the conversion.js marker. The body is always closed
// before returning. An error is returned if the request or the read fails.
func pageHasConversionScript(c *http.Client, rawURL string) (bool, error) {
	// Getting body text
	resp, err := c.Get(rawURL)
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return false, err
	}

	// Test page body
	return strings.Contains(string(body), "googleadservices.com/pagead/conversion.js"), nil
}