Goroutine、channel 与 select 语句

时间:2017-07-24 00:31:46

标签: multithreading go channel goroutine

我在组织 goroutine 和 channel 的结构时遇到了麻烦。我的 select 语句总是在所有 goroutine 完成之前就退出了。我知道问题出在我发送完成信号的位置,那么我应该在哪里发送完成信号?

// startWorker collects results on the calling goroutine until a value is
// received on quit.
//
// Values received on ok have their Lead appended to results.BackFill; values
// received on err are appended to results.BadLeads. When quit fires, the bad
// leads are logged through logBl if verbose is set, a summary (elapsed time
// and both counts) is logged, and the function returns.
//
// NOTE(review): wg.Done is deferred here, but the BackFillParallel shown in
// this file only calls wg.Add for the ProcessLead goroutines and never calls
// wg.Wait — confirm the WaitGroup accounting before relying on it.
func startWorker(ok chan LeadRes, err chan LeadResErr, quit chan int, verbose bool, wg *sync.WaitGroup) {
    var results ProcessResults
    defer wg.Done()
    log.Info("Starting . . .")
    start := time.Now()

    // Loop until quit is signalled; select picks whichever channel is ready.
    for {
        select {
        case lead := <-ok:
            results.BackFill = append(results.BackFill, lead.Lead)
        // The received value shadows the err channel parameter inside this case only.
        case err := <-err:
            results.BadLeads = append(results.BadLeads, err)
        case <-quit:
            if verbose {
                log.Info("Logging errors from unprocessed leads . . .")
                logBl(results.BadLeads)
            }
            log.WithFields(log.Fields{
                "time-elapsed":                time.Since(start),
                "number-of-unprocessed-leads": len(results.BadLeads),
                "number-of-backfilled-leads":  len(results.BackFill),
            }).Info("Done")
            return
        }
    }
}

// BackFillParallel starts one goroutine per lead, each running ProcessLead,
// and then runs startWorker on the calling goroutine to collect the output.
//
// The three channels come from getChans (not shown here; presumably the
// result, error and quit channels, given how they are passed on — confirm).
//
// NOTE(review): done is true only for the last element of leads, and
// ProcessLead uses it to signal the quit channel. The goroutines run
// concurrently, so the one holding done=true is not guaranteed to finish last.
func BackFillParallel(leads []Lead, verbose bool) {
    var wg sync.WaitGroup
    gl, bl, d := getChans()
    for i, lead := range leads {
        // Mark the final lead so its goroutine sends the quit signal.
        done := false
        if len(leads)-1 == i {
            done = true
        }
        wg.Add(1)
        // lead and done are passed as arguments so each goroutine gets its own copy.
        go func(lead Lead, done bool, wg *sync.WaitGroup) {
            ProcessLead(lead, gl, bl, d, done, wg)
        }(lead, done, &wg)

    }
    // Blocks until startWorker receives from the quit channel.
    startWorker(gl, bl, d, verbose, &wg)
}

// ProcessLead walks the payloads of lead, base64-decodes each payload's
// MetaData and tries to unmarshal it as JSON into a Metadata value. When the
// unmarshal fails, the decoded string is passed to FixMDStr and the payload is
// kept with the returned metadata. The lead, with its Payload replaced by the
// kept payloads, is then sent on c1. If done is true a value is sent on c3
// afterwards. wg.Done is called when the function returns.
//
// Decode and FixMDStr failures are reported on c2 as LeadResErr values.
//
// NOTE(review): after an error is sent on c2 the loop does not skip the
// payload, so processing continues with the failed data — confirm intent.
// NOTE(review): payloads whose metadata unmarshals successfully are not
// appended to payloads — presumably only repaired payloads are wanted; verify.
func ProcessLead(lead Lead, c1 chan LeadRes, c2 chan LeadResErr, c3 chan int, done bool, wg *sync.WaitGroup) {
    defer wg.Done()
    var payloads []Payload
    for _, p := range lead.Payload {
        decMDStr, err := base64.StdEncoding.DecodeString(p.MetaData)
        if err != nil {
            c2 <- LeadResErr{lead, err.Error()}
        }
        var decMetadata Metadata
        // Only metadata that fails to parse goes through the repair path.
        if err := json.Unmarshal(decMDStr, &decMetadata); err != nil {
            goodMetadata, err := FixMDStr(string(decMDStr))
            if err != nil {
                c2 <- LeadResErr{lead, err.Error()}
            }
            p.MetaData = goodMetadata

            payloads = append(payloads, p)
        }
    }

    lead.Payload = payloads
    c1 <- LeadRes{lead}
    // Signal the quit channel when this call was flagged as the last one.
    if done {
        c3 <- 0
    }
}

1 个答案:

答案 0 :(得分:0)

首先评论我在代码中看到的主要问题:

您把 done 变量传给了最后一次 ProcessLead 调用,然后在 ProcessLead 中用该变量通过 quit channel 来停止 worker。这样做的问题是,"最后一次" ProcessLead 调用可能会先于其他 ProcessLead 调用完成,因为它们是并行执行的。

第一次改进

将您的问题视为管道。你有3个步骤:

  1. 浏览所有潜在客户并为每个潜在客户启动例程
  2. 各个 goroutine 处理各自的潜在客户(lead)
  3. 收集结果
  4. 在步骤 2 中扇出(fan-out)之后,最简单的同步方式是 WaitGroup。如前所述,您目前没有调用 wg.Wait() 进行同步;而如果直接调用,又会与收集结果的例程形成死锁。您需要另一个 goroutine,把同步与收集例程分开,才能使其正常工作。

    大致如下(抱歉,我删掉了部分代码,以便更清楚地看出结构):

    // BackFillParallel fans out one ProcessLead goroutine per lead and collects
    // their output with startWorker on the calling goroutine.
    //
    // A separate goroutine owns the WaitGroup: it waits for every ProcessLead
    // call to return and then closes the quit channel d. This lets startWorker
    // keep receiving from gl and bl while the workers are still sending.
    func BackFillParallel(leads []Lead, verbose bool) {
        gl, bl, d := make(chan LeadRes), make(chan LeadResErr), make(chan int)
        // additional goroutine with wg.Wait() and closing the quit channel
        go func(d chan int) {
            var wg sync.WaitGroup
            // The index is not needed; naming it would be an "unused variable" compile error.
            for _, lead := range leads {
                wg.Add(1)
                go func(lead Lead, wg *sync.WaitGroup) {
                    ProcessLead(lead, gl, bl, wg)
                }(lead, &wg)
            }
            wg.Wait()
            // stop routine after all other routines are done
            // if your channels have buffers you might want make sure there is nothing in the buffer before closing
            close(d) // you can simply close a quit channel. just make sure to only close it once
        }(d)
    
        // now startworker is running parallel to wg.Wait() and close(d)
        startWorker(gl, bl, d, verbose)
    }
    
    // startWorker drains the ok and err channels, printing every value it
    // receives, and returns as soon as a receive on quit succeeds (which
    // includes quit being closed). verbose is accepted but not used here.
    func startWorker(ok chan LeadRes, err chan LeadResErr, quit chan int, verbose bool) {
        for {
            // select has no case priority, so the order below is cosmetic.
            select {
            case <-quit:
                return
            case failure := <-err:
                fmt.Println(failure)
            case result := <-ok:
                fmt.Println(result)
            }
        }
    }
    
    // ProcessLead walks the payloads of lead, base64-decodes each payload's
    // MetaData and tries to unmarshal it as JSON into a Metadata value. When the
    // unmarshal fails, the decoded string is handed to FixMDStr and the payload
    // is kept with the repaired metadata. The lead, with its Payload replaced by
    // the kept payloads, is finally sent on c1. wg.Done is called on return.
    //
    // A payload that cannot be decoded or repaired is reported on c2 and then
    // skipped, so no payload built from failed data ends up in the result.
    //
    // NOTE(review): payloads whose metadata already unmarshals cleanly are not
    // included in the result — presumably only repaired payloads are wanted;
    // verify against the caller.
    func ProcessLead(lead Lead, c1 chan LeadRes, c2 chan LeadResErr, wg *sync.WaitGroup) {
        defer wg.Done()
        var payloads []Payload
        for _, p := range lead.Payload {
            decMDStr, err := base64.StdEncoding.DecodeString(p.MetaData)
            if err != nil {
                c2 <- LeadResErr{lead, err.Error()}
                // Nothing usable was decoded; move on to the next payload.
                continue
            }
            var decMetadata Metadata
            if err := json.Unmarshal(decMDStr, &decMetadata); err != nil {
                goodMetadata, err := FixMDStr(string(decMDStr))
                if err != nil {
                    c2 <- LeadResErr{lead, err.Error()}
                    // The repair failed, so do not keep this payload.
                    continue
                }
                p.MetaData = goodMetadata
    
                payloads = append(payloads, p)
            }
        }
    
        lead.Payload = payloads
        c1 <- LeadRes{lead}
    }
    

    建议的解决方案

    如代码注释中所述,如果使用带缓冲的 channel,可能会遇到麻烦。复杂性来自于您有两个输出 channel(分别用于 Lead 和 LeadErr)。您可以使用以下结构来避免这种情况:

    // BackFillParallel fans out one ProcessLead goroutine per lead, gathers
    // everything sent on the result and error channels, and prints both slices
    // once all goroutines have finished. verbose is accepted but not used here.
    //
    // Shutdown is driven by closing the channels rather than by a quit channel:
    // once every ProcessLead call has returned, gl and bl are closed, which ends
    // the collectors' range loops and releases the final wg.Wait.
    func BackFillParallel(leads []Lead, verbose bool) {
        gl, bl := make(chan LeadRes), make(chan LeadResErr)
    
        // one goroutine that blocks until all ProcessLead functions are done
        go func(gl chan LeadRes, bl chan LeadResErr) {
            var wg sync.WaitGroup
            for _, lead := range leads {
                wg.Add(1)
                go func(lead Lead, wg *sync.WaitGroup) {
                    ProcessLead(lead, gl, bl, wg)
                }(lead, &wg)
            }
            wg.Wait()
            // All senders have returned. Closing is what lets the collectors'
            // range loops terminate; without it the wg.Wait below never returns.
            close(gl)
            close(bl)
        }(gl, bl)
    
        // main routine blocks until all results and errors are collected
        var wg sync.WaitGroup
        var (
            res  []LeadRes
            errs []LeadResErr
        )
        wg.Add(2) // add 2 for resCollector and errCollector
        // Pointers are passed so the collectors' appends are visible here.
        go resCollector(&wg, gl, &res)
        go errCollector(&wg, bl, &errs)
        wg.Wait()
    
        fmt.Println(res, errs) // in these two variables you will have the results.
    }
    
    // resCollector appends every value received on ok to *res until ok is
    // closed, then calls wg.Done.
    func resCollector(wg *sync.WaitGroup, ok chan LeadRes, res *[]LeadRes) {
        defer wg.Done()
        for lead := range ok {
            *res = append(*res, lead)
        }
    }
    
    // errCollector appends every value received on ok to *res until ok is
    // closed, then calls wg.Done.
    func errCollector(wg *sync.WaitGroup, ok chan LeadResErr, res *[]LeadResErr) {
        defer wg.Done()
        for err := range ok {
            *res = append(*res, err)
        }
    }
    
    // ProcessLead function as in "First improvement"