Question

我的代码似乎发生了死锁，但我找不出原因。代码是一个简单的生产者/消费者模型，外加一些错误处理和执行控制。生产者向ConcurrentQueue中填充条目，消费者循环读取队列并处理其内容，每次处理一个条目。

特别之处在于：队列中存放的是需要执行的异步操作，并且需要等待其结果。正是因为要等待结果，才能实现一定程度的错误处理，例如重试。这些异步操作是把消息转发到集线器（hub）的SignalR调用。集线器的监听方完全依赖这些消息，因此这个队列需要比一般的队列更健壮。

编辑：以下是应大家要求提供的更完整的代码示例。这个示例并不会死锁，详见问题末尾。

/// <summary>
/// Console driver for the sample: enqueues a user-chosen number of actions on an
/// <c>ActionPump</c>, roughly 10% of which are slow enough to be treated as timeouts,
/// then waits for the pump to report that it is done.
/// </summary>
public class Program
{
    /// <summary>
    /// Asks how many actions to enqueue, feeds them to the pump and polls until the pump is done.
    /// </summary>
    /// <param name="args">Command line arguments (unused).</param>
    public static void Main(string[] args)
    {
        Console.Write( "Run how many tries?: " );
        int requested = int.Parse( Console.ReadLine() );

        var actionPump = new ActionPump();
        var random = new Random();

        int attempt = 0;
        while ( attempt < requested )
        {
            // NextDouble() is in [0, 1); adding 0.9 and truncating yields 0 for about
            // 10% of the draws (the slow case) and 1 otherwise.
            double roll = random.NextDouble();
            int failOrNot = (int)( roll + 0.9 );
            Console.WriteLine( "{0}: {1}", attempt, failOrNot );

            Func<Task> work = () => RandomlyTimeout( failOrNot );
            actionPump.EnqueueAction( new ActionWrapper( work ) );

            attempt++;
        }

        // Poll until the pump thread has stopped and the queue is empty.
        while ( true )
        {
            if ( actionPump.IsDone )
            {
                break;
            }

            Thread.Sleep( 10 );
        }

        // NOTE: Inspect the pump's rejects here to see whether any ActionWrapper still holds a Task
        // and whether that Task's status is WaitingForActivation.
        Debugger.Break();
    }

    /// <summary>
    /// Returns a delay task: 3000 ms when <paramref name="failOrNot"/> is 0 (simulating a
    /// call that takes too long), 10 ms otherwise.
    /// </summary>
    /// <param name="failOrNot">0 to simulate a slow call; any other value for a fast one.</param>
    /// <returns>The task that completes after the chosen delay.</returns>
    private static Task RandomlyTimeout(int failOrNot)
    {
        int delayMilliseconds;
        if (failOrNot == 0)
        {
            delayMilliseconds = 3000;
        }
        else
        {
            delayMilliseconds = 10;
        }

        return Task.Delay(delayMilliseconds);
    }
}

/// <summary>
/// Wraps an asynchronous action so it can be executed, awaited and retried while keeping
/// track of how often it failed and whether it eventually succeeded.
/// </summary>
/// <remarks>
/// <see cref="Execute"/> is expected to be called from one thread at a time. The bookkeeping
/// after the action completes may run on a different thread, which is why callers should wait
/// on the task returned by <see cref="Execute"/>: it only completes after
/// <see cref="HasSucceeded"/> and <see cref="FailureCount"/> have been updated.
/// </remarks>
public class ActionWrapper
{
    private static long NextId;

    // 0 means "no id assigned yet"; ids handed out through NextId start at 1.
    private long _id;

    private readonly Func<Task> _action;

    // The task returned by the most recent ExecuteInternal call. Unlike the inner Task, it
    // completes only after the success/failure bookkeeping has run and it never faults.
    private Task _pendingExecution;

    protected readonly TaskStatus[] ValidTaskStates = { TaskStatus.Created, TaskStatus.WaitingForActivation, TaskStatus.WaitingToRun, TaskStatus.Running, TaskStatus.WaitingForChildrenToComplete, };

    /// <summary>
    /// Gets a process-wide unique id, assigned lazily on first access.
    /// </summary>
    public long Id
    {
        get
        {
            long assigned = Interlocked.Read(ref _id);
            if (assigned != 0)
            {
                return assigned;
            }

            // Ids are read from several threads (the caller and the continuation), so both the
            // counter increment and the one-time assignment have to be atomic.
            long candidate = Interlocked.Increment(ref NextId);
            long previous = Interlocked.CompareExchange(ref _id, candidate, 0);
            return previous == 0 ? candidate : previous;
        }
    }

    /// <summary>
    /// Gets or sets the task returned by the wrapped action while an execution is in flight;
    /// null otherwise.
    /// </summary>
    protected Task Task { get; set; }

    /// <summary>Gets the number of failed executions registered so far.</summary>
    public int FailureCount { get; protected internal set; }

    /// <summary>Gets or sets whether the action has completed successfully.</summary>
    public bool HasSucceeded { get; set; }

    /// <summary>
    /// Creates a wrapper around <paramref name="action"/>.
    /// </summary>
    /// <param name="action">Factory that starts the asynchronous operation and returns its task.</param>
    public ActionWrapper(Func<Task> action)
    {
        _action = action;
    }

    /// <summary>
    /// Inspects the current <see cref="Task"/> and decides whether the action should be started again.
    /// </summary>
    /// <returns>
    /// true when a new execution should be started (no task, or the task is no longer in one of
    /// <see cref="ValidTaskStates"/>); false when the existing task is still in flight.
    /// </returns>
    protected virtual bool VerifyTaskState()
    {
        // Snapshot the property: the continuation in ExecuteInternal clears it on another thread,
        // so re-reading it between checks could yield null halfway through.
        Task current = this.Task;

        if (current == null)
        {
            Debug.WriteLine("The action with id {0} that has failed {1} times has not succeeded yet, advising new task creation.", Id, FailureCount);
            return true;
        }

        bool isStillRunning = !current.IsFaulted
            && !current.IsCanceled
            && current.Exception == null
            && ValidTaskStates.Contains(current.Status);

        if (isStillRunning)
        {
            Debug.WriteLine("The action with id {0} that has failed {1} times is still running, awaiting same task.", Id, FailureCount);

            // If the task is still running and in a valid state, don't execute the action again. We don't want
            // to execute the action twice, because we don't know what it does.
            return false;
        }

        // This must have been an unregistered failure, because the ExecuteInternal method always clears
        // the Task property when it detects a failure. Since it hasn't, we can also assume the failure
        // counter hasn't been incremented.
        FailureCount++;
        HasSucceeded = false;

        AggregateException failure = current.Exception;
        if (failure != null)
        {
            Debug.WriteLine("Discovered failure to execute ActionWrapper, the action with id {0} has failed {1} times.{2}", Id, FailureCount, failure);
        }
        else
        {
            Debug.WriteLine("Discovered failure to execute ActionWrapper, Task has status '{0}'. The action with id {1} has failed {2} times.", current.Status, Id, FailureCount);
        }

        try
        {
            // Dispose of the failed or canceled task.
            current.Dispose();
            Debug.WriteLine("The existing task for action with id {0} that has failed {1} times has been disposed. Advising new task creation.", Id, FailureCount);
        }
        catch (Exception e)
        {
            // Include the exception in the message; passing it as an extra argument without a
            // placeholder would silently drop it.
            Console.WriteLine("Failed to dispose of corrupt task for action {0}.{1}", Id, e);
        }
        finally
        {
            this.Task = null;
        }

        return true;
    }

    /// <summary>
    /// Starts the action, or returns the task of the execution that is already in flight.
    /// </summary>
    /// <returns>
    /// A task that completes once the execution has finished and its outcome has been recorded
    /// in <see cref="HasSucceeded"/> and <see cref="FailureCount"/>. Failures of the wrapped
    /// action are recorded, not rethrown. An already completed task is returned when the action
    /// has succeeded before.
    /// </returns>
    public Task Execute()
    {
        if (HasSucceeded)
        {
            Debug.WriteLine("The action with id {0} that has failed {1} times has already succeeded, returning empty task.", Id, FailureCount);
            return Task.FromResult(0);
        }

        // Hand out the wrapper task rather than the inner action task: waiting on the inner task
        // can return before the continuation has recorded the result (which looks like a timeout
        // to the caller) and would rethrow the action's exception into the caller.
        Task pending = _pendingExecution;
        if (pending != null && !pending.IsCompleted)
        {
            Debug.WriteLine("The action with id {0} that has failed {1} times has not succeeded yet, returning existing task.", Id, FailureCount);
            return pending;
        }

        if (!VerifyTaskState())
        {
            // Only reachable when a subclass keeps Task alive outside of the bookkeeping above.
            Task running = this.Task;
            if (running != null)
            {
                Debug.WriteLine("The action with id {0} that has failed {1} times has not succeeded yet, returning existing task.", Id, FailureCount);
                return running;
            }

            Debug.WriteLine("The action with id {0} that has failed {1} times has already succeeded, returning empty task.", Id, FailureCount);
            return Task.FromResult(0);
        }

        Debug.WriteLine("The action with id {0} that has failed {1} times has not been executed yet, returning new task.", Id, FailureCount);
        pending = ExecuteInternal();
        _pendingExecution = pending;
        return pending;
    }

    /// <summary>
    /// Invokes the wrapped action, awaits it and records the outcome.
    /// </summary>
    /// <returns>A task that completes after the outcome has been recorded; it does not fault.</returns>
    protected virtual async Task ExecuteInternal()
    {
        try
        {
            // NOTE: this clears the synchronization context of the calling thread.
            SynchronizationContext.SetSynchronizationContext(null);

            Task running = _action();
            this.Task = running;
            await running.ConfigureAwait(false);

            HasSucceeded = true;

            Debug.WriteLine("The action with id {0} that has failed {1} times has succeeded.", Id, FailureCount);
        }
        catch (Exception e)
        {
            FailureCount++;
            HasSucceeded = false;

            Debug.WriteLine("Failed to execute ActionWrapper, the action with id {0} has failed {1} times.{2}", Id, FailureCount, e);
        }
        finally
        {
            this.Task = null;
        }
    }
}

/// <summary>
/// Processes queued <see cref="ActionWrapper"/> instances one at a time on a dedicated thread,
/// waiting a limited time for each one and re-queueing actions that failed or timed out until
/// they exceed the configured number of failures.
/// </summary>
public class ActionPump
{
    private readonly ConcurrentQueue<ActionWrapper> _actionQueue = new ConcurrentQueue<ActionWrapper>();
    private readonly List<ActionWrapper> _rejects = new List<ActionWrapper>();

    // Private lock object: locking on 'this' would let outside code that locks the pump instance
    // interfere with the pump's own start/stop decisions.
    private readonly object _gate = new object();

    // Volatile because it is polled (through IsRunning/IsDone) from threads other than the pump thread.
    private volatile bool _isRunning;

    private Thread _pumpThread;

    /// <summary>Gets whether the pump thread is currently active.</summary>
    public bool IsRunning
    {
        get { return _isRunning; }
        private set { _isRunning = value; }
    }

    /// <summary>
    /// Gets the number of failures after which an action is discarded. A value of zero or less
    /// means an action is never retried.
    /// </summary>
    public int MaximumActionFailures { get; private set; }

    /// <summary>Gets the time in milliseconds to wait for a single action before it counts as timed out.</summary>
    public int MaximumActionWaitTime { get; private set; }

    /// <summary>Gets whether the pump has stopped and no actions are left in the queue.</summary>
    public bool IsDone
    {
        get { return !IsRunning && _actionQueue.IsEmpty; }
    }

    /// <summary>
    /// Creates a pump.
    /// </summary>
    /// <param name="maximumActionFailures">Failures after which an action is discarded.</param>
    /// <param name="maximumActionWaitTime">Milliseconds to wait for one execution of an action.</param>
    public ActionPump(int maximumActionFailures = 5, int maximumActionWaitTime = 1000)
    {
        MaximumActionFailures = maximumActionFailures;
        MaximumActionWaitTime = maximumActionWaitTime;
    }

    /// <summary>
    /// Adds an action to the queue and makes sure the pump is running.
    /// </summary>
    /// <param name="action">The action to process.</param>
    public void EnqueueAction(ActionWrapper action)
    {
        _actionQueue.Enqueue(action);
        StartPump();
    }

    /// <summary>
    /// Starts the pump thread unless it is already running.
    /// </summary>
    public void StartPump()
    {
        lock (_gate)
        {
            if (IsRunning)
            {
                return;
            }

            IsRunning = true;

            try
            {
                _pumpThread = new Thread(Pump);
                _pumpThread.Start();
            }
            catch
            {
                // The thread never started, so nothing will ever reset the flag for us.
                IsRunning = false;
                throw;
            }
        }
    }

    /// <summary>
    /// Thread body: drains the queue and stops once it is empty.
    /// </summary>
    private void Pump()
    {
        ActionWrapper action = null;

        try
        {
            while (true)
            {
                /* Preparation code before running action. */

                while (_actionQueue.TryDequeue(out action))
                {
                    ProcessAction(action);
                }

                // Decide to stop under the same lock StartPump uses. Without it, an item enqueued
                // between the last emptiness check and clearing IsRunning would be stranded:
                // StartPump would still see a running pump and not start a new one.
                lock (_gate)
                {
                    if (_actionQueue.IsEmpty)
                    {
                        IsRunning = false;
                        return;
                    }
                }
            }
        }
        catch (Exception e)
        {
            Debug.WriteLine("Failed to pump action with id {0}.{1}", action == null ? -1 : action.Id, e);
            /* Cleanup code */

            lock (_gate)
            {
                IsRunning = false;
            }
        }
    }

    /// <summary>
    /// Executes a single action, waits for it and decides whether it is re-queued, discarded or done.
    /// </summary>
    /// <param name="action">The action that was just taken from the queue.</param>
    private void ProcessAction(ActionWrapper action)
    {
        bool canRepeatedlyFail = MaximumActionFailures > 0;
        int originalFailureCount = action.FailureCount;

        try
        {
            action.Execute().Wait(MaximumActionWaitTime);
        }
        catch (Exception e)
        {
            // A single misbehaving action (e.g. a faulted task rethrown by Wait) must not stop
            // the pump and strand the rest of the queue. If the wrapper did not register the
            // failure itself, the bookkeeping below counts it as a failed attempt.
            Debug.WriteLine("Failed to pump action with id {0}.{1}", action.Id, e);
        }

        // The action has timed out if it has not succeeded nor failed after waiting for the allotted time.
        bool hasTimedOut = !action.HasSucceeded && originalFailureCount == action.FailureCount;
        bool hasRepeatedlyFailed = canRepeatedlyFail && action.FailureCount >= MaximumActionFailures;

        if (hasTimedOut)
        {
            // A timeout counts as a failure. If it times out too many times, we'll discard it.
            action.FailureCount++;
            Debug.WriteLine("An action with id {0} that has failed {1} times (excl.) has timed out.", action.Id, originalFailureCount);

            if (canRepeatedlyFail && action.FailureCount < MaximumActionFailures)
            {
                Debug.WriteLine("The action with id {0} that has failed {1} times (excl.) is requeued to be reexamined later", action.Id, originalFailureCount);
                _actionQueue.Enqueue(action);
            }
            else
            {
                Debug.WriteLine("An action with id {0} has failed to execute too many times. Skipping execution and removing it from the queue.", action.Id);
            }
        }
        else if (canRepeatedlyFail && !action.HasSucceeded && !hasRepeatedlyFailed)
        {
            Debug.WriteLine("The action with id {0} that has failed {1} times (excl.) is requeued to be reexamined later", action.Id, originalFailureCount);
            _actionQueue.Enqueue(action);
        }
        else if (!action.HasSucceeded || hasRepeatedlyFailed)
        {
            Debug.WriteLine("An action with id {0} has failed to execute too many times. Skipping execution and removing it from the queue.", action.Id);
        }

        if (!action.HasSucceeded)
        {
            Debug.WriteLine("Storing reject action with id {0} forever. Use in debugger to see if it completes sometime.",
                            action.Id);

            // A retried action passes through here on every failed attempt; keep one entry only.
            if (!_rejects.Contains(action))
            {
                _rejects.Add(action);
            }
        }
        else
        {
            _rejects.Remove(action);
        }
    }
}

在这段代码中，我试图规避几种死锁场景：首先，我以为ConfigureAwait会有所帮助，因为如果await之后的代码恰好要在同步执行Wait调用的那个线程上继续运行，我们就无法执行catch块，也无法执行把HasSucceeded设置为true的代码。Wait调用会阻塞该线程，而HasSucceeded = true语句恰恰需要在这个被阻塞的线程上执行。

其次，我考虑到如果用Task.Run或Task.Factory.StartNew启动泵，泵会运行在ThreadPool线程上，或者至少运行在Task系统可以使用的线程上。即使使用了ConfigureAwait，await之后的代码仍然需要一个线程来执行。ConfigureAwait只是表示不必回到原来的线程，但并不保证后续代码不会碰巧选中泵当前所在的那个ThreadPool线程。因此我改为手动创建并管理一个新线程，希望借此排除这种不太明确的情况。

作为最后的尝试，为了让代码摆脱SynchronizationContext（它可能要求代码在特定线程上继续执行），我把它设置为null。不过调试时我发现，运行时该上下文本来就是null。

运行时的现象是：我先看到一条日志，表明正在执行Execute；大约10秒后，又看到一条日志，表明该操作失败了。这种情况只是偶尔发生，大约每向泵中放入50到100个条目出现一次。我认为ExecuteInternal方法的剩余部分因为前面的Wait调用而死锁了。

但它为什么会死锁呢？它访问不到我的SynchronizationContext，何况该上下文本来就是null。到底发生了什么？

PS：我知道Wait调用是同步的，本质上容易导致死锁，但我无法控制操作返回的Task。SignalR任务有时要等很久才能完成（如果它最终能完成的话）。在这种情况下，我希望能够超时，并通过再次调用该操作来重试。

编辑：起初我以为示例代码发生了死锁，因为（输入15个或更多条目时）有一些被拒绝的任务，其Task.Status为“WaitingForActivation”。事实证明，这个状态只是表示“我已准备好，正在等待继续执行和/或把值返回给等待我的代码”。在代码中加入Pump.IsDone之后，我看到这些任务最终都被泵的重试逻辑清理掉了。

我之所以认为原始代码发生了死锁，是因为SignalR消息已经到达UI，但发送这些消息的任务有时（看起来）会一直等待下去。这也是我当初加入重试逻辑的原因；然而即使消息已经送达，SignalR的Task仍然没有完成。我想我的问题更多出在SignalR上，而不是任务死锁。

关于任务，我还有最后一个小问题：如果一个任务永远不结束，会发生什么？是否有代码会监视这种情况并通过超时关闭这些任务，还是它们会一直占用ThreadPool线程直到应用程序结束？如果没有_rejects列表，我只是“泄漏”了那些重试x次后仍然超时的任务，它们最终会被清理掉，还是会造成内存泄漏？当我无法控制任务的创建（因而无法在创建任务时利用内置功能添加超时或取消令牌）时，怎样处理这个问题最好？

在非ThreadPool线程

0 个答案: