
时间:2011-05-23 12:15:29

标签: .net linq group-by memory-management linq-to-objects

对大量数据项(数千兆字节)运行LINQ to Objects的GroupBy()方法可能会占用大量内存。如果IEnumerable<T>已经按键排好序,我们可以编写一个不会消耗那么多内存的GroupBy。


4 个答案:

答案 0 :(得分:3)


/// <summary>
/// Splits <paramref name="source"/> into runs of consecutive elements whose
/// projected keys are equal, yielding each run as soon as the key changes.
/// </summary>
/// <remarks>
/// Only the run currently being built is buffered by this method. Equal keys
/// that are not adjacent in <paramref name="source"/> produce separate runs,
/// so the input is expected to be ordered (or at least clustered) by key.
/// </remarks>
/// <typeparam name="TElement">Type of the elements in the sequence.</typeparam>
/// <typeparam name="TKey">Type of the key produced by <paramref name="projection"/>.</typeparam>
/// <param name="source">Sequence to split.</param>
/// <param name="projection">Function that extracts the grouping key from an element.</param>
/// <returns>
/// A lazily evaluated sequence of non-empty lists, one per run of equal keys,
/// in source order. An empty source yields no lists.
/// </returns>
static IEnumerable<IList<TElement>> GroupByChanges<TElement, TKey>
    (this IEnumerable<TElement> source,
     Func<TElement, TKey> projection)
{
    // TODO: Argument validation, splitting this into two methods
    // to achieve eager validation.
    // TODO: Allow a custom comparer to be used, possibly even
    // an IComparer<T> instead of an IEqualityComparer<T>
    IEqualityComparer<TKey> comparer = EqualityComparer<TKey>.Default;

    using (IEnumerator<TElement> iterator = source.GetEnumerator())
    {
        if (!iterator.MoveNext())
        {
            yield break;
        }

        TKey currentKey = projection(iterator.Current);
        IList<TElement> currentList = new List<TElement> { iterator.Current };
        while (iterator.MoveNext())
        {
            TKey key = projection(iterator.Current);
            if (!comparer.Equals(currentKey, key))
            {
                // Key changed: hand the finished run to the caller and start
                // a fresh list so the yielded one is never mutated afterwards.
                yield return currentList;
                currentList = new List<TElement>();
            }

            currentList.Add(iterator.Current);
            currentKey = key;
        }

        // The final run is only complete once the source is exhausted.
        yield return currentList;
    }
}

如果您需要完整的IGrouping<,>实现,那会稍微困难一些——不过您随时可以直接使用我的Edulinq实现。

GroupByChanges的实现只需很小的改动——只要修改currentList的赋值,把键传给Grouping的构造函数即可:

// Constructs the group with its key and seeds it with the element the
// iterator is currently positioned on, via a collection initializer.
// NOTE(review): relies on a Grouping<TKey, TElement> type with a single-key
// constructor and an Add method that is not shown here — confirm against the
// implementation being used.
Grouping<TKey, TElement> currentGroup = new Grouping<TKey, TElement>(currentKey)
    { iterator.Current };

答案 1 :(得分:1)


答案 2 :(得分:1)


/// <summary>
/// Streaming alternative to GroupBy for sequences that are already ordered
/// (or clustered) by key: only the group being built is buffered.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Groups consecutive elements with equal keys using the default equality
    /// comparer for <typeparamref name="TKey"/>.
    /// </summary>
    /// <param name="source">Sequence to group; expected to be ordered by key.</param>
    /// <param name="keySelector">Function that extracts the key from an element.</param>
    /// <returns>A lazily evaluated sequence of groups in source order.</returns>
    public static IEnumerable<IGrouping<TKey, TSource>> GroupByAlreadyOrdered<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector)
    {
        return source.GroupByAlreadyOrdered(keySelector, null);
    }

    /// <summary>
    /// Groups consecutive elements whose keys are equal according to
    /// <paramref name="comparer"/>. Equal keys that are not adjacent in
    /// <paramref name="source"/> end up in separate groups.
    /// </summary>
    /// <param name="source">Sequence to group; expected to be ordered by key.</param>
    /// <param name="keySelector">Function that extracts the key from an element.</param>
    /// <param name="comparer">
    /// Comparer used to detect a key change, or <c>null</c> to use
    /// <see cref="EqualityComparer{T}.Default"/>.
    /// </param>
    /// <returns>
    /// A lazily evaluated sequence of non-empty groups in source order; an
    /// empty source yields no groups. Each group's key is the key of the last
    /// element added to it.
    /// </returns>
    public static IEnumerable<IGrouping<TKey, TSource>> GroupByAlreadyOrdered<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector, IEqualityComparer<TKey> comparer)
    {
        TKey currentKey = default(TKey);
        // null means "no element seen yet"; once assigned, the list always
        // holds at least one element because an item is added immediately.
        List<TSource> currentGroup = null;
        comparer = comparer ?? EqualityComparer<TKey>.Default;

        foreach (var item in source)
        {
            TKey key = keySelector(item);
            if (currentGroup == null)
            {
                currentGroup = new List<TSource>();
            }
            else if (!comparer.Equals(key, currentKey))
            {
                // Key changed: emit the finished group and start a new list so
                // the emitted group is not modified afterwards.
                yield return new Grouping<TKey, TSource>(currentKey, currentGroup);
                currentGroup = new List<TSource>();
            }

            currentGroup.Add(item);
            currentKey = key;
        }

        // Last group
        if (currentGroup != null)
        {
            yield return new Grouping<TKey, TSource>(currentKey, currentGroup);
        }
    }

    /// <summary>
    /// Minimal <see cref="IGrouping{TKey, TElement}"/> that pairs a key with
    /// the elements supplied to its constructor.
    /// </summary>
    private class Grouping<TKey, TElement> : IGrouping<TKey, TElement>
    {
        private readonly TKey _key;
        private readonly IEnumerable<TElement> _elements;

        public Grouping(TKey key, IEnumerable<TElement> elements)
        {
            _key = key;
            _elements = elements;
        }

        public TKey Key
        {
            get { return _key; }
        }

        public IEnumerator<TElement> GetEnumerator()
        {
            return _elements.GetEnumerator();
        }

        IEnumerator IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }
    }
}


答案 3 :(得分:0)


/// <summary>
/// Groups consecutive elements of <paramref name="source"/> that share the
/// same key, yielding each group as soon as the key changes.
/// </summary>
/// <remarks>
/// Keys are compared with <see cref="EqualityComparer{T}.Default"/>, so a
/// null key is handled instead of throwing. Equal keys that are not adjacent
/// in <paramref name="source"/> produce separate groups.
/// </remarks>
/// <param name="source">Sequence to group; expected to be ordered by key.</param>
/// <param name="keySelector">Function that extracts the key from an element.</param>
/// <returns>
/// A lazily evaluated sequence of non-empty groups in source order; an empty
/// source yields no groups.
/// </returns>
public static IEnumerable<IGrouping<TKey, TSource>> FastGroupBy<TSource, TKey>(
    this IEnumerable<TSource> source,
    Func<TSource, TKey> keySelector)
{
    IEqualityComparer<TKey> comparer = EqualityComparer<TKey>.Default;

    using (var enumerator = source.GetEnumerator())
    {
        if (!enumerator.MoveNext())
        {
            yield break;
        }

        TKey key = keySelector(enumerator.Current);
        List<TSource> list = new List<TSource> { enumerator.Current };

        while (enumerator.MoveNext())
        {
            var currentKey = keySelector(enumerator.Current);
            if (!comparer.Equals(key, currentKey))
            {
                // Key changed: emit the finished group and start a new list so
                // the emitted group is not modified afterwards.
                yield return new Grouping<TKey, TSource>(key, list);

                key = currentKey;
                list = new List<TSource>();
            }

            list.Add(enumerator.Current);
        }

        // The final group is only complete once the source is exhausted.
        yield return new Grouping<TKey, TSource>(key, list);
    }
}