
时间:2014-10-13 18:50:47

标签: algorithm random

假设我有一个项目列表,其中每个项目可以重复一次或多次(1 到 n 次)。我试图找到一种算法,从列表中随机提取项目,直到列表为空,但约束条件是:任何项目连续重复的次数不能超过一个固定值(每个项目的上限可以不同)。我希望该算法具有"正确的概率"(稍后我会解释这一点)。


Item | Count | Max. consecutive
  A  |   2   |       1
  B  |   4   |       2


B A B A B B  

但以下情况不正确,因为 "B" 连续重复了 3 次,而其最大值为 2:


我设法创建了一个有效的算法,但它的概率存在问题。我将把代码放在第一位,然后再谈谈这个问题。它是一个C#类;抱歉,如果你不喜欢这种格式或GOTO,那就让我们成为朋友; P



方法 GetIndexMinMaxGroups() 在类内部使用,用于计算某个项目的连续元素分组数量的最小值和最大值(例如,在序列 B B B A B A 中,"A" 有 2 个分组,"B" 也有 2 个分组)。实际上,该方法计算出的数值并不完全准确,因为有些因素没有被考虑在内,但它返回的值有两个用途:供 CheckIndexMaxConsecutiveCorrectness() 进行检查(如果最小分组数大于最大分组数,则说明最大连续重复次数的设置不正确);以及判断 GetNextRandomIndex() 何时必须返回某个确定的项目(当最小值等于最大值时)。我没有用任何数学方法推导这些性质,所以其中可能有错误,尽管到目前为止它在我数百万次的测试中都"神奇地"完美运行……

/// <summary>
/// A bag of item indexes where every index has a number of instances and a maximum
/// number of times it may be returned consecutively. Indexes are extracted at random,
/// weighted by their remaining instances, until the bag is empty.
/// </summary>
class ShuffleBag
{
    /// <summary>Per-index bookkeeping.</summary>
    class IndexData
    {
        public int Count = 0;                       // The current number of instances of the index in the bag.
        public int MaxConsecutive = int.MaxValue;   // The maximum consecutive repetitions allowed for the index.
    }

    // One entry per unique index handled by this bag.
    readonly IndexData[] IndexesDataList;

    // Random number generator, created once per bag.
    readonly Random RandGenerator;

    // Total number of instances (over all indexes) still in the bag.
    int _RemainingElementsCount = 0;

    /// <summary>Number of elements that can still be extracted from the bag.</summary>
    public int RemainingElementsCount
    {
        get { return _RemainingElementsCount; }
    }

    // Last retrieved index (-1 if no index has been retrieved yet),
    // and how many times in a row it has been returned so far.
    int LastIndex = -1;
    int LastRepetitions = 0;

    /// <summary>Creates an empty bag able to hold <paramref name="uniqueIndexesCount"/> different indexes.</summary>
    /// <param name="uniqueIndexesCount">Number of unique indexes (valid indexes are 0 .. count - 1).</param>
    public ShuffleBag(int uniqueIndexesCount)
    {
        IndexesDataList = new IndexData[uniqueIndexesCount];
        for (int i = 0;  i < uniqueIndexesCount;  i++)
        {
            IndexesDataList[i] = new IndexData();
        }

        RandGenerator = new Random();
    }

    /// <summary>
    /// Resets the shuffle bag; must be called before reusing it.
    /// The number of unique indexes is not reset.
    /// Does not need to be called just after creating the bag.
    /// </summary>
    public void Reset()
    {
        for (int i = 0;  i < IndexesDataList.Length;  i++)
        {
            IndexesDataList[i].Count = 0;
            IndexesDataList[i].MaxConsecutive = int.MaxValue;
        }

        _RemainingElementsCount = 0;

        LastIndex = -1;
        LastRepetitions = 0;
    }

    /// <summary>
    /// Checks if it is possible to honor the max repetitions of an index with the provided data.
    /// If it is not possible, the behaviour of a shuffle bag with those parameters is undefined.
    /// </summary>
    /// <param name="maxConsecutive">Maximum consecutive repetitions allowed for the index (must be positive).</param>
    /// <param name="indexElements">Number of instances of the index.</param>
    /// <param name="bagTotalElements">Total number of elements in the bag, including this index.</param>
    /// <returns>True when the minimum number of groups does not exceed the maximum.</returns>
    public static bool CheckIndexMaxConsecutiveCorrectness(int maxConsecutive, int indexElements, int bagTotalElements)
    {
        int min, max;
        GetIndexMinMaxGroups(indexElements, maxConsecutive, bagTotalElements, out min, out max);
        return min <= max;
    }

    /// <summary>
    /// Sets the data for the specified index.
    /// Can be called after starting to use the bag, but if any parameters make the
    /// max consecutive repetitions invalid, the behaviour of the bag is undefined.
    /// </summary>
    /// <param name="index">Index to configure.</param>
    /// <param name="count">Number of instances of the index in the bag.</param>
    /// <param name="maxConsecutive">Maximum consecutive repetitions allowed for the index.</param>
    public void SetIndexData(int index, int count, int maxConsecutive)
    {
        IndexData data = IndexesDataList[index];

        // Keep the bag-wide total in sync with the per-index change.
        _RemainingElementsCount += count - data.Count;

        data.Count          = count;
        data.MaxConsecutive = maxConsecutive;
    }

    /// <summary>
    /// Retrieves the next random index and removes one instance of it from the bag.
    /// The caller must check that there are remaining elements in the bag.
    /// </summary>
    /// <returns>The extracted index.</returns>
    /// <exception cref="InvalidOperationException">The bag is empty.</exception>
    public int GetNextRandomIndex()
    {
        if (_RemainingElementsCount <= 0)
        {
            throw new InvalidOperationException("The shuffle bag is empty.");
        }

        int index = FindForcedIndex();
        if (index < 0)
        {
            index = PickWeightedIndex();
        }

        // Remove the extracted element from the bag.
        IndexData resultData = IndexesDataList[index];
        resultData.Count--;
        _RemainingElementsCount--;

        // Track the current run so the max-consecutive limit can be enforced on the next call.
        if (LastIndex == index)
        {
            LastRepetitions++;
        }
        else
        {
            LastIndex = index;
            LastRepetitions = 1;
        }

        return index;
    }

    // Returns the first index whose minimum possible groups equals the maximum
    // (it must be returned now to keep the sequence feasible), or -1 if there is none.
    int FindForcedIndex()
    {
        for (int index = 0;  index < IndexesDataList.Length;  index++)
        {
            IndexData data = IndexesDataList[index];

            int minGroups, maxGroups;
            GetIndexMinMaxGroups(data.Count, data.MaxConsecutive, _RemainingElementsCount, out minGroups, out maxGroups);

            if (minGroups == maxGroups)
            {
                return index;
            }
        }

        return -1;
    }

    // Chooses an index with probability proportional to its remaining instances. If the chosen
    // index has already reached its maximum consecutive repetitions, the next index (cyclically)
    // that still has instances is returned instead.
    int PickWeightedIndex()
    {
        int rand = RandGenerator.Next(_RemainingElementsCount);

        for (int index = 0;  index < IndexesDataList.Length;  index++)
        {
            IndexData data = IndexesDataList[index];

            if (rand >= data.Count)
            {
                // This index doesn't correspond with the random number; move on to the next one.
                rand -= data.Count;
                continue;
            }

            if (index == LastIndex  &&  data.MaxConsecutive == LastRepetitions)
            {
                for (int k = 1;  k <= IndexesDataList.Length - 1;  k++)
                {
                    int m = WrapIndexSimple(index + k, IndexesDataList.Length);
                    if (IndexesDataList[m].Count > 0)
                    {
                        return m;
                    }
                }
            }

            // Either the limit is not reached, or no other index is available.
            return index;
        }

        throw new InvalidOperationException("The element counts of the shuffle bag are inconsistent.");
    }

    /// <summary>
    /// Calculates the minimum and maximum possible groups of consecutive repetitions
    /// for an index with the specified data.
    /// If any provided data is invalid, the behaviour and results are undefined.
    /// </summary>
    static void GetIndexMinMaxGroups(int indexRemainingElements, int indexMaxConsecutive, int bagRemainingElements, out int min, out int max)
    {
        int rem;
        int div = Math.DivRem(indexRemainingElements, indexMaxConsecutive, out rem);

        // Fewest groups: fill every group to the limit (ceiling division).
        min = rem == 0  ?  div  :  div + 1;
        // Most groups: one more than the number of elements of the other indexes.
        max = bagRemainingElements - indexRemainingElements + 1;
    }

    /// <summary>
    /// Converts an index out of bounds to a valid value.
    /// "length" is the number of elements of the collection.
    /// Only works for indexes that are less than 2*length.
    /// </summary>
    static int WrapIndexSimple(int index, int length)
    {
        if (index >= length)
        {
            return index - length;
        }
        else if (index < 0)
        {
            return length + index;
        }
        else
        {
            return index;
        }
    }
}



Item | Count | Max. consecutive
  a  |   30  |       3
  X  |   10  |       2

如果我没算错,一共有 13 种不同的可能序列。在前 3 次调用中,该方法每返回 1 次 "X" 就应返回 12 次 "a";在第 4 次调用中,每返回 10 次 "X" 就应返回 3 次 "a"。以下是这些不同的序列:

Seq. 1 | Seq. 2 | Seq. 3 | Seq. 4 | Seq. 5 | Seq. 6 | Seq. 7 | Seq. 8 | Seq. 9 | Seq. 10 | Seq. 11 | Seq. 12 | Seq. 13 | Number of “a” | Number of “X”
   a   |    a   |    a   |    X   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   a   |    a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   X   |    a   |    a   |    a   |    X   |    X   |    X   |    X   |    X   |    X    |    X    |    X    |    X    |        3      |      10
   a   |    a   |    a   |    X   |    X   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       11      |       2
   a   |    a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   X   |    a   |    a   |    a   |    a   |    X   |    X   |    X   |    X   |    X    |    X    |    X    |    X    |        4      |       9
   a   |    a   |    a   |    X   |    X   |    X   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       10      |       3
   a   |    a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   X   |    a   |    a   |    a   |    a   |    a   |    X   |    X   |    X   |    X    |    X    |    X    |    X    |        5      |       8
   a   |    a   |    a   |    X   |    X   |    X   |    X   |    a   |    a   |    a    |    a    |    a    |    a    |        9      |       4
   a   |    a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   X   |    a   |    a   |    a   |    a   |    a   |    a   |    X   |    X   |    X    |    X    |    X    |    X    |        6      |       7
   a   |    a   |    a   |    X   |    X   |    X   |    X   |    X   |    a   |    a    |    a    |    a    |    a    |        8      |       5
   a   |    a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    X   |    X    |    X    |    X    |    X    |        7      |       6
   a   |    a   |    a   |    X   |    X   |    X   |    X   |    X   |    X   |    a    |    a    |    a    |    a    |        7      |       6
   a   |    a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    X    |    X    |    X    |    X    |        8      |       5
   a   |    a   |    a   |    X   |    X   |    X   |    X   |    X   |    X   |    X    |    a    |    a    |    a    |        6      |       7
   a   |    a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    X    |    X    |    X    |        9      |       4
   a   |    a   |    a   |    X   |    X   |    X   |    X   |    X   |    X   |    X    |    X    |    a    |    a    |        5      |       8
   a   |    a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    X    |    X    |       10      |       3
   a   |    a   |    a   |    X   |    X   |    X   |    X   |    X   |    X   |    X    |    X    |    X    |    a    |        4      |       9
   a   |    a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    X    |       11      |       2
   a   |    a   |    a   |    X   |    X   |    X   |    X   |    X   |    X   |    X    |    X    |    X    |    X    |        3      |      10
   a   |    a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   a   |    X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1
   X   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a   |    a    |    a    |    a    |    a    |       12      |       1

这个小型控制台应用程序展示了该类的使用结果。在指定的迭代次数内,它每次都会填充包并提取出所有随机项。最后,它会显示每个项目的最大连续重复次数、每个项目的平均出现次数,以及在所有迭代中每一次 GetNextRandomIndex() 调用的累计结果。正如您所看到的,经过 100 万次迭代后,第一次调用的累计结果大约是 750,000 个 "a" 和 250,000 个 "X",而最后一次调用大约是 999,950 个 "a" 和 50 个 "X":

/// <summary>
/// Console driver for ShuffleBag: repeatedly fills the bag, extracts every element,
/// and finally prints, per item, the average number of extractions per iteration and the
/// longest run observed, followed by the accumulated results of every call position.
/// </summary>
/// <param name="args">Unused.</param>
static void Main(string[] args)
{
    int  MaxIterations = 1000000;
    bool ShowResults = true;

    string[] items    = new string[] { "a", "X" };
    int[]    count    = new int[]    { 30,  10  };
    int[]    maxRepet = new int[]    {  3,   2  };

    int elementCount = count.Sum();

    // Refuse to run with limits the bag cannot honor.
    bool maxRepetOK = true;
    for (int i = 0;  i < items.Length;  i++)
    {
        maxRepetOK = maxRepetOK  &&  ShuffleBag.CheckIndexMaxConsecutiveCorrectness(maxRepet[i], count[i], elementCount);
    }
    if (! maxRepetOK)
    {
        Console.WriteLine("*** Bad number of repetitions!! ***\n");
        goto _END_;
    }

    // Per item: (total extractions over all iterations, longest run observed).
    Dictionary<string, Tuple<int,int>> results = new Dictionary<string,Tuple<int,int>>();
    for (int i = 0;  i < items.Length;  i++)
    {
        results.Add(items[i], new Tuple<int,int>(0, 0));
    }

    // resultsPerCall[n, k]: how many times item k was returned by the n-th call of an iteration.
    int[,] resultsPerCall = new int[elementCount, items.Length];

    ShuffleBag bag = new ShuffleBag(items.Length);

    int iterations = 0;
    bool iteratingAnnounced = false;

    for (int x = 0;  x < MaxIterations;  x++)
    {
        // Refill the bag for this iteration.
        bag.Reset();
        for (int i = 0;  i < items.Length;  i++)
        {
            bag.SetIndexData(i, count[i], maxRepet[i]);
        }

        string prevItem = "";
        int prevRepetitions = 0;

        int row = 0;
        while (bag.RemainingElementsCount > 0)
        {
            int newIndex = bag.GetNextRandomIndex();

            if (prevItem == items[newIndex])
            {
                prevRepetitions++;
            }
            else
            {
                prevItem = items[newIndex];
                prevRepetitions = 1;
            }

            var resultAnt = results[items[newIndex]];
            results[items[newIndex]] = new Tuple<int,int>(resultAnt.Item1 + 1, Math.Max(prevRepetitions, resultAnt.Item2));

            resultsPerCall[row, newIndex]++;

            // NOTE(review): the original per-draw output statement was lost; printing the item is assumed.
            if (ShowResults)
            {
                Console.Write(items[newIndex] + " ");
            }

            row++;
        }

        iterations++;

        if (ShowResults  &&  MaxIterations > 1)
        {
            Console.WriteLine("\nESC:\tEnd\nENTER:\tNext iteration\nTAB:\tAll iterations");

            // Wait for one of the three recognised keys; any other key is ignored.
            while (true)
            {
                switch (Console.ReadKey(true).Key)
                {
                    case ConsoleKey.Enter:
                        goto _CONTINUE_;
                    case ConsoleKey.Escape:
                        goto _RESULTS_;
                    case ConsoleKey.Tab:
                        ShowResults = false;
                        goto _CONTINUE_;
                }
            }
        }
        else if (! iteratingAnnounced  &&  MaxIterations > 1)
        {
            // NOTE(review): the original condition for this message was lost; it is printed once in silent mode.
            Console.WriteLine("Iterating ...");
            iteratingAnnounced = true;
        }

        _CONTINUE_: ;
    }

    _RESULTS_:
    for (int i = 0;  i < items.Length;  i++)
    {
        var data = results[items[i]];
        double average = (double)data.Item1 / iterations;

        Console.WriteLine(items[i] +
                          ": Average = " + average.ToString() + (average != count[i] ? "(!)" : "") +
                          "\t\tMax. repetitions = " + data.Item2.ToString() + (data.Item2 > maxRepet[i] ? "(!)" : ""));
    }

    for (int i = 0;  i < elementCount;  i++)
    {
        Console.Write((i+1).ToString("00") + ")");
        for (int k = 0;  k < items.Length;  k++)
        {
            Console.Write("   \t" + items[k] + ": " + resultsPerCall[i, k].ToString());
        }
        Console.WriteLine();
    }

    _END_:
    Console.WriteLine("\n\nESC to exit");
    while (Console.ReadKey(true).Key != ConsoleKey.Escape);
}

问题在于,在 GetNextRandomIndex() 中,我只使用项目剩余元素的数量作为选中它的概率,因此在第一次调用时,得到 "a" 的概率是得到 "X" 的 3 倍(因为有 30 个 "a" 和 10 个 "X"),而按照上面的序列表,正确的比例应该是 12 倍。有没有人知道如何修改我的算法(或使用不同的算法)来获得正确的概率?

2 个答案:

答案 0 :(得分:1)

正如所承诺的,这是一个马尔可夫链蒙特卡罗(MCMC)采样器。我没能用 Propp-Wilson 技术得到精确版本;这个版本只是收敛到一个偏向于期望结果、并且在期望结果上均匀的分布,而且收敛可能相当缓慢。将一个排列的得分定义为其中无效字母的数量。具体来说,如果 A 最多连续出现一次,B 最多连续出现两次,那么得分就是下面用 ^ 标出的无效字母的个数:

 ^^   ^  ^^


从一个随机排列开始,反复地独立、有放回地随机选择两个位置。对交换这两个位置上的字母后得到的排列进行评分;这就是提议的排列。计算 2 的(当前得分 − 提议得分)次幂。如果一个 0 到 1 之间的均匀随机浮点数小于这个数,就保留这次提议的交换;否则撤销它。在你能忍受的时间内一直重复这个过程,然后取接下来出现的第一个有效排列。

答案 1 :(得分:0)


def make_automaton(max_consecutive):
    """Build the automaton that tracks the current run of identical letters.

    max_consecutive maps each letter to the maximum number of times it may
    appear in a row. A state is the current run written out as a string
    ('' before anything is emitted, then e.g. 'a', 'aa', 'aaa').

    Returns a dict mapping every state to a dict {letter: next_state} that
    contains only the letters allowed from that state.
    """
    # The empty string is the start state; it must be part of the state set
    # (callers pick the start state with min(automaton)).
    states = {''} | {letter * j for letter, k in max_consecutive.items() for j in range(1, k + 1)}
    automaton = {}
    for state in states:
        transitions = {}
        for letter in max_consecutive.keys():
            # Same letter as the current run extends it; any other letter starts a new run.
            new_state = state + letter if letter == state[-1:] else letter
            # Runs longer than the allowed maximum are not states, so the move is dropped.
            if new_state in states:
                transitions[letter] = new_state
        automaton[state] = transitions
    return automaton


>>> from pprint import pprint
>>> pprint(make_automaton({'a': 3, 'X': 2}))
{'': {'X': 'X', 'a': 'a'},
 'X': {'X': 'XX', 'a': 'a'},
 'XX': {'a': 'a'},
 'a': {'X': 'X', 'a': 'aa'},
 'aa': {'X': 'X', 'a': 'aaa'},
 'aaa': {'X': 'X'}}




from collections import defaultdict

def make_probabilities(count, automaton):
    """Compute the state distribution of the automaton after each emitted letter.

    count maps each letter to its number of occurrences; at every step a
    letter is drawn with probability count[letter] / total, and draws that
    the automaton does not allow are discarded (their mass is dropped, so
    the later distributions are not normalized).

    Returns a list of total + 1 dicts {state: probability}; entry i is the
    distribution after i letters, entry 0 puts all mass on the start state.
    """
    # The start state is the smallest key of the automaton (the empty string).
    probabilities = [{min(automaton): 1.0}]
    total = sum(count.values())
    for i in range(1, total + 1):
        distribution = defaultdict(float)
        for state, p in probabilities[-1].items():
            transitions = automaton[state]
            for letter, k in count.items():
                if letter in transitions:
                    distribution[transitions[letter]] += p * (k / total)
        # Record this step so the next iteration (and the caller) can use it.
        probabilities.append(distribution)
    return probabilities

# Demo: pretty-print what make_probabilities returns for 30 "a" (max run 3) and 10 "X" (max run 2).
pprint(make_probabilities({'a': 30, 'X': 10}, make_automaton({'a': 3, 'X': 2})))

from random import random

def weighted_sample(distribution):
    """Draw one key of `distribution` with probability proportional to its weight.

    distribution maps outcomes to non-negative weights, which need not sum
    to one. A draw that matches no outcome is simply retried.
    """
    weights = distribution.values()
    while True:
        # Scale a uniform [0, 1) draw to the total weight, then walk the
        # outcomes, consuming each weight until the draw is used up.
        remaining = random() * sum(weights)
        for outcome, weight in distribution.items():
            remaining -= weight
            if remaining < 0.0:
                return outcome

from collections import Counter

# Demo: tally 10000 draws from weighted_sample with weights 30 for "a" and 10 for "X".
print(Counter(weighted_sample({'a': 30, 'X': 10}) for i in range(10000)))

def unbiased_sample(count, max_consecutive):
    """Sample a string that uses every letter exactly count[letter] times and
    never repeats a letter more than max_consecutive[letter] times in a row.

    A final automaton state is drawn first, then the path is walked backwards
    one step at a time, each step drawn in proportion to the forward
    probability of reaching the current state. Candidates whose letter counts
    do not match `count` are rejected and the whole draw is repeated.

    Returns the accepted sequence as a string.
    """
    automaton = make_automaton(max_consecutive)
    probabilities = make_probabilities(count, automaton)
    total = sum(count.values())
    while True:
        sample = []
        state = weighted_sample(probabilities[-1])
        for i in range(len(probabilities) - 2, -1, -1):
            # Weight of every (previous state, letter) pair that leads to the current state.
            conditional_distribution = {}
            for old_state, p in probabilities[i].items():
                transitions = automaton[old_state]
                for letter, k in count.items():
                    if letter in transitions and transitions[letter] == state:
                        conditional_distribution[(old_state, letter)] = p * (k / total)
            state, letter = weighted_sample(conditional_distribution)
            sample.append(letter)
        # Letters were collected from the last position to the first.
        sample.reverse()
        # Rejection step: keep only candidates with exactly the requested letter counts.
        if Counter(sample) == Counter(count):
            return ''.join(sample)

# Demo: print 1000 samples for four letters with 10 occurrences each and
# maximum runs of 5, 4, 3 and 2 respectively.
for r in range(1000):
    print(unbiased_sample({'A': 10, 'B': 10, 'C': 10, 'D': 10}, {'A': 5, 'B': 4, 'C': 3, 'D': 2}))