Implementing parallel hash table expansion (resizing the hash table inside an OpenMP parallel region)

Date: 2015-07-29 18:51:11

Tags: c multithreading hash hashtable openmp

I am trying to implement hash table expansion inside an OpenMP parallel region. What I want to achieve: only one thread carries out the expansion (locking the entire table and updating it), while the remaining threads refrain from updating the table for as long as the expansion is in progress.

Instead, my code either deadlocks, or other threads keep updating the table while the expansion is in progress, as shown below in the printf output.

Is there a better way to solve this, or is my approach fundamentally wrong?

Here are some code segments:

#define WIN_SIZE                        30
#define HASH_TAB_SIZE                   100

omp_lock_t hash_lock;

/* Each key-value pair entry */
typedef struct htable_each_entry
{
        // key comprising of k+1 mer
        char kmer_plus_one[WIN_SIZE_PLUS];

        // value is a linked-list of read_id's
        struct read_info *read_list;

        struct htable_each_entry *hh_next;
        struct htable_each_entry *hh_prev;

} htable_entry;

/* structure for each bucket entry */
typedef struct htable_each_bucket
{
        htable_entry *series;

        // Lock for each bucket entry
        omp_lock_t writelock;

        int count;

} htable_bucket;

/* structure for the Hash table */
typedef struct hash_table_t {
        size_t size;
        htable_bucket *buckets;

} hash_table_t;
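
The create_hash_table routine is not shown in the post. For context, a minimal sketch of how such a constructor would typically allocate the bucket array and initialize the per-bucket locks (my own assumption, not the poster's actual code) might look like this:

    #include <stdlib.h>
    #include <omp.h>

    /* Hypothetical reconstruction, NOT the poster's create_hash_table:
       allocate the bucket array and initialize every per-bucket OpenMP
       lock before the parallel region starts using the table. */
    hash_table_t *create_hash_table(size_t size)
    {
            hash_table_t *table = malloc(sizeof(hash_table_t));
            if (table == NULL)
                    return NULL;

            table->size = size;
            table->buckets = calloc(size, sizeof(htable_bucket));
            if (table->buckets == NULL) {
                    free(table);
                    return NULL;
            }

            for (size_t b = 0; b < size; b++) {
                    table->buckets[b].series = NULL;
                    table->buckets[b].count = 0;
                    omp_init_lock(&table->buckets[b].writelock);
            }
            return table;
    }
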
    int main (int argc, char *argv[])
    {
            /* Initializing global lock on table */
            omp_init_lock(&hash_lock);

            hash_table_t *my_hash_table = create_hash_table(HASH_TAB_SIZE);

    #pragma omp parallel shared(rd_seq, my_hash_table, hash_lock)
            {
                    int i=0;

                    //Populating the hash table
    #pragma omp for schedule(static) private(i) firstprivate(num_lines) nowait
                    for (i=0; i<num_lines; i++)
                    {
                            Sliding_window(my_hash_table,i);
                    }

            }
            double end_t = omp_get_wtime ();
            printf ("\nTime for populating entries to the hash table: %f\n", (end_t - start_t));

            int num_entries = count_hashtab_entries(my_hash_table);
            printf("There are %d entries in the Hash table \n", num_entries);
    }



    void Sliding_window (hash_table_t *my_hash_table, int idx)
    {
            char kmer_plus_1[WIN_SIZE_PLUS];
            int i=0;
            Sequence_r r_seq;
            htable_entry *tag;

            r_seq = rd_seq[idx];

            /* Table is resized when the number of unique entries in the hash table 
               exceeds half the number of total buckets allocated.
               Locking table using "hash_lock". Should be executed by 1 thread, whereas the
               remaining threads should wait and refrain from updating the table, while table
               expansion process is in progress.
            */

            omp_set_lock(&hash_lock);

            if (count_hashtab_entries(my_hash_table) >= (my_hash_table->size/2)) {

                 printf("BEFORE EXPAND: entries in hash table: %d Size: %d thread: %d > \n",count_hashtab_entries(my_hash_table),(int)my_hash_table->size, omp_get_thread_num());

                    Expand_hash_table(my_hash_table);

                 printf("AFTER EXPAND: entries in hash table: %d Size: %d thread: %d \n",count_hashtab_entries(my_hash_table),(int)my_hash_table->size, omp_get_thread_num());

            }

            /* Table expansion done, hence releasing the lock. */
            omp_unset_lock(&hash_lock);

            for (i=0; (r_seq.rlen - i) > WIN_SIZE; i++)
            {
                    memcpy(kmer_plus_1, &r_seq.read_data[i], WIN_SIZE+1);
                    kmer_plus_1[WIN_SIZE+1] = '\0';
                    htable_entry *entry_returned = NULL;

                    unsigned bucket_id = generate_hash(my_hash_table, kmer_plus_1);

                    // setting a lock on the respective bucket obtained from hashing
                    omp_set_lock(&(my_hash_table->buckets[bucket_id].writelock));

                    // Check if the key already exists
                    tag = Find_key_exists(my_hash_table, kmer_plus_1, bucket_id, &entry_returned);

                    if (tag != NULL) /* Key already exists */
                    {
                            append_read_info(tag, idx, i);
                    } else {
                            /* key does not exist, therefore insert into the hash table */
                            tag = (htable_entry *) malloc(sizeof(htable_entry));
                            strcpy(tag->kmer_plus_one, kmer_plus_1);
                            tag->read_list = NULL;

                            Insert_key(my_hash_table, tag, bucket_id, entry_returned);
                            append_read_info(tag, idx, i);
                    }

                    // release the lock on the respective bucket
                    omp_unset_lock(&(my_hash_table->buckets[bucket_id].writelock));
            }
    }

Output:

BEFORE EXPAND: entries in hash table: 500094 Size: 1000000 thread: 0 
AFTER EXPAND: entries in hash table: 500115 Size: 2000000 thread: 0 

(Note: to keep this post from getting too long, I have not included the Expand_hash_table routine. I can add it if needed.)

As the output above shows, the number of entries in the hash table kept increasing while the expansion was in progress. In other words, despite the lock, other threads were still accessing and updating parts of the hash table.
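
One likely explanation, based on the snippets above rather than anything confirmed in the post, is that the insert path only holds the per-bucket writelock, which the expanding thread never acquires, so inserts and the resize are not actually mutually exclusive. As a rough illustration of one way to close that gap, here is a sketch using a POSIX reader-writer lock in place of the plain hash_lock. It is my own illustration, not the code above; the helper names are hypothetical, and it assumes a POSIX platform where OpenMP threads are ordinary OS threads.

    #include <omp.h>
    #include <pthread.h>

    /* Prototypes from the code above, assumed to exist elsewhere. */
    int  count_hashtab_entries(hash_table_t *table);
    void Expand_hash_table(hash_table_t *table);

    /* A process-wide reader-writer lock: bucket updates take it in shared
       mode, the resizer takes it in exclusive mode, so the two can never
       overlap while inserts on different buckets still run in parallel. */
    pthread_rwlock_t resize_lock = PTHREAD_RWLOCK_INITIALIZER;

    /* Insert side: shared ("read") mode plus the per-bucket writelock. */
    void locked_bucket_update(hash_table_t *table, unsigned bucket_id)
    {
            pthread_rwlock_rdlock(&resize_lock);    /* no resize can be running */
            omp_set_lock(&table->buckets[bucket_id].writelock);

            /* ... Find_key_exists / Insert_key / append_read_info as above ... */

            omp_unset_lock(&table->buckets[bucket_id].writelock);
            pthread_rwlock_unlock(&resize_lock);
    }

    /* Resize side: exclusive ("write") mode blocks every insert for the
       duration of the expansion. */
    void locked_expand_if_needed(hash_table_t *table)
    {
            pthread_rwlock_wrlock(&resize_lock);
            if (count_hashtab_entries(table) >= table->size / 2)
                    Expand_hash_table(table);       /* no concurrent writers now */
            pthread_rwlock_unlock(&resize_lock);
    }

A real implementation would presumably check the load factor under the read lock first, so the exclusive lock is only taken when an expansion is actually likely; the point of the sketch is only that every writer has to synchronize with the resizer through the same lock, not just through its bucket lock.
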

Is there a way to guarantee atomicity while resizing the hash table inside an "OpenMP parallel" region?

0 Answers:

There are no answers yet.