C ++:正确使用HASH和碰撞技术

时间:2011-02-21 00:06:33

标签: c++ memory-leaks hashtable stack-overflow hash-collision

哈希表可能存在大小限制吗?

我有点困惑,因为我可以看到为什么创建太小的哈希表会导致我的问题?看来太大的哈希表导致我的探测器抛出一个Sig错误?如果有人有Hash Tables的经验,这是我的代码。我当然感谢您提供的任何建议(请不要再编织编织):

#include <iostream>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <math.h>

using namespace std;

struct TABLE{
  int key;
  TABLE* next;
};
const int MAX_KEYS = 5000;
const int RANDOM = 30000;

int randNUMS(int *rand);
int hashTableSize();
int HASH(int key,int listSIZE);
void threeHashMethods(int *randARRAY,int tbSIZE);
int* openAddressing(int *randARRAY,int tbSIZE);
int seperateCHAINING();
int linearPROBE(int address,int *HASH,int probeTHIS,int load,int& probe);
int doubleHASH(int key,int tbSIZE);
void listSEARCH(int *randARRAY,int *HT,int tbSIZE);

int main(){
  int tbSIZE = 0;
  int randARRAY[MAX_KEYS];
  for(int a = 0; a <= MAX_KEYS; a++){
    randARRAY[a] = 0;
  }

  ///create random array of 5,000 unique int
  ///they will be of values between 1-30000
  randNUMS(randARRAY);
  ///get hash table size from user
  ///table must be larger than 6500 int
  tbSIZE = hashTableSize();
  ///driver function for all three
  ///collision resolution techniques
  threeHashMethods(randARRAY,tbSIZE);

  return 0;
}
int HASH(int key,int listSIZE){
  int address = 0;
  address = key % listSIZE;
  return address;
}
int doubleHASH(int key,int tbSIZE){
  int address = 0;
  address = (key % (tbSIZE - 2)) + 1;
  return address;
}
int hashTableSize(){
  int userCHOOSE = 0;

  cout << "Enter desired hash table size." << endl;
  cout << "NOTE: hash table size must exceed 6500: " << endl;
  cin >> userCHOOSE;
  if(userCHOOSE < 6500){
    cout << "Whoops " << userCHOOSE << " is to small!" << endl;
    hashTableSize();
  }
  return userCHOOSE;
}
int randNUMS(int *randARRAY){
  ///temporary fix for randARRAY array of numbers till hash is running
  int check = 0;
  int index = 0;
  int loop = 0;

  srand (time(NULL));
  for(index = 0; index < MAX_KEYS; index++){
    check = rand() % RANDOM + 1;
    while(randARRAY[loop] != 0){
      if(check == randARRAY[index]){
    check = rand() % RANDOM + 1;
    loop = 0;
      }
      loop++;
    }
    randARRAY[index] = check;
  }

  return *randARRAY;
}
void threeHashMethods(int *randARRAY,int tbSIZE){
  int *HT;


  ///this menu will allow user to select collision method
  HT = openAddressing(randARRAY,tbSIZE);
  listSEARCH(randARRAY,HT,tbSIZE);
}
int* openAddressing(int *randARRAY,int tbSIZE){
  int key = 0,
    address = 0,
    prb = 0,
    hashTABLE[tbSIZE * 2],
    *HT = hashTABLE;
  int percent = (5000.00 / tbSIZE) * 100;
  int load = (5000.00 / tbSIZE) * 10;
  int loadFACTOR = (tbSIZE * load)/10;

  if(percent > 0){

  for(int a = 0; a < tbSIZE; a++){

    hashTABLE[a] = 0;
  }

  while(randARRAY[key] != 0){
    ///get a purposed address
    ///and move through indexes
    ///in array of random int till
    ///empty index is found
    if(randARRAY[key] > tbSIZE){
    address = HASH(randARRAY[key],loadFACTOR);
    }
    ///if address is available 
    ///grab the key
    if(hashTABLE[address] == 0){
      hashTABLE[address] = randARRAY[key];
    }
    ///if a collision is the result run
    ///a linear probe until available address is found 
    else{
      address = linearPROBE(address,hashTABLE,0,tbSIZE,prb);
      hashTABLE[address] = randARRAY[key];
    }
    if(hashTABLE[address] == randARRAY[key]){
    key++;
    }
  }
  cout << key << " items loaded into a " << tbSIZE << " element hash table." << endl;
  cout << "Load Factor = " << percent << "%" << endl;
  cout << "Results from searching for 2500 items." << endl;
  }
  else{
    cout << "Load Factor is maxed out." << endl;
  }

  return HT;
}
int linearPROBE(int address,int *HASH,int probeTHIS,int load,int& probe){
  while(HASH[address] != probeTHIS){
    address = (address + 1);
    probe++;
    if(address >= load){
      address = 0;
    }
  }
  return address;
}
void listSEARCH(int *randARRAY,int *HT,int tbSIZE){
  int key = 0,
    address = 0,
    probe = 0,
    found = 0,
    attempts = 0;

  while(randARRAY[key] != 0){
    address =  HASH(randARRAY[key],tbSIZE);
    while(HT[address] != randARRAY[key] && attempts < tbSIZE){
      address = linearPROBE(address,HT,randARRAY[key],tbSIZE,probe);
      found++;
      attempts++;
    }
    key = key + 2;
    attempts = 0;
  }
  found = probe / found;

  cout << "Linear Probing." << endl;
  cout << probe  << " items examined ";
  cout << "(avg = " << found << " items examined per search.)" << endl;
}

1 个答案:

答案 0 :(得分:3)

关闭一个。这将填充randARRAY的第一个MAX_KEYS + 1个元素。

  for(int a = 0; a <= MAX_KEYS; a++){
    randARRAY[a] = 0;
  }

再次询问,然后使用旧的userCHOOSE值。你想要return hashTableSize();

  if(userCHOOSE < 6500){
    cout << "Whoops " << userCHOOSE << " is to small!" << endl;
    hashTableSize();
  }

然后是真正的问题:openAddressingrandARRAY[key] != 0时进行扫描。你的randARRAY不是0终止(在main中设置tbSIZE会覆盖你早先的一个randARRAY [5000]),所以你将扫描超过5000个键。然后在listSearch中,您将访问大于5000的密钥的randARRAY [key],这意味着您正在读取“垃圾”数据,例如负数。哈希(模数),它仍然是负数。然后访问崩溃的HT [负值]。

编辑:修复:

  int randARRAY[MAX_KEYS+1];

这样可以防止逐个扫描,使0终止扫描工作,因此最大键值为5000。