Monday, November 9, 2009

Extendible Hashing Implementation

  1. /*
  2.  *      ExtendedHashing.cpp
  3.  *      
  4.  *      Copyright 2009 roKB <rooparambhakar@gmail.com>
  5.  *      
  6.  *      This program is free software; you can redistribute it and/or modify
  7.  *      it under the terms of the GNU General Public License as published by
  8.  *      the Free Software Foundation; either version 2 of the License, or
  9.  *      (at your option) any later version.
  10.  *      
  11.  *      This program is distributed in the hope that it will be useful,
  12.  *      but WITHOUT ANY WARRANTY; without even the implied warranty of
  13.  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14.  *      GNU General Public License for more details.
  15.  *      
  16.  *      You should have received a copy of the GNU General Public License
  17.  *      along with this program; if not, write to the Free Software
  18.  *      Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  19.  *      MA 02110-1301, USA.
  20.  */
  21.  
  22. #include <string>
  23. #include <cassert>
  24. #include <iostream>
  25. #include <cstdio>
  26. #include <cstdlib>
  27.  
  28. int hashCode(std::string const & key)
  29. /*
  30.  * calculates the hash of key using polynomial expansion,
  31.  * with x = 37
  32.  */
  33. {
  34.   const long x = 41L;
  35.   int n = key.length();
  36.   long value = 0L;
  37.   while( n-- > 0 ) {
  38.     value *= x;   value += key[n];
  39.   }
  40.   return static_cast<int>(value);
  41. }
  42.  
  43. // ------------------------------------------------------------------------
  44. // ------------------------------------------------------------------------
  45. struct Student
  46. {
  47.   std::string entryNo;
  48.   std::string name;
  49. };
  50.  
  51. std::ostream& operator << (std::ostream& out, Student& s) {
  52.   out << s.entryNo << " : " << s.name;
  53.   return out;
  54. }
  55. // ------------------------------------------------------------------------
  56. // ------------------------------------------------------------------------
  57.  
  58.  
  59. struct Node
  60. /* sentinal nodes of linked list */
  61. {
  62.   Node *next;
  63. };
  64.  
  65. struct Pair {
  66.   int first;
  67.   Student *second;
  68.  
  69.   ~Pair() {
  70.     if(second != NULL)
  71.       delete second;
  72.   }
  73. };
  74.  
  75. struct InternalNode : public Node
  76. /* internal nodes of linked list */
  77. {
  78.   Pair *element;
  79.  
  80.   ~InternalNode() {
  81.     delete element;
  82.   }
  83. };
  84.  
  85. class LinkedList
  86. /*
  87.  * buckets in hash table
  88.  */
  89. {
  90. public:
  91.   LinkedList() {
  92.     head = new Node();
  93.     head->next = NULL;
  94.   }
  95.   ~LinkedList() {
  96.     del(head);
  97.   }
  98.   void insert(Student & elem, int hashkey) {
  99.     // create hash, data pair
  100.     Pair *data = new Pair();
  101.     data->first = hashkey;
  102.     data->second = &elem;
  103.     // create node in list
  104.     InternalNode *temp = new InternalNode();
  105.     temp->next = head->next;
  106.     temp->element = data;
  107.     head->next = temp;
  108.   }
  109.   Student* search(int hashkey) {
  110.     Node *current = head->next;
  111.     while(current != NULL) {
  112.       if(static_cast<InternalNode*>(current)->element->first == hashkey)
  113.         return static_cast<InternalNode*>(current)->element->second;
  114.       current = current->next;
  115.     }
  116.     return NULL;
  117.   }
  118.   void remove(int hashkey) {
  119.     Node *current = head->next;
  120.     Node *parent = head;
  121.     while(current != NULL) {
  122.       if(static_cast<InternalNode*>(current)->element->first == hashkey) {
  123.         // fix links in list then remove and return
  124.         parent->next = current->next;
  125.         current->next = NULL;
  126.         del(current);
  127.         return;
  128.       }
  129.       parent = current;
  130.       current = current->next;
  131.     }
  132.   }
  133.   void removeAll() {
  134.     del(head->next);
  135.     head->next = NULL;
  136.   }
  137.   void print(){
  138.     InternalNode *current = static_cast<InternalNode*>(head->next);
  139.     int i = 1;
  140.     while(current != NULL) {
  141.       std::cout << i << " : hashkey(0x" << std::hex << static_cast<unsigned int>( current->element->first ) << ") <" << *(current->element->second) << ">" << std::endl;
  142.       current = static_cast<InternalNode*>(current->next);
  143.       ++i;
  144.     }
  145.   }
  146.   Node *getList() {
  147.     return head->next;
  148.   }
  149. private:
  150.   void del(Node *node) {
  151.     if(node == NULL)
  152.       return;
  153.      
  154.     del(node->next);
  155.     delete node;
  156.   }
  157.   Node *head;
  158. };
  159.  
  160. class Bucket
  161. /*
  162.  * uses LinkedList for storage
  163.  * bucket overflow and underflow is left for xtendedHashing functions
  164.  * they should check for underflow and overflow in bucket, otherwise
  165.  * program will terminate
  166.  */
  167. {
  168. public:
  169.   Bucket(int c, int s = 0) : sig_bits(s), cap(c), sz(0), code(0), root(false) {
  170.     data = new LinkedList();
  171.   }
  172.   ~Bucket() {
  173.     delete data;
  174.   }
  175.   int capacity() {
  176.     return cap;
  177.   }
  178.   int size() {
  179.     return sz;
  180.   }
  181.   void insert(Student& elem, int hashkey) {
  182.     data->insert(elem, hashkey);
  183.     ++sz;
  184.     if(root)
  185.       return;
  186.     assert( ( size() <= capacity() ) );
  187.   }
  188.   Student* search(int hashkey) {
  189.     return data->search(hashkey);
  190.   }
  191.   void remove(int hashkey) {
  192.     if(search(hashkey) != NULL)
  193.       --sz;
  194.     data->remove(hashkey);
  195.     assert( (size() >= 0) );
  196.   }
  197.   void emptyBucket() {
  198.     data->removeAll();
  199.     sig_bits = 0;
  200.     sz = 0;
  201.   }
  202.   bool isEmpty() {
  203.     return size() == 0;
  204.   }
  205.   void print() {
  206.     std::cout << "----- <BUCKET (";
  207.     printcode();
  208.     std::cout << ") > -----" << std::endl;
  209.     std::cout << "SIG_BITS : " << sig_bits << std::endl;
  210.     data->print();
  211.     std::cout << std::endl;
  212.   }
  213.   Node* getList() {
  214.     return data->getList();
  215.   }
  216.   int getSIG_BITS() {
  217.     return sig_bits;
  218.   }
  219.   void setSIG_BITS(int s) {
  220.     sig_bits = s;
  221.     assert( (sig_bits >= 0) );
  222.   }
  223.   void setCODE(int c) {
  224.     code = c;
  225.   }
  226.   void setROOT(bool state) {
  227.     root = state;
  228.   }
  229. private:
  230.   void printcode() {
  231.     int length = sig_bits;
  232.     int temp = code;
  233.     while(length-- > 0) {
  234.       std::cout << temp%2;
  235.       temp >>= 1;
  236.     }
  237.   }
  238.   LinkedList *data;
  239.   int sig_bits;
  240.   int cap;
  241.   int sz;
  242.   int code;
  243.   bool root;
  244. };
  245.  
  246. struct TrieNode
  247. {
  248.   Bucket *page;
  249.   TrieNode *next[2];
  250.   TrieNode *parent;
  251.  
  252.   ~TrieNode() {
  253.     if(page != NULL)
  254.       delete page;
  255.   }
  256. };
  257.  
  258. class HashTable
  259. /*
  260.  * at last we are at the implementation of Extendible-hashTable ^v^
  261.  * uses tries index for bucket lookup
  262.  * "now is the time to show your coding skills rrc"
  263.  */
  264. {
  265. public:
  266.   HashTable(int buk = DFL_BUCKET_SIZE) : bucket_size(buk) {
  267.     sig_bits = 0;
  268.     //root = false;
  269.     index = new TrieNode();
  270.     index->page = new Bucket(bucket_size, sig_bits);
  271.     index->next[0] = NULL;
  272.     index->next[1] = NULL;
  273.     index->parent = NULL;
  274.     no_of_buckets = 1;
  275.   }
  276.   ~HashTable() {
  277.     del(index);
  278.   }
  279.   Student* search(std::string number) {
  280.     int code;
  281.     int hashkey = hashCode(number);
  282.     TrieNode *node = findNode(hashkey, &code);
  283.     return node->page->search(hashkey);
  284.   }
  285.   void insert(Student stu) {
  286.     Student & st = *new Student();
  287.     st.name = stu.name;
  288.     st.entryNo = stu.entryNo;
  289.     int hashkey = hashCode(st.entryNo);
  290.     int code;
  291.     TrieNode *node = findNode(hashkey, &code);
  292.     Bucket *page = node->page;
  293.     //if(root || page->size()+1 <= page->capacity())
  294.     if(page->size()+1 <= page->capacity())
  295.     {
  296.       // insert in bucket
  297.       // page->setROOT(root);
  298.       page->insert(st, hashkey);
  299.       // page->setROOT(false);
  300.       return;
  301.     } else {    // bucket is full, make a new bucket
  302.       // step 7
  303.       Bucket *newPage = new Bucket(bucket_size);
  304.       no_of_buckets++;
  305.       // step 8
  306.       LinkedList *Q = new LinkedList();
  307.       InternalNode *current = static_cast<InternalNode*>( page->getList() );
  308.       while(current != NULL) {
  309.         Q->insert(*(current->element->second), current->element->first);
  310.         current = static_cast<InternalNode*>(current->next);
  311.       }
  312.       Q->insert(st, hashkey);
  313.       // step 9
  314.       int oldD = page->getSIG_BITS();
  315.       int MASK = 1 << oldD;
  316.       newPage->setSIG_BITS(oldD + 1);
  317.       // step 10
  318.       page->emptyBucket();
  319.       page->setSIG_BITS(oldD + 1);
  320.       if(oldD+1 > sig_bits){
  321.         ++sig_bits;
  322.       }
  323.         node->page = NULL;
  324.         TrieNode *left = new TrieNode();
  325.         TrieNode *right = new TrieNode();
  326.         node->next[0] = left;
  327.         node->next[1] = right;
  328.         left->page = page;
  329.         right->page = newPage;
  330.         left->next[0] = left->next[1] = NULL; left->parent = node;
  331.         right->next[0] = right->next[1] = NULL; right->parent = node;
  332.         left->page->setCODE(code);
  333.         right->page->setCODE(code | MASK);
  334.  
  335.       current = static_cast<InternalNode*>( Q->getList() );
  336.       // root = true;
  337.       while(current != NULL) {
  338.         insert( *(current->element->second) );
  339.         current = static_cast<InternalNode*>( current->next );
  340.       }
  341.       // root = false;
  342.     }
  343.   }
  344.   void remove(std::string number){
  345.     int hashkey = hashCode(number);
  346.     int code;
  347.     TrieNode *node = findNode(hashkey, &code);
  348.     Bucket *page = node->page;
  349.     if(page->search(hashkey) == NULL)
  350.       return;
  351.     page->remove(hashkey);
  352.     if(page->size() == 0 && node->parent != NULL)
  353.     {
  354.       TrieNode *parent = node->parent;
  355.       TrieNode *sibling = (parent->next[0] == node)?
  356.             (parent->next[1]) : (parent->next[0]);
  357.       if(sibling->next[0] != NULL || sibling->next[1] != NULL)
  358.         return;
  359.      
  360.       no_of_buckets--;
  361.       parent->page = sibling->page;
  362.       sibling->page = NULL;
  363.       parent->next[0] = NULL;
  364.       parent->next[1] = NULL;
  365.       del(node);
  366.       del(sibling);
  367.       parent->page->setSIG_BITS(parent->page->getSIG_BITS() - 1);
  368.       int MASK = 1 << parent->page->getSIG_BITS();
  369.       MASK -= 1;
  370.       code &= MASK;
  371.       parent->page->setCODE(code);
  372.       sig_bits = max_bits(index);
  373.     }
  374.   }
  375.   void print() {
  376.     std::cout << "-----------------------------------------" << std::endl;
  377.     std::cout << "bucket.capacity : " << bucket_size << "\t|||\t";
  378.     std::cout << "number ( buckets ) : " << no_of_buckets << "\t|||\t";
  379.     std::cout << "max sig_bits : " << sig_bits << std::endl;
  380.     int total = printNode(index);
  381.     std::cout << "-----------------------------------------" << std::endl;
  382.     std::cout << "elements in hashtable : " << total << std::endl;
  383.     std::cout << "-----------------------------------------" << std::endl;
  384.   }
  385. private:
  386.   TrieNode* findNode(int hashkey, int *const code)
  387.   // using lower sig_bits of hashCode to calculate key
  388.   {
  389.     int d = 1;  // mask for calculating key from k
  390.     d <<= sig_bits;
  391.     d -= 1;   // all ones
  392.     int key = hashkey & d;
  393.     *code = 0;
  394.     int length = 0;
  395.     TrieNode *current = index;
  396.     while(current != NULL) {
  397.       if(current->page != NULL)
  398.         return current;
  399.       if(current->next[key & 1] != NULL){
  400.         current = current->next[key & 1];
  401.         int MASK = key&1; // current decision
  402.         if(length != 0)
  403.           MASK <<= length;
  404.         *code |= MASK;
  405.         key >>= 1;
  406.         ++length;
  407.       }
  408.     }
  409.     assert( 0 );  // should not come here.
  410.     return NULL;
  411.   }
  412.   void del(TrieNode *node) {
  413.     if(node == NULL)
  414.       return;
  415.     TrieNode *left, *right;
  416.     left = node->next[0];
  417.     right = node->next[1];
  418.     del(left);
  419.     del(right);
  420.     delete node;
  421.   }
  422.   int printNode(TrieNode *current) {
  423.     if(current == NULL)
  424.       return 0;
  425.     if(current->page != NULL) {
  426.       current->page->print();
  427.       return current->page->size();
  428.     }
  429.     int number = printNode(current->next[0]);
  430.     number += printNode(current->next[1]);
  431.     return number;
  432.   }
  433.   int max_bits(TrieNode *node) {
  434.     if(node->page != NULL)
  435.       return node->page->getSIG_BITS();
  436.     int left = max_bits(node->next[0]);
  437.     int right = max_bits(node->next[1]);
  438.     int max = (left > right) ? left : right;
  439.     return max;
  440.   }
  441.   const static int DFL_BUCKET_SIZE = 4;   // default bucket size
  442.   TrieNode *index;
  443.   int sig_bits;
  444.   int bucket_size;
  445.   int no_of_buckets;
  446.   //bool root;
  447. };
  448.  
  449. int printMenu() {
  450.   std::system("clear");
  451.   std::cout << "-----------------------------------------" << std::endl;
  452.   std::cout << "Extendible Hashing Implementation." << std::endl;
  453.   std::cout << "-----------------------------------------" << std::endl;
  454.   std::cout << "\t\t 1. Insert a Record" << std::endl;
  455.   std::cout << "\t\t 2. Search a Record" << std::endl;
  456.   std::cout << "\t\t 3. Delete a Record" << std::endl;
  457.   std::cout << "\t\t 4. Show All Records"<< std::endl;
  458.   std::cout << "\t\t 5. Delete All Records" << std::endl;
  459.   std::cout << "\t\t 6. Exit" << std::endl;
  460.   int choice;
  461.   std::cout << "\nChoice : ";
  462.   std::cin >> choice;
  463.   std::cout.flush();
  464.   return choice;
  465. }
  466.  
  467. void credits() {
  468.   std::system("clear");
  469.   std::cout << "\n\n\n";
  470.   std::cout << "\t\t\t  ----------------------------------------------" << std::endl;
  471.   std::cout << "\t\t\t /\\--------------------------------------------/\\" << std::endl;
  472.   std::cout << "\t\t\t|| Thanks for evaluating our Extendible Hashing ||" << std::endl;
  473.   std::cout << "\t\t\t|| \t\tImplementation.\t\t\t||" << std::endl;
  474.   std::cout << "\t\t\t \\/--------------------------------------------\\/" << std::endl;
  475.   std::cout << "\t\t\t  ----------------------------------------------" << std::endl;
  476.   std::cout << "\n\n\t\tImplementors:-\n";
  477.   std::cout << "\t\t\t2006ECS20\tRooparam Choudhary\t(Main Coder)" << std::endl;
  478.   std::cout << "\t\t\t2006ECS57\tKumar Ranjeet\t\t(Creative Head)" << std::endl;
  479.   std::cout << "\n\nenter to quit...";
  480.   std::getchar();
  481. }
  482.  
  483. int main()
  484. {
  485.   std::cout << "Enter bucket-size : ";
  486.   int n;
  487.   char ch;
  488.   std::cin >> n;
  489.   HashTable *table = new HashTable(n);
  490.   while(true) {
  491.     int choice = printMenu();
  492.     ch = std::getchar();  // eat newline
  493.     switch(choice) {
  494.       case 1:   // Insert a record
  495.       {
  496.         Student st;
  497.         std::cout << "\nEnter Student Name : ";
  498.         std::getline(std::cin, st.name);
  499.         std::cout << "Enter Entry Number : ";
  500.         std::getline(std::cin, st.entryNo);
  501.         table->insert(st);
  502.         break;
  503.       }
  504.       case 2:   // search a Record
  505.       {
  506.         std::string entNum;
  507.         std::cout << "\nEnter Entry Number: ";
  508.         std::getline(std::cin, entNum);
  509.         Student *st = table->search(entNum);
  510.         std::cout << *st;
  511.         std::cout << std::endl << "enter to continue ...";
  512.         ch = std::getchar();
  513.         break;
  514.       }
  515.       case 3:   // delete a Record
  516.       {
  517.         std::string entNum;
  518.         std::cout << "\nEnter Entry Number: ";
  519.         std::getline(std::cin, entNum);
  520.         table->remove(entNum);
  521.         break;
  522.       }
  523.       case 4:   // show All Records
  524.         std::system("clear");
  525.         table->print();
  526.         std::cout << std::endl << "enter to continue ...";
  527.         ch = std::getchar();
  528.         break;
  529.       case 5:   // clear All Data
  530.         delete table;
  531.         table = new HashTable(n);
  532.         break;
  533.       case 6:   // Exit
  534.         goto DONE;
  535.     }
  536.   }
  537.   DONE:
  538.   delete table;
  539.   credits();
  540.   return 0;
  541. }
  542.