Monday 9 November 2009

Extendible Hashing Implementation

  1. /*
  2.  *      ExtendedHashing.cpp
  3.  *      
  4.  *      Copyright 2009 roKB <rooparambhakar@gmail.com>
  5.  *      
  6.  *      This program is free software; you can redistribute it and/or modify
  7.  *      it under the terms of the GNU General Public License as published by
  8.  *      the Free Software Foundation; either version 2 of the License, or
  9.  *      (at your option) any later version.
  10.  *      
  11.  *      This program is distributed in the hope that it will be useful,
  12.  *      but WITHOUT ANY WARRANTY; without even the implied warranty of
  13.  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14.  *      GNU General Public License for more details.
  15.  *      
  16.  *      You should have received a copy of the GNU General Public License
  17.  *      along with this program; if not, write to the Free Software
  18.  *      Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  19.  *      MA 02110-1301, USA.
  20.  */
  21.  
  22. #include <string>
  23. #include <cassert>
  24. #include <iostream>
  25. #include <cstdio>
  26. #include <cstdlib>
  27.  
  28. int hashCode(std::string const & key)
  29. /*
  30.  * calculates the hash of key using polynomial expansion,
  31.  * with x = 37
  32.  */
  33. {
  34.   const long x = 41L;
  35.   int n = key.length();
  36.   long value = 0L;
  37.   while( n-- > 0 ) {
  38.     value *= x;   value += key[n];
  39.   }
  40.   return static_cast<int>(value);
  41. }
  42.  
  43. // ------------------------------------------------------------------------
  44. // ------------------------------------------------------------------------
  45. struct Student
  46. {
  47.   std::string entryNo;
  48.   std::string name;
  49. };
  50.  
  51. std::ostream& operator << (std::ostream& out, Student& s) {
  52.   out << s.entryNo << " : " << s.name;
  53.   return out;
  54. }
  55. // ------------------------------------------------------------------------
  56. // ------------------------------------------------------------------------
  57.  
  58.  
  59. struct Node
  60. /* sentinal nodes of linked list */
  61. {
  62.   Node *next;
  63. };
  64.  
  65. struct Pair {
  66.   int first;
  67.   Student *second;
  68.  
  69.   ~Pair() {
  70.     if(second != NULL)
  71.       delete second;
  72.   }
  73. };
  74.  
  75. struct InternalNode : public Node
  76. /* internal nodes of linked list */
  77. {
  78.   Pair *element;
  79.  
  80.   ~InternalNode() {
  81.     delete element;
  82.   }
  83. };
  84.  
  85. class LinkedList
  86. /*
  87.  * buckets in hash table
  88.  */
  89. {
  90. public:
  91.   LinkedList() {
  92.     head = new Node();
  93.     head->next = NULL;
  94.   }
  95.   ~LinkedList() {
  96.     del(head);
  97.   }
  98.   void insert(Student & elem, int hashkey) {
  99.     // create hash, data pair
  100.     Pair *data = new Pair();
  101.     data->first = hashkey;
  102.     data->second = &elem;
  103.     // create node in list
  104.     InternalNode *temp = new InternalNode();
  105.     temp->next = head->next;
  106.     temp->element = data;
  107.     head->next = temp;
  108.   }
  109.   Student* search(int hashkey) {
  110.     Node *current = head->next;
  111.     while(current != NULL) {
  112.       if(static_cast<InternalNode*>(current)->element->first == hashkey)
  113.         return static_cast<InternalNode*>(current)->element->second;
  114.       current = current->next;
  115.     }
  116.     return NULL;
  117.   }
  118.   void remove(int hashkey) {
  119.     Node *current = head->next;
  120.     Node *parent = head;
  121.     while(current != NULL) {
  122.       if(static_cast<InternalNode*>(current)->element->first == hashkey) {
  123.         // fix links in list then remove and return
  124.         parent->next = current->next;
  125.         current->next = NULL;
  126.         del(current);
  127.         return;
  128.       }
  129.       parent = current;
  130.       current = current->next;
  131.     }
  132.   }
  133.   void removeAll() {
  134.     del(head->next);
  135.     head->next = NULL;
  136.   }
  137.   void print(){
  138.     InternalNode *current = static_cast<InternalNode*>(head->next);
  139.     int i = 1;
  140.     while(current != NULL) {
  141.       std::cout << i << " : hashkey(0x" << std::hex << static_cast<unsigned int>( current->element->first ) << ") <" << *(current->element->second) << ">" << std::endl;
  142.       current = static_cast<InternalNode*>(current->next);
  143.       ++i;
  144.     }
  145.   }
  146.   Node *getList() {
  147.     return head->next;
  148.   }
  149. private:
  150.   void del(Node *node) {
  151.     if(node == NULL)
  152.       return;
  153.      
  154.     del(node->next);
  155.     delete node;
  156.   }
  157.   Node *head;
  158. };
  159.  
  160. class Bucket
  161. /*
  162.  * uses LinkedList for storage
  163.  * bucket overflow and underflow is left for xtendedHashing functions
  164.  * they should check for underflow and overflow in bucket, otherwise
  165.  * program will terminate
  166.  */
  167. {
  168. public:
  169.   Bucket(int c, int s = 0) : sig_bits(s), cap(c), sz(0), code(0), root(false) {
  170.     data = new LinkedList();
  171.   }
  172.   ~Bucket() {
  173.     delete data;
  174.   }
  175.   int capacity() {
  176.     return cap;
  177.   }
  178.   int size() {
  179.     return sz;
  180.   }
  181.   void insert(Student& elem, int hashkey) {
  182.     data->insert(elem, hashkey);
  183.     ++sz;
  184.     if(root)
  185.       return;
  186.     assert( ( size() <= capacity() ) );
  187.   }
  188.   Student* search(int hashkey) {
  189.     return data->search(hashkey);
  190.   }
  191.   void remove(int hashkey) {
  192.     if(search(hashkey) != NULL)
  193.       --sz;
  194.     data->remove(hashkey);
  195.     assert( (size() >= 0) );
  196.   }
  197.   void emptyBucket() {
  198.     data->removeAll();
  199.     sig_bits = 0;
  200.     sz = 0;
  201.   }
  202.   bool isEmpty() {
  203.     return size() == 0;
  204.   }
  205.   void print() {
  206.     std::cout << "----- <BUCKET (";
  207.     printcode();
  208.     std::cout << ") > -----" << std::endl;
  209.     std::cout << "SIG_BITS : " << sig_bits << std::endl;
  210.     data->print();
  211.     std::cout << std::endl;
  212.   }
  213.   Node* getList() {
  214.     return data->getList();
  215.   }
  216.   int getSIG_BITS() {
  217.     return sig_bits;
  218.   }
  219.   void setSIG_BITS(int s) {
  220.     sig_bits = s;
  221.     assert( (sig_bits >= 0) );
  222.   }
  223.   void setCODE(int c) {
  224.     code = c;
  225.   }
  226.   void setROOT(bool state) {
  227.     root = state;
  228.   }
  229. private:
  230.   void printcode() {
  231.     int length = sig_bits;
  232.     int temp = code;
  233.     while(length-- > 0) {
  234.       std::cout << temp%2;
  235.       temp >>= 1;
  236.     }
  237.   }
  238.   LinkedList *data;
  239.   int sig_bits;
  240.   int cap;
  241.   int sz;
  242.   int code;
  243.   bool root;
  244. };
  245.  
  246. struct TrieNode
  247. {
  248.   Bucket *page;
  249.   TrieNode *next[2];
  250.   TrieNode *parent;
  251.  
  252.   ~TrieNode() {
  253.     if(page != NULL)
  254.       delete page;
  255.   }
  256. };
  257.  
  258. class HashTable
  259. /*
  260.  * at last we are at the implementation of Extendible-hashTable ^v^
  261.  * uses tries index for bucket lookup
  262.  * "now is the time to show your coding skills rrc"
  263.  */
  264. {
  265. public:
  266.   HashTable(int buk = DFL_BUCKET_SIZE) : bucket_size(buk) {
  267.     sig_bits = 0;
  268.     //root = false;
  269.     index = new TrieNode();
  270.     index->page = new Bucket(bucket_size, sig_bits);
  271.     index->next[0] = NULL;
  272.     index->next[1] = NULL;
  273.     index->parent = NULL;
  274.     no_of_buckets = 1;
  275.   }
  276.   ~HashTable() {
  277.     del(index);
  278.   }
  279.   Student* search(std::string number) {
  280.     int code;
  281.     int hashkey = hashCode(number);
  282.     TrieNode *node = findNode(hashkey, &code);
  283.     return node->page->search(hashkey);
  284.   }
  285.   void insert(Student stu) {
  286.     Student & st = *new Student();
  287.     st.name = stu.name;
  288.     st.entryNo = stu.entryNo;
  289.     int hashkey = hashCode(st.entryNo);
  290.     int code;
  291.     TrieNode *node = findNode(hashkey, &code);
  292.     Bucket *page = node->page;
  293.     //if(root || page->size()+1 <= page->capacity())
  294.     if(page->size()+1 <= page->capacity())
  295.     {
  296.       // insert in bucket
  297.       // page->setROOT(root);
  298.       page->insert(st, hashkey);
  299.       // page->setROOT(false);
  300.       return;
  301.     } else {    // bucket is full, make a new bucket
  302.       // step 7
  303.       Bucket *newPage = new Bucket(bucket_size);
  304.       no_of_buckets++;
  305.       // step 8
  306.       LinkedList *Q = new LinkedList();
  307.       InternalNode *current = static_cast<InternalNode*>( page->getList() );
  308.       while(current != NULL) {
  309.         Q->insert(*(current->element->second), current->element->first);
  310.         current = static_cast<InternalNode*>(current->next);
  311.       }
  312.       Q->insert(st, hashkey);
  313.       // step 9
  314.       int oldD = page->getSIG_BITS();
  315.       int MASK = 1 << oldD;
  316.       newPage->setSIG_BITS(oldD + 1);
  317.       // step 10
  318.       page->emptyBucket();
  319.       page->setSIG_BITS(oldD + 1);
  320.       if(oldD+1 > sig_bits){
  321.         ++sig_bits;
  322.       }
  323.         node->page = NULL;
  324.         TrieNode *left = new TrieNode();
  325.         TrieNode *right = new TrieNode();
  326.         node->next[0] = left;
  327.         node->next[1] = right;
  328.         left->page = page;
  329.         right->page = newPage;
  330.         left->next[0] = left->next[1] = NULL; left->parent = node;
  331.         right->next[0] = right->next[1] = NULL; right->parent = node;
  332.         left->page->setCODE(code);
  333.         right->page->setCODE(code | MASK);
  334.  
  335.       current = static_cast<InternalNode*>( Q->getList() );
  336.       // root = true;
  337.       while(current != NULL) {
  338.         insert( *(current->element->second) );
  339.         current = static_cast<InternalNode*>( current->next );
  340.       }
  341.       // root = false;
  342.     }
  343.   }
  344.   void remove(std::string number){
  345.     int hashkey = hashCode(number);
  346.     int code;
  347.     TrieNode *node = findNode(hashkey, &code);
  348.     Bucket *page = node->page;
  349.     if(page->search(hashkey) == NULL)
  350.       return;
  351.     page->remove(hashkey);
  352.     if(page->size() == 0 && node->parent != NULL)
  353.     {
  354.       TrieNode *parent = node->parent;
  355.       TrieNode *sibling = (parent->next[0] == node)?
  356.             (parent->next[1]) : (parent->next[0]);
  357.       if(sibling->next[0] != NULL || sibling->next[1] != NULL)
  358.         return;
  359.      
  360.       no_of_buckets--;
  361.       parent->page = sibling->page;
  362.       sibling->page = NULL;
  363.       parent->next[0] = NULL;
  364.       parent->next[1] = NULL;
  365.       del(node);
  366.       del(sibling);
  367.       parent->page->setSIG_BITS(parent->page->getSIG_BITS() - 1);
  368.       int MASK = 1 << parent->page->getSIG_BITS();
  369.       MASK -= 1;
  370.       code &= MASK;
  371.       parent->page->setCODE(code);
  372.       sig_bits = max_bits(index);
  373.     }
  374.   }
  375.   void print() {
  376.     std::cout << "-----------------------------------------" << std::endl;
  377.     std::cout << "bucket.capacity : " << bucket_size << "\t|||\t";
  378.     std::cout << "number ( buckets ) : " << no_of_buckets << "\t|||\t";
  379.     std::cout << "max sig_bits : " << sig_bits << std::endl;
  380.     int total = printNode(index);
  381.     std::cout << "-----------------------------------------" << std::endl;
  382.     std::cout << "elements in hashtable : " << total << std::endl;
  383.     std::cout << "-----------------------------------------" << std::endl;
  384.   }
  385. private:
  386.   TrieNode* findNode(int hashkey, int *const code)
  387.   // using lower sig_bits of hashCode to calculate key
  388.   {
  389.     int d = 1;  // mask for calculating key from k
  390.     d <<= sig_bits;
  391.     d -= 1;   // all ones
  392.     int key = hashkey & d;
  393.     *code = 0;
  394.     int length = 0;
  395.     TrieNode *current = index;
  396.     while(current != NULL) {
  397.       if(current->page != NULL)
  398.         return current;
  399.       if(current->next[key & 1] != NULL){
  400.         current = current->next[key & 1];
  401.         int MASK = key&1; // current decision
  402.         if(length != 0)
  403.           MASK <<= length;
  404.         *code |= MASK;
  405.         key >>= 1;
  406.         ++length;
  407.       }
  408.     }
  409.     assert( 0 );  // should not come here.
  410.     return NULL;
  411.   }
  412.   void del(TrieNode *node) {
  413.     if(node == NULL)
  414.       return;
  415.     TrieNode *left, *right;
  416.     left = node->next[0];
  417.     right = node->next[1];
  418.     del(left);
  419.     del(right);
  420.     delete node;
  421.   }
  422.   int printNode(TrieNode *current) {
  423.     if(current == NULL)
  424.       return 0;
  425.     if(current->page != NULL) {
  426.       current->page->print();
  427.       return current->page->size();
  428.     }
  429.     int number = printNode(current->next[0]);
  430.     number += printNode(current->next[1]);
  431.     return number;
  432.   }
  433.   int max_bits(TrieNode *node) {
  434.     if(node->page != NULL)
  435.       return node->page->getSIG_BITS();
  436.     int left = max_bits(node->next[0]);
  437.     int right = max_bits(node->next[1]);
  438.     int max = (left > right) ? left : right;
  439.     return max;
  440.   }
  441.   const static int DFL_BUCKET_SIZE = 4;   // default bucket size
  442.   TrieNode *index;
  443.   int sig_bits;
  444.   int bucket_size;
  445.   int no_of_buckets;
  446.   //bool root;
  447. };
  448.  
  449. int printMenu() {
  450.   std::system("clear");
  451.   std::cout << "-----------------------------------------" << std::endl;
  452.   std::cout << "Extendible Hashing Implementation." << std::endl;
  453.   std::cout << "-----------------------------------------" << std::endl;
  454.   std::cout << "\t\t 1. Insert a Record" << std::endl;
  455.   std::cout << "\t\t 2. Search a Record" << std::endl;
  456.   std::cout << "\t\t 3. Delete a Record" << std::endl;
  457.   std::cout << "\t\t 4. Show All Records"<< std::endl;
  458.   std::cout << "\t\t 5. Delete All Records" << std::endl;
  459.   std::cout << "\t\t 6. Exit" << std::endl;
  460.   int choice;
  461.   std::cout << "\nChoice : ";
  462.   std::cin >> choice;
  463.   std::cout.flush();
  464.   return choice;
  465. }
  466.  
  467. void credits() {
  468.   std::system("clear");
  469.   std::cout << "\n\n\n";
  470.   std::cout << "\t\t\t  ----------------------------------------------" << std::endl;
  471.   std::cout << "\t\t\t /\\--------------------------------------------/\\" << std::endl;
  472.   std::cout << "\t\t\t|| Thanks for evaluating our Extendible Hashing ||" << std::endl;
  473.   std::cout << "\t\t\t|| \t\tImplementation.\t\t\t||" << std::endl;
  474.   std::cout << "\t\t\t \\/--------------------------------------------\\/" << std::endl;
  475.   std::cout << "\t\t\t  ----------------------------------------------" << std::endl;
  476.   std::cout << "\n\n\t\tImplementors:-\n";
  477.   std::cout << "\t\t\t2006ECS20\tRooparam Choudhary\t(Main Coder)" << std::endl;
  478.   std::cout << "\t\t\t2006ECS57\tKumar Ranjeet\t\t(Creative Head)" << std::endl;
  479.   std::cout << "\n\nenter to quit...";
  480.   std::getchar();
  481. }
  482.  
  483. int main()
  484. {
  485.   std::cout << "Enter bucket-size : ";
  486.   int n;
  487.   char ch;
  488.   std::cin >> n;
  489.   HashTable *table = new HashTable(n);
  490.   while(true) {
  491.     int choice = printMenu();
  492.     ch = std::getchar();  // eat newline
  493.     switch(choice) {
  494.       case 1:   // Insert a record
  495.       {
  496.         Student st;
  497.         std::cout << "\nEnter Student Name : ";
  498.         std::getline(std::cin, st.name);
  499.         std::cout << "Enter Entry Number : ";
  500.         std::getline(std::cin, st.entryNo);
  501.         table->insert(st);
  502.         break;
  503.       }
  504.       case 2:   // search a Record
  505.       {
  506.         std::string entNum;
  507.         std::cout << "\nEnter Entry Number: ";
  508.         std::getline(std::cin, entNum);
  509.         Student *st = table->search(entNum);
  510.         std::cout << *st;
  511.         std::cout << std::endl << "enter to continue ...";
  512.         ch = std::getchar();
  513.         break;
  514.       }
  515.       case 3:   // delete a Record
  516.       {
  517.         std::string entNum;
  518.         std::cout << "\nEnter Entry Number: ";
  519.         std::getline(std::cin, entNum);
  520.         table->remove(entNum);
  521.         break;
  522.       }
  523.       case 4:   // show All Records
  524.         std::system("clear");
  525.         table->print();
  526.         std::cout << std::endl << "enter to continue ...";
  527.         ch = std::getchar();
  528.         break;
  529.       case 5:   // clear All Data
  530.         delete table;
  531.         table = new HashTable(n);
  532.         break;
  533.       case 6:   // Exit
  534.         goto DONE;
  535.     }
  536.   }
  537.   DONE:
  538.   delete table;
  539.   credits();
  540.   return 0;
  541. }
  542.  

8 comments:

Anonymous said...

man, you saved my life with this ! thanks a lot ! be blessed.

rooparam said...

thank u anonymous reader. i hope u do visit my blog regulary

regards,
rooparam

Anonymous said...

thank you soooo much

AyaM Sakata said...

I was like looking for this the whole day.. thanks

Anonymous said...

can´t thank you enough.. was terribly stuck
you saved my semester

Richa Gupta said...

thank you so much for the post. can you please suggest me some good source to study and implement database implementation algorithms.

rooparam said...

hi richa, you can use course [1] to implement database alogrithms. In this course, they are using attica (basic database implemented in java) database to understand and improve database algorithms.

For completion, I will mention a similar course [2] for Operating Systems algorithms and data structures. Stanford offers CS140 course for operating systems in which they use a basis operating system (pintos) written in C language to implement memory management, task scheduler etc.

links:
[1] http://www.inf.ed.ac.uk/teaching/courses/adbs/
[2] https://web.stanford.edu/~ouster/cgi-bin/cs140-winter16/index.php

Pavan said...

Great work Buddy!!
Thanks :)