Sean的吃喝玩樂+工作: 5月 2012

日前找到hash_map相關說明，據說比map效率更好，以下就承前一次vector與map，這次再加入hash_map來比較

［1］vector , map及hash_map 比較
#include <iostream>
#include <vector>
#include <list>
#include <map>
#include <ctime>
#include <stdio.h>
#include <time.h>
#include <ext/hash_map> /*for hash_map*/
//---------------------------------------------------------------------------
#pragma argsused

using namespace std;
using namespace __gnu_cxx; /*for hash_map*/

int main(int argc, char* argv[])
{

        //vector
        vector<int> v;
        vector<int> ::iterator it;

        v.push_back( 10 );

        cout << "vector container " << endl << "================" << endl << endl;
        time_t t1; time(&t1);
        for(int i=0;i<2000000;i++)
                v.push_back(i);
        //            v.insert( v.begin( ) , i );
        time_t t2; time(&t2);
        cout<<" vcetor insert data , the time it takes is " << t2-t1 << " sec." << endl;

        sort(v.begin(),v.end());

        int a;
        time_t t5; time(&t5);
        for(int i=0;i<2000000;i++)
        {
            srand(time(NULL));
            a=(rand() % 2000000) +1;
            bool b1 = binary_search(v.begin(),v.end(),a);
        }
        time_t t6; time(&t6);
        cout<<" vcetor search data , the time it takes is " << t6-t5 << " sec." << endl;

        time_t t3; time(&t3);
        v.erase (v.begin(),v.begin()+2000000);
        time_t t4; time(&t4);
        cout <<" vcetor erase data , the time it takes is " << t4-t3 << " sec." << endl ;
      //map
      map< int,int > mymap;
      map< int,int >::iterator map_it;
      pair< map< int,int >::iterator,bool > ret;

      mymap.insert ( pair< int,int >(0,100) );

      cout << endl << "map container " << endl << "================" << endl;
      time_t map_t1; time(&map_t1);
      for(int i=1;i<2000000;i++)
          mymap.insert ( pair< int,int >(i,2000000+i) );
      time_t map_t2; time(&map_t2);
      cout << endl <<" map insert data , the time it takes is " << map_t2-map_t1 << " sec." << endl;

      int c;
      time_t map_t5; time(&map_t5);
      for(int i=0;i<2000000;i++)
      {
          srand(time(NULL));
          c=(rand() % 2000000) +1;
          map_it=mymap.find(c);
      }
      time_t map_t6; time(&map_t6);
      cout<<" map search data , the time it takes is " << map_t6-map_t5 << " sec." << endl;

      time_t map_t3; time(&map_t3);
      mymap.erase(mymap.begin(),mymap.end());
      time_t map_t4; time(&map_t4);
      cout <<" map erase data , the time it takes is " << map_t4-map_t3 << " sec." << endl << endl;

      //hash_map default
      //hash_map< int,int > myhash1(997);
      //hash_map< int,int > myhash1;
      hash_map< int,int > myhash1(1543);
      hash_map< int,int >::iterator hash1_it;
      pair< hash_map< int,int >::iterator,bool > hash1_ret;

      myhash1.insert ( pair< int,int >(0,100) );

      cout << endl << "map container " << endl << "================" << endl;
      cout <<"bucket count:" << myhash1.bucket_count()<<endl;
      time_t hash1_t1; time(&hash1_t1);
      for(int i=1;i<2000000;i++)
          myhash1.insert ( pair< int,int >(i,2000000+i) );
      time_t hash1_t2; time(&hash1_t2);
      cout << endl <<" hash_map default insert data , the time it takes is " << hash1_t2-hash1_t1 << " sec." << endl;

      int d;
      time_t hash1_t5; time(&hash1_t5);
      for(int i=0;i<2000000;i++)
      {
          srand(time(NULL));
          d=(rand() % 2000000) +1;
          hash1_it=myhash1.find(d);
      }
      time_t hash1_t6; time(&hash1_t6);
      cout<<" hash_map default search data , the time it takes is " << hash1_t6-hash1_t5 << " sec." << endl;

      time_t hash1_t3; time(&hash1_t3);
      myhash1.erase(myhash1.begin(),myhash1.end());
      time_t hash1_t4; time(&hash1_t4);
      cout <<" hash_map default erase data , the time it takes is " << hash1_t4-hash1_t3 << " sec." << endl << endl;

      system("pause");
      return 0;
}

得到的結果：

[root@localhost ~]# ./comp

vector container
================

vcetor insert data , the time it takes is 0 sec.
vcetor search data , the time it takes is 8 sec.
vcetor erase data , the time it takes is 0 sec.

map container
================

map insert data , the time it takes is 3 sec.
map search data , the time it takes is 8 sec.
map erase data , the time it takes is 0 sec.

map container
================
bucket count:1543

hash_map default insert data , the time it takes is 1 sec.
hash_map default search data , the time it takes is 8 sec.
hash_map default erase data , the time it takes is 0 sec.

［2］hash_map 調校與比較
針對hash_map進行string的處理與map比較，看看hash方式不一樣是否有差別

#if defined(__BORLANDC__)
   #include < vcl.h >
#endif
#pragma hdrstop
#include <iostream>
#include <map>
#include <ctime>
#include <stdio.h>
#include <time.h>
#if defined(_MSC_VER)
   # include <hash_map>
   using stdext::hash_map;
#elif defined(__GNUC__)
   #include <ext/hash_map> /*for hash_map*/
   using namespace __gnu_cxx; /*for hash_map*/
#elif defined(__BORLANDC__)
   #include <stlport\hash_map> /*for hash_map*/
#else
   # error wrong compiler
#endif
//---------------------------------------------------------------------------
#pragma argsused

using namespace std;
struct str_hash{      //自写hash函数
   size_t operator()(const string& str) const
   {
       unsigned long __h = 0;
       for (size_t i = 0 ; i < str.size() ; i ++)
       {
           __h = 107*__h + str[i];
       }
       return size_t(__h);
   }
};
struct str_hash_default{    //自带的string hash函数
   size_t operator()(const string& str) const
   {
       return __stl_hash_string(str.c_str());
   }
};
struct str_equal{      //string 判断相等函数
   bool operator()(const string& s1,const string& s2) const
   {
       return s1==s2;
   }
};

struct char_equal{      //char* 判断相等函数
   bool operator()(const char* s1,const char* s2) const
   {
       return strcmp(s1,s2)==0;
   }
};

int main(int argc, char* argv[])
{
      char buf[100];
      //map
      map< string,int > mymap;
      map< string,int >::iterator map_it;
      pair< map< string,int >::iterator,bool > ret;

      mymap.insert ( pair< string,int >("0",100) );

      cout << endl << "map container " << endl << "================" << endl;
      time_t map_t1; time(&map_t1);
      for(int i=1;i<2000000;i++)
      {
      sprintf(buf,"%d",i);
          mymap.insert ( pair< string,int >(buf,2000000+i) );
      }
      time_t map_t2; time(&map_t2);
      cout << endl <<" map insert data , the time it takes is " << map_t2-map_t1 << " sec." << endl;

      int c;
      time_t map_t5; time(&map_t5);
      for(int i=0;i<2000000;i++)
      {
          srand(time(NULL));
          c=(rand() % 2000000) +1;
      sprintf(buf,"%d",c);
          map_it=mymap.find(buf);
      }
      time_t map_t6; time(&map_t6);
      cout<<" map search data , the time it takes is " << map_t6-map_t5 << " sec." << endl;

      time_t map_t3; time(&map_t3);
      mymap.erase(mymap.begin(),mymap.end());
      time_t map_t4; time(&map_t4);
      cout <<" map erase data , the time it takes is " << map_t4-map_t3 << " sec." << endl << endl;

      //hash_map default
      //hash_map< int,int > myhash1(997);
      //hash_map< int,int > myhash1;
      //hash_map< string,int,str_hash,str_equal > myhash1(1543);
// Episode 1: hash_map<,,str_hash,str_equal> vs map<>
//    hash_map< string,int,str_hash,str_equal > myhash1;
//    hash_map< string,int,str_hash,str_equal > myhash1(12289);
//    hash_map< string,int,str_hash,str_equal >::iterator hash1_it;
//    pair< hash_map< string,int,str_hash,str_equal >::iterator,bool > hash1_ret;

// Episode 2: hash_map<,,str_hash_default,str_equal> vs map<>
//      hash_map< string,int,str_hash_default,str_equal > myhash1;
//      hash_map< string,int,str_hash_default,str_equal > myhash1(1543);
    hash_map< string,int,str_hash_default,str_equal > myhash1(12289);
    hash_map< string,int,str_hash_default,str_equal >::iterator hash1_it;
    pair< hash_map< string,int,str_hash_default,str_equal >::iterator,bool > hash1_ret;

// Episode Trail: hash_map<,,hash<const char*>,char_equal> vs map<>
//      hash_map< const char*,int,hash<const char*>,char_equal > myhash1;
//      hash_map< const char*,int,hash<const char*>,char_equal >::iterator hash1_it;
//      pair< hash_map< const char*,int,hash<const char*>,char_equal >::iterator,bool > hash1_ret;

//      myhash1.insert ( pair< const char*,int >("0",100) ); /*Episode Trail*/
      myhash1.insert ( pair< string,int >("0",100) );

      cout << endl << "map container " << endl << "================" << endl;
      cout <<"bucket count:" << myhash1.bucket_count()<<endl;
      time_t hash1_t1; time(&hash1_t1);
      for(int i=1;i<2000000;i++)
      {
      sprintf(buf,"%d",i);
//          myhash1.insert ( pair< const char*,int >(buf,2000000+i) ); /* Episode Trail */
          myhash1.insert ( pair< string,int >(buf,2000000+i) );
      }
      time_t hash1_t2; time(&hash1_t2);
      cout << endl <<" hash_map default insert data , the time it takes is " << hash1_t2-hash1_t1 << " sec." << endl;

      int d;
      time_t hash1_t5; time(&hash1_t5);
      for(int i=0;i<2000000;i++)
      {
          srand(time(NULL));
          d=(rand() % 2000000) +1;
      sprintf(buf,"%d",d);
          hash1_it=myhash1.find(buf);
      }
      time_t hash1_t6; time(&hash1_t6);
      cout<<" hash_map default search data , the time it takes is " << hash1_t6-hash1_t5 << " sec." << endl;

      time_t hash1_t3; time(&hash1_t3);
      myhash1.erase(myhash1.begin(),myhash1.end());
      time_t hash1_t4; time(&hash1_t4);
      cout <<" hash_map default erase data , the time it takes is " << hash1_t4-hash1_t3 << " sec." << endl << endl;

      system("pause");
      return 0;
}

結果1(map與Episode1的比較，自訂義hash方式）

[root@localhost ~]# ./comp1

map container
================

map insert data , the time it takes is 7 sec.
map search data , the time it takes is 11 sec.
map erase data , the time it takes is 0 sec.

map container
================
bucket count:12289

hash_map default insert data , the time it takes is 2 sec.
hash_map default search data , the time it takes is 8 sec.
hash_map default erase data , the time it takes is 1 sec.

結果2(map與Episode2的比較，使用string的hash方式）
[root@localhost ~]# ./comp1

map container
================

map insert data , the time it takes is 7 sec.
map search data , the time it takes is 10 sec.
map erase data , the time it takes is 1 sec.

map container
================
bucket count:12289

hash_map default insert data , the time it takes is 3 sec.
hash_map default search data , the time it takes is 9 sec.
hash_map default erase data , the time it takes is 1 sec.

從結果1及2，發現hash_map在資料插入的效率比map好很多，而search的能力也有些微的強化。而使用自訂定的hash方式，又比string預設的hash效果又好一些

hash的bucket_count設定多少（在contructor參數帶入），要實測過才知道，不是愈大愈好，不僅愈大記憶體吃愈凶，過大甚至發現不見得對效能有正面的幫助(設定時以質數為要）
得到的結果：

相關連結：

http://blog.watashi.ws/618/gcc-visual-studio-hash-map/

http://www.cnblogs.com/kex1n/archive/2010/01/13/1646911.html

http://www.360doc.com/content/08/0801/13/26860_1497144.shtml

質數表：http://wikiyou.tw/%E8%B3%AA%E6%95%B8%E8%A1%A8/
hash function: http://lumiera.org/documentation/technical/howto/HashFunctions.html
  http://stackoverflow.com/questions/1059545/how-to-use-stdexthash-map-where-the-key-is-a-custom-object
  http://www.sparknotes.com/cs/searching/hashtables/section2.rhtml
http://stackoverflow.com/questions/5303205/hash-map-questions-tutorial
struct hash http://stackoverflow.com/questions/13389631/whats-a-good-hash-function-for-struct-with-3-unsigned-chars-and-an-int-for-uno
  http://stackoverflow.com/questions/8319886/hashing-function-for-nested-struct-in-struct-when-using-an-unordered-set
http://en.cppreference.com/w/cpp/utility/hash
http://blog.163.com/xychenbaihu@yeah/blog/static/132229655201310201072713/

bucket_size : http://www.itpub.net/thread-1294185-1-1.html
  http://forums.codeguru.com/showthread.php?371872-hash_map-amp-bucket_size
  http://blog.sina.com.cn/s/blog_648d306d0100mx4l.html
less<> : http://hi.baidu.com/mxp446533129/item/337f6cf182fc2a1ece9f328b
  http://blog.csdn.net/kunlong0909/article/details/8990069
testOK: http://blog.csdn.net/fullsail/article/details/5045355