2012年5月29日 星期二

vector與map...怎麼選擇(後續)

日前找到hash_map相關說明,據說比map效率更好, 以下就承前一次vector與map, 這次再加入hash_map來比較

[1]vector , map及hash_map 比較
#include <iostream>
#include <vector>
#include <list>
#include <map>
#include <ctime>
#include <stdio.h>
#include <time.h>
#include <ext/hash_map>  /*for hash_map*/
//---------------------------------------------------------------------------
#pragma argsused

using namespace std;
using namespace __gnu_cxx; /*for hash_map*/


int main(int argc, char* argv[])
{

        //vector
        vector<int>  v;
        vector<int> ::iterator it;

        v.push_back( 10 );

        cout << "vector container " << endl << "================" << endl << endl;
        time_t t1; time(&t1);
        for(int i=0;i<2000000;i++)
                v.push_back(i);
        //            v.insert( v.begin( ) , i );
        time_t t2; time(&t2);
        cout<<" vcetor insert data , the time it takes is " << t2-t1 << " sec." << endl;

        sort(v.begin(),v.end());

        int a;
        time_t t5; time(&t5);
        for(int i=0;i<2000000;i++)
        {
            srand(time(NULL));
            a=(rand() % 2000000) +1;
            bool b1 = binary_search(v.begin(),v.end(),a);
        }
        time_t t6; time(&t6);
        cout<<" vcetor search data , the time it takes is " << t6-t5 << " sec." << endl;


        time_t t3; time(&t3);
        v.erase (v.begin(),v.begin()+2000000);
        time_t t4; time(&t4);
        cout <<" vcetor erase data , the time it takes is " << t4-t3 << " sec." << endl ;
      //map
      map< int,int > mymap;
      map< int,int >::iterator map_it;
      pair< map< int,int >::iterator,bool > ret;


      mymap.insert ( pair< int,int >(0,100) );

      cout << endl << "map container " << endl << "================"  << endl;
      time_t map_t1; time(&map_t1);
      for(int i=1;i<2000000;i++)
          mymap.insert ( pair< int,int >(i,2000000+i) );
      time_t map_t2; time(&map_t2);
      cout << endl <<" map insert data , the time it takes is " << map_t2-map_t1 << " sec." << endl;


      int c;
      time_t map_t5; time(&map_t5);
      for(int i=0;i<2000000;i++)
      {
          srand(time(NULL));
          c=(rand() % 2000000) +1;
          map_it=mymap.find(c);
      }
      time_t map_t6; time(&map_t6);
      cout<<" map search data , the time it takes is " << map_t6-map_t5 << " sec." << endl;



      time_t map_t3; time(&map_t3);
      mymap.erase(mymap.begin(),mymap.end());
      time_t map_t4; time(&map_t4);
      cout  <<" map erase data , the time it takes is " << map_t4-map_t3 << " sec." << endl << endl;

      //hash_map default
      //hash_map< int,int > myhash1(997);
      //hash_map< int,int > myhash1;
      hash_map< int,int > myhash1(1543);
      hash_map< int,int >::iterator hash1_it;
      pair< hash_map< int,int >::iterator,bool > hash1_ret;


      myhash1.insert ( pair< int,int >(0,100) );

      cout << endl << "map container " << endl << "================"  << endl;
      cout <<"bucket count:" << myhash1.bucket_count()<<endl;
      time_t hash1_t1; time(&hash1_t1);
      for(int i=1;i<2000000;i++)
          myhash1.insert ( pair< int,int >(i,2000000+i) );
      time_t hash1_t2; time(&hash1_t2);
      cout << endl <<" hash_map default insert data , the time it takes is " << hash1_t2-hash1_t1 << " sec." << endl;


      int d;
      time_t hash1_t5; time(&hash1_t5);
      for(int i=0;i<2000000;i++)
      {
          srand(time(NULL));
          d=(rand() % 2000000) +1;
          hash1_it=myhash1.find(d);
      }
      time_t hash1_t6; time(&hash1_t6);
      cout<<" hash_map default search data , the time it takes is " << hash1_t6-hash1_t5 << " sec." << endl;



      time_t hash1_t3; time(&hash1_t3);
      myhash1.erase(myhash1.begin(),myhash1.end());
      time_t hash1_t4; time(&hash1_t4);
      cout  <<" hash_map default erase data , the time it takes is " << hash1_t4-hash1_t3 << " sec." << endl << endl;

      system("pause");
      return 0;
}


得到的結果:

[root@localhost ~]# ./comp

vector container
================

 vcetor insert data , the time it takes is 0 sec.
 vcetor search data , the time it takes is 8 sec.
 vcetor erase data , the time it takes is 0 sec.


map container
================

 map insert data , the time it takes is 3 sec.
 map search data , the time it takes is 8 sec.
 map erase data , the time it takes is 0 sec.


map container
================
bucket count:1543

 hash_map default insert data , the time it takes is 1 sec.
 hash_map default search data , the time it takes is 8 sec.
 hash_map default erase data , the time it takes is 0 sec.


[2]hash_map 調校與比較
針對hash_map進行string的處理與map比較, 看看hash方式不一樣是否有差別


#if defined(__BORLANDC__)
    #include < vcl.h >
#endif
#pragma hdrstop
#include <iostream>
#include <map>
#include <ctime>
#include <stdio.h>
#include <time.h>
#if defined(_MSC_VER)
    # include <hash_map>
    using stdext::hash_map;
#elif defined(__GNUC__)
    #include <ext/hash_map>  /*for hash_map*/
    using namespace __gnu_cxx; /*for hash_map*/
#elif defined(__BORLANDC__)
    #include <stlport\hash_map>  /*for hash_map*/
#else
    # error wrong compiler
#endif
//---------------------------------------------------------------------------
#pragma argsused

using namespace std;
struct str_hash{      //自写hash函数
    size_t operator()(const string& str) const
    {
        unsigned long __h = 0;
        for (size_t i = 0 ; i < str.size() ; i ++)
        {
            __h = 107*__h + str[i];
        }
        return size_t(__h);
    }
};
struct str_hash_default{    //自带的string hash函数
    size_t operator()(const string& str) const
    {
        return __stl_hash_string(str.c_str());
    }
};
struct str_equal{      //string 判断相等函数
    bool operator()(const string& s1,const string& s2) const
    {
        return s1==s2;
    }
};

struct char_equal{      //char* 判断相等函数
    bool operator()(const char* s1,const char* s2) const
    {
        return strcmp(s1,s2)==0;
    }
};

int main(int argc, char* argv[])
{
      char buf[100];
      //map
      map< string,int > mymap;
      map< string,int >::iterator map_it;
      pair< map< string,int >::iterator,bool > ret;


      mymap.insert ( pair< string,int >("0",100) );

      cout << endl << "map container " << endl << "================"  << endl;
      time_t map_t1; time(&map_t1);
      for(int i=1;i<2000000;i++)
      {
      sprintf(buf,"%d",i);
          mymap.insert ( pair< string,int >(buf,2000000+i) );
      }
      time_t map_t2; time(&map_t2);
      cout << endl <<" map insert data , the time it takes is " << map_t2-map_t1 << " sec." << endl;


      int c;
      time_t map_t5; time(&map_t5);
      for(int i=0;i<2000000;i++)
      {
          srand(time(NULL));
          c=(rand() % 2000000) +1;
      sprintf(buf,"%d",c);
          map_it=mymap.find(buf);
      }
      time_t map_t6; time(&map_t6);
      cout<<" map search data , the time it takes is " << map_t6-map_t5 << " sec." << endl;



      time_t map_t3; time(&map_t3);
      mymap.erase(mymap.begin(),mymap.end());
      time_t map_t4; time(&map_t4);
      cout  <<" map erase data , the time it takes is " << map_t4-map_t3 << " sec." << endl << endl;

      //hash_map default
      //hash_map< int,int > myhash1(997);
      //hash_map< int,int > myhash1;
      //hash_map< string,int,str_hash,str_equal > myhash1(1543);
// Episode 1: hash_map<,,str_hash,str_equal> vs map<>
//    hash_map< string,int,str_hash,str_equal > myhash1;
//    hash_map< string,int,str_hash,str_equal > myhash1(12289);
//    hash_map< string,int,str_hash,str_equal >::iterator hash1_it;
//    pair< hash_map< string,int,str_hash,str_equal >::iterator,bool > hash1_ret;

// Episode 2: hash_map<,,str_hash_default,str_equal> vs map<>
//      hash_map< string,int,str_hash_default,str_equal > myhash1;
//      hash_map< string,int,str_hash_default,str_equal > myhash1(1543);
    hash_map< string,int,str_hash_default,str_equal > myhash1(12289);
    hash_map< string,int,str_hash_default,str_equal >::iterator hash1_it;
    pair< hash_map< string,int,str_hash_default,str_equal >::iterator,bool > hash1_ret;

// Episode Trail: hash_map<,,hash<const char*>,char_equal> vs map<>
//      hash_map< const char*,int,hash<const char*>,char_equal > myhash1;
//      hash_map< const char*,int,hash<const char*>,char_equal >::iterator hash1_it;
//      pair< hash_map< const char*,int,hash<const char*>,char_equal >::iterator,bool > hash1_ret;


//      myhash1.insert ( pair< const char*,int >("0",100) ); /*Episode Trail*/
      myhash1.insert ( pair< string,int >("0",100) );

      cout << endl << "map container " << endl << "================"  << endl;
      cout <<"bucket count:" << myhash1.bucket_count()<<endl;
      time_t hash1_t1; time(&hash1_t1);
      for(int i=1;i<2000000;i++)
      {
      sprintf(buf,"%d",i);
//          myhash1.insert ( pair< const char*,int >(buf,2000000+i) ); /* Episode Trail */
          myhash1.insert ( pair< string,int >(buf,2000000+i) );
      }
      time_t hash1_t2; time(&hash1_t2);
      cout << endl <<" hash_map default insert data , the time it takes is " << hash1_t2-hash1_t1 << " sec." << endl;


      int d;
      time_t hash1_t5; time(&hash1_t5);
      for(int i=0;i<2000000;i++)
      {
          srand(time(NULL));
          d=(rand() % 2000000) +1;
      sprintf(buf,"%d",d);
          hash1_it=myhash1.find(buf);
      }
      time_t hash1_t6; time(&hash1_t6);
      cout<<" hash_map default search data , the time it takes is " << hash1_t6-hash1_t5 << " sec." << endl;



      time_t hash1_t3; time(&hash1_t3);
      myhash1.erase(myhash1.begin(),myhash1.end());
      time_t hash1_t4; time(&hash1_t4);
      cout  <<" hash_map default erase data , the time it takes is " << hash1_t4-hash1_t3 << " sec." << endl << endl;

      system("pause");
      return 0;
}


結果1(map與Episode1的比較,自訂義hash方式)

[root@localhost ~]# ./comp1

map container
================

 map insert data , the time it takes is 7 sec.
 map search data , the time it takes is 11 sec.
 map erase data , the time it takes is 0 sec.


map container
================
bucket count:12289

 hash_map default insert data , the time it takes is 2 sec.
 hash_map default search data , the time it takes is 8 sec.
 hash_map default erase data , the time it takes is 1 sec.



結果2(map與Episode2的比較,使用string的hash方式)
[root@localhost ~]# ./comp1

map container
================

 map insert data , the time it takes is 7 sec.
 map search data , the time it takes is 10 sec.
 map erase data , the time it takes is 1 sec.


map container
================
bucket count:12289

 hash_map default insert data , the time it takes is 3 sec.
 hash_map default search data , the time it takes is 9 sec.
 hash_map default erase data , the time it takes is 1 sec.

從結果1及2,發現hash_map在資料插入的效率比map好很多, 而search的能力也有些微的強化。 而使用自訂定的hash方式, 又比string預設的hash效果又好一些

hash的bucket_count設定多少(在contructor參數帶入),要實測過才知道, 不是愈大愈好,不僅愈大記憶體吃愈凶, 過大甚至發現不見得對效能有正面的幫助(設定時以質數為要)
得到的結果:

相關連結:

http://blog.watashi.ws/618/gcc-visual-studio-hash-map/

http://www.cnblogs.com/kex1n/archive/2010/01/13/1646911.html

http://www.360doc.com/content/08/0801/13/26860_1497144.shtml

質數表:http://wikiyou.tw/%E8%B3%AA%E6%95%B8%E8%A1%A8/
hash function: http://lumiera.org/documentation/technical/howto/HashFunctions.html
                      http://stackoverflow.com/questions/1059545/how-to-use-stdexthash-map-where-the-key-is-a-custom-object
                      http://www.sparknotes.com/cs/searching/hashtables/section2.rhtml
                     http://stackoverflow.com/questions/5303205/hash-map-questions-tutorial
                   struct hash http://stackoverflow.com/questions/13389631/whats-a-good-hash-function-for-struct-with-3-unsigned-chars-and-an-int-for-uno
                          http://stackoverflow.com/questions/8319886/hashing-function-for-nested-struct-in-struct-when-using-an-unordered-set
                          http://en.cppreference.com/w/cpp/utility/hash
                          http://blog.163.com/xychenbaihu@yeah/blog/static/132229655201310201072713/

                   bucket_size : http://www.itpub.net/thread-1294185-1-1.html
                          http://forums.codeguru.com/showthread.php?371872-hash_map-amp-bucket_size
                          http://blog.sina.com.cn/s/blog_648d306d0100mx4l.html
                   less<> : http://hi.baidu.com/mxp446533129/item/337f6cf182fc2a1ece9f328b
                          http://blog.csdn.net/kunlong0909/article/details/8990069
                   testOK: http://blog.csdn.net/fullsail/article/details/5045355

沒有留言:

張貼留言

文章分類