日前找到hash_map相關說明,據說比map效率更好, 以下就承前一次vector與map, 這次再加入hash_map來比較
[1]vector , map及hash_map 比較
#include <iostream>
#include <vector>
#include <list>
#include <map>
#include <ctime>
#include <stdio.h>
#include <time.h>
#include <ext/hash_map> /*for hash_map*/
//---------------------------------------------------------------------------
#pragma argsused
using namespace std;
using namespace __gnu_cxx; /*for hash_map*/
int main(int argc, char* argv[])
{
//vector
vector<int> v;
vector<int> ::iterator it;
v.push_back( 10 );
cout << "vector container " << endl << "================" << endl << endl;
time_t t1; time(&t1);
for(int i=0;i<2000000;i++)
v.push_back(i);
// v.insert( v.begin( ) , i );
time_t t2; time(&t2);
cout<<" vcetor insert data , the time it takes is " << t2-t1 << " sec." << endl;
sort(v.begin(),v.end());
int a;
time_t t5; time(&t5);
for(int i=0;i<2000000;i++)
{
srand(time(NULL));
a=(rand() % 2000000) +1;
bool b1 = binary_search(v.begin(),v.end(),a);
}
time_t t6; time(&t6);
cout<<" vcetor search data , the time it takes is " << t6-t5 << " sec." << endl;
time_t t3; time(&t3);
v.erase (v.begin(),v.begin()+2000000);
time_t t4; time(&t4);
cout <<" vcetor erase data , the time it takes is " << t4-t3 << " sec." << endl ;
//map
map< int,int > mymap;
map< int,int >::iterator map_it;
pair< map< int,int >::iterator,bool > ret;
mymap.insert ( pair< int,int >(0,100) );
cout << endl << "map container " << endl << "================" << endl;
time_t map_t1; time(&map_t1);
for(int i=1;i<2000000;i++)
mymap.insert ( pair< int,int >(i,2000000+i) );
time_t map_t2; time(&map_t2);
cout << endl <<" map insert data , the time it takes is " << map_t2-map_t1 << " sec." << endl;
int c;
time_t map_t5; time(&map_t5);
for(int i=0;i<2000000;i++)
{
srand(time(NULL));
c=(rand() % 2000000) +1;
map_it=mymap.find(c);
}
time_t map_t6; time(&map_t6);
cout<<" map search data , the time it takes is " << map_t6-map_t5 << " sec." << endl;
time_t map_t3; time(&map_t3);
mymap.erase(mymap.begin(),mymap.end());
time_t map_t4; time(&map_t4);
cout <<" map erase data , the time it takes is " << map_t4-map_t3 << " sec." << endl << endl;
//hash_map default
//hash_map< int,int > myhash1(997);
//hash_map< int,int > myhash1;
hash_map< int,int > myhash1(1543);
hash_map< int,int >::iterator hash1_it;
pair< hash_map< int,int >::iterator,bool > hash1_ret;
myhash1.insert ( pair< int,int >(0,100) );
cout << endl << "map container " << endl << "================" << endl;
cout <<"bucket count:" << myhash1.bucket_count()<<endl;
time_t hash1_t1; time(&hash1_t1);
for(int i=1;i<2000000;i++)
myhash1.insert ( pair< int,int >(i,2000000+i) );
time_t hash1_t2; time(&hash1_t2);
cout << endl <<" hash_map default insert data , the time it takes is " << hash1_t2-hash1_t1 << " sec." << endl;
int d;
time_t hash1_t5; time(&hash1_t5);
for(int i=0;i<2000000;i++)
{
srand(time(NULL));
d=(rand() % 2000000) +1;
hash1_it=myhash1.find(d);
}
time_t hash1_t6; time(&hash1_t6);
cout<<" hash_map default search data , the time it takes is " << hash1_t6-hash1_t5 << " sec." << endl;
time_t hash1_t3; time(&hash1_t3);
myhash1.erase(myhash1.begin(),myhash1.end());
time_t hash1_t4; time(&hash1_t4);
cout <<" hash_map default erase data , the time it takes is " << hash1_t4-hash1_t3 << " sec." << endl << endl;
system("pause");
return 0;
}
得到的結果:
[root@localhost ~]# ./comp
vector container
================
vcetor insert data , the time it takes is 0 sec.
vcetor search data , the time it takes is 8 sec.
vcetor erase data , the time it takes is 0 sec.
map container
================
map insert data , the time it takes is 3 sec.
map search data , the time it takes is 8 sec.
map erase data , the time it takes is 0 sec.
map container
================
bucket count:1543
hash_map default insert data , the time it takes is 1 sec.
hash_map default search data , the time it takes is 8 sec.
hash_map default erase data , the time it takes is 0 sec.
[2]hash_map 調校與比較
針對hash_map進行string的處理與map比較, 看看hash方式不一樣是否有差別
#if defined(__BORLANDC__)
#include < vcl.h >
#endif
#pragma hdrstop
#include <iostream>
#include <map>
#include <ctime>
#include <stdio.h>
#include <time.h>
#if defined(_MSC_VER)
# include <hash_map>
using stdext::hash_map;
#elif defined(__GNUC__)
#include <ext/hash_map> /*for hash_map*/
using namespace __gnu_cxx; /*for hash_map*/
#elif defined(__BORLANDC__)
#include <stlport\hash_map> /*for hash_map*/
#else
# error wrong compiler
#endif
//---------------------------------------------------------------------------
#pragma argsused
using namespace std;
struct str_hash{ //自写hash函数
size_t operator()(const string& str) const
{
unsigned long __h = 0;
for (size_t i = 0 ; i < str.size() ; i ++)
{
__h = 107*__h + str[i];
}
return size_t(__h);
}
};
struct str_hash_default{ //自带的string hash函数
size_t operator()(const string& str) const
{
return __stl_hash_string(str.c_str());
}
};
struct str_equal{ //string 判断相等函数
bool operator()(const string& s1,const string& s2) const
{
return s1==s2;
}
};
struct char_equal{ //char* 判断相等函数
bool operator()(const char* s1,const char* s2) const
{
return strcmp(s1,s2)==0;
}
};
int main(int argc, char* argv[])
{
char buf[100];
//map
map< string,int > mymap;
map< string,int >::iterator map_it;
pair< map< string,int >::iterator,bool > ret;
mymap.insert ( pair< string,int >("0",100) );
cout << endl << "map container " << endl << "================" << endl;
time_t map_t1; time(&map_t1);
for(int i=1;i<2000000;i++)
{
sprintf(buf,"%d",i);
mymap.insert ( pair< string,int >(buf,2000000+i) );
}
time_t map_t2; time(&map_t2);
cout << endl <<" map insert data , the time it takes is " << map_t2-map_t1 << " sec." << endl;
int c;
time_t map_t5; time(&map_t5);
for(int i=0;i<2000000;i++)
{
srand(time(NULL));
c=(rand() % 2000000) +1;
sprintf(buf,"%d",c);
map_it=mymap.find(buf);
}
time_t map_t6; time(&map_t6);
cout<<" map search data , the time it takes is " << map_t6-map_t5 << " sec." << endl;
time_t map_t3; time(&map_t3);
mymap.erase(mymap.begin(),mymap.end());
time_t map_t4; time(&map_t4);
cout <<" map erase data , the time it takes is " << map_t4-map_t3 << " sec." << endl << endl;
//hash_map default
//hash_map< int,int > myhash1(997);
//hash_map< int,int > myhash1;
//hash_map< string,int,str_hash,str_equal > myhash1(1543);
// Episode 1: hash_map<,,str_hash,str_equal> vs map<>
// hash_map< string,int,str_hash,str_equal > myhash1;
// hash_map< string,int,str_hash,str_equal > myhash1(12289);
// hash_map< string,int,str_hash,str_equal >::iterator hash1_it;
// pair< hash_map< string,int,str_hash,str_equal >::iterator,bool > hash1_ret;
// Episode 2: hash_map<,,str_hash_default,str_equal> vs map<>
// hash_map< string,int,str_hash_default,str_equal > myhash1;
// hash_map< string,int,str_hash_default,str_equal > myhash1(1543);
hash_map< string,int,str_hash_default,str_equal > myhash1(12289);
hash_map< string,int,str_hash_default,str_equal >::iterator hash1_it;
pair< hash_map< string,int,str_hash_default,str_equal >::iterator,bool > hash1_ret;
// Episode Trail: hash_map<,,hash<const char*>,char_equal> vs map<>
// hash_map< const char*,int,hash<const char*>,char_equal > myhash1;
// hash_map< const char*,int,hash<const char*>,char_equal >::iterator hash1_it;
// pair< hash_map< const char*,int,hash<const char*>,char_equal >::iterator,bool > hash1_ret;
// myhash1.insert ( pair< const char*,int >("0",100) ); /*Episode Trail*/
myhash1.insert ( pair< string,int >("0",100) );
cout << endl << "map container " << endl << "================" << endl;
cout <<"bucket count:" << myhash1.bucket_count()<<endl;
time_t hash1_t1; time(&hash1_t1);
for(int i=1;i<2000000;i++)
{
sprintf(buf,"%d",i);
// myhash1.insert ( pair< const char*,int >(buf,2000000+i) ); /* Episode Trail */
myhash1.insert ( pair< string,int >(buf,2000000+i) );
}
time_t hash1_t2; time(&hash1_t2);
cout << endl <<" hash_map default insert data , the time it takes is " << hash1_t2-hash1_t1 << " sec." << endl;
int d;
time_t hash1_t5; time(&hash1_t5);
for(int i=0;i<2000000;i++)
{
srand(time(NULL));
d=(rand() % 2000000) +1;
sprintf(buf,"%d",d);
hash1_it=myhash1.find(buf);
}
time_t hash1_t6; time(&hash1_t6);
cout<<" hash_map default search data , the time it takes is " << hash1_t6-hash1_t5 << " sec." << endl;
time_t hash1_t3; time(&hash1_t3);
myhash1.erase(myhash1.begin(),myhash1.end());
time_t hash1_t4; time(&hash1_t4);
cout <<" hash_map default erase data , the time it takes is " << hash1_t4-hash1_t3 << " sec." << endl << endl;
system("pause");
return 0;
}
結果1(map與Episode1的比較,自訂義hash方式)
[root@localhost ~]# ./comp1
map container
================
map insert data , the time it takes is 7 sec.
map search data , the time it takes is 11 sec.
map erase data , the time it takes is 0 sec.
map container
================
bucket count:12289
hash_map default insert data , the time it takes is 2 sec.
hash_map default search data , the time it takes is 8 sec.
hash_map default erase data , the time it takes is 1 sec.
結果2(map與Episode2的比較,使用string的hash方式)
[root@localhost ~]# ./comp1
map container
================
map insert data , the time it takes is 7 sec.
map search data , the time it takes is 10 sec.
map erase data , the time it takes is 1 sec.
map container
================
bucket count:12289
hash_map default insert data , the time it takes is 3 sec.
hash_map default search data , the time it takes is 9 sec.
hash_map default erase data , the time it takes is 1 sec.
從結果1及2,發現hash_map在資料插入的效率比map好很多, 而search的能力也有些微的強化。 而使用自訂定的hash方式, 又比string預設的hash效果又好一些
hash的bucket_count設定多少(在contructor參數帶入),要實測過才知道, 不是愈大愈好,不僅愈大記憶體吃愈凶, 過大甚至發現不見得對效能有正面的幫助(設定時以質數為要)
得到的結果:
相關連結:
http://blog.watashi.ws/618/gcc-visual-studio-hash-map/
http://www.cnblogs.com/kex1n/archive/2010/01/13/1646911.html
http://www.360doc.com/content/08/0801/13/26860_1497144.shtml
質數表:http://wikiyou.tw/%E8%B3%AA%E6%95%B8%E8%A1%A8/
hash function: http://lumiera.org/documentation/technical/howto/HashFunctions.html
http://stackoverflow.com/questions/1059545/how-to-use-stdexthash-map-where-the-key-is-a-custom-object
http://www.sparknotes.com/cs/searching/hashtables/section2.rhtml
http://stackoverflow.com/questions/5303205/hash-map-questions-tutorial
struct hash http://stackoverflow.com/questions/13389631/whats-a-good-hash-function-for-struct-with-3-unsigned-chars-and-an-int-for-uno
http://stackoverflow.com/questions/8319886/hashing-function-for-nested-struct-in-struct-when-using-an-unordered-set
http://en.cppreference.com/w/cpp/utility/hash
http://blog.163.com/xychenbaihu@yeah/blog/static/132229655201310201072713/
bucket_size : http://www.itpub.net/thread-1294185-1-1.html
http://forums.codeguru.com/showthread.php?371872-hash_map-amp-bucket_size
http://blog.sina.com.cn/s/blog_648d306d0100mx4l.html
less<> : http://hi.baidu.com/mxp446533129/item/337f6cf182fc2a1ece9f328b
http://blog.csdn.net/kunlong0909/article/details/8990069
testOK: http://blog.csdn.net/fullsail/article/details/5045355
2012年5月29日 星期二
訂閱:
文章 (Atom)
文章分類
- 爬山 (3)
- 參考文章 (3)
- 鳥事 (5)
- 報稅 (1)
- AIX (2)
- ajax (1)
- BCB (3)
- C/C++ (2)
- cloudera (3)
- DISK (1)
- ftp (1)
- Fuse (2)
- gdb (2)
- hadoop (13)
- hdfs (8)
- HPC (2)
- hypertable (12)
- iOS (1)
- iscsi (1)
- JAVA (2)
- KFS (5)
- kickstart (1)
- KVM (2)
- LAMP (2)
- linux (2)
- Lion (1)
- LVM (2)
- mapreduce (3)
- mpi (3)
- mpich2 (4)
- msgpack (2)
- mysql (2)
- nfs (1)
- openmp (2)
- OS (1)
- OSX (2)
- others (5)
- PBS (1)
- performance_tuning (3)
- php (3)
- phplist (3)
- programming (27)
- REST (2)
- RHCA (6)
- rhel (13)
- rhel6 (4)
- scp (1)
- shell_scripts (2)
- snowleopard (2)
- Solaris (6)
- ssh (1)
- syslog (1)
- t-442-1 (4)
- torque (1)
- ubuntu (2)
- VNC (1)
- watercolor (5)
- windows (1)
- yum (1)