#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <sys/types.h>
#include <stdint.h>
#include <unistd.h>
#include "khash.hh"
#include <ext/hash_set>
#include <tr1/unordered_set>
#ifdef HAVE_GOOGLEHASH
#include <google/sparse_hash_set>
#include <google/dense_hash_set>
#endif
static int data_size = 5000000;
static unsigned *int_data;
static char **str_data;
typedef struct {
size_t rss, vsize;
double utime, stime;
} RunProcDyn;
RunProcDyn g_init_rpd;
int run_get_dynamic_proc_info(pid_t pid, RunProcDyn *rpd);
struct eqint {
inline bool operator()(unsigned s1, unsigned s2) const {
return s1 == s2;
}
};
struct eqstr {
inline bool operator()(const char *s1, const char *s2) const {
return strcmp(s1, s2) == 0;
}
};
void ht_reset_rpd(int is)
{
RunProcDyn rpd;
run_get_dynamic_proc_info(getpid(), &rpd);
printf("--- current rss: %lu (reset: %c)\n", rpd.rss, is? 'Y' : 'N');
if (is) g_init_rpd = rpd;
}
void ht_init_data()
{
int i;
char buf[256];
printf("--- generating data... ");
srand48(11);
int_data = (unsigned*)calloc(data_size, sizeof(unsigned));
str_data = (char**)calloc(data_size, sizeof(char*));
for (i = 0; i < data_size; ++i) {
int_data[i] = (unsigned)(data_size * drand48() / 4) * 271828183u;
sprintf(buf, "%x", int_data[i]);
str_data[i] = strdup(buf);
}
printf("done!\n");
}
void ht_destroy_data()
{
int i;
for (i = 0; i < data_size; ++i) free(str_data[i]);
free(str_data); free(int_data);
}
template<class hash_t, class keytype_t>
void ht_cpp(hash_t *h, keytype_t *data)
{
RunProcDyn rpd_init, rpd;
pid_t pid = getpid();
int i, step = data_size / 10;
size_t max_rss = 0;
run_get_dynamic_proc_info(pid, &rpd_init);
rpd = rpd_init;
for (i = 0; i < data_size; ++i) {
if ((i + 1) % step == 0) {
run_get_dynamic_proc_info(pid, &rpd);
if (rpd.rss > max_rss) max_rss = rpd.rss;
}
if (h->insert(data[i]).second == false) h->erase(data[i]);
}
run_get_dynamic_proc_info(pid, &rpd);
if (rpd.rss > max_rss) max_rss = rpd.rss;
printf("(user, sys, peak_rss, size, capacity): (%.3lf, %.3lf, %lu, %lu, %lu)\n", rpd.utime - rpd_init.utime,
rpd.stime - rpd_init.stime, max_rss - g_init_rpd.rss, h->size(), h->bucket_count());
}
#ifdef HAVE_GLIB
#include <glib.h>
void ht_glib_int()
{
RunProcDyn rpd_init, rpd;
pid_t pid = getpid();
int i, step = data_size / 10;
size_t max_rss = 0;
unsigned *data = int_data;
GHashTable *h;
int val = 11;
h = g_hash_table_new(g_int_hash, g_int_equal);
run_get_dynamic_proc_info(pid, &rpd_init);
rpd = rpd_init;
for (i = 0; i < data_size; ++i) {
if ((i + 1) % step == 0) {
run_get_dynamic_proc_info(pid, &rpd);
if (rpd.rss > max_rss) max_rss = rpd.rss;
}
if (g_hash_table_lookup(h, &data[i])) g_hash_table_steal(h, &data[i]);
else g_hash_table_insert(h, &data[i], &val);
}
run_get_dynamic_proc_info(pid, &rpd);
if (rpd.rss > max_rss) max_rss = rpd.rss;
printf("(user, sys, peak_rss, size): (%.3lf, %.3lf, %lu, %u)\n", rpd.utime - rpd_init.utime,
rpd.stime - rpd_init.stime, max_rss - g_init_rpd.rss, g_hash_table_size(h));
g_hash_table_destroy(h);
}
void ht_glib_str()
{
RunProcDyn rpd_init, rpd;
pid_t pid = getpid();
int i, step = data_size / 10;
size_t max_rss = 0;
char **data = str_data;
GHashTable *h;
int val = 11;
h = g_hash_table_new(g_str_hash, g_str_equal);
run_get_dynamic_proc_info(pid, &rpd_init);
rpd = rpd_init;
for (i = 0; i < data_size; ++i) {
if ((i + 1) % step == 0) {
run_get_dynamic_proc_info(pid, &rpd);
if (rpd.rss > max_rss) max_rss = rpd.rss;
}
if (g_hash_table_lookup(h, data[i])) g_hash_table_steal(h, data[i]);
else g_hash_table_insert(h, data[i], &val);
}
run_get_dynamic_proc_info(pid, &rpd);
if (rpd.rss > max_rss) max_rss = rpd.rss;
printf("(user, sys, peak_rss, size): (%.3lf, %.3lf, %lu, %u)\n", rpd.utime - rpd_init.utime,
rpd.stime - rpd_init.stime, max_rss - g_init_rpd.rss, g_hash_table_size(h));
g_hash_table_destroy(h);
}
#endif
#ifdef HAVE_GHTHASH
#include <ght_hash_table.h>
void ht_ghthash_int()
{
RunProcDyn rpd_init, rpd;
pid_t pid = getpid();
int i, step = data_size / 10;
size_t max_rss = 0;
unsigned *data = int_data;
ght_hash_table_t *h;
h = ght_create(data_size/5);
run_get_dynamic_proc_info(pid, &rpd_init);
rpd = rpd_init;
for (i = 0; i < data_size; ++i) {
if ((i + 1) % step == 0) {
run_get_dynamic_proc_info(pid, &rpd);
if (rpd.rss > max_rss) max_rss = rpd.rss;
}
if (ght_insert(h, 0, 4, &data[i]) == -1) ght_remove(h, 4, &data[i]);
}
run_get_dynamic_proc_info(pid, &rpd);
if (rpd.rss > max_rss) max_rss = rpd.rss;
printf("(user, sys, peak_rss, size, capacity): (%.3lf, %.3lf, %lu, %d, %u)\n", rpd.utime - rpd_init.utime,
rpd.stime - rpd_init.stime, max_rss - g_init_rpd.rss, ght_size(h), ght_table_size(h));
ght_finalize(h);
}
void ht_ghthash_str()
{
RunProcDyn rpd_init, rpd;
pid_t pid = getpid();
int i, step = data_size / 10;
size_t max_rss = 0;
char **data = str_data;
ght_hash_table_t *h;
h = ght_create(data_size/5);
run_get_dynamic_proc_info(pid, &rpd_init);
rpd = rpd_init;
for (i = 0; i < data_size; ++i) {
int l = strlen(data[i]);
if ((i + 1) % step == 0) {
run_get_dynamic_proc_info(pid, &rpd);
if (rpd.rss > max_rss) max_rss = rpd.rss;
}
if (ght_insert(h, 0, l, data[i]) == -1) ght_remove(h, l, data[i]);
}
run_get_dynamic_proc_info(pid, &rpd);
if (rpd.rss > max_rss) max_rss = rpd.rss;
printf("(user, sys, peak_rss, size, capacity): (%.3lf, %.3lf, %lu, %d, %u)\n", rpd.utime - rpd_init.utime,
rpd.stime - rpd_init.stime, max_rss - g_init_rpd.rss, ght_size(h), ght_table_size(h));
ght_finalize(h);
}
#endif
static int usage()
{
fprintf(stderr, "Usage: hash_test [th] [-n %d]\n", data_size);
fprintf(stderr, "Options: -n INT size of data [%d]\n", data_size);
fprintf(stderr, " -c HEX bitwise codes for evaluation [%x]\n", 0x6f);
fprintf(stderr, " -S disable string test\n");
fprintf(stderr, " -h help\n");
return 1;
}
int main(int argc, char *argv[])
{
int c, code = 0x6f, is_str = 1;
while ((c = getopt(argc, argv, "c:n:hS")) >= 0) {
switch (c) {
case 'c': sscanf(optarg, "%x", &code); break;
case 'h': return usage();
case 'n': data_size = atoi(optarg); break;
case 'S': is_str = 0; break;
}
}
ht_init_data();
ht_reset_rpd(1);
if (code & 0x1) {
printf("--- khash ---\n");
ht_reset_rpd(1);
khset_t<unsigned> *h_int = new khset_t<unsigned>;
ht_cpp(h_int, int_data);
delete h_int;
if (is_str) {
ht_reset_rpd(0);
khset_t<const char*> *h_str = new khset_t<const char*>;
ht_cpp(h_str, str_data);
delete h_str;
}
}
#ifdef HAVE_GLIB
if (code & 0x2) {
printf("--- GLIB ---\n");
ht_reset_rpd(1);
ht_glib_int();
ht_reset_rpd(0);
if (is_str) ht_glib_str();
}
#endif
if (code &0x4) {
printf("--- TR1 ---\n");
ht_reset_rpd(1);
using namespace __gnu_cxx;
using namespace std::tr1;
unordered_set<unsigned> *h_int = new unordered_set<unsigned>;
ht_cpp(h_int, int_data);
delete h_int;
if (is_str) {
ht_reset_rpd(0);
unordered_set<const char*, __gnu_cxx::hash<const char*>, eqstr> *h_str
= new unordered_set<const char*, __gnu_cxx::hash<const char*>, eqstr>;
ht_cpp(h_str, str_data);
delete h_str;
}
}
if (code & 0x8){
printf("--- SGI STL ---\n");
ht_reset_rpd(0);
using namespace __gnu_cxx;
using namespace std;
hash_set<unsigned, hash<unsigned>, eqint> *h_int = new hash_set<unsigned, hash<unsigned>, eqint>;
ht_cpp(h_int, int_data);
delete h_int;
if (is_str) {
ht_reset_rpd(0);
hash_set<const char*, hash<const char*>, eqstr> *h_str = new hash_set<const char*, hash<const char*>, eqstr>;
ht_cpp(h_str, str_data);
delete h_str;
}
}
#ifdef HAVE_GHTHASH
if (code & 0x10) {
printf("--- GHTHASH ---\n");
ht_reset_rpd(1);
ht_ghthash_int();
ht_reset_rpd(0);
if (is_str) ht_ghthash_str();
}
#endif
#ifdef HAVE_GOOGLEHASH
if (code & 0x20) {
printf("--- Google sparse hash ---\n");
using google::sparse_hash_set;
ht_reset_rpd(0);
sparse_hash_set<unsigned, __gnu_cxx::hash<unsigned>, eqint> *h_int = new sparse_hash_set<unsigned, __gnu_cxx::hash<unsigned>, eqint>;
h_int->set_deleted_key(0xffffffffu);
ht_cpp(h_int, int_data);
delete h_int;
if (is_str) {
ht_reset_rpd(0);
sparse_hash_set<const char*, __gnu_cxx::hash<const char*>, eqstr> *h_str = new sparse_hash_set<const char*, __gnu_cxx::hash<const char*>, eqstr>;
h_str->set_deleted_key("");
ht_cpp(h_str, str_data);
delete h_str;
}
}
if (code & 0x40) {
printf("--- Google dense hash ---\n");
using google::dense_hash_set;
ht_reset_rpd(0);
dense_hash_set<unsigned, __gnu_cxx::hash<unsigned>, eqint> *h_int = new dense_hash_set<unsigned, __gnu_cxx::hash<unsigned>, eqint>;
h_int->set_empty_key(0xffffffffu);
h_int->set_deleted_key(0xfffffffeu);
ht_cpp(h_int, int_data);
delete h_int;
if (is_str) {
ht_reset_rpd(0);
dense_hash_set<const char*, __gnu_cxx::hash<const char*>, eqstr> *h_str = new dense_hash_set<const char*, __gnu_cxx::hash<const char*>, eqstr>;
h_str->set_empty_key("*");
h_str->set_deleted_key("");
ht_cpp(h_str, str_data);
delete h_str;
}
}
#endif
ht_reset_rpd(0);
ht_destroy_data();
ht_reset_rpd(0);
return 0;
}
#define RUN_ERR_WRONG_OS 1
#define RUN_ERR_MISSING_INFO 2
#define RUN_ERR_PROC_FINISHED 3
#ifdef __linux__
int run_get_dynamic_proc_info(pid_t pid, RunProcDyn *rpd)
{
int c, n_spc;
char str[64];
FILE *fp;
unsigned long tmp, tmp2;
size_t page_size;
page_size = sysconf(_SC_PAGESIZE);
sprintf(str, "/proc/%u/stat", pid);
fp = fopen(str, "r");
if (fp == 0) return RUN_ERR_PROC_FINISHED;
n_spc = 0;
while ((c = fgetc(fp)) != EOF) {
if (c == ' ') ++n_spc;
if (n_spc == 13) break;
}
fscanf(fp, "%lu%lu", &tmp, &tmp2);
rpd->utime = tmp / 100.0;
rpd->stime = tmp2 / 100.0;
++n_spc;
while ((c = fgetc(fp)) != EOF) {
if (c == ' ') ++n_spc;
if (n_spc == 22) break;
}
fscanf(fp, "%lu%lu", &tmp, &tmp2);
fclose(fp);
rpd->vsize = tmp / 1024;
rpd->rss = tmp2 * (page_size / 1024);
rpd->rss *= 1024;
rpd->vsize *= 1024;
return 0;
}
#endif
#ifdef __APPLE__
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <mach/mach_init.h>
#include <mach/mach_host.h>
#include <mach/mach_port.h>
#include <mach/mach_traps.h>
#include <mach/task_info.h>
#include <mach/thread_info.h>
#include <mach/thread_act.h>
#include <mach/vm_region.h>
#include <mach/vm_map.h>
#include <mach/task.h>
#include <mach/shared_memory_server.h>
typedef struct vmtotal vmtotal_t;
int run_get_dynamic_proc_info(pid_t pid, RunProcDyn *rpd)
{
task_t task;
kern_return_t error;
mach_msg_type_number_t count;
thread_array_t thread_table;
thread_basic_info_t thi;
thread_basic_info_data_t thi_data;
unsigned table_size;
struct task_basic_info ti;
error = task_for_pid(mach_task_self(), pid, &task);
if (error != KERN_SUCCESS) {
rpd->rss = rpd->vsize = 0;
rpd->utime = rpd->stime = 0;
return 0;
}
count = TASK_BASIC_INFO_COUNT;
error = task_info(task, TASK_BASIC_INFO, (task_info_t)&ti, &count);
assert(error == KERN_SUCCESS);
{
vm_region_basic_info_data_64_t b_info;
vm_address_t address = GLOBAL_SHARED_TEXT_SEGMENT;
vm_size_t size;
mach_port_t object_name;
count = VM_REGION_BASIC_INFO_COUNT_64;
error = vm_region_64(task, &address, &size, VM_REGION_BASIC_INFO,
(vm_region_info_t)&b_info, &count, &object_name);
if (error == KERN_SUCCESS) {
if (b_info.reserved && size == (SHARED_TEXT_REGION_SIZE) &&
ti.virtual_size > (SHARED_TEXT_REGION_SIZE + SHARED_DATA_REGION_SIZE))
{
ti.virtual_size -= (SHARED_TEXT_REGION_SIZE + SHARED_DATA_REGION_SIZE);
}
}
rpd->rss = ti.resident_size;
rpd->vsize = ti.virtual_size;
}
{
unsigned i;
rpd->utime = ti.user_time.seconds + ti.user_time.microseconds * 1e-6;
rpd->stime = ti.system_time.seconds + ti.system_time.microseconds * 1e-6;
error = task_threads(task, &thread_table, &table_size);
assert(error == KERN_SUCCESS);
thi = &thi_data;
for (i = 0; i != table_size; ++i) {
count = THREAD_BASIC_INFO_COUNT;
error = thread_info(thread_table[i], THREAD_BASIC_INFO, (thread_info_t)thi, &count);
assert(error == KERN_SUCCESS);
if ((thi->flags & TH_FLAGS_IDLE) == 0) {
rpd->utime += thi->user_time.seconds + thi->user_time.microseconds * 1e-6;
rpd->stime += thi->system_time.seconds + thi->system_time.microseconds * 1e-6;
}
if (task != mach_task_self()) {
error = mach_port_deallocate(mach_task_self(), thread_table[i]);
assert(error == KERN_SUCCESS);
}
}
error = vm_deallocate(mach_task_self(), (vm_offset_t)thread_table, table_size * sizeof(thread_array_t));
assert(error == KERN_SUCCESS);
}
mach_port_deallocate(mach_task_self(), task);
return 0;
}
#endif