#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <sys/types.h>
#include <stdint.h>
#include <unistd.h>

#include "khash.hh"
#include <ext/hash_set>
#include <tr1/unordered_set>

#ifdef HAVE_GOOGLEHASH
#include <google/sparse_hash_set>
#include <google/dense_hash_set>
#endif

static int data_size = 5000000;
static unsigned *int_data;
static char **str_data;

typedef struct {
	size_t rss, vsize;
	double utime, stime;
} RunProcDyn;

RunProcDyn g_init_rpd;

int run_get_dynamic_proc_info(pid_t pid, RunProcDyn *rpd);

struct eqint {
	inline bool operator()(unsigned s1, unsigned s2) const {
		return s1 == s2;
    }
};

struct eqstr {
	inline bool operator()(const char *s1, const char *s2) const {
		return strcmp(s1, s2) == 0;
    }
};

void ht_reset_rpd(int is)
{
	RunProcDyn rpd;
	run_get_dynamic_proc_info(getpid(), &rpd);
	printf("--- current rss: %lu (reset: %c)\n", rpd.rss, is? 'Y' : 'N');
	if (is) g_init_rpd = rpd;
}
void ht_init_data()
{
	int i;
	char buf[256];
	printf("--- generating data... ");
	srand48(11);
	int_data = (unsigned*)calloc(data_size, sizeof(unsigned));
	str_data = (char**)calloc(data_size, sizeof(char*));
	for (i = 0; i < data_size; ++i) {
		int_data[i] = (unsigned)(data_size * drand48() / 4) * 271828183u;
		sprintf(buf, "%x", int_data[i]);
		str_data[i] = strdup(buf);
	}
	printf("done!\n");
}
void ht_destroy_data()
{
	int i;
	for (i = 0; i < data_size; ++i) free(str_data[i]);
	free(str_data); free(int_data);
}

// for khash, STL and TR1
template<class hash_t, class keytype_t>
void ht_cpp(hash_t *h, keytype_t *data)
{
	RunProcDyn rpd_init, rpd;
	pid_t pid = getpid();
	int i, step = data_size / 10;
	size_t max_rss = 0;

	run_get_dynamic_proc_info(pid, &rpd_init);
	rpd = rpd_init;
	for (i = 0; i < data_size; ++i) {
		if ((i + 1) % step == 0) {
			run_get_dynamic_proc_info(pid, &rpd);
			if (rpd.rss > max_rss) max_rss = rpd.rss;
		}
		if (h->insert(data[i]).second == false) h->erase(data[i]);
//		if (h->find(data[i]) != h->end()) h->erase(data[i]);
//		else h->insert(data[i]);
	}
	run_get_dynamic_proc_info(pid, &rpd);
	if (rpd.rss > max_rss) max_rss = rpd.rss;
	printf("(user, sys, peak_rss, size, capacity): (%.3lf, %.3lf, %lu, %lu, %lu)\n", rpd.utime - rpd_init.utime,
		   rpd.stime - rpd_init.stime, max_rss - g_init_rpd.rss, h->size(), h->bucket_count());
}

// for glib
#ifdef HAVE_GLIB

#include <glib.h>

void ht_glib_int()
{
	RunProcDyn rpd_init, rpd;
	pid_t pid = getpid();
	int i, step = data_size / 10;
	size_t max_rss = 0;
	unsigned *data = int_data;
	GHashTable *h;
	int val = 11;

	h = g_hash_table_new(g_int_hash, g_int_equal);
	run_get_dynamic_proc_info(pid, &rpd_init);
	rpd = rpd_init;
	for (i = 0; i < data_size; ++i) {
		if ((i + 1) % step == 0) {
			run_get_dynamic_proc_info(pid, &rpd);
			if (rpd.rss > max_rss) max_rss = rpd.rss;
		}
		if (g_hash_table_lookup(h, &data[i])) g_hash_table_steal(h, &data[i]);
		else g_hash_table_insert(h, &data[i], &val);
	}
	run_get_dynamic_proc_info(pid, &rpd);
	if (rpd.rss > max_rss) max_rss = rpd.rss;
	printf("(user, sys, peak_rss, size): (%.3lf, %.3lf, %lu, %u)\n", rpd.utime - rpd_init.utime,
		   rpd.stime - rpd_init.stime, max_rss - g_init_rpd.rss, g_hash_table_size(h));
	g_hash_table_destroy(h);
}
void ht_glib_str()
{
	RunProcDyn rpd_init, rpd;
	pid_t pid = getpid();
	int i, step = data_size / 10;
	size_t max_rss = 0;
	char **data = str_data;
	GHashTable *h;
	int val = 11;

	h = g_hash_table_new(g_str_hash, g_str_equal);
	run_get_dynamic_proc_info(pid, &rpd_init);
	rpd = rpd_init;
	for (i = 0; i < data_size; ++i) {
		if ((i + 1) % step == 0) {
			run_get_dynamic_proc_info(pid, &rpd);
			if (rpd.rss > max_rss) max_rss = rpd.rss;
		}
		if (g_hash_table_lookup(h, data[i])) g_hash_table_steal(h, data[i]);
		else g_hash_table_insert(h, data[i], &val);
	}
	run_get_dynamic_proc_info(pid, &rpd);
	if (rpd.rss > max_rss) max_rss = rpd.rss;
	printf("(user, sys, peak_rss, size): (%.3lf, %.3lf, %lu, %u)\n", rpd.utime - rpd_init.utime,
		   rpd.stime - rpd_init.stime, max_rss - g_init_rpd.rss, g_hash_table_size(h));
	g_hash_table_destroy(h);
}
#endif // HAVE_GLIB

#ifdef HAVE_GHTHASH
#include <ght_hash_table.h>
void ht_ghthash_int()
{
	RunProcDyn rpd_init, rpd;
	pid_t pid = getpid();
	int i, step = data_size / 10;
	size_t max_rss = 0;
	unsigned *data = int_data;
	ght_hash_table_t *h;

	h = ght_create(data_size/5);
//	ght_set_rehash(h, 1);
	run_get_dynamic_proc_info(pid, &rpd_init);
	rpd = rpd_init;
	for (i = 0; i < data_size; ++i) {
		if ((i + 1) % step == 0) {
			run_get_dynamic_proc_info(pid, &rpd);
			if (rpd.rss > max_rss) max_rss = rpd.rss;
		}
		if (ght_insert(h, 0, 4, &data[i]) == -1) ght_remove(h, 4, &data[i]);
	}
	run_get_dynamic_proc_info(pid, &rpd);
	if (rpd.rss > max_rss) max_rss = rpd.rss;
	printf("(user, sys, peak_rss, size, capacity): (%.3lf, %.3lf, %lu, %d, %u)\n", rpd.utime - rpd_init.utime,
		   rpd.stime - rpd_init.stime, max_rss - g_init_rpd.rss, ght_size(h), ght_table_size(h));
	ght_finalize(h);
}
void ht_ghthash_str()
{
	RunProcDyn rpd_init, rpd;
	pid_t pid = getpid();
	int i, step = data_size / 10;
	size_t max_rss = 0;
	char **data = str_data;
	ght_hash_table_t *h;

	h = ght_create(data_size/5);
//	ght_set_rehash(h, 1);
	run_get_dynamic_proc_info(pid, &rpd_init);
	rpd = rpd_init;
	for (i = 0; i < data_size; ++i) {
		int l = strlen(data[i]);
		if ((i + 1) % step == 0) {
			run_get_dynamic_proc_info(pid, &rpd);
			if (rpd.rss > max_rss) max_rss = rpd.rss;
		}
		if (ght_insert(h, 0, l, data[i]) == -1) ght_remove(h, l, data[i]);
	}
	run_get_dynamic_proc_info(pid, &rpd);
	if (rpd.rss > max_rss) max_rss = rpd.rss;
	printf("(user, sys, peak_rss, size, capacity): (%.3lf, %.3lf, %lu, %d, %u)\n", rpd.utime - rpd_init.utime,
		   rpd.stime - rpd_init.stime, max_rss - g_init_rpd.rss, ght_size(h), ght_table_size(h));
	ght_finalize(h);
}
#endif

static int usage()
{
	fprintf(stderr, "Usage:   hash_test [th] [-n %d]\n", data_size);
	fprintf(stderr, "Options: -n INT  size of data [%d]\n", data_size);
	fprintf(stderr, "         -c HEX  bitwise codes for evaluation [%x]\n", 0x6f);
	fprintf(stderr, "         -S      disable string test\n");
	fprintf(stderr, "         -h      help\n");
	return 1;
}

int main(int argc, char *argv[])
{
	int c, code = 0x6f, is_str = 1;
	while ((c = getopt(argc, argv, "c:n:hS")) >= 0) {
		switch (c) {
		case 'c': sscanf(optarg, "%x", &code); break;
		case 'h': return usage();
		case 'n': data_size = atoi(optarg); break;
		case 'S': is_str = 0; break;
		}
	}

	ht_init_data();
	ht_reset_rpd(1);

	if (code & 0x1) {
		printf("--- khash ---\n");
		ht_reset_rpd(1);
		khset_t<unsigned> *h_int = new khset_t<unsigned>;
		ht_cpp(h_int, int_data);
		delete h_int;
		if (is_str) {
			ht_reset_rpd(0);
			khset_t<const char*> *h_str = new khset_t<const char*>;
			ht_cpp(h_str, str_data);
			delete h_str;
		}
	}
#ifdef HAVE_GLIB
	if (code & 0x2) {
		printf("--- GLIB ---\n");
		ht_reset_rpd(1);
		ht_glib_int();
		ht_reset_rpd(0);
		if (is_str) ht_glib_str();
	}
#endif
	if (code &0x4) {
		printf("--- TR1 ---\n");
		ht_reset_rpd(1);
		using namespace __gnu_cxx;
		using namespace std::tr1;
		unordered_set<unsigned> *h_int = new unordered_set<unsigned>;
		ht_cpp(h_int, int_data);
		delete h_int;
		if (is_str) {
			ht_reset_rpd(0);
			unordered_set<const char*, __gnu_cxx::hash<const char*>, eqstr> *h_str
				= new unordered_set<const char*, __gnu_cxx::hash<const char*>, eqstr>;
			ht_cpp(h_str, str_data);
			delete h_str;
		}
	}
	if (code & 0x8){
		printf("--- SGI STL ---\n");
		ht_reset_rpd(0);
		using namespace __gnu_cxx;
		using namespace std;
		hash_set<unsigned, hash<unsigned>, eqint> *h_int = new hash_set<unsigned, hash<unsigned>, eqint>;
		ht_cpp(h_int, int_data);
		delete h_int;
		if (is_str) {
			ht_reset_rpd(0);
			hash_set<const char*, hash<const char*>, eqstr> *h_str = new hash_set<const char*, hash<const char*>, eqstr>;
			ht_cpp(h_str, str_data);
			delete h_str;
		}
	}
#ifdef HAVE_GHTHASH
	if (code & 0x10) {
		printf("--- GHTHASH ---\n");
		ht_reset_rpd(1);
		ht_ghthash_int();
		ht_reset_rpd(0);
		if (is_str) ht_ghthash_str();
	}
#endif
#ifdef HAVE_GOOGLEHASH
	if (code & 0x20) {
		printf("--- Google sparse hash ---\n");
		using google::sparse_hash_set;
		ht_reset_rpd(0);
		sparse_hash_set<unsigned, __gnu_cxx::hash<unsigned>, eqint> *h_int = new sparse_hash_set<unsigned, __gnu_cxx::hash<unsigned>, eqint>;
		h_int->set_deleted_key(0xffffffffu);
		ht_cpp(h_int, int_data);
		delete h_int;
		if (is_str) {
			ht_reset_rpd(0);
			sparse_hash_set<const char*, __gnu_cxx::hash<const char*>, eqstr> *h_str = new sparse_hash_set<const char*, __gnu_cxx::hash<const char*>, eqstr>;
			h_str->set_deleted_key("");
			ht_cpp(h_str, str_data);
			delete h_str;
		}
	}
	if (code & 0x40) {
		printf("--- Google dense hash ---\n");
		using google::dense_hash_set;
		ht_reset_rpd(0);
		dense_hash_set<unsigned, __gnu_cxx::hash<unsigned>, eqint> *h_int = new dense_hash_set<unsigned, __gnu_cxx::hash<unsigned>, eqint>;
		h_int->set_empty_key(0xffffffffu);
		h_int->set_deleted_key(0xfffffffeu);
		ht_cpp(h_int, int_data);
		delete h_int;
		if (is_str) {
			ht_reset_rpd(0);
			dense_hash_set<const char*, __gnu_cxx::hash<const char*>, eqstr> *h_str = new dense_hash_set<const char*, __gnu_cxx::hash<const char*>, eqstr>;
			h_str->set_empty_key("*");
			h_str->set_deleted_key("");
			ht_cpp(h_str, str_data);
			delete h_str;
		}
	}
#endif


	ht_reset_rpd(0);
	ht_destroy_data();
	ht_reset_rpd(0);

	return 0;
}

// attractivechaos: statistic routine, copied from my runlib

#define RUN_ERR_WRONG_OS		1
#define RUN_ERR_MISSING_INFO	2
#define RUN_ERR_PROC_FINISHED	3

#ifdef __linux__
int run_get_dynamic_proc_info(pid_t pid, RunProcDyn *rpd)
{
	int c, n_spc;
	char str[64];
	FILE *fp;
	unsigned long tmp, tmp2;
	size_t page_size;

	page_size = sysconf(_SC_PAGESIZE);
	sprintf(str, "/proc/%u/stat", pid);
	fp = fopen(str, "r");
	if (fp == 0) return RUN_ERR_PROC_FINISHED;
	n_spc = 0;
	while ((c = fgetc(fp)) != EOF) {
		if (c == ' ') ++n_spc;
		if (n_spc == 13) break;
	}
	fscanf(fp, "%lu%lu", &tmp, &tmp2);
	rpd->utime = tmp / 100.0;
	rpd->stime = tmp2 / 100.0;
	++n_spc;
	while ((c = fgetc(fp)) != EOF) {
		if (c == ' ') ++n_spc;
		if (n_spc == 22) break;
	}
	fscanf(fp, "%lu%lu", &tmp, &tmp2);
	fclose(fp);
	rpd->vsize = tmp / 1024;
	rpd->rss = tmp2 * (page_size / 1024);
	rpd->rss *= 1024;
	rpd->vsize *= 1024;
	return 0;
}
#endif /* __linux */

/* MAC OS X */

#ifdef __APPLE__

#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <mach/mach_init.h>
#include <mach/mach_host.h>
#include <mach/mach_port.h>
#include <mach/mach_traps.h>
#include <mach/task_info.h>
#include <mach/thread_info.h>
#include <mach/thread_act.h>
#include <mach/vm_region.h>
#include <mach/vm_map.h>
#include <mach/task.h>
#include <mach/shared_memory_server.h>

typedef struct vmtotal vmtotal_t;

/* On Mac OS X, the only way to get enough information is to become root. Pretty frustrating! */
int run_get_dynamic_proc_info(pid_t pid, RunProcDyn *rpd)
{
	task_t task;
	kern_return_t error;
	mach_msg_type_number_t count;
	thread_array_t thread_table;
	thread_basic_info_t thi;
	thread_basic_info_data_t thi_data;
	unsigned table_size;
	struct task_basic_info ti;

	error = task_for_pid(mach_task_self(), pid, &task);
	if (error != KERN_SUCCESS) {
		/* fprintf(stderr, "++ Probably you have to set suid or become root.\n"); */
		rpd->rss = rpd->vsize = 0;
		rpd->utime = rpd->stime = 0;
		return 0;
	}
	count = TASK_BASIC_INFO_COUNT;
	error = task_info(task, TASK_BASIC_INFO, (task_info_t)&ti, &count);
	assert(error == KERN_SUCCESS);
	{ /* adapted from ps/tasks.c */
		vm_region_basic_info_data_64_t b_info;
        vm_address_t address = GLOBAL_SHARED_TEXT_SEGMENT;
        vm_size_t size;
        mach_port_t object_name;
        count = VM_REGION_BASIC_INFO_COUNT_64;
        error = vm_region_64(task, &address, &size, VM_REGION_BASIC_INFO,
							 (vm_region_info_t)&b_info, &count, &object_name);
		if (error == KERN_SUCCESS) {
			if (b_info.reserved && size == (SHARED_TEXT_REGION_SIZE) &&
                ti.virtual_size > (SHARED_TEXT_REGION_SIZE + SHARED_DATA_REGION_SIZE))
			{
				ti.virtual_size -= (SHARED_TEXT_REGION_SIZE + SHARED_DATA_REGION_SIZE);
			}
		}
		rpd->rss = ti.resident_size;
		rpd->vsize = ti.virtual_size;
	}
	{ /* calculate CPU times, adapted from top/libtop.c */
		unsigned i;
		rpd->utime = ti.user_time.seconds + ti.user_time.microseconds * 1e-6;
		rpd->stime = ti.system_time.seconds + ti.system_time.microseconds * 1e-6;
		error = task_threads(task, &thread_table, &table_size);
		assert(error == KERN_SUCCESS);
		thi = &thi_data;
		for (i = 0; i != table_size; ++i) {
			count = THREAD_BASIC_INFO_COUNT;
			error = thread_info(thread_table[i], THREAD_BASIC_INFO, (thread_info_t)thi, &count);
			assert(error == KERN_SUCCESS);
			if ((thi->flags & TH_FLAGS_IDLE) == 0) {
				rpd->utime += thi->user_time.seconds + thi->user_time.microseconds * 1e-6;
				rpd->stime += thi->system_time.seconds + thi->system_time.microseconds * 1e-6;
			}
			if (task != mach_task_self()) {
				error = mach_port_deallocate(mach_task_self(), thread_table[i]);
				assert(error == KERN_SUCCESS);
			}
		}
		error = vm_deallocate(mach_task_self(), (vm_offset_t)thread_table, table_size * sizeof(thread_array_t));
		assert(error == KERN_SUCCESS);
	}
	mach_port_deallocate(mach_task_self(), task);
	return 0;
}

#endif /* __APPLE__ */
