Merge branch 'kb/fast-hashmap'

Improvements to our hash table to get it to meet the needs of the
msysgit fscache project, with some nice performance improvements.

* kb/fast-hashmap:
  name-hash: retire unused index_name_exists()
  hashmap.h: use 'unsigned int' for hash-codes everywhere
  test-hashmap.c: drop unnecessary #includes
  .gitignore: test-hashmap is a generated file
  read-cache.c: fix memory leaks caused by removed cache entries
  builtin/update-index.c: cleanup update_one
  fix 'git update-index --verbose --again' output
  remove old hash.[ch] implementation
  name-hash.c: remove cache entries instead of marking them CE_UNHASHED
  name-hash.c: use new hash map implementation for cache entries
  name-hash.c: remove unreferenced directory entries
  name-hash.c: use new hash map implementation for directories
  diffcore-rename.c: use new hash map implementation
  diffcore-rename.c: simplify finding exact renames
  diffcore-rename.c: move code around to prepare for the next patch
  buitin/describe.c: use new hash map implementation
  add a hashtable implementation that supports O(1) removal
  submodule: don't access the .gitmodules cache entry after removing it
author: Junio C Hamano <gitster@pobox.com> 2014-02-27 14:01:09 -0800
committer: Junio C Hamano <gitster@pobox.com> 2014-02-27 14:01:09 -0800
commit: d637d1b9a8fb765a8542e69bd2e04b3e229f663b (patch)
tree: eea008a78eacbc6afbfd793377a70a9642624221 /diffcore-rename.c
parent: 810273bc33b1f50191f90deef74277ee84174efd (diff)
parent: 7b359ea6b3333a87fd3fa8b84913f2b75ed244ad (diff)
download: git-d637d1b9a8fb765a8542e69bd2e04b3e229f663b.tar.xz
1 file changed, 63 insertions, 120 deletions
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 6c7a72fbe7..9b4f068eb3 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -4,7 +4,7 @@
 #include "cache.h"
 #include "diff.h"
 #include "diffcore.h"
-#include "hash.h"
+#include "hashmap.h"
 #include "progress.h"
 
 /* Table of rename/copy destinations */
@@ -243,137 +243,82 @@ static int score_compare(const void *a_, const void *b_)
 }
 
 struct file_similarity {
-	int src_dst, index;
+	struct hashmap_entry entry;
+	int index;
 	struct diff_filespec *filespec;
-	struct file_similarity *next;
 };
 
-static int find_identical_files(struct file_similarity *src,
-				struct file_similarity *dst,
+static unsigned int hash_filespec(struct diff_filespec *filespec)
+{
+	unsigned int hash;
+	if (!filespec->sha1_valid) {
+		if (diff_populate_filespec(filespec, 0))
+			return 0;
+		hash_sha1_file(filespec->data, filespec->size, "blob", filespec->sha1);
+	}
+	memcpy(&hash, filespec->sha1, sizeof(hash));
+	return hash;
+}
+
+static int find_identical_files(struct hashmap *srcs,
+				int dst_index,
 				struct diff_options *options)
 {
 	int renames = 0;
 
+	struct diff_filespec *target = rename_dst[dst_index].two;
+	struct file_similarity *p, *best, dst;
+	int i = 100, best_score = -1;
+
 	/*
-	 * Walk over all the destinations ...
+	 * Find the best source match for specified destination.
 	 */
-	do {
-		struct diff_filespec *target = dst->filespec;
-		struct file_similarity *p, *best;
-		int i = 100, best_score = -1;
+	best = NULL;
+	hashmap_entry_init(&dst, hash_filespec(target));
+	for (p = hashmap_get(srcs, &dst, NULL); p; p = hashmap_get_next(srcs, p)) {
+		int score;
+		struct diff_filespec *source = p->filespec;
 
-		/*
-		 * .. to find the best source match
-		 */
-		best = NULL;
-		for (p = src; p; p = p->next) {
-			int score;
-			struct diff_filespec *source = p->filespec;
-
-			/* False hash collision? */
-			if (hashcmp(source->sha1, target->sha1))
-				continue;
-			/* Non-regular files? If so, the modes must match! */
-			if (!S_ISREG(source->mode) || !S_ISREG(target->mode)) {
-				if (source->mode != target->mode)
-					continue;
-			}
-			/* Give higher scores to sources that haven't been used already */
-			score = !source->rename_used;
-			if (source->rename_used && options->detect_rename != DIFF_DETECT_COPY)
+		/* False hash collision? */
+		if (hashcmp(source->sha1, target->sha1))
+			continue;
+		/* Non-regular files? If so, the modes must match! */
+		if (!S_ISREG(source->mode) || !S_ISREG(target->mode)) {
+			if (source->mode != target->mode)
 				continue;
-			score += basename_same(source, target);
-			if (score > best_score) {
-				best = p;
-				best_score = score;
-				if (score == 2)
-					break;
-			}
-
-			/* Too many identical alternatives? Pick one */
-			if (!--i)
-				break;
 		}
-		if (best) {
-			record_rename_pair(dst->index, best->index, MAX_SCORE);
-			renames++;
+		/* Give higher scores to sources that haven't been used already */
+		score = !source->rename_used;
+		if (source->rename_used && options->detect_rename != DIFF_DETECT_COPY)
+			continue;
+		score += basename_same(source, target);
+		if (score > best_score) {
+			best = p;
+			best_score = score;
+			if (score == 2)
+				break;
 		}
-	} while ((dst = dst->next) != NULL);
-	return renames;
-}
 
-static void free_similarity_list(struct file_similarity *p)
-{
-	while (p) {
-		struct file_similarity *entry = p;
-		p = p->next;
-		free(entry);
+		/* Too many identical alternatives? Pick one */
+		if (!--i)
+			break;
 	}
-}
-
-static int find_same_files(void *ptr, void *data)
-{
-	int ret;
-	struct file_similarity *p = ptr;
-	struct file_similarity *src = NULL, *dst = NULL;
-	struct diff_options *options = data;
-
-	/* Split the hash list up into sources and destinations */
-	do {
-		struct file_similarity *entry = p;
-		p = p->next;
-		if (entry->src_dst < 0) {
-			entry->next = src;
-			src = entry;
-		} else {
-			entry->next = dst;
-			dst = entry;
-		}
-	} while (p);
-
-	/*
-	 * If we have both sources *and* destinations, see if
-	 * we can match them up
-	 */
-	ret = (src && dst) ? find_identical_files(src, dst, options) : 0;
-
-	/* Free the hashes and return the number of renames found */
-	free_similarity_list(src);
-	free_similarity_list(dst);
-	return ret;
-}
-
-static unsigned int hash_filespec(struct diff_filespec *filespec)
-{
-	unsigned int hash;
-	if (!filespec->sha1_valid) {
-		if (diff_populate_filespec(filespec, 0))
-			return 0;
-		hash_sha1_file(filespec->data, filespec->size, "blob", filespec->sha1);
+	if (best) {
+		record_rename_pair(dst_index, best->index, MAX_SCORE);
+		renames++;
 	}
-	memcpy(&hash, filespec->sha1, sizeof(hash));
-	return hash;
+	return renames;
 }
 
-static void insert_file_table(struct hash_table *table, int src_dst, int index, struct diff_filespec *filespec)
+static void insert_file_table(struct hashmap *table, int index, struct diff_filespec *filespec)
 {
-	void **pos;
-	unsigned int hash;
 	struct file_similarity *entry = xmalloc(sizeof(*entry));
 
-	entry->src_dst = src_dst;
 	entry->index = index;
 	entry->filespec = filespec;
-	entry->next = NULL;
-
-	hash = hash_filespec(filespec);
-	pos = insert_hash(hash, entry, table);
 
-	/* We already had an entry there? */
-	if (pos) {
-		entry->next = *pos;
-		*pos = entry;
-	}
+	hashmap_entry_init(entry, hash_filespec(filespec));
+	hashmap_add(table, entry);
 }
 
 /*
@@ -385,24 +330,22 @@ static void insert_file_table(struct hash_table *table, int src_dst, int index,
  */
 static int find_exact_renames(struct diff_options *options)
 {
-	int i;
-	struct hash_table file_table;
+	int i, renames = 0;
+	struct hashmap file_table;
 
-	init_hash(&file_table);
-	preallocate_hash(&file_table, rename_src_nr + rename_dst_nr);
+	/* Add all sources to the hash table */
+	hashmap_init(&file_table, NULL, rename_src_nr);
 	for (i = 0; i < rename_src_nr; i++)
-		insert_file_table(&file_table, -1, i, rename_src[i].p->one);
+		insert_file_table(&file_table, i, rename_src[i].p->one);
 
+	/* Walk the destinations and find best source match */
 	for (i = 0; i < rename_dst_nr; i++)
-		insert_file_table(&file_table, 1, i, rename_dst[i].two);
-
-	/* Find the renames */
-	i = for_each_hash(&file_table, find_same_files, options);
+		renames += find_identical_files(&file_table, i, options);
 
-	/* .. and free the hash data structure */
-	free_hash(&file_table);
+	/* Free the hash data structure and entries */
+	hashmap_free(&file_table, 1);
 
-	return i;
+	return renames;
 }
 
 #define NUM_CANDIDATE_PER_DST 4
author	Junio C Hamano <gitster@pobox.com>	2014-02-27 14:01:09 -0800
committer	Junio C Hamano <gitster@pobox.com>	2014-02-27 14:01:09 -0800
commit	d637d1b9a8fb765a8542e69bd2e04b3e229f663b (patch)
tree	eea008a78eacbc6afbfd793377a70a9642624221 /diffcore-rename.c
parent	810273bc33b1f50191f90deef74277ee84174efd (diff)
parent	7b359ea6b3333a87fd3fa8b84913f2b75ed244ad (diff)
download	git-d637d1b9a8fb765a8542e69bd2e04b3e229f663b.tar.xz