fix a bug with hash collisions on file names

pull/62/head
Zlatin Balevsky 2021-06-15 20:33:16 +01:00
parent 9dcf6e7bea
commit b4f525d282
No known key found for this signature in database
GPG Key ID: A72832072D525E41
2 changed files with 41 additions and 30 deletions

View File

@ -26,31 +26,29 @@ public class SearchIndexImpl {
/**
 * Indexes {@code string} under every keyword in {@code split}.
 *
 * <p>Two structures are maintained:
 * <ul>
 *   <li>{@code keywords}: keyword -> array of hash codes of the strings containing it;</li>
 *   <li>{@code hashes}: hash code -> every indexed string that has this hash code.</li>
 * </ul>
 * Distinct strings can share a {@code String.hashCode()} value, so each {@code hashes}
 * entry holds all colliding strings rather than a single one.
 *
 * @param string the full string (file name) being indexed
 * @param split  the keywords under which {@code string} should be findable
 * @throws IOException declared by the signature; presumably raised by the backing
 *                     maps -- TODO confirm against their declarations
 */
void add(String string, String [] split) throws IOException {
    final int hash = string.hashCode();

    // Step 1: record the hash under each keyword.
    for (String keyword : split) {
        int[] existingHashes = keywords.get(keyword);
        if (existingHashes == null) {
            existingHashes = new int[1];
            existingHashes[0] = hash;
            keywords.put(keyword, existingHashes);
        } else {
            // NOTE(review): insertIntoSortedArray appears to hand back the same array
            // instance when the hash is already present, hence the identity check.
            int[] newHashes = DataUtil.insertIntoSortedArray(existingHashes, hash);
            if (newHashes != existingHashes) {
                keywords.put(keyword, newHashes);
            }
        }
    }

    // Step 2: record the string under its hash exactly once per call, independent of
    // the keyword loop, so an existing entry for a colliding string is extended
    // instead of being overwritten.
    String[] fileNames = hashes.get(hash);
    if (fileNames == null) {
        fileNames = new String[] {string};
        hashes.put(hash, fileNames);
    } else {
        Set<String> unique = new HashSet<>();
        for (String fileName : fileNames) {
            unique.add(fileName);
        }
        // Only write back when the string was not already stored for this hash.
        if (unique.add(string)) {
            hashes.put(hash, unique.toArray(new String[0]));
        }
    }
}
void remove(String string, String[] split) throws IOException {
@ -72,24 +70,22 @@ public class SearchIndexImpl {
if (idx == -1)
return;
String [] newStrings = new String[strings.length - 1];
System.arraycopy(strings, 0, newStrings, 0, idx);
System.arraycopy(strings, idx + 1, newStrings, idx, newStrings.length - idx);
hashes.put(hash, newStrings);
if (strings.length == 1) {
hashes.remove(hash);
} else {
String[] newStrings = new String[strings.length - 1];
System.arraycopy(strings, 0, newStrings, 0, idx);
System.arraycopy(strings, idx + 1, newStrings, idx, newStrings.length - idx);
hashes.put(hash, newStrings);
return;
}
for (String keyword : split) {
int [] existingHashes = keywords.get(keyword);
if (existingHashes == null)
throw new IllegalStateException();
idx = -1;
for (int i = 0; i < existingHashes.length; i ++) {
if (existingHashes[i] == hash) {
idx = i;
break;
}
}
if (idx == -1)
idx = Arrays.binarySearch(existingHashes, hash);
if (idx < 0)
throw new IllegalStateException();
if (existingHashes.length == 1) {
@ -100,6 +96,7 @@ public class SearchIndexImpl {
System.arraycopy(existingHashes, idx + 1, newHashes, idx, newHashes.length - idx);
keywords.put(keyword, newHashes);
}
}
}

View File

@ -186,4 +186,18 @@ class SearchIndexTest {
add("settings.gradle")
}
}
@Test
void testHashCollision() {
    // These two names were picked because String.hashCode() yields the same value
    // for both, so they land in the same hash bucket of the index.
    final String first = "io.png"
    final String second = "k1.png"
    final List<String> query = ["png"]

    initIndex([])

    // A single entry is found by the shared keyword.
    index.add(first)
    assert index.search(query) == [first]

    // Adding the colliding name must not displace the first one.
    index.add(second)
    assert index.search(query) == [first, second]

    // Removing one colliding name must leave the other searchable.
    index.remove(second)
    assert index.search(query) == [first]

    // Re-adding after removal restores both results.
    index.add(second)
    assert index.search(query) == [first, second]
}
}