Archive for June, 2009
Putting Autocomplete Data Structures to the Test
Posted by stevedaskam in Data Structures on June 7, 2009
I ran a simple test to compare the Radix Tree implementation I found on the web with the Trie implementation that I coded from my last post along with a binary search. I loaded all of these structures with over 50,000 random strings, and then performed 5000 prefix searches on each. Below are the results:
5000 prefix searches took the following amount of time…
Binary Search: 6 ms (1.2 microseconds per search)
Trie Search: 38 ms (7.6 microseconds per search)
Radix Trie Search: 59 ms (11.8 microseconds per search)
Overall, they’re all pretty decent solutions. But as far as speed goes, it looks like the binary search algorithm is the winner here, with my humble Trie structure coming in second, and the Radix Trie implementation coming in last. This just goes to show you that sometimes the simplest implementations are the best.
The code that I ran my test with is shown below.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import ds.tree.RadixTree;
import ds.tree.RadixTreeImpl;
public class Main {
Set<String> valueSet = new TreeSet<String>();
Trie<String> trie = new TrieImpl<String>();
RadixTree<String> radixTree = new RadixTreeImpl<String>();
/**
* Load up the trie with lots of random strings
*/
public void loadTrie() {
// Load the Trie and a Set
for (int i = 0; i < 51000; i++) {
String val = getRandomString();
valueSet.add(val);
trie.add(val, val);
}
// Load the RadixTree
String[] strArry = new String[valueSet.size()];
valueSet.toArray(strArry);
for (int i = 0; i < strArry.length; i++) {
radixTree.insert(strArry[i], strArry[i]);
}
}
/**
* Test a prefix search
*/
public void testSearch() {
String[] strArry = new String[valueSet.size()];
valueSet.toArray(strArry);
// Test Trie Access
long start = System.currentTimeMillis();
for(int i = 0; i < 5000; i++) {
trie.search(strArry[i].substring(0, 3));
}
long end = System.currentTimeMillis();
long duration = end - start;
System.out.println("Trie Access = " + duration);
// Test RadixTree Access
start = System.currentTimeMillis();
for(int i = 0; i < 5000; i++) {
radixTree.searchPrefix(strArry[i].substring(0, 3), 10000);
}
end = System.currentTimeMillis();
duration = end - start;
System.out.println("RadixTrie Access = " + duration);
// Test Binary Search Access
start = System.currentTimeMillis();
for(int i = 0; i < 5000; i++) {
List<String> list = new ArrayList<String>();
String prefix = strArry[i].substring(0, 3);
int pos = Arrays.binarySearch(strArry, prefix);
while (pos >= 0 && pos < strArry.length) {
if (strArry[pos].startsWith(prefix)) {
list.add(strArry[pos]);
} else {
break;
}
pos++;
}
}
end = System.currentTimeMillis();
duration = end - start;
System.out.println("Binary Search Access = " + duration);
}
/**
* Returns a random string with length between
* 5 and 10.
*/
private String getRandomString() {
StringBuilder sb = new StringBuilder();
int len = getRandomNumber(5, 10);
for (int i = 0; i < len; i++) {
sb.append(getRandomChar());
}
return sb.toString();
}
/**
* Returns a random number between start and end
*/
private int getRandomNumber(int start, int end) {
//get the range, casting to long to avoid overflow problems
long range = (long)end - (long)start + 1;
// compute a fraction of the range, 0 <= frac < range
long fraction = (long)(range * new Random().nextDouble());
return (int)(fraction + start);
}
/**
* Returns a random letter of the alphabet.
*/
private char getRandomChar() {
String alphabet="ABCDEFGHIJKLMNOPQRSTUVWXYZ";
int pos = (int)(Math.random()*26);
return alphabet.charAt(pos);
}
public static void main(String[] args) {
Main main = new Main();
main.loadTrie();
main.testSearch();
}
}

