@inproceedings{9755e7c58b38411c9609004f9bb5e6de,
title = "Efficient exact edit similarity query processing with the asymmetric signature scheme",
abstract = "Given a query string Q, an edit similarity search finds all strings in a database whose edit distance with Q is no more than a given threshold τ. Most existing method answering edit similarity queries rely on a signature scheme to generate candidates given the query string. We observe that the number of signatures generated by existing methods is far greater than the lower bound, and this results in high query time and index space complexities. In this paper, we show that the minimum signature size lower bound is τ +1. We then propose asymmetric signature schemes that achieve this lower bound. We develop efficient query processing algorithms based on the new scheme. Several dynamic programming-based candidate pruning methods are also developed to further speed up the performance. We have conducted a comprehensive experimental study involving nine state-of-the-art algorithms. The experiment results clearly demonstrate the efficiency of our methods.",
keywords = "Q-gram, approximate pattern matching, edit distance, similarity join, similarity search",
author = "Jianbin Qin and Wei Wang and Yifei Lu and Chuan Xiao and Xuemin Lin",
year = "2011",
doi = "10.1145/1989323.1989431",
language = "English",
isbn = "9781450306614",
series = "Proceedings of the ACM SIGMOD International Conference on Management of Data",
publisher = "Association for Computing Machinery",
pages = "1033--1044",
booktitle = "Proceedings of SIGMOD 2011 and PODS 2011",
note = "2011 ACM SIGMOD and 30th PODS 2011 Conference ; Conference date: 12-06-2011 Through 16-06-2011",
}