% Conference paper by Gelbukh, Han and Sidorov in the NLP-KE 2003 proceedings
% (IEEE, 2003), pp. 244--249.
% Review notes:
%   - The former `series` field was dropped: it repeated `booktitle` verbatim,
%     and this record names no actual book series.
%   - `address` currently holds only a country. Classic BibTeX expects the
%     publisher's city here -- TODO: confirm the city and replace.
%   - {Sang Yong} is kept braced exactly as exported so the given name stays
%     one unit -- TODO: confirm the intended abbreviation with the author list.
@inproceedings{9f1840305bc949dba68b3a41fbc0a5e3,
  author    = {Gelbukh, Alexander and Han, {Sang Yong} and Sidorov, Grigori},
  title     = {Compression of {Boolean} inverted files by document ordering},
  booktitle = {{NLP-KE} 2003 - 2003 International Conference on Natural Language Processing and Knowledge Engineering, Proceedings},
  editor    = {Zong, Chengqing},
  pages     = {244--249},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2003},
  doi       = {10.1109/NLPKE.2003.1275907},
  language  = {English},
  keywords  = {Boolean search, Information retrieval, Inverted file size, Traveling salesman problem},
  abstract  = {Boolean queries are used to search a document collection for the documents that contain specific terms, independently of the frequency of a term in the document. To perform such queries, a search engine maintains an inverted file, which lists for each keyword the documents containing it. The size of such a file is comparable with that of the document collection, which is a considerable storage overhead. We show how the inverted file can be compressed by ordering the documents in the collection in a specific way. Finding the near-optimal order can be recast as a Hamming-distance traveling salesman problem.},
  note      = {Publisher Copyright: {\textcopyright} 2003 IEEE. International Conference on Natural Language Processing and Knowledge Engineering, NLP-KE 2003; Conference date: 26--29 October 2003},
}