% Conference paper in the CICLing 2017 revised selected papers (Springer LNCS series).
% The `language` and `address` values were exported in Spanish ("Inglés", "Alemania")
% and are normalised to English here so they render correctly in English bibliographies.
% NOTE(review): `address` holds a country; BibTeX expects the publisher's city -- confirm and replace.
% NOTE(review): no LNCS `volume` number is present in this record -- add it once verified.
@inproceedings{ede82c3aeb5e4b47a4c22f73fa05124e,
  author    = {Markov, Ilia and Stamatatos, Efstathios and Sidorov, Grigori},
  title     = {Improving cross-topic authorship attribution: The role of pre-processing},
  editor    = {Gelbukh, Alexander},
  booktitle = {Computational Linguistics and Intelligent Text Processing - 18th International Conference, CICLing 2017, Revised Selected Papers},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  pages     = {289--302},
  publisher = {Springer Verlag},
  address   = {Germany},
  year      = {2018},
  isbn      = {9783319771151},
  doi       = {10.1007/978-3-319-77116-8_21},
  language  = {English},
  keywords  = {Authorship attribution, Character n-grams, Cross-topic, Machine learning, Pre-processing},
  abstract  = {The effectiveness of character n-gram features for representing the stylistic properties of a text has been demonstrated in various independent Authorship Attribution (AA) studies. Moreover, it has been shown that some categories of character n-grams perform better than others both under single and cross-topic AA conditions. In this work, we present an improved algorithm for cross-topic AA. We demonstrate that the effectiveness of character n-grams representation can be significantly enhanced by performing simple pre-processing steps and appropriately tuning the number of features, especially in cross-topic conditions.},
  note      = {Publisher Copyright: {\textcopyright} Springer Nature Switzerland AG 2018.; 18th International Conference on Computational Linguistics and Intelligent Text Processing, CICLing 2017 ; Conference date: 17-04-2017 Through 23-04-2017},
}