msc-smolak/bibliography.bib

1154 lines
44 KiB
BibTeX
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

@inproceedings{gonito2016,
  author    = {Grali{\'n}ski, Filip and Jaworski, Rafa{\l} and Borchmann, {\L}ukasz and Wierzcho{\'n}, Piotr},
  title     = {Gonito.net -- Open Platform for Research Competition, Cooperation and Reproducibility},
  editor    = {Branco, Ant{\'o}nio and Calzolari, Nicoletta and Choukri, Khalid},
  booktitle = {Proceedings of the 4REAL Workshop: Workshop on Research Results Reproducibility and Resources Citation in Science and Technology of Language},
  year      = {2016},
  pages     = {13--20}
}
@inproceedings{stanislawek-etal-2019-named,
  author    = {Stanislawek, Tomasz and Wr{\'o}blewska, Anna and W{\'o}jcicka, Alicja and Ziembicki, Daniel and Biecek, Przemyslaw},
  title     = {Named Entity Recognition - Is There a Glass Ceiling?},
  booktitle = {Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)},
  publisher = {Association for Computational Linguistics},
  address   = {Hong Kong, China},
  month     = nov,
  year      = {2019},
  pages     = {624--633},
  doi       = {10.18653/v1/K19-1058},
  url       = {https://www.aclweb.org/anthology/K19-1058},
  abstract  = {Recent developments in Named Entity Recognition (NER) have resulted in better and better models. However, is there a glass ceiling? Do we know which types of errors are still hard or even impossible to correct? In this paper, we present a detailed analysis of the types of errors in state-of-the-art machine learning (ML) methods. Our study illustrates weak and strong points of the Stanford, CMU, FLAIR, ELMO and BERT models, as well as their shared limitations. We also introduce new techniques for improving annotation, training process, and for checking model quality and stability.}
}
@misc{borchmann2019searching,
  author        = {Borchmann, {\L}ukasz and Wi{\'s}niewski, Dawid and Gretkowski, Andrzej and Kosmala, Izabela and Jurkiewicz, Dawid and Sza{\l}kiewicz, {\L}ukasz and Pa{\l}ka, Gabriela and Kaczmarek, Karol and Kaliska, Agnieszka and Grali{\'n}ski, Filip},
  title         = {Searching for Legal Clauses by Analogy. Few-shot Semantic Retrieval Shared Task},
  year          = {2019},
  eprint        = {1911.03911},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
@inproceedings{gralinski-etal-2019-geval,
  author    = {Grali{\'n}ski, Filip and Wr{\'o}blewska, Anna and Stanis{\l}awek, Tomasz and Grabowski, Kamil and G{\'o}recki, Tomasz},
  title     = {{GE}val: Tool for Debugging {NLP} Datasets and Models},
  booktitle = {Proceedings of the 2019 ACL Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP},
  publisher = {Association for Computational Linguistics},
  address   = {Florence, Italy},
  month     = aug,
  year      = {2019},
  pages     = {254--262},
  url       = {https://www.aclweb.org/anthology/W19-4826},
  abstract  = {This paper presents a simple but general and effective method to debug the output of machine learning (ML) supervised models, including neural networks. The algorithm looks for features that lower the evaluation metric in such a way that it cannot be ascribed to chance (as measured by their p-values). Using this method {--} implemented as MLEval tool {--} you can find: (1) anomalies in test sets, (2) issues in preprocessing, (3) problems in the ML model itself. It can give you an insight into what can be improved in the datasets and/or the model. The same method can be used to compare ML models or different versions of the same model. We present the tool, the theory behind it and use cases for text-based models of various types.}
}
@inproceedings{Borchmann2018,
  author     = {Borchmann, {\L}ukasz and Gretkowski, Andrzej and Grali{\'n}ski, Filip},
  title      = {Approaching nested named entity recognition with parallel {LSTM-CRFs}},
  editor     = {Ogrodniczuk, Maciej and Kobyli{\'n}ski, {\L}ukasz},
  booktitle  = {Proceedings of the PolEval 2018 Workshop},
  publisher  = {Institute of Computer Science, Polish Academy of Sciences},
  address    = {Warszawa},
  year       = {2018},
  date       = {2018-10-19},
  pages      = {63--73},
  url        = {http://www.borchmann.pl/wp-content/uploads/2018/10/borchmann-lukasz.pdf},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
@article{DBLP:journals/corr/HewlettLJPFHKB16,
  title         = {{WikiReading: {A} Novel Large-scale Language Understanding Task over Wikipedia}},
  author        = {Daniel Hewlett and Alexandre Lacoste and Llion Jones and Illia Polosukhin and Andrew Fandrianto and Jay Han and Matthew Kelcey and David Berthelot},
  journal       = {CoRR},
  volume        = {abs/1608.03542},
  year          = {2016},
  eprint        = {1608.03542},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1608.03542},
  timestamp     = {Mon, 13 Aug 2018 16:46:41 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/HewlettLJPFHKB16},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{byte-level2018kenter,
  author    = {Kenter, Tom and Jones, Llion and Hewlett, Daniel},
  title     = {Byte-level Machine Reading across Morphologically Varied Languages},
  booktitle = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence ({AAAI}-18)},
  year      = {2018}
}
@article{DBLP:journals/corr/SutskeverVL14,
  title         = {{Sequence to Sequence Learning with Neural Networks}},
  author        = {Ilya Sutskever and Oriol Vinyals and Quoc V. Le},
  journal       = {CoRR},
  volume        = {abs/1409.3215},
  year          = {2014},
  eprint        = {1409.3215},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1409.3215},
  timestamp     = {Mon, 13 Aug 2018 16:48:06 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/SutskeverVL14},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1811-04284,
  title         = {{Improving End-to-end Speech Recognition with Pronunciation-assisted Sub-word Modeling}},
  author        = {Hainan Xu and Shuoyang Ding and Shinji Watanabe},
  journal       = {CoRR},
  volume        = {abs/1811.04284},
  year          = {2018},
  eprint        = {1811.04284},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1811.04284},
  timestamp     = {Fri, 23 Nov 2018 12:43:51 +0100},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1811-04284},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{holt-chisholm-2018-extracting,
  author    = {Holt, Xavier and Chisholm, Andrew},
  title     = {Extracting structured data from invoices},
  booktitle = {Proceedings of the Australasian Language Technology Association Workshop 2018},
  address   = {Dunedin, New Zealand},
  month     = dec,
  year      = {2018},
  pages     = {53--59},
  url       = {https://www.aclweb.org/anthology/U18-1006},
  abstract  = {Business documents encode a wealth of information in a format tailored to human consumption {--} i.e. aesthetically disbursed natural language text, graphics and tables. We address the task of extracting key fields (e.g. the amount due on an invoice) from a wide-variety of potentially unseen document formats. In contrast to traditional template driven extraction systems, we introduce a content-driven machine-learning approach which is both robust to noise and generalises to unseen document formats. In a comparison of our approach with alternative invoice extraction systems, we observe an absolute accuracy gain of 20{\%} across compared fields, and a 25{\%}{--}94{\%} reduction in extraction latency.}
}
@article{DBLP:journals/corr/abs-1907-11692,
  title         = {{RoBERTa: {A} Robustly Optimized {BERT} Pretraining Approach}},
  author        = {Yinhan Liu and Myle Ott and Naman Goyal and Jingfei Du and Mandar Joshi and Danqi Chen and Omer Levy and Mike Lewis and Luke Zettlemoyer and Veselin Stoyanov},
  journal       = {CoRR},
  volume        = {abs/1907.11692},
  year          = {2019},
  eprint        = {1907.11692},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1907.11692},
  timestamp     = {Thu, 01 Aug 2019 08:59:33 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1907-11692},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{ke2018focused,
  author        = {Ke, Nan Rosemary and Zolna, Konrad and Sordoni, Alessandro and Lin, Zhouhan and Trischler, Adam and Bengio, Yoshua and Pineau, Joelle and Charlin, Laurent and Pal, Chris},
  title         = {Focused Hierarchical {RNNs} for Conditional Sequence Processing},
  journal       = {CoRR},
  volume        = {abs/1806.04342},
  year          = {2018},
  url           = {http://arxiv.org/abs/1806.04342},
  archivePrefix = {arXiv},
  eprint        = {1806.04342}
}
@article{DBLP:journals/corr/abs-1901-02860,
  title         = {{Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context}},
  author        = {Zihang Dai and Zhilin Yang and Yiming Yang and Jaime G. Carbonell and Quoc V. Le and Ruslan Salakhutdinov},
  journal       = {CoRR},
  volume        = {abs/1901.02860},
  year          = {2019},
  eprint        = {1901.02860},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1901.02860},
  timestamp     = {Fri, 01 Feb 2019 13:39:59 +0100},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1901-02860},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1906-08237,
  title         = {{XLNet: Generalized Autoregressive Pretraining for Language Understanding}},
  author        = {Zhilin Yang and Zihang Dai and Yiming Yang and Jaime G. Carbonell and Ruslan Salakhutdinov and Quoc V. Le},
  journal       = {CoRR},
  volume        = {abs/1906.08237},
  year          = {2019},
  eprint        = {1906.08237},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1906.08237},
  timestamp     = {Mon, 24 Jun 2019 17:28:45 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1906-08237},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1907-06170,
  title         = {{Microsoft Translator at {WMT} 2019: Towards Large-Scale Document-Level Neural Machine Translation}},
  author        = {Marcin Junczys{-}Dowmunt},
  journal       = {CoRR},
  volume        = {abs/1907.06170},
  year          = {2019},
  eprint        = {1907.06170},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1907.06170},
  timestamp     = {Wed, 17 Jul 2019 10:27:36 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1907-06170},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{akbik2018coling,
  author    = {Akbik, Alan and Blythe, Duncan and Vollgraf, Roland},
  title     = {Contextual String Embeddings for Sequence Labeling},
  booktitle = {{COLING} 2018, 27th International Conference on Computational Linguistics},
  year      = {2018},
  pages     = {1638--1649}
}
@article{luo2019named,
  author        = {Luo, Ying and Zhao, Hai and Zhan, Junlang},
  title         = {Named Entity Recognition Only from Word Embeddings},
  journal       = {CoRR},
  volume        = {abs/1909.00164},
  year          = {2019},
  url           = {http://arxiv.org/abs/1909.00164},
  archivePrefix = {arXiv},
  eprint        = {1909.00164}
}
@article{tu2018learning,
  author    = {Tu, Zhaopeng and Liu, Yang and Shi, Shuming and Zhang, Tong},
  title     = {Learning to Remember Translation History with a Continuous Cache},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {6},
  pages     = {407--420},
  year      = {2018},
  publisher = {MIT Press}
}
@article{miculicich2018document,
  author        = {Miculicich, Lesly and Ram, Dhananjay and Pappas, Nikolaos and Henderson, James},
  title         = {Document-Level Neural Machine Translation with Hierarchical Attention Networks},
  journal       = {CoRR},
  volume        = {abs/1809.01576},
  year          = {2018},
  url           = {http://arxiv.org/abs/1809.01576},
  archivePrefix = {arXiv},
  eprint        = {1809.01576}
}
@article{DBLP:journals/corr/abs-1907-05242,
  title         = {{Large Memory Layers with Product Keys}},
  author        = {Guillaume Lample and Alexandre Sablayrolles and Marc'Aurelio Ranzato and Ludovic Denoyer and Herv{\'{e}} J{\'{e}}gou},
  journal       = {CoRR},
  volume        = {abs/1907.05242},
  year          = {2019},
  eprint        = {1907.05242},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1907.05242},
  timestamp     = {Wed, 17 Jul 2019 10:27:36 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1907-05242},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1907-01686,
  title         = {Machine Reading Comprehension: a Literature Review},
  author        = {Xin Zhang and An Yang and Sujian Li and Yizhong Wang},
  journal       = {CoRR},
  volume        = {abs/1907.01686},
  year          = {2019},
  eprint        = {1907.01686},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1907.01686},
  timestamp     = {Mon, 08 Jul 2019 14:12:33 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1907-01686},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1810-04805,
  title         = {{{BERT:} Pre-training of Deep Bidirectional Transformers for Language Understanding}},
  author        = {Jacob Devlin and Ming{-}Wei Chang and Kenton Lee and Kristina Toutanova},
  journal       = {CoRR},
  volume        = {abs/1810.04805},
  year          = {2018},
  eprint        = {1810.04805},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1810.04805},
  timestamp     = {Tue, 30 Oct 2018 20:39:56 +0100},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1810-04805},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1809-08799,
  title         = {Chargrid: Towards Understanding 2D Documents},
  author        = {Anoop R. Katti and Christian Reisswig and Cordula Guder and Sebastian Brarda and Steffen Bickel and Johannes H{\"{o}}hne and Jean Baptiste Faddoul},
  journal       = {CoRR},
  volume        = {abs/1809.08799},
  year          = {2018},
  eprint        = {1809.08799},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1809.08799},
  timestamp     = {Fri, 05 Oct 2018 11:34:52 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1809-08799},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HamiltonYL17,
  title         = {Inductive Representation Learning on Large Graphs},
  author        = {William L. Hamilton and Rex Ying and Jure Leskovec},
  journal       = {CoRR},
  volume        = {abs/1706.02216},
  year          = {2017},
  eprint        = {1706.02216},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1706.02216},
  timestamp     = {Mon, 13 Aug 2018 16:46:12 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/HamiltonYL17},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1810-00826,
  title         = {How Powerful are Graph Neural Networks?},
  author        = {Keyulu Xu and Weihua Hu and Jure Leskovec and Stefanie Jegelka},
  journal       = {CoRR},
  volume        = {abs/1810.00826},
  year          = {2018},
  eprint        = {1810.00826},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1810.00826},
  timestamp     = {Tue, 30 Oct 2018 10:49:09 +0100},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1810-00826},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{matuschek2008measuring,
  author       = {Matuschek, Michael and Schl{\"u}ter, Tim and Conrad, Stefan},
  title        = {Measuring Text Similarity with Dynamic Time Warping},
  booktitle    = {Proceedings of the 2008 International Symposium on Database Engineering \& Applications},
  pages        = {263--267},
  year         = {2008},
  organization = {ACM}
}
@article{simonyan2014very,
  author        = {Simonyan, Karen and Zisserman, Andrew},
  title         = {Very Deep Convolutional Networks for Large-Scale Image Recognition},
  journal       = {CoRR},
  volume        = {abs/1409.1556},
  year          = {2014},
  url           = {http://arxiv.org/abs/1409.1556},
  archivePrefix = {arXiv},
  eprint        = {1409.1556}
}
@misc{lecun2015lenet,
  author = {LeCun, Yann and others},
  title  = {{LeNet-5}, Convolutional Neural Networks},
  year   = {2015},
  url    = {http://yann.lecun.com/exdb/lenet}
}
@article{bergstra2012random,
  author  = {Bergstra, James and Bengio, Yoshua},
  title   = {Random Search for Hyper-Parameter Optimization},
  journal = {Journal of Machine Learning Research},
  year    = {2012},
  volume  = {13},
  pages   = {281--305}
}
@incollection{hinton2012practical,
  author    = {Hinton, Geoffrey E.},
  title     = {A Practical Guide to Training Restricted {Boltzmann} Machines},
  booktitle = {Neural Networks: Tricks of the Trade},
  pages     = {599--619},
  year      = {2012},
  publisher = {Springer}
}
@book{hedges2014statistical,
  author    = {Hedges, Larry V. and Olkin, Ingram},
  title     = {Statistical Methods for Meta-Analysis},
  year      = {2014},
  publisher = {Academic Press}
}
@inproceedings{koehn2004statistical,
  author    = {Koehn, Philipp},
  title     = {Statistical Significance Tests for Machine Translation Evaluation},
  booktitle = {Proceedings of the 2004 Conference on Empirical Methods in Natural Language Processing},
  pages     = {388--395},
  year      = {2004}
}
@article{DBLP:journals/corr/abs-1904-01685,
  title         = {Measuring Calibration in Deep Learning},
  author        = {Jeremy Nixon and Mike Dusenberry and Linchuan Zhang and Ghassen Jerfel and Dustin Tran},
  journal       = {CoRR},
  volume        = {abs/1904.01685},
  year          = {2019},
  eprint        = {1904.01685},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1904.01685},
  timestamp     = {Wed, 24 Apr 2019 12:21:25 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1904-01685},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{settles2012active,
  author    = {Settles, Burr},
  title     = {Active learning},
  journal   = {Synthesis Lectures on Artificial Intelligence and Machine Learning},
  publisher = {Morgan \& Claypool Publishers},
  year      = {2012},
  volume    = {6},
  number    = {1},
  pages     = {1--114}
}
@incollection{NIPS2017_7062,
  author    = {Lundberg, Scott M and Lee, Su-In},
  title     = {A Unified Approach to Interpreting Model Predictions},
  editor    = {I. Guyon and U. V. Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett},
  booktitle = {Advances in Neural Information Processing Systems 30},
  publisher = {Curran Associates, Inc.},
  year      = {2017},
  pages     = {4765--4774},
  url       = {http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf}
}
@article{austin2014graphical,
  author    = {Austin, Peter C. and Steyerberg, Ewout W.},
  title     = {Graphical Assessment of Internal and External Calibration of Logistic Regression Models by Using Loess Smoothers},
  journal   = {Statistics in Medicine},
  volume    = {33},
  number    = {3},
  pages     = {517--535},
  year      = {2014},
  publisher = {Wiley Online Library}
}
@article{lan2019albert,
  author        = {Lan, Zhenzhong and Chen, Mingda and Goodman, Sebastian and Gimpel, Kevin and Sharma, Piyush and Soricut, Radu},
  title         = {{ALBERT}: A Lite {BERT} for Self-supervised Learning of Language Representations},
  journal       = {CoRR},
  volume        = {abs/1909.11942},
  year          = {2019},
  url           = {http://arxiv.org/abs/1909.11942},
  archivePrefix = {arXiv},
  eprint        = {1909.11942}
}
@article{jiao2019tinybert,
  author        = {Jiao, Xiaoqi and Yin, Yichun and Shang, Lifeng and Jiang, Xin and Chen, Xiao and Li, Linlin and Wang, Fang and Liu, Qun},
  title         = {{TinyBERT}: Distilling {BERT} for Natural Language Understanding},
  journal       = {CoRR},
  volume        = {abs/1909.10351},
  year          = {2019},
  url           = {http://arxiv.org/abs/1909.10351},
  archivePrefix = {arXiv},
  eprint        = {1909.10351}
}
@article{wiewel2019localizing,
  author        = {Wiewel, Felix and Yang, Bin},
  title         = {Localizing Catastrophic Forgetting in Neural Networks},
  journal       = {CoRR},
  volume        = {abs/1906.02568},
  year          = {2019},
  url           = {http://arxiv.org/abs/1906.02568},
  archivePrefix = {arXiv},
  eprint        = {1906.02568}
}
@article{hinton2015distilling,
  author        = {Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff},
  title         = {Distilling the Knowledge in a Neural Network},
  journal       = {CoRR},
  volume        = {abs/1503.02531},
  year          = {2015},
  url           = {http://arxiv.org/abs/1503.02531},
  archivePrefix = {arXiv},
  eprint        = {1503.02531}
}
@article{hubara2017quantized,
  author    = {Hubara, Itay and Courbariaux, Matthieu and Soudry, Daniel and El-Yaniv, Ran and Bengio, Yoshua},
  title     = {Quantized Neural Networks: Training Neural Networks with Low Precision Weights and Activations},
  journal   = {Journal of Machine Learning Research},
  volume    = {18},
  number    = {1},
  pages     = {6869--6898},
  year      = {2017},
  publisher = {JMLR.org}
}
@article{bao2019few,
  author        = {Bao, Yujia and Wu, Menghua and Chang, Shiyu and Barzilay, Regina},
  title         = {Few-shot Text Classification with Distributional Signatures},
  journal       = {CoRR},
  volume        = {abs/1908.06039},
  year          = {2019},
  url           = {http://arxiv.org/abs/1908.06039},
  archivePrefix = {arXiv},
  eprint        = {1908.06039}
}
@article{DBLP:journals/corr/NarayanGCS17,
  title         = {Split and Rephrase},
  author        = {Shashi Narayan and Claire Gardent and Shay B. Cohen and Anastasia Shimorina},
  journal       = {CoRR},
  volume        = {abs/1707.06971},
  year          = {2017},
  eprint        = {1707.06971},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1707.06971},
  timestamp     = {Mon, 13 Aug 2018 16:48:49 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/NarayanGCS17},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1906-01038,
  title         = {Transforming Complex Sentences into a Semantic Hierarchy},
  author        = {Christina Niklaus and Matthias Cetto and Andr{\'{e}} Freitas and Siegfried Handschuh},
  journal       = {CoRR},
  volume        = {abs/1906.01038},
  year          = {2019},
  eprint        = {1906.01038},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1906.01038},
  timestamp     = {Thu, 13 Jun 2019 13:36:00 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1906-01038},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{goldberger2005neighbourhood,
  author    = {Goldberger, Jacob and Hinton, Geoffrey E. and Roweis, Sam T. and Salakhutdinov, Ruslan R.},
  title     = {Neighbourhood Components Analysis},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {513--520},
  year      = {2005}
}
@article{hyvarinen2000independent,
  author    = {Hyv{\"a}rinen, Aapo and Oja, Erkki},
  title     = {Independent Component Analysis: Algorithms and Applications},
  journal   = {Neural Networks},
  volume    = {13},
  number    = {4--5},
  pages     = {411--430},
  year      = {2000},
  publisher = {Elsevier}
}
@article{DBLP:journals/corr/abs-1804-00079,
  title         = {Learning General Purpose Distributed Sentence Representations via Large Scale Multi-task Learning},
  author        = {Sandeep Subramanian and Adam Trischler and Yoshua Bengio and Christopher J. Pal},
  journal       = {CoRR},
  volume        = {abs/1804.00079},
  year          = {2018},
  eprint        = {1804.00079},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1804.00079},
  timestamp     = {Mon, 13 Aug 2018 16:47:55 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1804-00079},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/ConneauKSBB17,
  title         = {Supervised Learning of Universal Sentence Representations from Natural Language Inference Data},
  author        = {Alexis Conneau and Douwe Kiela and Holger Schwenk and Lo{\"{\i}}c Barrault and Antoine Bordes},
  journal       = {CoRR},
  volume        = {abs/1705.02364},
  year          = {2017},
  eprint        = {1705.02364},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1705.02364},
  timestamp     = {Mon, 13 Aug 2018 16:48:46 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/ConneauKSBB17},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@proceedings{2019:3322640,
  title     = {ICAIL '19: Proceedings of the Seventeenth International Conference on Artificial Intelligence and Law},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Montreal, QC, Canada},
  year      = {2019},
  isbn      = {978-1-4503-6754-7},
  key       = {{$\!\!$}}
}
@article{GOODMAN2001403,
  author  = {Joshua T. Goodman},
  title   = {A Bit of Progress in Language Modeling},
  journal = {Computer Speech \& Language},
  volume  = {15},
  number  = {4},
  pages   = {403--434},
  year    = {2001},
  issn    = {0885-2308},
  doi     = {10.1006/csla.2001.0174},
  OPTurl  = {http://www.sciencedirect.com/science/article/pii/S0885230801901743}
}
@article{DBLP:journals/corr/cs-CL-9905001,
  title     = {Supervised Grammar Induction Using Training Data with Limited Constituent Information},
  author    = {Rebecca Hwa},
  journal   = {CoRR},
  volume    = {cs.CL/9905001},
  year      = {1999},
  note      = {Version 1},
  url       = {http://arxiv.org/abs/cs.CL/9905001},
  timestamp = {Wed, 07 Jun 2017 14:41:01 +0200},
  biburl    = {https://dblp.org/rec/bib/journals/corr/cs-CL-9905001},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@book{Jurafsky+Martin:2009a,
  title     = {Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics, and Speech Recognition},
  author    = {Jurafsky, Daniel and Martin, James H.},
  edition   = {Second},
  publisher = {Pearson Prentice Hall},
  year      = {2009}
}
@inproceedings{Maxwell2008ConceptAC,
  author    = {Maxwell, K. Tamsin and Schafer, Burkhard},
  title     = {Concept and Context in Legal Information Retrieval},
  booktitle = {JURIX},
  year      = {2008}
}
@misc{41224,
  author = {Mikolov, Tomas and Chen, Kai and Corrado, Greg S. and Dean, Jeffrey},
  title  = {Efficient Estimation of Word Representations in Vector Space},
  url    = {http://arxiv.org/abs/1301.3781},
  year   = {2013}
}
@misc{wieting2019training,
  author        = {Wieting, John and Kiela, Douwe},
  title         = {No Training Required: Exploring Random Encoders for Sentence Classification},
  eprint        = {1901.10444},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  year          = {2019}
}
@inproceedings{jiao-etal-2018-convolutional,
  author    = {Jiao, Xiaoqi and Wang, Fang and Feng, Dan},
  title     = {Convolutional Neural Network for Universal Sentence Embeddings},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  month     = aug,
  year      = {2018},
  pages     = {2470--2481},
  url       = {https://www.aclweb.org/anthology/C18-1209},
  abstract  = {This paper proposes a simple CNN model for creating general-purpose sentence embeddings that can transfer easily across domains and can also act as effective initialization for downstream tasks. Recently, averaging the embeddings of words in a sentence has proven to be a surprisingly successful and efficient way of obtaining sentence embeddings. However, these models represent a sentence, only in terms of features of words or uni-grams in it. In contrast, our model (CSE) utilizes both features of words and n-grams to encode sentences, which is actually a generalization of these bag-of-words models. The extensive experiments demonstrate that CSE performs better than average models in transfer learning setting and exceeds the state of the art in supervised learning setting by initializing the parameters with the pre-trained sentence embeddings.}
}
@misc{zhang2018learning,
  author        = {Zhang, Minghua and Wu, Yunfang and Li, Weikang and Li, Wei},
  title         = {Learning Universal Sentence Representations with Mean-Max Attention Autoencoder},
  eprint        = {1809.06590},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  year          = {2018}
}
@misc{conneau2017supervised,
  author        = {Conneau, Alexis and Kiela, Douwe and Schwenk, Holger and Barrault, Lo{\"i}c and Bordes, Antoine},
  title         = {Supervised Learning of Universal Sentence Representations from Natural Language Inference Data},
  year          = {2017},
  eprint        = {1705.02364},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
@comment{Second definition of key DBLP:journals/corr/abs-1804-00079 (Subramanian et al., 2018, arXiv 1804.00079) removed. It repeated the earlier entry with the same key field for field, and BibTeX reports a repeated-entry error when a key is defined twice.}
@incollection{NIPS2015_5950,
  author    = {Kiros, Ryan and Zhu, Yukun and Salakhutdinov, Ruslan R and Zemel, Richard and Urtasun, Raquel and Torralba, Antonio and Fidler, Sanja},
  title     = {Skip-Thought Vectors},
  editor    = {C. Cortes and N. D. Lawrence and D. D. Lee and M. Sugiyama and R. Garnett},
  booktitle = {Advances in Neural Information Processing Systems 28},
  publisher = {Curran Associates, Inc.},
  year      = {2015},
  pages     = {3294--3302},
  url       = {http://papers.nips.cc/paper/5950-skip-thought-vectors.pdf}
}
@misc{ionescu2019vector,
  author        = {Ionescu, Radu Tudor and Butnaru, Andrei M.},
  title         = {{Vector of Locally-Aggregated Word Embeddings (VLAWE): A Novel Document-level Representation}},
  eprint        = {1902.08850},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  year          = {2019}
}
@article{Yang2018ZerotrainingSE,
  author        = {Yang, Ziyi and Zhu, Chenguang and Chen, Weizhu},
  title         = {Zero-training Sentence Embedding via Orthogonal Basis},
  journal       = {CoRR},
  volume        = {abs/1810.00438},
  year          = {2018},
  url           = {http://arxiv.org/abs/1810.00438},
  archivePrefix = {arXiv},
  eprint        = {1810.00438}
}
@misc{shen2018baseline,
  author        = {Shen, Dinghan and Wang, Guoyin and Wang, Wenlin and Min, Martin Renqiang and Su, Qinliang and Zhang, Yizhe and Li, Chunyuan and Henao, Ricardo and Carin, Lawrence},
  title         = {Baseline Needs More Love: On Simple Word-Embedding-Based Models and Associated Pooling Mechanisms},
  eprint        = {1805.09843},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  year          = {2018}
}
@article{DBLP:journals/corr/abs-1902-06423,
  title         = {{CBOW} Is Not All You Need: Combining {CBOW} with the Compositional Matrix Space Model},
  author        = {Florian Mai and Lukas Galke and Ansgar Scherp},
  journal       = {CoRR},
  volume        = {abs/1902.06423},
  year          = {2019},
  eprint        = {1902.06423},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1902.06423},
  timestamp     = {Tue, 21 May 2019 18:03:37 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1902-06423},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1809-04262,
  title         = {Extracting Fairness Policies from Legal Documents},
  author        = {Rashmi Nagpal and Chetna Wadhwa and Mallika Gupta and Samiulla Shaikh and Sameep Mehta and Vikram Goyal},
  journal       = {CoRR},
  volume        = {abs/1809.04262},
  year          = {2018},
  eprint        = {1809.04262},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1809.04262},
  timestamp     = {Fri, 05 Oct 2018 11:34:52 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1809-04262},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{doi:10.1080/00437956.1954.11659520,
  author    = {Zellig S. Harris},
  title     = {Distributional Structure},
  journal   = {WORD},
  year      = {1954},
  volume    = {10},
  number    = {2--3},
  pages     = {146--162},
  publisher = {Routledge},
  doi       = {10.1080/00437956.1954.11659520}
}
@article{Halko:2011:FSR:2078879.2078881,
  author     = {Halko, Nathan and Martinsson, Per-Gunnar and Tropp, Joel A.},
  title      = {Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions},
  journal    = {SIAM Review},
  issue_date = {May 2011},
  volume     = {53},
  number     = {2},
  month      = may,
  year       = {2011},
  issn       = {0036-1445},
  pages      = {217--288},
  numpages   = {72},
  url        = {http://dx.doi.org/10.1137/090771806},
  doi        = {10.1137/090771806},
  acmid      = {2078881},
  publisher  = {Society for Industrial and Applied Mathematics},
  address    = {Philadelphia, PA, USA},
  keywords   = {Johnson-Lindenstrauss lemma, dimension reduction, eigenvalue decomposition, interpolative decomposition, matrix approximation, parallel algorithm, pass-efficient algorithm, principal component analysis, random matrix, randomized algorithm, rank-revealing QR factorization, singular value decomposition, streaming algorithm}
}
@book{books/daglib/0031897,
  author    = {B{\"u}ttcher, Stefan and Clarke, Charles L. A. and Cormack, Gordon V.},
  title     = {Information Retrieval: Implementing and Evaluating Search Engines},
  publisher = {MIT Press},
  year      = 2010,
  isbn      = {978-0-262-02651-2},
  pages     = {I--XXIV, 1--606},
  ee        = {http://mitpress.mit.edu/books/information-retrieval},
  keywords  = {dblp},
  added-at  = {2013-10-06T00:00:00.000+0200},
  timestamp = {2013-10-08T11:35:46.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/2e679957b4a1bdf252c3a33296397f434/dblp},
  interhash = {8aba6a7cd5e81a4c68dc1d6c9102fe7b},
  intrahash = {e679957b4a1bdf252c3a33296397f434}
}
@article{Wolf2019HuggingFacesTS,
  author        = {Wolf, Thomas and Debut, Lysandre and Sanh, Victor and Chaumond, Julien and Delangue, Clement and Moi, Anthony and Cistac, Pierric and Rault, Tim and Louf, R{\'e}mi and Funtowicz, Morgan and Brew, Jamie},
  title         = {{HuggingFace's} {Transformers}: State-of-the-art Natural Language Processing},
  journal       = {CoRR},
  volume        = {abs/1910.03771},
  year          = {2019},
  url           = {http://arxiv.org/abs/1910.03771},
  archivePrefix = {arXiv},
  eprint        = {1910.03771}
}
@misc{gillick2018endtoend,
  author        = {Gillick, Daniel and Presta, Alessandro and Tomar, Gaurav Singh},
  title         = {End-to-End Retrieval in Continuous Space},
  eprint        = {1811.08008},
  archivePrefix = {arXiv},
  primaryClass  = {cs.IR},
  year          = {2018}
}
@misc{almarwani2019efficient,
  author        = {Almarwani, Nada and Aldarmaki, Hanan and Diab, Mona},
  title         = {Efficient Sentence Embedding using Discrete Cosine Transform},
  eprint        = {1909.03104},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  year          = {2019}
}
@misc{wu2018word,
  author        = {Wu, Lingfei and Yen, Ian E. H. and Xu, Kun and Xu, Fangli and Balakrishnan, Avinash and Chen, Pin-Yu and Ravikumar, Pradeep and Witbrock, Michael J.},
  title         = {{Word Mover's Embedding: From Word2Vec to Document Embedding}},
  eprint        = {1811.01713},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  year          = {2018}
}
% XLNet preprint (arXiv 1906.08237); record exported from BibSonomy.
% The note is printed by most styles, so it holds readable text only:
% the arXiv identifier followed by the code-availability remark.
@misc{yang2019xlnet,
  author      = {Yang, Zhilin and Dai, Zihang and Yang, Yiming and Carbonell, Jaime and Salakhutdinov, Ruslan and Le, Quoc V.},
  title       = "{XLNet: Generalized Autoregressive Pretraining for Language Understanding}",
  year        = 2019,
  url         = {http://arxiv.org/abs/1906.08237},
  note        = {arXiv:1906.08237. Pretrained models and code are available at https://github.com/zihangdai/xlnet},
  abstract    = {With the capability of modeling bidirectional contexts, denoising
autoencoding based pretraining like BERT achieves better performance than
pretraining approaches based on autoregressive language modeling. However,
relying on corrupting the input with masks, BERT neglects dependency between
the masked positions and suffers from a pretrain-finetune discrepancy. In light
of these pros and cons, we propose XLNet, a generalized autoregressive
pretraining method that (1) enables learning bidirectional contexts by
maximizing the expected likelihood over all permutations of the factorization
order and (2) overcomes the limitations of BERT thanks to its autoregressive
formulation. Furthermore, XLNet integrates ideas from Transformer-XL, the
state-of-the-art autoregressive model, into pretraining. Empirically, XLNet
outperforms BERT on 20 tasks, often by a large margin, and achieves
state-of-the-art results on 18 tasks including question answering, natural
language inference, sentiment analysis, and document ranking.},
  description = {[1906.08237] XLNet: Generalized Autoregressive Pretraining for Language Understanding},
  keywords    = {language_modeling nlp tpu transfer_learning},
  biburl      = {https://www.bibsonomy.org/bibtex/2b758258da935db4bc1a57b5f6c9d94c6/deepforce},
  interhash   = {cd85caa3241071a53ea5c86eadae8de8},
  intrahash   = {b758258da935db4bc1a57b5f6c9d94c6},
  added-at    = {2019-07-03T19:07:32.000+0200},
  timestamp   = {2019-07-03T19:07:32.000+0200}
}
% XNLI paper in the EMNLP 2018 proceedings.
% The place is stored in `address`, the field name used by the other
% proceedings entries in this file and understood by classic BibTeX styles.
@inproceedings{conneau2018xnli,
  author    = {Conneau, Alexis and Rinott, Ruty and Lample, Guillaume and Williams, Adina and Bowman, Samuel R. and Schwenk, Holger and Stoyanov, Veselin},
  title     = "{XNLI: Evaluating Cross-lingual Sentence Representations}",
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  year      = {2018},
  publisher = {Association for Computational Linguistics},
  address   = {Brussels, Belgium}
}
% Generative pre-training report by Radford and co-authors (2018).
% Typed as a technical report: it has no proceedings volume, so the
% `booktitle` that an inproceedings entry requires cannot be supplied.
% NOTE(review): co-author list and institution were added from the published
% report, not from the original record -- confirm.
@techreport{Radford2018ImprovingLU,
  author      = {Radford, Alec and Narasimhan, Karthik and Salimans, Tim and Sutskever, Ilya},
  title       = {Improving Language Understanding by Generative Pre-Training},
  institution = {OpenAI},
  year        = {2018}
}
% Sentence-BERT paper in the EMNLP 2019 proceedings.
% `month` uses the unquoted three-letter macro so that styles can expand it;
% a quoted "11" would be printed literally.
@inproceedings{reimers-2019-sentence-bert,
  author    = "Reimers, Nils and Gurevych, Iryna",
  title     = "{Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks}",
  booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
  month     = nov,
  year      = "2019",
  publisher = "Association for Computational Linguistics",
  url       = "http://arxiv.org/abs/1908.10084"
}
% GloVe paper in the EMNLP 2014 proceedings.
% `booktitle` holds only the proceedings name: bibliography styles prepend
% "In" themselves, so a literal "In" here would be printed twice.
% Only the mixed-case acronym is brace-protected in the title.
@inproceedings{Pennington14glove:global,
  author    = {Pennington, Jeffrey and Socher, Richard and Manning, Christopher D.},
  title     = {{GloVe}: Global Vectors for Word Representation},
  booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2014}
}
% Overview paper for the COLIEE 2017 competition.
@inproceedings{Kano2017OverviewOC,
  author    = "Kano, Yoshinobu and Kim, Mi Young and Goebel, Randy and Satoh, Ken",
  title     = {{Overview of COLIEE 2017}},
  booktitle = {COLIEE@ICAIL},
  year      = 2017
}
% Natural language inference corpus paper in the EMNLP 2015 proceedings.
@inproceedings{snli:emnlp2015,
  author    = "Bowman, Samuel R. and Angeli, Gabor and Potts, Christopher and Manning, Christopher D.",
  title     = "A large annotated corpus for learning natural language inference",
  booktitle = "Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
  publisher = "Association for Computational Linguistics",
  year      = 2015
}
% Broad-coverage NLI challenge corpus paper.
% The year follows the NAACL-HLT proceedings the paper appeared in (2018);
% the citation key keeps its original spelling so existing citations still resolve.
@inproceedings{Williams2017ABC,
  author    = {Williams, Adina and Nangia, Nikita and Bowman, Samuel R.},
  title     = {A Broad-Coverage Challenge Corpus for Sentence Understanding through Inference},
  booktitle = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT)},
  year      = {2018}
}
% Contextualized word representations paper, NAACL 2018.
@inproceedings{Peters:2018,
  author    = "Peters, Matthew E. and Neumann, Mark and Iyyer, Mohit and Gardner, Matt and Clark, Christopher and Lee, Kenton and Zettlemoyer, Luke",
  title     = "Deep contextualized word representations",
  booktitle = "Proc. of NAACL",
  year      = 2018
}
% Few-shot learning survey, cited as a preprint.
% Typed as misc with eprint fields, like the other arXiv preprints in this
% file, because no proceedings volume (`booktitle`) exists for it.
% NOTE(review): arXiv identifier and class were added from the arXiv listing -- confirm.
@misc{Wang2019GeneralizingFA,
  author        = {Wang, Yaqing and Yao, Quanming and Kwok, James and Ni, Lionel M.},
  title         = {Generalizing from a Few Examples: A Survey on Few-Shot Learning},
  year          = {2019},
  eprint        = {1904.05046},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG}
}
% Transformer paper as listed in CoRR (arXiv 1706.03762); record exported from DBLP.
@article{DBLP:journals/corr/VaswaniSPUJGKP17,
  author        = "Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, Lukasz and Polosukhin, Illia",
  title         = "Attention Is All You Need",
  journal       = "CoRR",
  volume        = "abs/1706.03762",
  year          = 2017,
  eprint        = "1706.03762",
  archivePrefix = "arXiv",
  url           = "http://arxiv.org/abs/1706.03762",
  biburl        = "https://dblp.org/rec/bib/journals/corr/VaswaniSPUJGKP17",
  bibsource     = "dblp computer science bibliography, https://dblp.org",
  timestamp     = "Mon, 13 Aug 2018 16:48:37 +0200"
}
% Sentence-embedding baseline paper, ICLR 2017.
% Typed as inproceedings because the venue is given in `booktitle`;
% an article entry ignores `booktitle` and requires `journal`.
@inproceedings{arora2017asimple,
  author    = {Arora, Sanjeev and Liang, Yingyu and Ma, Tengyu},
  title     = {A Simple but Tough-to-Beat Baseline for Sentence Embeddings},
  booktitle = {International Conference on Learning Representations},
  year      = {2017}
}
% MoverScore paper in the EMNLP 2019 proceedings (Hong Kong).
% `month` is the unquoted macro for the conference month, matching the
% Sentence-BERT entry from the same proceedings elsewhere in this file.
@inproceedings{zhao2019moverscore,
  author    = {Zhao, Wei and Peyrard, Maxime and Liu, Fei and Gao, Yang and Meyer, Christian M. and Eger, Steffen},
  title     = "{MoverScore: Text Generation Evaluating with Contextualized Embeddings and Earth Mover Distance}",
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2019},
  address   = {Hong Kong, China},
  publisher = {Association for Computational Linguistics}
}
% GPT-2 report; record exported from BibSonomy.
% Typed as a technical report because there is no journal to satisfy an
% article entry. The year follows the February 2019 release that the record
% timestamps below also reflect.
% NOTE(review): the institution was added from the publisher of the linked PDF -- confirm.
@techreport{gpt2,
  author      = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya},
  title       = {Language Models are Unsupervised Multitask Learners},
  institution = {OpenAI},
  year        = 2019,
  url         = {https://d4mucfpksywv.cloudfront.net/better-language-models/language-models.pdf},
  description = {Language Models are Unsupervised Multitask Learners},
  keywords    = {learning multitask},
  biburl      = {https://www.bibsonomy.org/bibtex/2b30710316a8cfbae687672ea1f85c193/kirk86},
  interhash   = {ce8168300081d74707849ed488e2a458},
  intrahash   = {b30710316a8cfbae687672ea1f85c193},
  added-at    = {2019-02-27T03:35:25.000+0100},
  timestamp   = {2019-02-27T03:35:25.000+0100}
}
% Cross-lingual language model pretraining preprint (arXiv 1901.07291).
@article{lample2019cross,
  author  = "Lample, Guillaume and Conneau, Alexis",
  title   = "Cross-lingual Language Model Pretraining",
  journal = "arXiv preprint arXiv:1901.07291",
  year    = 2019
}
% Word Mover's Distance paper, ICML 2015 (PMLR volume 37).
% `month` is the plain month macro; a day range is not a valid month value
% and is rejected or printed verbatim depending on the backend.
@inproceedings{pmlr-v37-kusnerb15,
  author    = {Kusner, Matt and Sun, Yu and Kolkin, Nicholas and Weinberger, Kilian},
  title     = {From Word Embeddings To Document Distances},
  booktitle = {Proceedings of the 32nd International Conference on Machine Learning},
  editor    = {Bach, Francis and Blei, David},
  series    = {Proceedings of Machine Learning Research},
  volume    = {37},
  pages     = {957--966},
  month     = jul,
  year      = {2015},
  address   = {Lille, France},
  publisher = {PMLR},
  pdf       = {http://proceedings.mlr.press/v37/kusnerb15.pdf},
  url       = {http://proceedings.mlr.press/v37/kusnerb15.html},
  abstract  = {We present the Word Mover's Distance (WMD), a novel distance function between text documents. Our work is based on recent results in word embeddings that learn semantically meaningful representations for words from local co-occurrences in sentences. The WMD distance measures the dissimilarity between two text documents as the minimum amount of distance that the embedded words of one document need to "travel" to reach the embedded words of another document. We show that this distance metric can be cast as an instance of the Earth Mover's Distance, a well studied transportation problem for which several highly efficient solvers have been developed. Our metric has no hyperparameters and is straight-forward to implement. Further, we demonstrate on eight real world document classification data sets, in comparison with seven state-of-the-art baselines, that the WMD metric leads to unprecedented low k-nearest neighbor document classification error rates.}
}
% Case law entailment paper in the ICAIL '19 proceedings; record exported from the ACM Digital Library.
@inproceedings{Rabelo:2019:CST:3322640.3326741,
  author    = "Rabelo, Juliano and Kim, Mi-Young and Goebel, Randy",
  title     = "Combining Similarity and Transformer Methods for Case Law Entailment",
  booktitle = "Proceedings of the Seventeenth International Conference on Artificial Intelligence and Law",
  series    = "ICAIL '19",
  year      = 2019,
  pages     = "290--296",
  numpages  = 7,
  publisher = "ACM",
  address   = "New York, NY, USA",
  location  = "Montreal, QC, Canada",
  isbn      = "978-1-4503-6754-7",
  doi       = "10.1145/3322640.3326741",
  url       = "http://doi.acm.org/10.1145/3322640.3326741",
  acmid     = 3326741,
  keywords  = "binary classification, document similarity, imbalanced datasets, legal textual entailment"
}
% Universal Sentence Encoder paper as listed in CoRR (arXiv 1803.11175); record exported from DBLP.
% The compound surname "St. John" is braced and written in "Last, First" form;
% in "First Last" order BibTeX would take only "John" as the surname.
@article{DBLP:journals/corr/abs-1803-11175,
  author        = {Daniel Cer and
                   Yinfei Yang and
                   Sheng{-}yi Kong and
                   Nan Hua and
                   Nicole Limtiaco and
                   {St. John}, Rhomni and
                   Noah Constant and
                   Mario Guajardo{-}Cespedes and
                   Steve Yuan and
                   Chris Tar and
                   Yun{-}Hsuan Sung and
                   Brian Strope and
                   Ray Kurzweil},
  title         = {Universal Sentence Encoder},
  journal       = {CoRR},
  volume        = {abs/1803.11175},
  year          = {2018},
  url           = {http://arxiv.org/abs/1803.11175},
  archivePrefix = {arXiv},
  eprint        = {1803.11175},
  timestamp     = {Mon, 13 Aug 2018 16:46:40 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1803-11175},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% Review of the dynamic time warping algorithm (2008).
% Typed as a technical report: the record names no journal, which an article entry requires.
% NOTE(review): the institution was added from the report's title page -- confirm.
@techreport{senin2008dynamic,
  author      = {Senin, Pavel},
  title       = {Dynamic time warping algorithm review},
  institution = {Information and Computer Science Department, University of Hawaii at Manoa},
  year        = {2008},
  publisher   = {Citeseer}
}
% Dynamic-programming paper reprinted in an edited collection (1990); record exported from the ACM Digital Library.
% For an incollection entry, `title` is the contribution and `booktitle` is
% the containing book; `chapter` is reserved for a chapter number.
@incollection{Sakoe:1990:DPA:108235.108244,
  author    = {Sakoe, Hiroaki and Chiba, Seibi},
  title     = {Dynamic Programming Algorithm Optimization for Spoken Word Recognition},
  booktitle = {Readings in Speech Recognition},
  editor    = {Waibel, Alex and Lee, Kai-Fu},
  year      = {1990},
  isbn      = {1-55860-124-4},
  pages     = {159--165},
  numpages  = {7},
  url       = {http://dl.acm.org/citation.cfm?id=108235.108244},
  acmid     = {108244},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA}
}
% Chapter on dynamic time warping in a Springer book (2007).
% Typed as incollection with the book name in `booktitle`; the container is
% a book with a publisher and page range, not a journal.
@incollection{muller2007dynamic,
  author    = {M{\"u}ller, Meinard},
  title     = {Dynamic time warping},
  booktitle = {Information Retrieval for Music and Motion},
  pages     = {69--84},
  year      = {2007},
  publisher = {Springer}
}