1154 lines
44 KiB
BibTeX
1154 lines
44 KiB
BibTeX
|
|
|||
|
|
|||
|
@inproceedings{gonito2016,
  title     = {Gonito.net -- Open Platform for Research Competition, Cooperation and Reproducibility},
  author    = {Grali{\'n}ski, Filip and Jaworski, Rafa{\l} and Borchmann, {\L}ukasz and Wierzcho{\'n}, Piotr},
  editor    = {Branco, Ant{\'o}nio and Calzolari, Nicoletta and Choukri, Khalid},
  booktitle = {Proceedings of the 4REAL Workshop: Workshop on Research Results Reproducibility and Resources Citation in Science and Technology of Language},
  year      = {2016},
  pages     = {13--20},
}
|
|||
|
|
|||
|
@inproceedings{stanislawek-etal-2019-named,
  title     = {Named Entity Recognition - Is There a Glass Ceiling?},
  author    = {Stanis{\l}awek, Tomasz and
               Wr{\'o}blewska, Anna and
               W{\'o}jcicka, Alicja and
               Ziembicki, Daniel and
               Biecek, Przemys{\l}aw},
  booktitle = {Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)},
  month     = nov,
  year      = {2019},
  address   = {Hong Kong, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/K19-1058},
  doi       = {10.18653/v1/K19-1058},
  pages     = {624--633},
  abstract  = {Recent developments in Named Entity Recognition (NER) have resulted in better and better models. However, is there a glass ceiling? Do we know which types of errors are still hard or even impossible to correct? In this paper, we present a detailed analysis of the types of errors in state-of-the-art machine learning (ML) methods. Our study illustrates weak and strong points of the Stanford, CMU, FLAIR, ELMO and BERT models, as well as their shared limitations. We also introduce new techniques for improving annotation, training process, and for checking model quality and stability.},
}
|
|||
|
|
|||
|
@misc{borchmann2019searching,
  title         = {Searching for Legal Clauses by Analogy. {Few-shot} Semantic Retrieval Shared Task},
  author        = {Borchmann, {\L}ukasz and
                   Wi{\'s}niewski, Dawid and
                   Gretkowski, Andrzej and
                   Kosmala, Izabela and
                   Jurkiewicz, Dawid and
                   Sza{\l}kiewicz, {\L}ukasz and
                   Pa{\l}ka, Gabriela and
                   Kaczmarek, Karol and
                   Kaliska, Agnieszka and
                   Grali{\'n}ski, Filip},
  year          = {2019},
  eprint        = {1911.03911},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
|
|||
|
|
|||
|
@inproceedings{gralinski-etal-2019-geval,
  author    = {Grali{\'n}ski, Filip and Wr{\'o}blewska, Anna and Stanis{\l}awek, Tomasz and Grabowski, Kamil and G{\'o}recki, Tomasz},
  title     = {{GE}val: Tool for Debugging {NLP} Datasets and Models},
  booktitle = {Proceedings of the 2019 ACL Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP},
  publisher = {Association for Computational Linguistics},
  address   = {Florence, Italy},
  month     = aug,
  year      = {2019},
  pages     = {254--262},
  url       = {https://www.aclweb.org/anthology/W19-4826},
  abstract  = {This paper presents a simple but general and effective method to debug the output of machine learning (ML) supervised models, including neural networks. The algorithm looks for features that lower the evaluation metric in such a way that it cannot be ascribed to chance (as measured by their p-values). Using this method {--} implemented as MLEval tool {--} you can find: (1) anomalies in test sets, (2) issues in preprocessing, (3) problems in the ML model itself. It can give you an insight into what can be improved in the datasets and/or the model. The same method can be used to compare ML models or different versions of the same model. We present the tool, the theory behind it and use cases for text-based models of various types.},
}
|
|||
|
|
|||
|
@inproceedings{Borchmann2018,
  title      = {Approaching nested named entity recognition with parallel {LSTM-CRFs}},
  author     = {Borchmann, {\L}ukasz and Gretkowski, Andrzej and Grali{\'n}ski, Filip},
  editor     = {Ogrodniczuk, Maciej and Kobyli{\'n}ski, {\L}ukasz},
  url        = {http://www.borchmann.pl/wp-content/uploads/2018/10/borchmann-lukasz.pdf},
  year       = {2018},
  date       = {2018-10-19},
  booktitle  = {Proceedings of the PolEval 2018 Workshop},
  pages      = {63--73},
  publisher  = {Institute of Computer Science, Polish Academy of Sciences},
  address    = {Warszawa},
  pubstate   = {published},
  tppubtype  = {inproceedings},
}
|
|||
|
|
|||
|
|
|||
|
@article{DBLP:journals/corr/HewlettLJPFHKB16,
  author        = {Daniel Hewlett and
                   Alexandre Lacoste and
                   Llion Jones and
                   Illia Polosukhin and
                   Andrew Fandrianto and
                   Jay Han and
                   Matthew Kelcey and
                   David Berthelot},
  title         = {{WikiReading}: {A} Novel Large-scale Language Understanding Task over {Wikipedia}},
  journal       = {CoRR},
  volume        = {abs/1608.03542},
  year          = {2016},
  url           = {http://arxiv.org/abs/1608.03542},
  archivePrefix = {arXiv},
  eprint        = {1608.03542},
  timestamp     = {Mon, 13 Aug 2018 16:46:41 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/HewlettLJPFHKB16},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@inproceedings{byte-level2018kenter,
  title     = {Byte-level Machine Reading across Morphologically Varied Languages},
  author    = {Kenter, Tom and Jones, Llion and Hewlett, Daniel},
  booktitle = {Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18)},
  year      = {2018},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/SutskeverVL14,
  author        = {Ilya Sutskever and
                   Oriol Vinyals and
                   Quoc V. Le},
  title         = {Sequence to Sequence Learning with Neural Networks},
  journal       = {CoRR},
  volume        = {abs/1409.3215},
  year          = {2014},
  url           = {http://arxiv.org/abs/1409.3215},
  archivePrefix = {arXiv},
  eprint        = {1409.3215},
  timestamp     = {Mon, 13 Aug 2018 16:48:06 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/SutskeverVL14},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1811-04284,
  author        = {Hainan Xu and
                   Shuoyang Ding and
                   Shinji Watanabe},
  title         = {Improving End-to-end Speech Recognition with Pronunciation-assisted Sub-word Modeling},
  journal       = {CoRR},
  volume        = {abs/1811.04284},
  year          = {2018},
  url           = {http://arxiv.org/abs/1811.04284},
  archivePrefix = {arXiv},
  eprint        = {1811.04284},
  timestamp     = {Fri, 23 Nov 2018 12:43:51 +0100},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1811-04284},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@inproceedings{holt-chisholm-2018-extracting,
  title     = {Extracting structured data from invoices},
  author    = {Holt, Xavier and
               Chisholm, Andrew},
  booktitle = {Proceedings of the Australasian Language Technology Association Workshop 2018},
  month     = dec,
  year      = {2018},
  address   = {Dunedin, New Zealand},
  url       = {https://www.aclweb.org/anthology/U18-1006},
  pages     = {53--59},
  abstract  = {Business documents encode a wealth of information in a format tailored to human consumption {--} i.e. aesthetically disbursed natural language text, graphics and tables. We address the task of extracting key fields (e.g. the amount due on an invoice) from a wide-variety of potentially unseen document formats. In contrast to traditional template driven extraction systems, we introduce a content-driven machine-learning approach which is both robust to noise and generalises to unseen document formats. In a comparison of our approach with alternative invoice extraction systems, we observe an absolute accuracy gain of 20{\%} across compared fields, and a 25{\%}{--}94{\%} reduction in extraction latency.},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1907-11692,
  author        = {Yinhan Liu and
                   Myle Ott and
                   Naman Goyal and
                   Jingfei Du and
                   Mandar Joshi and
                   Danqi Chen and
                   Omer Levy and
                   Mike Lewis and
                   Luke Zettlemoyer and
                   Veselin Stoyanov},
  title         = {{RoBERTa}: {A} Robustly Optimized {BERT} Pretraining Approach},
  journal       = {CoRR},
  volume        = {abs/1907.11692},
  year          = {2019},
  url           = {http://arxiv.org/abs/1907.11692},
  archivePrefix = {arXiv},
  eprint        = {1907.11692},
  timestamp     = {Thu, 01 Aug 2019 08:59:33 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1907-11692},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
|
|||
|
@article{ke2018focused,
  title   = {Focused Hierarchical {RNNs} for Conditional Sequence Processing},
  author  = {Ke, Nan Rosemary and Zolna, Konrad and Sordoni, Alessandro and Lin, Zhouhan and Trischler, Adam and Bengio, Yoshua and Pineau, Joelle and Charlin, Laurent and Pal, Chris},
  journal = {arXiv preprint arXiv:1806.04342},
  year    = {2018},
}
|
|||
|
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1901-02860,
  author        = {Zihang Dai and
                   Zhilin Yang and
                   Yiming Yang and
                   Jaime G. Carbonell and
                   Quoc V. Le and
                   Ruslan Salakhutdinov},
  title         = {{Transformer-XL}: Attentive Language Models Beyond a Fixed-Length Context},
  journal       = {CoRR},
  volume        = {abs/1901.02860},
  year          = {2019},
  url           = {http://arxiv.org/abs/1901.02860},
  archivePrefix = {arXiv},
  eprint        = {1901.02860},
  timestamp     = {Fri, 01 Feb 2019 13:39:59 +0100},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1901-02860},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1906-08237,
  author        = {Zhilin Yang and
                   Zihang Dai and
                   Yiming Yang and
                   Jaime G. Carbonell and
                   Ruslan Salakhutdinov and
                   Quoc V. Le},
  title         = {{XLNet}: Generalized Autoregressive Pretraining for Language Understanding},
  journal       = {CoRR},
  volume        = {abs/1906.08237},
  year          = {2019},
  url           = {http://arxiv.org/abs/1906.08237},
  archivePrefix = {arXiv},
  eprint        = {1906.08237},
  timestamp     = {Mon, 24 Jun 2019 17:28:45 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1906-08237},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1907-06170,
  author        = {Marcin Junczys{-}Dowmunt},
  title         = {{Microsoft Translator} at {WMT} 2019: Towards Large-Scale Document-Level Neural Machine Translation},
  journal       = {CoRR},
  volume        = {abs/1907.06170},
  year          = {2019},
  url           = {http://arxiv.org/abs/1907.06170},
  archivePrefix = {arXiv},
  eprint        = {1907.06170},
  timestamp     = {Wed, 17 Jul 2019 10:27:36 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1907-06170},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@inproceedings{akbik2018coling,
  author    = {Akbik, Alan and Blythe, Duncan and Vollgraf, Roland},
  title     = {Contextual String Embeddings for Sequence Labeling},
  booktitle = {{COLING} 2018, 27th International Conference on Computational Linguistics},
  year      = {2018},
  pages     = {1638--1649},
}
|
|||
|
|
|||
|
|
|||
|
@article{luo2019named,
  author  = {Luo, Ying and Zhao, Hai and Zhan, Junlang},
  title   = {Named Entity Recognition Only from Word Embeddings},
  journal = {arXiv preprint arXiv:1909.00164},
  year    = {2019},
}
|
|||
|
|
|||
|
@article{tu2018learning,
  author    = {Tu, Zhaopeng and Liu, Yang and Shi, Shuming and Zhang, Tong},
  title     = {Learning to remember translation history with a continuous cache},
  journal   = {Transactions of the Association for Computational Linguistics},
  publisher = {MIT Press},
  volume    = {6},
  pages     = {407--420},
  year      = {2018},
}
|
|||
|
|
|||
|
@article{miculicich2018document,
  author  = {Miculicich, Lesly and Ram, Dhananjay and Pappas, Nikolaos and Henderson, James},
  title   = {Document-level neural machine translation with hierarchical attention networks},
  journal = {arXiv preprint arXiv:1809.01576},
  year    = {2018},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1907-05242,
  author        = {Guillaume Lample and
                   Alexandre Sablayrolles and
                   Marc'Aurelio Ranzato and
                   Ludovic Denoyer and
                   Herv{\'{e}} J{\'{e}}gou},
  title         = {Large Memory Layers with Product Keys},
  journal       = {CoRR},
  volume        = {abs/1907.05242},
  year          = {2019},
  url           = {http://arxiv.org/abs/1907.05242},
  archivePrefix = {arXiv},
  eprint        = {1907.05242},
  timestamp     = {Wed, 17 Jul 2019 10:27:36 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1907-05242},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1907-01686,
  title         = {Machine Reading Comprehension: a Literature Review},
  author        = {Xin Zhang and An Yang and Sujian Li and Yizhong Wang},
  journal       = {CoRR},
  volume        = {abs/1907.01686},
  year          = {2019},
  eprint        = {1907.01686},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1907.01686},
  timestamp     = {Mon, 08 Jul 2019 14:12:33 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1907-01686},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1810-04805,
  author        = {Jacob Devlin and
                   Ming{-}Wei Chang and
                   Kenton Lee and
                   Kristina Toutanova},
  title         = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  journal       = {CoRR},
  volume        = {abs/1810.04805},
  year          = {2018},
  url           = {http://arxiv.org/abs/1810.04805},
  archivePrefix = {arXiv},
  eprint        = {1810.04805},
  timestamp     = {Tue, 30 Oct 2018 20:39:56 +0100},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1810-04805},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1809-08799,
  author        = {Anoop R. Katti and
                   Christian Reisswig and
                   Cordula Guder and
                   Sebastian Brarda and
                   Steffen Bickel and
                   Johannes H{\"{o}}hne and
                   Jean Baptiste Faddoul},
  title         = {{Chargrid}: Towards Understanding {2D} Documents},
  journal       = {CoRR},
  volume        = {abs/1809.08799},
  year          = {2018},
  url           = {http://arxiv.org/abs/1809.08799},
  archivePrefix = {arXiv},
  eprint        = {1809.08799},
  timestamp     = {Fri, 05 Oct 2018 11:34:52 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1809-08799},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/HamiltonYL17,
  title         = {Inductive Representation Learning on Large Graphs},
  author        = {William L. Hamilton and Rex Ying and Jure Leskovec},
  journal       = {CoRR},
  volume        = {abs/1706.02216},
  year          = {2017},
  eprint        = {1706.02216},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1706.02216},
  timestamp     = {Mon, 13 Aug 2018 16:46:12 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/HamiltonYL17},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1810-00826,
  title         = {How Powerful are Graph Neural Networks?},
  author        = {Keyulu Xu and Weihua Hu and Jure Leskovec and Stefanie Jegelka},
  journal       = {CoRR},
  volume        = {abs/1810.00826},
  year          = {2018},
  eprint        = {1810.00826},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1810.00826},
  timestamp     = {Tue, 30 Oct 2018 10:49:09 +0100},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1810-00826},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@inproceedings{matuschek2008measuring,
  title        = {Measuring text similarity with dynamic time warping},
  author       = {Matuschek, Michael and Schl{\"u}ter, Tim and Conrad, Stefan},
  booktitle    = {Proceedings of the 2008 International Symposium on Database Engineering \& Applications},
  pages        = {263--267},
  year         = {2008},
  organization = {ACM},
}
|
|||
|
|
|||
|
@article{simonyan2014very,
  author  = {Simonyan, Karen and Zisserman, Andrew},
  title   = {Very deep convolutional networks for large-scale image recognition},
  journal = {arXiv preprint arXiv:1409.1556},
  year    = {2014},
}
|
|||
|
|
|||
|
% Web page, not a journal article: the export had the URL (with stray spaces)
% in `journal` plus apparently spurious volume/pages, so it is stored as a misc entry with `url`.
@misc{lecun2015lenet,
  title  = {{LeNet-5}, convolutional neural networks},
  author = {LeCun, Yann and others},
  year   = {2015},
  url    = {http://yann.lecun.com/exdb/lenet},
}
|
|||
|
|
|||
|
@article{bergstra2012random,
  author  = {Bergstra, James and Bengio, Yoshua},
  title   = {Random Search for Hyper-Parameter Optimization},
  journal = {Journal of Machine Learning Research},
  year    = {2012},
  volume  = {13},
  pages   = {281--305},
}
|
|||
|
|
|||
|
@incollection{hinton2012practical,
  title     = {A practical guide to training restricted {Boltzmann} machines},
  author    = {Hinton, Geoffrey E},
  booktitle = {Neural Networks: Tricks of the Trade},
  pages     = {599--619},
  year      = {2012},
  publisher = {Springer},
}
|
|||
|
|
|||
|
@book{hedges2014statistical,
  title     = {Statistical methods for meta-analysis},
  author    = {Hedges, Larry V and Olkin, Ingram},
  year      = {2014},
  publisher = {Academic Press},
}
|
|||
|
|
|||
|
@inproceedings{koehn2004statistical,
  title     = {Statistical significance tests for machine translation evaluation},
  author    = {Koehn, Philipp},
  booktitle = {Proceedings of the 2004 Conference on Empirical Methods in Natural Language Processing},
  pages     = {388--395},
  year      = {2004},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1904-01685,
  title         = {Measuring Calibration in Deep Learning},
  author        = {Jeremy Nixon and Mike Dusenberry and Linchuan Zhang and Ghassen Jerfel and Dustin Tran},
  journal       = {CoRR},
  volume        = {abs/1904.01685},
  year          = {2019},
  eprint        = {1904.01685},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1904.01685},
  timestamp     = {Wed, 24 Apr 2019 12:21:25 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1904-01685},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@article{settles2012active,
  author    = {Settles, Burr},
  title     = {Active learning},
  journal   = {Synthesis Lectures on Artificial Intelligence and Machine Learning},
  publisher = {Morgan \& Claypool Publishers},
  year      = {2012},
  volume    = {6},
  number    = {1},
  pages     = {1--114},
}
|
|||
|
|
|||
|
@incollection{NIPS2017_7062,
  author    = {Lundberg, Scott M and Lee, Su-In},
  title     = {A Unified Approach to Interpreting Model Predictions},
  editor    = {I. Guyon and U. V. Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett},
  booktitle = {Advances in Neural Information Processing Systems 30},
  publisher = {Curran Associates, Inc.},
  year      = {2017},
  pages     = {4765--4774},
  url       = {http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf},
}
|
|||
|
|
|||
|
@article{austin2014graphical,
  title     = {Graphical assessment of internal and external calibration of logistic regression models by using loess smoothers},
  author    = {Austin, Peter C and Steyerberg, Ewout W},
  journal   = {Statistics in Medicine},
  volume    = {33},
  number    = {3},
  pages     = {517--535},
  year      = {2014},
  publisher = {Wiley Online Library},
}
|
|||
|
|
|||
|
@article{lan2019albert,
  title   = {{ALBERT}: A Lite {BERT} for Self-supervised Learning of Language Representations},
  author  = {Lan, Zhenzhong and Chen, Mingda and Goodman, Sebastian and Gimpel, Kevin and Sharma, Piyush and Soricut, Radu},
  journal = {arXiv preprint arXiv:1909.11942},
  year    = {2019},
}
|
|||
|
|
|||
|
@article{jiao2019tinybert,
  title   = {{TinyBERT}: Distilling {BERT} for Natural Language Understanding},
  author  = {Jiao, Xiaoqi and Yin, Yichun and Shang, Lifeng and Jiang, Xin and Chen, Xiao and Li, Linlin and Wang, Fang and Liu, Qun},
  journal = {arXiv preprint arXiv:1909.10351},
  year    = {2019},
}
|
|||
|
|
|||
|
@article{wiewel2019localizing,
  author  = {Wiewel, Felix and Yang, Bin},
  title   = {Localizing Catastrophic Forgetting in Neural Networks},
  journal = {arXiv preprint arXiv:1906.02568},
  year    = {2019},
}
|
|||
|
|
|||
|
@article{hinton2015distilling,
  author  = {Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff},
  title   = {Distilling the knowledge in a neural network},
  journal = {arXiv preprint arXiv:1503.02531},
  year    = {2015},
}
|
|||
|
|
|||
|
@article{hubara2017quantized,
  title     = {Quantized neural networks: Training neural networks with low precision weights and activations},
  author    = {Hubara, Itay and Courbariaux, Matthieu and Soudry, Daniel and El-Yaniv, Ran and Bengio, Yoshua},
  journal   = {Journal of Machine Learning Research},
  volume    = {18},
  number    = {1},
  pages     = {6869--6898},
  year      = {2017},
  publisher = {JMLR.org},
}
|
|||
|
|
|||
|
@article{bao2019few,
  author  = {Bao, Yujia and Wu, Menghua and Chang, Shiyu and Barzilay, Regina},
  title   = {Few-shot Text Classification with Distributional Signatures},
  journal = {arXiv preprint arXiv:1908.06039},
  year    = {2019},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/NarayanGCS17,
  title         = {Split and Rephrase},
  author        = {Shashi Narayan and Claire Gardent and Shay B. Cohen and Anastasia Shimorina},
  journal       = {CoRR},
  volume        = {abs/1707.06971},
  year          = {2017},
  eprint        = {1707.06971},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1707.06971},
  timestamp     = {Mon, 13 Aug 2018 16:48:49 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/NarayanGCS17},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1906-01038,
  title         = {Transforming Complex Sentences into a Semantic Hierarchy},
  author        = {Christina Niklaus and Matthias Cetto and Andr{\'{e}} Freitas and Siegfried Handschuh},
  journal       = {CoRR},
  volume        = {abs/1906.01038},
  year          = {2019},
  eprint        = {1906.01038},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1906.01038},
  timestamp     = {Thu, 13 Jun 2019 13:36:00 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1906-01038},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@inproceedings{goldberger2005neighbourhood,
  title     = {Neighbourhood components analysis},
  author    = {Goldberger, Jacob and Hinton, Geoffrey E and Roweis, Sam T and Salakhutdinov, Ruslan R},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {513--520},
  year      = {2005},
}
|
|||
|
|
|||
|
@article{hyvarinen2000independent,
  title     = {Independent component analysis: algorithms and applications},
  author    = {Hyv{\"a}rinen, Aapo and Oja, Erkki},
  journal   = {Neural Networks},
  volume    = {13},
  number    = {4--5},
  pages     = {411--430},
  year      = {2000},
  publisher = {Elsevier},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1804-00079,
  title         = {Learning General Purpose Distributed Sentence Representations via Large Scale Multi-task Learning},
  author        = {Sandeep Subramanian and Adam Trischler and Yoshua Bengio and Christopher J. Pal},
  journal       = {CoRR},
  volume        = {abs/1804.00079},
  year          = {2018},
  eprint        = {1804.00079},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1804.00079},
  timestamp     = {Mon, 13 Aug 2018 16:47:55 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1804-00079},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/ConneauKSBB17,
  title         = {Supervised Learning of Universal Sentence Representations from Natural Language Inference Data},
  author        = {Alexis Conneau and Douwe Kiela and Holger Schwenk and Lo{\"{\i}}c Barrault and Antoine Bordes},
  journal       = {CoRR},
  volume        = {abs/1705.02364},
  year          = {2017},
  eprint        = {1705.02364},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1705.02364},
  timestamp     = {Mon, 13 Aug 2018 16:48:46 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/ConneauKSBB17},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
|
|||
|
|
|||
|
% No author/editor here, so `key` acts as the sort/label fallback; its value is
% an empty math group with negative thin spaces (presumably an exporter hack -- confirm before changing).
@proceedings{2019:3322640,
  title     = {ICAIL '19: Proceedings of the Seventeenth International Conference on Artificial Intelligence and Law},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Montreal, QC, Canada},
  year      = {2019},
  isbn      = {978-1-4503-6754-7},
  key       = {{$\!\!$}},
}
|
|||
|
|
|||
|
|
|||
|
@article{GOODMAN2001403,
  author  = {Joshua T. Goodman},
  title   = {A bit of progress in language modeling},
  journal = {Computer Speech \& Language},
  volume  = {15},
  number  = {4},
  pages   = {403--434},
  year    = {2001},
  issn    = {0885-2308},
  doi     = {10.1006/csla.2001.0174},
  OPTurl  = {http://www.sciencedirect.com/science/article/pii/S0885230801901743},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/cs-CL-9905001,
  title     = {Supervised Grammar Induction Using Training Data with Limited Constituent Information},
  author    = {Rebecca Hwa},
  journal   = {CoRR},
  volume    = {cs.CL/9905001},
  year      = {1999},
  note      = {Version 1},
  url       = {http://arxiv.org/abs/cs.CL/9905001},
  timestamp = {Wed, 07 Jun 2017 14:41:01 +0200},
  biburl    = {https://dblp.org/rec/bib/journals/corr/cs-CL-9905001},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@book{Jurafsky+Martin:2009a,
  title     = {Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics, and Speech Recognition},
  author    = {Jurafsky, Daniel and Martin, James H.},
  edition   = {Second},
  publisher = {Pearson Prentice Hall},
  year      = {2009},
}
|
|||
|
|
|||
|
@inproceedings{Maxwell2008ConceptAC,
  author    = {K. Tamsin Maxwell and Burkhard Schafer},
  title     = {Concept and Context in Legal Information Retrieval},
  booktitle = {JURIX},
  year      = {2008},
}
|
|||
|
|
|||
|
@misc{41224,
  author = {Tomas Mikolov and Kai Chen and Greg S. Corrado and Jeffrey Dean},
  title  = {Efficient Estimation of Word Representations in Vector Space},
  year   = {2013},
  url    = {http://arxiv.org/abs/1301.3781},
}
|
|||
|
|
|||
|
@misc{wieting2019training,
  author        = {John Wieting and Douwe Kiela},
  title         = {No Training Required: Exploring Random Encoders for Sentence Classification},
  year          = {2019},
  archivePrefix = {arXiv},
  eprint        = {1901.10444},
  primaryClass  = {cs.CL},
}
|
|||
|
|
|||
|
@inproceedings{jiao-etal-2018-convolutional,
  author    = {Jiao, Xiaoqi and Wang, Fang and Feng, Dan},
  title     = {Convolutional Neural Network for Universal Sentence Embeddings},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  month     = aug,
  year      = {2018},
  pages     = {2470--2481},
  url       = {https://www.aclweb.org/anthology/C18-1209},
  abstract  = {This paper proposes a simple CNN model for creating general-purpose sentence embeddings that can transfer easily across domains and can also act as effective initialization for downstream tasks. Recently, averaging the embeddings of words in a sentence has proven to be a surprisingly successful and efficient way of obtaining sentence embeddings. However, these models represent a sentence, only in terms of features of words or uni-grams in it. In contrast, our model (CSE) utilizes both features of words and n-grams to encode sentences, which is actually a generalization of these bag-of-words models. The extensive experiments demonstrate that CSE performs better than average models in transfer learning setting and exceeds the state of the art in supervised learning setting by initializing the parameters with the pre-trained sentence embeddings.},
}
|
|||
|
|
|||
|
@misc{zhang2018learning,
  author        = {Minghua Zhang and Yunfang Wu and Weikang Li and Wei Li},
  title         = {Learning Universal Sentence Representations with Mean-Max Attention Autoencoder},
  year          = {2018},
  archivePrefix = {arXiv},
  eprint        = {1809.06590},
  primaryClass  = {cs.CL},
}
|
|||
|
|
|||
|
% Same work as the DBLP:journals/corr/ConneauKSBB17 entry in this file; both keys are kept.
@misc{conneau2017supervised,
  title         = {Supervised Learning of Universal Sentence Representations from Natural Language Inference Data},
  author        = {Alexis Conneau and Douwe Kiela and Holger Schwenk and Lo{\"{\i}}c Barrault and Antoine Bordes},
  year          = {2017},
  eprint        = {1705.02364},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
|
|||
|
|
|||
|
% Duplicate of the DBLP:journals/corr/abs-1804-00079 entry defined earlier in this
% file (identical fields); removed here because BibTeX rejects repeated keys.
|
|||
|
|
|||
|
@incollection{NIPS2015_5950,
  author    = {Kiros, Ryan and Zhu, Yukun and Salakhutdinov, Ruslan R and Zemel, Richard and Urtasun, Raquel and Torralba, Antonio and Fidler, Sanja},
  title     = {Skip-Thought Vectors},
  booktitle = {Advances in Neural Information Processing Systems 28},
  editor    = {C. Cortes and N. D. Lawrence and D. D. Lee and M. Sugiyama and R. Garnett},
  publisher = {Curran Associates, Inc.},
  year      = {2015},
  pages     = {3294--3302},
  url       = {http://papers.nips.cc/paper/5950-skip-thought-vectors.pdf},
}
|
|||
|
|
|||
|
@misc{ionescu2019vector,
  author        = {Radu Tudor Ionescu and Andrei M. Butnaru},
  title         = {{Vector of Locally-Aggregated Word Embeddings (VLAWE): A Novel Document-level Representation}},
  year          = {2019},
  eprint        = {1902.08850},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
|
|||
|
|
|||
|
@article{Yang2018ZerotrainingSE,
  author  = {Ziyi Yang and Chenguang Zhu and Weizhu Chen},
  title   = {Zero-training Sentence Embedding via Orthogonal Basis},
  journal = {ArXiv},
  volume  = {abs/1810.00438},
  year    = {2018},
}
|
|||
|
|
|||
|
@misc{shen2018baseline,
  author        = {Dinghan Shen and Guoyin Wang and Wenlin Wang and Martin Renqiang Min and Qinliang Su and Yizhe Zhang and Chunyuan Li and Ricardo Henao and Lawrence Carin},
  title         = {Baseline Needs More Love: On Simple Word-Embedding-Based Models and Associated Pooling Mechanisms},
  year          = {2018},
  eprint        = {1805.09843},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1902-06423,
  author        = {Florian Mai and Lukas Galke and Ansgar Scherp},
  title         = {{CBOW} Is Not All You Need: Combining {CBOW} with the Compositional Matrix Space Model},
  journal       = {CoRR},
  volume        = {abs/1902.06423},
  year          = {2019},
  eprint        = {1902.06423},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1902.06423},
  timestamp     = {Tue, 21 May 2019 18:03:37 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1902-06423},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1809-04262,
  author        = {Rashmi Nagpal and Chetna Wadhwa and Mallika Gupta and Samiulla Shaikh and Sameep Mehta and Vikram Goyal},
  title         = {Extracting Fairness Policies from Legal Documents},
  journal       = {CoRR},
  volume        = {abs/1809.04262},
  year          = {2018},
  eprint        = {1809.04262},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1809.04262},
  timestamp     = {Fri, 05 Oct 2018 11:34:52 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1809-04262},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@article{doi:10.1080/00437956.1954.11659520,
  author    = {Zellig S. Harris},
  title     = {Distributional Structure},
  journal   = {WORD},
  year      = {1954},
  volume    = {10},
  number    = {2--3},
  pages     = {146--162},
  publisher = {Routledge},
  doi       = {10.1080/00437956.1954.11659520},
}
|
|||
|
|
|||
|
|
|||
|
@article{Halko:2011:FSR:2078879.2078881,
  author     = {Halko, N. and Martinsson, P. G. and Tropp, J. A.},
  title      = {Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions},
  journal    = {SIAM Rev.},
  volume     = {53},
  number     = {2},
  pages      = {217--288},
  month      = may,
  year       = {2011},
  publisher  = {Society for Industrial and Applied Mathematics},
  address    = {Philadelphia, PA, USA},
  issn       = {0036-1445},
  doi        = {10.1137/090771806},
  url        = {http://dx.doi.org/10.1137/090771806},
  issue_date = {May 2011},
  numpages   = {72},
  acmid      = {2078881},
  keywords   = {Johnson-Lindenstrauss lemma, dimension reduction, eigenvalue decomposition, interpolative decomposition, matrix approximation, parallel algorithm, pass-efficient algorithm, principal component analysis, random matrix, randomized algorithm, rank-revealing QR factorization, singular value decomposition, streaming algorithm},
}
|
|||
|
|
|||
|
@book{books/daglib/0031897,
  author    = {B{\"u}ttcher, Stefan and Clarke, Charles L. A. and Cormack, Gordon V.},
  title     = {Information Retrieval - Implementing and Evaluating Search Engines},
  publisher = {MIT Press},
  year      = 2010,
  pages     = {I--XXIV, 1--606},
  isbn      = {978-0-262-02651-2},
  ee        = {http://mitpress.mit.edu/books/information-retrieval},
  keywords  = {dblp},
  added-at  = {2013-10-06T00:00:00.000+0200},
  timestamp = {2013-10-08T11:35:46.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/2e679957b4a1bdf252c3a33296397f434/dblp},
  interhash = {8aba6a7cd5e81a4c68dc1d6c9102fe7b},
  intrahash = {e679957b4a1bdf252c3a33296397f434},
}
|
|||
|
|
|||
|
@article{Wolf2019HuggingFacesTS,
  author  = {Thomas Wolf and Lysandre Debut and Victor Sanh and Julien Chaumond and Clement Delangue and Anthony Moi and Pierric Cistac and Tim Rault and R{\'e}mi Louf and Morgan Funtowicz and Jamie Brew},
  title   = {{HuggingFace's Transformers: State-of-the-art Natural Language Processing}},
  journal = {ArXiv},
  volume  = {abs/1910.03771},
  year    = {2019},
}
|
|||
|
|
|||
|
@misc{gillick2018endtoend,
  author        = {Daniel Gillick and Alessandro Presta and Gaurav Singh Tomar},
  title         = {End-to-End Retrieval in Continuous Space},
  year          = {2018},
  eprint        = {1811.08008},
  archivePrefix = {arXiv},
  primaryClass  = {cs.IR},
}
|
|||
|
|
|||
|
@misc{almarwani2019efficient,
  author        = {Nada Almarwani and Hanan Aldarmaki and Mona Diab},
  title         = {Efficient Sentence Embedding using Discrete Cosine Transform},
  year          = {2019},
  eprint        = {1909.03104},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
|
|||
|
|
|||
|
@misc{wu2018word,
  author        = {Lingfei Wu and Ian E. H. Yen and Kun Xu and Fangli Xu and Avinash Balakrishnan and Pin-Yu Chen and Pradeep Ravikumar and Michael J. Witbrock},
  title         = {{Word Mover's Embedding: From Word2Vec to Document Embedding}},
  year          = {2018},
  eprint        = {1811.01713},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
|
|||
|
|
|||
|
@misc{yang2019xlnet,
  author        = {Yang, Zhilin and Dai, Zihang and Yang, Yiming and Carbonell, Jaime and Salakhutdinov, Ruslan and Le, Quoc V.},
  title         = {{XLNet: Generalized Autoregressive Pretraining for Language Understanding}},
  year          = 2019,
  eprint        = {1906.08237},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {http://arxiv.org/abs/1906.08237},
  note          = {Pretrained models and code are available at https://github.com/zihangdai/xlnet},
  abstract      = {With the capability of modeling bidirectional contexts, denoising
                   autoencoding based pretraining like BERT achieves better performance than
                   pretraining approaches based on autoregressive language modeling. However,
                   relying on corrupting the input with masks, BERT neglects dependency between
                   the masked positions and suffers from a pretrain-finetune discrepancy. In light
                   of these pros and cons, we propose XLNet, a generalized autoregressive
                   pretraining method that (1) enables learning bidirectional contexts by
                   maximizing the expected likelihood over all permutations of the factorization
                   order and (2) overcomes the limitations of BERT thanks to its autoregressive
                   formulation. Furthermore, XLNet integrates ideas from Transformer-XL, the
                   state-of-the-art autoregressive model, into pretraining. Empirically, XLNet
                   outperforms BERT on 20 tasks, often by a large margin, and achieves
                   state-of-the-art results on 18 tasks including question answering, natural
                   language inference, sentiment analysis, and document ranking.},
  description   = {[1906.08237] XLNet: Generalized Autoregressive Pretraining for Language Understanding},
  keywords      = {language_modeling nlp tpu transfer_learning},
  added-at      = {2019-07-03T19:07:32.000+0200},
  timestamp     = {2019-07-03T19:07:32.000+0200},
  biburl        = {https://www.bibsonomy.org/bibtex/2b758258da935db4bc1a57b5f6c9d94c6/deepforce},
  interhash     = {cd85caa3241071a53ea5c86eadae8de8},
  intrahash     = {b758258da935db4bc1a57b5f6c9d94c6},
}
|
|||
|
|
|||
|
|
|||
|
@inproceedings{conneau2018xnli,
  author    = {Conneau, Alexis and Rinott, Ruty and Lample, Guillaume and Williams, Adina and Bowman, Samuel R. and Schwenk, Holger and Stoyanov, Veselin},
  title     = {{XNLI: Evaluating Cross-lingual Sentence Representations}},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  year      = {2018},
  publisher = {Association for Computational Linguistics},
  location  = {Brussels, Belgium},
}
|
|||
|
|
|||
|
@techreport{Radford2018ImprovingLU,
  author      = {Radford, Alec and Narasimhan, Karthik and Salimans, Tim and Sutskever, Ilya},
  title       = {Improving Language Understanding by Generative Pre-Training},
  institution = {OpenAI},
  year        = {2018},
}
|
|||
|
|
|||
|
@inproceedings{reimers-2019-sentence-bert,
  author    = {Reimers, Nils and Gurevych, Iryna},
  title     = {{Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks}},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2019},
  publisher = {Association for Computational Linguistics},
  url       = {http://arxiv.org/abs/1908.10084},
}
|
|||
|
|
|||
|
@inproceedings{Pennington14glove:global,
  author    = {Jeffrey Pennington and Richard Socher and Christopher D. Manning},
  title     = {{Glove: Global vectors for word representation}},
  booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2014},
}
|
|||
|
|
|||
|
@inproceedings{Kano2017OverviewOC,
  author    = {Yoshinobu Kano and Mi Young Kim and Randy Goebel and Ken Satoh},
  title     = {{Overview of COLIEE 2017}},
  booktitle = {COLIEE@ICAIL},
  year      = {2017},
}
|
|||
|
|
|||
|
@inproceedings{snli:emnlp2015,
  author    = {Bowman, Samuel R. and Angeli, Gabor and Potts, Christopher and Manning, Christopher D.},
  title     = {A large annotated corpus for learning natural language inference},
  booktitle = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  publisher = {Association for Computational Linguistics},
  year      = {2015},
}
|
|||
|
|
|||
|
@inproceedings{Williams2017ABC,
  author    = {Adina Williams and Nikita Nangia and Samuel R. Bowman},
  title     = {A Broad-Coverage Challenge Corpus for Sentence Understanding through Inference},
  booktitle = {NAACL-HLT},
  year      = {2017},
}
|
|||
|
|
|||
|
@inproceedings{Peters:2018,
  author    = {Peters, Matthew E. and Neumann, Mark and Iyyer, Mohit and Gardner, Matt and Clark, Christopher and Lee, Kenton and Zettlemoyer, Luke},
  title     = {Deep contextualized word representations},
  booktitle = {Proc. of NAACL},
  year      = {2018},
}
|
|||
|
|
|||
|
@misc{Wang2019GeneralizingFA,
  author        = {Yaqing Wang and Quanming Yao and James Kwok and Lionel M. Ni},
  title         = {Generalizing from a Few Examples: A Survey on Few-Shot Learning},
  year          = {2019},
  eprint        = {1904.05046},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/VaswaniSPUJGKP17,
  author        = {Ashish Vaswani and Noam Shazeer and Niki Parmar and Jakob Uszkoreit and Llion Jones and Aidan N. Gomez and Lukasz Kaiser and Illia Polosukhin},
  title         = {Attention Is All You Need},
  journal       = {CoRR},
  volume        = {abs/1706.03762},
  year          = {2017},
  eprint        = {1706.03762},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1706.03762},
  timestamp     = {Mon, 13 Aug 2018 16:48:37 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/VaswaniSPUJGKP17},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@inproceedings{arora2017asimple,
  author    = {Sanjeev Arora and Yingyu Liang and Tengyu Ma},
  title     = {A Simple but Tough-to-Beat Baseline for Sentence Embeddings},
  booktitle = {International Conference on Learning Representations},
  year      = {2017},
}
|
|||
|
|
|||
|
|
|||
|
@inproceedings{zhao2019moverscore,
  author    = {Zhao, Wei and Peyrard, Maxime and Liu, Fei and Gao, Yang and Meyer, Christian M. and Eger, Steffen},
  title     = {{MoverScore: Text Generation Evaluating with Contextualized Embeddings and Earth Mover Distance}},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2019},
  address   = {Hong Kong, China},
  publisher = {Association for Computational Linguistics},
}
|
|||
|
|
|||
|
|
|||
|
@techreport{gpt2,
  author      = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya},
  title       = {Language Models are Unsupervised Multitask Learners},
  institution = {OpenAI},
  year        = 2019,
  url         = {https://d4mucfpksywv.cloudfront.net/better-language-models/language-models.pdf},
  description = {Language Models are Unsupervised Multitask Learners},
  keywords    = {learning multitask},
  added-at    = {2019-02-27T03:35:25.000+0100},
  timestamp   = {2019-02-27T03:35:25.000+0100},
  biburl      = {https://www.bibsonomy.org/bibtex/2b30710316a8cfbae687672ea1f85c193/kirk86},
  interhash   = {ce8168300081d74707849ed488e2a458},
  intrahash   = {b30710316a8cfbae687672ea1f85c193},
}
|
|||
|
|
|||
|
@misc{lample2019cross,
  author        = {Lample, Guillaume and Conneau, Alexis},
  title         = {Cross-lingual Language Model Pretraining},
  year          = {2019},
  eprint        = {1901.07291},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {http://arxiv.org/abs/1901.07291},
}
|
|||
|
|
|||
|
@inproceedings{pmlr-v37-kusnerb15,
  author    = {Matt Kusner and Yu Sun and Nicholas Kolkin and Kilian Weinberger},
  title     = {From Word Embeddings To Document Distances},
  booktitle = {Proceedings of the 32nd International Conference on Machine Learning},
  editor    = {Francis Bach and David Blei},
  series    = {Proceedings of Machine Learning Research},
  volume    = {37},
  pages     = {957--966},
  month     = jul,
  year      = {2015},
  address   = {Lille, France},
  publisher = {PMLR},
  pdf       = {http://proceedings.mlr.press/v37/kusnerb15.pdf},
  url       = {http://proceedings.mlr.press/v37/kusnerb15.html},
  abstract  = {We present the Word Mover’s Distance (WMD), a novel distance function between text documents. Our work is based on recent results in word embeddings that learn semantically meaningful representations for words from local co-occurrences in sentences. The WMD distance measures the dissimilarity between two text documents as the minimum amount of distance that the embedded words of one document need to "travel" to reach the embedded words of another document. We show that this distance metric can be cast as an instance of the Earth Mover’s Distance, a well studied transportation problem for which several highly efficient solvers have been developed. Our metric has no hyperparameters and is straight-forward to implement. Further, we demonstrate on eight real world document classification data sets, in comparison with seven state-of-the-art baselines, that the WMD metric leads to unprecedented low k-nearest neighbor document classification error rates.},
}
|
|||
|
|
|||
|
@inproceedings{Rabelo:2019:CST:3322640.3326741,
  author    = {Rabelo, Juliano and Kim, Mi-Young and Goebel, Randy},
  title     = {Combining Similarity and Transformer Methods for Case Law Entailment},
  booktitle = {Proceedings of the Seventeenth International Conference on Artificial Intelligence and Law},
  series    = {ICAIL '19},
  pages     = {290--296},
  year      = {2019},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Montreal, QC, Canada},
  isbn      = {978-1-4503-6754-7},
  doi       = {10.1145/3322640.3326741},
  url       = {http://doi.acm.org/10.1145/3322640.3326741},
  numpages  = {7},
  acmid     = {3326741},
  keywords  = {binary classification, document similarity, imbalanced datasets, legal textual entailment},
}
|
|||
|
|
|||
|
@article{DBLP:journals/corr/abs-1803-11175,
  author        = {Daniel Cer and Yinfei Yang and Sheng{-}yi Kong and Nan Hua and Nicole Limtiaco and Rhomni St. John and Noah Constant and Mario Guajardo{-}Cespedes and Steve Yuan and Chris Tar and Yun{-}Hsuan Sung and Brian Strope and Ray Kurzweil},
  title         = {Universal Sentence Encoder},
  journal       = {CoRR},
  volume        = {abs/1803.11175},
  year          = {2018},
  eprint        = {1803.11175},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1803.11175},
  timestamp     = {Mon, 13 Aug 2018 16:46:40 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1803-11175},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|||
|
|
|||
|
@techreport{senin2008dynamic,
  author      = {Senin, Pavel},
  title       = {Dynamic time warping algorithm review},
  institution = {Information and Computer Science Department, University of Hawaii at Manoa},
  year        = {2008},
  publisher   = {Citeseer},
}
|
|||
|
|
|||
|
@incollection{Sakoe:1990:DPA:108235.108244,
  author    = {Sakoe, Hiroaki and Chiba, Seibi},
  title     = {Dynamic Programming Algorithm Optimization for Spoken Word Recognition},
  booktitle = {Readings in Speech Recognition},
  editor    = {Waibel, Alex and Lee, Kai-Fu},
  pages     = {159--165},
  year      = {1990},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  isbn      = {1-55860-124-4},
  url       = {http://dl.acm.org/citation.cfm?id=108235.108244},
  numpages  = {7},
  acmid     = {108244},
}
|
|||
|
|
|||
|
@incollection{muller2007dynamic,
  author    = {M{\"u}ller, Meinard},
  title     = {Dynamic time warping},
  booktitle = {Information Retrieval for Music and Motion},
  pages     = {69--84},
  year      = {2007},
  publisher = {Springer},
}
|
|||
|
|