<?xml version="1.0" encoding="UTF-8"?>
<!--
  Single-record bibliographic export wrapped in an rdf:RDF root.

  Structure:
    rdf:RDF            root; declares the rdf, dc and dcq namespace prefixes
      records          paging wrapper (count, morepages, start, end)
        record         one entry, identified by its rownumber attribute
          dc:* / dcq:* descriptive fields; self-closing elements carry no value

  Leaf elements are deliberately kept on a single line each: whitespace inside
  a text-bearing element would become part of its value.

  NOTE(review): dc:journal_name reads "CICM 2022" while dc:date is 2023-08-08.
  Confirm the conference year against the publisher record before relying on it.
-->
<rdf:RDF
    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    xmlns:dc="http://purl.org/dc/elements/1.1/"
    xmlns:dcq="http://purl.org/dc/terms/">
  <records count="1" morepages="false" start="1" end="1">
    <record rownumber="1">
      <!-- Identity of the work -->
      <dc:product_type>Conference Paper</dc:product_type>
      <dc:title>ProofLang: the Language of arXiv Proofs</dc:title>
      <dc:creator>Hammer, Henry; Noda, Nanako; Stone, Christopher A</dc:creator>
      <dc:corporate_author/>
      <dc:editor/>
      <dc:description>The ProofLang Corpus includes 3.7M proofs (558 million words) mechanically extracted from papers that were posted on arXiv.org between 1992 and 2020. The focus of this corpus is proofs, rather than the explanatory text that surrounds them, and more specifically on the language used in such proofs. Specific mathematical content is filtered out, resulting in sentences such as Let MATH be the restriction of MATH to MATH. This dataset reflects how people prefer to write (informal) proofs, and is also amenable to statistical analyses and experiments with Natural Language Processing (NLP) techniques.</dc:description>

      <!-- Publication details -->
      <dc:publisher>Springer, Cham</dc:publisher>
      <dc:date>2023-08-08</dc:date>
      <dc:nsf_par_id>10499930</dc:nsf_par_id>
      <dc:journal_name>16th Conference on Intelligent Computer Mathematics (CICM 2022)</dc:journal_name>
      <dc:journal_volume/>
      <dc:journal_issue/>
      <dc:page_range_or_elocation/>

      <!-- Identifiers -->
      <dc:issn/>
      <dc:isbn>978-3-031-42753-4</dc:isbn>
      <dc:doi>https://doi.org/10.1007/978-3-031-42753-4_19</dc:doi>
      <dcq:identifierAwardId>1950885</dcq:identifierAwardId>

      <!-- Remaining descriptive fields -->
      <dc:subject/>
      <dc:version_number/>
      <dc:location>Cambridge, UK</dc:location>
      <dc:rights/>
      <dc:institution/>
      <dc:sponsoring_org>National Science Foundation</dc:sponsoring_org>
    </record>
  </records>
</rdf:RDF>