My research is in natural language processing, the subfield of computer science that aims to enable computers to understand and produce human language. I focus mainly on language translation, and am interested in syntactic parsing and other areas as well.
Teaching
Recent and selected publications
David Chiang.
Transformers in uniform TC\(^0\).
arXiv:2409.13629.
PDF
BibTeX
@misc{chiang:2024,
  author = {Chiang, David},
  title  = {Transformers in Uniform {TC$^0$}},
  year   = {2024},
  url    = {https://arxiv.org/abs/2409.13629},
  note   = {{arXiv}:2409.13629},
}
Andy Yang, David Chiang, and Dana Angluin.
Masked hard-attention transformers recognize exactly the star-free languages.
In
Proc. NeurIPS. 2024.
To appear.
PDF
BibTeX
@inproceedings{angluin+:2024,
  author    = {Yang, Andy and Chiang, David and Angluin, Dana},
  title     = {Masked Hard-Attention Transformers Recognize Exactly the Star-Free Languages},
  booktitle = {Proc. NeurIPS},
  year      = {2024},
  url       = {https://arxiv.org/abs/2310.13897},
  note      = {To appear},
}
Ken Sible and David Chiang.
Improving rare word translation with dictionaries and attention masking.
In
Proc. AMTA. 2024.
PDF
BibTeX
@inproceedings{sible-chiang-2024,
  author    = {Sible, Ken and Chiang, David},
  title     = {Improving Rare Word Translation With Dictionaries and Attention Masking},
  booktitle = {Proc. AMTA},
  year      = {2024},
  url       = {https://arxiv.org/abs/2408.09075},
}
Andy Yang and David Chiang.
Counting like transformers: compiling temporal counting logic into softmax transformers.
In
Proc. CoLM. 2024.
PDF
BibTeX
@inproceedings{yang-chiang-2024-counting,
  author    = {Yang, Andy and Chiang, David},
  title     = {Counting Like Transformers: Compiling Temporal Counting Logic Into Softmax Transformers},
  booktitle = {Proc. CoLM},
  year      = {2024},
  url       = {https://openreview.net/forum?id=FmhPg4UJ9K},
}
Aarohi Srivastava and David Chiang.
We're calling an intervention: taking a closer look at language model adaptation to different types of linguistic variation.
2024.
arXiv:2404.07304.
PDF
BibTeX
@misc{srivastava-chiang-2024,
  author = {Srivastava, Aarohi and Chiang, David},
  title  = {We're Calling an Intervention: Taking a Closer Look at Language Model Adaptation to Different Types of Linguistic Variation},
  year   = {2024},
  url    = {https://arxiv.org/abs/2404.07304},
  note   = {arXiv:2404.07304},
}
Lena Strobl, Dana Angluin, David Chiang, Jonathan Rawski, and Ashish Sabharwal.
Transformers as transducers.
Transactions of the Association for Computational Linguistics, 2024.
To appear.
PDF
BibTeX
@article{strobl-etal-2024-transducers,
  author  = {Strobl, Lena and Angluin, Dana and Chiang, David and Rawski, Jonathan and Sabharwal, Ashish},
  title   = {Transformers as Transducers},
  journal = {Transactions of the Association for Computational Linguistics},
  year    = {2024},
  url     = {https://arxiv.org/abs/2404.02040},
  note    = {To appear},
}
Chihiro Taguchi and David Chiang.
Language complexity and speech recognition accuracy: orthographic complexity hurts, phonological complexity doesn't.
In
Proc. ACL. 2024.
Outstanding Paper Award and Senior Area Chair Award.
PDF
BibTeX
@inproceedings{taguchi-chiang-2024-complexity,
  author    = {Taguchi, Chihiro and Chiang, David},
  title     = {Language Complexity and Speech Recognition Accuracy: Orthographic Complexity Hurts, Phonological Complexity Doesn't},
  booktitle = {Proc. ACL},
  year      = {2024},
  url       = {https://aclanthology.org/2024.acl-long.827/},
}
Fahim Faisal, Orevaoghene Ahia, Aarohi Srivastava, Kabir Ahuja, David Chiang, Yulia Tsvetkov, and Antonios Anastasopoulos.
DIALECTBENCH: a NLP benchmark for dialects, varieties, and closely-related languages.
In
Proc. ACL. 2024.
Social Impact Award.
PDF
BibTeX
@inproceedings{faisal+:2024,
  author    = {Faisal, Fahim and Ahia, Orevaoghene and Srivastava, Aarohi and Ahuja, Kabir and Chiang, David and Tsvetkov, Yulia and Anastasopoulos, Antonios},
  title     = {{DIALECTBENCH}: A {NLP} Benchmark for Dialects, Varieties, and Closely-Related Languages},
  booktitle = {Proc. ACL},
  year      = {2024},
  url       = {https://aclanthology.org/2024.acl-long.777/},
}
Stephen Bothwell, Brian DuSell, David Chiang, and Brian Krostenko.
PILA: a historical-linguistic dataset of Proto-Italic and Latin.
In
Proc. LREC-COLING, 12749–12760. 2024.
PDF
BibTeX
@inproceedings{bothwell+:2024,
  author    = {Bothwell, Stephen and DuSell, Brian and Chiang, David and Krostenko, Brian},
  title     = {{PILA}: A Historical-Linguistic Dataset of {Proto-Italic} and {Latin}},
  booktitle = {Proc. LREC-COLING},
  pages     = {12749--12760},
  year      = {2024},
  url       = {https://aclanthology.org/2024.lrec-main.1116/},
}
Chihiro Taguchi, Jefferson Saransig, Dayana Velásquez, and David Chiang.
KILLKAN: the automatic speech recognition dataset for Kichwa with morphosyntactic information.
In
Proc. LREC-COLING, 9753–9763. 2024.
PDF
BibTeX
@inproceedings{taguchi+:2024,
  author    = {Taguchi, Chihiro and Saransig, Jefferson and Vel{\'a}squez, Dayana and Chiang, David},
  title     = {{KILLKAN}: The Automatic Speech Recognition Dataset for {Kichwa} with Morphosyntactic Information},
  booktitle = {Proc. LREC-COLING},
  pages     = {9753--9763},
  year      = {2024},
  url       = {https://aclanthology.org/2024.lrec-main.852/},
}
Lena Strobl, William Merrill, Gail Weiss, David Chiang, and Dana Angluin.
What formal languages can transformers express? A survey.
Transactions of the Association for Computational Linguistics, 12:543–561, 2024.
doi:10.1162/tacl_a_00663.
DOI
BibTeX
@article{strobl-etal-2024-survey,
  author  = {Strobl, Lena and Merrill, William and Weiss, Gail and Chiang, David and Angluin, Dana},
  title   = {What Formal Languages Can Transformers Express? {A} Survey},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {12},
  pages   = {543--561},
  year    = {2024},
  doi     = {10.1162/tacl_a_00663},
}
Brian DuSell and David Chiang.
Stack attention: improving the ability of transformers to model hierarchical patterns.
In
Proc. ICLR. 2024.
Spotlight paper.
PDF
BibTeX
@inproceedings{dusell+chiang:2024attention,
  author    = {DuSell, Brian and Chiang, David},
  title     = {Stack Attention: Improving the Ability of Transformers to Model Hierarchical Patterns},
  booktitle = {Proc. ICLR},
  year      = {2024},
  url       = {https://openreview.net/pdf?id=XVhm3X8Fum},
}
Stephen Bothwell, Justin DeBenedetto, Theresa Crnkovich, Hildegund Müller, and David Chiang.
Introducing rhetorical parallelism detection: a new task with datasets, metrics, and baselines.
In
Proc. EMNLP, 5007–5039. 2023.
doi:10.18653/v1/2023.emnlp-main.305.
PDF
BibTeX
@inproceedings{bothwell+:2023,
  author    = {Bothwell, Stephen and DeBenedetto, Justin and Crnkovich, Theresa and M{\"u}ller, Hildegund and Chiang, David},
  title     = {Introducing Rhetorical Parallelism Detection: A New Task with Datasets, Metrics, and Baselines},
  booktitle = {Proc. EMNLP},
  pages     = {5007--5039},
  year      = {2023},
  doi       = {10.18653/v1/2023.emnlp-main.305},
  url       = {https://aclanthology.org/2023.emnlp-main.305},
}
Aarohi Srivastava and David Chiang.
BERTwich: extending BERT's capabilities to model dialectal and noisy text.
In
Findings of ACL: EMNLP. 2023.
PDF
BibTeX
@inproceedings{srivastava+chiang:2023,
  author    = {Srivastava, Aarohi and Chiang, David},
  title     = {{BERTwich}: Extending {BERT}'s Capabilities to Model Dialectal and Noisy Text},
  booktitle = {Findings of ACL: EMNLP},
  year      = {2023},
  url       = {https://aclanthology.org/2023.findings-emnlp.1037/},
}
Chihiro Taguchi, Yusuke Sakai, Parisa Haghani, and David Chiang.
Universal automatic phonetic transcription into the International Phonetic Alphabet.
In
Proc. INTERSPEECH. 2023.
doi:10.21437/Interspeech.2023-2584.
PDF
BibTeX
@inproceedings{taguchi+:2023,
  author    = {Taguchi, Chihiro and Sakai, Yusuke and Haghani, Parisa and Chiang, David},
  title     = {Universal Automatic Phonetic Transcription into the {International} {Phonetic} {Alphabet}},
  booktitle = {Proc. INTERSPEECH},
  year      = {2023},
  doi       = {10.21437/Interspeech.2023-2584},
  url       = {https://arxiv.org/abs/2308.03917},
}
David Chiang, Peter Cholak, and Anand Pillay.
Tighter bounds on the expressivity of transformer encoders.
In
Proc. ICML, 5544–5562. 2023.
PDF
BibTeX
@inproceedings{chiang+cholak+pillay:2023,
  author    = {Chiang, David and Cholak, Peter and Pillay, Anand},
  title     = {Tighter Bounds on the Expressivity of Transformer Encoders},
  booktitle = {Proc. ICML},
  pages     = {5544--5562},
  year      = {2023},
  url       = {https://proceedings.mlr.press/v202/chiang23a.html},
}
Aarohi Srivastava and David Chiang.
Fine-tuning BERT with character-level noise for zero-shot transfer to dialects and closely-related languages.
In
Proc. Workshop on NLP for Similar Languages, Varieties and Dialects. 2023.
PDF
BibTeX
@inproceedings{srivastava+chiang:2023fine,
  author    = {Srivastava, Aarohi and Chiang, David},
  title     = {Fine-Tuning {BERT} with Character-Level Noise for Zero-Shot Transfer to Dialects and Closely-Related Languages},
  booktitle = {Proc. Workshop on NLP for Similar Languages, Varieties and Dialects},
  year      = {2023},
  url       = {https://aclanthology.org/2023.vardial-1.16/},
}
David Chiang, Colin McDonald, and Chung-chieh Shan.
Exact recursive probabilistic programming.
PACMPL, 2023.
doi:10.1145/3586050.
PDF
BibTeX
@article{chiang+mcdonald+shan:2023,
  author  = {Chiang, David and McDonald, Colin and Shan, Chung-chieh},
  title   = {Exact Recursive Probabilistic Programming},
  journal = {PACMPL},
  volume  = {7},
  number  = {OOPSLA1},
  article = {98},
  year    = {2023},
  xmonth  = {April},
  doi     = {10.1145/3586050},
  url     = {https://dl.acm.org/doi/10.1145/3586050},
}
David Chiang, Alexander M. Rush, and Boaz Barak.
Named tensor notation.
Transactions on Machine Learning Research, January 2023.
PDF
BibTeX
@article{chiang+rush+barak:2023,
  author  = {Chiang, David and Rush, Alexander M. and Barak, Boaz},
  title   = {Named Tensor Notation},
  journal = {Transactions on Machine Learning Research},
  year    = {2023},
  month   = jan,
  url     = {https://openreview.net/pdf?id=hVT7SHlilx},
}
Alexandra Butoi, Brian DuSell, Tim Vieira, Ryan Cotterell, and David Chiang.
Algorithms for weighted pushdown automata.
In Yoav Goldberg, Zornitsa Kozareva, and Yue Zhang, editors,
Proc. EMNLP, 9669–9680. 2022.
doi:10.18653/v1/2022.emnlp-main.656.
PDF
BibTeX
@inproceedings{butoi-etal-2022-algorithms,
  author    = {Butoi, Alexandra and DuSell, Brian and Vieira, Tim and Cotterell, Ryan and Chiang, David},
  editor    = {Goldberg, Yoav and Kozareva, Zornitsa and Zhang, Yue},
  title     = {Algorithms for Weighted Pushdown Automata},
  booktitle = {Proc. EMNLP},
  pages     = {9669--9680},
  year      = {2022},
  doi       = {10.18653/v1/2022.emnlp-main.656},
  url       = {https://aclanthology.org/2022.emnlp-main.656},
}
David Chiang and Peter Cholak.
Overcoming a theoretical limitation of self-attention.
In Smaranda Muresan, Preslav Nakov, and Aline Villavicencio, editors,
Proc. ACL, volume 1, 7654–7664. 2022.
doi:10.18653/v1/2022.acl-long.527.
PDF
BibTeX
@inproceedings{chiang-cholak-2022-overcoming,
  author    = {Chiang, David and Cholak, Peter},
  editor    = {Muresan, Smaranda and Nakov, Preslav and Villavicencio, Aline},
  title     = {Overcoming a Theoretical Limitation of Self-Attention},
  booktitle = {Proc. ACL},
  volume    = {1},
  pages     = {7654--7664},
  year      = {2022},
  doi       = {10.18653/v1/2022.acl-long.527},
  url       = {https://aclanthology.org/2022.acl-long.527},
}
full list