My research is in natural language processing, the subfield of computer science that aims to enable computers to understand and produce human language. I focus mainly on language translation, and am interested in syntactic parsing and other areas as well.
Teaching
Recent and selected publications
Andy Yang, Lena Strobl, David Chiang, and Dana Angluin.
Simulating hard attention using soft attention.
arXiv:2412.09925.
PDF
BibTeX
@misc{yang-etal-2024-softmax,
  author = {Yang, Andy and Strobl, Lena and Chiang, David and Angluin, Dana},
  title  = {Simulating Hard Attention Using Soft Attention},
  year   = {2024},
  note   = {{arXiv}:2412.09925},
  url    = {https://arxiv.org/abs/2412.09925}
}
David Chiang.
Transformers in uniform TC\(^0\).
arXiv:2409.13629.
PDF
BibTeX
@misc{chiang:2024,
  author = {Chiang, David},
  title  = {Transformers in Uniform {TC$^0$}},
  year   = {2024},
  note   = {{arXiv}:2409.13629},
  url    = {https://arxiv.org/abs/2409.13629}
}
Andy Yang, David Chiang, and Dana Angluin.
Masked hard-attention transformers recognize exactly the star-free languages.
In
Proc. NeurIPS. 2024.
To appear.
PDF
BibTeX
@inproceedings{angluin+:2024,
  author    = {Yang, Andy and Chiang, David and Angluin, Dana},
  title     = {Masked Hard-Attention Transformers Recognize Exactly the Star-Free Languages},
  booktitle = {Proc. NeurIPS},
  year      = {2024},
  note      = {To appear},
  url       = {https://arxiv.org/abs/2310.13897}
}
Ken Sible and David Chiang.
Improving rare word translation with dictionaries and attention masking.
In
Proc. AMTA. 2024.
PDF
BibTeX
@inproceedings{sible-chiang-2024,
  author    = {Sible, Ken and Chiang, David},
  title     = {Improving Rare Word Translation With Dictionaries and Attention Masking},
  booktitle = {Proc. AMTA},
  year      = {2024},
  url       = {https://arxiv.org/abs/2408.09075}
}
Andy Yang and David Chiang.
Counting like transformers: compiling temporal counting logic into softmax transformers.
In
Proc. CoLM. 2024.
PDF
BibTeX
@inproceedings{yang-chiang-2024-counting,
  author    = {Yang, Andy and Chiang, David},
  title     = {Counting Like Transformers: Compiling Temporal Counting Logic Into Softmax Transformers},
  booktitle = {Proc. CoLM},
  year      = {2024},
  url       = {https://openreview.net/forum?id=FmhPg4UJ9K}
}
Aarohi Srivastava and David Chiang.
We're calling an intervention: taking a closer look at language model adaptation to different types of linguistic variation.
2024.
arXiv:2404.07304.
PDF
BibTeX
@misc{srivastava-chiang-2024,
  author = {Srivastava, Aarohi and Chiang, David},
  title  = {We're Calling an Intervention: Taking a Closer Look at Language Model Adaptation to Different Types of Linguistic Variation},
  year   = {2024},
  note   = {arXiv:2404.07304},
  url    = {https://arxiv.org/abs/2404.07304}
}
Lena Strobl, Dana Angluin, David Chiang, Jonathan Rawski, and Ashish Sabharwal.
Transformers as transducers.
Transactions of the Association for Computational Linguistics, 2024.
To appear.
PDF
BibTeX
@article{strobl-etal-2024-transducers,
  author  = {Strobl, Lena and Angluin, Dana and Chiang, David and Rawski, Jonathan and Sabharwal, Ashish},
  title   = {Transformers as Transducers},
  journal = {Transactions of the Association for Computational Linguistics},
  year    = {2024},
  url     = {https://arxiv.org/abs/2404.02040},
  note    = {To appear}
}
Chihiro Taguchi and David Chiang.
Language complexity and speech recognition accuracy: orthographic complexity hurts, phonological complexity doesn't.
In
Proc. ACL. 2024.
Outstanding Paper Award and Senior Area Chair Award.
PDF
BibTeX
@inproceedings{taguchi-chiang-2024-complexity,
  author    = {Taguchi, Chihiro and Chiang, David},
  title     = {Language Complexity and Speech Recognition Accuracy: Orthographic Complexity Hurts, Phonological Complexity Doesn't},
  booktitle = {Proc. ACL},
  year      = {2024},
  url       = {https://aclanthology.org/2024.acl-long.827/}
}
Fahim Faisal, Orevaoghene Ahia, Aarohi Srivastava, Kabir Ahuja, David Chiang, Yulia Tsvetkov, and Antonios Anastasopoulos.
DIALECTBENCH: a NLP benchmark for dialects, varieties, and closely-related languages.
In
Proc. ACL. 2024.
Social Impact Award.
PDF
BibTeX
@inproceedings{faisal+:2024,
  author    = {Faisal, Fahim and Ahia, Orevaoghene and Srivastava, Aarohi and Ahuja, Kabir and Chiang, David and Tsvetkov, Yulia and Anastasopoulos, Antonios},
  title     = {{DIALECTBENCH}: A {NLP} Benchmark for Dialects, Varieties, and Closely-Related Languages},
  booktitle = {Proc. ACL},
  year      = {2024},
  url       = {https://aclanthology.org/2024.acl-long.777/}
}
Stephen Bothwell, Brian DuSell, David Chiang, and Brian Krostenko.
PILA: a historical-linguistic dataset of Proto-Italic and Latin.
In
Proc. LREC-COLING, 12749–12760. 2024.
PDF
BibTeX
@inproceedings{bothwell+:2024,
  author    = {Bothwell, Stephen and DuSell, Brian and Chiang, David and Krostenko, Brian},
  title     = {{PILA}: A Historical-Linguistic Dataset of {Proto-Italic} and {Latin}},
  booktitle = {Proc. LREC-COLING},
  pages     = {12749--12760},
  year      = {2024},
  url       = {https://aclanthology.org/2024.lrec-main.1116/}
}
Chihiro Taguchi, Jefferson Saransig, Dayana Velásquez, and David Chiang.
KILLKAN: the automatic speech recognition dataset for Kichwa with morphosyntactic information.
In
Proc. LREC-COLING, 9753–9763. 2024.
PDF
BibTeX
@inproceedings{taguchi+:2024,
  author    = {Taguchi, Chihiro and Saransig, Jefferson and Vel{\'a}squez, Dayana and Chiang, David},
  title     = {{KILLKAN}: The Automatic Speech Recognition Dataset for {Kichwa} with Morphosyntactic Information},
  booktitle = {Proc. LREC-COLING},
  pages     = {9753--9763},
  year      = {2024},
  url       = {https://aclanthology.org/2024.lrec-main.852/}
}
Lena Strobl, William Merrill, Gail Weiss, David Chiang, and Dana Angluin.
What formal languages can transformers express? A survey.
Transactions of the Association for Computational Linguistics, 12:543–561, 2024.
doi:10.1162/tacl_a_00663.
DOI
BibTeX
@article{strobl-etal-2024-survey,
  author  = {Strobl, Lena and Merrill, William and Weiss, Gail and Chiang, David and Angluin, Dana},
  title   = {What Formal Languages Can Transformers Express? {A} Survey},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {12},
  pages   = {543--561},
  year    = {2024},
  doi     = {10.1162/tacl_a_00663}
}
Brian DuSell and David Chiang.
Stack attention: improving the ability of transformers to model hierarchical patterns.
In
Proc. ICLR. 2024.
Spotlight paper.
PDF
BibTeX
@inproceedings{dusell+chiang:2024attention,
  author    = {DuSell, Brian and Chiang, David},
  title     = {Stack Attention: Improving the Ability of Transformers to Model Hierarchical Patterns},
  booktitle = {Proc. ICLR},
  year      = {2024},
  url       = {https://openreview.net/pdf?id=XVhm3X8Fum}
}
Stephen Bothwell, Justin DeBenedetto, Theresa Crnkovich, Hildegund Müller, and David Chiang.
Introducing rhetorical parallelism detection: a new task with datasets, metrics, and baselines.
In
Proc. EMNLP, 5007–5039. 2023.
doi:10.18653/v1/2023.emnlp-main.305.
PDF
BibTeX
@inproceedings{bothwell+:2023,
  author    = {Bothwell, Stephen and DeBenedetto, Justin and Crnkovich, Theresa and M{\"u}ller, Hildegund and Chiang, David},
  title     = {Introducing Rhetorical Parallelism Detection: A New Task with Datasets, Metrics, and Baselines},
  booktitle = {Proc. EMNLP},
  pages     = {5007--5039},
  year      = {2023},
  doi       = {10.18653/v1/2023.emnlp-main.305},
  url       = {https://aclanthology.org/2023.emnlp-main.305}
}
Aarohi Srivastava and David Chiang.
BERTwich: extending BERT's capabilities to model dialectal and noisy text.
In
Findings of ACL: EMNLP. 2023.
PDF
BibTeX
@inproceedings{srivastava+chiang:2023,
  author    = {Srivastava, Aarohi and Chiang, David},
  title     = {{BERTwich}: Extending {BERT}'s Capabilities to Model Dialectal and Noisy Text},
  booktitle = {Findings of ACL: EMNLP},
  year      = {2023},
  url       = {https://aclanthology.org/2023.findings-emnlp.1037/}
}
Chihiro Taguchi, Yusuke Sakai, Parisa Haghani, and David Chiang.
Universal automatic phonetic transcription into the International Phonetic Alphabet.
In
Proc. INTERSPEECH. 2023.
doi:10.21437/Interspeech.2023-2584.
PDF
BibTeX
@inproceedings{taguchi+:2023,
  author    = {Taguchi, Chihiro and Sakai, Yusuke and Haghani, Parisa and Chiang, David},
  title     = {Universal Automatic Phonetic Transcription into the {International Phonetic Alphabet}},
  booktitle = {Proc. INTERSPEECH},
  year      = {2023},
  doi       = {10.21437/Interspeech.2023-2584},
  url       = {https://arxiv.org/abs/2308.03917}
}
David Chiang, Peter Cholak, and Anand Pillay.
Tighter bounds on the expressivity of transformer encoders.
In
Proc. ICML, 5544–5562. 2023.
PDF
BibTeX
@inproceedings{chiang+cholak+pillay:2023,
  author    = {Chiang, David and Cholak, Peter and Pillay, Anand},
  title     = {Tighter Bounds on the Expressivity of Transformer Encoders},
  booktitle = {Proc. ICML},
  pages     = {5544--5562},
  year      = {2023},
  url       = {https://proceedings.mlr.press/v202/chiang23a.html}
}
Aarohi Srivastava and David Chiang.
Fine-tuning BERT with character-level noise for zero-shot transfer to dialects and closely-related languages.
In
Proc. Workshop on NLP for Similar Languages, Varieties and Dialects. 2023.
PDF
BibTeX
@inproceedings{srivastava+chiang:2023fine,
  author    = {Srivastava, Aarohi and Chiang, David},
  title     = {Fine-Tuning {BERT} with Character-Level Noise for Zero-Shot Transfer to Dialects and Closely-Related Languages},
  booktitle = {Proc. Workshop on NLP for Similar Languages, Varieties and Dialects},
  year      = {2023},
  url       = {https://aclanthology.org/2023.vardial-1.16/}
}
David Chiang, Colin McDonald, and Chung-chieh Shan.
Exact recursive probabilistic programming.
PACMPL, 2023.
doi:10.1145/3586050.
PDF
BibTeX
@article{chiang+mcdonald+shan:2023,
  author    = {Chiang, David and McDonald, Colin and Shan, Chung-chieh},
  title     = {Exact Recursive Probabilistic Programming},
  journal   = {PACMPL},
  volume    = {7},
  number    = {OOPSLA1},
  article   = {98},
  articleno = {98},
  xmonth    = {April},
  year      = {2023},
  doi       = {10.1145/3586050},
  url       = {https://dl.acm.org/doi/10.1145/3586050}
}
David Chiang, Alexander M. Rush, and Boaz Barak.
Named tensor notation.
Transactions on Machine Learning Research, January 2023.
PDF
BibTeX
@article{chiang+rush+barak:2023,
  author  = {Chiang, David and Rush, Alexander M. and Barak, Boaz},
  title   = {Named Tensor Notation},
  journal = {Transactions on Machine Learning Research},
  month   = jan,
  year    = {2023},
  url     = {https://openreview.net/pdf?id=hVT7SHlilx}
}
Alexandra Butoi, Brian DuSell, Tim Vieira, Ryan Cotterell, and David Chiang.
Algorithms for weighted pushdown automata.
In Yoav Goldberg, Zornitsa Kozareva, and Yue Zhang, editors,
Proc. EMNLP, 9669–9680. 2022.
doi:10.18653/v1/2022.emnlp-main.656.
PDF
BibTeX
@inproceedings{butoi-etal-2022-algorithms,
  author    = {Butoi, Alexandra and DuSell, Brian and Vieira, Tim and Cotterell, Ryan and Chiang, David},
  title     = {Algorithms for Weighted Pushdown Automata},
  editor    = {Goldberg, Yoav and Kozareva, Zornitsa and Zhang, Yue},
  booktitle = {Proc. EMNLP},
  pages     = {9669--9680},
  year      = {2022},
  doi       = {10.18653/v1/2022.emnlp-main.656},
  url       = {https://aclanthology.org/2022.emnlp-main.656}
}
David Chiang and Peter Cholak.
Overcoming a theoretical limitation of self-attention.
In Smaranda Muresan, Preslav Nakov, and Aline Villavicencio, editors,
Proc. ACL, volume 1, 7654–7664. 2022.
doi:10.18653/v1/2022.acl-long.527.
PDF
BibTeX
@inproceedings{chiang-cholak-2022-overcoming,
  author    = {Chiang, David and Cholak, Peter},
  title     = {Overcoming a Theoretical Limitation of Self-Attention},
  editor    = {Muresan, Smaranda and Nakov, Preslav and Villavicencio, Aline},
  booktitle = {Proc. ACL},
  volume    = {1},
  pages     = {7654--7664},
  year      = {2022},
  doi       = {10.18653/v1/2022.acl-long.527},
  url       = {https://aclanthology.org/2022.acl-long.527}
}
full list