_bibliography/papers.bib

---
---

% TeleRAG. The journal field carries the submission-status text; an asterisk
% after a surname is part of the name as originally entered.
@article{lin2025telerag,
  author   = {Lin*, Chien-Yu and Kamahori*, Keisuke and Liu, Yiyu and Shi, Xiaoxiang and Kashyap, Madhav and Gu, Yile and Shao, Rulin and Ye, Zihao and Zhu, Kan and Wang, Stephanie and Krishnamurthy, Arvind and Kadekodi, Rohan and Ceze, Luis and Kasikci, Baris},
  title    = {TeleRAG: Efficient Retrieval-Augmented Generation Inference with Lookahead Retrieval},
  journal  = {In submission to OSDI 2025},
  year     = {2025},
  selected = {true},
}

% Palu (ICLR). The booktitle now reads "Proceedings of the ...", matching the
% wording of the other conference entries in this file.
@inproceedings{chang2024palu,
  author      = {Chang*, Chi-Chih and Lin*, Wei-Cheng and Lin*, Chien-Yu and Chen, Chong-Yan and Hu, Yu-Fang and Wang, Pei-Shuo and Huang, Ning-Chi and Ceze, Luis and Abdelfattah, Mohamed S. and Wu, Kai-Chiang},
  title       = {Palu: Compressing KV-Cache with Low-Rank Projection},
  booktitle   = {Proceedings of the International Conference on Learning Representations (ICLR)},
  year        = {2025},
  venue_url   = {https://arxiv.org/abs/2407.21118},
  pdf         = {https://arxiv.org/pdf/2407.21118.pdf},
  preview     = {palu_concept.png},
  bibtex_show = {true},
  selected    = {true},
}

% NanoFlow. archiveprefix names the archive that the eprint identifier belongs
% to (the pdf link below points at the same arXiv record).
% NOTE(review): standard styles ignore a journal field on a misc entry; it is
% kept here unchanged in case the site template reads it -- confirm.
@misc{zhu2024nanoflow,
  title         = {NanoFlow: Towards Optimal Large Language Model Serving Throughput},
  author        = {Kan Zhu and Yilong Zhao and Liangyu Zhao and Gefei Zuo and Yile Gu and Dedong Xie and Yufei Gao and Qinyu Xu and Tian Tang and Zihao Ye and Keisuke Kamahori and Chien-Yu Lin and Stephanie Wang and Arvind Krishnamurthy and Baris Kasikci},
  journal       = {In submission to OSDI 2025},
  year          = {2024},
  eprint        = {2408.12757},
  archiveprefix = {arXiv},
  pdf           = {https://arxiv.org/abs/2408.12757},
  code          = {https://github.com/efeslab/Nanoflow},
  preview       = {nanoflow2.png},
  bibtex_show   = {true},
  selected      = {false},
}

% Encoder-decoder decoding paper, listed with its arXiv preprint identifier as the venue text.
@article{lu2024efficient,
  author      = {Lu, Bo-Ru and Haduong, Nikita and Lin, Chien-Yu and Cheng, Hao and Smith, Noah A. and Ostendorf, Mari},
  title       = {Efficient Encoder-Decoder Transformer Decoding for Decomposable Tasks},
  journal     = {arXiv preprint arXiv:2403.13112},
  year        = {2024},
  pdf         = {https://arxiv.org/abs/2403.13112},
  preview     = {},
  selected    = {false},
  bibtex_show = {true},
}

% Atom (MLSys). Fields are grouped as: bibliographic data, links, site display flags.
@inproceedings{zhao2024atom,
  author      = {Zhao, Yilong and Lin, Chien-Yu and Zhu, Kan and Ye, Zihao and Chen, Lequn and Zheng, Size and Ceze, Luis and Krishnamurthy, Arvind and Chen, Tianqi and Kasikci, Baris},
  title       = {Atom: Low-Bit Quantization for Efficient and Accurate LLM Serving},
  booktitle   = {Proceedings of Machine Learning and Systems (MLSys)},
  volume      = {6},
  pages       = {196--209},
  year        = {2024},
  url         = {https://proceedings.mlsys.org/paper_files/paper/2024/file/5edb57c05c81d04beb716ef1d542fe9e-Paper-Conference.pdf},
  pdf         = {https://arxiv.org/abs/2310.19102},
  code        = {https://github.com/efeslab/Atom},
  preview     = {atom2.png},
  selected    = {true},
  bibtex_show = {true},
}

% FastSR-NeRF (WACV). The page range uses a double hyphen, as in the other
% paginated entry in this file, and month uses the standard three-letter macro
% (bare, so styles can expand it per language).
@inproceedings{lin2024fastsrnerf,
  author      = {Chien-Yu Lin and Qichen Fu and Thomas Merth and Karren Yang and Anurag Ranjan},
  title       = {FastSR-NeRF: Improving NeRF Efficiency on Consumer Devices with A Simple Super-Resolution Pipeline},
  booktitle   = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)},
  highlight   = {Oral (Top 2.6%)},
  month       = jan,
  year        = {2024},
  pages       = {2482--2491},
  pdf         = {https://arxiv.org/abs/2312.11537},
  preview     = {},
  venue_type  = {Conference},
  selected    = {true},
  bibtex_show = {true},
}

% SPIN (ECCV). The booktitle previously read "Proceedings the 17th ..."; the
% missing "of" is restored.
@inproceedings{lin2022spin,
  author      = {Chien-Yu Lin* and Anish Prabhu* and Thomas Merth and Sachin Mehta and Anurag Ranjan and Maxwell Horton and Mohammad Rastegari},
  title       = {SPIN: An Empirical Evaluation on Sharing Parameters of Isotropic Networks},
  booktitle   = {Proceedings of the 17th European Conference on Computer Vision (ECCV)},
  year        = {2022},
  preview     = {},
  code        = {https://github.com/apple/ml-spin},
  video       = {https://homes.cs.washington.edu/~cyulin/media/SPIN_ECCV_2022_Video_final_trim_1080.mp4},
  pdf         = {https://arxiv.org/abs/2207.10237},
  venue_type  = {Conference},
  selected    = {true},
  bibtex_show = {true},
}

% Cache-first edge sampling for SpMM (arXiv). venue_type was declared twice
% with different spellings; only one declaration is kept so every parser sees
% the same value.
% NOTE(review): the later spelling is the one retained -- confirm it is the
% spelling the site template expects.
% The title restores the acronym casing "SpMM" -- verify against the arXiv record.
@article{lin2021esspmm,
  title       = {Accelerating SpMM Kernel with Cache-First Edge Sampling for Graph Neural Networks},
  author      = {Chien-Yu Lin and Liang Luo and Luis Ceze},
  journal     = {arXiv preprint arXiv:2104.10716},
  year        = {2021},
  pdf         = {https://arxiv.org/abs/2104.10716},
  preview     = {},
  venue_type  = {ArXiv},
  bibtex_show = {true},
  selected    = {false},
}

% IEEE TVLSI journal article on SIMD-like accelerators for sparse CNNs.
@article{lai2019enhancing,
  author      = {Lai, Bo-Cheng and Pan, Jyun-Wei and Lin, Chien-Yu},
  title       = {Enhancing Utilization of SIMD-Like Accelerator for Sparse Convolutional Neural Networks},
  journal     = {IEEE Transactions on Very Large Scale Integration (VLSI) Systems},
  year        = {2019},
  pdf         = {https://ieeexplore.ieee.org/document/8644034},
  preview     = {},
  selected    = {false},
  bibtex_show = {true},
}

% ASP-DAC conference paper; links to the IEEE Xplore record and the conference slides.
@inproceedings{lin2018supporting,
  author      = {Lin, Chien-Yu and Lai, Bo-Cheng},
  title       = {Supporting compressed-sparse activations and weights on SIMD-like accelerator for sparse convolutional neural networks},
  booktitle   = {Proceedings of the 23rd Asia and South Pacific Design Automation Conference (ASP-DAC)},
  year        = {2018},
  pdf         = {https://ieeexplore.ieee.org/document/8297290},
  slides      = {https://www.aspdac.com/aspdac2018/archive/pdf/2B-1.pdf},
  preview     = {},
  bibtex_show = {true},
  selected    = {true},
}