<!DOCTYPE HTML>
<!--
Yifan Yang (杨亦凡)
html5up.net | @ajlkn
Free for personal and commercial use under the CCA 3.0 license (html5up.net/license)
-->
<html lang="en">
<head>
<title>Yifan Yang (杨亦凡)</title>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no" />
<link rel="stylesheet" href="assets/css/main.css" />
</head>
<body class="is-preload">
<!-- Wrapper -->
<div id="wrapper">
<!-- Main -->
<div id="main">
<div class="inner">
<!-- Header -->
<header id="header">
<a href="index.html" class="logo"><h2>Yifan Yang</h2></a>
</header>
<!-- Banner -->
<section id="banner">
<span class="image object">
<img src="images/pic001.jpg" width="100px" padding-bottom="71.4%" alt=""/>
</span>
<p>
Ph.D. student,<br />
Shanghai Jiao Tong University.<br />
800 Dongchuan RD. Minhang District,<br />
Shanghai, China.
</p>
</section>
<!-- Section -->
<section>
<header class="major">
<h2>Biography</h2>
</header>
<p>
I am a Ph.D. student at Shanghai Jiao Tong University (SJTU) and a member of the <a href="https://x-lance.sjtu.edu.cn/">Cross Media (X-)Language Intelligence Lab (X-LANCE)</a> in the Department of Computer Science and Engineering, supervised by Prof. <a href="https://chenxie95.github.io/">Xie Chen</a> and under the leadership of Prof. <a href="https://x-lance.sjtu.edu.cn/members/kai_yu">Kai Yu</a>. As the second Ph.D. student supervised by Prof. Chen, I am dedicating these five years to contributing to the field of speech processing.
</p>
<p>
During my senior undergraduate year, I worked at the Xiaomi AI Lab as an algorithm engineer intern, developing <a href="https://github.com/k2-fsa">the Next-gen Kaldi</a> under the leadership of <a href="http://danielpovey.com/">Daniel Povey</a>.
</p>
<p>
My recent work focuses on the following research topics. If you would like to discuss anything, please feel free to contact me.
</p>
<ul>
<li>
<p>Speech representation learning from continuous to discrete</p>
</li>
<li>
<p>Speech recognition for low-resource languages with in-the-wild data</p>
</li>
<li>
<p>Addressing key issues in end-to-end speech recognition</p>
</li>
</ul>
<h3>Education</h3>
<ul>
<li>
<p>Ph.D., Computer Science and Technology, Shanghai Jiao Tong University, 2023.09-</p>
</li>
<li>
<p>B.E., Computer Science and Technology, Tianjin University, 2019.09-2023.07</p>
<p>GPA: 3.91/4.0, Rank: 1/139. [<a href="https://yfyeung.github.io/CV/Transcript-en-undergraduate.pdf">Transcript</a>]</p>
</li>
</ul>
<h3>Experiences</h3>
<ul>
<li>
<p>Research Intern, CS SPEECH &amp; TRANSLATION Group, <a href="https://www.msra.cn">Microsoft Research Asia (MSRA)</a>, 2024.03.05-2024.12.09</p>
<p>Co-supervised by <a href="https://scholar.google.com/citations?user=6mNya-wAAAAJ&hl=en">Shujie Liu</a> and <a href="https://scholar.google.com/citations?user=grUvupMAAAAJ&hl=en">Jinyu Li</a>.</p>
<p>Investigate advanced zero-shot and streaming text-to-speech.</p>
</li>
</ul>
<ul>
<li>
<p>Machine Learning Engineer Intern, The Next-gen Kaldi Team, Xiaomi AI Lab, 2022.11.01-2023.08.28</p>
<p>Investigate advanced and efficient open-source end-to-end (E2E) automatic speech recognition.</p>
<p>Develop <a href="https://github.com/k2-fsa">the Next-gen Kaldi</a>, including <a href="https://github.com/k2-fsa/icefall">Icefall</a>, <a href="https://github.com/lhotse-speech/lhotse">Lhotse</a>, and <a href="https://github.com/k2-fsa/k2">k2</a>.</p>
<p>Supervised by <a href="http://danielpovey.com/">Daniel Povey</a>.</p>
</li>
</ul>
<h3>News</h3>
<ul>
<li>
<p>[2024.06] Three papers are accepted by INTERSPEECH 2024.</p>
</li>
<li>
<p>[2024.03] I join the CS SPEECH &amp; TRANSLATION group at <a href="https://www.msra.cn">Microsoft Research Asia (MSRA)</a>.</p>
</li>
<li>
<p>[2024.01] <a href="https://arxiv.org/pdf/2310.11230.pdf">Zipformer</a> is accepted for <span style="color:red; font-weight:bold;">oral</span> presentation at ICLR 2024. Congratulations!</p>
</li>
<li>
<p>[2023.12] Three papers are accepted by ICASSP 2024.</p>
</li>
<li>
<p>[2023.09] I start to pursue my Ph.D. at Shanghai Jiao Tong University.</p>
</li>
<li>
<p>[2023.06] I earn my Bachelor's degree in Engineering with an Excellent Student title.</p>
</li>
<li>
<p>[2023.05] Two papers are accepted by INTERSPEECH 2023.</p>
</li>
<li>
<p>[2022.11] I join the Next-gen Kaldi team at Xiaomi.</p>
</li>
<li>
<p>[2022.06] I join <a href="https://x-lance.sjtu.edu.cn/">X-LANCE</a>.</p>
</li>
</ul>
<header class="major">
<h2>Research</h2>
</header>
<h3>Selected Publications</h3>
<p>Check out my full publication list on <a href="https://scholar.google.com/citations?hl=zh-CN&user=slhAlQ0AAAAJ">Google Scholar</a>.</p>
<h4>Efficient End-to-end Speech Recognition</h4>
<ul>
<li>
<p><a href="https://arxiv.org/pdf/2310.11230.pdf">Zipformer: A faster and better encoder for automatic speech recognition</a></p>
<p>Zengwei Yao, Liyong Guo, Xiaoyu Yang, Wei Kang, Fangjun Kuang, <b>Yifan Yang</b>, Zengrui Jin, Long Lin, Daniel Povey</p>
<p><span style="color:red; font-weight:bold;">Oral</span> in Proc. ICLR, 2024</p>
</li>
<li>
<p><a href="https://www.isca-speech.org/archive/pdfs/interspeech_2023/yang23l_interspeech.pdf">Blank-regularized CTC for Frame Skipping in Neural Transducer</a></p>
<p><b>Yifan Yang</b>, Xiaoyu Yang, Liyong Guo, Zengwei Yao, Wei Kang, Fangjun Kuang, Long Lin, Xie Chen, Daniel Povey</p>
<p>Proc. INTERSPEECH, 2023</p>
</li>
<li>
<p><a href="https://arxiv.org/pdf/2309.07414.pdf">PromptASR for contextualized ASR with controllable style</a></p>
<p>Xiaoyu Yang, Wei Kang, Zengwei Yao, <b>Yifan Yang</b>, Liyong Guo, Fangjun Kuang, Long Lin, Daniel Povey</p>
<p><span style="color:red; font-weight:bold;">Oral</span> in Proc. ICASSP, 2024</p>
</li>
</ul>
<h4>Speech Recognition Dataset</h4>
<ul>
<li>
<p><a href="https://arxiv.org/pdf/2406.11546">GigaSpeech 2: An Evolving, Large-Scale and Multi-domain ASR Corpus for Low-Resource Languages with Automated Crawling, Transcription and Refinement</a></p>
<p><b>Yifan Yang</b>, Zheshu Song, Jianheng Zhuo, Mingyu Cui, Jinpeng Li, Bo Yang, Yexing Du, Ziyang Ma, Xunying Liu, Ziyuan Wang, Ke Li, Shuai Fan, Kai Yu, Wei-Qiang Zhang, Guoguo Chen, Xie Chen</p>
<p>arXiv preprint, 2024</p>
<p>GigaSpeech 2 powers <a href="https://blog.opentyphoon.ai/typhoon-audio-preview-release-6fbb3f938287">Typhoon-Audio</a>, a state-of-the-art open-source audio language model for Thai tasks.</p>
<p>[<a href="https://huggingface.co/datasets/speechcolab/gigaspeech2">Dataset</a>] [<a href="https://github.com/SpeechColab/GigaSpeech2">Code</a>]</p>
</li>
<li>
<p><a href="https://www.isca-archive.org/interspeech_2024/jin24_interspeech.pdf">LibriheavyMix: A 20,000-Hour Dataset for Single-Channel Reverberant Multi-Talker Speech Separation, ASR and Speaker Diarization</a></p>
<p>Zengrui Jin*, <b>Yifan Yang*</b>, Mohan Shi*, Wei Kang, Xiaoyu Yang, Zengwei Yao, Fangjun Kuang, Liyong Guo, Lingwei Meng, Long Lin, Yong Xu, Shi-Xiong Zhang, Daniel Povey</p>
<p><span style="color:red; font-weight:bold;">Oral</span> in Proc. INTERSPEECH, 2024</p>
<p>[<a href="https://huggingface.co/zrjin?search_datasets=libriheavymix">Dataset</a>]</p>
</li>
<li>
<p><a href="https://arxiv.org/pdf/2309.08105.pdf">Libriheavy: a 50,000 hours ASR corpus with punctuation casing and context</a></p>
<p>Wei Kang, Xiaoyu Yang, Zengwei Yao, Fangjun Kuang, <b>Yifan Yang</b>, Liyong Guo, Long Lin, Daniel Povey</p>
<p><span style="color:red; font-weight:bold;">Oral</span> in Proc. ICASSP, 2024</p>
<p>[<a href="https://huggingface.co/datasets/pkufool/libriheavy">Dataset</a>] [<a href="https://github.com/k2-fsa/libriheavy">Code</a>]</p>
</li>
</ul>
<h4>Discretized Speech Representation</h4>
<ul>
<li>
<p><a href="https://arxiv.org/pdf/2309.07377.pdf">Towards Universal Speech Discrete Tokens: A Case Study for ASR and TTS</a></p>
<p><b>Yifan Yang</b>, Feiyu Shen, Chenpeng Du, Ziyang Ma, Kai Yu, Daniel Povey, Xie Chen</p>
<p><span style="color:red; font-weight:bold;">Oral</span> in Proc. ICASSP, 2024</p>
</li>
</ul>
<h3>Open-Source Projects</h3>
<ul>
<li>
<p><a href="https://github.com/k2-fsa/icefall">Icefall: The recipes of the Next-gen Kaldi</a></p>
</li>
<li>
<p><a href="https://github.com/lhotse-speech/lhotse">Lhotse: Tools for handling speech data in machine learning projects</a></p>
</li>
</ul>
<h3>Competitions</h3>
<ul>
<li>
<p>Ranked 7/36 in the <a href="https://icmcasr.org/">ICASSP 2024 ICMC-ASR Grand Challenge</a> Track I, 2023.12</p>
</li>
</ul>
<h3>Awards</h3>
<ul>
<li>
<p>Chu Xin Scholarship, Tianjin University, 2022</p>
</li>
<li>
<p><a href="http://www.bsef.baosteel.com/#/aboutus">Baosteel Scholarship</a>, Baosteel Education Foundation, 2021</p>
</li>
<li>
<p>"Bingchang Zhuang" Scholarship, Tianjin University, 2020</p>
</li>
</ul>
<h3>Academic Service</h3>
<ul>
<li>
<p>[Conference Reviewer] The Thirteenth International Conference on Learning Representations (ICLR 2025)</p>
</li>
<li>
<p>[Conference Reviewer] International Conference on Computational Linguistics (COLING 2025, LREC-COLING 2024)</p>
</li>
<li>
<p>[Conference Reviewer] 2024 IEEE Spoken Language Technology Workshop (SLT 2024)</p>
</li>
<li>
<p>[Conference Reviewer] International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2025, 2024)</p>
</li>
<li>
<p>[Conference Reviewer] ACL Rolling Review (ACL ARR 2024 October, 2024 June, 2023 October)</p>
</li>
<li>
<p>[Conference Reviewer] The 2022 Conference on Empirical Methods in Natural Language Processing (EMNLP 2022)</p>
</li>
</ul>
<h3>Teaching Assistance</h3>
<ul>
<li>
<p>SJTU CS1501 Programming</p>
</li>
</ul>
<p>
<a href='https://clustrmaps.com/site/1but5' title='Visit tracker'><img src='//clustrmaps.com/map_v2.png?cl=ffffff&w=a&t=tt&d=xmZZKuR9JgwM-nnqvhx7hQETXCchJo7zQhRldlQGf6s' alt='Visitor map' /></a>
</p>
</section>
</div>
</div>
<!-- Sidebar -->
<div id="sidebar">
<div class="inner">
<!-- Menu -->
<nav id="menu">
<header class="major">
<h2>Menu</h2>
</header>
<ul>
<li><a href="index.html">Homepage</a></li>
</ul>
</nav>
<!-- Section -->
<nav id="menu">
<header class="major">
<h2>About me</h2>
</header>
<ul>
<li><a href="https://scholar.google.com/citations?hl=zh-CN&user=slhAlQ0AAAAJ">Scholar</a></li>
<li><a href="https://github.com/yfyeung/">GitHub</a></li>
<li><a href="https://huggingface.co/yfyeung">Huggingface</a></li>
<li><a href="https://www.linkedin.com/in/yifan-yang-290ba624b/">LinkedIn</a></li>
</ul>
</nav>
<!-- Section -->
<section>
<header class="major">
<h2>Get in touch</h2>
</header>
<ul class="contact">
<li class="icon solid fa-envelope"><a href="mailto:[email protected]">[email protected]</a></li>
<li class="icon brands fa-weixin"><a href="images/wechat.JPG">WeChat</a></li>
</ul>
</section>
<!-- Footer -->
<footer id="footer">
<p class="copyright">© All rights reserved. Demo Images: <a href="https://unsplash.com">Unsplash</a>. Design: <a href="https://html5up.net">HTML5 UP</a>.</p>
</footer>
</div>
</div>
</div>
<!-- Scripts -->
<script src="assets/js/jquery.min.js"></script>
<script src="assets/js/browser.min.js"></script>
<script src="assets/js/breakpoints.min.js"></script>
<script src="assets/js/util.js"></script>
<script src="assets/js/main.js"></script>
</body>
</html>