d2ltvm.bib


% arXiv-only preprint. The journal string is kept so classic styles still print
% the identifier; eprint/archiveprefix expose it to eprint-aware styles.
@Article{	  Chen.Li.Li.ea.2015,
  title		= {{MXNet}: A flexible and efficient machine learning library
		  for heterogeneous distributed systems},
  author	= {Chen, Tianqi and Li, Mu and Li, Yutian and Lin, Min and
		  Wang, Naiyan and Wang, Minjie and Xiao, Tianjun and Xu,
		  Bing and Zhang, Chiyuan and Zhang, Zheng},
  journal	= {arXiv preprint arXiv:1512.01274},
  year		= {2015},
  eprint	= {1512.01274},
  archiveprefix	= {arXiv}
}

% Acronyms are braced so sentence-casing styles do not downcase them.
@InProceedings{	  Chen.Moreau.Jiang.ea.2018,
  title		= {{TVM}: An automated end-to-end optimizing compiler for deep
		  learning},
  author	= {Chen, Tianqi and Moreau, Thierry and Jiang, Ziheng and
		  Zheng, Lianmin and Yan, Eddie and Shen, Haichen and Cowan,
		  Meghan and Wang, Leyuan and Hu, Yuwei and Ceze, Luis and
		  others},
  booktitle	= {13th {USENIX} Symposium on Operating Systems Design and
		  Implementation ({OSDI} 18)},
  pages		= {578--594},
  year		= {2018}
}

% arXiv-only preprint. The journal string is kept so classic styles still print
% the identifier; eprint/archiveprefix expose it to eprint-aware styles.
@Article{	  Roesch.Lyubomirsky.Kirisame.ea.2019,
  title		= {Relay: A High-Level {IR} for Deep Learning},
  author	= {Roesch, Jared and Lyubomirsky, Steven and Kirisame, Marisa
		  and Pollock, Josh and Weber, Logan and Jiang, Ziheng and
		  Chen, Tianqi and Moreau, Thierry and Tatlock, Zachary},
  journal	= {arXiv preprint arXiv:1904.08368},
  year		= {2019},
  eprint	= {1904.08368},
  archiveprefix	= {arXiv}
}

% NOTE(review): pages = 99 looks like an article number rather than a page
% range; confirm against the published proceedings.
@InProceedings{	  Wang.Chen.Liu.ea.2019,
  title		= {A Unified Optimization Approach for {CNN} Model Inference
		  on Integrated {GPUs}},
  author	= {Wang, Leyuan and Chen, Zhi and Liu, Yizhi and Wang, Yao
		  and Zheng, Lianmin and Li, Mu and Wang, Yida},
  booktitle	= {Proceedings of the 48th International Conference on
		  Parallel Processing},
  pages		= {99},
  year		= {2019},
  publisher	= {ACM}
}

% Acronyms are braced so sentence-casing styles do not downcase them.
@InProceedings{	  Liu.Wang.Yu.ea.2019,
  title		= {Optimizing {CNN} Model Inference on {CPUs}},
  author	= {Liu, Yizhi and Wang, Yao and Yu, Ruofei and Li, Mu and
		  Sharma, Vin and Wang, Yida},
  booktitle	= {2019 {USENIX} Annual Technical Conference ({USENIX} {ATC}
		  19)},
  pages		= {1025--1040},
  year		= {2019}
}

@InProceedings{	  Jiang.Chen.Li.2018,
  author	= {Jiang, Ziheng and Chen, Tianqi and Li, Mu},
  title		= {Efficient Deep Learning Inference on Edge Devices},
  booktitle	= {SysML Conference},
  year		= {2018}
}

% Acronyms and architecture names are braced so sentence-casing styles keep
% their capitalisation.
@InProceedings{	  Lai.Seznec.2013,
  title		= {Performance upper bound analysis and optimization of
		  {SGEMM} on {Fermi} and {Kepler} {GPUs}},
  author	= {Lai, Junjie and Seznec, Andr{\'e}},
  booktitle	= {Proceedings of the 2013 {IEEE/ACM} International Symposium
		  on Code Generation and Optimization ({CGO})},
  pages		= {1--10},
  year		= {2013},
  publisher	= {IEEE}
}

% Publisher name and city are stored in separate fields; acronyms in the title
% are braced against sentence-casing.
@Article{	  Nath.Tomov.Dongarra.2010,
  title		= {An improved {MAGMA} {GEMM} for {Fermi} graphics processing
		  units},
  author	= {Nath, Rajib and Tomov, Stanimire and Dongarra, Jack},
  journal	= {The International Journal of High Performance Computing
		  Applications},
  volume	= {24},
  number	= {4},
  pages		= {511--515},
  year		= {2010},
  publisher	= {SAGE Publications},
  address	= {London, England}
}

% The conference city is stored in venue, not location, so biblatex styles do
% not print it as the publisher's location.
@InProceedings{	  Ragan-Kelley.Barnes.Adams.ea.2013,
  title		= {{Halide}: A Language and Compiler for Optimizing
		  Parallelism, Locality, and Recomputation in Image
		  Processing Pipelines},
  author	= {Ragan-Kelley, Jonathan and Barnes, Connelly and Adams,
		  Andrew and Paris, Sylvain and Durand, Fr{\'e}do and
		  Amarasinghe, Saman},
  booktitle	= {Proceedings of the 34th {ACM} {SIGPLAN} Conference on
		  Programming Language Design and Implementation},
  series	= {{PLDI} '13},
  venue		= {Seattle, Washington, USA},
  pages		= {519--530},
  numpages	= {12},
  year		= {2013},
  publisher	= {ACM}
}

% arXiv-only preprint. The journal string is kept so classic styles still print
% the identifier; eprint/archiveprefix expose it to eprint-aware styles.
@Article{	  Howard.Zhu.Chen.ea.2017,
  title		= {{MobileNets}: Efficient convolutional neural networks for
		  mobile vision applications},
  author	= {Howard, Andrew G. and Zhu, Menglong and Chen, Bo and
		  Kalenichenko, Dmitry and Wang, Weijun and Weyand, Tobias
		  and Andreetto, Marco and Adam, Hartwig},
  journal	= {arXiv preprint arXiv:1704.04861},
  year		= {2017},
  eprint	= {1704.04861},
  archiveprefix	= {arXiv}
}