-
Notifications
You must be signed in to change notification settings - Fork 97
/
d2ltvm.bib
111 lines (101 loc) · 3.82 KB
/
d2ltvm.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
% MXNet library paper, published as an arXiv preprint (1512.01274).
% The product name in the title is brace-protected so sentence-casing
% styles do not lowercase it; the arXiv id is also given in machine-readable
% eprint fields, while the journal text is kept for styles that ignore them.
@Article{ Chen.Li.Li.ea.2015,
  title         = {{MXNet}: A flexible and efficient machine learning
                   library for heterogeneous distributed systems},
  author        = {Chen, Tianqi and Li, Mu and Li, Yutian and Lin, Min and
                   Wang, Naiyan and Wang, Minjie and Xiao, Tianjun and
                   Xu, Bing and Zhang, Chiyuan and Zhang, Zheng},
  journal       = {arXiv preprint arXiv:1512.01274},
  year          = {2015},
  eprint        = {1512.01274},
  archiveprefix = {arXiv}
}
% TVM compiler paper from the 13th USENIX OSDI symposium (2018).
% Acronyms are brace-protected so sentence-casing styles keep their case.
% The author list is deliberately truncated with the "and others" token,
% which the bibliography style renders as et al.
@InProceedings{ Chen.Moreau.Jiang.ea.2018,
  title     = {{TVM}: An automated end-to-end optimizing compiler for deep
               learning},
  author    = {Chen, Tianqi and Moreau, Thierry and Jiang, Ziheng and
               Zheng, Lianmin and Yan, Eddie and Shen, Haichen and
               Cowan, Meghan and Wang, Leyuan and Hu, Yuwei and
               Ceze, Luis and others},
  booktitle = {13th {USENIX} Symposium on Operating Systems Design and
               Implementation ({OSDI} 18)},
  pages     = {578--594},
  year      = {2018}
}
% Relay intermediate-representation paper, arXiv preprint (1904.08368).
% The acronym IR is brace-protected against style recasing; the arXiv id
% is also given in eprint fields, with the journal text kept for styles
% that ignore them.
@Article{ Roesch.Lyubomirsky.Kirisame.ea.2019,
  title         = {Relay: A High-Level {IR} for Deep Learning},
  author        = {Roesch, Jared and Lyubomirsky, Steven and
                   Kirisame, Marisa and Pollock, Josh and Weber, Logan and
                   Jiang, Ziheng and Chen, Tianqi and Moreau, Thierry and
                   Tatlock, Zachary},
  journal       = {arXiv preprint arXiv:1904.08368},
  year          = {2019},
  eprint        = {1904.08368},
  archiveprefix = {arXiv}
}
% CNN inference on integrated GPUs, 48th International Conference on
% Parallel Processing (2019). The pages field holds the single value 99
% as exported. Acronyms in the title are brace-protected, and ACM is
% recorded as publisher, matching the other ACM proceedings entry here.
@InProceedings{ Wang.Chen.Liu.ea.2019,
  title     = {A Unified Optimization Approach for {CNN} Model Inference
               on Integrated {GPUs}},
  author    = {Wang, Leyuan and Chen, Zhi and Liu, Yizhi and Wang, Yao and
               Zheng, Lianmin and Li, Mu and Wang, Yida},
  booktitle = {Proceedings of the 48th International Conference on
               Parallel Processing},
  pages     = {99},
  year      = {2019},
  publisher = {ACM}
}
% CNN inference on CPUs, USENIX Annual Technical Conference 2019.
% Acronyms in title and booktitle are brace-protected so that
% sentence-casing styles do not lowercase them.
@InProceedings{ Liu.Wang.Yu.ea.2019,
  title     = {Optimizing {CNN} Model Inference on {CPUs}},
  author    = {Liu, Yizhi and Wang, Yao and Yu, Ruofei and Li, Mu and
               Sharma, Vin and Wang, Yida},
  booktitle = {2019 {USENIX} Annual Technical Conference
               ({USENIX} {ATC} 19)},
  pages     = {1025--1040},
  year      = {2019}
}
% SysML Conference 2018 entry on deep learning inference for edge devices.
% No page numbers are recorded for this entry.
@InProceedings{ Jiang.Chen.Li.2018,
  author    = {Jiang, Ziheng and Chen, Tianqi and Li, Mu},
  title     = {Efficient Deep Learning Inference on Edge Devices},
  booktitle = {SysML Conference},
  year      = {2018}
}
% SGEMM performance-bound analysis on Fermi and Kepler GPUs, CGO 2013.
% Acronyms and architecture names are brace-protected against style
% recasing; the second author's given name carries its accent using the
% same escape form the file uses elsewhere; IEEE is recorded as publisher.
@InProceedings{ Lai.Seznec.2013,
  title     = {Performance upper bound analysis and optimization of
               {SGEMM} on {Fermi} and {Kepler} {GPUs}},
  author    = {Lai, Junjie and Seznec, Andr{\'e}},
  booktitle = {Proceedings of the 2013 {IEEE/ACM} International Symposium
               on Code Generation and Optimization ({CGO})},
  pages     = {1--10},
  year      = {2013},
  publisher = {IEEE}
}
% MAGMA GEMM for Fermi GPUs, journal article (volume 24, number 4, 2010).
% Acronyms and the architecture name are brace-protected against style
% recasing; the exported publisher string, which mixed the publisher name
% with its city, is split into publisher and address.
@Article{ Nath.Tomov.Dongarra.2010,
  title     = {An improved {MAGMA} {GEMM} for {Fermi} graphics processing
               units},
  author    = {Nath, Rajib and Tomov, Stanimire and Dongarra, Jack},
  journal   = {The International Journal of High Performance Computing
               Applications},
  volume    = {24},
  number    = {4},
  pages     = {511--515},
  year      = {2010},
  publisher = {SAGE Publications},
  address   = {London, England}
}
% Halide language and compiler paper, PLDI '13 (34th ACM SIGPLAN
% Conference on Programming Language Design and Implementation).
% The location field records the conference venue.
@InProceedings{ Ragan-Kelley.Barnes.Adams.ea.2013,
  title     = {Halide: A Language and Compiler for Optimizing Parallelism,
               Locality, and Recomputation in Image Processing Pipelines},
  author    = {Ragan-Kelley, Jonathan and Barnes, Connelly and
               Adams, Andrew and Paris, Sylvain and Durand, Fr{\'e}do and
               Amarasinghe, Saman},
  booktitle = {Proceedings of the 34th ACM SIGPLAN Conference on
               Programming Language Design and Implementation},
  series    = {PLDI '13},
  pages     = {519--530},
  numpages  = {12},
  year      = {2013},
  publisher = {ACM},
  location  = {Seattle, Washington, USA}
}
% MobileNets paper, arXiv preprint (1704.04861).
% The model name is restored to its camel-case spelling and brace-protected
% so sentence-casing styles keep it; the arXiv id is also given in eprint
% fields, with the journal text kept for styles that ignore them.
@Article{ Howard.Zhu.Chen.ea.2017,
  title         = {{MobileNets}: Efficient convolutional neural networks
                   for mobile vision applications},
  author        = {Howard, Andrew G and Zhu, Menglong and Chen, Bo and
                   Kalenichenko, Dmitry and Wang, Weijun and
                   Weyand, Tobias and Andreetto, Marco and Adam, Hartwig},
  journal       = {arXiv preprint arXiv:1704.04861},
  year          = {2017},
  eprint        = {1704.04861},
  archiveprefix = {arXiv}
}