% NOTE(review): second author's given name corrected from "Moahmed" to "Mohamed" — confirm against the paper.
@InProceedings{ Ahmed.Aly.Gonzalez.ea.2012,
  title = {Scalable inference in latent variable models},
  author = {Ahmed, Amr and Aly, Mohamed and Gonzalez, Joseph and
    Narayanamurthy, Shravan and Smola, Alexander J},
  booktitle = {Proceedings of the fifth ACM international conference on
    Web search and data mining},
  pages = {123--132},
  year = {2012},
  organization = {ACM}
}
@Article{ Aji.McEliece.2000,
  title = {The generalized distributive law},
  author = {Aji, Srinivas M and McEliece, Robert J},
  journal = {IEEE transactions on Information Theory},
  volume = {46},
  number = {2},
  pages = {325--343},
  year = {2000},
  publisher = {IEEE}
}
% NOTE(review): the author field was cut off after "Hinton, Geoffrey"; the middle initial "E" is restored — confirm.
@Article{ Ba.Kiros.Hinton.2016,
  title = {Layer normalization},
  author = {Ba, Jimmy Lei and Kiros, Jamie Ryan and Hinton, Geoffrey E},
  journal = {arXiv preprint arXiv:1607.06450},
  year = {2016}
}
@Article{ Bahdanau.Cho.Bengio.2014,
  title = {Neural machine translation by jointly learning to align
    and translate},
  author = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua},
  journal = {arXiv preprint arXiv:1409.0473},
  year = {2014}
}
@InProceedings{ Bay.Tuytelaars.Van-Gool.2006,
  title = {{SURF}: Speeded up robust features},
  author = {Bay, Herbert and Tuytelaars, Tinne and Van Gool, Luc},
  booktitle = {European conference on computer vision},
  pages = {404--417},
  year = {2006},
  organization = {Springer}
}
% NOTE(review): "Feb" was stored in the number field; moved to month, since it is a month and not an issue number — confirm.
@Article{ Bengio.Ducharme.Vincent.ea.2003,
  title = {A neural probabilistic language model},
  author = {Bengio, Yoshua and Ducharme, R{\'e}jean and Vincent,
    Pascal and Jauvin, Christian},
  journal = {Journal of machine learning research},
  volume = {3},
  month = feb,
  pages = {1137--1155},
  year = {2003}
}
% NOTE(review): the title was cut off after "Tikhonov"; "regularization" is restored from the published title — confirm.
@Article{ Bishop.1995,
  title = {Training with noise is equivalent to {Tikhonov}
    regularization},
  author = {Bishop, Chris M},
  journal = {Neural computation},
  volume = {7},
  number = {1},
  pages = {108--116},
  year = {1995},
  publisher = {MIT Press}
}
@Book{ Bishop.2006,
  title = {Pattern recognition and machine learning},
  author = {Bishop, Christopher M},
  year = {2006},
  publisher = {Springer}
}
% NOTE(review): the journal name was cut off after "Computational"; "Linguistics" is restored — confirm.
@Article{ Bojanowski.Grave.Joulin.ea.2017,
  title = {Enriching word vectors with subword information},
  author = {Bojanowski, Piotr and Grave, Edouard and Joulin, Armand
    and Mikolov, Tomas},
  journal = {Transactions of the Association for Computational
    Linguistics},
  volume = {5},
  pages = {135--146},
  year = {2017},
  publisher = {MIT Press}
}
@Book{ Bollobas.1999,
  title = {Linear analysis},
  author = {Bollob{\'a}s, B},
  year = {1999},
  publisher = {Cambridge University Press},
  address = {Cambridge}
}
% NOTE(review): the title was cut off after "natural language"; "inference" is restored from the published title — confirm.
@Article{ Bowman.Angeli.Potts.ea.2015,
  title = {A large annotated corpus for learning natural language
    inference},
  author = {Bowman, Samuel R and Angeli, Gabor and Potts, Christopher
    and Manning, Christopher D},
  journal = {arXiv preprint arXiv:1508.05326},
  year = {2015}
}
@Book{ Boyd.Vandenberghe.2004,
  address = {Cambridge, England},
  author = {Stephen Boyd and Lieven Vandenberghe},
  publisher = {Cambridge University Press},
  title = {Convex Optimization},
  year = 2004
}
@InProceedings{ Brown.Cocke.Della-Pietra.ea.1988,
  title = {A statistical approach to language translation},
  author = {Brown, Peter F and Cocke, John and Della Pietra, Stephen A
    and Della Pietra, Vincent J and Jelinek, Frederick and
    Mercer, Robert L and Roossin, Paul},
  booktitle = {Coling Budapest 1988 Volume 1: International Conference on
    Computational Linguistics},
  year = {1988}
}
@Article{ Brown.Cocke.Della-Pietra.ea.1990,
  title = {A statistical approach to machine translation},
  author = {Brown, Peter F and Cocke, John and Della Pietra, Stephen A
    and Della Pietra, Vincent J and Jelinek, Frederick and
    Lafferty, John and Mercer, Robert L and Roossin, Paul S},
  journal = {Computational linguistics},
  volume = {16},
  number = {2},
  pages = {79--85},
  year = {1990}
}
@InProceedings{ Brown.Sandholm.2017,
  title = {{Libratus}: The Superhuman {AI} for No-Limit Poker},
  author = {Brown, Noam and Sandholm, Tuomas},
  booktitle = {IJCAI},
  pages = {5226--5228},
  year = {2017}
}
% NOTE(review): the author field was cut off after "Hsu,"; "Feng-hsiung" is restored. "Jr" is moved into the suffix position — confirm.
@Article{ Campbell.Hoane-Jr.Hsu.2002,
  title = {{Deep Blue}},
  author = {Campbell, Murray and Hoane, Jr, A Joseph and Hsu,
    Feng-hsiung},
  journal = {Artificial intelligence},
  volume = {134},
  number = {1--2},
  pages = {57--83},
  year = {2002},
  publisher = {Elsevier}
}
@InCollection{ Canny.1987,
  title = {A computational approach to edge detection},
  author = {Canny, John},
  booktitle = {Readings in computer vision},
  pages = {184--203},
  year = {1987},
  publisher = {Elsevier}
}
@InProceedings{ Cer.Diab.Agirre.ea.2017,
  title = {{SemEval-2017} Task 1: Semantic Textual Similarity
    Multilingual and Crosslingual Focused Evaluation},
  author = {Cer, Daniel and Diab, Mona and Agirre, Eneko and
    Lopez-Gazpio, I{\~n}igo and Specia, Lucia},
  booktitle = {Proceedings of the 11th International Workshop on Semantic
    Evaluation (SemEval-2017)},
  pages = {1--14},
  year = {2017}
}
@InProceedings{ Cheng.Dong.Lapata.2016,
  title = {Long Short-Term Memory-Networks for Machine Reading},
  author = {Cheng, Jianpeng and Dong, Li and Lapata, Mirella},
  booktitle = {Proceedings of the 2016 Conference on Empirical Methods in
    Natural Language Processing},
  pages = {551--561},
  year = {2016}
}
@Article{ Cho.Van-Merrienboer.Bahdanau.ea.2014,
  title = {On the properties of neural machine translation:
    Encoder-decoder approaches},
  author = {Cho, Kyunghyun and Van Merri{\"e}nboer, Bart and Bahdanau,
    Dzmitry and Bengio, Yoshua},
  journal = {arXiv preprint arXiv:1409.1259},
  year = {2014}
}
@Article{ Cho.Van-Merrienboer.Gulcehre.ea.2014,
  title = {Learning phrase representations using {RNN} encoder-decoder
    for statistical machine translation},
  author = {Cho, Kyunghyun and Van Merri{\"e}nboer, Bart and Gulcehre,
    Caglar and Bahdanau, Dzmitry and Bougares, Fethi and
    Schwenk, Holger and Bengio, Yoshua},
  journal = {arXiv preprint arXiv:1406.1078},
  year = {2014}
}
@Book{ Chowdhury.2010,
  title = {Introduction to modern information retrieval},
  author = {Chowdhury, Gobinda G},
  year = {2010},
  publisher = {Facet publishing}
}
@Article{ Chung.Gulcehre.Cho.ea.2014,
  title = {Empirical evaluation of gated recurrent neural networks on
    sequence modeling},
  author = {Chung, Junyoung and Gulcehre, Caglar and Cho, KyungHyun
    and Bengio, Yoshua},
  journal = {arXiv preprint arXiv:1412.3555},
  year = {2014}
}
@Article{ Csiszar.2008,
  title = {Axiomatic characterizations of information measures},
  author = {Csisz{\'a}r, Imre},
  journal = {Entropy},
  volume = {10},
  number = {3},
  pages = {261--273},
  year = {2008},
  publisher = {Molecular Diversity Preservation International}
}
@InProceedings{ Dalal.Triggs.2005,
  title = {Histograms of oriented gradients for human detection},
  author = {Dalal, Navneet and Triggs, Bill},
  booktitle = {2005 IEEE computer society conference on computer vision
    and pattern recognition (CVPR'05)},
  volume = {1},
  pages = {886--893},
  year = {2005},
  organization = {IEEE}
}
@Article{ De-Cock.2011,
  title = {{Ames, Iowa}: Alternative to the {Boston} housing data as an
    end of semester regression project},
  author = {De Cock, Dean},
  journal = {Journal of Statistics Education},
  volume = {19},
  number = {3},
  year = {2011},
  publisher = {Taylor \& Francis}
}
@InProceedings{ DeCandia.Hastorun.Jampani.ea.2007,
  title = {{Dynamo}: {Amazon}'s highly available key-value store},
  author = {DeCandia, Giuseppe and Hastorun, Deniz and Jampani, Madan
    and Kakulapati, Gunavardhan and Lakshman, Avinash and
    Pilchin, Alex and Sivasubramanian, Swaminathan and
    Vosshall, Peter and Vogels, Werner},
  booktitle = {ACM SIGOPS operating systems review},
  volume = {41},
  number = {6},
  pages = {205--220},
  year = {2007},
  organization = {ACM}
}
@Article{ Devlin.Chang.Lee.ea.2018,
  title = {{BERT}: Pre-training of deep bidirectional transformers for
    language understanding},
  author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and
    Toutanova, Kristina},
  journal = {arXiv preprint arXiv:1810.04805},
  year = {2018}
}
@InProceedings{ Dosovitskiy.Beyer.Kolesnikov.ea.2021,
  title = {An image is worth 16x16 words: Transformers for image
    recognition at scale},
  author = {Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov,
    Alexander and Weissenborn, Dirk and Zhai, Xiaohua and
    Unterthiner, Thomas and Dehghani, Mostafa and Minderer,
    Matthias and Heigold, Georg and Gelly, Sylvain and others},
  booktitle = {International Conference on Learning Representations},
  year = {2021}
}
@InCollection{ Doucet.De-Freitas.Gordon.2001,
  title = {An introduction to sequential {Monte Carlo} methods},
  author = {Doucet, Arnaud and De Freitas, Nando and Gordon, Neil},
  booktitle = {Sequential Monte Carlo methods in practice},
  pages = {3--14},
  year = {2001},
  publisher = {Springer}
}
% NOTE(review): "Jul" was stored in the number field; moved to month, since it is a month and not an issue number — confirm.
@Article{ Duchi.Hazan.Singer.2011,
  title = {Adaptive subgradient methods for online learning and
    stochastic optimization},
  author = {Duchi, John and Hazan, Elad and Singer, Yoram},
  journal = {Journal of Machine Learning Research},
  volume = {12},
  month = jul,
  pages = {2121--2159},
  year = {2011}
}
@Article{ Dumoulin.Visin.2016,
  title = {A guide to convolution arithmetic for deep learning},
  author = {Dumoulin, Vincent and Visin, Francesco},
  journal = {arXiv preprint arXiv:1603.07285},
  year = {2016}
}
% NOTE(review): the author field was cut off after "Schwarz,"; the given name "Michael" is restored — confirm.
@Article{ Edelman.Ostrovsky.Schwarz.2007,
  title = {Internet advertising and the generalized second-price
    auction: Selling billions of dollars worth of keywords},
  author = {Edelman, Benjamin and Ostrovsky, Michael and Schwarz,
    Michael},
  journal = {American economic review},
  volume = {97},
  number = {1},
  pages = {242--259},
  year = {2007}
}
% NOTE(review): the title was cut off after "there is only a"; "step-size" is restored from the published title — confirm.
@InProceedings{ Flammarion.Bach.2015,
  title = {From averaging to acceleration, there is only a
    step-size},
  author = {Flammarion, Nicolas and Bach, Francis},
  booktitle = {Conference on Learning Theory},
  pages = {658--695},
  year = {2015}
}
% NOTE(review): the author field was cut off after "Bethge,"; the given name "Matthias" is restored — confirm.
@InProceedings{ Gatys.Ecker.Bethge.2016,
  title = {Image style transfer using convolutional neural networks},
  author = {Gatys, Leon A and Ecker, Alexander S and Bethge,
    Matthias},
  booktitle = {Proceedings of the IEEE conference on computer vision and
    pattern recognition},
  pages = {2414--2423},
  year = {2016}
}
% NOTE(review): the title was cut off after "and real"; "matrices" is restored from the published title — confirm.
@Article{ Ginibre.1965,
  title = {Statistical ensembles of complex, quaternion, and real
    matrices},
  author = {Ginibre, Jean},
  journal = {Journal of Mathematical Physics},
  volume = {6},
  number = {3},
  pages = {440--449},
  year = {1965},
  publisher = {AIP}
}
@InProceedings{ Girshick.2015,
  title = {{Fast R-CNN}},
  author = {Girshick, Ross},
  booktitle = {Proceedings of the IEEE international conference on
    computer vision},
  pages = {1440--1448},
  year = {2015}
}
@InProceedings{ Girshick.Donahue.Darrell.ea.2014,
  title = {Rich feature hierarchies for accurate object detection and
    semantic segmentation},
  author = {Girshick, Ross and Donahue, Jeff and Darrell, Trevor and
    Malik, Jitendra},
  booktitle = {Proceedings of the IEEE conference on computer vision and
    pattern recognition},
  pages = {580--587},
  year = {2014}
}
@InProceedings{ Glorot.Bengio.2010,
  title = {Understanding the difficulty of training deep feedforward
    neural networks},
  author = {Glorot, Xavier and Bengio, Yoshua},
  booktitle = {Proceedings of the thirteenth international conference on
    artificial intelligence and statistics},
  pages = {249--256},
  year = {2010}
}
@Article{ Goh.2017,
  author = {Goh, Gabriel},
  title = {Why Momentum Really Works},
  journal = {Distill},
  year = {2017},
  url = {http://distill.pub/2017/momentum},
  doi = {10.23915/distill.00006}
}
% NOTE(review): the title was cut off after "an information"; "tapestry" is restored from the published title — confirm.
@Article{ Goldberg.Nichols.Oki.ea.1992,
  title = {Using collaborative filtering to weave an information
    tapestry},
  author = {Goldberg, David and Nichols, David and Oki, Brian M and
    Terry, Douglas},
  journal = {Communications of the ACM},
  volume = {35},
  number = {12},
  pages = {61--71},
  year = {1992},
  publisher = {Association for Computing Machinery, Inc.}
}
@Book{ Goodfellow.Bengio.Courville.2016,
  title = {Deep Learning},
  author = {Ian Goodfellow and Yoshua Bengio and Aaron Courville},
  publisher = {MIT Press},
  note = {\url{http://www.deeplearningbook.org}},
  year = {2016}
}
@InProceedings{ Goodfellow.Pouget-Abadie.Mirza.ea.2014,
  title = {Generative adversarial nets},
  author = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi
    and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and
    Courville, Aaron and Bengio, Yoshua},
  booktitle = {Advances in neural information processing systems},
  pages = {2672--2680},
  year = {2014}
}
@Article{ Gotmare.Keskar.Xiong.ea.2018,
  title = {A Closer Look at Deep Learning Heuristics: Learning rate
    restarts, Warmup and Distillation},
  author = {Gotmare, Akhilesh and Keskar, Nitish Shirish and Xiong,
    Caiming and Socher, Richard},
  journal = {arXiv preprint arXiv:1810.13243},
  year = {2018}
}
@Article{ Graves.2013,
  title = {Generating sequences with recurrent neural networks},
  author = {Graves, Alex},
  journal = {arXiv preprint arXiv:1308.0850},
  year = {2013}
}
@Article{ Graves.Schmidhuber.2005,
  title = {Framewise phoneme classification with bidirectional {LSTM}
    and other neural network architectures},
  author = {Graves, Alex and Schmidhuber, J{\"u}rgen},
  journal = {Neural networks},
  volume = {18},
  number = {5--6},
  pages = {602--610},
  year = {2005},
  publisher = {Elsevier}
}
@InCollection{ Gunawardana.Shani.2015,
  title = {Evaluating recommender systems},
  author = {Gunawardana, Asela and Shani, Guy},
  booktitle = {Recommender systems handbook},
  pages = {265--308},
  year = {2015},
  publisher = {Springer}
}
@InProceedings{ Guo.Tang.Ye.ea.2017,
  title = {{DeepFM}: a factorization-machine based neural network for
    {CTR} prediction},
  author = {Guo, Huifeng and Tang, Ruiming and Ye, Yunming and Li,
    Zhenguo and He, Xiuqiang},
  booktitle = {Proceedings of the 26th International Joint Conference on
    Artificial Intelligence},
  pages = {1725--1731},
  year = {2017},
  organization = {AAAI Press}
}
@Article{ Hadjis.Zhang.Mitliagkas.ea.2016,
  title = {{Omnivore}: An optimizer for multi-device deep learning on
    {CPUs} and {GPUs}},
  author = {Hadjis, Stefan and Zhang, Ce and Mitliagkas, Ioannis and
    Iter, Dan and R{\'e}, Christopher},
  journal = {arXiv preprint arXiv:1606.04487},
  year = {2016}
}
@InProceedings{ Hazan.Rakhlin.Bartlett.2008,
  title = {Adaptive online gradient descent},
  author = {Hazan, Elad and Rakhlin, Alexander and Bartlett, Peter L},
  booktitle = {Advances in Neural Information Processing Systems},
  pages = {65--72},
  year = {2008}
}
% NOTE(review): the title was cut off after "sparse predictive"; "analytics" is restored from the published title — confirm.
@InProceedings{ He.Chua.2017,
  title = {Neural factorization machines for sparse predictive
    analytics},
  author = {He, Xiangnan and Chua, Tat-Seng},
  booktitle = {Proceedings of the 40th International ACM SIGIR conference
    on Research and Development in Information Retrieval},
  pages = {355--364},
  year = {2017},
  organization = {ACM}
}
@InProceedings{ He.Gkioxari.Dollar.ea.2017,
  title = {{Mask R-CNN}},
  author = {He, Kaiming and Gkioxari, Georgia and Doll{\'a}r, Piotr
    and Girshick, Ross},
  booktitle = {Proceedings of the IEEE international conference on
    computer vision},
  pages = {2961--2969},
  year = {2017}
}
% NOTE(review): the organization field was cut off after "Steering"; "Committee" is restored — confirm.
@InProceedings{ He.Liao.Zhang.ea.2017,
  title = {Neural collaborative filtering},
  author = {He, Xiangnan and Liao, Lizi and Zhang, Hanwang and Nie,
    Liqiang and Hu, Xia and Chua, Tat-Seng},
  booktitle = {Proceedings of the 26th international conference on world
    wide web},
  pages = {173--182},
  year = {2017},
  organization = {International World Wide Web Conferences Steering
    Committee}
}
% NOTE(review): the author field was cut off after "Sun,"; the given name "Jian" is restored — confirm.
@InProceedings{ He.Zhang.Ren.ea.2015,
  title = {Delving deep into rectifiers: Surpassing human-level
    performance on {ImageNet} classification},
  author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun,
    Jian},
  booktitle = {Proceedings of the IEEE international conference on
    computer vision},
  pages = {1026--1034},
  year = {2015}
}
% NOTE(review): the author field was cut off after "Sun,"; the given name "Jian" is restored — confirm.
@InProceedings{ He.Zhang.Ren.ea.2016,
  title = {Deep residual learning for image recognition},
  author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun,
    Jian},
  booktitle = {Proceedings of the IEEE conference on computer vision and
    pattern recognition},
  pages = {770--778},
  year = {2016}
}
% NOTE(review): the author field was cut off after "Sun,"; the given name "Jian" is restored — confirm.
@InProceedings{ He.Zhang.Ren.ea.2016*1,
  title = {Identity mappings in deep residual networks},
  author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun,
    Jian},
  booktitle = {European conference on computer vision},
  pages = {630--645},
  year = {2016},
  organization = {Springer}
}
% NOTE(review): the author list named the same person twice ("Hebb, Donald Olding" and "Hebb, DO"); reduced to one. Key kept for existing citations.
% NOTE(review): the city is split out of the publisher field; volume = 65 looks spurious for this book — confirm.
@Book{ Hebb.Hebb.1949,
  title = {The organization of behavior},
  author = {Hebb, Donald Olding},
  volume = {65},
  year = {1949},
  publisher = {Wiley},
  address = {New York}
}
@Article{ Hendrycks.Gimpel.2016,
  title = {{Gaussian} error linear units ({GELUs})},
  author = {Hendrycks, Dan and Gimpel, Kevin},
  journal = {arXiv preprint arXiv:1606.08415},
  year = {2016}
}
@Book{ Hennessy.Patterson.2011,
  title = {Computer architecture: a quantitative approach},
  author = {Hennessy, John L and Patterson, David A},
  year = {2011},
  publisher = {Elsevier}
}
% NOTE(review): the title was cut off after "performing collaborative"; "filtering" is restored from the published title — confirm.
@InProceedings{ Herlocker.Konstan.Borchers.ea.1999,
  title = {An algorithmic framework for performing collaborative
    filtering},
  author = {Herlocker, Jonathan L and Konstan, Joseph A and Borchers,
    Al and Riedl, John},
  booktitle = {22nd Annual International ACM SIGIR Conference on Research
    and Development in Information Retrieval, SIGIR 1999},
  pages = {230--237},
  year = {1999},
  organization = {Association for Computing Machinery, Inc}
}
% NOTE(review): the title was cut off after "recurrent neural"; "networks" is restored from the published title — confirm.
@Article{ Hidasi.Karatzoglou.Baltrunas.ea.2015,
  title = {Session-based recommendations with recurrent neural
    networks},
  author = {Hidasi, Bal{\'a}zs and Karatzoglou, Alexandros and
    Baltrunas, Linas and Tikk, Domonkos},
  journal = {arXiv preprint arXiv:1511.06939},
  year = {2015}
}
% NOTE(review): the publisher field held the book title plus a truncated "IEEE"; it is split into booktitle and publisher ("IEEE Press").
% NOTE(review): the entry is retyped from Misc to InCollection because it is a chapter in an edited book — confirm.
@InCollection{ Hochreiter.Bengio.Frasconi.ea.2001,
  title = {Gradient flow in recurrent nets: the difficulty of
    learning long-term dependencies},
  author = {Hochreiter, Sepp and Bengio, Yoshua and Frasconi, Paolo
    and Schmidhuber, J{\"u}rgen and others},
  booktitle = {A field guide to dynamical recurrent neural networks},
  year = {2001},
  publisher = {IEEE Press}
}
@Article{ Hochreiter.Schmidhuber.1997,
  title = {Long short-term memory},
  author = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  journal = {Neural computation},
  volume = {9},
  number = {8},
  pages = {1735--1780},
  year = {1997},
  publisher = {MIT Press}
}
@InProceedings{ Hoyer.Janzing.Mooij.ea.2009,
  title = {Nonlinear causal discovery with additive noise models},
  author = {Hoyer, Patrik O and Janzing, Dominik and Mooij, Joris M
    and Peters, Jonas and Sch{\"o}lkopf, Bernhard},
  booktitle = {Advances in neural information processing systems},
  pages = {689--696},
  year = {2009}
}
@InProceedings{ Hu.Koren.Volinsky.2008,
  title = {Collaborative filtering for implicit feedback datasets},
  author = {Hu, Yifan and Koren, Yehuda and Volinsky, Chris},
  booktitle = {2008 Eighth IEEE International Conference on Data Mining},
  pages = {263--272},
  year = {2008},
  organization = {IEEE}
}
@Article{ Hu.Lee.Aggarwal.2020,
  title = {Text Style Transfer: A Review and Experiment Evaluation},
  author = {Hu, Zhiqiang and Lee, Roy Ka-Wei and Aggarwal, Charu C},
  journal = {arXiv preprint arXiv:2010.12742},
  year = {2020}
}
@InProceedings{ Hu.Shen.Sun.2018,
  title = {Squeeze-and-excitation networks},
  author = {Hu, Jie and Shen, Li and Sun, Gang},
  booktitle = {Proceedings of the IEEE conference on computer vision and
    pattern recognition},
  pages = {7132--7141},
  year = {2018}
}
@InProceedings{ Huang.Liu.Van-Der-Maaten.ea.2017,
  title = {Densely connected convolutional networks},
  author = {Huang, Gao and Liu, Zhuang and Van Der Maaten, Laurens and
    Weinberger, Kilian Q},
  booktitle = {Proceedings of the IEEE conference on computer vision and
    pattern recognition},
  pages = {4700--4708},
  year = {2017}
}
@InProceedings{ Ioffe.2017,
  title = {Batch renormalization: Towards reducing minibatch
    dependence in batch-normalized models},
  author = {Ioffe, Sergey},
  booktitle = {Advances in neural information processing systems},
  pages = {1945--1953},
  year = {2017}
}
@Article{ Ioffe.Szegedy.2015,
  title = {Batch normalization: Accelerating deep network training by
    reducing internal covariate shift},
  author = {Ioffe, Sergey and Szegedy, Christian},
  journal = {arXiv preprint arXiv:1502.03167},
  year = {2015}
}
% NOTE(review): the title was cut off after "and better"; "generalization" is restored from the published title — confirm.
@Article{ Izmailov.Podoprikhin.Garipov.ea.2018,
  title = {Averaging weights leads to wider optima and better
    generalization},
  author = {Izmailov, Pavel and Podoprikhin, Dmitrii and Garipov,
    Timur and Vetrov, Dmitry and Wilson, Andrew Gordon},
  journal = {arXiv preprint arXiv:1803.05407},
  year = {2018}
}
% NOTE(review): the misplaced straight quotes around "echo state network" are replaced with TeX quotes; acronyms are brace-protected.
@Book{ Jaeger.2002,
  title = {Tutorial on training recurrent neural networks, covering
    {BPPT}, {RTRL}, {EKF} and the ``echo state network'' approach},
  author = {Jaeger, Herbert},
  volume = {5},
  year = {2002},
  publisher = {GMD-Forschungszentrum Informationstechnik Bonn}
}
@Book{ James.2007,
  title = {The principles of psychology},
  author = {James, William},
  volume = {1},
  year = {2007},
  publisher = {Cosimo, Inc.}
}
@Article{ Jia.Song.He.ea.2018,
  title = {Highly scalable deep learning training system with
    mixed-precision: Training {ImageNet} in four minutes},
  author = {Jia, Xianyan and Song, Shutao and He, Wei and Wang,
    Yangzihao and Rong, Haidong and Zhou, Feihu and Xie,
    Liqiang and Guo, Zhenyu and Yang, Yuanzhou and Yu, Liwei
    and others},
  journal = {arXiv preprint arXiv:1807.11205},
  year = {2018}
}
% NOTE(review): the title was cut off after "tensor processing"; "unit" is restored from the published title — confirm.
@InProceedings{ Jouppi.Young.Patil.ea.2017,
  title = {In-datacenter performance analysis of a tensor processing
    unit},
  author = {Jouppi, Norman P and Young, Cliff and Patil, Nishant and
    Patterson, David and Agrawal, Gaurav and Bajwa, Raminder
    and Bates, Sarah and Bhatia, Suresh and Boden, Nan and
    Borchers, Al and others},
  booktitle = {2017 ACM/IEEE 44th Annual International Symposium on
    Computer Architecture (ISCA)},
  pages = {1--12},
  year = {2017},
  organization = {IEEE}
}
@Article{ Karras.Aila.Laine.ea.2017,
  title = {Progressive growing of {GANs} for improved quality,
    stability, and variation},
  author = {Karras, Tero and Aila, Timo and Laine, Samuli and
    Lehtinen, Jaakko},
  journal = {arXiv preprint arXiv:1710.10196},
  year = {2017}
}
% NOTE(review): the title was cut off after "for sentence"; "classification" is restored from the published title — confirm.
@Article{ Kim.2014,
  title = {Convolutional neural networks for sentence
    classification},
  author = {Kim, Yoon},
  journal = {arXiv preprint arXiv:1408.5882},
  year = {2014}
}
@Article{ Kingma.Ba.2014,
  title = {{Adam}: A method for stochastic optimization},
  author = {Kingma, Diederik P and Ba, Jimmy},
  journal = {arXiv preprint arXiv:1412.6980},
  year = {2014}
}
% NOTE(review): the title was cut off after "principles and"; "techniques" is restored from the published title — confirm.
@Book{ Koller.Friedman.2009,
  title = {Probabilistic graphical models: principles and
    techniques},
  author = {Koller, Daphne and Friedman, Nir},
  year = {2009},
  publisher = {MIT Press}
}
% NOTE(review): the journal value "Available online: http" is a truncated placeholder, not a journal.
% NOTE(review): this is presumably a set of course notes; the real URL should go in a url/howpublished field — TODO confirm.
@Article{ Kolter.2008,
  title = {Linear Algebra Review and Reference},
  author = {Kolter, Zico},
  journal = {Available online: http},
  year = {2008}
}
@InProceedings{ Koren.2009,
  title = {Collaborative filtering with temporal dynamics},
  author = {Koren, Yehuda},
  booktitle = {Proceedings of the 15th ACM SIGKDD international
    conference on Knowledge discovery and data mining},
  pages = {447--456},
  year = {2009},
  organization = {ACM}
}
% NOTE(review): the missing volume number (42) is added — confirm against the journal issue.
@Article{ Koren.Bell.Volinsky.2009,
  title = {Matrix factorization techniques for recommender systems},
  author = {Koren, Yehuda and Bell, Robert and Volinsky, Chris},
  journal = {Computer},
  volume = {42},
  number = {8},
  pages = {30--37},
  year = {2009},
  publisher = {IEEE}
}
% NOTE(review): the title tail ("networks") and the last author's middle initial ("E") were cut off; both are restored — confirm.
@InProceedings{ Krizhevsky.Sutskever.Hinton.2012,
  title = {{ImageNet} classification with deep convolutional neural
    networks},
  author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey
    E},
  booktitle = {Advances in neural information processing systems},
  pages = {1097--1105},
  year = {2012}
}
% NOTE(review): this is a book; the former journal field held publisher, city, page count and funding text.
% NOTE(review): that text is split into publisher/address/note and the entry is retyped from Article to Book — confirm.
@Book{ Kung.1988,
  title = {{VLSI} array processors},
  author = {Kung, Sun Yuan},
  publisher = {Prentice Hall},
  address = {Englewood Cliffs, NJ},
  note = {685 p. Research supported by the Semiconductor Research
    Corp., SDIO, NSF, and US Navy},
  year = {1988}
}
% NOTE(review): the publisher was "Taipei, Taiwan", a city and not a publisher; replaced with IEEE to match the journal — confirm.
@Article{ LeCun.Bottou.Bengio.ea.1998,
  title = {Gradient-based learning applied to document recognition},
  author = {LeCun, Yann and Bottou, L{\'e}on and Bengio, Yoshua and
    Haffner, Patrick and others},
  journal = {Proceedings of the IEEE},
  volume = {86},
  number = {11},
  pages = {2278--2324},
  year = {1998},
  publisher = {IEEE}
}
% NOTE(review): school was "PhD Thesis, CMU"; the entry type already supplies the thesis label, so only the institution is kept, expanded — confirm.
@PhDThesis{ Li.2017,
  title = {Scaling Distributed Machine Learning with System and
    Algorithm Co-design},
  author = {Li, Mu},
  year = {2017},
  school = {Carnegie Mellon University}
}
% NOTE(review): the title was cut off after "the parameter"; "server" is restored from the published title — confirm.
% The math-mode brace artefacts around USENIX and OSDI are replaced with plain protecting braces.
@InProceedings{ Li.Andersen.Park.ea.2014,
  title = {Scaling distributed machine learning with the parameter
    server},
  author = {Li, Mu and Andersen, David G and Park, Jun Woo and Smola,
    Alexander J and Ahmed, Amr and Josifovski, Vanja and Long,
    James and Shekita, Eugene J and Su, Bor-Yiing},
  booktitle = {11th {USENIX} Symposium on Operating Systems Design
    and Implementation ({OSDI} 14)},
  pages = {583--598},
  year = {2014}
}
@Article{ Lin.Chen.Yan.2013,
  title = {Network in network},
  author = {Lin, Min and Chen, Qiang and Yan, Shuicheng},
  journal = {arXiv preprint arXiv:1312.4400},
  year = {2013}
}
% NOTE(review): the author field was cut off after "Bengio,"; the given name "Yoshua" is restored — confirm.
@Article{ Lin.Feng.Santos.ea.2017,
  title = {A structured self-attentive sentence embedding},
  author = {Lin, Zhouhan and Feng, Minwei and Santos, Cicero Nogueira
    dos and Yu, Mo and Xiang, Bing and Zhou, Bowen and Bengio,
    Yoshua},
  journal = {arXiv preprint arXiv:1703.03130},
  year = {2017}
}
@InProceedings{ Lin.Goyal.Girshick.ea.2017,
  title = {Focal loss for dense object detection},
  author = {Lin, Tsung-Yi and Goyal, Priya and Girshick, Ross and He,
    Kaiming and Doll{\'a}r, Piotr},
  booktitle = {Proceedings of the IEEE international conference on
    computer vision},
  pages = {2980--2988},
  year = {2017}
}
% NOTE(review): the author field was cut off after a dangling "and"; it presumably ended with "others", which is restored — confirm.
@Article{ Lin.Lv.Zhu.ea.2010,
  title = {{ImageNet} classification: fast descriptor coding and
    large-scale {SVM} training},
  author = {Lin, Yuanqing and Lv, F and Zhu, S and Yang, M and Cour, T
    and Yu, K and Cao, L and Li, Z and Tsai, MH and Zhou, X and
    others},
  journal = {Large scale visual recognition challenge},
  year = {2010}
}
@Article{ Lipton.Steinhardt.2018,
  title = {Troubling trends in machine learning scholarship},
  author = {Lipton, Zachary C and Steinhardt, Jacob},
  journal = {arXiv preprint arXiv:1807.03341},
  year = {2018}
}
@InProceedings{ Liu.Anguelov.Erhan.ea.2016,
  title = {{SSD}: Single shot multibox detector},
  author = {Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and
    Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and
    Berg, Alexander C},
  booktitle = {European conference on computer vision},
  pages = {21--37},
  year = {2016},
  organization = {Springer}
}
@Article{ Liu.Ott.Goyal.ea.2019,
  title = {{RoBERTa}: A robustly optimized {BERT} pretraining approach},
  author = {Liu, Yinhan and Ott, Myle and Goyal, Naman and Du, Jingfei
    and Joshi, Mandar and Chen, Danqi and Levy, Omer and Lewis,
    Mike and Zettlemoyer, Luke and Stoyanov, Veselin},
  journal = {arXiv preprint arXiv:1907.11692},
  year = {2019}
}
@InProceedings{ Long.Shelhamer.Darrell.2015,
  title = {Fully convolutional networks for semantic segmentation},
  author = {Long, Jonathan and Shelhamer, Evan and Darrell, Trevor},
  booktitle = {Proceedings of the IEEE conference on computer vision and
    pattern recognition},
  pages = {3431--3440},
  year = {2015}
}
@Article{ Loshchilov.Hutter.2016,
  title = {{SGDR}: Stochastic gradient descent with warm restarts},
  author = {Loshchilov, Ilya and Hutter, Frank},
  journal = {arXiv preprint arXiv:1608.03983},
  year = {2016}
}
% NOTE(review): the title was cut off after "scale-invariant"; "keypoints" is restored from the published title — confirm.
@Article{ Lowe.2004,
  title = {Distinctive image features from scale-invariant
    keypoints},
  author = {Lowe, David G},
  journal = {International journal of computer vision},
  volume = {60},
  number = {2},
  pages = {91--110},
  year = {2004},
  publisher = {Springer}
}
% NOTE(review): the title tail ("normalization") and the last author's given name ("Zhanglin") were cut off; both are restored — confirm.
% NOTE(review): the journal field lacks the arXiv identifier — TODO add it.
@Article{ Luo.Wang.Shao.ea.2018,
  title = {Towards understanding regularization in batch
    normalization},
  author = {Luo, Ping and Wang, Xinjiang and Shao, Wenqi and Peng,
    Zhanglin},
  journal = {arXiv preprint},
  year = {2018}
}
@InProceedings{ Maas.Daly.Pham.ea.2011,
  title = {Learning word vectors for sentiment analysis},
  author = {Maas, Andrew L and Daly, Raymond E and Pham, Peter T and
    Huang, Dan and Ng, Andrew Y and Potts, Christopher},
  booktitle = {Proceedings of the 49th annual meeting of the association
    for computational linguistics: Human language
    technologies-volume 1},
  pages = {142--150},
  year = {2011},
  organization = {Association for Computational Linguistics}
}
@InProceedings{ McCann.Bradbury.Xiong.ea.2017,
  title = {Learned in translation: Contextualized word vectors},
  author = {McCann, Bryan and Bradbury, James and Xiong, Caiming and
    Socher, Richard},
  booktitle = {Advances in Neural Information Processing Systems},
  pages = {6294--6305},
  year = {2017}
}
% NOTE(review): the title was cut off after "immanent in nervous"; "activity" is restored from the published title — confirm.
@Article{ McCulloch.Pitts.1943,
  title = {A logical calculus of the ideas immanent in nervous
    activity},
  author = {McCulloch, Warren S and Pitts, Walter},
  journal = {The bulletin of mathematical biophysics},
  volume = {5},
  number = {4},
  pages = {115--133},
  year = {1943},
  publisher = {Springer}
}
@InProceedings{ McMahan.Holt.Sculley.ea.2013,
  title = {Ad click prediction: a view from the trenches},
  author = {McMahan, H Brendan and Holt, Gary and Sculley, David and
    Young, Michael and Ebner, Dietmar and Grady, Julian and
    Nie, Lan and Phillips, Todd and Davydov, Eugene and
    Golovin, Daniel and others},
  booktitle = {Proceedings of the 19th ACM SIGKDD international
    conference on Knowledge discovery and data mining},
  pages = {1222--1230},
  year = {2013},
  organization = {ACM}
}
@Article{ Merity.Xiong.Bradbury.ea.2016,
  title = {Pointer sentinel mixture models},
  author = {Merity, Stephen and Xiong, Caiming and Bradbury, James and
    Socher, Richard},
  journal = {arXiv preprint arXiv:1609.07843},
  year = {2016}
}
% NOTE(review): the title tail ("space") and the last author's given name ("Jeffrey") were cut off; both are restored — confirm.
@Article{ Mikolov.Chen.Corrado.ea.2013,
  title = {Efficient estimation of word representations in vector
    space},
  author = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean,
    Jeffrey},
  journal = {arXiv preprint arXiv:1301.3781},
  year = {2013}
}
% NOTE(review): the title was cut off after "and their"; "compositionality" is restored from the published title — confirm.
@InProceedings{ Mikolov.Sutskever.Chen.ea.2013,
  title = {Distributed representations of words and phrases and their
    compositionality},
  author = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and
    Corrado, Greg S and Dean, Jeff},
  booktitle = {Advances in neural information processing systems},
  pages = {3111--3119},
  year = {2013}
}
% NOTE(review): the title was cut off after "with reinforcement"; "learning" is restored from the published title — confirm.
@InProceedings{ Mirhoseini.Pham.Le.ea.2017,
  title = {Device placement optimization with reinforcement
    learning},
  author = {Mirhoseini, Azalia and Pham, Hieu and Le, Quoc V and
    Steiner, Benoit and Larsen, Rasmus and Zhou, Yuefeng and
    Kumar, Naveen and Norouzi, Mohammad and Bengio, Samy and
    Dean, Jeff},
  booktitle = {Proceedings of the 34th International Conference on
    Machine Learning-Volume 70},
  pages = {2430--2439},
  year = {2017},
  organization = {JMLR.org}
}
% NOTE(review): the author field was cut off after a dangling "and"; it presumably ended with "others", which is restored — confirm.
@InProceedings{ Mnih.Heess.Graves.ea.2014,
  title = {Recurrent models of visual attention},
  author = {Mnih, Volodymyr and Heess, Nicolas and Graves, Alex and
    others},
  booktitle = {Advances in neural information processing systems},
  pages = {2204--2212},
  year = {2014}
}
% NOTE(review): the title was cut off after "confidence in confidence"; "intervals" is restored from the published title — confirm.
@Article{ Morey.Hoekstra.Rouder.ea.2016,
  title = {The fallacy of placing confidence in confidence
    intervals},
  author = {Morey, Richard D and Hoekstra, Rink and Rouder, Jeffrey N
    and Lee, Michael D and Wagenmakers, Eric-Jan},
  journal = {Psychonomic bulletin \& review},
  volume = {23},
  number = {1},
  pages = {103--123},
  year = {2016},
  publisher = {Springer}
}
@Article{ Nadaraya.1964,
  title = {On estimating regression},
  author = {Nadaraya, Elizbar A},
  journal = {Theory of Probability \& Its Applications},
  volume = {9},
  number = {1},
  pages = {141--142},
  year = {1964},
  publisher = {SIAM}
}
@Book{ Nesterov.2018,
  title = {Lectures on convex optimization},
  author = {Nesterov, Yurii},
  volume = {137},
  year = {2018},
  publisher = {Springer}
}
% NOTE(review): the series name "Stochastic Programming E-Print Series" was fused into the title; moved to howpublished — confirm.
@Misc{ Nesterov.Vial.2000,
  title = {Confidence level solutions for stochastic programming},
  author = {Nesterov, Yu and Vial, J-Ph},
  howpublished = {Stochastic Programming E-Print Series},
  year = {2000}
}
@Article{ Neyman.1937,
  title = {Outline of a theory of statistical estimation based on the
    classical theory of probability},
  author = {Neyman, Jerzy},
  journal = {Philosophical Transactions of the Royal Society of London.
    Series A, Mathematical and Physical Sciences},
  volume = {236},
  number = {767},
  pages = {333--380},
  year = {1937},
  publisher = {The Royal Society London}
}
% NOTE(review): the title was cut off after "evaluation of machine"; "translation" is restored from the published title — confirm.
@InProceedings{ Papineni.Roukos.Ward.ea.2002,
  title = {{BLEU}: a method for automatic evaluation of machine
    translation},
  author = {Papineni, Kishore and Roukos, Salim and Ward, Todd and
    Zhu, Wei-Jing},
  booktitle = {Proceedings of the 40th annual meeting of the Association
    for Computational Linguistics},
  pages = {311--318},
  year = {2002}
}
% NOTE(review): the title was cut off after "natural language"; "inference" is restored from the published title — confirm.
@Article{ Parikh.Tackstrom.Das.ea.2016,
  title = {A decomposable attention model for natural language
    inference},
  author = {Parikh, Ankur P and T{\"a}ckstr{\"o}m, Oscar and Das,
    Dipanjan and Uszkoreit, Jakob},
  journal = {arXiv preprint arXiv:1606.01933},
  year = {2016}
}
% NOTE(review): the title was cut off after "spatially-adaptive"; "normalization" is restored from the published title — confirm.
@InProceedings{ Park.Liu.Wang.ea.2019,
  title = {Semantic image synthesis with spatially-adaptive
    normalization},
  author = {Park, Taesung and Liu, Ming-Yu and Wang, Ting-Chun and
    Zhu, Jun-Yan},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and
    Pattern Recognition},
  pages = {2337--2346},
  year = {2019}
}
@Article{ Paulus.Xiong.Socher.2017,
  title        = {A deep reinforced model for abstractive summarization},
  author       = {Paulus, Romain and Xiong, Caiming and Socher, Richard},
  journal      = {arXiv preprint arXiv:1705.04304},
  year         = {2017}
}
@InProceedings{ Pennington.Schoenholz.Ganguli.2017,
  title        = {Resurrecting the sigmoid in deep learning through
                  dynamical isometry: theory and practice},
  author       = {Pennington, Jeffrey and Schoenholz, Samuel and Ganguli,
                  Surya},
  booktitle    = {Advances in neural information processing systems},
  pages        = {4785--4795},
  year         = {2017}
}
@InProceedings{ Pennington.Socher.Manning.2014,
  title        = {{GloVe}: Global vectors for word representation},
  author       = {Pennington, Jeffrey and Socher, Richard and Manning,
                  Christopher D},
  booktitle    = {Proceedings of the 2014 conference on empirical methods in
                  natural language processing ({EMNLP})},
  pages        = {1532--1543},
  year         = {2014}
}
@InProceedings{ Peters.Ammar.Bhagavatula.ea.2017,
  title        = {Semi-supervised sequence tagging with bidirectional
                  language models},
  author       = {Peters, Matthew and Ammar, Waleed and Bhagavatula, Chandra
                  and Power, Russell},
  booktitle    = {Proceedings of the 55th Annual Meeting of the Association
                  for Computational Linguistics (Volume 1: Long Papers)},
  pages        = {1756--1765},
  year         = {2017}
}
@Book{ Peters.Janzing.Scholkopf.2017,
  title        = {Elements of causal inference: foundations and learning
                  algorithms},
  author       = {Peters, Jonas and Janzing, Dominik and Sch{\"o}lkopf,
                  Bernhard},
  year         = {2017},
  publisher    = {MIT Press}
}
@InProceedings{ Peters.Neumann.Iyyer.ea.2018,
  title        = {Deep Contextualized Word Representations},
  author       = {Peters, Matthew and Neumann, Mark and Iyyer, Mohit and
                  Gardner, Matt and Clark, Christopher and Lee, Kenton and
                  Zettlemoyer, Luke},
  booktitle    = {Proceedings of the 2018 Conference of the North American
                  Chapter of the Association for Computational Linguistics:
                  Human Language Technologies, Volume 1 (Long Papers)},
  pages        = {2227--2237},
  year         = {2018}
}
@Article{ Petersen.Pedersen.ea.2008,
  title        = {The matrix cookbook},
  author       = {Petersen, Kaare Brandt and Pedersen, Michael Syskind and
                  others},
  journal      = {Technical University of Denmark},
  volume       = {7},
  number       = {15},
  pages        = {510},
  year         = {2008}
}
@Article{ Polyak.1964,
  title        = {Some methods of speeding up the convergence of iteration
                  methods},
  author       = {Polyak, Boris T},
  journal      = {USSR Computational Mathematics and Mathematical Physics},
  volume       = {4},
  number       = {5},
  pages        = {1--17},
  year         = {1964},
  publisher    = {Elsevier}
}
@Article{ Quadrana.Cremonesi.Jannach.2018,
  title        = {Sequence-aware recommender systems},
  author       = {Quadrana, Massimo and Cremonesi, Paolo and Jannach,
                  Dietmar},
  journal      = {ACM Computing Surveys (CSUR)},
  volume       = {51},
  number       = {4},
  pages        = {66},
  year         = {2018},
  publisher    = {ACM}
}
@Article{ Radford.Metz.Chintala.2015,
  title        = {Unsupervised representation learning with deep
                  convolutional generative adversarial networks},
  author       = {Radford, Alec and Metz, Luke and Chintala, Soumith},
  journal      = {arXiv preprint arXiv:1511.06434},
  year         = {2015}
}
@Article{ Radford.Narasimhan.Salimans.ea.2018,
  title        = {Improving language understanding by generative
                  pre-training},
  author       = {Radford, Alec and Narasimhan, Karthik and Salimans, Tim
                  and Sutskever, Ilya},
  journal      = {OpenAI},
  year         = {2018}
}
@Article{ Radford.Wu.Child.ea.2019,
  title        = {Language models are unsupervised multitask learners},
  author       = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan,
                  David and Amodei, Dario and Sutskever, Ilya},
  journal      = {OpenAI Blog},
  volume       = {1},
  number       = {8},
  pages        = {9},
  year         = {2019}
}
@Article{ Rajpurkar.Zhang.Lopyrev.ea.2016,
  title        = {{SQuAD}: 100,000+ questions for machine comprehension of
                  text},
  author       = {Rajpurkar, Pranav and Zhang, Jian and Lopyrev, Konstantin
                  and Liang, Percy},
  journal      = {arXiv preprint arXiv:1606.05250},
  year         = {2016}
}
@Article{ Reddi.Kale.Kumar.2019,
  title        = {On the convergence of {Adam} and beyond},
  author       = {Reddi, Sashank J and Kale, Satyen and Kumar, Sanjiv},
  journal      = {arXiv preprint arXiv:1904.09237},
  year         = {2019}
}
@Article{ Reed.De-Freitas.2015,
  title        = {Neural programmer-interpreters},
  author       = {Reed, Scott and De Freitas, Nando},
  journal      = {arXiv preprint arXiv:1511.06279},
  year         = {2015}
}
@InProceedings{ Ren.He.Girshick.ea.2015,
  title        = {Faster {R-CNN}: Towards real-time object detection with
                  region proposal networks},
  author       = {Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun,
                  Jian},
  booktitle    = {Advances in neural information processing systems},
  pages        = {91--99},
  year         = {2015}
}
@InProceedings{ Rendle.2010,
  title        = {Factorization machines},
  author       = {Rendle, Steffen},
  booktitle    = {2010 IEEE International Conference on Data Mining},
  pages        = {995--1000},
  year         = {2010},
  organization = {IEEE}
}
@InProceedings{ Rendle.Freudenthaler.Gantner.ea.2009,
  title        = {{BPR}: {Bayesian} personalized ranking from implicit
                  feedback},
  author       = {Rendle, Steffen and Freudenthaler, Christoph and Gantner,
                  Zeno and Schmidt-Thieme, Lars},
  booktitle    = {Proceedings of the twenty-fifth conference on uncertainty
                  in artificial intelligence},
  pages        = {452--461},
  year         = {2009},
  organization = {AUAI Press}
}
@Article{ Rumelhart.Hinton.Williams.ea.1988,
  title        = {Learning representations by back-propagating errors},
  author       = {Rumelhart, David E and Hinton, Geoffrey E and Williams,
                  Ronald J and others},
  journal      = {Cognitive modeling},
  volume       = {5},
  number       = {3},
  pages        = {1},
  year         = {1988}
}
@Book{ Russell.Norvig.2016,
  title        = {Artificial intelligence: a modern approach},
  author       = {Russell, Stuart J and Norvig, Peter},
  year         = {2016},
  publisher    = {Pearson Education Limited},
  address      = {Malaysia}
}
@Article{ Salton.Wong.Yang.1975,
  title        = {A vector space model for automatic indexing},
  author       = {Salton, Gerard and Wong, Anita and Yang, Chung-Shu},
  journal      = {Communications of the ACM},
  volume       = {18},
  number       = {11},
  pages        = {613--620},
  year         = {1975},
  publisher    = {ACM}
}
@InProceedings{ Santurkar.Tsipras.Ilyas.ea.2018,
  title        = {How does batch normalization help optimization?},
  author       = {Santurkar, Shibani and Tsipras, Dimitris and Ilyas, Andrew
                  and Madry, Aleksander},
  booktitle    = {Advances in Neural Information Processing Systems},
  pages        = {2483--2493},
  year         = {2018}
}
@InProceedings{ Sarwar.Karypis.Konstan.ea.2001,
  title        = {Item-based collaborative filtering recommendation
                  algorithms},
  author       = {Sarwar, Badrul Munir and Karypis, George and Konstan,
                  Joseph A and Riedl, John and others},
  booktitle    = {Proceedings of the 10th International Conference on World
                  Wide Web},
  pages        = {285--295},
  year         = {2001}
}
@InProceedings{ Schein.Popescul.Ungar.ea.2002,
  title        = {Methods and metrics for cold-start recommendations},
  author       = {Schein, Andrew I and Popescul, Alexandrin and Ungar, Lyle
                  H and Pennock, David M},
  booktitle    = {Proceedings of the 25th annual international ACM SIGIR
                  conference on Research and development in information
                  retrieval},
  pages        = {253--260},
  year         = {2002},
  organization = {ACM}
}
@Article{ Schuster.Paliwal.1997,
  title        = {Bidirectional recurrent neural networks},
  author       = {Schuster, Mike and Paliwal, Kuldip K},
  journal      = {IEEE Transactions on Signal Processing},
  volume       = {45},
  number       = {11},
  pages        = {2673--2681},
  year         = {1997},
  publisher    = {IEEE}
}
@InProceedings{ Sedhain.Menon.Sanner.ea.2015,
  title        = {{AutoRec}: Autoencoders meet collaborative filtering},
  author       = {Sedhain, Suvash and Menon, Aditya Krishna and Sanner,
                  Scott and Xie, Lexing},
  booktitle    = {Proceedings of the 24th International Conference on World
                  Wide Web},
  pages        = {111--112},
  year         = {2015},
  organization = {ACM}
}
@Article{ Sennrich.Haddow.Birch.2015,
  title        = {Neural machine translation of rare words with subword
                  units},
  author       = {Sennrich, Rico and Haddow, Barry and Birch, Alexandra},
  journal      = {arXiv preprint arXiv:1508.07909},
  year         = {2015}
}
@Article{ Sergeev.Del-Balso.2018,
  title        = {Horovod: fast and easy distributed deep learning in
                  {TensorFlow}},
  author       = {Sergeev, Alexander and Del Balso, Mike},
  journal      = {arXiv preprint arXiv:1802.05799},
  year         = {2018}
}
@Article{ Shannon.1948,
  title        = {A Mathematical Theory of Communication},
  author       = {Shannon, Claude Elwood},
  journal      = {The Bell System Technical Journal},
  volume       = {27},
  number       = {3},
  pages        = {379--423},
  month        = jul,
  year         = {1948},
  publisher    = {Nokia Bell Labs}
}
@InProceedings{ Shao.Yao.Sun.ea.2020,
  title        = {{ControlVAE}: Controllable Variational Autoencoder},
  author       = {Shao, Huajie and Yao, Shuochao and Sun, Dachun and Zhang,
                  Aston and Liu, Shengzhong and Liu, Dongxin and Wang, Jun
                  and Abdelzaher, Tarek},
  booktitle    = {Proceedings of the 37th International Conference on
                  Machine Learning},
  year         = {2020},
  organization = {JMLR.org}
}
@Article{ Silver.Huang.Maddison.ea.2016,
  title        = {Mastering the game of {Go} with deep neural networks and
                  tree search},
  author       = {Silver, David and Huang, Aja and Maddison, Chris J and
                  Guez, Arthur and Sifre, Laurent and Van Den Driessche,
                  George and Schrittwieser, Julian and Antonoglou, Ioannis
                  and Panneershelvam, Veda and Lanctot, Marc and others},
  journal      = {Nature},
  volume       = {529},
  number       = {7587},
  pages        = {484--489},
  year         = {2016},
  publisher    = {Nature Publishing Group}
}
@Article{ Simonyan.Zisserman.2014,
  title        = {Very deep convolutional networks for large-scale image
                  recognition},
  author       = {Simonyan, Karen and Zisserman, Andrew},
  journal      = {arXiv preprint arXiv:1409.1556},
  year         = {2014}
}
@Article{ Smola.Narayanamurthy.2010,
  title        = {An architecture for parallel topic models},
  author       = {Smola, Alexander and Narayanamurthy, Shravan},
  journal      = {Proceedings of the VLDB Endowment},
  volume       = {3},
  number       = {1--2},
  pages        = {703--710},
  year         = {2010},
  publisher    = {VLDB Endowment}
}
@Article{ Srivastava.Hinton.Krizhevsky.ea.2014,
  title        = {Dropout: a simple way to prevent neural networks from
                  overfitting},
  author       = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky,
                  Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  journal      = {The Journal of Machine Learning Research},
  volume       = {15},
  number       = {1},
  pages        = {1929--1958},
  year         = {2014},
  publisher    = {JMLR.org}
}
@Book{ Strang.1993,
  title        = {Introduction to linear algebra},
  author       = {Strang, Gilbert},
  volume       = {3},
  year         = {1993},
  publisher    = {Wellesley-Cambridge Press},
  address      = {Wellesley, MA}
}
@Article{ Su.Khoshgoftaar.2009,
  title        = {A survey of collaborative filtering techniques},
  author       = {Su, Xiaoyuan and Khoshgoftaar, Taghi M},
  journal      = {Advances in artificial intelligence},
  volume       = {2009},
  year         = {2009},
  publisher    = {Hindawi}
}
@InProceedings{ Sukhbaatar.Weston.Fergus.ea.2015,
  title        = {End-to-end memory networks},
  author       = {Sukhbaatar, Sainbayar and Weston, Jason and Fergus, Rob
                  and others},
  booktitle    = {Advances in neural information processing systems},
  pages        = {2440--2448},
  year         = {2015}
}
@InProceedings{ Sutskever.Martens.Dahl.ea.2013,
  title        = {On the importance of initialization and momentum in deep
                  learning},
  author       = {Sutskever, Ilya and Martens, James and Dahl, George and
                  Hinton, Geoffrey},
  booktitle    = {International conference on machine learning},
  pages        = {1139--1147},
  year         = {2013}
}
@InProceedings{ Sutskever.Vinyals.Le.2014,
  title        = {Sequence to sequence learning with neural networks},
  author       = {Sutskever, Ilya and Vinyals, Oriol and Le, Quoc V},
  booktitle    = {Advances in neural information processing systems},
  pages        = {3104--3112},
  year         = {2014}
}
@InProceedings{ Szegedy.Ioffe.Vanhoucke.ea.2017,
  title        = {{Inception-v4}, {Inception-ResNet} and the impact of
                  residual connections on learning},
  author       = {Szegedy, Christian and Ioffe, Sergey and Vanhoucke,
                  Vincent and Alemi, Alexander A},
  booktitle    = {Thirty-First {AAAI} Conference on Artificial Intelligence},
  year         = {2017}
}
@InProceedings{ Szegedy.Liu.Jia.ea.2015,
  title        = {Going deeper with convolutions},
  author       = {Szegedy, Christian and Liu, Wei and Jia, Yangqing and
                  Sermanet, Pierre and Reed, Scott and Anguelov, Dragomir and
                  Erhan, Dumitru and Vanhoucke, Vincent and Rabinovich,
                  Andrew},
  booktitle    = {Proceedings of the IEEE conference on computer vision and
                  pattern recognition},
  pages        = {1--9},
  year         = {2015}
}
@InProceedings{ Szegedy.Vanhoucke.Ioffe.ea.2016,
  title        = {Rethinking the inception architecture for computer
                  vision},
  author       = {Szegedy, Christian and Vanhoucke, Vincent and Ioffe,
                  Sergey and Shlens, Jon and Wojna, Zbigniew},
  booktitle    = {Proceedings of the IEEE conference on computer vision and
                  pattern recognition},
  pages        = {2818--2826},
  year         = {2016}
}
@Article{ Tallec.Ollivier.2017,
  title        = {Unbiasing truncated backpropagation through time},
  author       = {Tallec, Corentin and Ollivier, Yann},
  journal      = {arXiv preprint arXiv:1705.08209},
  year         = {2017}
}
@InProceedings{ Tang.Wang.2018,
  title        = {Personalized top-n sequential recommendation via
                  convolutional sequence embedding},
  author       = {Tang, Jiaxi and Wang, Ke},
  booktitle    = {Proceedings of the Eleventh ACM International Conference
                  on Web Search and Data Mining},
  pages        = {565--573},
  year         = {2018},
  organization = {ACM}
}
@Article{ Tay.Dehghani.Bahri.ea.2020,
  title        = {Efficient transformers: A survey},
  author       = {Tay, Yi and Dehghani, Mostafa and Bahri, Dara and Metzler,
                  Donald},
  journal      = {arXiv preprint arXiv:2009.06732},
  year         = {2020}
}
@Article{ Teye.Azizpour.Smith.2018,
  title        = {{Bayesian} uncertainty estimation for batch normalized deep
                  networks},
  author       = {Teye, Mattias and Azizpour, Hossein and Smith, Kevin},
  journal      = {arXiv preprint arXiv:1802.06455},
  year         = {2018}
}
@Article{ Tieleman.Hinton.2012,
  title        = {Lecture 6.5-rmsprop: Divide the gradient by a running
                  average of its recent magnitude},
  author       = {Tieleman, Tijmen and Hinton, Geoffrey},
  journal      = {COURSERA: Neural networks for machine learning},
  volume       = {4},
  number       = {2},
  pages        = {26--31},
  year         = {2012}
}
@Article{ Toscher.Jahrer.Bell.2009,
  title        = {The {BigChaos} solution to the {Netflix} grand prize},
  author       = {T{\"o}scher, Andreas and Jahrer, Michael and Bell, Robert
                  M},
  journal      = {Netflix prize documentation},
  pages        = {1--52},
  year         = {2009}
}
@Article{ Treisman.Gelade.1980,
  title        = {A feature-integration theory of attention},
  author       = {Treisman, Anne M and Gelade, Garry},
  journal      = {Cognitive psychology},
  volume       = {12},
  number       = {1},
  pages        = {97--136},
  year         = {1980},
  publisher    = {Elsevier}
}
@Article{ Turing.1950,
  title        = {Computing machinery and intelligence},
  author       = {Turing, Alan},
  journal      = {Mind},
  volume       = {59},
  number       = {236},
  pages        = {433--460},
  year         = {1950}
}
@Article{ Uijlings.Van-De-Sande.Gevers.ea.2013,
  title        = {Selective search for object recognition},
  author       = {Uijlings, Jasper RR and Van De Sande, Koen EA and Gevers,
                  Theo and Smeulders, Arnold WM},
  journal      = {International journal of computer vision},
  volume       = {104},
  number       = {2},
  pages        = {154--171},
  year         = {2013},
  publisher    = {Springer}
}
@Book{ Van-Loan.Golub.1983,
  title        = {Matrix computations},
  author       = {Van Loan, Charles F and Golub, Gene H},
  year         = {1983},
  publisher    = {Johns Hopkins University Press}
}
@InProceedings{ Vaswani.Shazeer.Parmar.ea.2017,
  title        = {Attention is all you need},
  author       = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and
                  Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and
                  Kaiser, {\L}ukasz and Polosukhin, Illia},
  booktitle    = {Advances in neural information processing systems},
  pages        = {5998--6008},
  year         = {2017}
}
@InProceedings{ Wang.Davidson.Pan.ea.2016,
  title        = {Gunrock: A high-performance graph processing library on
                  the {GPU}},
  author       = {Wang, Yangzihao and Davidson, Andrew and Pan, Yuechao and
                  Wu, Yuduo and Riffel, Andy and Owens, John D},
  booktitle    = {ACM SIGPLAN Notices},
  volume       = {51},
  number       = {8},
  pages        = {11},
  year         = {2016},
  organization = {ACM}
}
@Article{ Wang.Li.Liberty.ea.2018,
  title        = {Optimal Message Scheduling for Aggregation},
  author       = {Wang, Leyuan and Li, Mu and Liberty, Edo and Smola, Alex
                  J},
  journal      = {NETWORKS},
  volume       = {2},
  number       = {3},
  pages        = {2--3},
  year         = {2018}
}
@Article{ Warstadt.Singh.Bowman.2019,
  title        = {Neural network acceptability judgments},
  author       = {Warstadt, Alex and Singh, Amanpreet and Bowman, Samuel R},
  journal      = {Transactions of the Association for Computational
                  Linguistics},
  volume       = {7},
  pages        = {625--641},
  year         = {2019},
  publisher    = {MIT Press}
}
@Book{ Wasserman.2013,
  title        = {All of statistics: a concise course in statistical
                  inference},
  author       = {Wasserman, Larry},
  year         = {2013},
  publisher    = {Springer Science \& Business Media}
}
@Article{ Watkins.Dayan.1992,
  title        = {{Q-learning}},
  author       = {Watkins, Christopher JCH and Dayan, Peter},
  journal      = {Machine learning},
  volume       = {8},
  number       = {3--4},
  pages        = {279--292},
  year         = {1992},
  publisher    = {Springer}
}
@Article{ Watson.1964,
  title        = {Smooth regression analysis},
  author       = {Watson, Geoffrey S},
  journal      = {Sankhy{\=a}: The Indian Journal of Statistics, Series A},
  volume       = {26},
  number       = {4},
  pages        = {359--372},
  year         = {1964},
  publisher    = {JSTOR}
}
@InProceedings{ Welling.Teh.2011,
  title        = {{Bayesian} learning via stochastic gradient {Langevin}
                  dynamics},
  author       = {Welling, Max and Teh, Yee W},
  booktitle    = {Proceedings of the 28th international conference on
                  machine learning (ICML-11)},
  pages        = {681--688},
  year         = {2011}
}
@Article{ Werbos.1990,
  title        = {Backpropagation through time: what it does and how to do
                  it},
  author       = {Werbos, Paul J},
  journal      = {Proceedings of the IEEE},
  volume       = {78},
  number       = {10},
  pages        = {1550--1560},
  year         = {1990},
  publisher    = {IEEE}
}
@Article{ Wigner.1958,
  title        = {On the distribution of the roots of certain symmetric
                  matrices},
  author       = {Wigner, Eugene P.},
  journal      = {Annals of Mathematics},
  volume       = {67},
  number       = {2},
  pages        = {325--327},
  year         = {1958}
}
@TechReport{ Williams.Waterman.Patterson.2009,
  title        = {Roofline: An insightful visual performance model for
                  floating-point programs and multicore architectures},
  author       = {Williams, Samuel and Waterman, Andrew and Patterson,
                  David},
  year         = {2009},
  institution  = {Lawrence Berkeley National Lab.(LBNL), Berkeley, CA
                  (United States)}
}
@Article{ Wood.Gasthaus.Archambeau.ea.2011,
  title        = {The sequence memoizer},
  author       = {Wood, Frank and Gasthaus, Jan and Archambeau, C{\'e}dric
                  and James, Lancelot and Teh, Yee Whye},
  journal      = {Communications of the ACM},
  volume       = {54},
  number       = {2},
  pages        = {91--98},
  year         = {2011},
  publisher    = {ACM}
}
@InProceedings{ Wu.Ahmed.Beutel.ea.2017,
  title        = {Recurrent recommender networks},
  author       = {Wu, Chao-Yuan and Ahmed, Amr and Beutel, Alex and Smola,
                  Alexander J and Jing, How},
  booktitle    = {Proceedings of the tenth ACM international conference on
                  web search and data mining},
  pages        = {495--503},
  year         = {2017},
  organization = {ACM}
}
@Article{ Wu.Schuster.Chen.ea.2016,
  title        = {{Google's} neural machine translation system: Bridging the
                  gap between human and machine translation},
  author       = {Wu, Yonghui and Schuster, Mike and Chen, Zhifeng and Le,
                  Quoc V and Norouzi, Mohammad and Macherey, Wolfgang and
                  Krikun, Maxim and Cao, Yuan and Gao, Qin and Macherey,
                  Klaus and others},
  journal      = {arXiv preprint arXiv:1609.08144},
  year         = {2016}
}
@InProceedings{ Xiao.Bahri.Sohl-Dickstein.ea.2018,
  title        = {Dynamical Isometry and a Mean Field Theory of {CNNs}: How
                  to Train 10,000-Layer Vanilla Convolutional Neural
                  Networks},
  author       = {Xiao, Lechao and Bahri, Yasaman and Sohl-Dickstein, Jascha
                  and Schoenholz, Samuel and Pennington, Jeffrey},
  booktitle    = {International Conference on Machine Learning},
  pages        = {5393--5402},
  year         = {2018}
}
@Article{ Xiao.Rasul.Vollgraf.2017,
  title        = {{Fashion-MNIST}: a novel image dataset for benchmarking
                  machine learning algorithms},
  author       = {Xiao, Han and Rasul, Kashif and Vollgraf, Roland},
  journal      = {arXiv preprint arXiv:1708.07747},
  year         = {2017}
}
@InProceedings{ Xiong.Wu.Alleva.ea.2018,
  title        = {The {Microsoft} 2017 conversational speech recognition
                  system},
  author       = {Xiong, Wayne and Wu, Lingfeng and Alleva, Fil and Droppo,
                  Jasha and Huang, Xuedong and Stolcke, Andreas},
  booktitle    = {2018 IEEE International Conference on Acoustics, Speech
                  and Signal Processing (ICASSP)},
  pages        = {5934--5938},
  year         = {2018},
  organization = {IEEE}
}
@InProceedings{ Ye.Yin.Lee.ea.2011,
  title        = {Exploiting geographical influence for collaborative
                  point-of-interest recommendation},
  author       = {Ye, Mao and Yin, Peifeng and Lee, Wang-Chien and Lee,
                  Dik-Lun},
  booktitle    = {Proceedings of the 34th international ACM SIGIR conference
                  on Research and development in Information Retrieval},
  pages        = {325--334},
  year         = {2011},
  organization = {ACM}
}
@Article{ You.Gitman.Ginsburg.2017,
  title        = {Large batch training of convolutional networks},
  author       = {You, Yang and Gitman, Igor and Ginsburg, Boris},
  journal      = {arXiv preprint arXiv:1708.03888},
  year         = {2017}
}
@InProceedings{ Zaheer.Reddi.Sachan.ea.2018,
  title        = {Adaptive methods for nonconvex optimization},
  author       = {Zaheer, Manzil and Reddi, Sashank and Sachan, Devendra and
                  Kale, Satyen and Kumar, Sanjiv},
  booktitle    = {Advances in Neural Information Processing Systems},
  pages        = {9793--9803},
  year         = {2018}
}
@Article{ Zeiler.2012,
  title        = {{ADADELTA}: an adaptive learning rate method},
  author       = {Zeiler, Matthew D},
  journal      = {arXiv preprint arXiv:1212.5701},
  year         = {2012}
}
@InProceedings{ Zhang.Tay.Zhang.ea.2021,
  title        = {Parameterization of Hypercomplex Multiplications},
  author       = {Zhang, Aston and Tay, Yi and Zhang, Shuai and Chan, Alvin
                  and Luu, Anh Tuan and Hui, Siu Cheung and Fu, Jie},
  booktitle    = {International Conference on Learning Representations},
  year         = {2021}
}
@Article{ Zhang.Yao.Sun.ea.2019,
  title        = {Deep learning based recommender system: A survey and new
                  perspectives},
  author       = {Zhang, Shuai and Yao, Lina and Sun, Aixin and Tay, Yi},
  journal      = {ACM Computing Surveys (CSUR)},
  volume       = {52},
  number       = {1},
  pages        = {5},
  year         = {2019},
  publisher    = {ACM}
}
@InProceedings{ Zhu.Kiros.Zemel.ea.2015,
  title        = {Aligning books and movies: Towards story-like visual
                  explanations by watching movies and reading books},
  author       = {Zhu, Yukun and Kiros, Ryan and Zemel, Rich and
                  Salakhutdinov, Ruslan and Urtasun, Raquel and Torralba,
                  Antonio and Fidler, Sanja},
  booktitle    = {Proceedings of the IEEE international conference on
                  computer vision},
  pages        = {19--27},
  year         = {2015}
}
@InProceedings{ Zhu.Park.Isola.ea.2017,
  title        = {Unpaired image-to-image translation using cycle-consistent
                  adversarial networks},
  author       = {Zhu, Jun-Yan and Park, Taesung and Isola, Phillip and
                  Efros, Alexei A},
  booktitle    = {Proceedings of the IEEE international conference on
                  computer vision},
  pages        = {2223--2232},
  year         = {2017}
}
