9 Star 14 Fork 3

Gitee 极速下载/Horovod

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
此仓库是为了提升国内下载速度的镜像仓库,每日同步一次。 原始仓库: https://github.com/uber/horovod
克隆/下载
Dockerfile.test.cpu 14.26 KB
一键复制 编辑 原始数据 按行查看 历史
# Base image. UBUNTU_VERSION is declared before FROM so that it can select the image tag.
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION}
# Arguments for the build. UBUNTU_VERSION needs to be repeated because
# the first usage only applies to the FROM tag.
ARG UBUNTU_VERSION=20.04
# MPI flavour to install; the install steps further down handle OpenMPI, ONECCL, MPICH and None.
ARG MPI_KIND=OpenMPI
# Version suffix used for the python<version> apt packages and /usr/bin/python<version>.
ARG PYTHON_VERSION=3.6
# Version suffix of the g++-<version> apt package that gets installed.
ARG GPP_VERSION=7
# Framework packages, passed verbatim to pip. The special values "tf-nightly",
# "torch-nightly" and "mxnet-nightly" move the respective install to the uncached
# part of the build (after the sources are copied); KERAS_PACKAGE=None skips Keras.
# NOTE: keep versions in sync with setup.py extras_require{'dev'}:
ARG TENSORFLOW_PACKAGE=tensorflow-cpu==1.15.0
ARG KERAS_PACKAGE=keras==2.2.4
ARG PYTORCH_PACKAGE=torch==1.2.0+cpu
ARG PYTORCH_LIGHTNING_PACKAGE=pytorch_lightning==0.7.6
ARG TORCHVISION_PACKAGE=torchvision==0.4.0+cpu
ARG MXNET_PACKAGE=mxnet==1.5.0
ARG PYSPARK_PACKAGE=pyspark==2.4.7
# if SPARK_PACKAGE is set, installs Spark into /spark from the tgz archive
# if SPARK_PACKAGE is a preview version, installs PySpark from the tgz archive
# see https://archive.apache.org/dist/spark/ for available packages, version must match PYSPARK_PACKAGE
ARG SPARK_PACKAGE=spark-2.4.7/spark-2.4.7-bin-hadoop2.7.tgz
# Git ref of oneCCL to download from GitHub; only used when MPI_KIND is ONECCL.
ARG CCL_PACKAGE=master
# Prepended verbatim to the command line that pip-installs Horovod (e.g. extra VAR=value settings).
ARG HOROVOD_BUILD_FLAGS=""
# to avoid interaction with apt-get
# NOTE(review): as an ENV this setting also persists into containers started from the image.
ENV DEBIAN_FRONTEND=noninteractive
# Set default shell to /bin/bash
# -e aborts a RUN on a failing command, -u on an unset variable, and
# -o pipefail makes a pipeline fail when any of its stages fails.
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
# Log given ARGs (and all other environment vars)
RUN env | sort
# Register the ubuntu-toolchain-r/test PPA so that specific g++ versions can be
# installed; add-apt-repository is provided by software-properties-common.
RUN apt-get update -qq \
 && apt-get install -y --no-install-recommends software-properties-common \
 && rm -rf /var/lib/apt/lists/*
RUN add-apt-repository ppa:ubuntu-toolchain-r/test

# Essential build and runtime tooling (package list kept in alphabetical order).
RUN apt-get update -qq \
 && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        g++-${GPP_VERSION} \
        git \
        moreutils \
        openssh-client \
        openssh-server \
        wget \
 && rm -rf /var/lib/apt/lists/*
# SSH setup: create a passphrase-less key pair at /root/.ssh/id_rsa and
# authorize its public key for logins as root.
RUN ssh-keygen -q -N '' -f /root/.ssh/id_rsa \
 && cp -v /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys
# Python interpreter, headers and distutils for the requested PYTHON_VERSION.
RUN apt-get update -qq \
 && apt-get install -y \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-distutils \
 && rm -rf /var/lib/apt/lists/*
# Make `python` and `python<major>` (e.g. python3) resolve to that interpreter.
RUN ln -s -f /usr/bin/python${PYTHON_VERSION} /usr/bin/python
RUN ln -s -f /usr/bin/python${PYTHON_VERSION} /usr/bin/python${PYTHON_VERSION/%.*/}
# Bootstrap pip and remove the installer script within the same layer.
# NOTE(review): the unversioned get-pip.py may reject older interpreters; such
# PYTHON_VERSION values may need https://bootstrap.pypa.io/pip/<version>/get-pip.py - confirm.
RUN wget --progress=dot:mega https://bootstrap.pypa.io/get-pip.py \
 && python get-pip.py \
 && rm get-pip.py
# Python packages used by the test suite.
RUN pip install --no-cache-dir -U --force requests pytest mock pytest-forked parameterized
# Launch helper scripts, generated and made executable in a single step:
#   /spark_env.sh          runs its arguments through `env` with SPARK_HOME, SPARK_DRIVER_MEM,
#                          PYSPARK_PYTHON and PYSPARK_DRIVER_PYTHON set.
#   /pytest.sh             runs pytest via /spark_env.sh; the JUnit report goes to /artifacts and
#                          is named from $1, the rank (HOROVOD_RANK, else OMPI_COMM_WORLD_RANK,
#                          else PMI_RANK) and $2; arguments from the second on go to pytest.
#   /pytest_standalone.sh  same, but with --forked and "standalone" in place of the rank.
# The escaped dollar signs keep those expansions literal so they are evaluated when the
# scripts run, whereas ${PYTHON_VERSION} is expanded now, at build time.
RUN echo "env SPARK_HOME=/spark SPARK_DRIVER_MEM=512m PYSPARK_PYTHON=/usr/bin/python${PYTHON_VERSION} PYSPARK_DRIVER_PYTHON=/usr/bin/python${PYTHON_VERSION} \"\$@\"" > /spark_env.sh \
 && echo /spark_env.sh pytest -v --capture=no --continue-on-collection-errors --junit-xml=/artifacts/junit.\$1.\${HOROVOD_RANK:-\${OMPI_COMM_WORLD_RANK:-\${PMI_RANK}}}.\$2.xml \${@:2} > /pytest.sh \
 && echo /spark_env.sh pytest -v --capture=no --continue-on-collection-errors --junit-xml=/artifacts/junit.\$1.standalone.\$2.xml --forked \${@:2} > /pytest_standalone.sh \
 && chmod a+x /spark_env.sh /pytest.sh /pytest_standalone.sh
# Spark stand-alone cluster (skipped when SPARK_PACKAGE is empty):
# stream the archive into /tmp, then move the unpacked directory, which is
# named like the archive without its .tgz suffix, to /spark.
RUN if [[ -n "${SPARK_PACKAGE}" ]]; then \
        wget --progress=dot:giga "https://www.apache.org/dyn/closer.lua/spark/${SPARK_PACKAGE}?action=download" -O - \
            | tar -xzC /tmp; \
        archive=$(basename "${SPARK_PACKAGE}") bash -c "mv -v /tmp/\${archive/%.tgz/} /spark"; \
    fi
# Install PySpark.
# A headless OpenJDK 8 is installed first. PySpark itself then comes either from
# pip (PYSPARK_PACKAGE) or, when SPARK_PACKAGE is a "-preview" build, from an sdist
# built out of the Spark distribution unpacked at /spark (this path also installs
# pandoc and pypandoc first).
# apt-get (rather than apt) is used with -y so the installs cannot stop at a
# confirmation prompt, and the apt lists are removed in the layer that fetched them.
RUN apt-get update -qq \
 && apt-get install -y openjdk-8-jdk-headless \
 && rm -rf /var/lib/apt/lists/*
RUN if [[ ${SPARK_PACKAGE} != *"-preview"* ]]; then \
        pip install --no-cache-dir ${PYSPARK_PACKAGE}; \
    else \
        apt-get update -qq; \
        apt-get install -y pandoc; \
        rm -rf /var/lib/apt/lists/*; \
        pip install --no-cache-dir pypandoc; \
        (cd /spark/python && python setup.py sdist && pip install --no-cache-dir dist/pyspark-*.tar.gz && rm dist/pyspark-*); \
    fi
# Pin cloudpickle to 1.3.0 whenever a PySpark 2.x package is requested:
# Dill breaks cloudpickle > 1.3.0 when using Spark2
# https://github.com/cloudpipe/cloudpickle/issues/393
RUN case "${PYSPARK_PACKAGE}" in \
        "pyspark==2."*) pip install --no-cache-dir cloudpickle==1.3.0 ;; \
    esac
# Install Ray.
RUN pip install --no-cache-dir ray
# Install MPI according to MPI_KIND and record how to launch a two-process run:
#   OpenMPI - builds Open MPI 4.1.4 from source into /usr/local and writes the mpirun
#             command line to /mpirun_command; the tarball and build tree are removed
#             in the same layer so they do not end up in the image.
#   ONECCL  - builds oneCCL (ref CCL_PACKAGE) into /usr/local/oneccl, patches
#             env/setvars.sh so its uses of I_MPI_ROOT and $1 tolerate being unset
#             (presumably needed because this Dockerfile's shell runs with -u), and
#             generates /oneccl_env, /mpirun_command_ofi, /mpirun_command_mpi (same
#             script with CCL_ATL_TRANSPORT=mpi and HOROVOD_CCL_CACHE=0),
#             /mpirun_command and /mpicc_oneccl.
#   MPICH   - installs the distribution's mpich package (apt lists are removed in the
#             same layer) and writes /mpirun_command.
# Any other value (e.g. None) installs nothing.
# The continuation lines of the multi-line echo below are deliberately not indented:
# they sit inside a quoted string, so leading whitespace would become part of the
# generated script.
RUN if [[ ${MPI_KIND} == "OpenMPI" ]]; then \
        wget --progress=dot:mega -O /tmp/openmpi-4.1.4-bin.tar.gz https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.4.tar.gz && \
        cd /tmp && tar -zxf /tmp/openmpi-4.1.4-bin.tar.gz && \
        mkdir openmpi-4.1.4/build && cd openmpi-4.1.4/build && ../configure --prefix=/usr/local && \
        make -j all && make install && ldconfig && \
        cd /tmp && rm -rf /tmp/openmpi-4.1.4 /tmp/openmpi-4.1.4-bin.tar.gz && \
        echo "mpirun -allow-run-as-root -np 2 -H localhost:2 -bind-to none -map-by slot -mca mpi_abort_print_stack 1 -tag-output" > /mpirun_command; \
    elif [[ ${MPI_KIND} == "ONECCL" ]]; then \
        wget --progress=dot:mega -O /tmp/oneccl.tar.gz https://github.com/oneapi-src/oneCCL/archive/${CCL_PACKAGE}.tar.gz && \
        cd /tmp && tar -zxf oneccl.tar.gz && \
        mkdir oneCCL-${CCL_PACKAGE}/build && cd oneCCL-${CCL_PACKAGE}/build && cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local/oneccl -DCMAKE_BUILD_TYPE=Release && make -j install && \
        rm /tmp/oneccl.tar.gz && rm -Rf /tmp/oneCCL-${CCL_PACKAGE} && \
        sed -i 's/if \[ -z \"\${I_MPI_ROOT}\" \]/if [ -z \"${I_MPI_ROOT:-}\" ]/g' /usr/local/oneccl/env/setvars.sh && \
        sed -i 's/ \$1/ \${1:-}/g' /usr/local/oneccl/env/setvars.sh && \
        echo ". /usr/local/oneccl/env/setvars.sh" > /oneccl_env && \
        chmod +x /oneccl_env && \
        echo "export CCL_ATL_TRANSPORT=ofi; \
export HOROVOD_CCL_CACHE=1; \
echo \"\$(env)\"; \
echo \"mpirun is \$(which mpirun)\"; \
echo \"LD_LIBRARY_PATH is \$(echo \$LD_LIBRARY_PATH)\"; \
echo \"oneCCL links with \$(ldd /usr/local/oneccl/lib/libccl.so)\"; \
mpirun -np 2 -hosts localhost \$@" > /mpirun_command_ofi && \
        chmod +x /mpirun_command_ofi && \
        cp /mpirun_command_ofi /mpirun_command_mpi && \
        sed -i 's/export CCL_ATL_TRANSPORT=ofi;/export CCL_ATL_TRANSPORT=mpi;/g' /mpirun_command_mpi && \
        sed -i 's/export HOROVOD_CCL_CACHE=1;/export HOROVOD_CCL_CACHE=0;/g' /mpirun_command_mpi && \
        echo "/mpirun_command_mpi" > /mpirun_command && \
        echo "-L/usr/local/oneccl/lib -lmpi -I/usr/local/oneccl/include" > /mpicc_oneccl && \
        chmod +x /mpicc_oneccl; \
    elif [[ ${MPI_KIND} == "MPICH" ]]; then \
        apt-get update -qq && apt-get install -y mpich && \
        rm -rf /var/lib/apt/lists/* && \
        echo "mpirun -np 2 -l" > /mpirun_command; \
    fi
# mpi4py is installed for every MPI_KIND except "None".
# SETUPTOOLS_USE_DISTUTILS=stdlib is required because installing mpi4py broke
# with setuptools>=60.1.0:
# https://github.com/mpi4py/mpi4py/issues/157#issuecomment-1001022274
# For ONECCL, I_MPI_ROOT and MPICC are pointed at the /usr/local/oneccl installation first.
RUN if [[ "${MPI_KIND}" != "None" ]]; then \
        [[ "${MPI_KIND}" != "ONECCL" ]] || export I_MPI_ROOT=/usr/local/oneccl MPICC=/usr/local/oneccl/bin/mpicc; \
        SETUPTOOLS_USE_DISTUTILS=stdlib pip install --no-cache-dir mpi4py; \
    fi
# Install TensorFlow and Keras (releases); skipped when TENSORFLOW_PACKAGE is "tf-nightly".
# Pin scipy!=1.4.0: https://github.com/scipy/scipy/issues/11237
# Pin protobuf~=3.20 for tensorflow<2.6.5: https://github.com/tensorflow/tensorflow/issues/56077
# When KERAS_PACKAGE is not "None", any Keras pulled in by TensorFlow is replaced by the
# requested one. The final python call loads the Keras MNIST dataset once so that it is
# already present under ~/.keras in the image.
RUN if [[ ${TENSORFLOW_PACKAGE} != "tf-nightly" ]]; then \
        PROTOBUF_PACKAGE=""; \
        if [[ ${TENSORFLOW_PACKAGE} == tensorflow*==1.15.* ]] || \
           [[ ${TENSORFLOW_PACKAGE} == tensorflow-cpu==2.[012345].* ]]; then \
            PROTOBUF_PACKAGE="protobuf~=3.20"; \
        fi; \
        pip install --no-cache-dir ${TENSORFLOW_PACKAGE} ${PROTOBUF_PACKAGE}; \
        if [[ ${KERAS_PACKAGE} != "None" ]]; then \
            pip uninstall -y keras; \
            pip install --no-cache-dir ${KERAS_PACKAGE} "scipy!=1.4.0" "pandas<1.1.0" "numpy<1.24.0"; \
        fi; \
        mkdir -p ~/.keras; \
        python -c "import tensorflow as tf; tf.keras.datasets.mnist.load_data()"; \
    fi
# Pin h5py < 3 for tensorflow: https://github.com/tensorflow/tensorflow/issues/44467
# --no-cache-dir keeps pip's download cache out of the layer, like every other pip call here.
RUN pip install --no-cache-dir 'h5py<3.0' 'numpy<1.24.0' --force-reinstall
# Install PyTorch (releases); skipped when PYTORCH_PACKAGE is "torch-nightly".
# torch and torchvision are resolved against the torch_stable wheel index.
# Pin Pillow<7.0 for torchvision < 0.5.0: https://github.com/pytorch/vision/issues/1718
# Pin Pillow!=8.3.0 for torchvision: https://github.com/pytorch/vision/issues/4146
# The torchvision version test ignores a local version suffix such as "+cpu".
# PyTorch Lightning is installed last, with --no-cache-dir like the other pip calls
# so that pip's download cache does not end up in the layer.
RUN if [[ ${PYTORCH_PACKAGE} != "torch-nightly" ]]; then \
        pip install --no-cache-dir ${PYTORCH_PACKAGE} ${TORCHVISION_PACKAGE} -f https://download.pytorch.org/whl/torch_stable.html; \
        if [[ "${TORCHVISION_PACKAGE/%+*/}" == torchvision==0.[1234].* ]]; then \
            pip install --no-cache-dir "Pillow<7.0" --no-deps; \
        else \
            pip install --no-cache-dir "Pillow!=8.3.0" --no-deps; \
        fi; \
        pip install --no-cache-dir ${PYTORCH_LIGHTNING_PACKAGE}; \
    fi
# MXNet release install; nothing to do here when the nightly build is requested.
RUN if [[ ${MXNET_PACKAGE} == "mxnet-nightly" ]]; then exit 0; fi; \
    pip install --no-cache-dir ${MXNET_PACKAGE}
# Prefetch datasets into /data (the /work directory is created alongside).
# Spark MNIST dataset: stored as downloaded.
RUN mkdir -p /work /data \
 && wget --progress=dot:mega -O /data/mnist.bz2 https://horovod-datasets.s3.amazonaws.com/mnist.bz2
# Spark Rossmann dataset: unpacked while streaming.
RUN mkdir -p /work /data \
 && wget --progress=dot:mega -O - https://horovod-datasets.s3.amazonaws.com/rossmann.tgz | tar -xz -C /data
# PyTorch datasets: unpacked while streaming.
RUN wget --progress=dot:mega -O - https://horovod-datasets.s3.amazonaws.com/pytorch_datasets.tgz | tar -xz -C /data
### END OF CACHE ###
# Everything from here on is rebuilt whenever the copied sources change.
COPY . /horovod
# Nightly packages are installed after the COPY so they do not get cached.
# TensorFlow and Keras (nightly); nothing to do unless TENSORFLOW_PACKAGE is "tf-nightly".
# Pin scipy!=1.4.0: https://github.com/scipy/scipy/issues/11237
RUN if [[ ${TENSORFLOW_PACKAGE} != "tf-nightly" ]]; then exit 0; fi; \
    pip install --no-cache-dir ${TENSORFLOW_PACKAGE}; \
    if [[ ${KERAS_PACKAGE} != "None" ]]; then \
        pip uninstall -y keras-nightly; \
        pip install --no-cache-dir ${KERAS_PACKAGE} "scipy!=1.4.0" "pandas<1.1.0" "numpy<1.24.0"; \
    fi; \
    mkdir -p ~/.keras; \
    python -c "import tensorflow as tf; tf.keras.datasets.mnist.load_data()"
# Install PyTorch (nightly); only runs when PYTORCH_PACKAGE is "torch-nightly".
# torch (pre-release) and torchvision are resolved against the nightly CPU wheel index.
# Pin Pillow!=8.3.0 for torchvision: https://github.com/pytorch/vision/issues/4146
# PyTorch Lightning is installed with --no-cache-dir like the other pip calls so that
# pip's download cache does not end up in the layer.
RUN if [[ ${PYTORCH_PACKAGE} == "torch-nightly" ]]; then \
        pip install --no-cache-dir --pre torch ${TORCHVISION_PACKAGE} -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; \
        pip install --no-cache-dir "Pillow!=8.3.0" --no-deps; \
        pip install --no-cache-dir ${PYTORCH_LIGHTNING_PACKAGE}; \
    fi
# MXNet nightly: a pre-release mxnet from the dist.mxnet.io index, only when requested.
RUN case "${MXNET_PACKAGE}" in \
        mxnet-nightly) pip install --no-cache-dir --pre mxnet -f https://dist.mxnet.io/python/all ;; \
    esac
# Build and install Horovod from the sources at /horovod.
# For ONECCL the oneCCL environment is sourced first; LD_LIBRARY_PATH and PYTHONPATH
# are set to empty strings beforehand if they have no value, since the shell runs with -u.
# An sdist is built and then installed with the [spark,ray] extras; HOROVOD_BUILD_FLAGS
# is placed in front of the HOROVOD_WITH_* settings on the pip command line.
RUN if [[ ${MPI_KIND} == "ONECCL" ]]; then \
        [ -n "${LD_LIBRARY_PATH:-}" ] || export LD_LIBRARY_PATH=""; \
        [ -n "${PYTHONPATH:-}" ] || export PYTHONPATH=""; \
        . /usr/local/oneccl/env/setvars.sh; \
        export I_MPI_ROOT=/usr/local/oneccl; \
        echo "horovod python setup.py sdist, mpicxx is $(which mpicxx)"; \
    fi; \
    cd /horovod \
 && python setup.py sdist \
 && bash -c "${HOROVOD_BUILD_FLAGS} HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 HOROVOD_WITH_MXNET=1 pip install --no-cache-dir -v $(ls /horovod/dist/horovod-*.tar.gz)[spark,ray]"
# Print the effective Python package versions so that differences are easy to spot.
RUN pip freeze | sort
# Check the installed package versions with the script shipped in the sources.
RUN /horovod/assert-package-versions.sh
# TensorFlow 1.1.0 only: make the MNIST example import keras from tensorflow.contrib.
RUN if [[ ${TENSORFLOW_PACKAGE} == "tensorflow==1.1.0" ]]; then \
        sed -i -e "s/from tensorflow import keras/from tensorflow.contrib import keras/" \
            /horovod/examples/tensorflow/tensorflow_mnist.py; \
    fi
# Shrink the TensorFlow MNIST example (last_step 20000 -> 100).
RUN sed -i -e "s/last_step=20000/last_step=100/" \
        /horovod/examples/tensorflow/tensorflow_mnist.py
# Shrink the TensorFlow Eager MNIST example (take 20000 -> 100).
RUN sed -i -e "s/dataset.take(20000/dataset.take(100/" \
        /horovod/examples/tensorflow/tensorflow_mnist_eager.py
# Shrink the TensorFlow 2.0 MNIST example (take 10000 -> 100).
RUN sed -i -e "s/dataset.take(10000/dataset.take(100/" \
        /horovod/examples/tensorflow2/tensorflow2_mnist.py
# Shrink the TensorFlow 2.0 Data Service examples (epochs 24 -> 2).
RUN sed -i -e "s/ epochs=24/ epochs=2/" \
        /horovod/examples/tensorflow2/tensorflow2_mnist_data_service_train_fn_*_side_dispatcher.py
# Shrink the Keras MNIST advanced example: default epochs 24 -> 2, first Conv2D
# reduced from 32 filters to 1, second Conv2D (64 filters) removed.
RUN sed -i \
        -e "s/'--epochs', type=int, default=24,/'--epochs', type=int, default=2,/" \
        -e "s/model.add(Conv2D(32, kernel_size=(3, 3),/model.add(Conv2D(1, kernel_size=(3, 3),/" \
        -e "s/model.add(Conv2D(64, (3, 3), activation='relu'))//" \
        /horovod/examples/keras/keras_mnist_advanced.py
# Shrink the TensorFlow 2.0 Keras MNIST example the same way: epochs 24 -> 2,
# first Conv2D reduced from 32 filters to 1 ...
RUN sed -i \
        -e "s/epochs=24/epochs=2/" \
        -e "s/tf.keras.layers.Conv2D(32, \\[3, 3\\],/tf.keras.layers.Conv2D(1, [3, 3],/" \
        /horovod/examples/tensorflow2/tensorflow2_keras_mnist.py
# ... and the second Conv2D (64 filters) removed; this expression needs extended
# regular expressions (-E), so it is applied in a separate sed call.
RUN sed -i -E "s/\s+tf.keras.layers.Conv2D\(64, \\[3, 3\\], activation='relu'\),//" /horovod/examples/tensorflow2/tensorflow2_keras_mnist.py
# Shrink the PyTorch MNIST example: default epochs 10 -> 2, both convolution/pooling
# steps dropped from forward(), and fc1 plus the flattening view resized from
# 320 to 784 inputs accordingly.
RUN sed -i \
        -e "s/'--epochs', type=int, default=10,/'--epochs', type=int, default=2,/" \
        -e "s/self.fc1 = nn.Linear(320, 50)/self.fc1 = nn.Linear(784, 50)/" \
        -e "s/x = F.relu(F.max_pool2d(self.conv1(x), 2))//" \
        -e "s/x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))//" \
        -e "s/x = x.view(-1, 320)/x = x.view(-1, 784)/" \
        /horovod/examples/pytorch/pytorch_mnist.py
# Shrink the Keras Spark Rossmann examples (run and estimator variants):
# Dense(1000, ...) -> Dense(100, ...) and Dense(500, ...) -> Dense(50, ...).
RUN sed -i \
        -e "s/x = Dense(1000,/x = Dense(100,/g" \
        -e "s/x = Dense(500,/x = Dense(50,/g" \
        /horovod/examples/spark/keras/keras_spark_rossmann_run.py \
        /horovod/examples/spark/keras/keras_spark_rossmann_estimator.py
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
C/C++
1
https://gitee.com/mirrors/Horovod.git
git@gitee.com:mirrors/Horovod.git
mirrors
Horovod
Horovod
master

搜索帮助