多机使用Singularity并行 openmpi
2020-12-03 本文已影响0人
全村滴希望
参考链接:Singularity and MPI applications
多个节点分别安装openmpi
# Install the compiler toolchain and utilities used to build Open MPI from source.
# The package glob is quoted so that yum expands it; unquoted, the shell could
# expand it first against files in the current directory.
yum install -y wget make gcc gcc-c++ perl bind-utils 'openssh*'
# Download, build and install Open MPI 3.1.0 under /usr/local/openmpi.
cd /usr/local/src
wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.0.tar.gz
tar -zxvf openmpi-3.1.0.tar.gz
cd openmpi-3.1.0/
./configure --prefix="/usr/local/openmpi"
make && make install
# Open /etc/profile and append the "# OPENMPI" marker and the export lines that follow.
vi /etc/profile
# OPENMPI
# Expose the Open MPI binaries and shared libraries to login shells.
export PATH="$PATH:/usr/local/openmpi/bin"
# Insert the ":" separator only when LD_LIBRARY_PATH already has a value;
# an empty entry would make the dynamic loader search the current directory.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/openmpi/lib"
# Reload /etc/profile so the current shell picks up the updated PATH and LD_LIBRARY_PATH.
source /etc/profile
多个节点分别安装singularity
# Install build dependencies
yum groupinstall -y 'Development Tools'
yum install -y openssl-devel libuuid-devel libseccomp-devel squashfs-tools
# Install Go: unpack the 1.13 toolchain under /usr/local
cd /usr/local/src
wget https://dl.google.com/go/go1.13.linux-amd64.tar.gz
tar -zxvf go1.13.linux-amd64.tar.gz -C /usr/local
# Open /etc/profile and append the "# GO" marker and the export line below
vi /etc/profile
# GO
export PATH=$PATH:/usr/local/go/bin
# Reload the profile so /usr/local/go/bin is on PATH for the build that follows
source /etc/profile
# Install Singularity 3.7.0 from the release tarball
cd /usr/local/src
wget https://github.com/hpcng/singularity/releases/download/v3.7.0/singularity-3.7.0.tar.gz
tar -zxvf singularity-3.7.0.tar.gz
# The steps below assume the tarball unpacked into ./singularity
cd ./singularity
./mconfig
# Build and install from the builddir directory entered here
cd ./builddir
make && make install
准备MPI样例(保存为 mpitest.c,供下文定义文件的 %files 段使用)
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Minimal MPI smoke test.
 *
 * Initializes MPI, queries the size of MPI_COMM_WORLD and this process's
 * rank, prints both, and finalizes MPI.
 *
 * Returns EXIT_SUCCESS when every MPI call succeeds. Returns EXIT_FAILURE
 * if MPI_Init, MPI_Comm_size or MPI_Comm_rank reports an error; once MPI
 * has been initialized, MPI_Finalize is still called on the error path.
 */
int main (int argc, char **argv) {
    int rc;
    int size;
    int myrank;

    rc = MPI_Init (&argc, &argv);
    if (rc != MPI_SUCCESS) {
        /* MPI was never initialized, so there is nothing to finalize. */
        fprintf (stderr, "MPI_Init() failed\n");
        return EXIT_FAILURE;
    }

    rc = MPI_Comm_size (MPI_COMM_WORLD, &size);
    if (rc != MPI_SUCCESS) {
        fprintf (stderr, "MPI_Comm_size() failed\n");
        goto exit_with_error;
    }

    rc = MPI_Comm_rank (MPI_COMM_WORLD, &myrank);
    if (rc != MPI_SUCCESS) {
        fprintf (stderr, "MPI_Comm_rank() failed\n");
        goto exit_with_error;
    }

    /*
     * This message is kept without a trailing newline so the program's
     * output still matches the sample run shown in this article, where the
     * greetings of all ranks appear back-to-back on one line.
     */
    fprintf (stdout, "Hello, I am rank %d/%d", myrank, size);

    MPI_Finalize();
    return EXIT_SUCCESS;

exit_with_error:
    /* Shared error exit for failures that happen after MPI_Init succeeded. */
    MPI_Finalize();
    return EXIT_FAILURE;
}
制作openmpi的sif定义文件 (命名为mpitest)
# Singularity definition file: builds an Ubuntu-based image that contains
# Open MPI 4.0.1 under /opt/ompi and the compiled test program /opt/mpitest.
Bootstrap: docker
From: ubuntu:latest

%files
    mpitest.c /opt

%environment
    # Location of the Open MPI installation created in %post.
    export OMPI_DIR=/opt/ompi
    export SINGULARITY_OMPI_DIR=$OMPI_DIR
    export SINGULARITYENV_APPEND_PATH=$OMPI_DIR/bin
    # The prefix was previously misspelled "SINGULAIRTYENV", which does not
    # match the SINGULARITYENV_ prefix used on the line above.
    export SINGULARITYENV_APPEND_LD_LIBRARY_PATH=$OMPI_DIR/lib

%post
    echo "Installing required packages..."
    apt-get update && apt-get install -y wget git bash gcc gfortran g++ make file

    echo "Installing Open MPI"
    export OMPI_DIR=/opt/ompi
    export OMPI_VERSION=4.0.1
    export OMPI_URL="https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-$OMPI_VERSION.tar.bz2"
    mkdir -p /tmp/ompi
    mkdir -p /opt
    # Download
    cd /tmp/ompi && wget -O openmpi-$OMPI_VERSION.tar.bz2 $OMPI_URL && tar -xjf openmpi-$OMPI_VERSION.tar.bz2
    # Compile and install
    cd /tmp/ompi/openmpi-$OMPI_VERSION && ./configure --prefix=$OMPI_DIR && make install

    # Set env variables so we can compile our application
    export PATH=$OMPI_DIR/bin:$PATH
    # Add the ":" separator only when LD_LIBRARY_PATH already has a value, so no
    # empty entry (which the loader treats as the current directory) is created.
    export LD_LIBRARY_PATH=$OMPI_DIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
    export MANPATH=$OMPI_DIR/share/man:$MANPATH

    echo "Compiling the MPI application..."
    cd /opt && mpicc -o mpitest mpitest.c
# Build the SIF image from the definition file named "mpitest".
singularity build mpitest.sif mpitest
# Start 4 MPI processes with the host's mpirun; each process runs /opt/mpitest
# inside the container image. "machines" is the file given to --machinefile
# (its contents are not shown here).
mpirun \
  --allow-run-as-root \
  --oversubscribe \
  --machinefile machines \
  --prefix /usr/local/openmpi \
  -np 4 \
  singularity exec mpitest.sif /opt/mpitest
Hello, I am rank 1/4Hello, I am rank 0/4Hello, I am rank 2/4Hello, I am rank 3/4
在 Kubernetes 中将容器运行时(CRI)替换为 Singularity-CRI
# Install socat, then build Singularity-CRI from source at tag v1.0.0-beta.5.
yum install -y socat
cd /usr/local/src
git clone https://github.com/sylabs/singularity-cri.git
cd singularity-cri
# Create a local branch v1.0.0-beta.5 that starts at the tag of the same name.
git checkout tags/v1.0.0-beta.5 -b v1.0.0-beta.5
make && make install
# Create the systemd unit file for the sycri service with the content that follows.
vi /usr/lib/systemd/system/sycri.service
# systemd unit that runs the Singularity-CRI daemon (/usr/local/bin/sycri).
[Unit]
Description=Singularity-CRI
After=network.target
# NOTE(review): 0 is understood to disable systemd's start rate limiting so that
# Restart=always keeps retrying - confirm against the systemd version in use.
StartLimitIntervalSec=0
[Service]
Type=simple
# Always restart the service, waiting 1 second before each restart.
Restart=always
RestartSec=1
ExecStart=/usr/local/bin/sycri
# PATH seen by the service; /usr/local/libexec/singularity/bin is listed first.
Environment="PATH=/usr/local/libexec/singularity/bin:/bin:/sbin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin"
[Install]
WantedBy=multi-user.target
# Enable the sycri service at boot and start it now.
systemctl enable sycri
systemctl start sycri
# Edit the kubelet options file so that it contains the KUBELET_OPTS value that follows.
vi /usr/local/kubernetes/conf/kubelet.conf
# kubelet command-line options.
# --container-runtime=remote together with the two *-endpoint flags points
# kubelet at unix:///var/run/singularity.sock (presumably the socket served by
# the sycri service - confirm).
# NOTE(review): --hostname-override=node1 looks node-specific; presumably it has
# to be adjusted on every node - verify.
KUBELET_OPTS="--logtostderr=false \
--v=2 \
--log-dir=/usr/local/kubernetes/logs \
--hostname-override=node1 \
--network-plugin=cni \
--kubeconfig=/usr/local/kubernetes/conf/kubelet.kubeconfig \
--bootstrap-kubeconfig=/usr/local/kubernetes/conf/bootstrap.kubeconfig \
--config=/usr/local/kubernetes/conf/kubelet-config.yml \
--cert-dir=/usr/local/kubernetes/ssl \
--container-runtime=remote \
--container-runtime-endpoint=unix:///var/run/singularity.sock \
--image-service-endpoint=unix:///var/run/singularity.sock \
--pod-infra-container-image=registry.cn-hangzhou.aliyuncs.com/google-containers/pause-amd64:3.0"