当前位置: 首页> 科技> 能源 > 业务系统管理软件_软件定制开发软件_东营网站建设_网络营销是什么?

业务系统管理软件_软件定制开发软件_东营网站建设_网络营销是什么?

时间:2025/7/10 17:36:30来源:https://blog.csdn.net/weixin_43360707/article/details/143592350 浏览次数:0次
业务系统管理软件_软件定制开发软件_东营网站建设_网络营销是什么?

背景:
有多台GPU服务器,我的一些kernel需要在上面编译和运行,但是每次都要先ssh登录、再启动docker,非常麻烦。有没有办法在本地一键完成这些远程操作呢?

下面就是这次的实现:

远程执行

在src文件夹下提供了remote_code_exe.sh脚本,可以在本地机器上远程操控其它服务器,支持单个case的编译、执行,以及所有case的编译、执行。
脚本中提供了DEBUG环境变量来选择是否输出脚本log, 可以用来调试脚本

root# ./remote_code_exe.sh -h
Usage:  <DEBUG=1/0> ./remote_code_exe.sh gpu/gcu [options]
Options:
    -b test case        build test case(all/case_name, case_name is not contain the prefix 'test_' and suffix '_cuda' or '_tops')
    -c clean build      clean build(all/case_name, case_name is not contain the prefix 'test_' and suffix '_cuda' or '_tops')
    -d device id        device id(default: 0)
    -r test case        run case(all/case_name, case_name is not contain the prefix 'test_' and suffix '_cuda' or '_tops')
    -m run mode         run mode(default: 0, when run mode is 1, CHIP_BENCH_MORE_DEBUG_PARAMS=1)
    -h                  help message
exp:
    1. DEBUG=1 ./remote_code_exe.sh [gpu/gcu default gpu] -b record_env
    2. DEBUG=1 ./remote_code_exe.sh [gpu/gcu default gpu] -d 0 -r test_xxx_case_cuda/all
    3. DEBUG=1 ./remote_code_exe.sh [gpu/gcu default gpu] -c 1/0

编译一个文件

./remote_code_exe.sh  gpu  -b  [case_name]

执行一个文件,结果打印到标准输出

DEBUG=0 ./remote_code_exe.sh gpu  -m 1 -d 0 -r  [case_name]

编译所有文件

./remote_code_exe.sh  gpu  -b  all

执行 report.sh,结果保存到以该机器 GPU 名称命名的日志文件(<GPU名称>_report.log)中

DEBUG=0 ./remote_code_exe.sh gpu  -m 1 -d 0 -r  all

代码如下:

#!/bin/bash
# Clear bash's history-expansion characters.
histchars=

# Set DEBUG to 1 to enable debug mode, 0 to disable.
# DEBUG is taken from the environment and defaults to 0 when it is not set.
DEBUG=${DEBUG:-0}

#######################################
# Print a debug message when debug mode is enabled.
# Globals:   DEBUG (read)
# Arguments: the message; multi-line text is prefixed line by line
# Outputs:   each line of the message on stdout, prefixed with "DEBUG: "
# Returns:   0
#######################################
DEBUG() {
  # Compare as a string: a non-numeric value (e.g. DEBUG=true) simply
  # disables logging instead of raising "integer expression expected".
  if [[ "$DEBUG" != "1" ]]; then
    return 0
  fi
  local line
  printf '%s\n' "$*" | while IFS= read -r line; do
    printf 'DEBUG: %s\n' "$line"
  done
}

DEVICE_ID="0"
# Defaults for every command-line controlled setting.
TYPE=""
BUILD_CASE=""
# Initialised explicitly so a CLEAN_BUILD value inherited from the environment
# can never trigger the clean step by accident.
CLEAN_BUILD=""
RUN_CASE=""
RUN_MODE="0"
LIST_INFO="0"
EXE_CMD=""
CFG_FILE="remote_cfg_template.txt"

# Help text; the examples match what the option handling actually accepts
# (-c takes all/case_name, -r takes the bare case name or all).
usage="Usage:  <DEBUG=1/0> $0 gpu/gcu [options]
Options:
    -b compile case     build test case(all/case_name, case_name is not contain the prefix 'test_' and suffix '_cuda' or '_tops')
    -c clean build      clean build dir(all/case_name)
    -d device id        device id(default: 0)
    -e execute rc       execute remote command
    -f config file      configuration file(default: remote_cfg_template.txt)
    -r test case        run case(all/case_name, case_name is not contain the prefix 'test_' and suffix '_cuda' or '_tops')
    -m run mode         run mode(default: 0, when run mode is 1, CHIP_BENCH_MORE_DEBUG_PARAMS=1)
    -l list GPU         list GPU information
    -h                  help message
exp:
    1. <DEBUG=1/0> $0 [gpu/gcu default gpu] -b record_env
    2. <DEBUG=1/0> $0 [gpu/gcu default gpu] -d 0 -r case_name/all
    3. <DEBUG=1/0> $0 [gpu/gcu default gpu] -c all/case_name
    4. <DEBUG=1/0> $0 [gpu/gcu default gpu] -l
    5. <DEBUG=1/0> $0 [gpu/gcu default gpu] -e \"nvidia-smi -h\"
"

# At least one argument (the device type or an option) is required.
if [ $# -lt 1 ]; then
  echo "$usage"
  exit 1
fi

# The first argument selects the device type; anything other than "gcu"
# (including an option such as -b) falls back to "gpu".
case "$1" in
  gcu) TYPE="gcu" ;;
  *) TYPE="gpu" ;;
esac
# Use getopt to parse options; it moves non-option words (the leading
# gpu/gcu) behind the "--" marker.
if ! OPTIONS=$(getopt -o b:c:d:e:f:r:m:lh --long build:,clean:,device:,exe:,file:,run:,mode:,list,help -- "$@"); then
  echo "$usage"
  exit 1
fi
eval set -- "$OPTIONS"

while true; do
  case "$1" in
    -b | --build)
      BUILD_CASE="$2"
      shift 2
      ;;
    -c | --clean)
      CLEAN_BUILD="$2"
      shift 2
      ;;
    -d | --device)
      DEVICE_ID="$2"
      shift 2
      ;;
    -e | --exe)
      EXE_CMD="$2"
      # Keep parsing: a 'break' here would drop every option that follows
      # (for example "-e cmd -f other.cfg" would ignore -f).
      shift 2
      ;;
    -f | --file)
      CFG_FILE="$2"
      shift 2
      ;;
    -r | --run)
      RUN_CASE="$2"
      shift 2
      ;;
    -m | --mode)
      RUN_MODE="$2"
      shift 2
      ;;
    -l | --list)
      LIST_INFO="1"
      # Same as -e: consume the flag and continue with the remaining options.
      shift
      ;;
    -h | --help)
      echo "$usage"
      exit 0
      ;;
    --)
      shift
      break
      ;;
    *)
      echo "$usage"
      exit 1
      ;;
  esac
done

DEBUG "DEBUG: $DEBUG"
DEBUG "TYPE: $TYPE"
DEBUG "DEVICE_ID: $DEVICE_ID"
DEBUG "BUILD_CASE: $BUILD_CASE"
DEBUG "RUN_CASE: $RUN_CASE"
DEBUG "RUN_MODE: $RUN_MODE"

# Global variables
# Name of the rsync exclude list, looked up next to this script.
EXCLUDE_FILE=".rsync_exclude_file"
# Prefix for privileged remote commands; parse_cfg_file clears it for root.
SUDO="sudo"
# Connection settings. These are placeholders only: parse_cfg_file replaces
# IP, PASSWORD and USER with the values from the configuration file.
IP="10.9.113.22"
# No password is hard-coded in the script; it always comes from the
# configuration file.
PASSWORD=""
USER="root"
CHIPBENCH_DOCKER_NAME=""
CHIPBENCH_DOCKER_SUFFIX=""
# Directory that contains this script.
CURRENT_PATH=$(dirname "$(realpath "$0")")
REMOTE_PATH=""
PRIVATE_SUFFIX=""

# get the LOCAL_PATH
LOCAL_PATH=$(find "$CURRENT_PATH" -type d -name "src" -exec dirname {} \; | head -n 1)

#######################################
# Log a command line and evaluate it in the local shell.
# Arguments: $1 - command line to evaluate
# Returns:   status of the evaluated command (0 when $1 is empty)
#######################################
do_cmd() {
  if [[ -z "$1" ]]; then
    echo "cmd str is null."
    return 0
  fi
  DEBUG "$1"
  eval "$1"
}

# Print the (to be eval'ed) sshpass/ssh prefix for the configured host.
# The password is referenced as ${PASSWORD} and only expanded at eval time,
# so it is neither broken by quote characters nor written to the debug log.
_ssh_cmd_prefix() {
  printf '%s' "SSHPASS=\"\${PASSWORD}\" sshpass -e ssh -o StrictHostKeyChecking=no ${USER}@${IP}"
}

# Print the (to be eval'ed) command line that runs $1 on the remote host
# with ${SUDO}; for a non-root user the password is also piped to "sudo -S".
_sudo_remote_cmd_line() {
  local prefix
  prefix=$(_ssh_cmd_prefix)
  if [[ "$USER" == "root" ]]; then
    printf '%s' "${prefix} ${SUDO} \"$1\""
  else
    printf '%s' "printf '%s\\n' \"\${PASSWORD}\" | ${prefix} ${SUDO} -S \"$1\""
  fi
}

#######################################
# Run a command on the remote host with elevated rights.
# Globals:   USER, IP, PASSWORD, SUDO (read)
# Arguments: $1 - remote command; it is placed inside double quotes and
#                 eval'ed locally, so callers escape embedded quotes as \"
# Returns:   status of the ssh invocation (0 when $1 is empty)
#######################################
do_remote_cmd() {
  if [[ -z "$1" ]]; then
    echo "cmd str is null."
    return 0
  fi
  local cmd
  cmd=$(_sudo_remote_cmd_line "$1")
  DEBUG "$cmd"
  eval "$cmd"
}

#######################################
# Run a command on the remote host as the configured user, without sudo.
# Arguments: $1 - remote command (same quoting rules as do_remote_cmd)
# Returns:   status of the ssh invocation
#######################################
do_remote_cmd_with_current_user() {
  local cmd
  cmd="$(_ssh_cmd_prefix) \"$1\""
  DEBUG "$cmd"
  eval "$cmd"
}

#######################################
# Same as do_remote_cmd, but discards stdout and stderr of the remote command.
# Arguments: $1 - remote command
# Returns:   status of the ssh invocation (0 when $1 is empty)
#######################################
do_remote_cmd_silent() {
  if [[ -z "$1" ]]; then
    echo "cmd str is null."
    return 0
  fi
  local cmd
  cmd=$(_sudo_remote_cmd_line "$1")
  DEBUG "$cmd"
  eval "$cmd" > /dev/null 2>&1 # silent means no output
}

#######################################
# Run a command on the remote host and print its stdout, for use in $(...).
# Nothing is logged here so that the captured value stays clean.
# Arguments: $1 - remote command
# Outputs:   stdout of the remote command
# Returns:   status of the ssh invocation (0 when $1 is empty)
#######################################
do_remote_cmd_with_return() {
  if [[ -z "$1" ]]; then
    echo "cmd str is null."
    return 0
  fi
  local cmd output status
  cmd=$(_sudo_remote_cmd_line "$1")
  output=$(eval "$cmd")
  status=$?
  echo "$output"
  return "$status"
}

#######################################
# Sync a local/remote directory to a remote/local directory with rsync.
# e.g. "do_sync_cmd dir1 dir2" syncs dir1 to dir2.
# Globals:   PASSWORD, USER, CURRENT_PATH, EXCLUDE_FILE (read)
# Arguments: $1 - source, $2 - destination
# Returns:   status of rsync (0 when $1 is empty)
#######################################
do_sync_cmd() {
  if [[ -z "$1" ]]; then
    echo "cmd str is null."
    return 0
  fi
  local cmd
  cmd="SSHPASS=\"\${PASSWORD}\" rsync --rsync-path=\"rsync --no-p --no-g --chmod=ugo=rwX\"  --exclude-from=\"$CURRENT_PATH/${EXCLUDE_FILE}\"  -a --rsh=\"sshpass -e ssh -l ${USER}\" \"${1}\" \"${2}\""
  do_cmd "$cmd"
}

#######################################
# Abort the script when a required local tool is missing.
# Arguments: $1 - command name
#######################################
command_is_exist() {
  if ! command -v "${1}" &> /dev/null; then
    echo "${1} could not be found. Please install ${1}."
    echo "For Ubuntu, you can install it using: sudo apt install ${1}"
    exit 1
  fi
  # Log the tool that was actually checked (the previous code always printed
  # the sshpass version, whatever command was being tested).
  DEBUG "${1} found: $(command -v "${1}")"
}

#######################################
# Load the connection settings from the configuration file.
# The non-comment, non-empty lines are read in this order:
#   private suffix, IP, password, user.
# Globals:   PRIVATE_SUFFIX, IP, PASSWORD, USER, SUDO (written)
# Arguments: $1 - path of the configuration file
#######################################
parse_cfg_file() {
  if [ ! -f "$1" ]; then
    echo "Configuration file $1 not found!"
    exit 1
  fi

  # Read non-empty, non-comment lines; drop CR so files saved with Windows
  # line endings do not leak "\r" into the values.
  local -a config_lines
  readarray -t config_lines < <(grep -v '^#' "$1" | grep -v '^\s*$' | tr -d '\r')

  PRIVATE_SUFFIX=${config_lines[0]}
  IP=${config_lines[1]}
  PASSWORD=${config_lines[2]}
  USER=${config_lines[3]}

  if [ -z "$IP" ] || [ -z "$PASSWORD" ] || [ -z "$USER" ]; then
    echo "IP, PASSWORD, or USER is empty. Please check the configuration file."
    exit 1
  fi

  # root does not need sudo on the remote side.
  if [[ "$USER" == "root" ]]; then
    SUDO=""
  fi
}

#######################################
# Abort unless the device id is a non-negative integer below the device count.
# Arguments: $1 - device id, $2 - number of devices
#######################################
check_device_id() {
  local id=$1 count=$2
  if [[ ! "$count" =~ ^[0-9]+$ ]]; then
    echo "Invalid device count: $count"
    exit 1
  fi
  if [[ ! "$id" =~ ^[0-9]+$ ]] || [ "$id" -ge "$count" ]; then
    echo "Invalid device id: $id"
    echo "Please provide a valid device id from 0 to $((10#$count - 1))"
    exit 1
  fi
  DEBUG "Device id $id is valid. there are $count devices."
}

# Print the build/run suffix for the selected TYPE: "cuda" for gpu, else "tops".
get_suffix_from_type() {
  if [ "$TYPE" == "gpu" ]; then
    echo "cuda"
  else
    echo "tops"
  fi
}

# Abort the script when $1 is not an existing regular file.
file_is_exist() {
  if [ ! -f "$1" ]; then
    echo "File $1 not found!"
    exit 1
  fi
}

# Abort the script when $1 is not an existing directory.
dir_is_exist() {
  if [ ! -d "$1" ]; then
    echo "Directory $1 not found!"
    exit 1
  fi
}

#######################################
# Report whether a container with exactly this name is running remotely.
# Arguments: $1 - container name
# Outputs:   "active" or "inactive" on stdout
#######################################
docker_is_active() {
  if [ -z "$1" ]; then
    echo "CHIPBENCH_DOCKER_NAME is not set."
    exit 1
  fi
  local cmd count
  # Only single quotes are used inside the command: do_remote_cmd_with_return
  # wraps it in double quotes before eval'ing it.
  cmd="docker ps --filter name=${1} --filter status=running --format '{{.Names}}' | grep -Fxc -- '${1}'"
  # Debug output goes to stderr so it cannot end up in the captured result.
  DEBUG "CMD: $cmd" >&2
  count=$(do_remote_cmd_with_return "$cmd")
  DEBUG "NUM_ACTIVE: $count" >&2
  if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
    echo "active"
  else
    echo "inactive"
  fi
}

# Parse IP, PASSWORD, USER from cfg file
# Resolve the configuration file: an absolute path is used as given, anything
# else is looked up next to this script.
case "$CFG_FILE" in
  /*) CFG_PATH="$CFG_FILE" ;;
  *) CFG_PATH="$CURRENT_PATH/$CFG_FILE" ;;
esac
file_is_exist "$CFG_PATH"
parse_cfg_file "$CFG_PATH"

# The remote working tree must live under the user's directory in /home.
REMOTE_PATH="/home/${USER}/chipbench"
CHIPBENCH_DOCKER_SUFFIX=${PRIVATE_SUFFIX}
DEBUG "private name: $PRIVATE_SUFFIX"
DEBUG "remote IP  : $IP"
# Never write the real password to the log; show one '*' per character.
DEBUG "remote PW  : ${PASSWORD//?/*}"
DEBUG "remote USER: $USER"

# List the GPU information
if [[ "$LIST_INFO" == "1" ]]; then
  # The remote helpers need sshpass, so check it before the first use.
  command_is_exist sshpass
  if [ "$TYPE" == "gpu" ]; then
    do_remote_cmd "nvidia-smi"
  else
    do_remote_cmd "efsmi"
  fi
  # Report the status of the remote command to the caller.
  exit $?
fi

# Execute the remote command
if [ -n "$EXE_CMD" ]; then
  command_is_exist sshpass
  do_remote_cmd "$EXE_CMD"
  exit $?
fi

# Clean the build dir, for make clean
# Never run "rm -rf" against a path derived from an empty LOCAL_PATH
# (that would resolve to /src/build).
if [ -n "$CLEAN_BUILD" ] && [ -z "$LOCAL_PATH" ]; then
  echo "LOCAL_PATH is empty; refusing to delete build output." >&2
  exit 1
fi

if [ "$CLEAN_BUILD" == "all" ]; then
  echo "delete ${LOCAL_PATH}/src/build/*"
  do_cmd "rm -rf -- \"${LOCAL_PATH}/src/build\""
  exit 0
elif [ -n "$CLEAN_BUILD" ]; then
  # The name is spliced into an eval'ed command line, so restrict it to
  # characters that cannot change the meaning of that command.
  if [[ ! "$CLEAN_BUILD" =~ ^[A-Za-z0-9_.-]+$ ]]; then
    echo "Invalid case name for clean: $CLEAN_BUILD" >&2
    exit 1
  fi
  echo "delete ${LOCAL_PATH}/src/build/*${CLEAN_BUILD}*"
  CMD="rm -rf -- \"${LOCAL_PATH}/src/build/\"*${CLEAN_BUILD}*"
  do_cmd "$CMD"
  exit 0
fi

# Check if BUILD_CASE or RUN_CASE is provided
if [ -z "$BUILD_CASE" ] && [ -z "$RUN_CASE" ]; then
  echo "Please provide the test case to build or run."
  echo "$usage"
  exit 1
fi

# Check if RUN_CASE is valid, for make test xxx: the binary of a single case
# must already exist in the local build directory.
if [ -n "$RUN_CASE" ] && [ "$RUN_CASE" != "all" ]; then
  RUN_TYPE=$(get_suffix_from_type)
  if [ "$RUN_CASE" == "record_env" ]; then
    RUN_CASE_NAME=record_env_${RUN_TYPE}
  else
    RUN_CASE_NAME=test_${RUN_CASE}_${RUN_TYPE}
  fi
  file_is_exist "${LOCAL_PATH}/src/build/${RUN_CASE_NAME}"
fi

# 1. Check whether sshpass and rsync are installed
command_is_exist sshpass
command_is_exist rsync

# 2. Check if REMOTE_PATH exists on the remote server, create it if it does not
DEBUG "LOCAL_PATH: $LOCAL_PATH"
DEBUG "REMOTE_PATH: $REMOTE_PATH"

# Add the remote user to the root group when it is not a member yet.
# The membership is queried on the REMOTE host: running "id" locally would
# inspect a local account that happens to have the same name.
REMOTE_GROUPS=$(do_remote_cmd_with_return "id -nG ${USER}")
if ! grep -qw "root" <<< "$REMOTE_GROUPS"; then
  DEBUG "User $USER is not in the root group. Adding to root group..."
  do_remote_cmd "adduser ${USER} root"
else
  DEBUG "User $USER is already in the root group."
fi

# NOTE(review): this removes /home/chipbench, not ${REMOTE_PATH}
# (/home/<user>/chipbench). It looks like a leftover from an older layout;
# confirm that deleting this directory is still intended.
do_remote_cmd "rm -rf /home/chipbench"
do_remote_cmd_with_current_user "mkdir -p  ${REMOTE_PATH}/"

# 3. Sync the current directory to the remote server
DEBUG "sync current directory to remote server"
do_sync_cmd "${LOCAL_PATH}/" "${IP}:${REMOTE_PATH}/"

# 4. Build the docker container
# Ask the remote helper script for the container name it will use.
CHIPBENCH_DOCKER_NAME=$(do_remote_cmd_with_return "${REMOTE_PATH}/docker/build_or_run.sh name $TYPE $CHIPBENCH_DOCKER_SUFFIX")
# Every later "docker exec" needs the name; stop here if the query failed.
if [ -z "$CHIPBENCH_DOCKER_NAME" ]; then
  echo "Failed to get the docker container name from ${IP}." >&2
  exit 1
fi

DEBUG "Build docker container[$CHIPBENCH_DOCKER_NAME]..."
# Show the remote build output only in debug mode.
if [[ "$DEBUG" == "1" ]]; then
  do_remote_cmd "${REMOTE_PATH}/docker/build_or_run.sh build $TYPE $CHIPBENCH_DOCKER_SUFFIX"
else
  do_remote_cmd_silent "${REMOTE_PATH}/docker/build_or_run.sh build $TYPE $CHIPBENCH_DOCKER_SUFFIX"
fi

# if container is already running, skip the restart
DOCKER_ACTIVE=$(do_remote_cmd_with_return "${REMOTE_PATH}/docker/build_or_run.sh status $TYPE $CHIPBENCH_DOCKER_SUFFIX")
DEBUG "docker container [$CHIPBENCH_DOCKER_NAME] status: $DOCKER_ACTIVE"
if [ "$DOCKER_ACTIVE" == "active" ]; then
  DEBUG "Docker container[$CHIPBENCH_DOCKER_NAME] is already running."
else
  DEBUG "Start the docker container[$CHIPBENCH_DOCKER_NAME]..."
  do_remote_cmd_silent "${REMOTE_PATH}/docker/build_or_run.sh restart $TYPE $CHIPBENCH_DOCKER_SUFFIX"
fi

# 5. Build the test case or run the test case
# Status of the remote build/run command; reported after the sync back so a
# failed build or test no longer ends with "All done." and exit code 0.
REMOTE_STATUS=0

if [ -n "$BUILD_CASE" ]; then
  # Build the test case
  if [[ "$DEBUG" == "1" ]]; then
    do_remote_cmd "docker exec $CHIPBENCH_DOCKER_NAME /bin/bash -c \\\"cd ${REMOTE_PATH}/src && ls\\\""
  fi
  DEBUG "Build the test case: $BUILD_CASE"
  COMPILE_TYPE=$(get_suffix_from_type)
  if [ "$BUILD_CASE" == "all" ]; then
    do_remote_cmd "docker exec $CHIPBENCH_DOCKER_NAME /bin/bash -c \\\"cd ${REMOTE_PATH}/src && make $COMPILE_TYPE\\\"" || REMOTE_STATUS=$?
  else
    do_remote_cmd "docker exec $CHIPBENCH_DOCKER_NAME /bin/bash -c \\\"cd ${REMOTE_PATH}/src && make $COMPILE_TYPE $BUILD_CASE\\\"" || REMOTE_STATUS=$?
  fi
elif [ -n "$RUN_CASE" ]; then
  # NOTE(review): the device checks below always use nvidia-smi, even when
  # TYPE is gcu - confirm what the gcu equivalent should be.

  # Check if the device id is valid
  DEBUG "Check if the device id is valid"
  DEVICE_INFO=$(do_remote_cmd_with_return "nvidia-smi -L")
  DEVICE_NUM=$(grep -cv '^[[:space:]]*$' <<< "$DEVICE_INFO")
  DEBUG "$DEVICE_INFO"
  DEBUG "GPU count is $DEVICE_NUM"
  check_device_id "$DEVICE_ID" "$DEVICE_NUM"

  # Peek the remote directory, for debug
  if [[ "$DEBUG" == "1" ]]; then
    do_remote_cmd "ls -la ${REMOTE_PATH}"
  fi

  # Get the GPU Information
  DEVICE_INFO=$(do_remote_cmd_with_return "nvidia-smi")
  DEBUG "$DEVICE_INFO"

  # Check the GPU process: refuse to run while the device is busy.
  DEBUG "get the GPU process"
  PROCESS=$(do_remote_cmd_with_return "nvidia-smi --query-compute-apps pid --format=noheader,csv -i ${DEVICE_ID}")
  PROCESS_NUM=$(grep -cv '^[[:space:]]*$' <<< "$PROCESS")
  if [ "$PROCESS_NUM" -gt 0 ]; then
    echo "There are $PROCESS_NUM processes running on GPU $DEVICE_ID"
    echo "$PROCESS"
    echo "Please stop the processes and try again."
    exit 1
  fi
  DEBUG "No process is running on GPU $DEVICE_ID"

  # Run the test cases
  DEVICE_NAME=$(do_remote_cmd_with_return "nvidia-smi --query-gpu name --format=noheader,csv -i ${DEVICE_ID}")
  echo "Current device id is $DEVICE_ID, name is:$DEVICE_NAME"
  # The report file is named after the device, with spaces replaced by '_'.
  LOG_NAME=${DEVICE_NAME// /_}
  if [ "$RUN_CASE" == "all" ]; then
    do_remote_cmd "docker exec $CHIPBENCH_DOCKER_NAME /bin/bash -c \\\"CHIP_BENCH_MORE_DEBUG_PARAMS=${RUN_MODE} CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=${DEVICE_ID} ${REMOTE_PATH}/src/report.sh ${LOG_NAME}_report.log\\\"" || REMOTE_STATUS=$?
    echo "Log saved in ${LOCAL_PATH}/src/${LOG_NAME}_report.log"
  elif [ "$RUN_CASE" == "record_env" ]; then
    RUN_TYPE=$(get_suffix_from_type)
    do_remote_cmd "docker exec $CHIPBENCH_DOCKER_NAME /bin/bash -c \\\"CHIP_BENCH_MORE_DEBUG_PARAMS=${RUN_MODE} CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=${DEVICE_ID} ${REMOTE_PATH}/src/build/record_env_${RUN_TYPE}\\\"" || REMOTE_STATUS=$?
  else
    # The whole command must stay on one line: a newline inside the string
    # would make the remote shell run the case name as a separate command.
    do_remote_cmd "docker exec $CHIPBENCH_DOCKER_NAME /bin/bash -c \\\"cd ${REMOTE_PATH}/src && CHIP_BENCH_MORE_DEBUG_PARAMS=${RUN_MODE} CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=${DEVICE_ID} make test $TYPE ${RUN_CASE}\\\"" || REMOTE_STATUS=$?
  fi
else
  echo "Please provide the test case to build or run."
  echo "$usage"
  exit 1
fi

# Sync the remote dir back to the local dir (build output, report files),
# even when the remote command failed, so partial results are not lost.
DEBUG "sync report file to local"
do_sync_cmd "${IP}:${REMOTE_PATH}/" "${LOCAL_PATH}/"

# Stop the docker container
# echo "Stop the docker container, [$CHIPBENCH_DOCKER_NAME]..."
# do_remote_cmd_silent "${REMOTE_PATH}/docker/build_or_run.sh stop $TYPE $CHIPBENCH_DOCKER_SUFFIX"

if [ "$REMOTE_STATUS" -ne 0 ]; then
  echo "Remote command failed with status $REMOTE_STATUS." >&2
  exit "$REMOTE_STATUS"
fi
echo "All done."
exit 0

配置文件:
remote_cfg_template.txt

# 本文件是 remote_code_exe.sh 的配置文件
# 用户可以自己配置机器的 IP地址,用户名、密码等信息
# 注意: 一次只能配置一台机器的信息

# 这是所有私有文件的后缀
caizc

# 10.9.113.149
# 123456
# root

非同步文件
.rsync_exclude_file

.vscode
.git
.
..

docker文件

#!/bin/bash
# Manage the chipbenchmark Docker container on the machine this script runs on.
# Usage: build_or_run.sh build|run|restart|status|stop|name gcu|gpu [name]
#
# -u is intentionally not enabled: the optional arguments may be missing.
set -e -o pipefail
# set -x will print all the variables , so it is not recommended to use it in production
# set -x

CURRENT_PATH=$(dirname "$(realpath "$0")")

# Usage goes to stderr: the stdout of "name" and "status" is captured by the
# caller and must contain nothing but the answer.
usage() {
  echo "Usage: build_or_run.sh build|run|restart|status|stop|name gcu|gpu [name]" >&2
}

action=${1:-}
arch=${2:-}
suffix=${3:-}
docker_version=""
docker_image=""
docker_name=""
# Extra "docker run" arguments, kept as an array so each one stays one word.
drun_args=()

# An unknown architecture falls back to gcu, including its image settings
# (previously the image stayed empty, so "build" pulled the image ":").
if [[ "$arch" != "gcu" && "$arch" != "gpu" ]]; then
  usage
  arch="gcu"
fi

case "$arch" in
  gcu)
    docker_version=latest
    docker_image=artifact.mywork.cn/ccc_docker_images/ubuntu/qic_ubuntu_2004_gcc9
    ;;
  gpu)
    drun_args=(--gpus all -e NVIDIA_DRIVER_CAPABILITIES=compute,utility,video)
    docker_version=12.6.1-devel-ubuntu22.04
    docker_image=nvcr.io/nvidia/cuda
    ;;
esac

# Container name: chipbenchmark.<arch>.<user>[.<name>]
if [ -n "$suffix" ]; then
  docker_name=chipbenchmark."$arch"."$USER"."$suffix"
else
  docker_name=chipbenchmark."$arch"."$USER"
fi

# Succeed when a container (running or not) is named exactly $docker_name.
# The name is matched as a fixed string, so its dots are not regex wildcards.
container_exists() {
  local names
  names=$(docker ps -a --format '{{.Names}}') || return 1
  grep -Fxq -- "$docker_name" <<< "$names"
}

# Print "active" when a container named exactly $docker_name is running,
# otherwise "inactive". The lookup is done inside an 'if', so a missing
# container no longer aborts the script through set -e / pipefail before
# "inactive" is printed.
container_status() {
  local names
  names=$(docker ps --filter "name=$docker_name" --filter "status=running" --format '{{.Names}}') || names=""
  if grep -Fxq -- "$docker_name" <<< "$names"; then
    echo "active"
  else
    echo "inactive"
  fi
}

if [ "$action" == "build" ]; then
  if container_exists; then
    echo "Container $docker_name already exists."
    exit 0
  fi
  docker pull "$docker_image:$docker_version"
  docker run -itd \
    --privileged \
    --rm \
    --network host \
    --name "$docker_name" \
    -h indocker \
    -v ~/.ssh:/root/.ssh/ \
    -v /etc/localtime:/etc/localtime:ro \
    -v /etc/passwd:/etc/passwd:ro \
    -v /etc/group:/etc/group:ro \
    -v "$HOME:$HOME" \
    -v ~/.cache:/root/.cache \
    -v /home:/home \
    -v /sys:/sys \
    -v /var/lib/gitlab-runner:/var/lib/gitlab-runner \
    --ipc=host \
    --security-opt seccomp=unconfined \
    "${drun_args[@]}" \
    "$docker_image:$docker_version"
elif [ "$action" == "run" ]; then
  docker exec -it -w "$(pwd)" "$docker_name" /bin/bash
elif [ "$action" == "restart" ]; then
  docker restart "$docker_name"
elif [ "$action" == "stop" ]; then
  docker stop "$docker_name"
elif [ "$action" == "name" ]; then
  echo "$docker_name"
elif [ "$action" == "status" ]; then
  container_status
else
  usage
fi
关键字:业务系统管理软件_软件定制开发软件_东营网站建设_网络营销是什么?

版权声明:

本网仅为发布的内容提供存储空间,不对发表、转载的内容提供任何形式的保证。凡本网注明“来源:XXX网络”的作品,均转载自其它媒体,著作权归作者所有,商业转载请联系作者获得授权,非商业转载请注明出处。

我们尊重并感谢每一位作者,均已注明文章来源和作者。如因作品内容、版权或其它问题,请及时与我们联系,联系邮箱:809451989@qq.com,投稿邮箱:809451989@qq.com

责任编辑: