实验要求
Lab 1 两个测试脚本(bash)的解释
- test-mr.sh
#!/bin/sh

#
# basic map-reduce test
#

# RACE holds an optional extra flag passed to every `go build` below.
# Both assignments are commented out, so by default $RACE expands to nothing.
#RACE=

# Uncomment this to run the tests with the Go race detector.
#RACE=-race

# -------------------------------------------------------
# Set up the test environment
# -------------------------------------------------------

# Create a fresh test directory and clear out data from earlier runs.
rm -rf mr-tmp
mkdir mr-tmp || exit 1
cd mr-tmp || exit 1
rm -f mr-*

# -------------------------------------------------------
# Build the plugins and main programs needed by MapReduce
# -------------------------------------------------------

# Build each application plugin inside the mrapps directory.
# $RACE is left unquoted on purpose: when it is empty or unset it must
# disappear instead of being passed to `go build` as an empty argument.
for app in wc indexer mtiming rtiming crash nocrash; do
  # shellcheck disable=SC2086
  (cd ../../mrapps && go build $RACE -buildmode=plugin "$app.go") || exit 1
done

# Build the main MapReduce programs: master, worker and the sequential version.
for prog in mrmaster mrworker mrsequential; do
  # shellcheck disable=SC2086
  (cd .. && go build $RACE "$prog.go") || exit 1
done

# Set to 1 by any test below that fails; checked in the final summary.
failed_any=0

# -------------------------------------------------------
# Word-count test (wc test)
# -------------------------------------------------------

# Generate the correct word-count output to use as the comparison baseline.
../mrsequential ../../mrapps/wc.so ../pg*txt || exit 1
sort mr-out-0 > mr-correct-wc.txt
rm -f mr-out*

echo '***' Starting wc test.

# Start the master in the background.
timeout -k 2s 180s ../mrmaster ../pg*txt &

# Give the master time to get its socket ready.
sleep 1

# Start several workers in parallel.
timeout -k 2s 180s ../mrworker ../../mrapps/wc.so &
timeout -k 2s 180s ../mrworker ../../mrapps/wc.so &
timeout -k 2s 180s ../mrworker ../../mrapps/wc.so &

# Wait for the background processes; workers are expected to exit once the
# job has finished.
wait

# Compare the actual output with the expected output.
sort mr-out* | grep . > mr-wc-all
if cmp mr-wc-all mr-correct-wc.txt; then
  echo '---' wc test: PASS
else
  echo '---' wc output is not the same as mr-correct-wc.txt
  echo '---' wc test: FAIL
  failed_any=1
fi

# Wait for any remaining processes to exit.
wait ; wait ; wait

# -------------------------------------------------------
# Index-building test (indexer test)
# -------------------------------------------------------

# Remove earlier output files and generate the correct indexer output.
rm -f mr-*
../mrsequential ../../mrapps/indexer.so ../pg*txt || exit 1
sort mr-out-0 > mr-correct-indexer.txt
rm -f mr-out*

echo '***' Starting indexer test.

# Start the master in the background and give it a moment to come up.
timeout -k 2s 180s ../mrmaster ../pg*txt &
sleep 1

# Start two workers: one in the background, one in the foreground so the
# script blocks here until that worker exits.
timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so &
timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so

# Compare the output with the expected result.
sort mr-out* | grep . > mr-indexer-all
if cmp mr-indexer-all mr-correct-indexer.txt; then
  echo '---' indexer test: PASS
else
  echo '---' indexer output is not the same as mr-correct-indexer.txt
  echo '---' indexer test: FAIL
  failed_any=1
fi

wait ; wait

# -------------------------------------------------------
# Map parallelism test
# -------------------------------------------------------

# Clear old output files, then start the master in the background.
rm -f mr-out* mr-worker*
timeout -k 2s 180s ../mrmaster ../pg*txt &
sleep 1

# Start two workers with the mtiming.so plugin: one in the background, one
# in the foreground so the script blocks until that worker exits.
timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so &
timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so

# Count the output lines starting with "times-"; exactly two are expected,
# one per worker.
NT=$(cat mr-out* | grep -c '^times-')
if [ "$NT" != "2" ]; then
  echo '---' saw "$NT" workers rather than 2
  echo '---' map parallelism test: FAIL
  failed_any=1
fi

# Check that the expected degree of parallelism was reached: some output
# line must start with "parallel" and contain " 2".
if cat mr-out* | grep '^parallel.* 2' > /dev/null; then
  echo '---' map parallelism test: PASS
else
  echo '---' map workers did not run in parallel
  echo '---' map parallelism test: FAIL
  failed_any=1
fi

wait ; wait

# -------------------------------------------------------
# Reduce parallelism test
# -------------------------------------------------------

# Clear old output files, then start the master in the background.
rm -f mr-out* mr-worker*
timeout -k 2s 180s ../mrmaster ../pg*txt &
sleep 1

# Start two workers with the rtiming.so plugin: one in the background, one
# in the foreground so the script blocks until that worker exits.
timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so &
timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so

# Count the output lines of the form "<lowercase letter> 2..."; at least two
# such lines are required for the test to pass.
NT=$(cat mr-out* | grep -c '^[a-z] 2')
if [ "$NT" -lt "2" ]; then
  echo '---' too few parallel reduces.
  echo '---' reduce parallelism test: FAIL
  failed_any=1
else
  echo '---' reduce parallelism test: PASS
fi

wait ; wait

# -------------------------------------------------------
# Crash-recovery test (crash test)
# -------------------------------------------------------

# Generate the correct output with the non-crashing plugin; it is the
# baseline the crash test output is compared against.
../mrsequential ../../mrapps/nocrash.so ../pg*txt || exit 1
sort mr-out-0 > mr-correct-crash.txt
rm -f mr-out*

echo '***' Starting crash test.

# Run the master in a background subshell; the marker file mr-done is
# created as soon as the master (or its timeout) returns.
rm -f mr-done
(timeout -k 2s 180s ../mrmaster ../pg*txt ; touch mr-done ) &
sleep 1

# Start a first worker with the crash.so plugin, which is meant to simulate
# worker crashes.
timeout -k 2s 180s ../mrworker ../../mrapps/crash.so &

# Mimic masterSock() in rpc.go to derive the socket path.
SOCKNAME=/var/tmp/824-mr-$(id -u)

# Two background loops keep relaunching a worker for as long as the socket
# exists and the master has not finished.
( while [ -e "$SOCKNAME" ] && [ ! -f mr-done ]; do
    timeout -k 2s 180s ../mrworker ../../mrapps/crash.so
    sleep 1
  done ) &

( while [ -e "$SOCKNAME" ] && [ ! -f mr-done ]; do
    timeout -k 2s 180s ../mrworker ../../mrapps/crash.so
    sleep 1
  done ) &

# The same relaunch loop in the foreground: the script stays here until the
# socket disappears or mr-done shows up.
while [ -e "$SOCKNAME" ] && [ ! -f mr-done ]; do
  timeout -k 2s 180s ../mrworker ../../mrapps/crash.so
  sleep 1
done

# Wait for the background jobs, then remove the socket file.
wait
wait
wait
rm "$SOCKNAME"

# Compare the crash test output with the baseline.
sort mr-out* | grep . > mr-crash-all
if cmp mr-crash-all mr-correct-crash.txt; then
  echo '---' crash test: PASS
else
  echo '---' crash output is not the same as mr-correct-crash.txt
  echo '---' crash test: FAIL
  failed_any=1
fi

# -------------------------------------------------------
# Final summary
# -------------------------------------------------------

# failed_any is set to 1 by any failing test above; exit non-zero in that case.
if [ "$failed_any" -eq 0 ]; then
  echo '***' PASSED ALL TESTS
else
  echo '***' FAILED SOME TESTS
  exit 1
fi
- test-mr-many.sh
以下是这个脚本的逐行备注:
#!/usr/bin/env bash
- 指定使用 `bash` 解释器执行脚本。
# Require exactly one argument (the number of trials); otherwise print the
# usage line and exit with status 1.
if [ $# -ne 1 ]; then
  echo "Usage: $0 numTrials"
  exit 1
fi
- 检查脚本的参数数量。
  - `if [ $# -ne 1 ]; then`:判断是否恰好传入了一个参数,`$#` 表示参数个数。
  - 如果参数数量不等于 1,输出用法提示并退出,`$0` 表示脚本名。
# On SIGINT (usually Ctrl+C), forward INT to the process group of $pid
# (the negative argument to kill addresses a process group) and exit 1.
trap 'kill -INT -$pid; exit 1' INT
- 设置 `trap` 捕获 `INT` 信号(通常为 Ctrl+C)。
- 捕获到信号时,向 `$pid` 对应的进程组(即后台运行的 `test-mr.sh`)发送 `INT` 信号,并以状态码 1 退出脚本。
# Note: because the socketID is based on the current userID,
# ./test-mr.sh cannot be run in parallel
- 注释解释:因为 `socketID` 基于当前 `userID`,所以不能并行运行 `./test-mr.sh`,否则会冲突。
# Number of trials to run, taken from the first command-line argument.
runs=$1
# Make sure test-mr.sh has the executable bit set.
chmod +x test-mr.sh
- `runs=$1`:将命令行参数赋值给变量 `runs`,表示测试的执行次数。
- `chmod +x test-mr.sh`:赋予 `test-mr.sh` 可执行权限。
# Run test-mr.sh once per trial and stop at the first failing trial.
# `seq` is kept (rather than a C-style loop) so that $i still equals the
# last trial number after the loop ends.
for i in $(seq 1 "$runs"); do
  # Run the test in the background so its PID can be recorded in $pid.
  timeout -k 2s 900s ./test-mr.sh &
  pid=$!
  # A non-zero status from the test (or from timeout) fails the whole run.
  if ! wait "$pid"; then
    echo '***' FAILED TESTS IN TRIAL "$i"
    exit 1
  fi
done
- 循环执行指定次数的测试。
  - `for i in $(seq 1 $runs); do`:循环从 1 到 `$runs`,`i` 表示当前循环的次数。
  - `timeout -k 2s 900s ./test-mr.sh &`:启动 `test-mr.sh` 并在后台执行,限制其最长运行时间为 900 秒;超时后先发送 `SIGTERM` 信号,若 2 秒后进程仍未退出,再发送 `SIGKILL` 信号。
  - `pid=$!`:将最后一个后台进程的 PID 赋值给 `pid`。
  - `if ! wait $pid; then`:等待 `test-mr.sh` 执行完毕,如果返回值非零表示测试失败。
  - `echo '***' FAILED TESTS IN TRIAL $i`:如果测试失败,输出失败信息,并退出脚本。
# Report success; $i is whatever value the trial loop variable was left with.
echo '***' PASSED ALL $i TESTING TRIALS
- 如果循环结束且所有测试均通过,输出成功信息。