#!/bin/bash
#===============================================================================
# Copyright 2001-2023 Intel Corporation.
#
# This software and the related documents are Intel copyrighted  materials,  and
# your use of  them is  governed by the  express license  under which  they were
# provided to you (License).  Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute,  disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents  are provided as  is,  with no express
# or implied  warranties,  other  than those  that are  expressly stated  in the
# License.
#===============================================================================

# Remind the user that this sample must be adapted to the target system.
printf '%s\n' \
  "This is a SAMPLE run script.  Change it to reflect the correct number" \
  "of CPUs/threads, number of nodes, MPI processes per node, etc.."

# Total number of MPI processes for the HPL run (should be equal to PxQ).
MPI_PROC_NUM=2

# Number of MPI processes on each node.
# MPI_PER_NODE should be equal to 1 or the number of sockets on the system.
# It is the same value as the -perhost or -ppn parameters of mpirun/mpiexec.
MPI_PER_NODE=2

# Number of NUMA nodes per MPI process. (MPI_PER_NODE * NUMA_PER_MPI)
# should be equal to the number of NUMA nodes on the system.
NUMA_PER_MPI=1

# Make the layout visible to mpirun and to the processes it launches.
export MPI_PROC_NUM MPI_PER_NODE NUMA_PER_MPI

#====================================================================
# Following option is for Intel(R) Optimized HPL-AI Benchmark
#====================================================================

# Uncomment the following line to enable Intel(R) Optimized HPL-AI Benchmark
# export USE_HPL_AI=1

#====================================================================
# Following option is for Intel(R) Optimized HPL-AI Benchmark for GPU
#====================================================================

# By default, Intel(R) Optimized HPL-AI Benchmark for GPU will use a
# Bfloat16 matrix. If you prefer fewer iterations, you can choose a
# float-based matrix instead, but it will reduce the maximum problem size.
# export USE_BF16MAT=0

#====================================================================
# Following options are for Intel(R) Distribution for LINPACK
# Benchmark for GPU and Intel(R) Optimized HPL-AI Benchmark for GPU
#====================================================================

# Uncomment the following line to enable GPUs
# export USE_HPL_GPU=1

# Select backend driver for GPU (OpenCL ... 0, Level Zero ... 1)
# export HPL_DRIVER=0

# Number of stacks on each GPU
# export HPL_NUMSTACK=2

# Total number of GPUs on each node
# export HPL_NUMDEV=2

#====================================================================

# Log file that collects the run metadata and the benchmark output.
export OUT=xhpl_intel64_dynamic_outputs.txt

# Select the benchmark binary from the two optional switches:
#   USE_HPL_AI  non-empty -> HPL-AI binary   (xhpl-ai_intel64_dynamic*)
#   USE_HPL_GPU non-empty -> GPU variant     (*_gpu suffix)
# Only emptiness is tested, so any non-empty value (even 0) enables a switch.
# The expansions are quoted so that values containing whitespace, glob
# characters or test operators cannot be misparsed as part of the condition.
if [[ -z "${USE_HPL_AI:-}" ]]; then
  HPL_EXE=xhpl_intel64_dynamic
else
  HPL_EXE=xhpl-ai_intel64_dynamic
fi

if [[ -n "${USE_HPL_GPU:-}" ]]; then
  HPL_EXE="${HPL_EXE}_gpu"
fi

export HPL_EXE

# Unset this variable to avoid initialization failure
unset I_MPI_OFFLOAD

# Path of this script for the log snapshot. Falls back to the stock file name
# when bash cannot report a source path (e.g. the script is read from stdin).
script_path=${BASH_SOURCE[0]:-runme_intel64_dynamic}

printf '%s' "This run was done on: "
date

# Capture some meaningful data for future reference. Everything written inside
# this group is appended to the output log. Each step is best-effort: a missing
# HPL.dat or binary only produces an error message and the run continues.
{
  printf '%s' "This run was done on: "
  date
  echo "HPL.dat: "
  cat HPL.dat
  echo "Binary name: "
  ls -l -- "${HPL_EXE}"
  echo "This script: "
  cat -- "${script_path}"
  echo "Environment variables: "
  env
  echo "Actual run: "
} >> "${OUT}"

# Environment variables can also be set on the Intel(R) MPI Library command
# line using the -genv option (to appear before the -np argument):

mpirun -perhost "${MPI_PER_NODE}" -np "${MPI_PROC_NUM}" ./runme_intel64_prv "$@" \
  | tee -a "${OUT}"
# The pipeline reports tee's status, so remember mpirun's own exit status now,
# before any other command overwrites PIPESTATUS.
mpi_status=${PIPESTATUS[0]}

printf '%s' "Done: " >> "${OUT}"
date >> "${OUT}"

printf '%s' "Done: "
date

# Propagate the benchmark launcher's status instead of the status of `date`.
exit "${mpi_status}"
