```
scalar omp = $sysinfo.omp
if rseed > 0
set seed rseed
endif
string blas = $sysinfo.blas
if blas == "unknown"
blas = "sysblas"
endif
string cnames
if omp
sprintf cnames "m n k vanilla openmp %s", blas
else
sprintf cnames "m n k vanilla %s", blas
endif
matrix scorecard = zeros(0, omp ? 3 : 2)
# dgemm-1: first set of experiments
scalar m = 128 # rows of A
scalar n = 128 # cols of B
scalar k = 128 # cols of A = rows of B
scalar h = 1 # initial number of iterations
scalar iters = 5 # number of doublings in multime()
loop i=1..3 --quiet
matrix mm = dgemm_test1(m, n, k, h, iters, i, omp)
colnames(mm, cnames)
print_dgemm_matrix(&mm, 1, i)
analyse_dgemm_matrix(mm, omp, &scorecard)
flush
endloop
# dgemm-2: second set of experiments
loop i=1..3 --quiet
matrix mm = dgemm_test2(i, omp)
colnames(mm, cnames)
print_dgemm_matrix(&mm, 2, i)
analyse_dgemm_matrix(mm, omp, &scorecard)
flush
endloop
print_system_info(blas)
printf "\nPerformance summary:\n"
colnames(scorecard, cnames + 6)
analyse_score(scorecard)
```

```
matrix M = zeros(iters, 1)
scalar mnk
scalar gflops, et
matrix c
# printf " mnk secs Gflops\n"
loop i=1..iters --quiet
matrix a = mnormal(m,k)
matrix b = mnormal(k,n)
mnk = m*n*k
scalar h1 = h
if h*mnk < 1.0e8
h1 *= int(1.0e8 / (h*mnk))
endif
set stopwatch
if h1 == 1
set stopwatch
matrix c = a*b
et = $stopwatch
gflops = 2*mnk/(1.0e9*et)
else
set stopwatch
loop h1 --quiet
matrix c = a*b
endloop
scalar et = $stopwatch
gflops = 2*h1*mnk/(1.0e9*et)
endif
# printf "%10d %6.3fs %10.5f\n", mnk, et, gflops
if !isnull(szmat)
szmat[i,] = {m, n, k}
endif
M[i] = (et == 0) ? (1/0) : gflops
if h > 1
m *= 2
h /= 2
elif variant == 1
k *= 2
elif variant == 2
m *= 2
n *= 2
elif variant == 3
m *= 2
n *= 2
k *= 2
endif
endloop
return M
```

```
matrix M = {}
matrix szmat = zeros(iters, 3)
# printf "\nVanilla native code:\n\n"
set blas_mnk_min -1
set mp_mnk_min -1
M ~= multime(m, n, k, h, iters, variant, &szmat)
if omp
# printf "\nNative code with OpenMP:\n\n"
set mp_mnk_min 0
M ~= multime(m, n, k, h, iters, variant)
endif
# printf "\nsystem BLAS:\n\n"
set blas_mnk_min 0
M ~= multime(m, n, k, h, iters, variant)
return szmat ~ M
```

```
scalar m n k h iters
matrix M = {}
if setup == 1
m = 8 # rows of A
n = 8 # cols of B
k = 8 # cols of A = rows of B
h = 100000 # initial number of iterations
iters = 10 # number of doublings in multime()
elif setup == 2
m = 10 # rows of A
n = 2 # cols of B
k = 1000 # cols of A = rows of B
h = 50000 # initial number of iterations
iters = 10 # number of doublings in multime()
else
m = 10 # rows of A
n = 10 # cols of B
k = 1000 # cols of A = rows of B
h = 2000 # initial number of iterations
iters = 6 # number of doublings in multime()
endif
matrix szmat = zeros(iters, 3)
# printf "\nVanilla native code:\n\n"
set blas_mnk_min -1
set mp_mnk_min -1
M = multime(m, n, k, h, iters, 0, &szmat)
if omp
# printf "\nNative code with OpenMP:\n\n"
set mp_mnk_min 0 # 100000
M ~= multime(m, n, k, h, iters, 0)
endif
# printf "\nsystem BLAS:\n\n"
set blas_mnk_min 0
M ~= multime(m, n, k, h, iters, 0)
return szmat ~ M
```

```
printf "dgemm experiment %d, variant %d, speed in Gflops\n", j, k
printf "\n%#10.5v\n", m
```

```
scalar ret = 0
if minc(m) == maxc(m)
ret = minc(m)
endif
return ret
```

```
if k == 1
return "vanilla"
elif !omp
return $sysinfo.blas
elif k == 2
return "openmp"
else
return $sysinfo.blas
endif
```

```
if s == "netlib"
return "Netlib"
elif s == "atlas"
return "ATLAS"
elif s == "openblas"
return "OpenBLAS"
elif s == "mkl"
return "Intel MKL"
elif s == "veclib"
return "Apple VecLib"
else
return s
endif
```

```
string s = $sysinfo.os
scalar wlen = $sysinfo.wordlen
string ws = wlen == 32 ? "32-bit" : "64-bit"
string os
if s == "windows"
sprintf os "Windows (%s)", ws
elif s == "osx"
sprintf os "Mac OS X (%s)", ws
elif s == "linux"
sprintf os "Linux (%s)", ws
else
sprintf os "other (%s)", ws
endif
return os
```

```
printf "Operating system: %s\n", os_string()
printf "BLAS library: %s\n", blas_long_string(blas)
printf "Number of processors: %d\n", $sysinfo.nproc
printf "OpenMP enabled: %s\n", $sysinfo.omp ? "yes" : "no"
```

```
matrix myscore = zeros(3, cols(score))
matrix best = imaxr(m[,4:])
scalar d = dominant(best)
printf "result: "
if d > 0
printf "%s dominates\n", method_string(d, omp)
myscore[1,d] = 1
else
loop i=2..rows(m) --quiet
scalar mnk = m[i,1] * m[i,2] * m[i,3]
d = dominant(best[i:])
if d > 0
printf "%s dominates for mnk >= %d\n", method_string(d, omp), mnk
myscore[2,d] = mnk
dsml = dominant(best[1:i-1])
if dsml > 0
printf " %s dominates for mnk < %d\n", method_string(dsml, omp), mnk
myscore[3,dsml] = mnk
endif
break
endif
endloop
endif
score |= myscore
if d == 0
printf "no method dominates\n"
endif
printf "\n"
```

```
scalar n = nelem(v)
if n == 1
printf "%d\n", v[1]
else
printf "("
loop i=1..n --quiet
printf "%d", v[i]
if i < n
printf ", "
endif
endloop
printf ")\n"
endif
```

```
scalar ntests = rows(score) / 3
scalar hi lo
loop j=1..cols(score) --quiet
printf "\n%s -\n", colname(score, j)
scalar d0 = 0
scalar d1 = 0
scalar d2 = 0
matrix above = {}
matrix below = {}
scalar i = 1
loop k=1..ntests --quiet
d0 += score[i,j]
hi = score[i+1,j]
if hi > 0
above ~= {hi}
d1++
endif
lo = score[i+2,j]
if lo > 0
below ~= {lo}
d2++
endif
i += 3
endloop
printf " dominates outright in %d out of %d tests\n", d0, ntests
if d1 > 0
printf " dominates in %d test(s) for mnk >= ", d1
printvec(&above)
endif
if d2 > 0
printf " dominates in %d test(s) for mnk < ", d2
printvec(&below)
endif
endloop
printf "\n"
```