parent
d6d71364de
commit
879927f16e
1 changed files with 60 additions and 0 deletions
@ -0,0 +1,60 @@ |
|||||||
|
#!/usr/bin/env python |
||||||
|
# |
||||||
|
# 20160920 |
||||||
|
# Wirawan Purwanto |
||||||
|
# |
||||||
|
|
||||||
|
def est_hpl_timing(N, nprocs, proc_gflops, eff=0.8):
    """Estimate how long an HPL run takes on an (N x N) problem.

    Arguments:
      N           -- matrix dimension; must be positive.
      nprocs      -- number of processor cores; must be at least 1.
      proc_gflops -- GFLOPS delivered by each core; must be positive.
      eff         -- efficiency factor in (0, 1]; the ideal time is
                     divided by it (default 0.8).

    Returns the 4-tuple
      (est, num_gflop, tot_proc_gflops, proc_mem_gb)
    where `est` is the estimated run time (GFLOP / GFLOPS / eff, i.e.
    seconds), `num_gflop` the operation count in GFLOP,
    `tot_proc_gflops` the aggregate GFLOPS of all cores, and
    `proc_mem_gb` the matrix storage per core in units of 1e9 bytes,
    counting 8 bytes per matrix element.

    Invalid arguments trip an `assert` (AssertionError).
    """
    assert N > 0
    assert nprocs >= 1
    assert proc_gflops > 0
    assert 0.0 < eff <= 1.0

    dim = float(N)

    # Operation count, following the HPL code:
    #   LU factorization: 2/3 N^3 - 1/2 N^2 flops
    #   solve:            2 N^2 flops
    lu_flops = 2 * dim**3 / 3 - 0.5 * dim**2
    solve_flops = 2 * dim**2
    num_gflop = (lu_flops + solve_flops) * 1e-9

    tot_proc_gflops = nprocs * proc_gflops

    # Whole-matrix footprint (8 bytes per element), later split across cores.
    matrix_gb = dim**2 * 1e-9 * 8

    return (
        num_gflop / tot_proc_gflops / eff,
        num_gflop,
        tot_proc_gflops,
        matrix_gb / nprocs,
    )
||||||
|
|
||||||
|
|
||||||
|
def est_hpl_timing2(proc_mem_gb, nprocs, proc_gflops, eff=0.8):
    """Estimate the HPL run time for a problem sized by memory per core.

    The problem is specified by `proc_mem_gb` (matrix RAM per core, in
    GB), `nprocs` processor cores, and `proc_gflops` GFLOPS per core;
    `eff` is passed through to `est_hpl_timing` unchanged.

    The matrix is assumed to be spread evenly over the processors in a
    square arrangement (i.e. P == Q for the tile definition), with
    8 bytes per matrix element.

    Returns the 4-tuple (est, num_gflop, tot_proc_gflops, N): the first
    three values come straight from `est_hpl_timing`, and N is the
    matrix dimension derived here (a float holding a whole number).
    """
    from math import sqrt

    # Side length of the square tile that fills one core's memory.
    tile_dim = sqrt(proc_mem_gb * 1e9 / 8)
    # Global matrix dimension, truncated to a whole number.
    N = float(int(tile_dim * sqrt(nprocs)))

    # The per-core memory reported back by est_hpl_timing (index 3) is
    # not returned; N is reported in its place.
    timing = est_hpl_timing(N, nprocs, proc_gflops, eff)
    return timing[0], timing[1], timing[2], N
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def Test_64core_memscale(proc_mem_gb=(0.15, 0.25, 0.50, 0.780125, 1.25, 2.00, 3.00),
                         proc_gflops=17.6, eff=0.8):
    """[20160920]
    Test: keep at 64 cores, scale up memory and see how much time it takes.

    Arguments:
      proc_mem_gb -- iterable of memory-per-core values (in GB) to scan.
                     The default is an immutable tuple so that it cannot
                     be altered between calls.
      proc_gflops -- GFLOPS per core, passed to `est_hpl_timing2`.
      eff         -- efficiency factor, passed to `est_hpl_timing2`.

    Prints a heading followed by one row per memory size with the
    columns N, mem/proc, nproc, time, numops_gf, proc_gf.
    Returns None.
    """
    from wpylib.text_tools import str_fmt_heading

    nproc = 64
    cols = ("N", "mem/proc", "nproc", "time", "numops_gf", "proc_gf")
    fmt = "%8d %8.3f %5d %6.0f %10.2f %10.2f"
    # NOTE(review): str_fmt_heading presumably derives a string-only
    # heading format with the same column widths as `fmt` -- confirm
    # against wpylib.
    hfmt = str_fmt_heading(fmt)
    print(hfmt % cols)
    for pm1 in proc_mem_gb:
        (est_t, num_gflop, tot_proc_gflops, N) = \
            est_hpl_timing2(pm1, nproc, proc_gflops, eff)
        print(fmt % (N, pm1, nproc, est_t, num_gflop, tot_proc_gflops))
||||||
|
|
Loading…
Reference in new issue