numactl --interleave=all ./testing_cgeqrf -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.5.0  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_cgeqrf [options] [-h|--help]

ngpu 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   ||R||_F / ||A||_F
=======================================================================
  100   100     ---   (  ---  )      4.32 (   0.00)     ---
 1000  1000     ---   (  ---  )    166.99 (   0.03)     ---
   10    10     ---   (  ---  )      0.24 (   0.00)     ---
   20    20     ---   (  ---  )      1.15 (   0.00)     ---
   30    30     ---   (  ---  )      2.70 (   0.00)     ---
   40    40     ---   (  ---  )      4.49 (   0.00)     ---
   50    50     ---   (  ---  )      5.93 (   0.00)     ---
   60    60     ---   (  ---  )      7.25 (   0.00)     ---
   70    70     ---   (  ---  )      1.77 (   0.00)     ---
   80    80     ---   (  ---  )      2.55 (   0.00)     ---
   90    90     ---   (  ---  )      3.33 (   0.00)     ---
  100   100     ---   (  ---  )      4.34 (   0.00)     ---
  200   200     ---   (  ---  )     11.87 (   0.00)     ---
  300   300     ---   (  ---  )     25.62 (   0.01)     ---
  400   400     ---   (  ---  )     39.15 (   0.01)     ---
  500   500     ---   (  ---  )     57.54 (   0.01)     ---
  600   600     ---   (  ---  )     74.58 (   0.02)     ---
  700   700     ---   (  ---  )     92.93 (   0.02)     ---
  800   800     ---   (  ---  )    119.90 (   0.02)     ---
  900   900     ---   (  ---  )    139.50 (   0.03)     ---
 1000  1000     ---   (  ---  )    160.19 (   0.03)     ---
 2000  2000     ---   (  ---  )    464.97 (   0.09)     ---
 3000  3000     ---   (  ---  )    805.28 (   0.18)     ---
 4000  4000     ---   (  ---  )   1171.38 (   0.29)     ---
 5000  5000     ---   (  ---  )   1312.35 (   0.51)     ---
 6000  6000     ---   (  ---  )   1615.34 (   0.71)     ---
 7000  7000     ---   (  ---  )   1814.43 (   1.01)     ---
 8000  8000     ---   (  ---  )   1963.68 (   1.39)     ---
 9000  9000     ---   (  ---  )   2086.49 (   1.86)     ---
10000 10000     ---   (  ---  )   2160.64 (   2.47)     ---
12000 12000     ---   (  ---  )   2245.60 (   4.10)     ---
14000 14000     ---   (  ---  )   2348.00 (   6.23)     ---
16000 16000     ---   (  ---  )   2360.41 (   9.26)     ---
18000 18000     ---   (  ---  )   2375.64 (  13.09)     ---
20000 20000     ---   (  ---  )   2423.19 (  17.61)     ---
numactl --interleave=all ./testing_cgeqrf_gpu -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.5.0  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_cgeqrf_gpu [options] [-h|--help]

version 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   ||Ax-b||_F/(N*||A||_F*||x||_F)
====================================================================================
  100   100     ---   (  ---  )      2.04 (   0.00)     ---
 1000  1000     ---   (  ---  )    158.45 (   0.03)     ---
   10    10     ---   (  ---  )      0.01 (   0.00)     ---
   20    20     ---   (  ---  )      0.05 (   0.00)     ---
   30    30     ---   (  ---  )      0.17 (   0.00)     ---
   40    40     ---   (  ---  )      0.33 (   0.00)     ---
   50    50     ---   (  ---  )      0.62 (   0.00)     ---
   60    60     ---   (  ---  )      1.03 (   0.00)     ---
   70    70     ---   (  ---  )      0.99 (   0.00)     ---
   80    80     ---   (  ---  )      1.48 (   0.00)     ---
   90    90     ---   (  ---  )      2.18 (   0.00)     ---
  100   100     ---   (  ---  )      5.44 (   0.00)     ---
  200   200     ---   (  ---  )      8.58 (   0.01)     ---
  300   300     ---   (  ---  )     21.61 (   0.01)     ---
  400   400     ---   (  ---  )     34.58 (   0.01)     ---
  500   500     ---   (  ---  )     53.53 (   0.01)     ---
  600   600     ---   (  ---  )     72.98 (   0.02)     ---
  700   700     ---   (  ---  )     95.55 (   0.02)     ---
  800   800     ---   (  ---  )    116.02 (   0.02)     ---
  900   900     ---   (  ---  )    142.03 (   0.03)     ---
 1000  1000     ---   (  ---  )    168.84 (   0.03)     ---
 2000  2000     ---   (  ---  )    468.10 (   0.09)     ---
 3000  3000     ---   (  ---  )    816.00 (   0.18)     ---
 4000  4000     ---   (  ---  )   1188.32 (   0.29)     ---
 5000  5000     ---   (  ---  )   1246.02 (   0.54)     ---
 6000  6000     ---   (  ---  )   1466.15 (   0.79)     ---
 7000  7000     ---   (  ---  )   1648.23 (   1.11)     ---
 8000  8000     ---   (  ---  )   1914.82 (   1.43)     ---
 9000  9000     ---   (  ---  )   1910.97 (   2.03)     ---
10000 10000     ---   (  ---  )   1955.22 (   2.73)     ---
12000 12000     ---   (  ---  )   2176.58 (   4.23)     ---
14000 14000     ---   (  ---  )   2303.54 (   6.35)     ---
16000 16000     ---   (  ---  )   2341.02 (   9.33)     ---
18000 18000     ---   (  ---  )   2359.87 (  13.18)     ---
20000 20000     ---   (  ---  )   2435.09 (  17.52)     ---
