In [2]:
import pandas as pd
from pathlib import Path
In [3]:
# Load every `stats` file found under out/ into one long-format frame with
# columns: value, metric, commit, opt, n.
frames = []
for stats_path in sorted(Path('out').glob('**/stats')):
    # The directory directly below out/ is named "<n>-..."; the integer prefix
    # is kept as `n` (presumably the commit's position in the series -- confirm
    # against whatever produced out/).
    run_index = int(stats_path.parts[1].split('-')[0])

    # Each file is headerless, tab-separated: "<name>\t<value>".
    raw = pd.read_csv(stats_path, names=['name', 'value'], delimiter='\t')

    # `name` is '//'-separated: field 0 is "<commit>-<opt>", field 3 the metric.
    name_fields = raw['name'].str.split('//', expand=True)
    commit_opt = name_fields[0].str.split('-', expand=True)

    frames.append(
        raw.assign(
            # Strip stray whitespace: the raw data contains 'sparks_dud ' with a
            # trailing space, which would break exact-label lookups later on.
            metric=name_fields[3].str.strip(),
            commit=commit_opt[0],
            opt=commit_opt[1],
            n=run_index,
        ).drop(columns='name')
    )

# Row labels restart at 0 for every file; downstream cells only group by
# column values, so the index is intentionally left as-is.
xs = pd.concat(frames)
print(xs)
            value                  metric  \
0    5.060707e+10         bytes allocated   
1    1.719000e+03                 num_GCs   
2    1.002446e+08      average_bytes_used   
3    3.713043e+08          max_bytes_used   
4    1.800000e+01  num_byte_usage_samples   
..            ...                     ...   
238  6.586019e+11                  cycles   
239  6.112792e+11            instructions   
240  2.340010e+09            cache-misses   
241  1.132859e+11                branches   
242  5.726920e+09           branch-misses   

                                       commit opt   n  
0    a69f8278984ea36f191bcbb9d81553c653fba092  o0   0  
1    a69f8278984ea36f191bcbb9d81553c653fba092  o0   0  
2    a69f8278984ea36f191bcbb9d81553c653fba092  o0   0  
3    a69f8278984ea36f191bcbb9d81553c653fba092  o0   0  
4    a69f8278984ea36f191bcbb9d81553c653fba092  o0   0  
..                                        ...  ..  ..  
238  394677198ccb4f984bec34bf8d43262b80e447c6  o1  13  
239  394677198ccb4f984bec34bf8d43262b80e447c6  o1  13  
240  394677198ccb4f984bec34bf8d43262b80e447c6  o1  13  
241  394677198ccb4f984bec34bf8d43262b80e447c6  o1  13  
242  394677198ccb4f984bec34bf8d43262b80e447c6  o1  13  

[6804 rows x 5 columns]
In [6]:
# Distinct metric names present in the loaded stats, in order of first appearance.
xs.loc[:, 'metric'].unique()
Out[6]:
array(['bytes allocated', 'num_GCs', 'average_bytes_used',
       'max_bytes_used', 'num_byte_usage_samples',
       'peak_megabytes_allocated', 'init_cpu_seconds',
       'init_wall_seconds', 'mut_cpu_seconds', 'mut_wall_seconds',
       'GC_cpu_seconds', 'GC_wall_seconds', 'exit_cpu_seconds',
       'exit_wall_seconds', 'total_cpu_seconds', 'total_wall_seconds',
       'major_gcs', 'allocated_bytes', 'max_live_bytes',
       'max_large_objects_bytes', 'max_compact_bytes', 'max_slop_bytes',
       'max_mem_in_use_bytes', 'cumulative_live_bytes', 'copied_bytes',
       'par_copied_bytes', 'cumulative_par_max_copied_bytes',
       'cumulative_par_balanced_copied_bytes', 'fragmentation_bytes',
       'alloc_rate', 'productivity_cpu_percent',
       'productivity_wall_percent', 'bound_task_count', 'sparks_count',
       'sparks_converted', 'sparks_overflowed', 'sparks_dud ',
       'sparks_gcd', 'sparks_fizzled', 'work_balance', 'n_capabilities',
       'task_count', 'peak_worker_count', 'worker_count',
       'gc_alloc_block_sync_spin', 'gc_alloc_block_sync_yield',
       'gc_spin_spin', 'gc_spin_yield', 'mut_spin_spin', 'mut_spin_yield',
       'waitForGcThreads_spin', 'waitForGcThreads_yield',
       'whitehole_gc_spin', 'whitehole_lockClosure_spin',
       'whitehole_lockClosure_yield', 'whitehole_executeMessage_spin',
       'whitehole_threadPaused_spin', 'any_work', 'no_work',
       'scav_find_work', 'gen_0_collections', 'gen_0_par_collections',
       'gen_0_cpu_seconds', 'gen_0_wall_seconds',
       'gen_0_max_pause_seconds', 'gen_0_avg_pause_seconds',
       'gen_0_sync_spin', 'gen_0_sync_yield', 'gen_1_collections',
       'gen_1_par_collections', 'gen_1_cpu_seconds', 'gen_1_wall_seconds',
       'gen_1_max_pause_seconds', 'gen_1_avg_pause_seconds',
       'gen_1_sync_spin', 'gen_1_sync_yield', 'cycles', 'instructions',
       'cache-misses', 'branches', 'branch-misses'], dtype=object)
In [33]:
# Mean value per (metric, opt, n), pivoted so that every n becomes a column.
# Select 'value' *before* aggregating: `xs` also carries the string column
# 'commit', and pandas >= 2.0 raises a TypeError when asked for its mean
# (older versions silently dropped it).
gs = xs.groupby(['metric', 'opt', 'n'])['value'].mean().unstack()

# Express every column relative to the n == 0 column of the same row.
# Rows whose n == 0 value is 0 come out as NaN/inf.
rel = gs.divide(gs[0], axis=0)
rel
Out[33]:
n 0 1 2 3 4 5 6 7 8 9 10 11 12 13
metric opt
GC_cpu_seconds o0 1.0 1.002823 0.995423 0.987554 0.990345 0.995100 0.988458 0.995318 1.013204 1.016144 1.012122 1.023479 1.029281 1.028144
o1 1.0 0.994068 0.987643 0.994019 0.996859 0.993973 0.988122 0.990229 0.999359 0.989451 0.990873 0.986232 0.992389 0.993649
GC_wall_seconds o0 1.0 1.002814 0.995397 0.987586 0.990300 0.995063 0.988441 0.995323 1.013174 1.016131 1.012131 1.023444 1.029294 1.028127
o1 1.0 0.994069 0.987662 0.994033 0.996882 0.993977 0.988119 0.990243 0.999392 0.989465 0.990895 0.986235 0.992388 0.993676
alloc_rate o0 1.0 0.994835 0.996497 1.001991 1.009151 1.012502 1.013309 1.008085 1.020716 1.028229 1.026406 1.021702 1.016569 1.016293
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
whitehole_threadPaused_spin o1 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
work_balance o0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
o1 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
worker_count o0 1.0 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
o1 1.0 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000

162 rows × 14 columns

In [40]:
# One line per (metric, opt) pair, with n along the x-axis. Values come from
# `rel`, i.e. they are ratios against the n == 0 measurement.
ax = rel.loc[['bytes allocated', 'cycles', 'instructions']].T.plot(
    title='Selected metrics relative to n = 0',
)
ax.set_ylabel('value / value at n = 0')
ax
Out[40]:
<AxesSubplot:xlabel='n'>
In [38]:
# Relative values (vs. n == 0) for the two headline metrics, both opt levels.
headline_metrics = ['bytes allocated', 'cycles']
rel.loc[headline_metrics]
Out[38]:
n 0 1 2 3 4 5 6 7 8 9 10 11 12 13
metric opt
bytes allocated o0 1.0 1.000257 1.009229 1.009262 1.009266 1.009271 1.028301 1.028305 1.037981 1.037452 1.037747 1.037746 1.043013 1.041336
o1 1.0 1.000067 1.002442 1.002449 1.002447 1.002452 1.007561 1.007559 1.011002 1.010860 1.010947 1.010946 1.012310 1.011872
cycles o0 1.0 1.002498 1.007534 1.001947 0.993675 0.993522 1.006332 1.009875 1.011711 1.004925 1.005599 1.012508 1.020923 1.016255
o1 1.0 0.998963 0.997238 0.999641 0.999886 1.000559 0.994910 0.993959 1.003594 0.998274 0.993961 1.002930 1.005449 1.008866
In [53]:
import subprocess
def get_commit_title(commit: str) -> str:
    """Return the subject line of `commit` as reported by `git show`.

    Runs `git show -q --pretty=%s <commit>` in the current working directory
    and returns its standard output with surrounding whitespace removed.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    git_cmd = ['git', 'show', '-q', '--pretty=%s', commit]
    completed = subprocess.run(
        git_cmd,
        stdout=subprocess.PIPE,
        check=True,
        encoding='UTF-8',
    )
    return completed.stdout.strip()

# The `commits` file holds whitespace-separated commit hashes; a hash's
# position in that file becomes its `n`.
commit_hashes = Path('commits').read_text().split()

# One (n, hash, subject) row per commit; the subject is looked up through git.
commit_rows = [
    (position, sha, get_commit_title(sha))
    for position, sha in enumerate(commit_hashes)
]

commits = pd.DataFrame(commit_rows, columns=['n', 'commit', 'subject'])
commits
Out[53]:
n commit subject
0 0 a69f8278984ea36f191bcbb9d81553c653fba092 X86.Instr: Strictness
1 1 b3aad28082d11cdfe5b85aa4e18c087c084d9e91 Try specialising backend
2 2 be58b9edca1f77ae4ba804fd89c165bfb894ee1b TcMType: Various other strictness
3 3 c47930abb15c09767a319f9c77c2ecbd45c5e73c CmmToAsm.Reg.Linear: Use concat rather than re...
4 4 3f657f438bbc5e224bae19e3afdefca95a0bb0ab Refactor linearRA
5 5 c1a2a33e310d85bb6681e6745621c3b5ffd98f58 CmmToAsm.Reg.Linear: oneShot-ify RegM
6 6 6a8f5f96f1c9f65059c11136cadbebce5c2cc389 Drop tidyCos
7 7 e4eedf2b4809b57a292ad76e2d35941e8607f236 CoreTidy: Use strictMap
8 8 e82deb577b7c930565f3841e9acee2b62b281140 Allow wrapLocM to inline
9 9 54182c131f2467df3c7fc507ca1ac987d4edf3c5 Rewrite.split: Fix reboxing
10 10 41d3da7171a04d19048e3e4ff779a36fc39727a0 zonk: Strict zonking of applications
11 11 4fe40da0d5b63ba7e0f298ba5b970cddefc39906 TcS: oneShot-ify
12 12 91cf8b716ce0bbb91fa37ce8543d16da4d1c79f7 GHC.Tc.Solver.Rewrite: oneShot-ify
13 13 394677198ccb4f984bec34bf8d43262b80e447c6 typecheck: Eliminate allocations in tc_eq_type
In [ ]: