
Port datasets

This commit is contained in:
joncrall 2022-05-26 07:36:27 -04:00
parent da6428296d
commit d036df252f
4 changed files with 368 additions and 208 deletions

View File

@@ -8,58 +8,136 @@ import sys
import ubelt as ub
def data_lut(input, size):
if input == "Array with UTF-8 strings":
test_object = []
for x in range(size):
test_object.append(
"نظام الحكم سلطاني وراثي "
"في الذكور من ذرية السيد تركي بن سعيد بن سلطان ويشترط فيمن يختار لولاية"
" الحكم من بينهم ان يكون مسلما رشيدا عاقلا ًوابنا شرعيا لابوين عمانيين "
)
def json_test_data_generators():
"""
Generates data for benchmarks with various sizes
Returns:
Dict[str, callable]:
a mapping from test data name to its generator
Example:
>>> data_lut = json_test_data_generators()
>>> size = 2
>>> keys = sorted(set(data_lut) - {'Complex object'})
>>> for key in keys:
>>> func = data_lut[key]
>>> test_object = func(size)
>>> print('key = {!r}'.format(key))
>>> print('test_object = {!r}'.format(test_object))
"""
data_lut = {}
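# Small registration pattern: the decorator below stores each generator
# function in data_lut under a human-readable name so benchmarks can look
# up test data by name.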
def _register_data(name):
def _wrap(func):
data_lut[name] = func
return _wrap
# seed if desired
#rng = random.Random()
rng = random
@_register_data('Array with doubles')
def array_with_doubles(size):
test_object = [sys.maxsize * rng.random() for _ in range(size)]
return test_object
elif input == "Array with doubles":
test_object = []
for x in range(256):
test_object.append(sys.maxsize * random.random())
else:
raise KeyError(input)
@_register_data('Array with UTF-8 strings')
def array_with_utf8_strings(size):
utf8_string = (
"نظام الحكم سلطاني وراثي "
"في الذكور من ذرية السيد تركي بن سعيد بن سلطان ويشترط فيمن يختار لولاية"
" الحكم من بينهم ان يكون مسلما رشيدا عاقلا ًوابنا شرعيا لابوين عمانيين "
)
test_object = [utf8_string for _ in range(size)]
return test_object
@_register_data('Medium complex object')
def medium_complex_object(size):
user = {
"userId": 3381293,
"age": 213,
"username": "johndoe",
"fullname": "John Doe the Second",
"isAuthorized": True,
"liked": 31231.31231202,
"approval": 31.1471,
"jobs": [1, 2],
"currJob": None,
}
friends = [user, user, user, user, user, user, user, user]
test_object = [[user, friends] for _ in range(size)]
return test_object
@_register_data('Array with True values')
def true_values(size):
test_object = [True for _ in range(size)]
return test_object
@_register_data('Array of Dict[str, int]')
def array_of_dict_string_int(size):
test_object = [
{str(rng.random() * 20): int(rng.random() * 1000000)}
for _ in range(size)
]
return test_object
@_register_data('Dict of List[Dict[str, int]]')
def dict_of_list_dict_str_int(size):
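# Note: only the number of top-level keys scales with size; each value is a
# fixed-length list of 256 single-item dicts.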
keys = set()
while len(keys) < size:
key = str(rng.random() * 20)
keys.add(key)
test_object = {
key: [
{str(rng.random() * 20): int(rng.random() * 1000000)}
for _ in range(256)
]
for key in keys
}
return test_object
@_register_data('Complex object')
def complex_object(size):
import json
# TODO: might be better to register this file with setup.py or
# download it via some mechanism
try:
dpath = ub.Path(__file__).parent
fpath = dpath / 'sample.json'
if not fpath.exists():
raise Exception
except Exception:
import ujson
dpath = ub.Path(ujson.__file__).parent / 'tests'
fpath = dpath / 'sample.json'
if not fpath.exists():
raise Exception
with open(fpath, 'r') as f:
test_object = json.load(f)
if size > 1:
test_object = [test_object] * size
return test_object
return data_lut
def available_json_impls():
JSON_IMPLS = {}
try:
import json
JSON_IMPLS["json"] = json
except ImportError:
pass
try:
import ujson
JSON_IMPLS["ujson"] = ujson
except ImportError:
pass
try:
import nujson
JSON_IMPLS["nujson"] = nujson
except ImportError:
pass
try:
import orjson
JSON_IMPLS["nujson"] = orjson
except ImportError:
pass
try:
import simplejson
JSON_IMPLS["simplejson"] = simplejson
except ImportError:
pass
return JSON_IMPLS
import importlib
known_modnames = [
'ujson', 'json', 'nujson', 'orjson', 'simplejson'
]
json_impls = {}
for libname in known_modnames:
try:
module = importlib.import_module(libname)
except ImportError:
pass
else:
json_impls[libname] = {
'module': module,
'version': module.__version__,
}
return json_impls
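# Illustrative sketch (a hypothetical helper, not part of this commit): the
# mapping returned above pairs each importable library name with its module
# and version, and could be consumed like so.
def _demo_available_json_impls():
    impls = available_json_impls()
    for name, info in impls.items():
        # Every listed backend exposes a dumps() callable.
        encoded = info['module'].dumps({'demo': 1})
        print(name, info['version'], len(encoded))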
def benchmark_json_dumps():
@@ -67,28 +145,34 @@ def benchmark_json_dumps():
sys.path.append(ub.expandpath('~/code/ultrajson/tests'))
from benchmarker import Benchmarker
JSON_IMPLS = available_json_impls()
json_impls = available_json_impls()
data_lut = json_test_data_generators()
version_infos = {k: v.__version__ for k, v in JSON_IMPLS.items()}
def method_lut(impl):
return JSON_IMPLS[impl].dumps
list(data_lut.keys())
# These are the parameters that we benchmark over
basis = {
"input": [
"Array with UTF-8 strings",
"Array with doubles",
'Array with doubles',
'Array with UTF-8 strings',
# 'Medium complex object',
'Array with True values',
'Array of Dict[str, int]',
# 'Dict of List[Dict[str, int]]',
# 'Complex object'
],
"size": [1, 32, 256, 1024, 2048],
"impl": list(JSON_IMPLS.keys()),
"impl": list(json_impls.keys()),
}
# The Benchmarker class is a new experimental API around timerit to
# abstract away the details of timing a process over a grid of parameters,
# serializing the results, and aggregating results from disparate runs.
benchmark = Benchmarker(
name='bench_json_dumps',
# Change params here to modify number of trials
num=100,
bestof=10,
verbose=2,
basis=basis,
)
@@ -96,11 +180,11 @@ def benchmark_json_dumps():
for params in benchmark.iter_params():
# Make any modifications you need to compute input kwargs for each
# method here.
impl = params["impl"]
impl_version = version_infos[impl]
impl_info = json_impls[params["impl"]]
method = impl_info['module'].dumps
impl_version = impl_info['version']
params["impl_version"] = impl_version
method = method_lut(impl)
data = data_lut(params["input"], params["size"])
data = data_lut[params["input"]](params["size"])
# Timerit will run some user-specified number of loops
# and compute time stats with a methodology similar to timeit.
for timer in benchmark.measure():
@@ -114,20 +198,25 @@ def benchmark_json_dumps():
benchmark.dump_in_dpath(dpath)
RECORD_ALL = 0
metric_key = "time" if RECORD_ALL else "mean"
metric_key = "time" if RECORD_ALL else "mean_time"
from benchmarker import result_analysis
results = benchmark.result.to_result_list()
analysis = result_analysis.ResultAnalysis(
results,
metrics=[metric_key],
params=['impl'],
metric_objectives={
'min': 'min',
'mean': 'min',
'min_time': 'min',
'mean_time': 'min',
'time': 'min',
})
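# Note: the *_time metric names correspond to the suffixed keys produced by
# stats_dict(times, '_time') inside the benchmarker.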
analysis.analysis()
analysis.table
param_group = ['impl', 'impl_version']
analysis.abalate(param_group)
# benchmark_analysis(rows, xlabel, group_labels, basis, RECORD_ALL)

View File

@@ -1,28 +0,0 @@
def check_ttest():
import scipy
import scipy.stats # NOQA
from benchmarker.benchmarker import stats_dict
import numpy as np
metric_vals1 = np.random.randn(10000) + 0.01
metric_vals2 = np.random.randn(1000)
stats1 = stats_dict(metric_vals1)
stats2 = stats_dict(metric_vals2)
ind_kw = dict(
equal_var=0,
# alternative='two-sided'
alternative='less' if stats1['mean'] < stats2['mean'] else 'greater'
)
# Not sure why these are slightly different
res1 = scipy.stats.ttest_ind(metric_vals1, metric_vals2, **ind_kw)
res2 = scipy.stats.ttest_ind_from_stats(
stats1['mean'], stats1['std'], stats1['n'],
stats2['mean'], stats2['std'], stats2['n'],
**ind_kw
)
print('res1 = {!r}'.format(res1))
print('res2 = {!r}'.format(res2))

View File

@@ -8,9 +8,9 @@ from benchmarker.process_context import ProcessContext
@dataclass
class BenchmarkerConfig:
name : str = None
num : int = 100
bestof : int = 10
name : str = None
num : int = 100
bestof : int = 10
class BenchmarkerResult:
@@ -97,14 +97,16 @@ class Benchmarker:
>>> dpath = ub.Path.appdir('benchmarker/demo').ensuredir()
>>> self.dump_in_dpath(dpath)
"""
def __init__(self, basis={}, **kwargs):
def __init__(self, basis={}, verbose=1, **kwargs):
self.basis = basis
self.config = BenchmarkerConfig(**kwargs)
self.ti = timerit.Timerit(
num=self.config.num,
bestof=self.config.bestof)
bestof=self.config.bestof,
verbose=verbose,
)
self.context = ProcessContext(name=self.config.name)
self.rows = []
self.RECORD_ALL = 0
@@ -152,7 +154,7 @@ class Benchmarker:
rows.append(row)
else:
times = np.array(ti.robust_times())
metrics = stats_dict(times)
metrics = stats_dict(times, '_time')
row = {
'metrics': metrics,
'params': params,
@@ -161,13 +163,13 @@ class Benchmarker:
rows.append(row)
def stats_dict(data):
def stats_dict(data, suffix=''):
stats = {
'n': len(data),
'mean': data.mean(),
'std': data.std(),
'min': data.min(),
'max': data.max(),
'nobs' + suffix: len(data),
'mean' + suffix: data.mean(),
'std' + suffix: data.std(),
'min' + suffix: data.min(),
'max' + suffix: data.max(),
}
return stats
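# For example (illustrative): stats_dict(np.array([1., 2., 3.]), '_time') returns
# {'nobs_time': 3, 'mean_time': 2.0, 'std_time': 0.816..., 'min_time': 1.0, 'max_time': 3.0}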
@@ -182,12 +184,12 @@ def combine_stats(s1, s2):
Example:
>>> basis = {
>>> 'n1': [1, 10, 100, 10000],
>>> 'n2': [1, 10, 100, 10000],
>>> 'nobs1': [1, 10, 100, 10000],
>>> 'nobs2': [1, 10, 100, 10000],
>>> }
>>> for params in ub.named_product(basis):
>>> data1 = np.random.rand(params['n1'])
>>> data2 = np.random.rand(params['n2'])
>>> data1 = np.random.rand(params['nobs1'])
>>> data2 = np.random.rand(params['nobs2'])
>>> data3 = np.hstack([data1, data2])
>>> s1 = stats_dict(data1)
>>> s2 = stats_dict(data2)
@@ -203,7 +205,7 @@ def combine_stats(s1, s2):
https://math.stackexchange.com/questions/2971315/how-do-i-combine-standard-deviations-of-two-groups
"""
stats = [s1, s2]
sizes = np.array([s['n'] for s in stats])
sizes = np.array([s['nobs'] for s in stats])
means = np.array([s['mean'] for s in stats])
stds = np.array([s['std'] for s in stats])
mins = np.array([s['min'] for s in stats])
@@ -221,7 +223,7 @@ def combine_stats(s1, s2):
combo_std = np.sqrt(combo_vars)
combo_stats = {
'n': combo_size,
'nobs': combo_size,
'mean': combo_mean,
'std': combo_std,
'min': mins.min(),

View File

@@ -8,6 +8,19 @@ import scipy
import scipy.stats # NOQA
# a list of common objectives
DEFAULT_METRIC_TO_OBJECTIVE = {
'time': 'min',
'ap': 'max',
'acc': 'max',
'f1': 'max',
'mcc': 'max',
#
'loss': 'min',
'brier': 'min',
}
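# Callers can extend or override these defaults via the metric_objectives
# argument of ResultAnalysis (see its __init__ below).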
class Result(ub.NiceRepr):
"""
Storage of names, parameters, and quality metrics for a single experiment.
@@ -31,6 +44,10 @@ class Result(ub.NiceRepr):
>>> self = Result.demo(rng=32)
>>> print('self = {}'.format(self))
self = <Result(name=53f57161,f1=0.33,acc=0.75,param1=1,param2=6.67,param3=a)>
Example:
>>> self = Result.demo(mode='alt', rng=32)
>>> print('self = {}'.format(self))
"""
def __init__(self, name, params, metrics, meta=None):
self.name = name
@@ -48,21 +65,43 @@ class Result(ub.NiceRepr):
return text
@classmethod
def demo(cls, rng=None):
def demo(cls, mode='null', rng=None):
import numpy as np
import string
import kwarray
rng = kwarray.ensure_rng(rng)
demo_param_space = {
'param1': list(range(3)),
'param2': np.linspace(0, 10, 10),
'param3': list(string.ascii_lowercase[0:3]),
}
params = {k: rng.choice(b) for k, b in demo_param_space.items()}
metrics = {
'f1': rng.rand(),
'acc': rng.rand(),
}
if mode == 'null':
# The null hypothesis should generally be true here:
# there is no relation between the results and the parameters.
demo_param_space = {
'param1': list(range(3)),
'param2': np.linspace(0, 10, 10),
'param3': list(string.ascii_lowercase[0:3]),
}
params = {k: rng.choice(b) for k, b in demo_param_space.items()}
metrics = {
'f1': rng.rand(),
'acc': rng.rand(),
}
elif mode == 'alt':
# The alternative hypothesis should be true here: there is a
# relationship between the results and two of the params.
from scipy.special import expit
params = {
'w': rng.randint(-1, 1),
'x': rng.randint(-3, 3),
'y': rng.randint(-2, 2),
'z': rng.randint(-3, 3),
}
noise = rng.randn() * 1
r = 3 * params['x'] + params['y'] ** 2 + 0.3 * params['z'] ** 3
acc = expit(r / 20 + noise)
metrics = {
'acc': acc,
}
else:
raise KeyError(mode)
name = ub.hash_data(params)[0:8]
self = cls(name, params, metrics)
return self
@@ -105,6 +144,10 @@ class ResultAnalysis(ub.NiceRepr):
>>> self = ResultAnalysis.demo()
>>> self.analysis()
Example:
>>> self = ResultAnalysis.demo(num=5000, mode='alt')
>>> self.analysis()
Example:
>>> # Given a list of experiments, configs, and results
>>> # Create a ResultAnalysis object
@@ -168,7 +211,8 @@ class ResultAnalysis(ub.NiceRepr):
def __init__(self, results, metrics=None, params=None, ignore_params=None,
ignore_metrics=None, metric_objectives=None,
abalation_orders={1}, default_objective='max'):
abalation_orders={1}, default_objective='max',
p_threshold=0.05):
self.results = results
if ignore_metrics is None:
ignore_metrics = set()
@@ -181,23 +225,15 @@ class ResultAnalysis(ub.NiceRepr):
self.default_objective = default_objective
# encode if we want to maximize or minimize a metric
default_metric_to_objective = {
'ap': 'max',
'acc': 'max',
'f1': 'max',
#
'loss': 'min',
'brier': 'min',
}
if metric_objectives is None:
metric_objectives = {}
self.metric_objectives = default_metric_to_objective.copy()
self.metric_objectives = DEFAULT_METRIC_TO_OBJECTIVE.copy()
self.metric_objectives.update(metric_objectives)
self.params = params
self.metrics = metrics
self.statistics = None
self.p_threshold = p_threshold
self._description = {}
self._description['built'] = False
@@ -210,11 +246,14 @@ class ResultAnalysis(ub.NiceRepr):
return ub.repr2(self._description, si=1, sv=1)
@classmethod
def demo(cls, num=10, rng=None):
def demo(cls, num=10, mode='null', rng=None):
import kwarray
rng = kwarray.ensure_rng(rng)
results = [Result.demo(rng=rng) for _ in range(num)]
self = cls(results, metrics={'f1', 'acc'})
results = [Result.demo(mode=mode, rng=rng) for _ in range(num)]
if mode == 'null':
self = cls(results, metrics={'f1', 'acc'})
else:
self = cls(results, metrics={'acc'})
return self
def run(self):
@@ -251,18 +290,30 @@ class ResultAnalysis(ub.NiceRepr):
varied = {k: vs for k, vs in varied.items() if len(vs)}
return varied
def abalation_groups(self, param):
def abalation_groups(self, param_group, k=2):
"""
Return groups where the specified parameter(s) are varied, but all
other non-ignored parameters are held the same.
Args:
param_group (str | List[str]):
One or more parameters that are allowed to vary
k (int):
minimum number of items a group must contain to be returned
Returns:
List[DataFrame]:
a list of subsets of the table in which all but the specified
(non-ignored) parameters are held constant, so only param_group varies.
Example:
>>> self = ResultAnalysis.demo()
>>> param = 'param2'
>>> self.abalation_groups(param)
"""
if not ub.iterable(param):
param = [param]
if not ub.iterable(param_group):
param_group = [param_group]
table = self.table
config_rows = [r.params for r in self.results]
config_keys = list(map(set, config_rows))
@@ -271,14 +322,14 @@ class ResultAnalysis(ub.NiceRepr):
if self.ignore_params:
config_keys = [c - self.ignore_params for c in config_keys]
isect_params = set.intersection(*config_keys)
other_params = sorted(isect_params - set(param))
other_params = sorted(isect_params - set(param_group))
groups = []
for key, group in table.groupby(other_params, dropna=False):
if len(group) > 1:
if len(group) >= k:
groups.append(group)
return groups
def abalate(self, param):
def abalate(self, param_group):
"""
Example:
>>> self = ResultAnalysis.demo(100)
@@ -287,34 +338,34 @@ class ResultAnalysis(ub.NiceRepr):
>>> self.abalate(param)
>>> self = ResultAnalysis.demo()
>>> param = ['param2', 'param3']
>>> self.abalate(param)
>>> param_group = ['param2', 'param3']
>>> # xdoctest: +REQUIRES(module:openskill)
>>> self.abalate(param_group)
"""
import itertools as it
if self.table is None:
self.table = self.build_table()
if not ub.iterable(param):
param = [param]
if not ub.iterable(param_group):
param_group = [param_group]
# For hashable generic dictionary
from collections import namedtuple
gd = namedtuple('config', param)
gd = namedtuple('config', param_group)
# from types import SimpleNamespace
param_unique_vals_ = self.table[param].drop_duplicates().to_dict('records')
param_unique_vals_ = self.table[param_group].drop_duplicates().to_dict('records')
param_unique_vals = [gd(**d) for d in param_unique_vals_]
# param_unique_vals = {p: self.table[p].unique().tolist() for p in param}
# param_unique_vals = {p: self.table[p].unique().tolist() for p in param_group}
score_improvements = ub.ddict(list)
scored_obs = []
skillboard = SkillTracker(param_unique_vals)
groups = self.abalation_groups(param)
groups = self.abalation_groups(param_group, k=2)
for group in groups:
for metric_key in self.metrics:
ascending = self._objective_is_ascending(metric_key)
group = group.sort_values(metric_key, ascending=ascending)
subgroups = group.groupby(param)
subgroups = group.groupby(param_group)
if ascending:
best_idx = subgroups[metric_key].idxmax()
else:
@@ -326,19 +377,19 @@ class ResultAnalysis(ub.NiceRepr):
if x1 != x2:
r1 = best_group.loc[x1]
r2 = best_group.loc[x2]
k1 = gd(**r1[param])
k2 = gd(**r2[param])
k1 = gd(**r1[param_group])
k2 = gd(**r2[param_group])
diff = r1[metric_key] - r2[metric_key]
score_improvements[(k1, k2, metric_key)].append(diff)
# metric_vals = best_group[metric_key].values
# diffs = metric_vals[None, :] - metric_vals[:, None]
best_group.set_index(param)
# best_group[param]
best_group.set_index(param_group)
# best_group[param_group]
# best_group[metric_key].diff()
scored_ranking = best_group[param + [metric_key]].reset_index(drop=True)
scored_ranking = best_group[param_group + [metric_key]].reset_index(drop=True)
scored_obs.append(scored_ranking)
ranking = [gd(**d) for d in scored_ranking[param].to_dict('records')]
ranking = [gd(**d) for d in scored_ranking[param_group].to_dict('records')]
skillboard.observe(ranking)
print('skillboard.ratings = {}'.format(ub.repr2(skillboard.ratings, nl=1, align=':')))
@@ -377,15 +428,12 @@ class ResultAnalysis(ub.NiceRepr):
# TODO : document these stats clearly and accurately
Example:
>>> self = ResultAnalysis.demo(num=30)
>>> self = ResultAnalysis.demo(num=100)
>>> print(self.table)
>>> param_group = ['param2']
>>> param_group = ['param2', 'param1']
>>> metric_key = 'f1'
>>> stats_row = self.test_group(param_group, metric_key)
>>> print('stats_row = {}'.format(ub.repr2(stats_row, nl=2, precision=2)))
>>> # ---
>>> self.build()
>>> self.report()
>>> print('stats_row = {}'.format(ub.repr2(stats_row, nl=2, sort=0, precision=2)))
"""
param_group_name = ','.join(param_group)
stats_row = {
@@ -461,10 +509,6 @@ class ResultAnalysis(ub.NiceRepr):
pairwise_statistics = []
for pair in value_pairs:
pair_statistics = {}
# try:
# param_val1, param_val2 = sorted(pair)
# except Exception:
# param_val1, param_val2 = (pair)
param_val1, param_val2 = pair
metric_vals1 = value_to_metric[param_val1]
@@ -477,16 +521,17 @@ class ResultAnalysis(ub.NiceRepr):
pair_statistics['value2'] = param_val2
pair_statistics['n1'] = len(metric_vals1)
pair_statistics['n2'] = len(metric_vals2)
# TODO: probably want to use an alternative=less or greater here
# instead of simply unequal
alternative = 'two-sided'
if 1:
TEST_ONLY_FOR_DIFFERENCE = True
if TEST_ONLY_FOR_DIFFERENCE:
if ascending:
# We want to minimize the metric
alternative = 'less' if rank1 < rank2 else 'greater'
else:
# We want to maximize the metric
alternative = 'greater' if rank1 < rank2 else 'less'
else:
alternative = 'two-sided'
ind_kw = dict(
equal_var=False,
@@ -499,8 +544,8 @@ class ResultAnalysis(ub.NiceRepr):
stats1 = stats_dict(metric_vals1)
stats2 = stats_dict(metric_vals2)
scipy.stats.ttest_ind_from_stats(
stats1['mean'], stats1['std'], stats1['n'],
stats2['mean'], stats2['std'], stats2['n'],
stats1['mean'], stats1['std'], stats1['nobs'],
stats2['mean'], stats2['std'], stats2['nobs'],
**ind_kw
)
# metric_vals1, metric_vals2, equal_var=False)
@@ -523,6 +568,8 @@ class ResultAnalysis(ub.NiceRepr):
for nk in common:
group1 = nk_to_group1[nk]
group2 = nk_to_group2[nk]
# TODO: Not sure if taking the product of everything within
# the comparable group is correct or not. I think it is ok.
for i, j in it.product(group1.index, group2.index):
comparable_indexes1.append(i)
comparable_indexes2.append(j)
@@ -590,7 +637,6 @@ class ResultAnalysis(ub.NiceRepr):
self._description['built'] = True
def report(self):
p_threshold = 0.05
stat_groups = ub.group_items(self.statistics, key=lambda x: x['param_name'])
stat_groups_items = list(stat_groups.items())
@@ -600,43 +646,47 @@ class ResultAnalysis(ub.NiceRepr):
'metrics': self.metrics_of_interest,
})
for grid_item in grid:
metric_key = grid_item['metrics']
stat_groups_item = grid_item['stat_group_item']
param_name, stat_group = stat_groups_item
stats_row = ub.group_items(stat_group, key=lambda x: x['metric'])[metric_key][0]
title = ('PARAMETER: {} - METRIC: {}'.format(param_name, metric_key))
print('\n\n')
print(title)
print('=' * len(title))
print(stats_row['moments'])
anova_rank_p = stats_row['anova_rank_p']
anova_mean_p = stats_row['anova_mean_p']
# Roughly speaking
print('')
print(f'ANOVA: If p is low, the param {param_name!r} might have an effect')
print(ub.color_text(f' Rank-ANOVA: p={anova_rank_p:0.8f}', 'green' if anova_rank_p < p_threshold else None))
print(ub.color_text(f' Mean-ANOVA: p={anova_mean_p:0.8f}', 'green' if anova_mean_p < p_threshold else None))
print('')
print('Pairwise T-Tests')
for pairstat in stats_row['pairwise']:
# Is this backwards?
value1 = pairstat['value1']
value2 = pairstat['value2']
winner = pairstat['winner']
if value2 == winner:
value1, value2 = value2, value1
print(f' If p is low, {param_name}={value1} may outperform {param_name}={value2}.')
if 'ttest_ind' in pairstat:
ttest_ind_result = pairstat['ttest_ind']
print(ub.color_text(f' ttest_ind: p={ttest_ind_result.pvalue:0.8f}', 'green' if ttest_ind_result.pvalue < p_threshold else None))
if 'ttest_rel' in pairstat:
n_common = pairstat['n_common']
ttest_rel_result = pairstat['ttest_ind']
print(ub.color_text(f' ttest_rel: p={ttest_rel_result.pvalue:0.8f}, n={n_common}', 'green' if ttest_rel_result.pvalue < p_threshold else None))
self._report_one(grid_item)
print(self.stats_table)
def _report_one(self, grid_item):
p_threshold = self.p_threshold
metric_key = grid_item['metrics']
stat_groups_item = grid_item['stat_group_item']
param_name, stat_group = stat_groups_item
stats_row = ub.group_items(stat_group, key=lambda x: x['metric'])[metric_key][0]
title = ('PARAMETER: {} - METRIC: {}'.format(param_name, metric_key))
print('\n\n')
print(title)
print('=' * len(title))
print(stats_row['moments'])
anova_rank_p = stats_row['anova_rank_p']
anova_mean_p = stats_row['anova_mean_p']
# Roughly speaking
print('')
print(f'ANOVA: If p is low, the param {param_name!r} might have an effect')
print(ub.color_text(f' Rank-ANOVA: p={anova_rank_p:0.8f}', 'green' if anova_rank_p < p_threshold else None))
print(ub.color_text(f' Mean-ANOVA: p={anova_mean_p:0.8f}', 'green' if anova_mean_p < p_threshold else None))
print('')
print('Pairwise T-Tests')
for pairstat in stats_row['pairwise']:
# Is this backwards?
value1 = pairstat['value1']
value2 = pairstat['value2']
winner = pairstat['winner']
if value2 == winner:
value1, value2 = value2, value1
print(f' If p is low, {param_name}={value1} may outperform {param_name}={value2}.')
if 'ttest_ind' in pairstat:
ttest_ind_result = pairstat['ttest_ind']
print(ub.color_text(f' ttest_ind: p={ttest_ind_result.pvalue:0.8f}', 'green' if ttest_ind_result.pvalue < p_threshold else None))
if 'ttest_rel' in pairstat:
n_common = pairstat['n_common']
ttest_rel_result = pairstat['ttest_rel']
print(ub.color_text(f' ttest_rel: p={ttest_rel_result.pvalue:0.8f}, n_pairs={n_common}', 'green' if ttest_rel_result.pvalue < p_threshold else None))
def conclusions(self):
conclusions = []
for stat in self.statistics:
@@ -653,6 +703,50 @@ class ResultAnalysis(ub.NiceRepr):
conclusions.append(txt)
return conclusions
def plot(self, xlabel, metric_key, group_labels):
"""
Example:
>>> self = ResultAnalysis.demo(num=5000, mode='alt')
>>> self.analysis()
>>> print('self = {}'.format(self))
>>> # xdoctest: +REQUIRES(module:kwplot)
>>> import kwplot
>>> kwplot.autompl()
>>> xlabel = 'x'
>>> metric_key = 'acc'
>>> group_labels = {
>>> 'col': ['y', 'w'],
>>> 'hue': ['z'],
>>> 'size': [],
>>> }
>>> self.plot(xlabel, metric_key, group_labels)
"""
import seaborn as sns
sns.set()
from matplotlib import pyplot as plt # NOQA
data = self.table
data = data.sort_values(metric_key)
for gname, labels in group_labels.items():
if len(labels):
new_col = []
for row in data[labels].to_dict('records'):
item = ub.repr2(row, compact=1, si=1)
new_col.append(item)
gkey = gname + "_key"
data[gkey] = new_col
plotkw = {}
for gname, labels in group_labels.items():
if labels:
plotkw[gname] = gname + "_key"
# Your variables may change
# ax = plt.figure().gca()
col = plotkw.pop("col")
facet = sns.FacetGrid(data, col=col, sharex=False, sharey=False)
facet.map_dataframe(sns.lineplot, x=xlabel, y=metric_key, marker="o", **plotkw)
facet.add_legend()
class SkillTracker:
"""
@@ -677,6 +771,9 @@ class SkillTracker:
4: 0.20,
5: 0.20,
}
Requirements:
openskill
"""
def __init__(self, player_ids):