mirror of https://github.com/ultrajson/ultrajson.git
synced 2024-05-24 22:36:36 +02:00

Port datasets

This commit is contained in:
  parent da6428296d
  commit d036df252f
@@ -8,58 +8,136 @@ import sys

 import ubelt as ub


-def data_lut(input, size):
-    if input == "Array with UTF-8 strings":
-        test_object = []
-        for x in range(size):
-            test_object.append(
-                "نظام الحكم سلطاني وراثي "
-                "في الذكور من ذرية السيد تركي بن سعيد بن سلطان ويشترط فيمن يختار لولاية"
-                " الحكم من بينهم ان يكون مسلما رشيدا عاقلا ًوابنا شرعيا لابوين عمانيين "
-            )
+def json_test_data_generators():
+    """
+    Generates data for benchmarks with various sizes
+
+    Returns:
+        Dict[str, callable]:
+            a mapping from test data name to its generator
+
+    Example:
+        >>> data_lut = json_test_data_generators()
+        >>> size = 2
+        >>> keys = sorted(set(data_lut) - {'Complex object'})
+        >>> for key in keys:
+        >>>     func = data_lut[key]
+        >>>     test_object = func(size)
+        >>>     print('key = {!r}'.format(key))
+        >>>     print('test_object = {!r}'.format(test_object))
+    """
+    data_lut = {}
+
+    def _register_data(name):
+        def _wrap(func):
+            data_lut[name] = func
+        return _wrap
+
+    # seed if desired
+    #rng = random.Random()
+    rng = random
+
+    @_register_data('Array with doubles')
+    def array_with_doubles(size):
+        test_object = [sys.maxsize * rng.random() for _ in range(size)]
+        return test_object
-    elif input == "Array with doubles":
-        test_object = []
-        for x in range(256):
-            test_object.append(sys.maxsize * random.random())
-    else:
-        raise KeyError(input)
+
+    @_register_data('Array with UTF-8 strings')
+    def array_with_utf8_strings(size):
+        utf8_string = (
+            "نظام الحكم سلطاني وراثي "
+            "في الذكور من ذرية السيد تركي بن سعيد بن سلطان ويشترط فيمن يختار لولاية"
+            " الحكم من بينهم ان يكون مسلما رشيدا عاقلا ًوابنا شرعيا لابوين عمانيين "
+        )
+        test_object = [utf8_string for _ in range(size)]
+        return test_object
+
+    @_register_data('Medium complex object')
+    def medium_complex_object(size):
+        user = {
+            "userId": 3381293,
+            "age": 213,
+            "username": "johndoe",
+            "fullname": "John Doe the Second",
+            "isAuthorized": True,
+            "liked": 31231.31231202,
+            "approval": 31.1471,
+            "jobs": [1, 2],
+            "currJob": None,
+        }
+        friends = [user, user, user, user, user, user, user, user]
+        test_object = [[user, friends] for _ in range(size)]
+        return test_object
+
+    @_register_data('Array with True values')
+    def true_values(size):
+        test_object = [True for _ in range(size)]
+        return test_object
+
+    @_register_data('Array of Dict[str, int]')
+    def array_of_dict_string_int(size):
+        test_object = [
+            {str(rng.random() * 20): int(rng.random() * 1000000)}
+            for _ in range(size)
+        ]
+        return test_object
+
+    @_register_data('Dict of List[Dict[str, int]]')
+    def dict_of_list_dict_str_int(size):
+        keys = set()
+        while len(keys) < size:
+            key = str(rng.random() * 20)
+            keys.add(key)
+        test_object = {
+            key: [
+                {str(rng.random() * 20): int(rng.random() * 1000000)}
+                for _ in range(256)
+            ]
+            for key in keys
+        }
+        return test_object
+
+    @_register_data('Complex object')
+    def complex_object(size):
+        import json
+        # TODO: might be better to register this file with setup.py or
+        # download it via some mechanism
+        try:
+            dpath = ub.Path(__file__).parent
+            fpath = dpath / 'sample.json'
+            if not fpath.exists():
+                raise Exception
+        except Exception:
+            import ujson
+            dpath = ub.Path(ujson.__file__).parent / 'tests'
+            fpath = dpath / 'sample.json'
+        if not fpath.exists():
+            raise Exception
+        with open(fpath, 'r') as f:
+            test_object = json.load(f)
+        if size > 1:
+            test_object = [test_object] * size
+        return test_object
+
+    return data_lut


 def available_json_impls():
-    JSON_IMPLS = {}
-
-    try:
-        import json
-        JSON_IMPLS["json"] = json
-    except ImportError:
-        pass
-
-    try:
-        import ujson
-        JSON_IMPLS["ujson"] = ujson
-    except ImportError:
-        pass
-
-    try:
-        import nujson
-        JSON_IMPLS["nujson"] = nujson
-    except ImportError:
-        pass
-
-    try:
-        import orjson
-        JSON_IMPLS["nujson"] = orjson
-    except ImportError:
-        pass
-
-    try:
-        import simplejson
-        JSON_IMPLS["simplejson"] = simplejson
-    except ImportError:
-        pass
-
-    return JSON_IMPLS
+    import importlib
+    known_modnames = [
+        'ujson', 'json', 'nujson', 'orjson', 'simplejson'
+    ]
+    json_impls = {}
+    for libname in known_modnames:
+        try:
+            module = importlib.import_module(libname)
+        except ImportError:
+            pass
+        else:
+            json_impls[libname] = {
+                'module': module,
+                'version': module.__version__,
+            }
+    return json_impls
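Note: a minimal sketch (not part of the commit) of how the two helpers above are meant to compose; `json_test_data_generators` returns the name-to-generator mapping and `available_json_impls` returns the importable modules with their versions:

    # Sketch: generate one input and serialize it with every available impl.
    data_lut = json_test_data_generators()
    impls = available_json_impls()

    test_object = data_lut['Array with doubles'](32)  # a list of 32 floats
    for libname, info in impls.items():
        # each entry carries the module and its reported version
        encoded = info['module'].dumps(test_object)
        print(libname, info['version'], len(encoded))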
@@ -67,28 +145,34 @@ def benchmark_json_dumps():
     sys.path.append(ub.expandpath('~/code/ultrajson/tests'))
     from benchmarker import Benchmarker

-    JSON_IMPLS = available_json_impls()
+    json_impls = available_json_impls()
+    data_lut = json_test_data_generators()

-    version_infos = {k: v.__version__ for k, v in JSON_IMPLS.items()}
-
-    def method_lut(impl):
-        return JSON_IMPLS[impl].dumps
+    list(data_lut.keys())

     # These are the parameters that we benchmark over
     basis = {
         "input": [
-            "Array with UTF-8 strings",
-            "Array with doubles",
+            'Array with doubles',
+            'Array with UTF-8 strings',
+            # 'Medium complex object',
+            'Array with True values',
+            'Array of Dict[str, int]',
+            # 'Dict of List[Dict[str, int]]',
+            # 'Complex object'
         ],
         "size": [1, 32, 256, 1024, 2048],
-        "impl": list(JSON_IMPLS.keys()),
+        "impl": list(json_impls.keys()),
     }

     # The Benchmarker class is a new experimental API around timerit to
     # abstract away the details of timing a process over a grid of parameters,
     # serializing the results, and aggregating results from disparate runs.
     benchmark = Benchmarker(
         name='bench_json_dumps',
         # Change params here to modify number of trials
         num=100,
         bestof=10,
         verbose=2,
         basis=basis,
     )
@@ -96,11 +180,11 @@ def benchmark_json_dumps():
     for params in benchmark.iter_params():
         # Make any modifications you need to compute input kwargs for each
         # method here.
-        impl = params["impl"]
-        impl_version = version_infos[impl]
+        impl_info = json_impls[params["impl"]]
+        method = impl_info['module'].dumps
+        impl_version = impl_info['version']
         params["impl_version"] = impl_version
-        method = method_lut(impl)
-        data = data_lut(params["input"], params["size"])
+        data = data_lut[params["input"]](params["size"])
         # Timerit will run some user-specified number of loops.
         # and compute time stats with similar methodology to timeit
         for timer in benchmark.measure():
@@ -114,20 +198,25 @@ def benchmark_json_dumps():
     benchmark.dump_in_dpath(dpath)

     RECORD_ALL = 0
-    metric_key = "time" if RECORD_ALL else "mean"
+    metric_key = "time" if RECORD_ALL else "mean_time"

     from benchmarker import result_analysis
     results = benchmark.result.to_result_list()

     analysis = result_analysis.ResultAnalysis(
         results,
         metrics=[metric_key],
         params=['impl'],
         metric_objectives={
-            'min': 'min',
-            'mean': 'min',
+            'min_time': 'min',
+            'mean_time': 'min',
             'time': 'min',
         })
     analysis.analysis()
     analysis.table

+    param_group = ['impl', 'impl_version']
+    analysis.abalate(param_group)
+
     # benchmark_analysis(rows, xlabel, group_labels, basis, RECORD_ALL)
@@ -1,28 +0,0 @@
-
-def check_ttest():
-    import scipy
-    import scipy.stats  # NOQA
-    from benchmarker.benchmarker import stats_dict
-    import numpy as np
-    metric_vals1 = np.random.randn(10000) + 0.01
-    metric_vals2 = np.random.randn(1000)
-
-    stats1 = stats_dict(metric_vals1)
-    stats2 = stats_dict(metric_vals2)
-
-    ind_kw = dict(
-        equal_var=0,
-        # alternative='two-sided'
-        alternative='less' if stats1['mean'] < stats2['mean'] else 'greater'
-    )
-
-    # Not sure why these are slightly different
-    res1 = scipy.stats.ttest_ind(metric_vals1, metric_vals2, **ind_kw)
-
-    res2 = scipy.stats.ttest_ind_from_stats(
-        stats1['mean'], stats1['std'], stats1['n'],
-        stats2['mean'], stats2['std'], stats2['n'],
-        **ind_kw
-    )
-    print('res1 = {!r}'.format(res1))
-    print('res2 = {!r}'.format(res2))
@@ -8,9 +8,9 @@ from benchmarker.process_context import ProcessContext

 @dataclass
 class BenchmarkerConfig:
-    name : str = None
-    num : int = 100
-    bestof : int = 10
+    name: str = None
+    num: int = 100
+    bestof: int = 10


 class BenchmarkerResult:
@@ -97,14 +97,16 @@ class Benchmarker:
         >>> dpath = ub.Path.appdir('benchmarker/demo').ensuredir()
         >>> self.dump_in_dpath(dpath)
     """
-    def __init__(self, basis={}, **kwargs):
+    def __init__(self, basis={}, verbose=1, **kwargs):
         self.basis = basis

         self.config = BenchmarkerConfig(**kwargs)

         self.ti = timerit.Timerit(
             num=self.config.num,
-            bestof=self.config.bestof)
+            bestof=self.config.bestof,
+            verbose=verbose,
+        )
         self.context = ProcessContext(name=self.config.name)
         self.rows = []
         self.RECORD_ALL = 0
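Note: a minimal usage sketch (not part of the commit) showing how the new `verbose` argument reaches the underlying timerit object; the with-block assumes `measure()` yields timerit-style context-manager timers, as the benchmark script above suggests, and the timed body is a hypothetical stand-in workload:

    benchmark = Benchmarker(name='demo', num=10, bestof=3, verbose=1,
                            basis={'size': [10, 100]})
    for params in benchmark.iter_params():
        for timer in benchmark.measure():
            with timer:
                sum(range(params['size']))  # stand-in workload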
@@ -152,7 +154,7 @@ class Benchmarker:
             rows.append(row)
         else:
             times = np.array(ti.robust_times())
-            metrics = stats_dict(times)
+            metrics = stats_dict(times, '_time')
             row = {
                 'metrics': metrics,
                 'params': params,
@@ -161,13 +163,13 @@ class Benchmarker:
             rows.append(row)


-def stats_dict(data):
+def stats_dict(data, suffix=''):
     stats = {
-        'n': len(data),
-        'mean': data.mean(),
-        'std': data.std(),
-        'min': data.min(),
-        'max': data.max(),
+        'nobs' + suffix: len(data),
+        'mean' + suffix: data.mean(),
+        'std' + suffix: data.std(),
+        'min' + suffix: data.min(),
+        'max' + suffix: data.max(),
     }
     return stats
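Note: with the new `suffix` argument the stat keys become self-describing, which is what the `mean_time` / `min_time` metric keys in the benchmark script rely on. A quick sketch:

    import numpy as np
    times = np.array([0.10, 0.15, 0.20])
    stats_dict(times, '_time')
    # -> {'nobs_time': 3, 'mean_time': 0.15, 'std_time': ...,
    #     'min_time': 0.1, 'max_time': 0.2}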
@@ -182,12 +184,12 @@ def combine_stats(s1, s2):
     Example:
         >>> basis = {
-        >>>     'n1': [1, 10, 100, 10000],
-        >>>     'n2': [1, 10, 100, 10000],
+        >>>     'nobs1': [1, 10, 100, 10000],
+        >>>     'nobs2': [1, 10, 100, 10000],
         >>> }
         >>> for params in ub.named_product(basis):
-        >>>     data1 = np.random.rand(params['n1'])
-        >>>     data2 = np.random.rand(params['n2'])
+        >>>     data1 = np.random.rand(params['nobs1'])
+        >>>     data2 = np.random.rand(params['nobs2'])
         >>>     data3 = np.hstack([data1, data2])
         >>>     s1 = stats_dict(data1)
         >>>     s2 = stats_dict(data2)
@@ -203,7 +205,7 @@ def combine_stats(s1, s2):
        https://math.stackexchange.com/questions/2971315/how-do-i-combine-standard-deviations-of-two-groups
    """
    stats = [s1, s2]
-    sizes = np.array([s['n'] for s in stats])
+    sizes = np.array([s['nobs'] for s in stats])
    means = np.array([s['mean'] for s in stats])
    stds = np.array([s['std'] for s in stats])
    mins = np.array([s['min'] for s in stats])
@@ -221,7 +223,7 @@ def combine_stats(s1, s2):
     combo_std = np.sqrt(combo_vars)

     combo_stats = {
-        'n': combo_size,
+        'nobs': combo_size,
         'mean': combo_mean,
         'std': combo_std,
         'min': mins.min(),
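Note: `combine_stats` pools moments without needing the raw samples: the combined mean is the size-weighted mean, and the combined variance adds a within-group and a between-group term (see the math.stackexchange link in the docstring). A quick consistency sketch, mirroring the docstring example:

    import numpy as np
    data1, data2 = np.random.rand(10), np.random.rand(100)
    s1, s2 = stats_dict(data1), stats_dict(data2)
    combined = combine_stats(s1, s2)
    # should match stats computed directly on the concatenated samples
    direct = stats_dict(np.hstack([data1, data2]))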
@@ -8,6 +8,19 @@ import scipy
 import scipy.stats  # NOQA


+# a list of common objectives
+DEFAULT_METRIC_TO_OBJECTIVE = {
+    'time': 'min',
+    'ap': 'max',
+    'acc': 'max',
+    'f1': 'max',
+    'mcc': 'max',
+    #
+    'loss': 'min',
+    'brier': 'min',
+}
+
+
 class Result(ub.NiceRepr):
     """
     Storage of names, parameters, and quality metrics for a single experiment.
@@ -31,6 +44,10 @@ class Result(ub.NiceRepr):
         >>> self = Result.demo(rng=32)
         >>> print('self = {}'.format(self))
         self = <Result(name=53f57161,f1=0.33,acc=0.75,param1=1,param2=6.67,param3=a)>
+
+    Example:
+        >>> self = Result.demo(mode='alt', rng=32)
+        >>> print('self = {}'.format(self))
     """
     def __init__(self, name, params, metrics, meta=None):
         self.name = name
@@ -48,21 +65,43 @@ class Result(ub.NiceRepr):
         return text

     @classmethod
-    def demo(cls, rng=None):
+    def demo(cls, mode='null', rng=None):
         import numpy as np
         import string
         import kwarray
         rng = kwarray.ensure_rng(rng)
-        demo_param_space = {
-            'param1': list(range(3)),
-            'param2': np.linspace(0, 10, 10),
-            'param3': list(string.ascii_lowercase[0:3]),
-        }
-        params = {k: rng.choice(b) for k, b in demo_param_space.items()}
-        metrics = {
-            'f1': rng.rand(),
-            'acc': rng.rand(),
-        }
+
+        if mode == 'null':
+            # The null hypothesis should generally be true here,
+            # there is no relation between the results and parameters
+            demo_param_space = {
+                'param1': list(range(3)),
+                'param2': np.linspace(0, 10, 10),
+                'param3': list(string.ascii_lowercase[0:3]),
+            }
+            params = {k: rng.choice(b) for k, b in demo_param_space.items()}
+            metrics = {
+                'f1': rng.rand(),
+                'acc': rng.rand(),
+            }
+        elif mode == 'alt':
+            # The alternative hypothesis should be true here, there is a
+            # relationship between the results and two of the params.
+            from scipy.special import expit
+            params = {
+                'w': rng.randint(-1, 1),
+                'x': rng.randint(-3, 3),
+                'y': rng.randint(-2, 2),
+                'z': rng.randint(-3, 3),
+            }
+            noise = np.random.randn() * 1
+            r = 3 * params['x'] + params['y'] ** 2 + 0.3 * params['z'] ** 3
+            acc = expit(r / 20 + noise)
+            metrics = {
+                'acc': acc,
+            }
+        else:
+            raise KeyError(mode)
         name = ub.hash_data(params)[0:8]
         self = cls(name, params, metrics)
         return self
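Note: in 'alt' mode the metric is tied to the params through a logistic link, so the analysis should detect an effect for x, y, and z but not for w, which is drawn but never enters the response. A sketch of the planted relationship (names taken from the code above):

    from scipy.special import expit  # logistic sigmoid
    # acc rises with x, with |y| (y enters squared), and with z**3;
    # w acts as a negative control with no effect on acc
    r = 3 * x + y ** 2 + 0.3 * z ** 3
    acc = expit(r / 20 + noise)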
@@ -105,6 +144,10 @@ class ResultAnalysis(ub.NiceRepr):
         >>> self = ResultAnalysis.demo()
         >>> self.analysis()

+    Example:
+        >>> self = ResultAnalysis.demo(num=5000, mode='alt')
+        >>> self.analysis()
+
     Example:
         >>> # Given a list of experiments, configs, and results
         >>> # Create a ResultAnalysis object
@@ -168,7 +211,8 @@ class ResultAnalysis(ub.NiceRepr):

     def __init__(self, results, metrics=None, params=None, ignore_params=None,
                  ignore_metrics=None, metric_objectives=None,
-                 abalation_orders={1}, default_objective='max'):
+                 abalation_orders={1}, default_objective='max',
+                 p_threshold=0.05):
         self.results = results
         if ignore_metrics is None:
             ignore_metrics = set()
@@ -181,23 +225,15 @@ class ResultAnalysis(ub.NiceRepr):
         self.default_objective = default_objective

         # encode if we want to maximize or minimize a metric
-        default_metric_to_objective = {
-            'ap': 'max',
-            'acc': 'max',
-            'f1': 'max',
-            #
-            'loss': 'min',
-            'brier': 'min',
-        }
         if metric_objectives is None:
             metric_objectives = {}

-        self.metric_objectives = default_metric_to_objective.copy()
+        self.metric_objectives = DEFAULT_METRIC_TO_OBJECTIVE.copy()
         self.metric_objectives.update(metric_objectives)

         self.params = params
         self.metrics = metrics
         self.statistics = None
+        self.p_threshold = p_threshold

         self._description = {}
         self._description['built'] = False
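Note: user-supplied objectives now layer on top of the shared module-level default table. A minimal sketch (not part of the commit) of how a caller overrides it, mirroring the benchmark script earlier in this diff:

    analysis = ResultAnalysis(
        results,
        metrics=['mean_time'],
        # anything not listed here falls back to DEFAULT_METRIC_TO_OBJECTIVE
        metric_objectives={'mean_time': 'min'},
        p_threshold=0.05,
    )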
@@ -210,11 +246,14 @@ class ResultAnalysis(ub.NiceRepr):
         return ub.repr2(self._description, si=1, sv=1)

     @classmethod
-    def demo(cls, num=10, rng=None):
+    def demo(cls, num=10, mode='null', rng=None):
         import kwarray
         rng = kwarray.ensure_rng(rng)
-        results = [Result.demo(rng=rng) for _ in range(num)]
-        self = cls(results, metrics={'f1', 'acc'})
+        results = [Result.demo(mode=mode, rng=rng) for _ in range(num)]
+        if mode == 'null':
+            self = cls(results, metrics={'f1', 'acc'})
+        else:
+            self = cls(results, metrics={'acc'})
         return self

     def run(self):
@@ -251,18 +290,30 @@ class ResultAnalysis(ub.NiceRepr):
         varied = {k: vs for k, vs in varied.items() if len(vs)}
         return varied

-    def abalation_groups(self, param):
+    def abalation_groups(self, param_group, k=2):
         """
+        Return groups where the specified parameter(s) are varied, but all
+        other non-ignored parameters are held the same.
+
+        Args:
+            param_group (str | List[str]):
+                One or more parameters that are allowed to vary
+
+            k (int):
+                minimum number of items a group must contain to be returned
+
         Returns:
             List[DataFrame]:
                 a list of subsets of the table where all but the specified
                 (non-ignored) parameters are allowed to vary.

         Example:
             >>> self = ResultAnalysis.demo()
             >>> param = 'param2'
             >>> self.abalation_groups(param)
         """
-        if not ub.iterable(param):
-            param = [param]
+        if not ub.iterable(param_group):
+            param_group = [param_group]
         table = self.table
         config_rows = [r.params for r in self.results]
         config_keys = list(map(set, config_rows))
@@ -271,14 +322,14 @@ class ResultAnalysis(ub.NiceRepr):
         if self.ignore_params:
             config_keys = [c - self.ignore_params for c in config_keys]
         isect_params = set.intersection(*config_keys)
-        other_params = sorted(isect_params - set(param))
+        other_params = sorted(isect_params - set(param_group))
         groups = []
         for key, group in table.groupby(other_params, dropna=False):
-            if len(group) > 1:
+            if len(group) >= k:
                 groups.append(group)
         return groups

-    def abalate(self, param):
+    def abalate(self, param_group):
         """
         Example:
             >>> self = ResultAnalysis.demo(100)
@@ -287,34 +338,34 @@ class ResultAnalysis(ub.NiceRepr):
             >>> self.abalate(param)

             >>> self = ResultAnalysis.demo()
-            >>> param = ['param2', 'param3']
-            >>> self.abalate(param)
+            >>> param_group = ['param2', 'param3']
+            >>> # xdoctest: +REQUIRES(module:openskill)
+            >>> self.abalate(param_group)
         """
         import itertools as it
         if self.table is None:
             self.table = self.build_table()
-        if not ub.iterable(param):
-            param = [param]
+        if not ub.iterable(param_group):
+            param_group = [param_group]

         # For hashable generic dictionary
         from collections import namedtuple
-        gd = namedtuple('config', param)
+        gd = namedtuple('config', param_group)

         # from types import SimpleNamespace
-        param_unique_vals_ = self.table[param].drop_duplicates().to_dict('records')
+        param_unique_vals_ = self.table[param_group].drop_duplicates().to_dict('records')
         param_unique_vals = [gd(**d) for d in param_unique_vals_]
-        # param_unique_vals = {p: self.table[p].unique().tolist() for p in param}
+        # param_unique_vals = {p: self.table[p].unique().tolist() for p in param_group}
         score_improvements = ub.ddict(list)
         scored_obs = []
         skillboard = SkillTracker(param_unique_vals)
-        groups = self.abalation_groups(param)
+        groups = self.abalation_groups(param_group, k=2)

         for group in groups:
             for metric_key in self.metrics:
                 ascending = self._objective_is_ascending(metric_key)

                 group = group.sort_values(metric_key, ascending=ascending)
-                subgroups = group.groupby(param)
+                subgroups = group.groupby(param_group)
                 if ascending:
                     best_idx = subgroups[metric_key].idxmax()
                 else:
@@ -326,19 +377,19 @@ class ResultAnalysis(ub.NiceRepr):
                 if x1 != x2:
                     r1 = best_group.loc[x1]
                     r2 = best_group.loc[x2]
-                    k1 = gd(**r1[param])
-                    k2 = gd(**r2[param])
+                    k1 = gd(**r1[param_group])
+                    k2 = gd(**r2[param_group])
                     diff = r1[metric_key] - r2[metric_key]
                     score_improvements[(k1, k2, metric_key)].append(diff)

             # metric_vals = best_group[metric_key].values
             # diffs = metric_vals[None, :] - metric_vals[:, None]
-            best_group.set_index(param)
-            # best_group[param]
+            best_group.set_index(param_group)
+            # best_group[param_group]
             # best_group[metric_key].diff()
-            scored_ranking = best_group[param + [metric_key]].reset_index(drop=True)
+            scored_ranking = best_group[param_group + [metric_key]].reset_index(drop=True)
             scored_obs.append(scored_ranking)
-            ranking = [gd(**d) for d in scored_ranking[param].to_dict('records')]
+            ranking = [gd(**d) for d in scored_ranking[param_group].to_dict('records')]
             skillboard.observe(ranking)

         print('skillboard.ratings = {}'.format(ub.repr2(skillboard.ratings, nl=1, align=':')))
@@ -377,15 +428,12 @@ class ResultAnalysis(ub.NiceRepr):
         # TODO : document these stats clearly and accurately

         Example:
-            >>> self = ResultAnalysis.demo(num=30)
+            >>> self = ResultAnalysis.demo(num=100)
             >>> print(self.table)
-            >>> param_group = ['param2']
+            >>> param_group = ['param2', 'param1']
             >>> metric_key = 'f1'
             >>> stats_row = self.test_group(param_group, metric_key)
-            >>> print('stats_row = {}'.format(ub.repr2(stats_row, nl=2, precision=2)))
-            >>> # ---
-            >>> self.build()
-            >>> self.report()
+            >>> print('stats_row = {}'.format(ub.repr2(stats_row, nl=2, sort=0, precision=2)))
         """
         param_group_name = ','.join(param_group)
         stats_row = {
@@ -461,10 +509,6 @@ class ResultAnalysis(ub.NiceRepr):
         pairwise_statistics = []
         for pair in value_pairs:
             pair_statistics = {}
-            # try:
-            #     param_val1, param_val2 = sorted(pair)
-            # except Exception:
-            #     param_val1, param_val2 = (pair)
             param_val1, param_val2 = pair

             metric_vals1 = value_to_metric[param_val1]
@@ -477,16 +521,17 @@ class ResultAnalysis(ub.NiceRepr):
             pair_statistics['value2'] = param_val2
             pair_statistics['n1'] = len(metric_vals1)
             pair_statistics['n2'] = len(metric_vals2)
-            # TODO: probably want to use an alternative=less or greater here
-            # instead of simply unequal
-            alternative = 'two-sided'
-            if 1:
+
+            TEST_ONLY_FOR_DIFFERENCE = True
+            if TEST_ONLY_FOR_DIFFERENCE:
                 if ascending:
                     # We want to minimize the metric
                     alternative = 'less' if rank1 < rank2 else 'greater'
                 else:
                     # We want to maximize the metric
                     alternative = 'greater' if rank1 < rank2 else 'less'
+            else:
+                alternative = 'two-sided'

             ind_kw = dict(
                 equal_var=False,
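Note: the `alternative` chosen above feeds scipy's independent t-test; testing only in the direction of the observed ranking asks "is the apparent winner really better" rather than "are these merely different". A standalone sketch (not part of the commit) of that one-sided usage:

    import numpy as np
    import scipy.stats
    rng = np.random.RandomState(0)
    a = rng.randn(100) + 0.1  # apparent winner (slightly higher mean)
    b = rng.randn(100)
    # one-sided test in the direction suggested by the observed ranking
    res = scipy.stats.ttest_ind(a, b, equal_var=False, alternative='greater')
    print(res.pvalue)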
@@ -499,8 +544,8 @@ class ResultAnalysis(ub.NiceRepr):
             stats1 = stats_dict(metric_vals1)
             stats2 = stats_dict(metric_vals2)
             scipy.stats.ttest_ind_from_stats(
-                stats1['mean'], stats1['std'], stats1['n'],
-                stats2['mean'], stats2['std'], stats2['n'],
+                stats1['mean'], stats1['std'], stats1['nobs'],
+                stats2['mean'], stats2['std'], stats2['nobs'],
                 **ind_kw
             )
             # metric_vals1, metric_vals2, equal_var=False)
@@ -523,6 +568,8 @@ class ResultAnalysis(ub.NiceRepr):
             for nk in common:
                 group1 = nk_to_group1[nk]
                 group2 = nk_to_group2[nk]
+                # TODO: Not sure if taking the product of everything within
+                # the comparable group is correct or not. I think it is ok.
                 for i, j in it.product(group1.index, group2.index):
                     comparable_indexes1.append(i)
                     comparable_indexes2.append(j)
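Note: the added TODO concerns the pairing strategy for the related-samples test; every row in group1 is paired with every row in group2 that shares the same nuisance key. A self-contained sketch of that expansion (the index values are hypothetical):

    import itertools as it
    group1_index = [0, 1]  # rows for params A under one nuisance key
    group2_index = [5, 7]  # rows for params B under the same key
    pairs = list(it.product(group1_index, group2_index))
    # -> [(0, 5), (0, 7), (1, 5), (1, 7)]; n_pairs grows multiplicatively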
@@ -590,7 +637,6 @@ class ResultAnalysis(ub.NiceRepr):
         self._description['built'] = True

     def report(self):
-        p_threshold = 0.05
         stat_groups = ub.group_items(self.statistics, key=lambda x: x['param_name'])
         stat_groups_items = list(stat_groups.items())
@@ -600,43 +646,47 @@ class ResultAnalysis(ub.NiceRepr):
             'metrics': self.metrics_of_interest,
         })
         for grid_item in grid:
-            metric_key = grid_item['metrics']
-            stat_groups_item = grid_item['stat_group_item']
-
-            param_name, stat_group = stat_groups_item
-            stats_row = ub.group_items(stat_group, key=lambda x: x['metric'])[metric_key][0]
-            title = ('PARAMETER: {} - METRIC: {}'.format(param_name, metric_key))
-            print('\n\n')
-            print(title)
-            print('=' * len(title))
-            print(stats_row['moments'])
-            anova_rank_p = stats_row['anova_rank_p']
-            anova_mean_p = stats_row['anova_mean_p']
-            # Rougly speaking
-            print('')
-            print(f'ANOVA: If p is low, the param {param_name!r} might have an effect')
-            print(ub.color_text(f'  Rank-ANOVA: p={anova_rank_p:0.8f}', 'green' if anova_rank_p < p_threshold else None))
-            print(ub.color_text(f'  Mean-ANOVA: p={anova_mean_p:0.8f}', 'green' if anova_mean_p < p_threshold else None))
-            print('')
-            print('Pairwise T-Tests')
-            for pairstat in stats_row['pairwise']:
-                # Is this backwards?
-                value1 = pairstat['value1']
-                value2 = pairstat['value2']
-                winner = pairstat['winner']
-                if value2 == winner:
-                    value1, value2 = value2, value1
-                print(f'  If p is low, {param_name}={value1} may outperform {param_name}={value2}.')
-                if 'ttest_ind' in pairstat:
-                    ttest_ind_result = pairstat['ttest_ind']
-                    print(ub.color_text(f'    ttest_ind:  p={ttest_ind_result.pvalue:0.8f}', 'green' if ttest_ind_result.pvalue < p_threshold else None))
-                if 'ttest_rel' in pairstat:
-                    n_common = pairstat['n_common']
-                    ttest_rel_result = pairstat['ttest_ind']
-                    print(ub.color_text(f'    ttest_rel:  p={ttest_rel_result.pvalue:0.8f}, n={n_common}', 'green' if ttest_rel_result.pvalue < p_threshold else None))
+            self._report_one(grid_item)

         print(self.stats_table)

+    def _report_one(self, grid_item):
+        p_threshold = self.p_threshold
+        metric_key = grid_item['metrics']
+        stat_groups_item = grid_item['stat_group_item']
+
+        param_name, stat_group = stat_groups_item
+        stats_row = ub.group_items(stat_group, key=lambda x: x['metric'])[metric_key][0]
+        title = ('PARAMETER: {} - METRIC: {}'.format(param_name, metric_key))
+        print('\n\n')
+        print(title)
+        print('=' * len(title))
+        print(stats_row['moments'])
+        anova_rank_p = stats_row['anova_rank_p']
+        anova_mean_p = stats_row['anova_mean_p']
+        # Roughly speaking
+        print('')
+        print(f'ANOVA: If p is low, the param {param_name!r} might have an effect')
+        print(ub.color_text(f'  Rank-ANOVA: p={anova_rank_p:0.8f}', 'green' if anova_rank_p < p_threshold else None))
+        print(ub.color_text(f'  Mean-ANOVA: p={anova_mean_p:0.8f}', 'green' if anova_mean_p < p_threshold else None))
+        print('')
+        print('Pairwise T-Tests')
+        for pairstat in stats_row['pairwise']:
+            # Is this backwards?
+            value1 = pairstat['value1']
+            value2 = pairstat['value2']
+            winner = pairstat['winner']
+            if value2 == winner:
+                value1, value2 = value2, value1
+            print(f'  If p is low, {param_name}={value1} may outperform {param_name}={value2}.')
+            if 'ttest_ind' in pairstat:
+                ttest_ind_result = pairstat['ttest_ind']
+                print(ub.color_text(f'    ttest_ind:  p={ttest_ind_result.pvalue:0.8f}', 'green' if ttest_ind_result.pvalue < p_threshold else None))
+            if 'ttest_rel' in pairstat:
+                n_common = pairstat['n_common']
+                ttest_rel_result = pairstat['ttest_ind']
+                print(ub.color_text(f'    ttest_rel:  p={ttest_rel_result.pvalue:0.8f}, n_pairs={n_common}', 'green' if ttest_rel_result.pvalue < p_threshold else None))

     def conclusions(self):
         conclusions = []
         for stat in self.statistics:
@@ -653,6 +703,50 @@ class ResultAnalysis(ub.NiceRepr):
             conclusions.append(txt)
         return conclusions

+    def plot(self, xlabel, metric_key, group_labels):
+        """
+        Example:
+            >>> self = ResultAnalysis.demo(num=5000, mode='alt')
+            >>> self.analysis()
+            >>> print('self = {}'.format(self))
+            >>> # xdoctest: +REQUIRES(module:kwplot)
+            >>> import kwplot
+            >>> kwplot.autompl()
+            >>> xlabel = 'x'
+            >>> metric_key = 'acc'
+            >>> group_labels = {
+            >>>     'col': ['y', 'w'],
+            >>>     'hue': ['z'],
+            >>>     'size': [],
+            >>> }
+            >>> self.plot(xlabel, metric_key, group_labels)
+        """
+        import seaborn as sns
+        sns.set()
+        from matplotlib import pyplot as plt  # NOQA
+        data = self.table
+        data = data.sort_values(metric_key)
+        for gname, labels in group_labels.items():
+            if len(labels):
+                new_col = []
+                for row in data[labels].to_dict('records'):
+                    item = ub.repr2(row, compact=1, si=1)
+                    new_col.append(item)
+                gkey = gname + "_key"
+                data[gkey] = new_col
+
+        plotkw = {}
+        for gname, labels in group_labels.items():
+            if labels:
+                plotkw[gname] = gname + "_key"
+
+        # Your variables may change
+        # ax = plt.figure().gca()
+        col = plotkw.pop("col")
+        facet = sns.FacetGrid(data, col=col, sharex=False, sharey=False)
+        facet.map_dataframe(sns.lineplot, x=xlabel, y=metric_key, marker="o", **plotkw)
+        facet.add_legend()
+

 class SkillTracker:
     """
@@ -677,6 +771,9 @@ class SkillTracker:
         4: 0.20,
         5: 0.20,
     }
+
+    Requirements:
+        openskill
     """

     def __init__(self, player_ids):