diff --git a/tests/benchmark3.py b/tests/benchmark3.py
index b6e084e..2563d4d 100644
--- a/tests/benchmark3.py
+++ b/tests/benchmark3.py
@@ -217,13 +217,28 @@ def benchmark_json_dumps():
     param_group = ['impl', 'impl_version']
     analysis.abalate(param_group)
 
-    # benchmark_analysis(rows, xlabel, group_labels, basis, RECORD_ALL)
+    xlabel = "size"
+    # Set these to empty lists if they are not used
+    group_labels = {
+        "col": ["input"],
+        "hue": ["impl"],
+        "size": [],
+    }
+    import kwplot
+    kwplot.autompl()
+    facet = analysis.plot(xlabel, metric_key, group_labels)
+    for ax in facet.axes.ravel():
+        ax.set_xscale('log')
+        ax.set_yscale('log')
+    print('facet = {!r}'.format(facet))
+    kwplot.show_if_requested()
+
 
 if __name__ == "__main__":
     """
     CommandLine:
-        python ~/code/ultrajson/tests/benchmark3.py
+        python ~/code/ultrajson/tests/benchmark3.py --show
     """
     benchmark_json_dumps()
diff --git a/tests/benchmarker/result_analysis.py b/tests/benchmarker/result_analysis.py
index fd56edb..d6f474e 100644
--- a/tests/benchmarker/result_analysis.py
+++ b/tests/benchmarker/result_analysis.py
@@ -240,9 +240,6 @@ class ResultAnalysis(ub.NiceRepr):
         self._description['num_results'] = len(self.results)
 
     def __nice__(self):
-        # if len(self._description) == 0:
-        #     return 'unbuilt'
-        # else:
        return ub.repr2(self._description, si=1, sv=1)
 
     @classmethod
@@ -405,8 +402,13 @@ class ResultAnalysis(ub.NiceRepr):
 
     def _objective_is_ascending(self, metric_key):
         """
-        Return True if we should minimize the objective (lower is better)
-        Return False if we should maximize the objective (higher is better)
+        Args:
+            metric_key (str): the metric in question
+
+        Returns:
+            bool:
+                True if we should minimize the objective (lower is better)
+                False if we should maximize the objective (higher is better)
         """
         objective = self.metric_objectives.get(metric_key, None)
         if objective is None:
@@ -578,6 +580,8 @@ class ResultAnalysis(ub.NiceRepr):
             comparable_groups2 = metric_group2.loc[comparable_indexes2, metric_key]
 
             # Does this need to have the values aligned?
+            # I think that is the case given my understanding of paired
+            # t-tests, but the docs need a PR to make that more clear.
             ttest_rel_result = scipy.stats.ttest_rel(comparable_groups1, comparable_groups2)
             pair_statistics['n_common'] = len(common)
             pair_statistics['ttest_rel'] = ttest_rel_result
@@ -746,6 +750,7 @@ class ResultAnalysis(ub.NiceRepr):
         facet = sns.FacetGrid(data, col=col, sharex=False, sharey=False)
         facet.map_dataframe(sns.lineplot, x=xlabel, y=metric_key, marker="o", **plotkw)
         facet.add_legend()
+        return facet
 
 
 class SkillTracker:
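
Note on the paired t-test comment added above: scipy.stats.ttest_rel computes its
statistic from the element-wise differences between its two inputs, so the two
groups do need to share a common row order. A minimal sketch demonstrating this
(the data and variable names here are illustrative, not taken from the patch):

    import numpy as np
    import scipy.stats

    rng = np.random.RandomState(0)
    a = rng.rand(30)
    b = a + 0.1 + 0.01 * rng.rand(30)  # paired measurements, b consistently larger

    # Aligned pairing: the differences are nearly constant, so the signal is strong.
    aligned = scipy.stats.ttest_rel(a, b)
    # Shuffling one side breaks the pairing and inflates the variance of the differences.
    shuffled = scipy.stats.ttest_rel(a, rng.permutation(b))

    print('aligned  p-value: {:.3g}'.format(aligned.pvalue))
    print('shuffled p-value: {:.3g}'.format(shuffled.pvalue))

The aligned call should report a far smaller p-value than the shuffled one, which
is why comparable_groups1 and comparable_groups2 are indexed so that their rows
correspond before being passed to ttest_rel.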