Visualisation of results does not work as described in README
When I try to visualise my results following the instructions in the README, the code crashes with an error message and only one plot is created. It seems the conversion of the date-time string does not work as intended. (Branch: origin/inclusion-own-benchmark)
uv run src/utils/visualize_results.py imdb
Plot saved to plots/imdb_accuracy_vs_energy.png
Traceback (most recent call last):
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/groupby/groupby.py", line 1942, in _agg_py_fallback
res_values = self._grouper.agg_series(ser, alt, preserve_dtype=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/groupby/ops.py", line 864, in agg_series
result = self._aggregate_series_pure_python(obj, func)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/groupby/ops.py", line 885, in _aggregate_series_pure_python
res = func(group)
^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/groupby/groupby.py", line 2454, in <lambda>
alt=lambda x: Series(x, copy=False).mean(numeric_only=numeric_only),
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/series.py", line 6549, in mean
return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/generic.py", line 12420, in mean
return self._stat_function(
^^^^^^^^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/generic.py", line 12377, in _stat_function
return self._reduce(
^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/series.py", line 6457, in _reduce
return op(delegate, skipna=skipna, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/nanops.py", line 147, in f
result = alt(values, axis=axis, skipna=skipna, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/nanops.py", line 404, in new_func
result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/nanops.py", line 720, in nanmean
the_sum = _ensure_numeric(the_sum)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/nanops.py", line 1701, in _ensure_numeric
raise TypeError(f"Could not convert string '{x}' to numeric")
TypeError: Could not convert string '2025-07-15T11:21:402025-07-16T14:47:32' to numeric
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/hartwigt/Documents/llm-testframework/src/utils/visualize_results.py", line 317, in <module>
exit(main())
^^^^^^
File "/home/hartwigt/Documents/llm-testframework/src/utils/visualize_results.py", line 292, in main
plot_energy_breakdown(
File "/home/hartwigt/Documents/llm-testframework/src/utils/visualize_results.py", line 140, in plot_energy_breakdown
grouped = results.groupby("model").mean()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/groupby/groupby.py", line 2452, in mean
result = self._cython_agg_general(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/groupby/groupby.py", line 1998, in _cython_agg_general
new_mgr = data.grouped_reduce(array_func)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/internals/managers.py", line 1469, in grouped_reduce
applied = sb.apply(func)
^^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/internals/blocks.py", line 393, in apply
result = func(self.values, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/groupby/groupby.py", line 1995, in array_func
result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/hartwigt/Documents/llm-testframework/.venv/lib/python3.12/site-packages/pandas/core/groupby/groupby.py", line 1946, in _agg_py_fallback
raise type(err)(msg) from err
TypeError: agg function failed [how->mean,dtype->object]