featurebench

v1.0

FeatureBench (full split): 200 feature-implementation tasks across 24 Python repositories. Seven of the tasks require an NVIDIA Ampere-generation or newer GPU. Original benchmark: https://github.com/LiberCoders/FeatureBench. Adapter: https://github.com/harbor-framework/harbor/pull/875.

uvx harbor run -d featurebench@1.0

Tasks (200)

mlflow__mlflow.93dab383.test_jsonpath_utils.eb851c42.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_jsonpath_utils.eb851c42.lv1
HEAD
mlflow__mlflow.93dab383.test_jsonpath_utils.eb851c42.lv2
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_jsonpath_utils.eb851c42.lv2
HEAD
mlflow__mlflow.93dab383.test_judge_tool_search_traces.cad8fc4b.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_judge_tool_search_traces.cad8fc4b.lv1
HEAD
mlflow__mlflow.93dab383.test_log_image.439a816d.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_log_image.439a816d.lv1
HEAD
mlflow__mlflow.93dab383.test_mlflow.d1847ff9.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_mlflow.d1847ff9.lv1
HEAD
mlflow__mlflow.93dab383.test_numpy_dataset.1beaad57.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_numpy_dataset.1beaad57.lv1
HEAD
mlflow__mlflow.93dab383.test_numpy_dataset.1beaad57.lv2
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_numpy_dataset.1beaad57.lv2
HEAD
mlflow__mlflow.93dab383.test_presigned_url_artifact_repo.8d57288e.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_presigned_url_artifact_repo.8d57288e.lv1
HEAD
mlflow__mlflow.93dab383.test_responses_agent.072c4133.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_responses_agent.072c4133.lv1
HEAD
mlflow__mlflow.93dab383.test_rest_store_webhooks.6d85971c.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_rest_store_webhooks.6d85971c.lv1
HEAD
mlflow__mlflow.93dab383.test_scorer_description.9163195a.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_scorer_description.9163195a.lv1
HEAD
mlflow__mlflow.93dab383.test_scorers.63dc8c7a.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_scorers.63dc8c7a.lv1
HEAD
mlflow__mlflow.93dab383.test_security.98fcde1d.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_security.98fcde1d.lv1
HEAD
mlflow__mlflow.93dab383.test_serialization.2c029be6.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_serialization.2c029be6.lv1
HEAD
mlflow__mlflow.93dab383.test_span.69efd376.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_span.69efd376.lv1
HEAD
mlflow__mlflow.93dab383.test_trace_correlation.53c200a6.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_trace_correlation.53c200a6.lv1
HEAD
mlflow__mlflow.93dab383.test_trace_correlation.53c200a6.lv2
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_trace_correlation.53c200a6.lv2
HEAD
mlflow__mlflow.93dab383.test_trace_manager.bb95fbcd.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_trace_manager.bb95fbcd.lv1
HEAD
mlflow__mlflow.93dab383.test_trace_manager.bb95fbcd.lv2
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_trace_manager.bb95fbcd.lv2
HEAD
mlflow__mlflow.93dab383.test_trace.17fde8b0.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_trace.17fde8b0.lv1
HEAD
mlflow__mlflow.93dab383.test_tracing.0dc1e3a3.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_tracing.0dc1e3a3.lv1
HEAD
mlflow__mlflow.93dab383.test_tracing.0dc1e3a3.lv2
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_tracing.0dc1e3a3.lv2
HEAD
mlflow__mlflow.93dab383.test_type_hints.12d7e575.lv2
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_type_hints.12d7e575.lv2
HEAD
mlflow__mlflow.93dab383.test_unity_catalog_models_artifact_repo.ad6e49d9.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_unity_catalog_models_artifact_repo.ad6e49d9.lv1
HEAD
mlflow__mlflow.93dab383.test_unity_catalog_oss_rest_store.92593ea2.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_unity_catalog_oss_rest_store.92593ea2.lv1
HEAD
mlflow__mlflow.93dab383.test_unity_catalog_rest_store.f47a7d9f.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_unity_catalog_rest_store.f47a7d9f.lv1
HEAD
mlflow__mlflow.93dab383.test_utils.3c9647bf.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_utils.3c9647bf.lv1
HEAD
mlflow__mlflow.93dab383.test_validation.2e1c5076.lv1
uvx harbor run -d featurebench@1.0 -t mlflow__mlflow.93dab383.test_validation.2e1c5076.lv1
HEAD
mwaskom__seaborn.7001ebe7.test_algorithms.1f0181c2.lv1
uvx harbor run -d featurebench@1.0 -t mwaskom__seaborn.7001ebe7.test_algorithms.1f0181c2.lv1
HEAD
mwaskom__seaborn.7001ebe7.test_algorithms.1f0181c2.lv2
uvx harbor run -d featurebench@1.0 -t mwaskom__seaborn.7001ebe7.test_algorithms.1f0181c2.lv2
HEAD
mwaskom__seaborn.7001ebe7.test_bar.123ed709.lv1
uvx harbor run -d featurebench@1.0 -t mwaskom__seaborn.7001ebe7.test_bar.123ed709.lv1
HEAD
mwaskom__seaborn.7001ebe7.test_distributions.f700676d.lv1
uvx harbor run -d featurebench@1.0 -t mwaskom__seaborn.7001ebe7.test_distributions.f700676d.lv1
HEAD
mwaskom__seaborn.7001ebe7.test_line.25304a8b.lv1
uvx harbor run -d featurebench@1.0 -t mwaskom__seaborn.7001ebe7.test_line.25304a8b.lv1
HEAD
mwaskom__seaborn.7001ebe7.test_plot.b645d353.lv1
uvx harbor run -d featurebench@1.0 -t mwaskom__seaborn.7001ebe7.test_plot.b645d353.lv1
HEAD
mwaskom__seaborn.7001ebe7.test_plot.b645d353.lv2
uvx harbor run -d featurebench@1.0 -t mwaskom__seaborn.7001ebe7.test_plot.b645d353.lv2
HEAD
mwaskom__seaborn.7001ebe7.test_regression.ce8c62e2.lv1
uvx harbor run -d featurebench@1.0 -t mwaskom__seaborn.7001ebe7.test_regression.ce8c62e2.lv1
HEAD
mwaskom__seaborn.7001ebe7.test_regression.ce8c62e2.lv2
uvx harbor run -d featurebench@1.0 -t mwaskom__seaborn.7001ebe7.test_regression.ce8c62e2.lv2
HEAD
mwaskom__seaborn.7001ebe7.test_relational.f23eb542.lv1
uvx harbor run -d featurebench@1.0 -t mwaskom__seaborn.7001ebe7.test_relational.f23eb542.lv1
HEAD
mwaskom__seaborn.7001ebe7.test_scales.d9dcdef6.lv1
uvx harbor run -d featurebench@1.0 -t mwaskom__seaborn.7001ebe7.test_scales.d9dcdef6.lv1
HEAD
mwaskom__seaborn.7001ebe7.test_statistics.0f2ae277.lv1
uvx harbor run -d featurebench@1.0 -t mwaskom__seaborn.7001ebe7.test_statistics.0f2ae277.lv1
HEAD
netflix__metaflow.b390a8d4.test_stub_generator.7bf08c98.lv1
uvx harbor run -d featurebench@1.0 -t netflix__metaflow.b390a8d4.test_stub_generator.7bf08c98.lv1
HEAD
optuna__optuna.e7c6f1dd.test_heartbeat.5ad4d08f.lv1
uvx harbor run -d featurebench@1.0 -t optuna__optuna.e7c6f1dd.test_heartbeat.5ad4d08f.lv1
HEAD
pandas-dev__pandas.82fa2715.test_all_methods.c74b49a1.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_all_methods.c74b49a1.lv1
HEAD
pandas-dev__pandas.82fa2715.test_col.a592871d.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_col.a592871d.lv1
HEAD
pandas-dev__pandas.82fa2715.test_col.a592871d.lv2
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_col.a592871d.lv2
HEAD
pandas-dev__pandas.82fa2715.test_concat.ebe5de39.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_concat.ebe5de39.lv1
HEAD
pandas-dev__pandas.82fa2715.test_describe.3b919815.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_describe.3b919815.lv1
HEAD
pandas-dev__pandas.82fa2715.test_formats.05080a4e.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_formats.05080a4e.lv1
HEAD
pandas-dev__pandas.82fa2715.test_groupby_shift_diff.e13d5358.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_groupby_shift_diff.e13d5358.lv1
HEAD
pandas-dev__pandas.82fa2715.test_http_headers.aafb551e.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_http_headers.aafb551e.lv1
HEAD
pandas-dev__pandas.82fa2715.test_iceberg.85771c70.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_iceberg.85771c70.lv1
HEAD
pandas-dev__pandas.82fa2715.test_iceberg.85771c70.lv2
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_iceberg.85771c70.lv2
HEAD
pandas-dev__pandas.82fa2715.test_info.d8a64ebf.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_info.d8a64ebf.lv1
HEAD
pandas-dev__pandas.82fa2715.test_list_accessor.7ab0b2ea.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_list_accessor.7ab0b2ea.lv1
HEAD
pandas-dev__pandas.82fa2715.test_merge_antijoin.921feefe.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_merge_antijoin.921feefe.lv1
HEAD
pandas-dev__pandas.82fa2715.test_nlargest_nsmallest.08e879a9.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_nlargest_nsmallest.08e879a9.lv1
HEAD
pandas-dev__pandas.82fa2715.test_quantile.f4b69e49.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_quantile.f4b69e49.lv1
HEAD
pandas-dev__pandas.82fa2715.test_reduction.3ac3b298.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_reduction.3ac3b298.lv1
HEAD
pandas-dev__pandas.82fa2715.test_sample.5d2ba03e.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_sample.5d2ba03e.lv1
HEAD
pandas-dev__pandas.82fa2715.test_spec_conformance.3aff206b.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_spec_conformance.3aff206b.lv1
HEAD
pandas-dev__pandas.82fa2715.test_string.dcfa24ea.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_string.dcfa24ea.lv1
HEAD
pandas-dev__pandas.82fa2715.test_struct_accessor.3b465152.lv1
uvx harbor run -d featurebench@1.0 -t pandas-dev__pandas.82fa2715.test_struct_accessor.3b465152.lv1
HEAD
pydantic__pydantic.e1dcaf9e.test_deprecated_fields.40a2ec54.lv1
uvx harbor run -d featurebench@1.0 -t pydantic__pydantic.e1dcaf9e.test_deprecated_fields.40a2ec54.lv1
HEAD
pydantic__pydantic.e1dcaf9e.test_experimental_arguments_schema.00dc2dd4.lv1
uvx harbor run -d featurebench@1.0 -t pydantic__pydantic.e1dcaf9e.test_experimental_arguments_schema.00dc2dd4.lv1
HEAD
pydantic__pydantic.e1dcaf9e.test_pipeline.c9b08962.lv1
uvx harbor run -d featurebench@1.0 -t pydantic__pydantic.e1dcaf9e.test_pipeline.c9b08962.lv1
HEAD
pydantic__pydantic.e1dcaf9e.test_titles.e806bda8.lv1
uvx harbor run -d featurebench@1.0 -t pydantic__pydantic.e1dcaf9e.test_titles.e806bda8.lv1
HEAD
pydantic__pydantic.e1dcaf9e.test_types_self.3ae7e16d.lv1
uvx harbor run -d featurebench@1.0 -t pydantic__pydantic.e1dcaf9e.test_types_self.3ae7e16d.lv1
HEAD
pydata__xarray.97f3a746.test_backends_chunks.fa55f68a.lv1
uvx harbor run -d featurebench@1.0 -t pydata__xarray.97f3a746.test_backends_chunks.fa55f68a.lv1
HEAD
pydata__xarray.97f3a746.test_coordinate_transform.6cacb660.lv1
uvx harbor run -d featurebench@1.0 -t pydata__xarray.97f3a746.test_coordinate_transform.6cacb660.lv1
HEAD
pydata__xarray.97f3a746.test_coordinate_transform.6cacb660.lv2
uvx harbor run -d featurebench@1.0 -t pydata__xarray.97f3a746.test_coordinate_transform.6cacb660.lv2
HEAD
pydata__xarray.97f3a746.test_range_index.7b95ba66.lv1
uvx harbor run -d featurebench@1.0 -t pydata__xarray.97f3a746.test_range_index.7b95ba66.lv1
HEAD
pydata__xarray.97f3a746.test_treenode.aa8ba777.lv1
uvx harbor run -d featurebench@1.0 -t pydata__xarray.97f3a746.test_treenode.aa8ba777.lv1
HEAD
pydata__xarray.97f3a746.test_treenode.aa8ba777.lv2
uvx harbor run -d featurebench@1.0 -t pydata__xarray.97f3a746.test_treenode.aa8ba777.lv2
HEAD
pypa__hatch.ff4b4040.test_config.38d2ef18.lv1
uvx harbor run -d featurebench@1.0 -t pypa__hatch.ff4b4040.test_config.38d2ef18.lv1
HEAD
pypa__hatch.ff4b4040.test_fmt.782c88a8.lv1
uvx harbor run -d featurebench@1.0 -t pypa__hatch.ff4b4040.test_fmt.782c88a8.lv1
HEAD
pypa__packaging.013f3b03.test_metadata.e00b5801.lv1
uvx harbor run -d featurebench@1.0 -t pypa__packaging.013f3b03.test_metadata.e00b5801.lv1
HEAD
pypa__setuptools.d198e86f.test_bdist_wheel.51482fc6.lv1
uvx harbor run -d featurebench@1.0 -t pypa__setuptools.d198e86f.test_bdist_wheel.51482fc6.lv1
HEAD
pytest-dev__pytest.68016f0e.raises_group.c28bf36a.lv1
uvx harbor run -d featurebench@1.0 -t pytest-dev__pytest.68016f0e.raises_group.c28bf36a.lv1
HEAD
pytest-dev__pytest.68016f0e.test_local.40fb2f1f.lv1
uvx harbor run -d featurebench@1.0 -t pytest-dev__pytest.68016f0e.test_local.40fb2f1f.lv1
HEAD
python__mypy.8e2ce962.testconstraints.db380fe7.lv1
uvx harbor run -d featurebench@1.0 -t python__mypy.8e2ce962.testconstraints.db380fe7.lv1
HEAD
python__mypy.8e2ce962.testconstraints.db380fe7.lv2
uvx harbor run -d featurebench@1.0 -t python__mypy.8e2ce962.testconstraints.db380fe7.lv2
HEAD
scikit-learn__scikit-learn.5741bac9.test_arff_parser.ecde431a.lv1
uvx harbor run -d featurebench@1.0 -t scikit-learn__scikit-learn.5741bac9.test_arff_parser.ecde431a.lv1
HEAD
scikit-learn__scikit-learn.5741bac9.test_predict_error_display.11cc0c3a.lv1
uvx harbor run -d featurebench@1.0 -t scikit-learn__scikit-learn.5741bac9.test_predict_error_display.11cc0c3a.lv1
HEAD
scikit-learn__scikit-learn.5741bac9.test_public_functions.28421aef.lv1
uvx harbor run -d featurebench@1.0 -t scikit-learn__scikit-learn.5741bac9.test_public_functions.28421aef.lv1
HEAD
sphinx-doc__sphinx.e347e59c.test_build_gettext.2721e644.lv1
uvx harbor run -d featurebench@1.0 -t sphinx-doc__sphinx.e347e59c.test_build_gettext.2721e644.lv1
HEAD
sphinx-doc__sphinx.e347e59c.test_build_html.d253ea54.lv1
uvx harbor run -d featurebench@1.0 -t sphinx-doc__sphinx.e347e59c.test_build_html.d253ea54.lv1
HEAD
sphinx-doc__sphinx.e347e59c.test_build_linkcheck.20428c8f.lv1
uvx harbor run -d featurebench@1.0 -t sphinx-doc__sphinx.e347e59c.test_build_linkcheck.20428c8f.lv1
HEAD
sphinx-doc__sphinx.e347e59c.test_command_line.039d53a2.lv1
uvx harbor run -d featurebench@1.0 -t sphinx-doc__sphinx.e347e59c.test_command_line.039d53a2.lv1
HEAD
sphinx-doc__sphinx.e347e59c.test_domain_c.4068b9e8.lv1
uvx harbor run -d featurebench@1.0 -t sphinx-doc__sphinx.e347e59c.test_domain_c.4068b9e8.lv1
HEAD
sphinx-doc__sphinx.e347e59c.test_ext_doctest.530dec5f.lv1
uvx harbor run -d featurebench@1.0 -t sphinx-doc__sphinx.e347e59c.test_ext_doctest.530dec5f.lv1
HEAD
sphinx-doc__sphinx.e347e59c.test_ext_intersphinx.04901873.lv1
uvx harbor run -d featurebench@1.0 -t sphinx-doc__sphinx.e347e59c.test_ext_intersphinx.04901873.lv1
HEAD
sphinx-doc__sphinx.e347e59c.test_ext_math.96576214.lv1
uvx harbor run -d featurebench@1.0 -t sphinx-doc__sphinx.e347e59c.test_ext_math.96576214.lv1
HEAD
sphinx-doc__sphinx.e347e59c.test_ext_napoleon_docstring.50dabe1f.lv1
uvx harbor run -d featurebench@1.0 -t sphinx-doc__sphinx.e347e59c.test_ext_napoleon_docstring.50dabe1f.lv1
HEAD
sphinx-doc__sphinx.e347e59c.test_util_nodes.08cdc62d.lv1
uvx harbor run -d featurebench@1.0 -t sphinx-doc__sphinx.e347e59c.test_util_nodes.08cdc62d.lv1
HEAD
sympy__sympy.c1097516.test_inverse.c240ffe7.lv1
uvx harbor run -d featurebench@1.0 -t sympy__sympy.c1097516.test_inverse.c240ffe7.lv1
HEAD
sympy__sympy.c1097516.test_nullspace.f14fc970.lv1
uvx harbor run -d featurebench@1.0 -t sympy__sympy.c1097516.test_nullspace.f14fc970.lv1
HEAD
sympy__sympy.c1097516.test_puiseux.cd575f09.lv1
uvx harbor run -d featurebench@1.0 -t sympy__sympy.c1097516.test_puiseux.cd575f09.lv1
HEAD
sympy__sympy.c1097516.test_puiseux.cd575f09.lv2
uvx harbor run -d featurebench@1.0 -t sympy__sympy.c1097516.test_puiseux.cd575f09.lv2
HEAD
sympy__sympy.c1097516.test_runtests_pytest.71d43c5f.lv1
uvx harbor run -d featurebench@1.0 -t sympy__sympy.c1097516.test_runtests_pytest.71d43c5f.lv1
HEAD
sympy__sympy.c1097516.test_smtlib.69d52b90.lv1
uvx harbor run -d featurebench@1.0 -t sympy__sympy.c1097516.test_smtlib.69d52b90.lv1
HEAD