researchcodebench

v1.0

ResearchCodeBench evaluates AI agents' ability to implement algorithms from academic papers. It contains 212 code implementation tasks across 20 ML/AI research problems from top-tier venues (ICLR, NeurIPS, CVPR, COLM). The benchmark tests paper comprehension, algorithm understanding, and precise code implementation skills, and is backed by 1,449 lines of reference code.

uvx harbor run -d researchcodebench@1.0

Tasks (212)

advantage-alignment_aa_terms1
uvx harbor run -d researchcodebench@1.0 -t advantage-alignment_aa_terms1
69581ca
advantage-alignment_aa_terms2
uvx harbor run -d researchcodebench@1.0 -t advantage-alignment_aa_terms2
69581ca
advantage-alignment_aa_terms3
uvx harbor run -d researchcodebench@1.0 -t advantage-alignment_aa_terms3
69581ca
advantage-alignment_integrated_aa
uvx harbor run -d researchcodebench@1.0 -t advantage-alignment_integrated_aa
69581ca
advantage-alignment_proximal_surrogate
uvx harbor run -d researchcodebench@1.0 -t advantage-alignment_proximal_surrogate
69581ca
diff-transformer_empirical_lambda_init_function
uvx harbor run -d researchcodebench@1.0 -t diff-transformer_empirical_lambda_init_function
69581ca
diff-transformer_groupnorm_with_scaling
uvx harbor run -d researchcodebench@1.0 -t diff-transformer_groupnorm_with_scaling
69581ca
diff-transformer_lambda_setup
uvx harbor run -d researchcodebench@1.0 -t diff-transformer_lambda_setup
69581ca
diff-transformer_multihead_differential_attention
uvx harbor run -d researchcodebench@1.0 -t diff-transformer_multihead_differential_attention
69581ca
diff-transformer_normalization_setup
uvx harbor run -d researchcodebench@1.0 -t diff-transformer_normalization_setup
69581ca
diff-transformer_re-parameterize_lambda
uvx harbor run -d researchcodebench@1.0 -t diff-transformer_re-parameterize_lambda
69581ca
diff-transformer_taking_difference
uvx harbor run -d researchcodebench@1.0 -t diff-transformer_taking_difference
69581ca
diffusiondpo_calculate_model_losses
uvx harbor run -d researchcodebench@1.0 -t diffusiondpo_calculate_model_losses
69581ca
diffusiondpo_calculate_reference_losses
uvx harbor run -d researchcodebench@1.0 -t diffusiondpo_calculate_reference_losses
69581ca
diffusiondpo_dpo_loss_computation
uvx harbor run -d researchcodebench@1.0 -t diffusiondpo_dpo_loss_computation
69581ca
diffusiondpo_dpo_loss_formula
uvx harbor run -d researchcodebench@1.0 -t diffusiondpo_dpo_loss_formula
69581ca
diffusiondpo_implicit_accuracy_metric
uvx harbor run -d researchcodebench@1.0 -t diffusiondpo_implicit_accuracy_metric
69581ca
diffusiondpo_loss_computation
uvx harbor run -d researchcodebench@1.0 -t diffusiondpo_loss_computation
69581ca
diffusiondpo_raw_model_loss_metric
uvx harbor run -d researchcodebench@1.0 -t diffusiondpo_raw_model_loss_metric
69581ca
diffusiondpo_raw_ref_loss
uvx harbor run -d researchcodebench@1.0 -t diffusiondpo_raw_ref_loss
69581ca
diffusiondpo_sft_loss
uvx harbor run -d researchcodebench@1.0 -t diffusiondpo_sft_loss
69581ca
dyt_alpha
uvx harbor run -d researchcodebench@1.0 -t dyt_alpha
69581ca
dyt_bias
uvx harbor run -d researchcodebench@1.0 -t dyt_bias
69581ca
dyt_channel_split
uvx harbor run -d researchcodebench@1.0 -t dyt_channel_split
69581ca
dyt_channels_first
uvx harbor run -d researchcodebench@1.0 -t dyt_channels_first
69581ca
dyt_channels_last
uvx harbor run -d researchcodebench@1.0 -t dyt_channels_last
69581ca
dyt_forward
uvx harbor run -d researchcodebench@1.0 -t dyt_forward
69581ca
dyt_init
uvx harbor run -d researchcodebench@1.0 -t dyt_init
69581ca
dyt_tanh
uvx harbor run -d researchcodebench@1.0 -t dyt_tanh
69581ca
dyt_weight
uvx harbor run -d researchcodebench@1.0 -t dyt_weight
69581ca
eomt_activation
uvx harbor run -d researchcodebench@1.0 -t eomt_activation
69581ca
eomt_add_query_tokens
uvx harbor run -d researchcodebench@1.0 -t eomt_add_query_tokens
69581ca
eomt_apply_masking_probability
uvx harbor run -d researchcodebench@1.0 -t eomt_apply_masking_probability
69581ca
eomt_calculate_upscale_parameters
uvx harbor run -d researchcodebench@1.0 -t eomt_calculate_upscale_parameters
69581ca
eomt_convolution_1
uvx harbor run -d researchcodebench@1.0 -t eomt_convolution_1
69581ca
eomt_convolution_2
uvx harbor run -d researchcodebench@1.0 -t eomt_convolution_2
69581ca
eomt_create_attention_mask
uvx harbor run -d researchcodebench@1.0 -t eomt_create_attention_mask
69581ca
eomt_custom_attention_mechanism
uvx harbor run -d researchcodebench@1.0 -t eomt_custom_attention_mechanism
69581ca
eomt_disable_attention_mask
uvx harbor run -d researchcodebench@1.0 -t eomt_disable_attention_mask
69581ca
eomt_eomt_initialization
uvx harbor run -d researchcodebench@1.0 -t eomt_eomt_initialization
69581ca
eomt_extract_query_key_value
uvx harbor run -d researchcodebench@1.0 -t eomt_extract_query_key_value
69581ca
eomt_extract_query_tokens
uvx harbor run -d researchcodebench@1.0 -t eomt_extract_query_tokens
69581ca
eomt_final_prediction
uvx harbor run -d researchcodebench@1.0 -t eomt_final_prediction
69581ca
eomt_forward_pass
uvx harbor run -d researchcodebench@1.0 -t eomt_forward_pass
69581ca
eomt_generate_class_logits
uvx harbor run -d researchcodebench@1.0 -t eomt_generate_class_logits
69581ca
eomt_generate_mask_logits
uvx harbor run -d researchcodebench@1.0 -t eomt_generate_mask_logits
69581ca
eomt_generate_masks
uvx harbor run -d researchcodebench@1.0 -t eomt_generate_masks
69581ca
eomt_initialize_class_prediction_head
uvx harbor run -d researchcodebench@1.0 -t eomt_initialize_class_prediction_head
69581ca
eomt_initialize_mask_prediction_head
uvx harbor run -d researchcodebench@1.0 -t eomt_initialize_mask_prediction_head
69581ca
eomt_initialize_query_embedding
uvx harbor run -d researchcodebench@1.0 -t eomt_initialize_query_embedding
69581ca
eomt_initialize_tracking_variables
uvx harbor run -d researchcodebench@1.0 -t eomt_initialize_tracking_variables
69581ca
eomt_initialize_upscale_module
uvx harbor run -d researchcodebench@1.0 -t eomt_initialize_upscale_module
69581ca
eomt_input_normalization
uvx harbor run -d researchcodebench@1.0 -t eomt_input_normalization
69581ca
eomt_normalization
uvx harbor run -d researchcodebench@1.0 -t eomt_normalization
69581ca
eomt_patch_embedding
uvx harbor run -d researchcodebench@1.0 -t eomt_patch_embedding
69581ca
eomt_predict_from_features
uvx harbor run -d researchcodebench@1.0 -t eomt_predict_from_features
69581ca
eomt_predict_masks_and_classes
uvx harbor run -d researchcodebench@1.0 -t eomt_predict_masks_and_classes
69581ca
eomt_process_attention_mask
uvx harbor run -d researchcodebench@1.0 -t eomt_process_attention_mask
69581ca
eomt_process_image_features
uvx harbor run -d researchcodebench@1.0 -t eomt_process_image_features
69581ca
eomt_process_transformer_blocks
uvx harbor run -d researchcodebench@1.0 -t eomt_process_transformer_blocks
69581ca
eomt_project_attention_output
uvx harbor run -d researchcodebench@1.0 -t eomt_project_attention_output
69581ca
eomt_random_mask_disable
uvx harbor run -d researchcodebench@1.0 -t eomt_random_mask_disable
69581ca
eomt_register_attention_mask_probabilities
uvx harbor run -d researchcodebench@1.0 -t eomt_register_attention_mask_probabilities
69581ca
eomt_scale_block_forward
uvx harbor run -d researchcodebench@1.0 -t eomt_scale_block_forward
69581ca
eomt_scale_block_initialization
uvx harbor run -d researchcodebench@1.0 -t eomt_scale_block_initialization
69581ca
eomt_store_parameters
uvx harbor run -d researchcodebench@1.0 -t eomt_store_parameters
69581ca
eomt_transformer_block_forward
uvx harbor run -d researchcodebench@1.0 -t eomt_transformer_block_forward
69581ca
fractalgen_cfg_schedule
uvx harbor run -d researchcodebench@1.0 -t fractalgen_cfg_schedule
69581ca
fractalgen_chunk_mask_to_pred
uvx harbor run -d researchcodebench@1.0 -t fractalgen_chunk_mask_to_pred
69581ca
fractalgen_concatenate_patches
uvx harbor run -d researchcodebench@1.0 -t fractalgen_concatenate_patches
69581ca
fractalgen_init_token_mask__patches__orders__num_iter
uvx harbor run -d researchcodebench@1.0 -t fractalgen_init_token_mask__patches__orders__num_iter
69581ca
fractalgen_mask_by_order
uvx harbor run -d researchcodebench@1.0 -t fractalgen_mask_by_order
69581ca
fractalgen_mask_ratio_and_mask_length
uvx harbor run -d researchcodebench@1.0 -t fractalgen_mask_ratio_and_mask_length
69581ca
fractalgen_next_level_sample_function
uvx harbor run -d researchcodebench@1.0 -t fractalgen_next_level_sample_function
69581ca
fractalgen_patchify
uvx harbor run -d researchcodebench@1.0 -t fractalgen_patchify
69581ca
fractalgen_random_masking
uvx harbor run -d researchcodebench@1.0 -t fractalgen_random_masking
69581ca
fractalgen_random_masking_uniform
uvx harbor run -d researchcodebench@1.0 -t fractalgen_random_masking_uniform
69581ca
fractalgen_sample_image
uvx harbor run -d researchcodebench@1.0 -t fractalgen_sample_image
69581ca
fractalgen_sample_orders
uvx harbor run -d researchcodebench@1.0 -t fractalgen_sample_orders
69581ca
fractalgen_unpatchify
uvx harbor run -d researchcodebench@1.0 -t fractalgen_unpatchify
69581ca
gmflow_apply_2nd_order_gm
uvx harbor run -d researchcodebench@1.0 -t gmflow_apply_2nd_order_gm
69581ca
gmflow_apply_mask_to_gm
uvx harbor run -d researchcodebench@1.0 -t gmflow_apply_mask_to_gm
69581ca
gmflow_apply_probabilistic_cfg
uvx harbor run -d researchcodebench@1.0 -t gmflow_apply_probabilistic_cfg
69581ca
gmflow_compute_final_nll
uvx harbor run -d researchcodebench@1.0 -t gmflow_compute_final_nll
69581ca
gmflow_compute_gaussian_log_likelihood
uvx harbor run -d researchcodebench@1.0 -t gmflow_compute_gaussian_log_likelihood
69581ca
gmflow_compute_gaussian_mask
uvx harbor run -d researchcodebench@1.0 -t gmflow_compute_gaussian_mask
69581ca
gmflow_compute_inverse_stds
uvx harbor run -d researchcodebench@1.0 -t gmflow_compute_inverse_stds
69581ca
gmflow_compute_noise_scale
uvx harbor run -d researchcodebench@1.0 -t gmflow_compute_noise_scale
69581ca
gmflow_compute_timestep_coefficients
uvx harbor run -d researchcodebench@1.0 -t gmflow_compute_timestep_coefficients
69581ca
gmflow_compute_weighted_difference
uvx harbor run -d researchcodebench@1.0 -t gmflow_compute_weighted_difference
69581ca
gmflow_convert_model_output_to_x0
uvx harbor run -d researchcodebench@1.0 -t gmflow_convert_model_output_to_x0
69581ca
gmflow_gm_kl_loss
uvx harbor run -d researchcodebench@1.0 -t gmflow_gm_kl_loss
69581ca
gmflow_main_sampling_loop
uvx harbor run -d researchcodebench@1.0 -t gmflow_main_sampling_loop
69581ca
gmflow_perform_gm_sdeode_step
uvx harbor run -d researchcodebench@1.0 -t gmflow_perform_gm_sdeode_step
69581ca
gmflow_prepare_timestep_tensor
uvx harbor run -d researchcodebench@1.0 -t gmflow_prepare_timestep_tensor
69581ca
gmflow_set_timesteps
uvx harbor run -d researchcodebench@1.0 -t gmflow_set_timesteps
69581ca
gps_chamfer_distance
uvx harbor run -d researchcodebench@1.0 -t gps_chamfer_distance
69581ca
gps_distance_between_two_sets
uvx harbor run -d researchcodebench@1.0 -t gps_distance_between_two_sets
69581ca
gps_euclidean_distance_computation
uvx harbor run -d researchcodebench@1.0 -t gps_euclidean_distance_computation
69581ca
gps_gps_reparameterization_with_minimum_euclidean_distance
uvx harbor run -d researchcodebench@1.0 -t gps_gps_reparameterization_with_minimum_euclidean_distance
69581ca