bfcl
v1.0
Berkeley Function-Calling Leaderboard: 3,641 function-calling tasks for evaluating LLM tool-use capabilities across simple, multiple, parallel, and irrelevance categories.
uvx harbor run -d bfcl@1.0
Tasks (3641)
bfcl-live-multiple-209-91-3
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-209-91-3
6bedd78
bfcl-live-multiple-21-4-13
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-21-4-13
6bedd78
bfcl-live-multiple-210-91-4
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-210-91-4
6bedd78
bfcl-live-multiple-211-91-5
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-211-91-5
6bedd78
bfcl-live-multiple-212-91-6
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-212-91-6
6bedd78
bfcl-live-multiple-213-91-7
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-213-91-7
6bedd78
bfcl-live-multiple-214-91-8
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-214-91-8
6bedd78
bfcl-live-multiple-215-91-9
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-215-91-9
6bedd78
bfcl-live-multiple-216-92-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-216-92-0
6bedd78
bfcl-live-multiple-217-93-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-217-93-0
6bedd78
bfcl-live-multiple-218-94-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-218-94-0
6bedd78
bfcl-live-multiple-219-94-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-219-94-1
6bedd78
bfcl-live-multiple-22-4-14
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-22-4-14
6bedd78
bfcl-live-multiple-220-94-2
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-220-94-2
6bedd78
bfcl-live-multiple-221-95-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-221-95-0
6bedd78
bfcl-live-multiple-222-96-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-222-96-0
6bedd78
bfcl-live-multiple-223-97-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-223-97-0
6bedd78
bfcl-live-multiple-224-98-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-224-98-0
6bedd78
bfcl-live-multiple-225-99-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-225-99-0
6bedd78
bfcl-live-multiple-226-100-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-226-100-0
6bedd78
bfcl-live-multiple-227-101-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-227-101-0
6bedd78
bfcl-live-multiple-228-102-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-228-102-0
6bedd78
bfcl-live-multiple-229-103-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-229-103-0
6bedd78
bfcl-live-multiple-23-5-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-23-5-0
6bedd78
bfcl-live-multiple-230-103-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-230-103-1
6bedd78
bfcl-live-multiple-231-104-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-231-104-0
6bedd78
bfcl-live-multiple-232-104-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-232-104-1
6bedd78
bfcl-live-multiple-233-105-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-233-105-0
6bedd78
bfcl-live-multiple-234-106-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-234-106-0
6bedd78
bfcl-live-multiple-235-106-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-235-106-1
6bedd78
bfcl-live-multiple-236-106-2
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-236-106-2
6bedd78
bfcl-live-multiple-237-106-3
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-237-106-3
6bedd78
bfcl-live-multiple-238-106-4
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-238-106-4
6bedd78
bfcl-live-multiple-239-107-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-239-107-0
6bedd78
bfcl-live-multiple-24-5-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-24-5-1
6bedd78
bfcl-live-multiple-240-107-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-240-107-1
6bedd78
bfcl-live-multiple-241-107-2
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-241-107-2
6bedd78
bfcl-live-multiple-242-107-3
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-242-107-3
6bedd78
bfcl-live-multiple-243-107-4
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-243-107-4
6bedd78
bfcl-live-multiple-244-108-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-244-108-0
6bedd78
bfcl-live-multiple-245-109-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-245-109-0
6bedd78
bfcl-live-multiple-246-110-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-246-110-0
6bedd78
bfcl-live-multiple-247-111-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-247-111-0
6bedd78
bfcl-live-multiple-248-112-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-248-112-0
6bedd78
bfcl-live-multiple-249-113-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-249-113-0
6bedd78
bfcl-live-multiple-25-6-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-25-6-0
6bedd78
bfcl-live-multiple-250-114-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-250-114-0
6bedd78
bfcl-live-multiple-251-115-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-251-115-0
6bedd78
bfcl-live-multiple-252-116-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-252-116-0
6bedd78
bfcl-live-multiple-253-117-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-253-117-0
6bedd78
bfcl-live-multiple-254-118-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-254-118-0
6bedd78
bfcl-live-multiple-255-119-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-255-119-0
6bedd78
bfcl-live-multiple-256-120-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-256-120-0
6bedd78
bfcl-live-multiple-257-121-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-257-121-0
6bedd78
bfcl-live-multiple-258-122-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-258-122-0
6bedd78
bfcl-live-multiple-259-123-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-259-123-0
6bedd78
bfcl-live-multiple-26-6-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-26-6-1
6bedd78
bfcl-live-multiple-260-124-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-260-124-0
6bedd78
bfcl-live-multiple-261-125-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-261-125-0
6bedd78
bfcl-live-multiple-262-125-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-262-125-1
6bedd78
bfcl-live-multiple-263-126-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-263-126-0
6bedd78
bfcl-live-multiple-264-126-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-264-126-1
6bedd78
bfcl-live-multiple-265-127-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-265-127-0
6bedd78
bfcl-live-multiple-266-127-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-266-127-1
6bedd78
bfcl-live-multiple-267-127-2
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-267-127-2
6bedd78
bfcl-live-multiple-268-127-3
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-268-127-3
6bedd78
bfcl-live-multiple-269-127-4
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-269-127-4
6bedd78
bfcl-live-multiple-27-7-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-27-7-0
6bedd78
bfcl-live-multiple-270-127-5
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-270-127-5
6bedd78
bfcl-live-multiple-271-127-6
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-271-127-6
6bedd78
bfcl-live-multiple-272-127-7
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-272-127-7
6bedd78
bfcl-live-multiple-273-127-8
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-273-127-8
6bedd78
bfcl-live-multiple-274-127-9
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-274-127-9
6bedd78
bfcl-live-multiple-275-127-10
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-275-127-10
6bedd78
bfcl-live-multiple-276-127-11
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-276-127-11
6bedd78
bfcl-live-multiple-277-128-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-277-128-0
6bedd78
bfcl-live-multiple-278-128-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-278-128-1
6bedd78
bfcl-live-multiple-279-128-2
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-279-128-2
6bedd78
bfcl-live-multiple-28-8-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-28-8-0
6bedd78
bfcl-live-multiple-280-128-3
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-280-128-3
6bedd78
bfcl-live-multiple-281-128-4
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-281-128-4
6bedd78
bfcl-live-multiple-282-128-5
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-282-128-5
6bedd78
bfcl-live-multiple-283-128-6
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-283-128-6
6bedd78
bfcl-live-multiple-284-128-7
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-284-128-7
6bedd78
bfcl-live-multiple-285-129-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-285-129-0
6bedd78
bfcl-live-multiple-286-129-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-286-129-1
6bedd78
bfcl-live-multiple-287-129-2
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-287-129-2
6bedd78
bfcl-live-multiple-288-129-3
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-288-129-3
6bedd78
bfcl-live-multiple-289-129-4
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-289-129-4
6bedd78
bfcl-live-multiple-29-9-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-29-9-0
6bedd78
bfcl-live-multiple-290-129-5
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-290-129-5
6bedd78
bfcl-live-multiple-291-130-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-291-130-0
6bedd78
bfcl-live-multiple-292-130-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-292-130-1
6bedd78
bfcl-live-multiple-293-130-2
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-293-130-2
6bedd78
bfcl-live-multiple-294-130-3
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-294-130-3
6bedd78
bfcl-live-multiple-295-130-4
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-295-130-4
6bedd78
bfcl-live-multiple-296-130-5
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-296-130-5
6bedd78
bfcl-live-multiple-297-130-6
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-297-130-6
6bedd78
bfcl-live-multiple-298-130-7
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-298-130-7
6bedd78
bfcl-live-multiple-299-130-8
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-299-130-8
6bedd78