bfcl
v1.0
Berkeley Function-Calling Leaderboard: 3,641 function-calling tasks for evaluating LLM tool use capabilities across simple, multiple, parallel, and irrelevance categories.
uvx harbor run -d bfcl@1.0
Tasks (3641)
bfcl-live-multiple-3-2-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-3-2-06bedd78
bfcl-live-multiple-30-10-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-30-10-06bedd78
bfcl-live-multiple-300-130-9
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-300-130-96bedd78
bfcl-live-multiple-301-131-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-301-131-06bedd78
bfcl-live-multiple-302-131-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-302-131-16bedd78
bfcl-live-multiple-303-131-2
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-303-131-26bedd78
bfcl-live-multiple-304-131-3
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-304-131-36bedd78
bfcl-live-multiple-305-131-4
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-305-131-46bedd78
bfcl-live-multiple-306-131-5
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-306-131-56bedd78
bfcl-live-multiple-307-131-6
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-307-131-66bedd78
bfcl-live-multiple-308-131-7
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-308-131-76bedd78
bfcl-live-multiple-309-131-8
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-309-131-86bedd78
bfcl-live-multiple-31-10-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-31-10-16bedd78
bfcl-live-multiple-310-132-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-310-132-06bedd78
bfcl-live-multiple-311-132-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-311-132-16bedd78
bfcl-live-multiple-312-132-2
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-312-132-26bedd78
bfcl-live-multiple-313-132-3
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-313-132-36bedd78
bfcl-live-multiple-314-132-4
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-314-132-46bedd78
bfcl-live-multiple-315-132-5
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-315-132-56bedd78
bfcl-live-multiple-316-132-6
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-316-132-66bedd78
bfcl-live-multiple-317-132-7
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-317-132-76bedd78
bfcl-live-multiple-318-132-8
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-318-132-86bedd78
bfcl-live-multiple-319-132-9
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-319-132-96bedd78
bfcl-live-multiple-32-10-2
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-32-10-26bedd78
bfcl-live-multiple-320-132-10
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-320-132-106bedd78
bfcl-live-multiple-321-132-11
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-321-132-116bedd78
bfcl-live-multiple-322-132-12
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-322-132-126bedd78
bfcl-live-multiple-323-132-13
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-323-132-136bedd78
bfcl-live-multiple-324-132-14
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-324-132-146bedd78
bfcl-live-multiple-325-132-15
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-325-132-156bedd78
bfcl-live-multiple-326-132-16
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-326-132-166bedd78
bfcl-live-multiple-327-132-17
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-327-132-176bedd78
bfcl-live-multiple-328-132-18
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-328-132-186bedd78
bfcl-live-multiple-329-132-19
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-329-132-196bedd78
bfcl-live-multiple-33-10-3
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-33-10-36bedd78
bfcl-live-multiple-330-132-20
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-330-132-206bedd78
bfcl-live-multiple-331-132-21
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-331-132-216bedd78
bfcl-live-multiple-332-132-22
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-332-132-226bedd78
bfcl-live-multiple-333-132-23
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-333-132-236bedd78
bfcl-live-multiple-334-132-24
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-334-132-246bedd78
bfcl-live-multiple-335-132-25
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-335-132-256bedd78
bfcl-live-multiple-336-133-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-336-133-06bedd78
bfcl-live-multiple-337-133-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-337-133-16bedd78
bfcl-live-multiple-338-133-2
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-338-133-26bedd78
bfcl-live-multiple-339-133-3
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-339-133-36bedd78
bfcl-live-multiple-34-11-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-34-11-06bedd78
bfcl-live-multiple-340-133-4
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-340-133-46bedd78
bfcl-live-multiple-341-133-5
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-341-133-56bedd78
bfcl-live-multiple-342-133-6
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-342-133-66bedd78
bfcl-live-multiple-343-133-7
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-343-133-76bedd78
bfcl-live-multiple-344-133-8
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-344-133-86bedd78
bfcl-live-multiple-345-133-9
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-345-133-96bedd78
bfcl-live-multiple-346-133-10
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-346-133-106bedd78
bfcl-live-multiple-347-133-11
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-347-133-116bedd78
bfcl-live-multiple-348-133-12
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-348-133-126bedd78
bfcl-live-multiple-349-133-13
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-349-133-136bedd78
bfcl-live-multiple-35-11-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-35-11-16bedd78
bfcl-live-multiple-350-133-14
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-350-133-146bedd78
bfcl-live-multiple-351-133-15
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-351-133-156bedd78
bfcl-live-multiple-352-133-16
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-352-133-166bedd78
bfcl-live-multiple-353-133-17
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-353-133-176bedd78
bfcl-live-multiple-354-133-18
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-354-133-186bedd78
bfcl-live-multiple-355-134-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-355-134-06bedd78
bfcl-live-multiple-356-134-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-356-134-16bedd78
bfcl-live-multiple-357-134-2
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-357-134-26bedd78
bfcl-live-multiple-358-134-3
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-358-134-36bedd78
bfcl-live-multiple-359-134-4
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-359-134-46bedd78
bfcl-live-multiple-36-12-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-36-12-06bedd78
bfcl-live-multiple-360-134-5
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-360-134-56bedd78
bfcl-live-multiple-361-134-6
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-361-134-66bedd78
bfcl-live-multiple-362-134-7
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-362-134-76bedd78
bfcl-live-multiple-363-134-8
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-363-134-86bedd78
bfcl-live-multiple-364-134-9
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-364-134-96bedd78
bfcl-live-multiple-365-134-10
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-365-134-106bedd78
bfcl-live-multiple-366-134-11
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-366-134-116bedd78
bfcl-live-multiple-367-134-12
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-367-134-126bedd78
bfcl-live-multiple-368-134-13
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-368-134-136bedd78
bfcl-live-multiple-369-134-14
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-369-134-146bedd78
bfcl-live-multiple-37-13-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-37-13-06bedd78
bfcl-live-multiple-370-134-15
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-370-134-156bedd78
bfcl-live-multiple-371-134-16
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-371-134-166bedd78
bfcl-live-multiple-372-134-17
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-372-134-176bedd78
bfcl-live-multiple-373-134-18
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-373-134-186bedd78
bfcl-live-multiple-374-134-19
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-374-134-196bedd78
bfcl-live-multiple-375-134-20
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-375-134-206bedd78
bfcl-live-multiple-376-135-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-376-135-06bedd78
bfcl-live-multiple-377-135-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-377-135-16bedd78
bfcl-live-multiple-378-135-2
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-378-135-26bedd78
bfcl-live-multiple-379-136-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-379-136-06bedd78
bfcl-live-multiple-38-14-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-38-14-06bedd78
bfcl-live-multiple-380-136-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-380-136-16bedd78
bfcl-live-multiple-381-136-2
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-381-136-26bedd78
bfcl-live-multiple-382-137-0
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-382-137-06bedd78
bfcl-live-multiple-383-137-1
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-383-137-16bedd78
bfcl-live-multiple-384-137-2
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-384-137-26bedd78
bfcl-live-multiple-385-137-3
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-385-137-36bedd78
bfcl-live-multiple-386-137-4
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-386-137-46bedd78
bfcl-live-multiple-387-137-5
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-387-137-56bedd78
bfcl-live-multiple-388-137-6
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-388-137-66bedd78
bfcl-live-multiple-389-137-7
uvx harbor run -d bfcl@1.0 -t bfcl-live-multiple-389-137-76bedd78