deveval
v1.0
DevEval benchmark: comprehensive evaluation of LLMs across the software development lifecycle (implementation, unit testing, acceptance testing) for 21 real-world repositories across Python, C++, Java, and JavaScript
uvx harbor run -d deveval@1.0
Tasks (63)
cpp-area-calculation-acceptance-testing
uvx harbor run -d deveval@1.0 -t cpp-area-calculation-acceptance-testing
701ce1d
cpp-area-calculation-implementation
uvx harbor run -d deveval@1.0 -t cpp-area-calculation-implementation
701ce1d
cpp-area-calculation-unit-testing
uvx harbor run -d deveval@1.0 -t cpp-area-calculation-unit-testing
701ce1d
cpp-graph-cpp-acceptance-testing
uvx harbor run -d deveval@1.0 -t cpp-graph-cpp-acceptance-testing
701ce1d
cpp-graph-cpp-implementation
uvx harbor run -d deveval@1.0 -t cpp-graph-cpp-implementation
701ce1d
cpp-graph-cpp-unit-testing
uvx harbor run -d deveval@1.0 -t cpp-graph-cpp-unit-testing
701ce1d
cpp-logistic-system-acceptance-testing
uvx harbor run -d deveval@1.0 -t cpp-logistic-system-acceptance-testing
701ce1d
cpp-logistic-system-implementation
uvx harbor run -d deveval@1.0 -t cpp-logistic-system-implementation
701ce1d
cpp-logistic-system-unit-testing
uvx harbor run -d deveval@1.0 -t cpp-logistic-system-unit-testing
701ce1d
cpp-people-management-acceptance-testing
uvx harbor run -d deveval@1.0 -t cpp-people-management-acceptance-testing
701ce1d
cpp-people-management-implementation
uvx harbor run -d deveval@1.0 -t cpp-people-management-implementation
701ce1d
cpp-people-management-unit-testing
uvx harbor run -d deveval@1.0 -t cpp-people-management-unit-testing
701ce1d
cpp-xlsx2csv-acceptance-testing
uvx harbor run -d deveval@1.0 -t cpp-xlsx2csv-acceptance-testing
701ce1d
cpp-xlsx2csv-implementation
uvx harbor run -d deveval@1.0 -t cpp-xlsx2csv-implementation
701ce1d
cpp-xlsx2csv-unit-testing
uvx harbor run -d deveval@1.0 -t cpp-xlsx2csv-unit-testing
701ce1d
java-actor-relationship-game-acceptance-testing
uvx harbor run -d deveval@1.0 -t java-actor-relationship-game-acceptance-testing
701ce1d
java-actor-relationship-game-implementation
uvx harbor run -d deveval@1.0 -t java-actor-relationship-game-implementation
701ce1d
java-actor-relationship-game-unit-testing
uvx harbor run -d deveval@1.0 -t java-actor-relationship-game-unit-testing
701ce1d
java-idcenter-acceptance-testing
uvx harbor run -d deveval@1.0 -t java-idcenter-acceptance-testing
701ce1d
java-idcenter-implementation
uvx harbor run -d deveval@1.0 -t java-idcenter-implementation
701ce1d
java-idcenter-unit-testing
uvx harbor run -d deveval@1.0 -t java-idcenter-unit-testing
701ce1d
java-image-similarity-acceptance-testing
uvx harbor run -d deveval@1.0 -t java-image-similarity-acceptance-testing
701ce1d
java-image-similarity-implementation
uvx harbor run -d deveval@1.0 -t java-image-similarity-implementation
701ce1d
java-image-similarity-unit-testing
uvx harbor run -d deveval@1.0 -t java-image-similarity-unit-testing
701ce1d
java-java-heap-acceptance-testing
uvx harbor run -d deveval@1.0 -t java-java-heap-acceptance-testing
701ce1d
java-java-heap-implementation
uvx harbor run -d deveval@1.0 -t java-java-heap-implementation
701ce1d
java-java-heap-unit-testing
uvx harbor run -d deveval@1.0 -t java-java-heap-unit-testing
701ce1d
java-redis-cache-acceptance-testing
uvx harbor run -d deveval@1.0 -t java-redis-cache-acceptance-testing
701ce1d
java-redis-cache-implementation
uvx harbor run -d deveval@1.0 -t java-redis-cache-implementation
701ce1d
java-redis-cache-unit-testing
uvx harbor run -d deveval@1.0 -t java-redis-cache-unit-testing
701ce1d
javascript-listen-now-frontend-acceptance-testing
uvx harbor run -d deveval@1.0 -t javascript-listen-now-frontend-acceptance-testing
701ce1d
javascript-listen-now-frontend-implementation
uvx harbor run -d deveval@1.0 -t javascript-listen-now-frontend-implementation
701ce1d
javascript-listen-now-frontend-unit-testing
uvx harbor run -d deveval@1.0 -t javascript-listen-now-frontend-unit-testing
701ce1d
javascript-login-registration-acceptance-testing
uvx harbor run -d deveval@1.0 -t javascript-login-registration-acceptance-testing
701ce1d
javascript-login-registration-implementation
uvx harbor run -d deveval@1.0 -t javascript-login-registration-implementation
701ce1d
javascript-login-registration-unit-testing
uvx harbor run -d deveval@1.0 -t javascript-login-registration-unit-testing
701ce1d
python-arxiv-digest-acceptance-testing
uvx harbor run -d deveval@1.0 -t python-arxiv-digest-acceptance-testing
701ce1d
python-arxiv-digest-implementation
uvx harbor run -d deveval@1.0 -t python-arxiv-digest-implementation
701ce1d
python-arxiv-digest-unit-testing
uvx harbor run -d deveval@1.0 -t python-arxiv-digest-unit-testing
701ce1d
python-chakin-acceptance-testing
uvx harbor run -d deveval@1.0 -t python-chakin-acceptance-testing
701ce1d
python-chakin-implementation
uvx harbor run -d deveval@1.0 -t python-chakin-implementation
701ce1d
python-chakin-unit-testing
uvx harbor run -d deveval@1.0 -t python-chakin-unit-testing
701ce1d
python-geotext-acceptance-testing
uvx harbor run -d deveval@1.0 -t python-geotext-acceptance-testing
701ce1d
python-geotext-implementation
uvx harbor run -d deveval@1.0 -t python-geotext-implementation
701ce1d
python-geotext-unit-testing
uvx harbor run -d deveval@1.0 -t python-geotext-unit-testing
701ce1d
python-hone-acceptance-testing
uvx harbor run -d deveval@1.0 -t python-hone-acceptance-testing
701ce1d
python-hone-implementation
uvx harbor run -d deveval@1.0 -t python-hone-implementation
701ce1d
python-hone-unit-testing
uvx harbor run -d deveval@1.0 -t python-hone-unit-testing
701ce1d
python-hybrid-images-acceptance-testing
uvx harbor run -d deveval@1.0 -t python-hybrid-images-acceptance-testing
701ce1d
python-hybrid-images-implementation
uvx harbor run -d deveval@1.0 -t python-hybrid-images-implementation
701ce1d
python-hybrid-images-unit-testing
uvx harbor run -d deveval@1.0 -t python-hybrid-images-unit-testing
701ce1d
python-lice-acceptance-testing
uvx harbor run -d deveval@1.0 -t python-lice-acceptance-testing
701ce1d
python-lice-implementation
uvx harbor run -d deveval@1.0 -t python-lice-implementation
701ce1d
python-lice-unit-testing
uvx harbor run -d deveval@1.0 -t python-lice-unit-testing
701ce1d
python-particle-swarm-optimization-acceptance-testing
uvx harbor run -d deveval@1.0 -t python-particle-swarm-optimization-acceptance-testing
701ce1d
python-particle-swarm-optimization-implementation
uvx harbor run -d deveval@1.0 -t python-particle-swarm-optimization-implementation
701ce1d
python-particle-swarm-optimization-unit-testing
uvx harbor run -d deveval@1.0 -t python-particle-swarm-optimization-unit-testing
701ce1d
python-readtime-acceptance-testing
uvx harbor run -d deveval@1.0 -t python-readtime-acceptance-testing
701ce1d
python-readtime-implementation
uvx harbor run -d deveval@1.0 -t python-readtime-implementation
701ce1d
python-readtime-unit-testing
uvx harbor run -d deveval@1.0 -t python-readtime-unit-testing
701ce1d
python-stocktrends-acceptance-testing
uvx harbor run -d deveval@1.0 -t python-stocktrends-acceptance-testing
701ce1d
python-stocktrends-implementation
uvx harbor run -d deveval@1.0 -t python-stocktrends-implementation
701ce1d
python-stocktrends-unit-testing
uvx harbor run -d deveval@1.0 -t python-stocktrends-unit-testing
701ce1d