Skip to content

Commit 9c9127b

Browse files
committed
added small, big repo results
1 parent 9670af4 commit 9c9127b

File tree

1 file changed

+366
-0
lines changed

1 file changed

+366
-0
lines changed

index.html

Lines changed: 366 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -353,6 +353,372 @@ <h2 id="results">Results</h2>
353353
<figcaption style="text-align: center;">Table 3: Refactoring results for LIBRARIAN (w/ K = 8) averaged over 10 CodeContests collections</figcaption>
354354
</figure>
355355

356+
<figure class="table-figure">
357+
<table class="table-styled">
358+
<thead>
359+
<tr>
360+
<th><strong>Collection</strong></th>
361+
<th><strong>Agent</strong></th>
362+
<th><strong>LLoC</strong></th>
363+
<th><strong>CC</strong></th>
364+
<th><strong>MDL ratio</strong></th>
365+
<th><strong>Pass rate</strong></th>
366+
</tr>
367+
</thead>
368+
<tbody>
369+
<tr>
370+
<td rowspan="3">datapipe</td>
371+
<td>original</td>
372+
<td>474</td>
373+
<td>116</td>
374+
<td>1.0</td>
375+
<td>1.0</td>
376+
</tr>
377+
<tr>
378+
<td>Cl-Cl</td>
379+
<td>1084</td>
380+
<td>341</td>
381+
<td>9.1</td>
382+
<td>1.0</td>
383+
</tr>
384+
<tr>
385+
<td>Cl-Cx</td>
386+
<td>645</td>
387+
<td>174</td>
388+
<td>2.2</td>
389+
<td>1.0</td>
390+
</tr>
391+
<tr>
392+
<td rowspan="3">state_machine</td>
393+
<td>original</td>
394+
<td>619</td>
395+
<td>222</td>
396+
<td>1.0</td>
397+
<td>1.0</td>
398+
</tr>
399+
<tr>
400+
<td>Cl-Cl</td>
401+
<td>2271</td>
402+
<td>735</td>
403+
<td>6.3</td>
404+
<td>0.9</td>
405+
</tr>
406+
<tr>
407+
<td>Cl-Cx</td>
408+
<td>686</td>
409+
<td>227</td>
410+
<td>1.9</td>
411+
<td>1.0</td>
412+
</tr>
413+
<tr>
414+
<td rowspan="3">config_schema</td>
415+
<td>original</td>
416+
<td>949</td>
417+
<td>284</td>
418+
<td>1.0</td>
419+
<td>1.0</td>
420+
</tr>
421+
<tr>
422+
<td>Cl-Cl</td>
423+
<td>922</td>
424+
<td>339</td>
425+
<td>2.5</td>
426+
<td>failed</td>
427+
</tr>
428+
<tr>
429+
<td>Cl-Cx</td>
430+
<td>626</td>
431+
<td>207</td>
432+
<td>2.1</td>
433+
<td>1.0</td>
434+
</tr>
435+
<tr>
436+
<td rowspan="3">cli_tools</td>
437+
<td>original</td>
438+
<td>875</td>
439+
<td>300</td>
440+
<td>1.0</td>
441+
<td>1.0</td>
442+
</tr>
443+
<tr>
444+
<td>Cl-Cl</td>
445+
<td>2925</td>
446+
<td>909</td>
447+
<td>4.0</td>
448+
<td>1.0</td>
449+
</tr>
450+
<tr>
451+
<td>Cl-Cx</td>
452+
<td>5848</td>
453+
<td>2378</td>
454+
<td>3.1</td>
455+
<td>failed</td>
456+
</tr>
457+
<tr>
458+
<td rowspan="3">cli_form</td>
459+
<td>original</td>
460+
<td>352</td>
461+
<td>174</td>
462+
<td>1.0</td>
463+
<td>1.0</td>
464+
</tr>
465+
<tr>
466+
<td>Cl-Cl</td>
467+
<td>855</td>
468+
<td>342</td>
469+
<td>3.8</td>
470+
<td>1.0</td>
471+
</tr>
472+
<tr>
473+
<td>Cl-Cx</td>
474+
<td>1366</td>
475+
<td>548</td>
476+
<td>2.9</td>
477+
<td>1.0</td>
478+
</tr>
479+
</tbody>
480+
</table>
481+
<figcaption style="text-align: center;">Table 4: Full results on MiniCode-repositories small, using Codex with o4-mini and Claude Code with Claude Sonnet 3.7</figcaption>
482+
</figure>
483+
484+
<figure class="table-figure">
485+
<table class="table-styled">
486+
<thead>
487+
<tr>
488+
<th><strong>Collection</strong></th>
489+
<th><strong>Agent</strong></th>
490+
<th><strong>LLoC</strong></th>
491+
<th><strong>CC</strong></th>
492+
<th><strong>MDL %</strong></th>
493+
<th><strong>Pass %</strong></th>
494+
</tr>
495+
</thead>
496+
<tbody>
497+
<tr>
498+
<td rowspan="3">command_line_task_manager</td>
499+
<td>original</td>
500+
<td>7964</td>
501+
<td>2921</td>
502+
<td>100</td>
503+
<td>100</td>
504+
</tr>
505+
<tr>
506+
<td>sonnet 3.7</td>
507+
<td>10387</td>
508+
<td>3899</td>
509+
<td>130</td>
510+
<td>100</td>
511+
</tr>
512+
<tr>
513+
<td>sonnet 4</td>
514+
<td>9515</td>
515+
<td>3946</td>
516+
<td>125</td>
517+
<td>100</td>
518+
</tr>
519+
<tr>
520+
<td rowspan="3">concurrent_task_scheduler</td>
521+
<td>original</td>
522+
<td>10350</td>
523+
<td>3935</td>
524+
<td>100</td>
525+
<td>100</td>
526+
</tr>
527+
<tr>
528+
<td>sonnet 3.7</td>
529+
<td>18653</td>
530+
<td>7165</td>
531+
<td>190</td>
532+
<td>80</td>
533+
</tr>
534+
<tr>
535+
<td>sonnet 4</td>
536+
<td>11491</td>
537+
<td>4762</td>
538+
<td>122</td>
539+
<td>80</td>
540+
</tr>
541+
<tr>
542+
<td rowspan="3">file_system_analyzer</td>
543+
<td>original</td>
544+
<td>3911</td>
545+
<td>1338</td>
546+
<td>100</td>
547+
<td>100</td>
548+
</tr>
549+
<tr>
550+
<td>sonnet 3.7</td>
551+
<td>6495</td>
552+
<td>2218</td>
553+
<td>160</td>
554+
<td>100</td>
555+
</tr>
556+
<tr>
557+
<td>sonnet 4</td>
558+
<td>5221</td>
559+
<td>1793</td>
560+
<td>130</td>
561+
<td>100</td>
562+
</tr>
563+
<tr>
564+
<td rowspan="3">in_memory_database</td>
565+
<td>original</td>
566+
<td>4565</td>
567+
<td>1671</td>
568+
<td>100</td>
569+
<td>100</td>
570+
</tr>
571+
<tr>
572+
<td>sonnet 3.7</td>
573+
<td>6667</td>
574+
<td>2556</td>
575+
<td>150</td>
576+
<td>100</td>
577+
</tr>
578+
<tr>
579+
<td>sonnet 4</td>
580+
<td>5617</td>
581+
<td>2356</td>
582+
<td>131</td>
583+
<td>100</td>
584+
</tr>
585+
<tr>
586+
<td rowspan="3">incremental_backup_system</td>
587+
<td>original</td>
588+
<td>4587</td>
589+
<td>1482</td>
590+
<td>100</td>
591+
<td>100</td>
592+
</tr>
593+
<tr>
594+
<td>sonnet 3.7</td>
595+
<td>5365</td>
596+
<td>1788</td>
597+
<td>130</td>
598+
<td>failed</td>
599+
</tr>
600+
<tr>
601+
<td>sonnet 4</td>
602+
<td>6620</td>
603+
<td>2168</td>
604+
<td>152</td>
605+
<td>60</td>
606+
</tr>
607+
<tr>
608+
<td rowspan="3">personal_finance_tracker</td>
609+
<td>original</td>
610+
<td>4306</td>
611+
<td>1482</td>
612+
<td>100</td>
613+
<td>100</td>
614+
</tr>
615+
<tr>
616+
<td>sonnet 3.7</td>
617+
<td>7443</td>
618+
<td>2512</td>
619+
<td>180</td>
620+
<td>63</td>
621+
</tr>
622+
<tr>
623+
<td>sonnet 4</td>
624+
<td>9212</td>
625+
<td>3335</td>
626+
<td>215</td>
627+
<td>63</td>
628+
</tr>
629+
<tr>
630+
<td rowspan="3">personal_knowledge_management</td>
631+
<td>original</td>
632+
<td>5341</td>
633+
<td>1832</td>
634+
<td>100</td>
635+
<td>100</td>
636+
</tr>
637+
<tr>
638+
<td>sonnet 3.7</td>
639+
<td>6357</td>
640+
<td>2364</td>
641+
<td>140</td>
642+
<td>100</td>
643+
</tr>
644+
<tr>
645+
<td>sonnet 4</td>
646+
<td>5575</td>
647+
<td>2597</td>
648+
<td>131</td>
649+
<td>80</td>
650+
</tr>
651+
<tr>
652+
<td rowspan="3">query_language_interpreter</td>
653+
<td>original</td>
654+
<td>5181</td>
655+
<td>2105</td>
656+
<td>100</td>
657+
<td>100</td>
658+
</tr>
659+
<tr>
660+
<td>sonnet 3.7</td>
661+
<td>7193</td>
662+
<td>2966</td>
663+
<td>140</td>
664+
<td>100</td>
665+
</tr>
666+
<tr>
667+
<td>sonnet 4</td>
668+
<td>7490</td>
669+
<td>3076</td>
670+
<td>142</td>
671+
<td>100</td>
672+
</tr>
673+
<tr>
674+
<td rowspan="3">text_editor</td>
675+
<td>original</td>
676+
<td>4324</td>
677+
<td>1323</td>
678+
<td>100</td>
679+
<td>100</td>
680+
</tr>
681+
<tr>
682+
<td>sonnet 3.7</td>
683+
<td>5792</td>
684+
<td>1822</td>
685+
<td>140</td>
686+
<td>100</td>
687+
</tr>
688+
<tr>
689+
<td>sonnet 4</td>
690+
<td>6017</td>
691+
<td>2308</td>
692+
<td>148</td>
693+
<td>100</td>
694+
</tr>
695+
<tr>
696+
<td rowspan="3">virtual_machine_emulator</td>
697+
<td>original</td>
698+
<td>6841</td>
699+
<td>2283</td>
700+
<td>100</td>
701+
<td>100</td>
702+
</tr>
703+
<tr>
704+
<td>sonnet 3.7</td>
705+
<td>10108</td>
706+
<td>3580</td>
707+
<td>160</td>
708+
<td>100</td>
709+
</tr>
710+
<tr>
711+
<td>sonnet 4</td>
712+
<td>9220</td>
713+
<td>3303</td>
714+
<td>137</td>
715+
<td>100</td>
716+
</tr>
717+
</tbody>
718+
</table>
719+
<figcaption style="text-align: center;">Table 5: Full results on MiniCode-repositories large, comparing the original code sources with the refactorings produced by Claude Sonnet 3.7 and Claude Sonnet 4</figcaption>
720+
</figure>
721+
356722
Check out the paper for the full CodeContests results, as well as repository results!
357723
</section>
358724

0 commit comments

Comments
 (0)