Files
deep_pro_judge/kimi-k2.6/ternary_training/training_results.json
T
sleepy 45c3aad453 feat: expand to 6 models, 8 challenges; rewrite README with DeepSeek V4 Pro analysis
- Add Claude Opus 4.7, Kimi K2.6, GLM-5.1 to existing GLM-5, Qwen3-6, MiniMax-M2.7
- Add 5 new challenges: flash attention fwd/bwd, beam search, DFlash, ternary training
- Rewrite README with TL;DR rankings, grade matrix, and DeepSeek V4 Pro attribution
- Add analysis/ folder with cross-model comparisons and per-challenge deep dives
- Add deploy_challenges.sh script
- Expand .gitignore to exclude Python envs, ML weights, and build artifacts
2026-04-27 18:49:22 +02:00

520 lines
13 KiB
JSON

{
"hyperparameters": {
"group_size": 128,
"seq_length": 128,
"batch_size": 2,
"num_steps": 500,
"learning_rate": 5e-05
},
"training": {
"initial_loss": 19.415189743041992,
"final_loss": 2.9524176120758057,
"loss_curve": [
19.415189743041992,
17.6638126373291,
17.5764102935791,
15.895140647888184,
14.59153938293457,
16.177194595336914,
12.788802146911621,
11.541013717651367,
12.024927139282227,
12.84988021850586,
11.819856643676758,
11.076229095458984,
10.465115547180176,
10.294048309326172,
9.291016578674316,
5.1339216232299805,
5.194877624511719,
10.74880313873291,
13.278043746948242,
10.388489723205566,
10.168744087219238,
10.00532341003418,
10.9448881149292,
13.310420989990234,
11.944724082946777,
8.91694450378418,
5.131122589111328,
10.197563171386719,
10.01301097869873,
8.23267936706543,
10.079784393310547,
9.714762687683105,
9.727270126342773,
7.821086406707764,
7.828431606292725,
8.294801712036133,
8.915307998657227,
6.78751802444458,
8.784393310546875,
7.263306617736816,
6.968741416931152,
7.635254859924316,
8.597051620483398,
3.962278366088867,
2.649355888366699,
1.3925496339797974,
2.150885581970215,
2.4210667610168457,
2.107257127761841,
2.2125606536865234,
2.3535611629486084,
2.306110382080078,
2.496791362762451,
1.910773515701294,
2.7408840656280518,
2.522926092147827,
2.7258849143981934,
2.2239017486572266,
7.865502834320068,
11.369744300842285,
7.2785325050354,
10.744400024414062,
11.363978385925293,
11.373944282531738,
11.010241508483887,
9.836434364318848,
9.70583438873291,
9.144129753112793,
9.465516090393066,
8.564160346984863,
9.16439151763916,
9.109588623046875,
8.336548805236816,
7.943811893463135,
7.852457523345947,
6.610472679138184,
4.080750465393066,
4.603875637054443,
7.214062690734863,
14.470048904418945,
14.226180076599121,
13.72647762298584,
10.721973419189453,
10.570021629333496,
10.210411071777344,
10.290371894836426,
7.791189193725586,
7.8287835121154785,
7.902010440826416,
8.494746208190918,
8.886126518249512,
8.344682693481445,
9.480537414550781,
8.99856948852539,
8.164642333984375,
8.365951538085938,
9.024402618408203,
8.6676607131958,
10.06509017944336,
9.371912956237793,
9.17201042175293,
9.499948501586914,
8.475625991821289,
9.137506484985352,
8.084639549255371,
8.213334083557129,
7.3555707931518555,
7.324641227722168,
7.4844536781311035,
8.139140129089355,
7.955804824829102,
8.107175827026367,
6.985445022583008,
6.115233421325684,
6.798851013183594,
2.756054639816284,
4.928526401519775,
9.184700012207031,
9.650903701782227,
7.893393039703369,
7.769137382507324,
7.712228775024414,
8.659494400024414,
9.301843643188477,
9.03166675567627,
7.267263889312744,
8.050270080566406,
8.89819049835205,
7.454459190368652,
7.789579391479492,
8.938220977783203,
8.343205451965332,
7.659829616546631,
7.563717842102051,
7.64760160446167,
6.753893852233887,
7.2767486572265625,
7.687180042266846,
8.177096366882324,
5.205698013305664,
8.55665397644043,
8.401761054992676,
8.025993347167969,
8.522932052612305,
7.386404514312744,
6.299332141876221,
7.9422607421875,
6.485499382019043,
7.92954158782959,
5.921761512756348,
7.883401870727539,
7.638513088226318,
7.558638095855713,
7.362685203552246,
8.297099113464355,
8.487621307373047,
8.52571964263916,
8.659907341003418,
8.015156745910645,
9.298934936523438,
8.222744941711426,
6.188640594482422,
8.977818489074707,
8.637101173400879,
8.659961700439453,
7.4918599128723145,
8.798979759216309,
7.740288257598877,
8.463373184204102,
8.464582443237305,
7.778406620025635,
9.147701263427734,
7.360451698303223,
7.708859443664551,
6.682768821716309,
7.512155055999756,
8.024608612060547,
8.361748695373535,
5.732519149780273,
6.673101425170898,
7.6330132484436035,
8.132368087768555,
7.8759942054748535,
8.514373779296875,
8.397266387939453,
7.0031304359436035,
7.621158123016357,
7.67484188079834,
7.817298889160156,
7.450564861297607,
6.986921310424805,
9.063298225402832,
7.272268772125244,
8.928145408630371,
6.965574264526367,
9.52602767944336,
7.277902126312256,
6.177265167236328,
8.317046165466309,
8.4580078125,
8.824596405029297,
7.85051965713501,
5.829211711883545,
8.68645191192627,
8.018779754638672,
7.682953834533691,
8.003823280334473,
6.92888879776001,
6.7287917137146,
7.22535514831543,
6.919946670532227,
7.498782634735107,
7.409185409545898,
8.3101167678833,
6.284835338592529,
3.541412115097046,
3.9863815307617188,
6.179129123687744,
6.740180492401123,
7.888493537902832,
4.698310852050781,
5.089892864227295,
8.01733112335205,
4.149894714355469,
3.0928893089294434,
9.866519927978516,
8.222246170043945,
5.943643569946289,
8.004118919372559,
5.507823944091797,
8.96957015991211,
6.324719429016113,
8.650246620178223,
8.170387268066406,
8.473105430603027,
8.394067764282227,
5.1943159103393555,
3.4560070037841797,
2.6845388412475586,
2.9381015300750732,
8.991165161132812,
9.567828178405762,
9.947354316711426,
6.080748081207275,
5.691708564758301,
7.181239604949951,
8.073373794555664,
8.77186107635498,
8.518348693847656,
7.958341598510742,
8.752128601074219,
7.485937595367432,
8.58120346069336,
8.627962112426758,
6.968264102935791,
7.434549331665039,
7.358287334442139,
7.684825897216797,
7.424722194671631,
6.908591270446777,
6.278493404388428,
8.345937728881836,
7.803347587585449,
8.391436576843262,
8.13833999633789,
8.466653823852539,
8.621729850769043,
8.297107696533203,
7.952710151672363,
7.728457927703857,
9.069082260131836,
6.80143404006958,
6.168771743774414,
7.780761241912842,
7.264509677886963,
7.721634387969971,
5.931019306182861,
8.71249771118164,
7.045263290405273,
5.595153331756592,
8.606344223022461,
7.333461284637451,
7.434794902801514,
4.909368515014648,
6.529274940490723,
3.2044527530670166,
4.450833320617676,
9.15864086151123,
7.603370189666748,
7.163464069366455,
4.514288902282715,
2.936744213104248,
6.017610549926758,
6.448644161224365,
8.636395454406738,
6.373209476470947,
7.272717475891113,
4.8009185791015625,
6.993277072906494,
7.068300724029541,
7.53340482711792,
7.4401326179504395,
7.977913856506348,
9.181097030639648,
7.183773994445801,
6.776640892028809,
6.810145378112793,
6.086609840393066,
9.078044891357422,
5.633232593536377,
7.695226669311523,
5.442765712738037,
8.75350284576416,
7.758969783782959,
7.245949745178223,
7.80985164642334,
6.605112075805664,
7.24437952041626,
7.7778215408325195,
8.456467628479004,
5.285576343536377,
8.28867244720459,
7.879434585571289,
8.340057373046875,
5.838737487792969,
8.670787811279297,
8.561763763427734,
8.80904769897461,
5.523489952087402,
8.205552101135254,
5.81448221206665,
6.502568244934082,
5.51200532913208,
6.332709789276123,
5.85950231552124,
7.721321105957031,
7.371209144592285,
5.3772382736206055,
7.831151962280273,
6.771039009094238,
5.647019863128662,
3.3475260734558105,
9.21485710144043,
6.554588317871094,
7.803776741027832,
5.230503559112549,
7.31123685836792,
7.461449146270752,
5.785803318023682,
2.818866491317749,
7.119564533233643,
7.815005779266357,
7.14105749130249,
7.022451400756836,
8.005674362182617,
7.6263227462768555,
7.574337482452393,
6.168295383453369,
6.522130012512207,
8.820441246032715,
8.641220092773438,
8.199234008789062,
4.685672760009766,
6.580758571624756,
6.7318220138549805,
7.216886043548584,
4.987853050231934,
6.9638471603393555,
8.238450050354004,
6.355881690979004,
8.457653045654297,
8.574877738952637,
8.558584213256836,
8.179498672485352,
8.395395278930664,
5.779758453369141,
5.897271633148193,
5.965787410736084,
7.879891872406006,
7.1940083503723145,
7.250895023345947,
7.340498447418213,
7.3146209716796875,
7.630643367767334,
5.256970405578613,
6.986878871917725,
5.032907962799072,
6.915760040283203,
7.389677047729492,
7.766031265258789,
7.362154483795166,
7.522637844085693,
4.709517955780029,
6.954688549041748,
6.788074493408203,
7.9603118896484375,
8.153197288513184,
7.945971488952637,
5.763076305389404,
8.035938262939453,
7.177386283874512,
7.629238128662109,
8.1404390335083,
4.857499122619629,
7.7081756591796875,
7.729892730712891,
5.2494425773620605,
7.856828212738037,
7.413257122039795,
5.691137313842773,
6.185434341430664,
6.53693151473999,
8.347500801086426,
8.713299751281738,
8.910021781921387,
8.06331729888916,
8.161259651184082,
6.673550128936768,
7.395747661590576,
6.544902801513672,
7.371769428253174,
8.319907188415527,
6.7722697257995605,
7.3024749755859375,
8.515557289123535,
7.880080699920654,
10.560447692871094,
8.548553466796875,
8.010724067687988,
8.251697540283203,
9.363635063171387,
10.383763313293457,
8.954550743103027,
7.073766708374023,
7.3394365310668945,
7.901332855224609,
5.292531967163086,
7.994369983673096,
7.169919967651367,
8.937761306762695,
7.052704334259033,
7.712167739868164,
6.639589786529541,
4.640880584716797,
6.953775882720947,
7.011972427368164,
6.708223342895508,
7.55882453918457,
7.379924774169922,
7.388876438140869,
6.607176303863525,
6.295664310455322,
6.873457431793213,
6.685941219329834,
7.678892612457275,
6.277175426483154,
6.82502555847168,
6.493975639343262,
5.599217414855957,
2.995514392852783,
4.2061686515808105,
5.388845443725586,
6.046504497528076,
6.199982643127441,
7.248841285705566,
6.691074848175049,
5.309595108032227,
2.932786226272583,
2.7796411514282227,
6.531428813934326,
3.3787026405334473,
6.607399940490723,
5.987377643585205,
5.107828617095947,
6.891719818115234,
6.07973575592041,
5.89137077331543,
4.002294540405273,
4.991847991943359,
6.058988571166992,
6.652078628540039,
5.368412017822266,
6.383002758026123,
5.716227054595947,
5.6958794593811035,
5.975515842437744,
6.719594955444336,
2.4692540168762207,
2.7202696800231934,
2.5226945877075195,
2.80049729347229,
2.9589719772338867,
2.723951816558838,
2.7555041313171387,
2.871811866760254,
2.7948708534240723,
2.851465940475464,
2.599896192550659,
2.8908488750457764,
2.9524176120758057
]
},
"verification": {
"all_ternary": true,
"failed_layers": []
},
"perplexity": 3012.731150040198
}