report almost done

This commit is contained in:
Claudio Maggioni 2023-04-19 16:53:00 +02:00
parent 59904646fa
commit 302f53f5ea
21 changed files with 1159 additions and 502 deletions

View file

@ -1,45 +1,45 @@
,k_means,hierarchical ,k_means,hierarchical
2,0.7008424223503156,0.3958383820498147 2,0.3958383820498147,0.7008424223503156
3,0.5710705368479275,0.4083355324423938 3,0.4083355324423938,0.5710705368479275
4,0.5612355754261723,0.4894345431495262 4,0.4894345431495262,0.5612355754261723
5,0.45447105698494905,0.49390485171067744 5,0.49390485171067744,0.45447105698494905
6,0.4542950961743021,0.49437178337314974 6,0.49437178337314974,0.4542950961743021
7,0.5169337345938171,0.4996628258355101 7,0.4996628258355101,0.5169337345938171
8,0.5048012323625627,0.504680719000111 8,0.504680719000111,0.5048012323625627
9,0.4981437021345769,0.5104029882614454 9,0.5104029882614454,0.4981437021345769
10,0.514873610056946,0.39391549620101274 10,0.39391549620101274,0.514873610056946
11,0.4397616290614397,0.35593829934237226 11,0.35593829934237226,0.4397616290614397
12,0.3966368345309925,0.3965649809723018 12,0.3965649809723018,0.3966368345309925
13,0.40515142998089104,0.4035942512051252 13,0.4035942512051252,0.40515142998089104
14,0.40783453521401053,0.41018624058063885 14,0.41018624058063885,0.40783453521401053
15,0.4239033913796109,0.45557751119565765 15,0.45557751119565765,0.4239033913796109
16,0.42065530265413026,0.47640709656766556 16,0.47640709656766556,0.42065530265413026
17,0.44344469866152514,0.4974425160835303 17,0.4974425160835303,0.44344469866152514
18,0.4400719065542468,0.5290487299051633 18,0.5290487299051633,0.4400719065542468
19,0.44608395823875535,0.5485454650471248 19,0.5485454650471248,0.44608395823875535
20,0.44877269935654723,0.5586056973417746 20,0.5586056973417746,0.44877269935654723
21,0.48118392208651517,0.5385866967307906 21,0.5385866967307906,0.48118392208651517
22,0.48389798035280496,0.538222592035968 22,0.538222592035968,0.48389798035280496
23,0.48663428414368126,0.550727295003801 23,0.550727295003801,0.48663428414368126
24,0.5087496231379599,0.5729072600132372 24,0.5729072600132372,0.5087496231379599
25,0.5308958702007723,0.5954078415061489 25,0.5954078415061489,0.5308958702007723
26,0.533742178035476,0.6182602907647171 26,0.6182602907647171,0.533742178035476
27,0.5366335268898433,0.6415000474402278 27,0.6415000474402278,0.5366335268898433
28,0.5688721496510291,0.6464201697751911 28,0.6464201697751911,0.5688721496510291
29,0.5718756117789308,0.6701951689242575 29,0.6701951689242575,0.5718756117789308
30,0.5749678644659783,0.6738663960033637 30,0.6738663960033637,0.5749678644659783
31,0.5975986067541601,0.6776704976739869 31,0.6776704976739869,0.5975986067541601
32,0.6010454124801283,0.6759936834928909 32,0.6759936834928909,0.6010454124801283
33,0.6047324451505658,0.6780082327270405 33,0.6780082327270405,0.6047324451505658
34,0.6087467116081876,0.6819745883778254 34,0.6819745883778254,0.6087467116081876
35,0.613131689815019,0.6860900076219251 35,0.6860900076219251,0.613131689815019
36,0.6386708325196511,0.6921336553243742 36,0.6921336553243742,0.6386708325196511
37,0.6449490032291169,0.6964066920515507 37,0.6964066920515507,0.6449490032291169
38,0.6764810977640761,0.6932020971027025 38,0.6932020971027025,0.6764810977640761
39,0.6773895830074159,0.6977143227629022 39,0.6977143227629022,0.6773895830074159
40,0.6918179479278735,0.7024070374495096 40,0.7024070374495096,0.6918179479278735
41,0.6964034645667346,0.7072912915110808 41,0.7072912915110808,0.6964034645667346
42,0.7024471122691838,0.712379056158551 42,0.712379056158551,0.7024471122691838
43,0.7256701207957181,0.7176833214293175 43,0.7176833214293175,0.7256701207957181
44,0.7230610997944976,0.7232182069292477 44,0.7232182069292477,0.7230610997944976
45,0.7289990873402858,0.7289990873402857 45,0.7289990873402857,0.7289990873402858

1 k_means hierarchical
2 2 0.7008424223503156 0.3958383820498147 0.3958383820498147 0.7008424223503156
3 3 0.5710705368479275 0.4083355324423938 0.4083355324423938 0.5710705368479275
4 4 0.5612355754261723 0.4894345431495262 0.4894345431495262 0.5612355754261723
5 5 0.45447105698494905 0.49390485171067744 0.49390485171067744 0.45447105698494905
6 6 0.4542950961743021 0.49437178337314974 0.49437178337314974 0.4542950961743021
7 7 0.5169337345938171 0.4996628258355101 0.4996628258355101 0.5169337345938171
8 8 0.5048012323625627 0.504680719000111 0.504680719000111 0.5048012323625627
9 9 0.4981437021345769 0.5104029882614454 0.5104029882614454 0.4981437021345769
10 10 0.514873610056946 0.39391549620101274 0.39391549620101274 0.514873610056946
11 11 0.4397616290614397 0.35593829934237226 0.35593829934237226 0.4397616290614397
12 12 0.3966368345309925 0.3965649809723018 0.3965649809723018 0.3966368345309925
13 13 0.40515142998089104 0.4035942512051252 0.4035942512051252 0.40515142998089104
14 14 0.40783453521401053 0.41018624058063885 0.41018624058063885 0.40783453521401053
15 15 0.4239033913796109 0.45557751119565765 0.45557751119565765 0.4239033913796109
16 16 0.42065530265413026 0.47640709656766556 0.47640709656766556 0.42065530265413026
17 17 0.44344469866152514 0.4974425160835303 0.4974425160835303 0.44344469866152514
18 18 0.4400719065542468 0.5290487299051633 0.5290487299051633 0.4400719065542468
19 19 0.44608395823875535 0.5485454650471248 0.5485454650471248 0.44608395823875535
20 20 0.44877269935654723 0.5586056973417746 0.5586056973417746 0.44877269935654723
21 21 0.48118392208651517 0.5385866967307906 0.5385866967307906 0.48118392208651517
22 22 0.48389798035280496 0.538222592035968 0.538222592035968 0.48389798035280496
23 23 0.48663428414368126 0.550727295003801 0.550727295003801 0.48663428414368126
24 24 0.5087496231379599 0.5729072600132372 0.5729072600132372 0.5087496231379599
25 25 0.5308958702007723 0.5954078415061489 0.5954078415061489 0.5308958702007723
26 26 0.533742178035476 0.6182602907647171 0.6182602907647171 0.533742178035476
27 27 0.5366335268898433 0.6415000474402278 0.6415000474402278 0.5366335268898433
28 28 0.5688721496510291 0.6464201697751911 0.6464201697751911 0.5688721496510291
29 29 0.5718756117789308 0.6701951689242575 0.6701951689242575 0.5718756117789308
30 30 0.5749678644659783 0.6738663960033637 0.6738663960033637 0.5749678644659783
31 31 0.5975986067541601 0.6776704976739869 0.6776704976739869 0.5975986067541601
32 32 0.6010454124801283 0.6759936834928909 0.6759936834928909 0.6010454124801283
33 33 0.6047324451505658 0.6780082327270405 0.6780082327270405 0.6047324451505658
34 34 0.6087467116081876 0.6819745883778254 0.6819745883778254 0.6087467116081876
35 35 0.613131689815019 0.6860900076219251 0.6860900076219251 0.613131689815019
36 36 0.6386708325196511 0.6921336553243742 0.6921336553243742 0.6386708325196511
37 37 0.6449490032291169 0.6964066920515507 0.6964066920515507 0.6449490032291169
38 38 0.6764810977640761 0.6932020971027025 0.6932020971027025 0.6764810977640761
39 39 0.6773895830074159 0.6977143227629022 0.6977143227629022 0.6773895830074159
40 40 0.6918179479278735 0.7024070374495096 0.7024070374495096 0.6918179479278735
41 41 0.6964034645667346 0.7072912915110808 0.7072912915110808 0.6964034645667346
42 42 0.7024471122691838 0.712379056158551 0.712379056158551 0.7024471122691838
43 43 0.7256701207957181 0.7176833214293175 0.7176833214293175 0.7256701207957181
44 44 0.7230610997944976 0.7232182069292477 0.7232182069292477 0.7230610997944976
45 45 0.7289990873402858 0.7289990873402857 0.7289990873402857 0.7289990873402858

View file

@ -0,0 +1,89 @@
,algorithm,k,min,mean,max
0,hierarchical,2,1,58.5,116
1,k_means,2,1,58.5,116
2,hierarchical,3,1,39.0,113
3,k_means,3,1,39.0,115
4,hierarchical,4,1,29.25,113
5,k_means,4,1,29.25,98
6,hierarchical,5,1,23.4,111
7,k_means,5,1,23.4,98
8,hierarchical,6,1,19.5,111
9,k_means,6,1,19.5,98
10,hierarchical,7,1,16.714285714285715,97
11,k_means,7,1,16.714285714285715,98
12,hierarchical,8,1,14.625,97
13,k_means,8,1,14.625,98
14,hierarchical,9,1,13.0,96
15,k_means,9,1,13.0,97
16,hierarchical,10,1,11.7,96
17,k_means,10,1,11.7,92
18,hierarchical,11,1,10.636363636363637,93
19,k_means,11,1,10.636363636363637,89
20,hierarchical,12,1,9.75,86
21,k_means,12,1,9.75,84
22,hierarchical,13,1,9.0,84
23,k_means,13,1,9.0,83
24,hierarchical,14,1,8.357142857142858,84
25,k_means,14,1,8.357142857142858,83
26,hierarchical,15,1,7.8,84
27,k_means,15,1,7.8,77
28,hierarchical,16,1,7.3125,84
29,k_means,16,1,7.3125,75
30,hierarchical,17,1,6.882352941176471,79
31,k_means,17,1,6.882352941176471,73
32,hierarchical,18,1,6.5,79
33,k_means,18,1,6.5,70
34,hierarchical,19,1,6.157894736842105,46
35,k_means,19,1,6.157894736842105,70
36,hierarchical,20,1,5.85,46
37,k_means,20,1,5.85,70
38,hierarchical,21,1,5.571428571428571,46
39,k_means,21,1,5.571428571428571,70
40,hierarchical,22,1,5.318181818181818,46
41,k_means,22,1,5.318181818181818,70
42,hierarchical,23,1,5.086956521739131,46
43,k_means,23,1,5.086956521739131,68
44,hierarchical,24,1,4.875,46
45,k_means,24,1,4.875,66
46,hierarchical,25,1,4.68,46
47,k_means,25,1,4.68,64
48,hierarchical,26,1,4.5,46
49,k_means,26,1,4.5,62
50,hierarchical,27,1,4.333333333333333,46
51,k_means,27,1,4.333333333333333,60
52,hierarchical,28,1,4.178571428571429,46
53,k_means,28,1,4.178571428571429,60
54,hierarchical,29,1,4.0344827586206895,46
55,k_means,29,1,4.0344827586206895,58
56,hierarchical,30,1,3.9,46
57,k_means,30,1,3.9,57
58,hierarchical,31,1,3.774193548387097,46
59,k_means,31,1,3.774193548387097,56
60,hierarchical,32,1,3.65625,46
61,k_means,32,1,3.65625,56
62,hierarchical,33,1,3.5454545454545454,46
63,k_means,33,1,3.5454545454545454,56
64,hierarchical,34,1,3.4411764705882355,46
65,k_means,34,1,3.4411764705882355,55
66,hierarchical,35,1,3.342857142857143,46
67,k_means,35,1,3.342857142857143,54
68,hierarchical,36,1,3.25,46
69,k_means,36,1,3.25,54
70,hierarchical,37,1,3.1621621621621623,46
71,k_means,37,1,3.1621621621621623,53
72,hierarchical,38,1,3.0789473684210527,46
73,k_means,38,1,3.0789473684210527,53
74,hierarchical,39,1,3.0,46
75,k_means,39,1,3.0,52
76,hierarchical,40,1,2.925,46
77,k_means,40,1,2.925,51
78,hierarchical,41,1,2.8536585365853657,46
79,k_means,41,1,2.8536585365853657,50
80,hierarchical,42,1,2.7857142857142856,46
81,k_means,42,1,2.7857142857142856,49
82,hierarchical,43,1,2.7209302325581395,46
83,k_means,43,1,2.7209302325581395,48
84,hierarchical,44,1,2.659090909090909,46
85,k_means,44,1,2.659090909090909,47
86,hierarchical,45,1,2.6,46
87,k_means,45,1,2.6,46
1 algorithm k min mean max
2 0 hierarchical 2 1 58.5 116
3 1 k_means 2 1 58.5 116
4 2 hierarchical 3 1 39.0 113
5 3 k_means 3 1 39.0 115
6 4 hierarchical 4 1 29.25 113
7 5 k_means 4 1 29.25 98
8 6 hierarchical 5 1 23.4 111
9 7 k_means 5 1 23.4 98
10 8 hierarchical 6 1 19.5 111
11 9 k_means 6 1 19.5 98
12 10 hierarchical 7 1 16.714285714285715 97
13 11 k_means 7 1 16.714285714285715 98
14 12 hierarchical 8 1 14.625 97
15 13 k_means 8 1 14.625 98
16 14 hierarchical 9 1 13.0 96
17 15 k_means 9 1 13.0 97
18 16 hierarchical 10 1 11.7 96
19 17 k_means 10 1 11.7 92
20 18 hierarchical 11 1 10.636363636363637 93
21 19 k_means 11 1 10.636363636363637 89
22 20 hierarchical 12 1 9.75 86
23 21 k_means 12 1 9.75 84
24 22 hierarchical 13 1 9.0 84
25 23 k_means 13 1 9.0 83
26 24 hierarchical 14 1 8.357142857142858 84
27 25 k_means 14 1 8.357142857142858 83
28 26 hierarchical 15 1 7.8 84
29 27 k_means 15 1 7.8 77
30 28 hierarchical 16 1 7.3125 84
31 29 k_means 16 1 7.3125 75
32 30 hierarchical 17 1 6.882352941176471 79
33 31 k_means 17 1 6.882352941176471 73
34 32 hierarchical 18 1 6.5 79
35 33 k_means 18 1 6.5 70
36 34 hierarchical 19 1 6.157894736842105 46
37 35 k_means 19 1 6.157894736842105 70
38 36 hierarchical 20 1 5.85 46
39 37 k_means 20 1 5.85 70
40 38 hierarchical 21 1 5.571428571428571 46
41 39 k_means 21 1 5.571428571428571 70
42 40 hierarchical 22 1 5.318181818181818 46
43 41 k_means 22 1 5.318181818181818 70
44 42 hierarchical 23 1 5.086956521739131 46
45 43 k_means 23 1 5.086956521739131 68
46 44 hierarchical 24 1 4.875 46
47 45 k_means 24 1 4.875 66
48 46 hierarchical 25 1 4.68 46
49 47 k_means 25 1 4.68 64
50 48 hierarchical 26 1 4.5 46
51 49 k_means 26 1 4.5 62
52 50 hierarchical 27 1 4.333333333333333 46
53 51 k_means 27 1 4.333333333333333 60
54 52 hierarchical 28 1 4.178571428571429 46
55 53 k_means 28 1 4.178571428571429 60
56 54 hierarchical 29 1 4.0344827586206895 46
57 55 k_means 29 1 4.0344827586206895 58
58 56 hierarchical 30 1 3.9 46
59 57 k_means 30 1 3.9 57
60 58 hierarchical 31 1 3.774193548387097 46
61 59 k_means 31 1 3.774193548387097 56
62 60 hierarchical 32 1 3.65625 46
63 61 k_means 32 1 3.65625 56
64 62 hierarchical 33 1 3.5454545454545454 46
65 63 k_means 33 1 3.5454545454545454 56
66 64 hierarchical 34 1 3.4411764705882355 46
67 65 k_means 34 1 3.4411764705882355 55
68 66 hierarchical 35 1 3.342857142857143 46
69 67 k_means 35 1 3.342857142857143 54
70 68 hierarchical 36 1 3.25 46
71 69 k_means 36 1 3.25 54
72 70 hierarchical 37 1 3.1621621621621623 46
73 71 k_means 37 1 3.1621621621621623 53
74 72 hierarchical 38 1 3.0789473684210527 46
75 73 k_means 38 1 3.0789473684210527 53
76 74 hierarchical 39 1 3.0 46
77 75 k_means 39 1 3.0 52
78 76 hierarchical 40 1 2.925 46
79 77 k_means 40 1 2.925 51
80 78 hierarchical 41 1 2.8536585365853657 46
81 79 k_means 41 1 2.8536585365853657 50
82 80 hierarchical 42 1 2.7857142857142856 46
83 81 k_means 42 1 2.7857142857142856 49
84 82 hierarchical 43 1 2.7209302325581395 46
85 83 k_means 43 1 2.7209302325581395 48
86 84 hierarchical 44 1 2.659090909090909 46
87 85 k_means 44 1 2.659090909090909 47
88 86 hierarchical 45 1 2.6 46
89 87 k_means 45 1 2.6 46

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

View file

@ -1,92 +1,92 @@
,cluster ,cluster
getGrammarDescription,5 getGrammarDescription,0
getElementDeclIsExternal,0 getElementDeclIsExternal,0
getAttributeDeclIsExternal,0 getAttributeDeclIsExternal,0
getAttributeDeclIndex,42 getAttributeDeclIndex,0
startDTD,2 startDTD,0
startParameterEntity,6 startParameterEntity,0
startExternalSubset,41 startExternalSubset,0
endParameterEntity,6 endParameterEntity,0
endExternalSubset,41 endExternalSubset,0
elementDecl,31 elementDecl,1
attributeDecl,39 attributeDecl,1
internalEntityDecl,40 internalEntityDecl,0
externalEntityDecl,40 externalEntityDecl,0
unparsedEntityDecl,40 unparsedEntityDecl,0
notationDecl,38 notationDecl,0
endDTD,8 endDTD,0
setDTDSource,23 setDTDSource,0
getDTDSource,23 getDTDSource,0
textDecl,5 textDecl,0
comment,5 comment,0
processingInstruction,5 processingInstruction,0
startAttlist,5 startAttlist,0
endAttlist,5 endAttlist,0
startConditional,5 startConditional,0
ignoredCharacters,5 ignoredCharacters,0
endConditional,5 endConditional,0
setDTDContentModelSource,11 setDTDContentModelSource,0
getDTDContentModelSource,11 getDTDContentModelSource,0
startContentModel,33 startContentModel,0
startGroup,53 startGroup,0
pcdata,51 pcdata,0
element,34 element,0
separator,44 separator,0
occurrence,32 occurrence,0
endGroup,35 endGroup,0
any,5 any,0
empty,5 empty,0
endContentModel,5 endContentModel,0
isNamespaceAware,5 isNamespaceAware,0
getSymbolTable,52 getSymbolTable,0
getFirstElementDeclIndex,7 getFirstElementDeclIndex,0
getNextElementDeclIndex,7 getNextElementDeclIndex,0
getElementDeclIndex,55 getElementDeclIndex,0
getContentSpecType,37 getContentSpecType,0
getElementDecl,54 getElementDecl,0
getElementDeclName,8 getElementDeclName,0
getFirstAttributeDeclIndex,20 getFirstAttributeDeclIndex,0
getNextAttributeDeclIndex,57 getNextAttributeDeclIndex,0
getAttributeDecl,28 getAttributeDecl,0
isCDATAAttribute,47 isCDATAAttribute,0
getEntityDeclIndex,5 getEntityDeclIndex,0
getEntityDecl,1 getEntityDecl,0
getNotationDeclIndex,5 getNotationDeclIndex,0
getNotationDecl,10 getNotationDecl,0
getContentSpec,9 getContentSpec,0
getContentSpecIndex,19 getContentSpecIndex,0
getContentSpecAsString,43 getContentSpecAsString,0
printElements,50 printElements,0
printAttributes,49 printAttributes,0
addContentSpecToElement,29 addContentSpecToElement,0
getElementContentModelValidator,25 getElementContentModelValidator,0
createElementDecl,36 createElementDecl,0
setElementDecl,17 setElementDecl,0
putElementNameMapping,5 putElementNameMapping,0
setFirstAttributeDeclIndex,20 setFirstAttributeDeclIndex,0
setContentSpecIndex,19 setContentSpecIndex,0
createAttributeDecl,18 createAttributeDecl,0
setAttributeDecl,48 setAttributeDecl,0
createContentSpec,21 createContentSpec,0
setContentSpec,9 setContentSpec,0
createEntityDecl,16 createEntityDecl,0
setEntityDecl,1 setEntityDecl,0
createNotationDecl,56 createNotationDecl,0
setNotationDecl,10 setNotationDecl,0
addContentSpecNode,4 addContentSpecNode,0
addUniqueLeafNode,4 addUniqueLeafNode,0
initializeContentModelStack,2 initializeContentModelStack,0
isImmutable,24 isImmutable,0
appendContentSpec,45 appendContentSpec,0
printAttribute,26 printAttribute,0
createChildModel,3 createChildModel,0
buildSyntaxTree,3 buildSyntaxTree,0
contentSpecTree,46 contentSpecTree,0
ensureElementDeclCapacity,27 ensureElementDeclCapacity,0
ensureAttributeDeclCapacity,30 ensureAttributeDeclCapacity,0
ensureEntityDeclCapacity,14 ensureEntityDeclCapacity,0
ensureNotationDeclCapacity,13 ensureNotationDeclCapacity,0
ensureContentSpecCapacity,12 ensureContentSpecCapacity,0
resize,5 resize,0
isEntityDeclared,22 isEntityDeclared,0
isEntityUnparsed,15 isEntityUnparsed,0

1 cluster
2 getGrammarDescription 5 0
3 getElementDeclIsExternal 0
4 getAttributeDeclIsExternal 0
5 getAttributeDeclIndex 42 0
6 startDTD 2 0
7 startParameterEntity 6 0
8 startExternalSubset 41 0
9 endParameterEntity 6 0
10 endExternalSubset 41 0
11 elementDecl 31 1
12 attributeDecl 39 1
13 internalEntityDecl 40 0
14 externalEntityDecl 40 0
15 unparsedEntityDecl 40 0
16 notationDecl 38 0
17 endDTD 8 0
18 setDTDSource 23 0
19 getDTDSource 23 0
20 textDecl 5 0
21 comment 5 0
22 processingInstruction 5 0
23 startAttlist 5 0
24 endAttlist 5 0
25 startConditional 5 0
26 ignoredCharacters 5 0
27 endConditional 5 0
28 setDTDContentModelSource 11 0
29 getDTDContentModelSource 11 0
30 startContentModel 33 0
31 startGroup 53 0
32 pcdata 51 0
33 element 34 0
34 separator 44 0
35 occurrence 32 0
36 endGroup 35 0
37 any 5 0
38 empty 5 0
39 endContentModel 5 0
40 isNamespaceAware 5 0
41 getSymbolTable 52 0
42 getFirstElementDeclIndex 7 0
43 getNextElementDeclIndex 7 0
44 getElementDeclIndex 55 0
45 getContentSpecType 37 0
46 getElementDecl 54 0
47 getElementDeclName 8 0
48 getFirstAttributeDeclIndex 20 0
49 getNextAttributeDeclIndex 57 0
50 getAttributeDecl 28 0
51 isCDATAAttribute 47 0
52 getEntityDeclIndex 5 0
53 getEntityDecl 1 0
54 getNotationDeclIndex 5 0
55 getNotationDecl 10 0
56 getContentSpec 9 0
57 getContentSpecIndex 19 0
58 getContentSpecAsString 43 0
59 printElements 50 0
60 printAttributes 49 0
61 addContentSpecToElement 29 0
62 getElementContentModelValidator 25 0
63 createElementDecl 36 0
64 setElementDecl 17 0
65 putElementNameMapping 5 0
66 setFirstAttributeDeclIndex 20 0
67 setContentSpecIndex 19 0
68 createAttributeDecl 18 0
69 setAttributeDecl 48 0
70 createContentSpec 21 0
71 setContentSpec 9 0
72 createEntityDecl 16 0
73 setEntityDecl 1 0
74 createNotationDecl 56 0
75 setNotationDecl 10 0
76 addContentSpecNode 4 0
77 addUniqueLeafNode 4 0
78 initializeContentModelStack 2 0
79 isImmutable 24 0
80 appendContentSpec 45 0
81 printAttribute 26 0
82 createChildModel 3 0
83 buildSyntaxTree 3 0
84 contentSpecTree 46 0
85 ensureElementDeclCapacity 27 0
86 ensureAttributeDeclCapacity 30 0
87 ensureEntityDeclCapacity 14 0
88 ensureNotationDeclCapacity 13 0
89 ensureContentSpecCapacity 12 0
90 resize 5 0
91 isEntityDeclared 22 0
92 isEntityUnparsed 15 0

View file

@ -1,92 +1,92 @@
,cluster ,cluster
getGrammarDescription,1 getGrammarDescription,6
getElementDeclIsExternal,0 getElementDeclIsExternal,50
getAttributeDeclIsExternal,0 getAttributeDeclIsExternal,43
getAttributeDeclIndex,1 getAttributeDeclIndex,36
startDTD,1 startDTD,13
startParameterEntity,1 startParameterEntity,23
startExternalSubset,1 startExternalSubset,54
endParameterEntity,1 endParameterEntity,23
endExternalSubset,1 endExternalSubset,54
elementDecl,0 elementDecl,5
attributeDecl,0 attributeDecl,4
internalEntityDecl,1 internalEntityDecl,17
externalEntityDecl,1 externalEntityDecl,17
unparsedEntityDecl,1 unparsedEntityDecl,17
notationDecl,1 notationDecl,40
endDTD,0 endDTD,29
setDTDSource,1 setDTDSource,53
getDTDSource,1 getDTDSource,53
textDecl,1 textDecl,6
comment,1 comment,6
processingInstruction,1 processingInstruction,6
startAttlist,1 startAttlist,6
endAttlist,1 endAttlist,6
startConditional,1 startConditional,6
ignoredCharacters,1 ignoredCharacters,6
endConditional,1 endConditional,6
setDTDContentModelSource,1 setDTDContentModelSource,51
getDTDContentModelSource,1 getDTDContentModelSource,51
startContentModel,1 startContentModel,28
startGroup,1 startGroup,39
pcdata,1 pcdata,52
element,1 element,2
separator,1 separator,49
occurrence,1 occurrence,24
endGroup,1 endGroup,27
any,1 any,6
empty,1 empty,6
endContentModel,1 endContentModel,6
isNamespaceAware,1 isNamespaceAware,6
getSymbolTable,1 getSymbolTable,56
getFirstElementDeclIndex,1 getFirstElementDeclIndex,47
getNextElementDeclIndex,1 getNextElementDeclIndex,47
getElementDeclIndex,1 getElementDeclIndex,57
getContentSpecType,0 getContentSpecType,38
getElementDecl,0 getElementDecl,0
getElementDeclName,0 getElementDeclName,29
getFirstAttributeDeclIndex,0 getFirstAttributeDeclIndex,3
getNextAttributeDeclIndex,0 getNextAttributeDeclIndex,46
getAttributeDecl,0 getAttributeDecl,25
isCDATAAttribute,1 isCDATAAttribute,44
getEntityDeclIndex,1 getEntityDeclIndex,6
getEntityDecl,0 getEntityDecl,8
getNotationDeclIndex,1 getNotationDeclIndex,6
getNotationDecl,0 getNotationDecl,10
getContentSpec,0 getContentSpec,41
getContentSpecIndex,0 getContentSpecIndex,12
getContentSpecAsString,0 getContentSpecAsString,37
printElements,1 printElements,55
printAttributes,1 printAttributes,35
addContentSpecToElement,1 addContentSpecToElement,20
getElementContentModelValidator,0 getElementContentModelValidator,21
createElementDecl,0 createElementDecl,33
setElementDecl,0 setElementDecl,16
putElementNameMapping,1 putElementNameMapping,6
setFirstAttributeDeclIndex,0 setFirstAttributeDeclIndex,3
setContentSpecIndex,0 setContentSpecIndex,12
createAttributeDecl,0 createAttributeDecl,19
setAttributeDecl,0 setAttributeDecl,7
createContentSpec,0 createContentSpec,41
setContentSpec,0 setContentSpec,9
createEntityDecl,0 createEntityDecl,31
setEntityDecl,0 setEntityDecl,8
createNotationDecl,1 createNotationDecl,32
setNotationDecl,0 setNotationDecl,10
addContentSpecNode,1 addContentSpecNode,18
addUniqueLeafNode,1 addUniqueLeafNode,18
initializeContentModelStack,1 initializeContentModelStack,13
isImmutable,1 isImmutable,6
appendContentSpec,1 appendContentSpec,42
printAttribute,1 printAttribute,1
createChildModel,1 createChildModel,11
buildSyntaxTree,1 buildSyntaxTree,11
contentSpecTree,1 contentSpecTree,30
ensureElementDeclCapacity,1 ensureElementDeclCapacity,15
ensureAttributeDeclCapacity,1 ensureAttributeDeclCapacity,14
ensureEntityDeclCapacity,1 ensureEntityDeclCapacity,22
ensureNotationDeclCapacity,1 ensureNotationDeclCapacity,26
ensureContentSpecCapacity,1 ensureContentSpecCapacity,34
resize,1 resize,6
isEntityDeclared,1 isEntityDeclared,48
isEntityUnparsed,0 isEntityUnparsed,45

1 cluster
2 getGrammarDescription 1 6
3 getElementDeclIsExternal 0 50
4 getAttributeDeclIsExternal 0 43
5 getAttributeDeclIndex 1 36
6 startDTD 1 13
7 startParameterEntity 1 23
8 startExternalSubset 1 54
9 endParameterEntity 1 23
10 endExternalSubset 1 54
11 elementDecl 0 5
12 attributeDecl 0 4
13 internalEntityDecl 1 17
14 externalEntityDecl 1 17
15 unparsedEntityDecl 1 17
16 notationDecl 1 40
17 endDTD 0 29
18 setDTDSource 1 53
19 getDTDSource 1 53
20 textDecl 1 6
21 comment 1 6
22 processingInstruction 1 6
23 startAttlist 1 6
24 endAttlist 1 6
25 startConditional 1 6
26 ignoredCharacters 1 6
27 endConditional 1 6
28 setDTDContentModelSource 1 51
29 getDTDContentModelSource 1 51
30 startContentModel 1 28
31 startGroup 1 39
32 pcdata 1 52
33 element 1 2
34 separator 1 49
35 occurrence 1 24
36 endGroup 1 27
37 any 1 6
38 empty 1 6
39 endContentModel 1 6
40 isNamespaceAware 1 6
41 getSymbolTable 1 56
42 getFirstElementDeclIndex 1 47
43 getNextElementDeclIndex 1 47
44 getElementDeclIndex 1 57
45 getContentSpecType 0 38
46 getElementDecl 0
47 getElementDeclName 0 29
48 getFirstAttributeDeclIndex 0 3
49 getNextAttributeDeclIndex 0 46
50 getAttributeDecl 0 25
51 isCDATAAttribute 1 44
52 getEntityDeclIndex 1 6
53 getEntityDecl 0 8
54 getNotationDeclIndex 1 6
55 getNotationDecl 0 10
56 getContentSpec 0 41
57 getContentSpecIndex 0 12
58 getContentSpecAsString 0 37
59 printElements 1 55
60 printAttributes 1 35
61 addContentSpecToElement 1 20
62 getElementContentModelValidator 0 21
63 createElementDecl 0 33
64 setElementDecl 0 16
65 putElementNameMapping 1 6
66 setFirstAttributeDeclIndex 0 3
67 setContentSpecIndex 0 12
68 createAttributeDecl 0 19
69 setAttributeDecl 0 7
70 createContentSpec 0 41
71 setContentSpec 0 9
72 createEntityDecl 0 31
73 setEntityDecl 0 8
74 createNotationDecl 1 32
75 setNotationDecl 0 10
76 addContentSpecNode 1 18
77 addUniqueLeafNode 1 18
78 initializeContentModelStack 1 13
79 isImmutable 1 6
80 appendContentSpec 1 42
81 printAttribute 1
82 createChildModel 1 11
83 buildSyntaxTree 1 11
84 contentSpecTree 1 30
85 ensureElementDeclCapacity 1 15
86 ensureAttributeDeclCapacity 1 14
87 ensureEntityDeclCapacity 1 22
88 ensureNotationDeclCapacity 1 26
89 ensureContentSpecCapacity 1 34
90 resize 1 6
91 isEntityDeclared 1 48
92 isEntityUnparsed 0 45

View file

@ -1,64 +1,64 @@
,k_means,hierarchical ,k_means,hierarchical
2,0.43549549160206547,0.22916634455195753 2,0.22916634455195753,0.43549549160206547
3,0.3737398924595095,0.2246280732293034 3,0.2246280732293034,0.3737398924595095
4,0.3557451009153901,0.22489420158108267 4,0.22489420158108267,0.3557451009153901
5,0.23295505680144496,0.23659327576115802 5,0.23659327576115802,0.23295505680144496
6,0.262133112331066,0.1944787865029721 6,0.1944787865029721,0.262133112331066
7,0.2578980101543562,0.14449036253228517 7,0.14449036253228517,0.2578980101543562
8,0.2549368125378225,0.14148366678653188 8,0.14148366678653188,0.2549368125378225
9,0.2774793093993747,0.13842552961645824 9,0.13842552961645824,0.2774793093993747
10,0.29633149188806335,0.17251507022640497 10,0.17251507022640497,0.29633149188806335
11,0.28457149559807815,0.20347568890084347 11,0.20347568890084347,0.28457149559807815
12,0.2774764884391462,0.23906895503283213 12,0.23906895503283213,0.2774764884391462
13,0.2807117319594596,0.2433263434151139 13,0.2433263434151139,0.2807117319594596
14,0.2756438988231549,0.2378679295617759 14,0.2378679295617759,0.2756438988231549
15,0.2725133030686268,0.23691994972126937 15,0.23691994972126937,0.2725133030686268
16,0.26609972785171476,0.23116431400607626 16,0.23116431400607626,0.26609972785171476
17,0.2622978716191777,0.250626112587838 17,0.250626112587838,0.2622978716191777
18,0.2599277555662332,0.25367962227891766 18,0.25367962227891766,0.2599277555662332
19,0.2627008352505403,0.27152241207311917 19,0.27152241207311917,0.2627008352505403
20,0.27904812684322156,0.2937526253744639 20,0.2937526253744639,0.27904812684322156
21,0.2862853638532431,0.29866907908096096 21,0.29866907908096096,0.2862853638532431
22,0.28363618305324206,0.2982900685039696 22,0.2982900685039696,0.28363618305324206
23,0.27298124922178313,0.29267556171442216 23,0.29267556171442216,0.27298124922178313
24,0.2755401967064185,0.30932258932020334 24,0.30932258932020334,0.2755401967064185
25,0.2699256899168711,0.30931433471981734 25,0.30931433471981734,0.2699256899168711
26,0.27327610109462835,0.309284891816073 26,0.309284891816073,0.27327610109462835
27,0.2741779110906256,0.30820922828647973 27,0.30820922828647973,0.2741779110906256
28,0.2772726745209296,0.306394576589556 28,0.306394576589556,0.2772726745209296
29,0.2763152122041744,0.31629054291989955 29,0.31629054291989955,0.2763152122041744
30,0.27822954116587556,0.31889378927031037 30,0.31889378927031037,0.27822954116587556
31,0.2765547788352012,0.31793632695355517 31,0.31793632695355517,0.2765547788352012
32,0.2873045247363621,0.3198803243841521 32,0.3198803243841521,0.2873045247363621
33,0.28417974562649284,0.3322984924566154 33,0.3322984924566154,0.28417974562649284
34,0.2685472504040367,0.33205224383012144 34,0.33205224383012144,0.2685472504040367
35,0.2640970877653046,0.32946433944653786 35,0.32946433944653786,0.2640970877653046
36,0.26594127941463497,0.34064178452545657 36,0.34064178452545657,0.26594127941463497
37,0.2671662834055061,0.34279546744648637 37,0.34279546744648637,0.2671662834055061
38,0.26972862144514015,0.3520414342812306 38,0.3520414342812306,0.26972862144514015
39,0.2745566131731437,0.35684038034252413 39,0.35684038034252413,0.2745566131731437
40,0.3085760240111521,0.34927826706954956 40,0.34927826706954956,0.3085760240111521
41,0.32756637032777863,0.3490443084779255 41,0.3490443084779255,0.32756637032777863
42,0.3310796986888577,0.34372287471805796 42,0.34372287471805796,0.3310796986888577
43,0.32889480000768656,0.32421890240508233 43,0.32421890240508233,0.32889480000768656
44,0.31610864049926274,0.27315698867962007 44,0.27315698867962007,0.31610864049926274
45,0.3140921194105564,0.27051011105427114 45,0.27051011105427114,0.3140921194105564
46,0.3088953240503273,0.29627121773250714 46,0.29627121773250714,0.3088953240503273
47,0.2693097731576138,0.32261382027270064 47,0.32261382027270064,0.2693097731576138
48,0.2809797636777669,0.3299248655060567 48,0.3299248655060567,0.2809797636777669
49,0.29384518410058824,0.3171387059976329 49,0.3171387059976329,0.29384518410058824
50,0.29793575895571417,0.3442080317722919 50,0.3442080317722919,0.29793575895571417
51,0.3025569827442159,0.3408776851426114 51,0.3408776851426114,0.3025569827442159
52,0.32032808958922193,0.3408776851426114 52,0.3408776851426114,0.32032808958922193
53,0.33852852210954587,0.33765907834246356 53,0.33765907834246356,0.33852852210954587
54,0.339541278009214,0.36565310355269914 54,0.36565310355269914,0.339541278009214
55,0.35774171052953796,0.3818328805784584 55,0.3818328805784584,0.35774171052953796
56,0.37594214304986195,0.3866470678901348 56,0.3866470678901348,0.37594214304986195
57,0.4080257854586148,0.3919955336887361 57,0.3919955336887361,0.4080257854586148
58,0.4046954388289342,0.3979724365432809 58,0.3979724365432809,0.4046954388289342
59,0.4046954388289342,0.38857621891133143 59,0.38857621891133143,0.4046954388289342
60,0.3931263574608019,0.3953492191827632 60,0.3953492191827632,0.3931263574608019
61,0.38155727609266954,0.38748610984623766 61,0.38748610984623766,0.38155727609266954
62,0.37132316722174985,0.39516141319506437 62,0.39516141319506437,0.37132316722174985
63,0.36810456042160206,0.3850224051641811 63,0.3850224051641811,0.36810456042160206
64,0.3565354790534698,0.3785851915638855 64,0.3785851915638855,0.3565354790534698

1 k_means hierarchical
2 2 0.43549549160206547 0.22916634455195753 0.22916634455195753 0.43549549160206547
3 3 0.3737398924595095 0.2246280732293034 0.2246280732293034 0.3737398924595095
4 4 0.3557451009153901 0.22489420158108267 0.22489420158108267 0.3557451009153901
5 5 0.23295505680144496 0.23659327576115802 0.23659327576115802 0.23295505680144496
6 6 0.262133112331066 0.1944787865029721 0.1944787865029721 0.262133112331066
7 7 0.2578980101543562 0.14449036253228517 0.14449036253228517 0.2578980101543562
8 8 0.2549368125378225 0.14148366678653188 0.14148366678653188 0.2549368125378225
9 9 0.2774793093993747 0.13842552961645824 0.13842552961645824 0.2774793093993747
10 10 0.29633149188806335 0.17251507022640497 0.17251507022640497 0.29633149188806335
11 11 0.28457149559807815 0.20347568890084347 0.20347568890084347 0.28457149559807815
12 12 0.2774764884391462 0.23906895503283213 0.23906895503283213 0.2774764884391462
13 13 0.2807117319594596 0.2433263434151139 0.2433263434151139 0.2807117319594596
14 14 0.2756438988231549 0.2378679295617759 0.2378679295617759 0.2756438988231549
15 15 0.2725133030686268 0.23691994972126937 0.23691994972126937 0.2725133030686268
16 16 0.26609972785171476 0.23116431400607626 0.23116431400607626 0.26609972785171476
17 17 0.2622978716191777 0.250626112587838 0.250626112587838 0.2622978716191777
18 18 0.2599277555662332 0.25367962227891766 0.25367962227891766 0.2599277555662332
19 19 0.2627008352505403 0.27152241207311917 0.27152241207311917 0.2627008352505403
20 20 0.27904812684322156 0.2937526253744639 0.2937526253744639 0.27904812684322156
21 21 0.2862853638532431 0.29866907908096096 0.29866907908096096 0.2862853638532431
22 22 0.28363618305324206 0.2982900685039696 0.2982900685039696 0.28363618305324206
23 23 0.27298124922178313 0.29267556171442216 0.29267556171442216 0.27298124922178313
24 24 0.2755401967064185 0.30932258932020334 0.30932258932020334 0.2755401967064185
25 25 0.2699256899168711 0.30931433471981734 0.30931433471981734 0.2699256899168711
26 26 0.27327610109462835 0.309284891816073 0.309284891816073 0.27327610109462835
27 27 0.2741779110906256 0.30820922828647973 0.30820922828647973 0.2741779110906256
28 28 0.2772726745209296 0.306394576589556 0.306394576589556 0.2772726745209296
29 29 0.2763152122041744 0.31629054291989955 0.31629054291989955 0.2763152122041744
30 30 0.27822954116587556 0.31889378927031037 0.31889378927031037 0.27822954116587556
31 31 0.2765547788352012 0.31793632695355517 0.31793632695355517 0.2765547788352012
32 32 0.2873045247363621 0.3198803243841521 0.3198803243841521 0.2873045247363621
33 33 0.28417974562649284 0.3322984924566154 0.3322984924566154 0.28417974562649284
34 34 0.2685472504040367 0.33205224383012144 0.33205224383012144 0.2685472504040367
35 35 0.2640970877653046 0.32946433944653786 0.32946433944653786 0.2640970877653046
36 36 0.26594127941463497 0.34064178452545657 0.34064178452545657 0.26594127941463497
37 37 0.2671662834055061 0.34279546744648637 0.34279546744648637 0.2671662834055061
38 38 0.26972862144514015 0.3520414342812306 0.3520414342812306 0.26972862144514015
39 39 0.2745566131731437 0.35684038034252413 0.35684038034252413 0.2745566131731437
40 40 0.3085760240111521 0.34927826706954956 0.34927826706954956 0.3085760240111521
41 41 0.32756637032777863 0.3490443084779255 0.3490443084779255 0.32756637032777863
42 42 0.3310796986888577 0.34372287471805796 0.34372287471805796 0.3310796986888577
43 43 0.32889480000768656 0.32421890240508233 0.32421890240508233 0.32889480000768656
44 44 0.31610864049926274 0.27315698867962007 0.27315698867962007 0.31610864049926274
45 45 0.3140921194105564 0.27051011105427114 0.27051011105427114 0.3140921194105564
46 46 0.3088953240503273 0.29627121773250714 0.29627121773250714 0.3088953240503273
47 47 0.2693097731576138 0.32261382027270064 0.32261382027270064 0.2693097731576138
48 48 0.2809797636777669 0.3299248655060567 0.3299248655060567 0.2809797636777669
49 49 0.29384518410058824 0.3171387059976329 0.3171387059976329 0.29384518410058824
50 50 0.29793575895571417 0.3442080317722919 0.3442080317722919 0.29793575895571417
51 51 0.3025569827442159 0.3408776851426114 0.3408776851426114 0.3025569827442159
52 52 0.32032808958922193 0.3408776851426114 0.3408776851426114 0.32032808958922193
53 53 0.33852852210954587 0.33765907834246356 0.33765907834246356 0.33852852210954587
54 54 0.339541278009214 0.36565310355269914 0.36565310355269914 0.339541278009214
55 55 0.35774171052953796 0.3818328805784584 0.3818328805784584 0.35774171052953796
56 56 0.37594214304986195 0.3866470678901348 0.3866470678901348 0.37594214304986195
57 57 0.4080257854586148 0.3919955336887361 0.3919955336887361 0.4080257854586148
58 58 0.4046954388289342 0.3979724365432809 0.3979724365432809 0.4046954388289342
59 59 0.4046954388289342 0.38857621891133143 0.38857621891133143 0.4046954388289342
60 60 0.3931263574608019 0.3953492191827632 0.3953492191827632 0.3931263574608019
61 61 0.38155727609266954 0.38748610984623766 0.38748610984623766 0.38155727609266954
62 62 0.37132316722174985 0.39516141319506437 0.39516141319506437 0.37132316722174985
63 63 0.36810456042160206 0.3850224051641811 0.3850224051641811 0.36810456042160206
64 64 0.3565354790534698 0.3785851915638855 0.3785851915638855 0.3565354790534698

View file

@ -0,0 +1,127 @@
,algorithm,k,min,mean,max
0,hierarchical,2,2,45.5,89
1,k_means,2,29,45.5,62
2,hierarchical,3,2,30.333333333333332,85
3,k_means,3,3,30.333333333333332,62
4,hierarchical,4,2,22.75,82
5,k_means,4,3,22.75,56
6,hierarchical,5,2,18.2,64
7,k_means,5,3,18.2,55
8,hierarchical,6,2,15.166666666666666,57
9,k_means,6,3,15.166666666666666,51
10,hierarchical,7,1,13.0,57
11,k_means,7,2,13.0,50
12,hierarchical,8,1,11.375,57
13,k_means,8,1,11.375,57
14,hierarchical,9,1,10.11111111111111,57
15,k_means,9,1,10.11111111111111,57
16,hierarchical,10,1,9.1,50
17,k_means,10,1,9.1,51
18,hierarchical,11,1,8.272727272727273,46
19,k_means,11,1,8.272727272727273,50
20,hierarchical,12,1,7.583333333333333,46
21,k_means,12,1,7.583333333333333,47
22,hierarchical,13,1,7.0,46
23,k_means,13,1,7.0,46
24,hierarchical,14,1,6.5,46
25,k_means,14,1,6.5,46
26,hierarchical,15,1,6.066666666666666,46
27,k_means,15,1,6.066666666666666,46
28,hierarchical,16,1,5.6875,46
29,k_means,16,1,5.6875,46
30,hierarchical,17,1,5.352941176470588,39
31,k_means,17,1,5.352941176470588,44
32,hierarchical,18,1,5.055555555555555,39
33,k_means,18,1,5.055555555555555,44
34,hierarchical,19,1,4.7894736842105265,34
35,k_means,19,1,4.7894736842105265,43
36,hierarchical,20,1,4.55,34
37,k_means,20,1,4.55,41
38,hierarchical,21,1,4.333333333333333,34
39,k_means,21,1,4.333333333333333,40
40,hierarchical,22,1,4.136363636363637,34
41,k_means,22,1,4.136363636363637,40
42,hierarchical,23,1,3.9565217391304346,31
43,k_means,23,1,3.9565217391304346,40
44,hierarchical,24,1,3.7916666666666665,31
45,k_means,24,1,3.7916666666666665,38
46,hierarchical,25,1,3.64,31
47,k_means,25,1,3.64,38
48,hierarchical,26,1,3.5,31
49,k_means,26,1,3.5,38
50,hierarchical,27,1,3.3703703703703702,31
51,k_means,27,1,3.3703703703703702,38
52,hierarchical,28,1,3.25,30
53,k_means,28,1,3.25,38
54,hierarchical,29,1,3.1379310344827585,30
55,k_means,29,1,3.1379310344827585,36
56,hierarchical,30,1,3.033333333333333,30
57,k_means,30,1,3.033333333333333,35
58,hierarchical,31,1,2.935483870967742,30
59,k_means,31,1,2.935483870967742,35
60,hierarchical,32,1,2.84375,30
61,k_means,32,1,2.84375,35
62,hierarchical,33,1,2.757575757575758,30
63,k_means,33,1,2.757575757575758,33
64,hierarchical,34,1,2.676470588235294,30
65,k_means,34,1,2.676470588235294,33
66,hierarchical,35,1,2.6,30
67,k_means,35,1,2.6,33
68,hierarchical,36,1,2.5277777777777777,30
69,k_means,36,1,2.5277777777777777,33
70,hierarchical,37,1,2.4594594594594597,30
71,k_means,37,1,2.4594594594594597,33
72,hierarchical,38,1,2.3947368421052633,30
73,k_means,38,1,2.3947368421052633,33
74,hierarchical,39,1,2.3333333333333335,29
75,k_means,39,1,2.3333333333333335,32
76,hierarchical,40,1,2.275,29
77,k_means,40,1,2.275,32
78,hierarchical,41,1,2.2195121951219514,29
79,k_means,41,1,2.2195121951219514,32
80,hierarchical,42,1,2.1666666666666665,29
81,k_means,42,1,2.1666666666666665,32
82,hierarchical,43,1,2.116279069767442,29
83,k_means,43,1,2.116279069767442,31
84,hierarchical,44,1,2.0681818181818183,29
85,k_means,44,1,2.0681818181818183,31
86,hierarchical,45,1,2.022222222222222,29
87,k_means,45,1,2.022222222222222,31
88,hierarchical,46,1,1.9782608695652173,29
89,k_means,46,1,1.9782608695652173,29
90,hierarchical,47,1,1.9361702127659575,28
91,k_means,47,1,1.9361702127659575,27
92,hierarchical,48,1,1.8958333333333333,17
93,k_means,48,1,1.8958333333333333,27
94,hierarchical,49,1,1.8571428571428572,17
95,k_means,49,1,1.8571428571428572,27
96,hierarchical,50,1,1.82,17
97,k_means,50,1,1.82,25
98,hierarchical,51,1,1.7843137254901962,17
99,k_means,51,1,1.7843137254901962,25
100,hierarchical,52,1,1.75,17
101,k_means,52,1,1.75,25
102,hierarchical,53,1,1.7169811320754718,17
103,k_means,53,1,1.7169811320754718,25
104,hierarchical,54,1,1.6851851851851851,17
105,k_means,54,1,1.6851851851851851,23
106,hierarchical,55,1,1.6545454545454545,17
107,k_means,55,1,1.6545454545454545,21
108,hierarchical,56,1,1.625,17
109,k_means,56,1,1.625,20
110,hierarchical,57,1,1.5964912280701755,17
111,k_means,57,1,1.5964912280701755,19
112,hierarchical,58,1,1.5689655172413792,17
113,k_means,58,1,1.5689655172413792,18
114,hierarchical,59,1,1.5423728813559323,17
115,k_means,59,1,1.5423728813559323,18
116,hierarchical,60,1,1.5166666666666666,17
117,k_means,60,1,1.5166666666666666,17
118,hierarchical,61,1,1.4918032786885247,17
119,k_means,61,1,1.4918032786885247,17
120,hierarchical,62,1,1.467741935483871,17
121,k_means,62,1,1.467741935483871,16
122,hierarchical,63,1,1.4444444444444444,17
123,k_means,63,1,1.4444444444444444,16
124,hierarchical,64,1,1.421875,17
125,k_means,64,1,1.421875,16
1 algorithm k min mean max
2 0 hierarchical 2 2 45.5 89
3 1 k_means 2 29 45.5 62
4 2 hierarchical 3 2 30.333333333333332 85
5 3 k_means 3 3 30.333333333333332 62
6 4 hierarchical 4 2 22.75 82
7 5 k_means 4 3 22.75 56
8 6 hierarchical 5 2 18.2 64
9 7 k_means 5 3 18.2 55
10 8 hierarchical 6 2 15.166666666666666 57
11 9 k_means 6 3 15.166666666666666 51
12 10 hierarchical 7 1 13.0 57
13 11 k_means 7 2 13.0 50
14 12 hierarchical 8 1 11.375 57
15 13 k_means 8 1 11.375 57
16 14 hierarchical 9 1 10.11111111111111 57
17 15 k_means 9 1 10.11111111111111 57
18 16 hierarchical 10 1 9.1 50
19 17 k_means 10 1 9.1 51
20 18 hierarchical 11 1 8.272727272727273 46
21 19 k_means 11 1 8.272727272727273 50
22 20 hierarchical 12 1 7.583333333333333 46
23 21 k_means 12 1 7.583333333333333 47
24 22 hierarchical 13 1 7.0 46
25 23 k_means 13 1 7.0 46
26 24 hierarchical 14 1 6.5 46
27 25 k_means 14 1 6.5 46
28 26 hierarchical 15 1 6.066666666666666 46
29 27 k_means 15 1 6.066666666666666 46
30 28 hierarchical 16 1 5.6875 46
31 29 k_means 16 1 5.6875 46
32 30 hierarchical 17 1 5.352941176470588 39
33 31 k_means 17 1 5.352941176470588 44
34 32 hierarchical 18 1 5.055555555555555 39
35 33 k_means 18 1 5.055555555555555 44
36 34 hierarchical 19 1 4.7894736842105265 34
37 35 k_means 19 1 4.7894736842105265 43
38 36 hierarchical 20 1 4.55 34
39 37 k_means 20 1 4.55 41
40 38 hierarchical 21 1 4.333333333333333 34
41 39 k_means 21 1 4.333333333333333 40
42 40 hierarchical 22 1 4.136363636363637 34
43 41 k_means 22 1 4.136363636363637 40
44 42 hierarchical 23 1 3.9565217391304346 31
45 43 k_means 23 1 3.9565217391304346 40
46 44 hierarchical 24 1 3.7916666666666665 31
47 45 k_means 24 1 3.7916666666666665 38
48 46 hierarchical 25 1 3.64 31
49 47 k_means 25 1 3.64 38
50 48 hierarchical 26 1 3.5 31
51 49 k_means 26 1 3.5 38
52 50 hierarchical 27 1 3.3703703703703702 31
53 51 k_means 27 1 3.3703703703703702 38
54 52 hierarchical 28 1 3.25 30
55 53 k_means 28 1 3.25 38
56 54 hierarchical 29 1 3.1379310344827585 30
57 55 k_means 29 1 3.1379310344827585 36
58 56 hierarchical 30 1 3.033333333333333 30
59 57 k_means 30 1 3.033333333333333 35
60 58 hierarchical 31 1 2.935483870967742 30
61 59 k_means 31 1 2.935483870967742 35
62 60 hierarchical 32 1 2.84375 30
63 61 k_means 32 1 2.84375 35
64 62 hierarchical 33 1 2.757575757575758 30
65 63 k_means 33 1 2.757575757575758 33
66 64 hierarchical 34 1 2.676470588235294 30
67 65 k_means 34 1 2.676470588235294 33
68 66 hierarchical 35 1 2.6 30
69 67 k_means 35 1 2.6 33
70 68 hierarchical 36 1 2.5277777777777777 30
71 69 k_means 36 1 2.5277777777777777 33
72 70 hierarchical 37 1 2.4594594594594597 30
73 71 k_means 37 1 2.4594594594594597 33
74 72 hierarchical 38 1 2.3947368421052633 30
75 73 k_means 38 1 2.3947368421052633 33
76 74 hierarchical 39 1 2.3333333333333335 29
77 75 k_means 39 1 2.3333333333333335 32
78 76 hierarchical 40 1 2.275 29
79 77 k_means 40 1 2.275 32
80 78 hierarchical 41 1 2.2195121951219514 29
81 79 k_means 41 1 2.2195121951219514 32
82 80 hierarchical 42 1 2.1666666666666665 29
83 81 k_means 42 1 2.1666666666666665 32
84 82 hierarchical 43 1 2.116279069767442 29
85 83 k_means 43 1 2.116279069767442 31
86 84 hierarchical 44 1 2.0681818181818183 29
87 85 k_means 44 1 2.0681818181818183 31
88 86 hierarchical 45 1 2.022222222222222 29
89 87 k_means 45 1 2.022222222222222 31
90 88 hierarchical 46 1 1.9782608695652173 29
91 89 k_means 46 1 1.9782608695652173 29
92 90 hierarchical 47 1 1.9361702127659575 28
93 91 k_means 47 1 1.9361702127659575 27
94 92 hierarchical 48 1 1.8958333333333333 17
95 93 k_means 48 1 1.8958333333333333 27
96 94 hierarchical 49 1 1.8571428571428572 17
97 95 k_means 49 1 1.8571428571428572 27
98 96 hierarchical 50 1 1.82 17
99 97 k_means 50 1 1.82 25
100 98 hierarchical 51 1 1.7843137254901962 17
101 99 k_means 51 1 1.7843137254901962 25
102 100 hierarchical 52 1 1.75 17
103 101 k_means 52 1 1.75 25
104 102 hierarchical 53 1 1.7169811320754718 17
105 103 k_means 53 1 1.7169811320754718 25
106 104 hierarchical 54 1 1.6851851851851851 17
107 105 k_means 54 1 1.6851851851851851 23
108 106 hierarchical 55 1 1.6545454545454545 17
109 107 k_means 55 1 1.6545454545454545 21
110 108 hierarchical 56 1 1.625 17
111 109 k_means 56 1 1.625 20
112 110 hierarchical 57 1 1.5964912280701755 17
113 111 k_means 57 1 1.5964912280701755 19
114 112 hierarchical 58 1 1.5689655172413792 17
115 113 k_means 58 1 1.5689655172413792 18
116 114 hierarchical 59 1 1.5423728813559323 17
117 115 k_means 59 1 1.5423728813559323 18
118 116 hierarchical 60 1 1.5166666666666666 17
119 117 k_means 60 1 1.5166666666666666 17
120 118 hierarchical 61 1 1.4918032786885247 17
121 119 k_means 61 1 1.4918032786885247 17
122 120 hierarchical 62 1 1.467741935483871 17
123 121 k_means 62 1 1.467741935483871 16
124 122 hierarchical 63 1 1.4444444444444444 17
125 123 k_means 63 1 1.4444444444444444 16
126 124 hierarchical 64 1 1.421875 17
127 125 k_means 64 1 1.421875 16

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

View file

@ -11,7 +11,7 @@ constructTrees,0
isExistingGrammar,0 isExistingGrammar,0
updateImportListFor,0 updateImportListFor,0
updateImportListWith,0 updateImportListWith,0
buildGlobalNameRegistries,0 buildGlobalNameRegistries,2
traverseSchemas,0 traverseSchemas,0
needReportTNSError,0 needReportTNSError,0
addGlobalAttributeDecl,0 addGlobalAttributeDecl,0

1 cluster
11 isExistingGrammar 0
12 updateImportListFor 0
13 updateImportListWith 0
14 buildGlobalNameRegistries 0 2
15 traverseSchemas 0
16 needReportTNSError 0
17 addGlobalAttributeDecl 0

View file

@ -36,7 +36,7 @@ getGrpOrAttrGrpRedefinedByRestriction,0
resolveKeyRefs,0 resolveKeyRefs,0
getIDRegistry,0 getIDRegistry,0
getIDRegistry_sub,0 getIDRegistry_sub,0
storeKeyRef,2 storeKeyRef,0
resolveSchema,0 resolveSchema,0
resolveSchemaSource,0 resolveSchemaSource,0
getSchemaDocument,0 getSchemaDocument,0

1 cluster
36 resolveKeyRefs 0
37 getIDRegistry 0
38 getIDRegistry_sub 0
39 storeKeyRef 2 0
40 resolveSchema 0
41 resolveSchemaSource 0
42 getSchemaDocument 0

View file

@ -1,64 +1,64 @@
,k_means,hierarchical ,k_means,hierarchical
2,0.598553678618089,0.598553678618089 2,0.598553678618089,0.598553678618089
3,0.5988635577957939,0.4340479638200015 3,0.4340479638200015,0.5988635577957939
4,0.5768968986366794,0.4446910360233003 4,0.4446910360233003,0.5768968986366794
5,0.5600582308059449,0.44970649031040394 5,0.44970649031040394,0.5600582308059449
6,0.4754961922118064,0.42291318953408236 6,0.42291318953408236,0.4754961922118064
7,0.4716129791423394,0.37302776265331616 7,0.37302776265331616,0.4716129791423394
8,0.4215235973451702,0.16585341129364783 8,0.16585341129364783,0.4215235973451702
9,0.4270412461769427,0.1805562766904707 9,0.1805562766904707,0.4270412461769427
10,0.42033460704259445,0.10021703881198853 10,0.10021703881198853,0.42033460704259445
11,0.3979172260202459,0.11227880527684016 11,0.11227880527684016,0.3979172260202459
12,0.3992377842624971,0.13834960978465374 12,0.13834960978465374,0.3992377842624971
13,0.36351812430049024,0.14637482631499601 13,0.14637482631499601,0.36351812430049024
14,0.36086605127470145,0.15339700393049752 14,0.15339700393049752,0.36086605127470145
15,0.27803229144747893,0.17519153912543511 15,0.17519153912543511,0.27803229144747893
16,0.24764306057751692,0.18163489682652323 16,0.18163489682652323,0.24764306057751692
17,0.2546247662068935,0.1920283064393974 17,0.1920283064393974,0.2546247662068935
18,0.27281600254442556,0.1968887014348958 18,0.1968887014348958,0.27281600254442556
19,0.2705186834360297,0.22320550740329767 19,0.22320550740329767,0.2705186834360297
20,0.29969231483298964,0.2278559856358303 20,0.2278559856358303,0.29969231483298964
21,0.31507175636228785,0.21631113790331308 21,0.21631113790331308,0.31507175636228785
22,0.3170839571491974,0.23240216910856668 22,0.23240216910856668,0.3170839571491974
23,0.3201062001500274,0.23590483919206368 23,0.23590483919206368,0.3201062001500274
24,0.24788100874579763,0.24413294581937137 24,0.24413294581937137,0.24788100874579763
25,0.24932439019964475,0.2514892577758059 25,0.2514892577758059,0.24932439019964475
26,0.25682838168308425,0.2573251636281981 26,0.2573251636281981,0.25682838168308425
27,0.25691108409617125,0.2629575908594159 27,0.2629575908594159,0.25691108409617125
28,0.2606141413445487,0.27452933746874875 28,0.27452933746874875,0.2606141413445487
29,0.2538973293819504,0.27138907448677696 29,0.27138907448677696,0.2538973293819504
30,0.26300677134410877,0.27608901099722993 30,0.27608901099722993,0.26300677134410877
31,0.25958787047274295,0.2734068655042204 31,0.2734068655042204,0.25958787047274295
32,0.2620577633391267,0.26668692055998694 32,0.26668692055998694,0.2620577633391267
33,0.2677665846189286,0.27190541883537933 33,0.27190541883537933,0.2677665846189286
34,0.2698493206362974,0.2737953942290021 34,0.2737953942290021,0.2698493206362974
35,0.26871523120875485,0.2792490491212266 35,0.2792490491212266,0.26871523120875485
36,0.2706224838853019,0.285255446778457 36,0.285255446778457,0.2706224838853019
37,0.27443698923839605,0.28791640737048424 37,0.28791640737048424,0.27443698923839605
38,0.27814176822064324,0.279340819685821 38,0.279340819685821,0.27814176822064324
39,0.27606460269195954,0.27183941054653343 39,0.27183941054653343,0.27606460269195954
40,0.27307694582354536,0.2763117563875985 40,0.2763117563875985,0.27307694582354536
41,0.27681213230462487,0.28090687018324295 41,0.28090687018324295,0.27681213230462487
42,0.2763401507651925,0.2823757748346625 42,0.2823757748346625,0.2763401507651925
43,0.2905756087008992,0.284106939165533 43,0.284106939165533,0.2905756087008992
44,0.2901305465431984,0.28892051024774673 44,0.28892051024774673,0.2901305465431984
45,0.2866682178814574,0.31067801214219776 45,0.31067801214219776,0.2866682178814574
46,0.2748147271365624,0.33271255209429573 46,0.33271255209429573,0.2748147271365624
47,0.2824570955002154,0.33809240352772785 47,0.33809240352772785,0.2824570955002154
48,0.28532199483886955,0.342466005901906 48,0.342466005901906,0.28532199483886955
49,0.3194897125271686,0.3472678571815208 49,0.3472678571815208,0.3194897125271686
50,0.32227149882332984,0.3494143489069156 50,0.3494143489069156,0.32227149882332984
51,0.32507315719109064,0.3553262507378467 51,0.3553262507378467,0.32507315719109064
52,0.3483927215781152,0.3577437943048381 52,0.3577437943048381,0.3483927215781152
53,0.35116863658039477,0.36265544445738723 53,0.36265544445738723,0.35116863658039477
54,0.35397195095412226,0.3652777392559547 54,0.3652777392559547,0.35397195095412226
55,0.35681293946359083,0.3635299110583668 55,0.3635299110583668,0.35681293946359083
56,0.35970774816697515,0.36657235082485046 56,0.36657235082485046,0.35970774816697515
57,0.36268328716123316,0.369801843033111 57,0.369801843033111,0.36268328716123316
58,0.365787994620889,0.373236109725014 58,0.373236109725014,0.365787994620889
59,0.36911874689499113,0.3768951711426859 59,0.3768951711426859,0.36911874689499113
60,0.3899012348681259,0.38080172755114144 60,0.38080172755114144,0.3899012348681259
61,0.3925538526988892,0.38498161884368615 61,0.38498161884368615,0.3925538526988892
62,0.3952064705296526,0.38946438071227807 62,0.38946438071227807,0.3952064705296526
63,0.40051170619117926,0.39428392137375445 63,0.39428392137375445,0.40051170619117926
64,0.4003435986724249,0.4003771346837245 64,0.4003771346837245,0.4003435986724249

1 k_means hierarchical
2 2 0.598553678618089 0.598553678618089
3 3 0.5988635577957939 0.4340479638200015 0.4340479638200015 0.5988635577957939
4 4 0.5768968986366794 0.4446910360233003 0.4446910360233003 0.5768968986366794
5 5 0.5600582308059449 0.44970649031040394 0.44970649031040394 0.5600582308059449
6 6 0.4754961922118064 0.42291318953408236 0.42291318953408236 0.4754961922118064
7 7 0.4716129791423394 0.37302776265331616 0.37302776265331616 0.4716129791423394
8 8 0.4215235973451702 0.16585341129364783 0.16585341129364783 0.4215235973451702
9 9 0.4270412461769427 0.1805562766904707 0.1805562766904707 0.4270412461769427
10 10 0.42033460704259445 0.10021703881198853 0.10021703881198853 0.42033460704259445
11 11 0.3979172260202459 0.11227880527684016 0.11227880527684016 0.3979172260202459
12 12 0.3992377842624971 0.13834960978465374 0.13834960978465374 0.3992377842624971
13 13 0.36351812430049024 0.14637482631499601 0.14637482631499601 0.36351812430049024
14 14 0.36086605127470145 0.15339700393049752 0.15339700393049752 0.36086605127470145
15 15 0.27803229144747893 0.17519153912543511 0.17519153912543511 0.27803229144747893
16 16 0.24764306057751692 0.18163489682652323 0.18163489682652323 0.24764306057751692
17 17 0.2546247662068935 0.1920283064393974 0.1920283064393974 0.2546247662068935
18 18 0.27281600254442556 0.1968887014348958 0.1968887014348958 0.27281600254442556
19 19 0.2705186834360297 0.22320550740329767 0.22320550740329767 0.2705186834360297
20 20 0.29969231483298964 0.2278559856358303 0.2278559856358303 0.29969231483298964
21 21 0.31507175636228785 0.21631113790331308 0.21631113790331308 0.31507175636228785
22 22 0.3170839571491974 0.23240216910856668 0.23240216910856668 0.3170839571491974
23 23 0.3201062001500274 0.23590483919206368 0.23590483919206368 0.3201062001500274
24 24 0.24788100874579763 0.24413294581937137 0.24413294581937137 0.24788100874579763
25 25 0.24932439019964475 0.2514892577758059 0.2514892577758059 0.24932439019964475
26 26 0.25682838168308425 0.2573251636281981 0.2573251636281981 0.25682838168308425
27 27 0.25691108409617125 0.2629575908594159 0.2629575908594159 0.25691108409617125
28 28 0.2606141413445487 0.27452933746874875 0.27452933746874875 0.2606141413445487
29 29 0.2538973293819504 0.27138907448677696 0.27138907448677696 0.2538973293819504
30 30 0.26300677134410877 0.27608901099722993 0.27608901099722993 0.26300677134410877
31 31 0.25958787047274295 0.2734068655042204 0.2734068655042204 0.25958787047274295
32 32 0.2620577633391267 0.26668692055998694 0.26668692055998694 0.2620577633391267
33 33 0.2677665846189286 0.27190541883537933 0.27190541883537933 0.2677665846189286
34 34 0.2698493206362974 0.2737953942290021 0.2737953942290021 0.2698493206362974
35 35 0.26871523120875485 0.2792490491212266 0.2792490491212266 0.26871523120875485
36 36 0.2706224838853019 0.285255446778457 0.285255446778457 0.2706224838853019
37 37 0.27443698923839605 0.28791640737048424 0.28791640737048424 0.27443698923839605
38 38 0.27814176822064324 0.279340819685821 0.279340819685821 0.27814176822064324
39 39 0.27606460269195954 0.27183941054653343 0.27183941054653343 0.27606460269195954
40 40 0.27307694582354536 0.2763117563875985 0.2763117563875985 0.27307694582354536
41 41 0.27681213230462487 0.28090687018324295 0.28090687018324295 0.27681213230462487
42 42 0.2763401507651925 0.2823757748346625 0.2823757748346625 0.2763401507651925
43 43 0.2905756087008992 0.284106939165533 0.284106939165533 0.2905756087008992
44 44 0.2901305465431984 0.28892051024774673 0.28892051024774673 0.2901305465431984
45 45 0.2866682178814574 0.31067801214219776 0.31067801214219776 0.2866682178814574
46 46 0.2748147271365624 0.33271255209429573 0.33271255209429573 0.2748147271365624
47 47 0.2824570955002154 0.33809240352772785 0.33809240352772785 0.2824570955002154
48 48 0.28532199483886955 0.342466005901906 0.342466005901906 0.28532199483886955
49 49 0.3194897125271686 0.3472678571815208 0.3472678571815208 0.3194897125271686
50 50 0.32227149882332984 0.3494143489069156 0.3494143489069156 0.32227149882332984
51 51 0.32507315719109064 0.3553262507378467 0.3553262507378467 0.32507315719109064
52 52 0.3483927215781152 0.3577437943048381 0.3577437943048381 0.3483927215781152
53 53 0.35116863658039477 0.36265544445738723 0.36265544445738723 0.35116863658039477
54 54 0.35397195095412226 0.3652777392559547 0.3652777392559547 0.35397195095412226
55 55 0.35681293946359083 0.3635299110583668 0.3635299110583668 0.35681293946359083
56 56 0.35970774816697515 0.36657235082485046 0.36657235082485046 0.35970774816697515
57 57 0.36268328716123316 0.369801843033111 0.369801843033111 0.36268328716123316
58 58 0.365787994620889 0.373236109725014 0.373236109725014 0.365787994620889
59 59 0.36911874689499113 0.3768951711426859 0.3768951711426859 0.36911874689499113
60 60 0.3899012348681259 0.38080172755114144 0.38080172755114144 0.3899012348681259
61 61 0.3925538526988892 0.38498161884368615 0.38498161884368615 0.3925538526988892
62 62 0.3952064705296526 0.38946438071227807 0.38946438071227807 0.3952064705296526
63 63 0.40051170619117926 0.39428392137375445 0.39428392137375445 0.40051170619117926
64 64 0.4003435986724249 0.4003771346837245 0.4003771346837245 0.4003435986724249

View file

@ -0,0 +1,127 @@
,algorithm,k,min,mean,max
0,hierarchical,2,1,53.0,105
1,k_means,2,1,53.0,105
2,hierarchical,3,1,35.333333333333336,104
3,k_means,3,1,35.333333333333336,104
4,hierarchical,4,1,26.5,102
5,k_means,4,1,26.5,102
6,hierarchical,5,1,21.2,102
7,k_means,5,1,21.2,101
8,hierarchical,6,1,17.666666666666668,99
9,k_means,6,1,17.666666666666668,99
10,hierarchical,7,1,15.142857142857142,98
11,k_means,7,1,15.142857142857142,98
12,hierarchical,8,1,13.25,96
13,k_means,8,1,13.25,91
14,hierarchical,9,1,11.777777777777779,96
15,k_means,9,1,11.777777777777779,90
16,hierarchical,10,1,10.6,95
17,k_means,10,1,10.6,86
18,hierarchical,11,1,9.636363636363637,94
19,k_means,11,1,9.636363636363637,84
20,hierarchical,12,1,8.833333333333334,93
21,k_means,12,1,8.833333333333334,82
22,hierarchical,13,1,8.153846153846153,91
23,k_means,13,1,8.153846153846153,81
24,hierarchical,14,1,7.571428571428571,91
25,k_means,14,1,7.571428571428571,80
26,hierarchical,15,1,7.066666666666666,83
27,k_means,15,1,7.066666666666666,76
28,hierarchical,16,1,6.625,83
29,k_means,16,1,6.625,75
30,hierarchical,17,1,6.235294117647059,78
31,k_means,17,1,6.235294117647059,74
32,hierarchical,18,1,5.888888888888889,78
33,k_means,18,1,5.888888888888889,73
34,hierarchical,19,1,5.578947368421052,78
35,k_means,19,1,5.578947368421052,71
36,hierarchical,20,1,5.3,71
37,k_means,20,1,5.3,70
38,hierarchical,21,1,5.0476190476190474,68
39,k_means,21,1,5.0476190476190474,69
40,hierarchical,22,1,4.818181818181818,68
41,k_means,22,1,4.818181818181818,65
42,hierarchical,23,1,4.608695652173913,68
43,k_means,23,1,4.608695652173913,65
44,hierarchical,24,1,4.416666666666667,64
45,k_means,24,1,4.416666666666667,64
46,hierarchical,25,1,4.24,64
47,k_means,25,1,4.24,62
48,hierarchical,26,1,4.076923076923077,64
49,k_means,26,1,4.076923076923077,61
50,hierarchical,27,1,3.925925925925926,64
51,k_means,27,1,3.925925925925926,60
52,hierarchical,28,1,3.7857142857142856,63
53,k_means,28,1,3.7857142857142856,55
54,hierarchical,29,1,3.6551724137931036,63
55,k_means,29,1,3.6551724137931036,55
56,hierarchical,30,1,3.533333333333333,63
57,k_means,30,1,3.533333333333333,54
58,hierarchical,31,1,3.4193548387096775,63
59,k_means,31,1,3.4193548387096775,54
60,hierarchical,32,1,3.3125,52
61,k_means,32,1,3.3125,54
62,hierarchical,33,1,3.212121212121212,52
63,k_means,33,1,3.212121212121212,53
64,hierarchical,34,1,3.1176470588235294,52
65,k_means,34,1,3.1176470588235294,52
66,hierarchical,35,1,3.0285714285714285,52
67,k_means,35,1,3.0285714285714285,51
68,hierarchical,36,1,2.9444444444444446,52
69,k_means,36,1,2.9444444444444446,50
70,hierarchical,37,1,2.864864864864865,52
71,k_means,37,1,2.864864864864865,50
72,hierarchical,38,1,2.789473684210526,51
73,k_means,38,1,2.789473684210526,50
74,hierarchical,39,1,2.717948717948718,51
75,k_means,39,1,2.717948717948718,50
76,hierarchical,40,1,2.65,51
77,k_means,40,1,2.65,49
78,hierarchical,41,1,2.5853658536585367,51
79,k_means,41,1,2.5853658536585367,48
80,hierarchical,42,1,2.5238095238095237,51
81,k_means,42,1,2.5238095238095237,47
82,hierarchical,43,1,2.4651162790697674,47
83,k_means,43,1,2.4651162790697674,47
84,hierarchical,44,1,2.409090909090909,47
85,k_means,44,1,2.409090909090909,46
86,hierarchical,45,1,2.3555555555555556,47
87,k_means,45,1,2.3555555555555556,44
88,hierarchical,46,1,2.3043478260869565,46
89,k_means,46,1,2.3043478260869565,42
90,hierarchical,47,1,2.25531914893617,25
91,k_means,47,1,2.25531914893617,41
92,hierarchical,48,1,2.2083333333333335,25
93,k_means,48,1,2.2083333333333335,41
94,hierarchical,49,1,2.163265306122449,25
95,k_means,49,1,2.163265306122449,41
96,hierarchical,50,1,2.12,25
97,k_means,50,1,2.12,40
98,hierarchical,51,1,2.0784313725490198,25
99,k_means,51,1,2.0784313725490198,39
100,hierarchical,52,1,2.0384615384615383,25
101,k_means,52,1,2.0384615384615383,38
102,hierarchical,53,1,2.0,25
103,k_means,53,1,2.0,38
104,hierarchical,54,1,1.962962962962963,25
105,k_means,54,1,1.962962962962963,37
106,hierarchical,55,1,1.9272727272727272,25
107,k_means,55,1,1.9272727272727272,35
108,hierarchical,56,1,1.8928571428571428,25
109,k_means,56,1,1.8928571428571428,34
110,hierarchical,57,1,1.8596491228070176,25
111,k_means,57,1,1.8596491228070176,33
112,hierarchical,58,1,1.8275862068965518,25
113,k_means,58,1,1.8275862068965518,32
114,hierarchical,59,1,1.7966101694915255,25
115,k_means,59,1,1.7966101694915255,31
116,hierarchical,60,1,1.7666666666666666,25
117,k_means,60,1,1.7666666666666666,30
118,hierarchical,61,1,1.7377049180327868,25
119,k_means,61,1,1.7377049180327868,29
120,hierarchical,62,1,1.7096774193548387,25
121,k_means,62,1,1.7096774193548387,28
122,hierarchical,63,1,1.6825396825396826,25
123,k_means,63,1,1.6825396825396826,27
124,hierarchical,64,1,1.65625,25
125,k_means,64,1,1.65625,27
1 algorithm k min mean max
2 0 hierarchical 2 1 53.0 105
3 1 k_means 2 1 53.0 105
4 2 hierarchical 3 1 35.333333333333336 104
5 3 k_means 3 1 35.333333333333336 104
6 4 hierarchical 4 1 26.5 102
7 5 k_means 4 1 26.5 102
8 6 hierarchical 5 1 21.2 102
9 7 k_means 5 1 21.2 101
10 8 hierarchical 6 1 17.666666666666668 99
11 9 k_means 6 1 17.666666666666668 99
12 10 hierarchical 7 1 15.142857142857142 98
13 11 k_means 7 1 15.142857142857142 98
14 12 hierarchical 8 1 13.25 96
15 13 k_means 8 1 13.25 91
16 14 hierarchical 9 1 11.777777777777779 96
17 15 k_means 9 1 11.777777777777779 90
18 16 hierarchical 10 1 10.6 95
19 17 k_means 10 1 10.6 86
20 18 hierarchical 11 1 9.636363636363637 94
21 19 k_means 11 1 9.636363636363637 84
22 20 hierarchical 12 1 8.833333333333334 93
23 21 k_means 12 1 8.833333333333334 82
24 22 hierarchical 13 1 8.153846153846153 91
25 23 k_means 13 1 8.153846153846153 81
26 24 hierarchical 14 1 7.571428571428571 91
27 25 k_means 14 1 7.571428571428571 80
28 26 hierarchical 15 1 7.066666666666666 83
29 27 k_means 15 1 7.066666666666666 76
30 28 hierarchical 16 1 6.625 83
31 29 k_means 16 1 6.625 75
32 30 hierarchical 17 1 6.235294117647059 78
33 31 k_means 17 1 6.235294117647059 74
34 32 hierarchical 18 1 5.888888888888889 78
35 33 k_means 18 1 5.888888888888889 73
36 34 hierarchical 19 1 5.578947368421052 78
37 35 k_means 19 1 5.578947368421052 71
38 36 hierarchical 20 1 5.3 71
39 37 k_means 20 1 5.3 70
40 38 hierarchical 21 1 5.0476190476190474 68
41 39 k_means 21 1 5.0476190476190474 69
42 40 hierarchical 22 1 4.818181818181818 68
43 41 k_means 22 1 4.818181818181818 65
44 42 hierarchical 23 1 4.608695652173913 68
45 43 k_means 23 1 4.608695652173913 65
46 44 hierarchical 24 1 4.416666666666667 64
47 45 k_means 24 1 4.416666666666667 64
48 46 hierarchical 25 1 4.24 64
49 47 k_means 25 1 4.24 62
50 48 hierarchical 26 1 4.076923076923077 64
51 49 k_means 26 1 4.076923076923077 61
52 50 hierarchical 27 1 3.925925925925926 64
53 51 k_means 27 1 3.925925925925926 60
54 52 hierarchical 28 1 3.7857142857142856 63
55 53 k_means 28 1 3.7857142857142856 55
56 54 hierarchical 29 1 3.6551724137931036 63
57 55 k_means 29 1 3.6551724137931036 55
58 56 hierarchical 30 1 3.533333333333333 63
59 57 k_means 30 1 3.533333333333333 54
60 58 hierarchical 31 1 3.4193548387096775 63
61 59 k_means 31 1 3.4193548387096775 54
62 60 hierarchical 32 1 3.3125 52
63 61 k_means 32 1 3.3125 54
64 62 hierarchical 33 1 3.212121212121212 52
65 63 k_means 33 1 3.212121212121212 53
66 64 hierarchical 34 1 3.1176470588235294 52
67 65 k_means 34 1 3.1176470588235294 52
68 66 hierarchical 35 1 3.0285714285714285 52
69 67 k_means 35 1 3.0285714285714285 51
70 68 hierarchical 36 1 2.9444444444444446 52
71 69 k_means 36 1 2.9444444444444446 50
72 70 hierarchical 37 1 2.864864864864865 52
73 71 k_means 37 1 2.864864864864865 50
74 72 hierarchical 38 1 2.789473684210526 51
75 73 k_means 38 1 2.789473684210526 50
76 74 hierarchical 39 1 2.717948717948718 51
77 75 k_means 39 1 2.717948717948718 50
78 76 hierarchical 40 1 2.65 51
79 77 k_means 40 1 2.65 49
80 78 hierarchical 41 1 2.5853658536585367 51
81 79 k_means 41 1 2.5853658536585367 48
82 80 hierarchical 42 1 2.5238095238095237 51
83 81 k_means 42 1 2.5238095238095237 47
84 82 hierarchical 43 1 2.4651162790697674 47
85 83 k_means 43 1 2.4651162790697674 47
86 84 hierarchical 44 1 2.409090909090909 47
87 85 k_means 44 1 2.409090909090909 46
88 86 hierarchical 45 1 2.3555555555555556 47
89 87 k_means 45 1 2.3555555555555556 44
90 88 hierarchical 46 1 2.3043478260869565 46
91 89 k_means 46 1 2.3043478260869565 42
92 90 hierarchical 47 1 2.25531914893617 25
93 91 k_means 47 1 2.25531914893617 41
94 92 hierarchical 48 1 2.2083333333333335 25
95 93 k_means 48 1 2.2083333333333335 41
96 94 hierarchical 49 1 2.163265306122449 25
97 95 k_means 49 1 2.163265306122449 41
98 96 hierarchical 50 1 2.12 25
99 97 k_means 50 1 2.12 40
100 98 hierarchical 51 1 2.0784313725490198 25
101 99 k_means 51 1 2.0784313725490198 39
102 100 hierarchical 52 1 2.0384615384615383 25
103 101 k_means 52 1 2.0384615384615383 38
104 102 hierarchical 53 1 2.0 25
105 103 k_means 53 1 2.0 38
106 104 hierarchical 54 1 1.962962962962963 25
107 105 k_means 54 1 1.962962962962963 37
108 106 hierarchical 55 1 1.9272727272727272 25
109 107 k_means 55 1 1.9272727272727272 35
110 108 hierarchical 56 1 1.8928571428571428 25
111 109 k_means 56 1 1.8928571428571428 34
112 110 hierarchical 57 1 1.8596491228070176 25
113 111 k_means 57 1 1.8596491228070176 33
114 112 hierarchical 58 1 1.8275862068965518 25
115 113 k_means 58 1 1.8275862068965518 32
116 114 hierarchical 59 1 1.7966101694915255 25
117 115 k_means 59 1 1.7966101694915255 31
118 116 hierarchical 60 1 1.7666666666666666 25
119 117 k_means 60 1 1.7666666666666666 30
120 118 hierarchical 61 1 1.7377049180327868 25
121 119 k_means 61 1 1.7377049180327868 29
122 120 hierarchical 62 1 1.7096774193548387 25
123 121 k_means 62 1 1.7096774193548387 28
124 122 hierarchical 63 1 1.6825396825396826 25
125 123 k_means 63 1 1.6825396825396826 27
126 124 hierarchical 64 1 1.65625 25
127 125 k_means 64 1 1.65625 27

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

View file

@ -1,64 +1,64 @@
,k_means,hierarchical ,k_means,hierarchical
2,0.6855584100867681,0.6979818296524081 2,0.6979818296524081,0.6855584100867681
3,0.6658312390685782,0.5363440260613704 3,0.5363440260613704,0.6658312390685782
4,0.5447405755407478,0.5447405755407478 4,0.5447405755407478,0.5447405755407478
5,0.49469855877597974,0.4950944104608897 5,0.4950944104608897,0.49469855877597974
6,0.49629928069605667,0.3167619075077442 6,0.3167619075077442,0.49629928069605667
7,0.4937183177275972,0.3273304877495634 7,0.3273304877495634,0.4937183177275972
8,0.4903327662796836,0.16989336921679118 8,0.16989336921679118,0.4903327662796836
9,0.33791118198002373,0.17626101482318196 9,0.17626101482318196,0.33791118198002373
10,0.2667320598048964,0.19790344008120894 10,0.19790344008120894,0.2667320598048964
11,0.2629948976926796,0.1943827895435377 11,0.1943827895435377,0.2629948976926796
12,0.25965211932671445,0.20556562870341602 12,0.20556562870341602,0.25965211932671445
13,0.26164323937367595,0.20144552653966163 13,0.20144552653966163,0.26164323937367595
14,0.25806076142240403,0.22548403695669203 14,0.22548403695669203,0.25806076142240403
15,0.26997893998401756,0.22918194758667895 15,0.22918194758667895,0.26997893998401756
16,0.27256451459055664,0.2404290571765335 16,0.2404290571765335,0.27256451459055664
17,0.2608837912623233,0.2345015455494567 17,0.2345015455494567,0.2608837912623233
18,0.241790230179569,0.2390816398182416 18,0.2390816398182416,0.241790230179569
19,0.24484234464495422,0.24094820256010968 19,0.24094820256010968,0.24484234464495422
20,0.2361050155539465,0.2435369787081999 20,0.2435369787081999,0.2361050155539465
21,0.23692336175194548,0.262463283756636 21,0.262463283756636,0.23692336175194548
22,0.23946566771940794,0.2742864390420934 22,0.2742864390420934,0.23946566771940794
23,0.24594283942153175,0.2979619533428987 23,0.2979619533428987,0.24594283942153175
24,0.24734609860636583,0.29936015461670856 24,0.29936015461670856,0.24734609860636583
25,0.2372755932074588,0.30224440986202594 25,0.30224440986202594,0.2372755932074588
26,0.24082497341896647,0.30600924875137986 26,0.30600924875137986,0.24082497341896647
27,0.24547723657004195,0.3147862783718484 27,0.3147862783718484,0.24547723657004195
28,0.2503460498700128,0.31887407003386015 28,0.31887407003386015,0.2503460498700128
29,0.26565769288673047,0.3204052924706567 29,0.3204052924706567,0.26565769288673047
30,0.2951470761811464,0.3106572702067674 30,0.3106572702067674,0.2951470761811464
31,0.30158824153259317,0.31330774028648145 31,0.31330774028648145,0.30158824153259317
32,0.3180875184494547,0.33210454757827634 32,0.33210454757827634,0.3180875184494547
33,0.32604023717225655,0.340503634089749 33,0.340503634089749,0.32604023717225655
34,0.3162922149083673,0.33568440892081625 34,0.33568440892081625,0.3162922149083673
35,0.31716183472339093,0.3545992807283562 35,0.3545992807283562,0.31716183472339093
36,0.3214298482703343,0.3575403386841057 36,0.3575403386841057,0.3214298482703343
37,0.32681546873349715,0.36503026576341707 37,0.36503026576341707,0.32681546873349715
38,0.32732304963529885,0.3738835074795801 38,0.3738835074795801,0.32732304963529885
39,0.32990135488218114,0.3929681262996284 39,0.3929681262996284,0.32990135488218114
40,0.32432743478528314,0.38848432563159185 40,0.38848432563159185,0.32432743478528314
41,0.3198436341172465,0.39163178463382314 41,0.39163178463382314,0.3198436341172465
42,0.32643375049241685,0.3860578645369252 42,0.3860578645369252,0.32643375049241685
43,0.32203761977337186,0.4053389048253979 43,0.4053389048253979,0.32203761977337186
44,0.3346466456087704,0.4217820126417848 44,0.4217820126417848,0.3346466456087704
45,0.33223356867673165,0.4257244699851672 45,0.4257244699851672,0.33223356867673165
46,0.33057959050289126,0.42988772845220063 46,0.42988772845220063,0.33057959050289126
47,0.34588619420359423,0.4342738007458362 47,0.4342738007458362,0.34588619420359423
48,0.3328354412937589,0.4416623097378058 48,0.4416623097378058,0.3328354412937589
49,0.33565227636252953,0.4426712673092602 49,0.4426712673092602,0.33565227636252953
50,0.35908519690010676,0.4440749104193141 50,0.4440749104193141,0.35908519690010676
51,0.3619904717574287,0.4456199850626709 51,0.4456199850626709,0.3619904717574287
52,0.36212543071422687,0.45074758794403463 52,0.45074758794403463,0.36212543071422687
53,0.3651696447072414,0.472336181881003 53,0.472336181881003,0.3651696447072414
54,0.3982691624564969,0.47730382490643575 54,0.47730382490643575,0.3982691624564969
55,0.4013572084387477,0.454200815600248 55,0.454200815600248,0.4013572084387477
56,0.4246326548929088,0.4565666319605046 56,0.4565666319605046,0.4246326548929088
57,0.44773608146478383,0.4522750157266781 57,0.4522750157266781,0.44773608146478383
58,0.45157671443203573,0.4549088773042353 58,0.4549088773042353,0.45157671443203573
59,0.4563204517812888,0.46145616231522185 59,0.46145616231522185,0.4563204517812888
60,0.46300257860702615,0.4641242358826516 60,0.4641242358826516,0.46300257860702615
61,0.4967563269695634,0.47006659021417746 61,0.47006659021417746,0.4967563269695634
62,0.4994732063134373,0.47352022985333136 62,0.47352022985333136,0.4994732063134373
63,0.49518159007961093,0.4775136217473302 63,0.4775136217473302,0.49518159007961093
64,0.4872143751031807,0.4821848224907804 64,0.4821848224907804,0.4872143751031807

1 k_means hierarchical
2 2 0.6855584100867681 0.6979818296524081 0.6979818296524081 0.6855584100867681
3 3 0.6658312390685782 0.5363440260613704 0.5363440260613704 0.6658312390685782
4 4 0.5447405755407478 0.5447405755407478
5 5 0.49469855877597974 0.4950944104608897 0.4950944104608897 0.49469855877597974
6 6 0.49629928069605667 0.3167619075077442 0.3167619075077442 0.49629928069605667
7 7 0.4937183177275972 0.3273304877495634 0.3273304877495634 0.4937183177275972
8 8 0.4903327662796836 0.16989336921679118 0.16989336921679118 0.4903327662796836
9 9 0.33791118198002373 0.17626101482318196 0.17626101482318196 0.33791118198002373
10 10 0.2667320598048964 0.19790344008120894 0.19790344008120894 0.2667320598048964
11 11 0.2629948976926796 0.1943827895435377 0.1943827895435377 0.2629948976926796
12 12 0.25965211932671445 0.20556562870341602 0.20556562870341602 0.25965211932671445
13 13 0.26164323937367595 0.20144552653966163 0.20144552653966163 0.26164323937367595
14 14 0.25806076142240403 0.22548403695669203 0.22548403695669203 0.25806076142240403
15 15 0.26997893998401756 0.22918194758667895 0.22918194758667895 0.26997893998401756
16 16 0.27256451459055664 0.2404290571765335 0.2404290571765335 0.27256451459055664
17 17 0.2608837912623233 0.2345015455494567 0.2345015455494567 0.2608837912623233
18 18 0.241790230179569 0.2390816398182416 0.2390816398182416 0.241790230179569
19 19 0.24484234464495422 0.24094820256010968 0.24094820256010968 0.24484234464495422
20 20 0.2361050155539465 0.2435369787081999 0.2435369787081999 0.2361050155539465
21 21 0.23692336175194548 0.262463283756636 0.262463283756636 0.23692336175194548
22 22 0.23946566771940794 0.2742864390420934 0.2742864390420934 0.23946566771940794
23 23 0.24594283942153175 0.2979619533428987 0.2979619533428987 0.24594283942153175
24 24 0.24734609860636583 0.29936015461670856 0.29936015461670856 0.24734609860636583
25 25 0.2372755932074588 0.30224440986202594 0.30224440986202594 0.2372755932074588
26 26 0.24082497341896647 0.30600924875137986 0.30600924875137986 0.24082497341896647
27 27 0.24547723657004195 0.3147862783718484 0.3147862783718484 0.24547723657004195
28 28 0.2503460498700128 0.31887407003386015 0.31887407003386015 0.2503460498700128
29 29 0.26565769288673047 0.3204052924706567 0.3204052924706567 0.26565769288673047
30 30 0.2951470761811464 0.3106572702067674 0.3106572702067674 0.2951470761811464
31 31 0.30158824153259317 0.31330774028648145 0.31330774028648145 0.30158824153259317
32 32 0.3180875184494547 0.33210454757827634 0.33210454757827634 0.3180875184494547
33 33 0.32604023717225655 0.340503634089749 0.340503634089749 0.32604023717225655
34 34 0.3162922149083673 0.33568440892081625 0.33568440892081625 0.3162922149083673
35 35 0.31716183472339093 0.3545992807283562 0.3545992807283562 0.31716183472339093
36 36 0.3214298482703343 0.3575403386841057 0.3575403386841057 0.3214298482703343
37 37 0.32681546873349715 0.36503026576341707 0.36503026576341707 0.32681546873349715
38 38 0.32732304963529885 0.3738835074795801 0.3738835074795801 0.32732304963529885
39 39 0.32990135488218114 0.3929681262996284 0.3929681262996284 0.32990135488218114
40 40 0.32432743478528314 0.38848432563159185 0.38848432563159185 0.32432743478528314
41 41 0.3198436341172465 0.39163178463382314 0.39163178463382314 0.3198436341172465
42 42 0.32643375049241685 0.3860578645369252 0.3860578645369252 0.32643375049241685
43 43 0.32203761977337186 0.4053389048253979 0.4053389048253979 0.32203761977337186
44 44 0.3346466456087704 0.4217820126417848 0.4217820126417848 0.3346466456087704
45 45 0.33223356867673165 0.4257244699851672 0.4257244699851672 0.33223356867673165
46 46 0.33057959050289126 0.42988772845220063 0.42988772845220063 0.33057959050289126
47 47 0.34588619420359423 0.4342738007458362 0.4342738007458362 0.34588619420359423
48 48 0.3328354412937589 0.4416623097378058 0.4416623097378058 0.3328354412937589
49 49 0.33565227636252953 0.4426712673092602 0.4426712673092602 0.33565227636252953
50 50 0.35908519690010676 0.4440749104193141 0.4440749104193141 0.35908519690010676
51 51 0.3619904717574287 0.4456199850626709 0.4456199850626709 0.3619904717574287
52 52 0.36212543071422687 0.45074758794403463 0.45074758794403463 0.36212543071422687
53 53 0.3651696447072414 0.472336181881003 0.472336181881003 0.3651696447072414
54 54 0.3982691624564969 0.47730382490643575 0.47730382490643575 0.3982691624564969
55 55 0.4013572084387477 0.454200815600248 0.454200815600248 0.4013572084387477
56 56 0.4246326548929088 0.4565666319605046 0.4565666319605046 0.4246326548929088
57 57 0.44773608146478383 0.4522750157266781 0.4522750157266781 0.44773608146478383
58 58 0.45157671443203573 0.4549088773042353 0.4549088773042353 0.45157671443203573
59 59 0.4563204517812888 0.46145616231522185 0.46145616231522185 0.4563204517812888
60 60 0.46300257860702615 0.4641242358826516 0.4641242358826516 0.46300257860702615
61 61 0.4967563269695634 0.47006659021417746 0.47006659021417746 0.4967563269695634
62 62 0.4994732063134373 0.47352022985333136 0.47352022985333136 0.4994732063134373
63 63 0.49518159007961093 0.4775136217473302 0.4775136217473302 0.49518159007961093
64 64 0.4872143751031807 0.4821848224907804 0.4821848224907804 0.4872143751031807

View file

@ -0,0 +1,127 @@
,algorithm,k,min,mean,max
0,hierarchical,2,2,54.0,106
1,k_means,2,1,54.0,107
2,hierarchical,3,1,36.0,106
3,k_means,3,1,36.0,103
4,hierarchical,4,1,27.0,102
5,k_means,4,1,27.0,102
6,hierarchical,5,1,21.6,101
7,k_means,5,1,21.6,102
8,hierarchical,6,1,18.0,100
9,k_means,6,1,18.0,93
10,hierarchical,7,1,15.428571428571429,99
11,k_means,7,1,15.428571428571429,91
12,hierarchical,8,1,13.5,99
13,k_means,8,1,13.5,71
14,hierarchical,9,1,12.0,90
15,k_means,9,1,12.0,68
16,hierarchical,10,1,10.8,80
17,k_means,10,1,10.8,62
18,hierarchical,11,1,9.818181818181818,80
19,k_means,11,1,9.818181818181818,62
20,hierarchical,12,1,9.0,80
21,k_means,12,1,9.0,61
22,hierarchical,13,1,8.307692307692308,79
23,k_means,13,1,8.307692307692308,63
24,hierarchical,14,1,7.714285714285714,77
25,k_means,14,1,7.714285714285714,57
26,hierarchical,15,1,7.2,77
27,k_means,15,1,7.2,56
28,hierarchical,16,1,6.75,76
29,k_means,16,1,6.75,55
30,hierarchical,17,1,6.352941176470588,76
31,k_means,17,1,6.352941176470588,55
32,hierarchical,18,1,6.0,65
33,k_means,18,1,6.0,54
34,hierarchical,19,1,5.684210526315789,65
35,k_means,19,1,5.684210526315789,54
36,hierarchical,20,1,5.4,65
37,k_means,20,1,5.4,53
38,hierarchical,21,1,5.142857142857143,65
39,k_means,21,1,5.142857142857143,53
40,hierarchical,22,1,4.909090909090909,64
41,k_means,22,1,4.909090909090909,51
42,hierarchical,23,1,4.695652173913044,64
43,k_means,23,1,4.695652173913044,47
44,hierarchical,24,1,4.5,64
45,k_means,24,1,4.5,47
46,hierarchical,25,1,4.32,34
47,k_means,25,1,4.32,46
48,hierarchical,26,1,4.153846153846154,34
49,k_means,26,1,4.153846153846154,45
50,hierarchical,27,1,4.0,34
51,k_means,27,1,4.0,42
52,hierarchical,28,1,3.857142857142857,34
53,k_means,28,1,3.857142857142857,41
54,hierarchical,29,1,3.7241379310344827,34
55,k_means,29,1,3.7241379310344827,41
56,hierarchical,30,1,3.6,34
57,k_means,30,1,3.6,41
58,hierarchical,31,1,3.4838709677419355,34
59,k_means,31,1,3.4838709677419355,40
60,hierarchical,32,1,3.375,34
61,k_means,32,1,3.375,38
62,hierarchical,33,1,3.272727272727273,34
63,k_means,33,1,3.272727272727273,36
64,hierarchical,34,1,3.176470588235294,34
65,k_means,34,1,3.176470588235294,36
66,hierarchical,35,1,3.085714285714286,34
67,k_means,35,1,3.085714285714286,34
68,hierarchical,36,1,3.0,34
69,k_means,36,1,3.0,33
70,hierarchical,37,1,2.918918918918919,34
71,k_means,37,1,2.918918918918919,31
72,hierarchical,38,1,2.8421052631578947,34
73,k_means,38,1,2.8421052631578947,31
74,hierarchical,39,1,2.769230769230769,33
75,k_means,39,1,2.769230769230769,29
76,hierarchical,40,1,2.7,33
77,k_means,40,1,2.7,29
78,hierarchical,41,1,2.6341463414634148,33
79,k_means,41,1,2.6341463414634148,28
80,hierarchical,42,1,2.5714285714285716,33
81,k_means,42,1,2.5714285714285716,28
82,hierarchical,43,1,2.511627906976744,33
83,k_means,43,1,2.511627906976744,26
84,hierarchical,44,1,2.4545454545454546,33
85,k_means,44,1,2.4545454545454546,26
86,hierarchical,45,1,2.4,33
87,k_means,45,1,2.4,25
88,hierarchical,46,1,2.347826086956522,33
89,k_means,46,1,2.347826086956522,24
90,hierarchical,47,1,2.297872340425532,33
91,k_means,47,1,2.297872340425532,23
92,hierarchical,48,1,2.25,21
93,k_means,48,1,2.25,23
94,hierarchical,49,1,2.204081632653061,20
95,k_means,49,1,2.204081632653061,23
96,hierarchical,50,1,2.16,18
97,k_means,50,1,2.16,22
98,hierarchical,51,1,2.1176470588235294,17
99,k_means,51,1,2.1176470588235294,21
100,hierarchical,52,1,2.076923076923077,16
101,k_means,52,1,2.076923076923077,20
102,hierarchical,53,1,2.0377358490566038,16
103,k_means,53,1,2.0377358490566038,18
104,hierarchical,54,1,2.0,16
105,k_means,54,1,2.0,18
106,hierarchical,55,1,1.9636363636363636,16
107,k_means,55,1,1.9636363636363636,19
108,hierarchical,56,1,1.9285714285714286,16
109,k_means,56,1,1.9285714285714286,18
110,hierarchical,57,1,1.894736842105263,16
111,k_means,57,1,1.894736842105263,18
112,hierarchical,58,1,1.8620689655172413,16
113,k_means,58,1,1.8620689655172413,18
114,hierarchical,59,1,1.8305084745762712,16
115,k_means,59,1,1.8305084745762712,18
116,hierarchical,60,1,1.8,16
117,k_means,60,1,1.8,17
118,hierarchical,61,1,1.7704918032786885,16
119,k_means,61,1,1.7704918032786885,17
120,hierarchical,62,1,1.7419354838709677,16
121,k_means,62,1,1.7419354838709677,17
122,hierarchical,63,1,1.7142857142857142,16
123,k_means,63,1,1.7142857142857142,17
124,hierarchical,64,1,1.6875,16
125,k_means,64,1,1.6875,17
1 algorithm k min mean max
2 0 hierarchical 2 2 54.0 106
3 1 k_means 2 1 54.0 107
4 2 hierarchical 3 1 36.0 106
5 3 k_means 3 1 36.0 103
6 4 hierarchical 4 1 27.0 102
7 5 k_means 4 1 27.0 102
8 6 hierarchical 5 1 21.6 101
9 7 k_means 5 1 21.6 102
10 8 hierarchical 6 1 18.0 100
11 9 k_means 6 1 18.0 93
12 10 hierarchical 7 1 15.428571428571429 99
13 11 k_means 7 1 15.428571428571429 91
14 12 hierarchical 8 1 13.5 99
15 13 k_means 8 1 13.5 71
16 14 hierarchical 9 1 12.0 90
17 15 k_means 9 1 12.0 68
18 16 hierarchical 10 1 10.8 80
19 17 k_means 10 1 10.8 62
20 18 hierarchical 11 1 9.818181818181818 80
21 19 k_means 11 1 9.818181818181818 62
22 20 hierarchical 12 1 9.0 80
23 21 k_means 12 1 9.0 61
24 22 hierarchical 13 1 8.307692307692308 79
25 23 k_means 13 1 8.307692307692308 63
26 24 hierarchical 14 1 7.714285714285714 77
27 25 k_means 14 1 7.714285714285714 57
28 26 hierarchical 15 1 7.2 77
29 27 k_means 15 1 7.2 56
30 28 hierarchical 16 1 6.75 76
31 29 k_means 16 1 6.75 55
32 30 hierarchical 17 1 6.352941176470588 76
33 31 k_means 17 1 6.352941176470588 55
34 32 hierarchical 18 1 6.0 65
35 33 k_means 18 1 6.0 54
36 34 hierarchical 19 1 5.684210526315789 65
37 35 k_means 19 1 5.684210526315789 54
38 36 hierarchical 20 1 5.4 65
39 37 k_means 20 1 5.4 53
40 38 hierarchical 21 1 5.142857142857143 65
41 39 k_means 21 1 5.142857142857143 53
42 40 hierarchical 22 1 4.909090909090909 64
43 41 k_means 22 1 4.909090909090909 51
44 42 hierarchical 23 1 4.695652173913044 64
45 43 k_means 23 1 4.695652173913044 47
46 44 hierarchical 24 1 4.5 64
47 45 k_means 24 1 4.5 47
48 46 hierarchical 25 1 4.32 34
49 47 k_means 25 1 4.32 46
50 48 hierarchical 26 1 4.153846153846154 34
51 49 k_means 26 1 4.153846153846154 45
52 50 hierarchical 27 1 4.0 34
53 51 k_means 27 1 4.0 42
54 52 hierarchical 28 1 3.857142857142857 34
55 53 k_means 28 1 3.857142857142857 41
56 54 hierarchical 29 1 3.7241379310344827 34
57 55 k_means 29 1 3.7241379310344827 41
58 56 hierarchical 30 1 3.6 34
59 57 k_means 30 1 3.6 41
60 58 hierarchical 31 1 3.4838709677419355 34
61 59 k_means 31 1 3.4838709677419355 40
62 60 hierarchical 32 1 3.375 34
63 61 k_means 32 1 3.375 38
64 62 hierarchical 33 1 3.272727272727273 34
65 63 k_means 33 1 3.272727272727273 36
66 64 hierarchical 34 1 3.176470588235294 34
67 65 k_means 34 1 3.176470588235294 36
68 66 hierarchical 35 1 3.085714285714286 34
69 67 k_means 35 1 3.085714285714286 34
70 68 hierarchical 36 1 3.0 34
71 69 k_means 36 1 3.0 33
72 70 hierarchical 37 1 2.918918918918919 34
73 71 k_means 37 1 2.918918918918919 31
74 72 hierarchical 38 1 2.8421052631578947 34
75 73 k_means 38 1 2.8421052631578947 31
76 74 hierarchical 39 1 2.769230769230769 33
77 75 k_means 39 1 2.769230769230769 29
78 76 hierarchical 40 1 2.7 33
79 77 k_means 40 1 2.7 29
80 78 hierarchical 41 1 2.6341463414634148 33
81 79 k_means 41 1 2.6341463414634148 28
82 80 hierarchical 42 1 2.5714285714285716 33
83 81 k_means 42 1 2.5714285714285716 28
84 82 hierarchical 43 1 2.511627906976744 33
85 83 k_means 43 1 2.511627906976744 26
86 84 hierarchical 44 1 2.4545454545454546 33
87 85 k_means 44 1 2.4545454545454546 26
88 86 hierarchical 45 1 2.4 33
89 87 k_means 45 1 2.4 25
90 88 hierarchical 46 1 2.347826086956522 33
91 89 k_means 46 1 2.347826086956522 24
92 90 hierarchical 47 1 2.297872340425532 33
93 91 k_means 47 1 2.297872340425532 23
94 92 hierarchical 48 1 2.25 21
95 93 k_means 48 1 2.25 23
96 94 hierarchical 49 1 2.204081632653061 20
97 95 k_means 49 1 2.204081632653061 23
98 96 hierarchical 50 1 2.16 18
99 97 k_means 50 1 2.16 22
100 98 hierarchical 51 1 2.1176470588235294 17
101 99 k_means 51 1 2.1176470588235294 21
102 100 hierarchical 52 1 2.076923076923077 16
103 101 k_means 52 1 2.076923076923077 20
104 102 hierarchical 53 1 2.0377358490566038 16
105 103 k_means 53 1 2.0377358490566038 18
106 104 hierarchical 54 1 2.0 16
107 105 k_means 54 1 2.0 18
108 106 hierarchical 55 1 1.9636363636363636 16
109 107 k_means 55 1 1.9636363636363636 19
110 108 hierarchical 56 1 1.9285714285714286 16
111 109 k_means 56 1 1.9285714285714286 18
112 110 hierarchical 57 1 1.894736842105263 16
113 111 k_means 57 1 1.894736842105263 18
114 112 hierarchical 58 1 1.8620689655172413 16
115 113 k_means 58 1 1.8620689655172413 18
116 114 hierarchical 59 1 1.8305084745762712 16
117 115 k_means 59 1 1.8305084745762712 18
118 116 hierarchical 60 1 1.8 16
119 117 k_means 60 1 1.8 17
120 118 hierarchical 61 1 1.7704918032786885 16
121 119 k_means 61 1 1.7704918032786885 17
122 120 hierarchical 62 1 1.7419354838709677 16
123 121 k_means 62 1 1.7419354838709677 17
124 122 hierarchical 63 1 1.7142857142857142 16
125 123 k_means 63 1 1.7142857142857142 17
126 124 hierarchical 64 1 1.6875 16
127 125 k_means 64 1 1.6875 17

Binary file not shown.

After

Width:  |  Height:  |  Size: 102 KiB

View file

@ -25,10 +25,16 @@ def intrapairs(path: str) -> set[set[str, str]]:
def main(): def main():
filelist = glob.glob(IN_DIR + '/*_groundtruth.csv') filelist = glob.glob(IN_DIR + '/*_groundtruth.csv')
df_table = pd.DataFrame(columns=pd.MultiIndex.from_tuples([
('KMeans', 'Precision'),
('KMeans', 'Recall'),
('Agglomerative', 'Precision'),
('Agglomerative', 'Recall')]))
df_table.index.name = 'Class Name'
for f in filelist: for f in filelist:
clazz_name = os.path.basename(f) clazz_name = os.path.basename(f)
clazz_name = clazz_name[:clazz_name.rfind('_groundtruth.csv')] clazz_name = clazz_name[:clazz_name.rfind('_groundtruth.csv')]
print(clazz_name)
ground_pairs = intrapairs(f) ground_pairs = intrapairs(f)
for method in ['kmeans', 'hierarchical']: for method in ['kmeans', 'hierarchical']:
@ -39,10 +45,15 @@ def main():
precision = n_common / len(cluster_pairs) precision = n_common / len(cluster_pairs)
recall = n_common / len(ground_pairs) recall = n_common / len(ground_pairs)
print(method + " precision: " + str(precision)) algo = 'KMeans' if method == 'kmeans' else 'Agglomerative'
print(method + " recall: " + str(recall))
print() df_table.loc[clazz_name, [(algo, 'Precision'), (algo, 'Recall')]] = [
str(round(precision * 100, 2)) + '%',
str(round(recall * 100, 2)) + '%'
]
df_table.columns = [x[0] + ' ' + x[1] for x in df_table.columns]
print(df_table.to_markdown())
if __name__ == '__main__': if __name__ == '__main__':

8
report/build.sh Executable file
View file

@ -0,0 +1,8 @@
#!/bin/bash
# Build the PDF report (main.pdf) from its Markdown source (main.md) via pandoc.

# Abort on the first failing command.
set -o errexit

# Resolve the directory containing this script so the build can be launched
# from any working directory; output of the inner `cd` is silenced.
report_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)
cd "$report_dir"

pandoc main.md -o main.pdf

View file

@ -1,7 +1,7 @@
--- ---
author: Claudio Maggioni author: Claudio Maggioni
title: Information Modelling & Analysis -- Project 1 title: Information Modelling & Analysis -- Project 1
geometry: margin=2.5cm,bottom=3cm geometry: margin=2cm,bottom=3cm
--- ---
<!-- <!--
@ -9,59 +9,62 @@ The following shows a minimal submission report for project 1. If you
choose to use this template, replace all template instructions (the choose to use this template, replace all template instructions (the
yellow bits) with your own values. In addition, for any section, if yellow bits) with your own values. In addition, for any section, if
**and only if** anything was unclear or warnings were raised by the **and only if** anything was unclear or warnings were raised by the
code, and you had to take assumptions about the correct implementation code, and you had to take assumptions about the correct implementation (e.g.,
(e.g., about details of a metric), describe your assumptions in one or about details of a metric), describe your assumptions in one or two sentences.
two sentences.
You may - at your own risk - also choose not to use this template. As You may - at your own risk - also choose not to use this template. As long as
long as your submission is a latex-generated, English PDF containing all your submission is a latex-generated, English PDF containing all expected info,
expected info, you'll be fine. you'll be fine. -->
-->
# Code Repository # Code Repository
The code and result files part of this submission can be found at: The code and result files part of this submission can be found at:
::: center ::: center
Repository: \url{https://github.com/infoMA2023/project-01-god-classes-maggicl} Repository: \url{https://github.com/infoMA2023/project-01-god-classes-maggicl}
Commit ID: **TBD** Commit ID: **TBD**
::: :::
# Data Pre-Processing # Data Pre-Processing
## God Classes ## God Classes
The first part of the project requires to label some classes of the _Xerces_ The first part of the project requires to label some classes of the _Xerces_
project as "God classes" based on the number of methods each class has. project as "God classes" based on the number of methods each class has. From
here onwards the Java package prefix `org.apache.xerces` is omitted when discussing
fully qualified names of classes for the sake of brevity.
Specifically, I label "God classes" the classes that have a number of methods Specifically, I label "God classes" the classes that have a number of methods
six times the standard deviation above the mean number of methods, i.e. where six times the standard deviation above the mean number of methods, i.e.
the condition where the condition
$$|M(C)| > \mu(M) + 6\sigma(M)$$ $$|M(C)| > \mu(M) + 6\sigma(M)$$
holds. holds.
To scan and compute the number of methods of each class I use the Python library `javalang`, which implements the Java AST and parser. The Python script To scan and compute the number of methods of each class I use the Python library
`javalang`, which implements the Java AST and parser. The Python script
`./find_god_classes.py` uses this library to parse each file in the project and `./find_god_classes.py` uses this library to parse each file in the project and
compute the number of methods of each class. Note that only non-constructor methods are counted (specifically the code counts the number of `method` nodes in each `ClassDeclaration` node). compute the number of methods of each class. Note that only non-constructor
methods are counted (specifically the code counts the number of `method` nodes
in each `ClassDeclaration` node).
Then, the script computes mean and standard Then, the script computes mean and standard deviation of the number of methods
deviation of the number of methods and filters the list of classes according to the and filters the list of classes according to the condition described above. The
condition described above. The file `god_classes/god_classes.csv` then is outputted file `god_classes/god_classes.csv` then is outputted listing all the god classes
listing all the god classes found. found.
The god classes I identified, and their corresponding number of methods The god classes I identified, and their corresponding number of methods can be
can be found in Table [1](#tab:god_classes){reference-type="ref" found in Table [1](#tab:god_classes){reference-type="ref"
reference="tab:god_classes"}. reference="tab:god_classes"}.
::: {#tab:god_classes} ::: {#tab:god_classes}
| **Class Name** | **# Methods** | | **Class Name** | **# Methods** |
|:------------------------------------------------|------------:| |:------------------------------------------------|------------:|
| org.apache.xerces.impl.xs.traversers.XSDHandler | 118 | | impl.xs.traversers.XSDHandler | 118 |
| org.apache.xerces.impl.dtd.DTDGrammar | 101 | | impl.dtd.DTDGrammar | 101 |
| org.apache.xerces.xinclude.XIncludeHandler | 116 | | xinclude.XIncludeHandler | 116 |
| org.apache.xerces.dom.CoreDocumentImpl | 125 | | dom.CoreDocumentImpl | 125 |
: Identified God Classes : Identified God Classes
::: :::
@ -70,84 +73,198 @@ reference="tab:god_classes"}.
## Feature Vectors ## Feature Vectors
In this part of the project we produce the feature vectors used to later cluster In this part of the project we produce the feature vectors used to later cluster
the methods of each God class into separate clusters. We produce one feature method per the methods of each God class into separate clusters. We produce one feature
non-constructor Java method in each god class. method per non-constructor Java method in each god class.
The columns of each vector represent The columns of each vector represent fields and methods referenced by each
fields and methods referenced by each method, i.e. fields and methods actively used by the method in their method's body. method, i.e. fields and methods actively used by the method in their method's
body.
When analyzing references to fields, additional constraints need to be specified to handle edge cases. When analyzing references to fields, additional constraints need to be specified
Namely, a field's property may be referenced (e.g. an access to array `a` may fetch its `length` property, i.e. `a.length`). In this to handle edge cases. Namely, a field's property may be referenced (e.g. an
cases I consider the qualifier (i.e. the field itself, `a`) itself and not its property. When the qualifier is a class (i.e. access to array `a` may fetch its `length` property, i.e. `a.length`). In this
the code references a property of another class, e.g. `Integer.MAX_VALUE`) we consider the class name itself (i.e. `Integer`) and not cases I consider the qualifier (i.e. the field itself, `a`) itself and not its
the name of the property. Should the qualifier be a subproperty itself (e.g. in `a.b.c`, where `a.b` would be the qualifier according to `javalang`) property. When the qualifier is a class (i.e. the code references a property of
another class, e.g. `Integer.MAX_VALUE`) we consider the class name itself (i.e.
`Integer`) and not the name of the property. Should the qualifier be a
subproperty itself (e.g. in `a.b.c`, where `a.b` would be the qualifier
according to `javalang`)
For methods, I only consider calls to methods of the class itself where the qualifier is unspecified or `this`. Calls to parent methods For methods, I only consider calls to methods of the class itself where the
(i.e. calls like `super.something()`) are not considered. qualifier is unspecified or `this`. Calls to parent methods (i.e. calls like
`super.something()`) are not considered.
The feature vector extraction phase is performed by the Python script `extract_feature_vectors.py`. The script takes `god_classes/god_classes.csv` as input The feature vector extraction phase is performed by the Python script
and loads the AST of each class listed in it. Then, a list of all the fields and methods in the class is built, and each method is scanned to see which fields `extract_feature_vectors.py`. The script takes `god_classes/god_classes.csv` as
and methods it references in its body according to the previously described rules. Then, a CSV per class is built storing all feature vectors. Each file has a name matching to the FQDN (Fully-qualified domain name) of the class. Each CSV row refers to a method in the class, and each CSV column refers to a field, method or referenced class. A cell has the value of 1 when the method of that row references the field, method or class marked by that column, and it has the value 0 otherwise. Columns with only zeros are omitted. input and loads the AST of each class listed in it. Then, a list of all the
fields and methods in the class is built, and each method is scanned to see
which fields and methods it references in its body according to the previously
described rules. Then, a CSV per class is built storing all feature vectors.
Each file has a name matching the FQN (fully-qualified name) of the
class. Each CSV row refers to a method in the class, and each CSV column refers
to a field, method or referenced class. A cell has the value of 1 when the
method of that row references the field, method or class marked by that column,
and it has the value 0 otherwise. Columns with only zeros are omitted.
Table [2](#tab:feat_vec){reference-type="ref" reference="tab:feat_vec"} Table [2](#tab:feat_vec){reference-type="ref" reference="tab:feat_vec"} shows
shows aggregate numbers regarding the extracted feature vectors for the aggregate numbers regarding the extracted feature vectors for the god classes.
god classes. Note that the number of attributes refers to the number of fields, methods or classes actually references (i.e. the number of columns after omission of 0s). Note that the number of attributes refers to the number of fields, methods or
classes actually referenced (i.e. the number of columns after omission of 0s).
::: {#tab:feat_vec} ::: {#tab:feat_vec}
| **Class Name** | **# Feature Vectors** | **# Attributes\*** | | **Class Name** | **# Feature Vectors** | **# Attributes\*** |
|:------------------------------------------------|----------------------:|-----------------:| |:------------------------------------------------|----------------------:|-----------------:|
| org.apache.xerces.impl.xs.traversers.XSDHandler | 106 | 183 | | impl.xs.traversers.XSDHandler | 106 | 183 |
| org.apache.xerces.impl.dtd.DTDGrammar | 91 | 106 | | impl.dtd.DTDGrammar | 91 | 106 |
| org.apache.xerces.xinclude.XIncludeHandler | 108 | 143 | | xinclude.XIncludeHandler | 108 | 143 |
| org.apache.xerces.dom.CoreDocumentImpl | 117 | 63 | | dom.CoreDocumentImpl | 117 | 63 |
: Feature vector summary (\*= used at least once) : Feature vector summary (\*= used at least once)
::: :::
# Clustering {#sec:clustering} # Clustering {#sec:clustering}
In this section I cover the techniques used to cluster the methods of each god
class. The project aims to use KMeans clustering and agglomerative hierarchical
clustering to group these methods together in cohesive units which could be
potentially refactored out of the god class they belong to.
## Algorithm Configurations ## Algorithm Configurations
Report/comment the algorithm configurations (distance function, linkage To perform KMeans clustering, I use the `cluster.KMeans` Scikit-Learn
rule, etc.). You may do so in any form you feel suited, but a short implementation of the algorithm. I use the default parameters: feature vectors
paragraph of text is probably sufficient. are compared with Euclidean distance, centroids are used instead of medoids,
and the initial centroids are computed with the greedy algorithm `kmeans++`. The
random seed is fixed to $0$ to allow for reproducibility between executions of
the clustering script.
To perform Hierarchical clustering, I use the `cluster.AgglomerativeClustering`
Scikit-Learn implementation of the algorithm. Again feature vectors are
compared with Euclidean distance, but as a linkage metric I choose to use
complete linkage. As agglomerative clustering is deterministic, no random seed
is needed for this algorithm.
I run the two algorithms for all $k \in [2,65]$; if fewer than 65 feature
vectors with distinct values are available for a god class, the upper bound of
$k$ is that number instead.
## Testing Various K & Silhouette Scores ## Testing Various K & Silhouette Scores
\(1\) Report data about the clusters produced by the two algorithms at To find the optimal value of $k$ for both algorithms, the distribution of
various k (#clusters, size of clusters, silhouette scores). You may use cluster sizes and silhouette across values of $k$, and to apply the optimal
any suitable format (table, graph, \...). clustering for each god class I run the command:
\(2\) Briefly comment your results. What is the best configuration, and ```shell
why? Anything else you observed? ./silhouette.py --validate --autorun
```
Feature vectors are read from the `feature_vectors` directory and all the
results are stored in the `clustering` directory.
Figures [1](#fig:xsd){reference-type="ref" reference="fig:xsd"},
[2](#fig:dtd){reference-type="ref" reference="fig:dtd"},
[3](#fig:xinc){reference-type="ref" reference="fig:xinc"}, and
[4](#fig:cimpl){reference-type="ref" reference="fig:cimpl"} show the
distributions of cluster sizes for each god class obtained by running the KMeans
and agglomerative clustering algorithm as described in the previous sections.
For all god classes, the mean number of elements in each cluster decreases in
inverse proportion to $k$ (it is simply $n/k$ for $n$ clustered methods). Aside from the first values of $k$ for
class `DTDGrammar` (where it was 2), the minimum cluster size was 1 for all
analyzed clusterings. Conversely, the maximum cluster size varies a lot, almost
always being monotonically non increasing as $k$ increases, occasionally forming
wide plateaus. The silhouette metric distribution instead generally follows a
dogleg-like path, sharply decreasing for the first values of $k$ and slowly
increasing afterwards $k$. This leads the choice of the optimal $k$ number of
clusters for each algorithm to be between really low and really high values.
The figures also show the distribution of the silhouette metric per algorithm
and per value of $k$. The optimal values of $k$ and the respective silhouette
values for each implementation are reported in Table
[3](#tab:sumup){reference-type="ref" reference="tab:sumup"}.
From the values we can gather that agglomerative clustering performs overall
better than KMeans for the god classes in the project. Almost all god classes are
optimally clustered with few clusters, with the exception of `CoreDocumentImpl`
being optimally clustered with unit clusters. This could indicate higher
cohesion between implementation details of the other classes, and lower cohesion
in `CoreDocumentImpl` (given the name it would not be surprising if this class
plays the role of a utility class of sorts, combining lots of implementation
details affecting different areas of the code).
Agglomerative clustering with complete linkage could perform better than KMeans
due to a more urgent need for separation rather than cohesion in the classes
that were analyzed. Given the high dimensionality of the feature vectors used,
and the fact that Euclidean distance is used to compare feature vectors, the
hyper-space of method features for each god class is likely sparse, with
occasional clusters of tightly-knit features. Given the prevailing sparsity,
complete linkage could be suitable here since, above all, it avoids
agglomerating distant clusters.
![Clustering metrics for class impl.xs.traversers.XSDHandler](../clustering/org.apache.xerces.impl.xs.traversers.XSDHandler_stats.png){#fig:xsd}
![Clustering metrics for class impl.dtd.DTDGrammar](../clustering/org.apache.xerces.impl.dtd.DTDGrammar_stats.png){#fig:dtd}
![Clustering metrics for class xinclude.XIncludeHandler](../clustering/org.apache.xerces.xinclude.XIncludeHandler_stats.png){#fig:xinc}
![Clustering metrics for class dom.CoreDocumentImpl](../clustering/org.apache.xerces.dom.CoreDocumentImpl_stats.png){#fig:cimpl}
::: {#tab:sumup}
| **Class Name** | **KMeans K** | **KMeans silhouette** | **Hierarchical K** | **Hierarchical silhouette** |
|:----------------------------|-----------:|--------------------:|-----------------:|--------------------------:|
| dom.CoreDocumentImpl | 45 |0.7290 | 45 | 0.7290 |
| impl.xs.traversers.XSDHandler | 2 |0.5986 | 3 | 0.5989 |
| impl.dtd.DTDGrammar | 58 |0.3980 | 2 | 0.4355 |
| xinclude.XIncludeHandler | 2 |0.6980 | 2 | 0.6856 |
: Optimal hyperparameters and corresponding silhouette metrics for the KMeans and
Hierarchical clustering algorithms.
:::
# Evaluation # Evaluation
## Ground Truth ## Ground Truth
I computed the ground truth using the command \.... The generated files I computed the ground truth using the Python script `./ground_truth.py`. The
are checked into the repository with the names \.... generated files are checked into the repository with the names
`clustering/{className}_groundtruth.csv` where `{className}` is the fully qualified name of each
god class.
Comment briefly on the strengths & weaknesses of our ground truth. The ground truth in this project is not given but generated according to simple
heuristics. Since no inherent structure or labelling from experts exists to
group the methods in each god class, the project requires labelling methods based
on keyword matching within each method name. The list of keywords used can be
found in `keyword_list.txt`. This approach makes it possible to have a ground truth at all
with little computational cost and labelling effort, but it assumes the method
name and the chosen keywords are indeed of enough significance to form a
meaningful clustering of methods that form refactorable cohesive units of
functionality.
## Precision and Recall ## Precision and Recall
::: {#tab:eval} ::: {#tab:eval}
---------------- ------------------- -------- ------------- -------- | **Class Name** | **KMeans Precision** | **KMeans Recall** | **Agglomerative Precision** | **Agglomerative Recall** |
**Class Name** **Agglomerative** **K-Means** |:------------------------------------------------|-------------------:|----------------:|--------------------------:|-----------------------:|
Prec. Recall Prec. Recall | xinclude.XIncludeHandler | 69.83% | 97.80% | 69.58% | 95.65% |
\... \... \... \... \... | dom.CoreDocumentImpl | 64.80% | 28.26% | 68.11% | 29.70% |
---------------- ------------------- -------- ------------- -------- | impl.xs.traversers.XSDHandler | 36.17% | 97.24% | 36.45% | 96.11% |
| impl.dtd.DTDGrammar | 87.65% | 6.87% | 52.21% | 94.28% |
: Evaluation Summary : Evaluation Summary
::: :::
Precision and Recall, for the optimal configurations found in Section Precision and Recall, for the optimal configurations found in Section
[3](#sec:clustering){reference-type="ref" reference="sec:clustering"}, [3](#sec:clustering){reference-type="ref" reference="sec:clustering"}, are
are reported in Table [3](#tab:eval){reference-type="ref" reported in Table [4](#tab:eval){reference-type="ref" reference="tab:eval"}.
reference="tab:eval"}.
\begin{center}
\color{red} comment precision and recall values
\end{center}
## Practical Usefulness ## Practical Usefulness
Discuss the practical usefulness of the obtained code refactoring \begin{center}
assistant in a realistic setting (1 paragraph). \color{red}Discuss the practical usefulness of the obtained code refactoring assistant in a
realistic setting (1 paragraph).
\end{center}

Binary file not shown.

View file

@ -8,6 +8,9 @@ import pandas as pd
import argparse import argparse
from k_means import cluster_kmeans from k_means import cluster_kmeans
from hierarchical import cluster_hierarchical from hierarchical import cluster_hierarchical
from collections import Counter
import seaborn as sns
import matplotlib.pyplot as plt
DIR: str = os.path.dirname(os.path.realpath(__file__)) DIR: str = os.path.dirname(os.path.realpath(__file__))
OUT_DIR: str = DIR + '/clustering' OUT_DIR: str = DIR + '/clustering'
@ -20,47 +23,91 @@ def clean_output():
filelist = glob.glob(OUT_DIR + '/*_silhouette.csv') filelist = glob.glob(OUT_DIR + '/*_silhouette.csv')
for f in filelist: for f in filelist:
os.remove(f) os.remove(f)
filelist = glob.glob(OUT_DIR + '/*.png')
for f in filelist:
os.remove(f)
def validate(path: str, clazz_name: str, autorun: bool): def validate(path: str, clazz_name: str, autorun: bool, df_table):
df = pd.DataFrame(columns=['k_means', 'hierarchical'], dtype=float) df = pd.DataFrame(columns=['k_means', 'hierarchical'], dtype=float)
df_stats = pd.DataFrame(columns=['algorithm', 'k', 'min', 'mean', 'max'])
def add_stat(algo: str, k: int, Y: any, i: int):
y_occurs = list(Counter(Y).values()) # count number of elements in each cluster
df_stats.loc[i, :] = [algo, k, np.min(y_occurs), np.mean(y_occurs), np.max(y_occurs)]
# We bound the number of clusters by the number of distinct points in our dataset. # We bound the number of clusters by the number of distinct points in our dataset.
# To count them, we compute the number of "distinct" feature vectors and we # To count them, we compute the number of "distinct" feature vectors and we
# bound to the minimum of K_MAX and this number. # bound to the minimum of K_MAX and this number.
nodup = pd.read_csv(path, index_col=0).drop_duplicates() nodup = pd.read_csv(path, index_col=0).drop_duplicates()
max_distinct = len(nodup) max_distinct = len(nodup)
print("Max distinct:", max_distinct) limit = min(K_MAX, max_distinct)
for n in range(2, min(K_MAX, max_distinct)): i: int = 0
for n in range(2, limit):
X_h, Y_h = cluster_hierarchical(path, n, save_to_disk=False) X_h, Y_h = cluster_hierarchical(path, n, save_to_disk=False)
df.loc[n, 'k_means'] = silhouette_score(X_h, Y_h) add_stat('hierarchical', n, Y_h, i)
i += 1
df.loc[n, 'hierarchical'] = silhouette_score(X_h, Y_h)
X_k, Y_k = cluster_kmeans(path, n, save_to_disk=False) X_k, Y_k = cluster_kmeans(path, n, save_to_disk=False)
df.loc[n, 'hierarchical'] = silhouette_score(X_k, Y_k) add_stat('k_means', n, Y_k, i)
i += 1
df.loc[n, 'k_means'] = silhouette_score(X_k, Y_k)
k_kmeans = df[['k_means']].idxmax()[0] k_kmeans = df[['k_means']].idxmax()[0]
k_hierarchical = df[['hierarchical']].idxmax()[0] k_hierarchical = df[['hierarchical']].idxmax()[0]
print("K_means optimal value: " + str(k_kmeans)) df_table.loc[clazz_name] = [k_kmeans, 0, k_hierarchical, 0]
print("Hierarchical optimal value: " + str(k_hierarchical))
df.to_csv(OUT_DIR + '/' + clazz_name + '_silhouette.csv') df.to_csv(OUT_DIR + '/' + clazz_name + '_silhouette.csv')
df_stats.to_csv(OUT_DIR + '/' + clazz_name + '_stats.csv')
if autorun: if autorun:
cluster_hierarchical(path, k_hierarchical) cluster_hierarchical(path, k_hierarchical)
cluster_kmeans(path, k_kmeans) cluster_kmeans(path, k_kmeans)
# Plot stats
sns.set_theme(palette="hls")
# Initialize the matplotlib figure
f = plt.figure(figsize=(14, 12))
gs = f.add_gridspec(2, 2)
ax1 = f.add_subplot(gs[0, 0])
ax2 = f.add_subplot(gs[0, 1])
ax3 = f.add_subplot(gs[1, :])
df_k = df_stats.loc[df_stats.algorithm == 'k_means', ['k', 'min', 'mean', 'max']].set_index('k', drop=True)
df_h = df_stats.loc[df_stats.algorithm == 'hierarchical', ['k', 'min', 'mean', 'max']].set_index('k', drop=True)
sns.lineplot(data=df_k, palette="tab10", ax=ax1)
sns.lineplot(data=df_h, palette="tab10", ax=ax2)
sns.lineplot(data=df, palette="tab10", ax=ax3)
# Add a legend and informative axis label
ax1.set(ylabel="# of elements", ylim=[0, 130], xlabel="# of clusters", xlim=[2, limit])
ax1.set_title("K-Means cluster sizes")
ax2.set(ylabel="# of elements", ylim=[0, 130], xlabel="# of clusters", xlim=[2, limit])
ax2.set_title("Hierarchical cluster sizes")
ax3.set(ylabel="Silhouette", ylim=[0, 1], xlabel="# of clusters", xlim=[2, limit])
ax3.set_title("Silhouette metrics per # of clusters")
sns.despine(left=True, bottom=True)
f.savefig(OUT_DIR + '/' + clazz_name + '_stats.png')
plt.clf()
def compute_silhouette(path: str, clazz_name: str, suffix: str): def compute_silhouette(path: str, clazz_name: str, suffix: str) -> float:
df_y = pd.read_csv(OUT_DIR + '/' + clazz_name + '_' + suffix + '.csv') df_y = pd.read_csv(OUT_DIR + '/' + clazz_name + '_' + suffix + '.csv')
Y = df_y.iloc[:, 1].values Y = df_y.iloc[:, 1].values
df = pd.read_csv(path) df = pd.read_csv(path)
X = df.drop(df.columns[0], axis=1).to_numpy() X = df.drop(df.columns[0], axis=1).to_numpy()
print("Silhouette for " + suffix + ": " + str(silhouette_score(X, Y))) s = round(silhouette_score(X, Y), 4)
print("Silhouette for " + suffix + ": " + str(s))
return s
def main(): def main():
@ -70,26 +117,30 @@ def main():
parser.add_argument('--autorun', action='store_true', parser.add_argument('--autorun', action='store_true',
help='if validating, computes CSV for optimal clustering automatically') help='if validating, computes CSV for optimal clustering automatically')
args = parser.parse_args() args = parser.parse_args()
if args.validate: if args.validate:
clean_output() clean_output()
df_table = pd.DataFrame(columns=['KMeans K', 'KMeans silhouette', 'Hierarchical K', 'Hierarchical silhouette'])
filelist = glob.glob(IN_DIR + '/*.csv') filelist = glob.glob(IN_DIR + '/*.csv')
for f in filelist: for f in filelist:
clazz_name = os.path.basename(f) clazz_name = os.path.basename(f)
clazz_name = clazz_name[:clazz_name.rfind('.')] clazz_name = clazz_name[:clazz_name.rfind('.')]
print(clazz_name) if args.validate:
validate(f, clazz_name, args.autorun, df_table)
sk = compute_silhouette(f, clazz_name, 'kmeans')
sh = compute_silhouette(f, clazz_name, 'hierarchical')
if args.validate: if args.validate:
validate(f, clazz_name, args.autorun) df_table.loc[clazz_name, 'KMeans silhouette'] = sk
df_table.loc[clazz_name, 'Hierarchical silhouette'] = sh
compute_silhouette(f, clazz_name, 'kmeans') df_table.index.name = 'Class Name'
compute_silhouette(f, clazz_name, 'hierarchical') print(df_table.to_markdown())
print()
if __name__ == '__main__': if __name__ == '__main__':