report almost done
This commit is contained in:
parent
59904646fa
commit
302f53f5ea
21 changed files with 1159 additions and 502 deletions
|
@ -1,45 +1,45 @@
|
||||||
,k_means,hierarchical
|
,k_means,hierarchical
|
||||||
2,0.7008424223503156,0.3958383820498147
|
2,0.3958383820498147,0.7008424223503156
|
||||||
3,0.5710705368479275,0.4083355324423938
|
3,0.4083355324423938,0.5710705368479275
|
||||||
4,0.5612355754261723,0.4894345431495262
|
4,0.4894345431495262,0.5612355754261723
|
||||||
5,0.45447105698494905,0.49390485171067744
|
5,0.49390485171067744,0.45447105698494905
|
||||||
6,0.4542950961743021,0.49437178337314974
|
6,0.49437178337314974,0.4542950961743021
|
||||||
7,0.5169337345938171,0.4996628258355101
|
7,0.4996628258355101,0.5169337345938171
|
||||||
8,0.5048012323625627,0.504680719000111
|
8,0.504680719000111,0.5048012323625627
|
||||||
9,0.4981437021345769,0.5104029882614454
|
9,0.5104029882614454,0.4981437021345769
|
||||||
10,0.514873610056946,0.39391549620101274
|
10,0.39391549620101274,0.514873610056946
|
||||||
11,0.4397616290614397,0.35593829934237226
|
11,0.35593829934237226,0.4397616290614397
|
||||||
12,0.3966368345309925,0.3965649809723018
|
12,0.3965649809723018,0.3966368345309925
|
||||||
13,0.40515142998089104,0.4035942512051252
|
13,0.4035942512051252,0.40515142998089104
|
||||||
14,0.40783453521401053,0.41018624058063885
|
14,0.41018624058063885,0.40783453521401053
|
||||||
15,0.4239033913796109,0.45557751119565765
|
15,0.45557751119565765,0.4239033913796109
|
||||||
16,0.42065530265413026,0.47640709656766556
|
16,0.47640709656766556,0.42065530265413026
|
||||||
17,0.44344469866152514,0.4974425160835303
|
17,0.4974425160835303,0.44344469866152514
|
||||||
18,0.4400719065542468,0.5290487299051633
|
18,0.5290487299051633,0.4400719065542468
|
||||||
19,0.44608395823875535,0.5485454650471248
|
19,0.5485454650471248,0.44608395823875535
|
||||||
20,0.44877269935654723,0.5586056973417746
|
20,0.5586056973417746,0.44877269935654723
|
||||||
21,0.48118392208651517,0.5385866967307906
|
21,0.5385866967307906,0.48118392208651517
|
||||||
22,0.48389798035280496,0.538222592035968
|
22,0.538222592035968,0.48389798035280496
|
||||||
23,0.48663428414368126,0.550727295003801
|
23,0.550727295003801,0.48663428414368126
|
||||||
24,0.5087496231379599,0.5729072600132372
|
24,0.5729072600132372,0.5087496231379599
|
||||||
25,0.5308958702007723,0.5954078415061489
|
25,0.5954078415061489,0.5308958702007723
|
||||||
26,0.533742178035476,0.6182602907647171
|
26,0.6182602907647171,0.533742178035476
|
||||||
27,0.5366335268898433,0.6415000474402278
|
27,0.6415000474402278,0.5366335268898433
|
||||||
28,0.5688721496510291,0.6464201697751911
|
28,0.6464201697751911,0.5688721496510291
|
||||||
29,0.5718756117789308,0.6701951689242575
|
29,0.6701951689242575,0.5718756117789308
|
||||||
30,0.5749678644659783,0.6738663960033637
|
30,0.6738663960033637,0.5749678644659783
|
||||||
31,0.5975986067541601,0.6776704976739869
|
31,0.6776704976739869,0.5975986067541601
|
||||||
32,0.6010454124801283,0.6759936834928909
|
32,0.6759936834928909,0.6010454124801283
|
||||||
33,0.6047324451505658,0.6780082327270405
|
33,0.6780082327270405,0.6047324451505658
|
||||||
34,0.6087467116081876,0.6819745883778254
|
34,0.6819745883778254,0.6087467116081876
|
||||||
35,0.613131689815019,0.6860900076219251
|
35,0.6860900076219251,0.613131689815019
|
||||||
36,0.6386708325196511,0.6921336553243742
|
36,0.6921336553243742,0.6386708325196511
|
||||||
37,0.6449490032291169,0.6964066920515507
|
37,0.6964066920515507,0.6449490032291169
|
||||||
38,0.6764810977640761,0.6932020971027025
|
38,0.6932020971027025,0.6764810977640761
|
||||||
39,0.6773895830074159,0.6977143227629022
|
39,0.6977143227629022,0.6773895830074159
|
||||||
40,0.6918179479278735,0.7024070374495096
|
40,0.7024070374495096,0.6918179479278735
|
||||||
41,0.6964034645667346,0.7072912915110808
|
41,0.7072912915110808,0.6964034645667346
|
||||||
42,0.7024471122691838,0.712379056158551
|
42,0.712379056158551,0.7024471122691838
|
||||||
43,0.7256701207957181,0.7176833214293175
|
43,0.7176833214293175,0.7256701207957181
|
||||||
44,0.7230610997944976,0.7232182069292477
|
44,0.7232182069292477,0.7230610997944976
|
||||||
45,0.7289990873402858,0.7289990873402857
|
45,0.7289990873402857,0.7289990873402858
|
||||||
|
|
|
89
clustering/org.apache.xerces.dom.CoreDocumentImpl_stats.csv
Normal file
89
clustering/org.apache.xerces.dom.CoreDocumentImpl_stats.csv
Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
,algorithm,k,min,mean,max
|
||||||
|
0,hierarchical,2,1,58.5,116
|
||||||
|
1,k_means,2,1,58.5,116
|
||||||
|
2,hierarchical,3,1,39.0,113
|
||||||
|
3,k_means,3,1,39.0,115
|
||||||
|
4,hierarchical,4,1,29.25,113
|
||||||
|
5,k_means,4,1,29.25,98
|
||||||
|
6,hierarchical,5,1,23.4,111
|
||||||
|
7,k_means,5,1,23.4,98
|
||||||
|
8,hierarchical,6,1,19.5,111
|
||||||
|
9,k_means,6,1,19.5,98
|
||||||
|
10,hierarchical,7,1,16.714285714285715,97
|
||||||
|
11,k_means,7,1,16.714285714285715,98
|
||||||
|
12,hierarchical,8,1,14.625,97
|
||||||
|
13,k_means,8,1,14.625,98
|
||||||
|
14,hierarchical,9,1,13.0,96
|
||||||
|
15,k_means,9,1,13.0,97
|
||||||
|
16,hierarchical,10,1,11.7,96
|
||||||
|
17,k_means,10,1,11.7,92
|
||||||
|
18,hierarchical,11,1,10.636363636363637,93
|
||||||
|
19,k_means,11,1,10.636363636363637,89
|
||||||
|
20,hierarchical,12,1,9.75,86
|
||||||
|
21,k_means,12,1,9.75,84
|
||||||
|
22,hierarchical,13,1,9.0,84
|
||||||
|
23,k_means,13,1,9.0,83
|
||||||
|
24,hierarchical,14,1,8.357142857142858,84
|
||||||
|
25,k_means,14,1,8.357142857142858,83
|
||||||
|
26,hierarchical,15,1,7.8,84
|
||||||
|
27,k_means,15,1,7.8,77
|
||||||
|
28,hierarchical,16,1,7.3125,84
|
||||||
|
29,k_means,16,1,7.3125,75
|
||||||
|
30,hierarchical,17,1,6.882352941176471,79
|
||||||
|
31,k_means,17,1,6.882352941176471,73
|
||||||
|
32,hierarchical,18,1,6.5,79
|
||||||
|
33,k_means,18,1,6.5,70
|
||||||
|
34,hierarchical,19,1,6.157894736842105,46
|
||||||
|
35,k_means,19,1,6.157894736842105,70
|
||||||
|
36,hierarchical,20,1,5.85,46
|
||||||
|
37,k_means,20,1,5.85,70
|
||||||
|
38,hierarchical,21,1,5.571428571428571,46
|
||||||
|
39,k_means,21,1,5.571428571428571,70
|
||||||
|
40,hierarchical,22,1,5.318181818181818,46
|
||||||
|
41,k_means,22,1,5.318181818181818,70
|
||||||
|
42,hierarchical,23,1,5.086956521739131,46
|
||||||
|
43,k_means,23,1,5.086956521739131,68
|
||||||
|
44,hierarchical,24,1,4.875,46
|
||||||
|
45,k_means,24,1,4.875,66
|
||||||
|
46,hierarchical,25,1,4.68,46
|
||||||
|
47,k_means,25,1,4.68,64
|
||||||
|
48,hierarchical,26,1,4.5,46
|
||||||
|
49,k_means,26,1,4.5,62
|
||||||
|
50,hierarchical,27,1,4.333333333333333,46
|
||||||
|
51,k_means,27,1,4.333333333333333,60
|
||||||
|
52,hierarchical,28,1,4.178571428571429,46
|
||||||
|
53,k_means,28,1,4.178571428571429,60
|
||||||
|
54,hierarchical,29,1,4.0344827586206895,46
|
||||||
|
55,k_means,29,1,4.0344827586206895,58
|
||||||
|
56,hierarchical,30,1,3.9,46
|
||||||
|
57,k_means,30,1,3.9,57
|
||||||
|
58,hierarchical,31,1,3.774193548387097,46
|
||||||
|
59,k_means,31,1,3.774193548387097,56
|
||||||
|
60,hierarchical,32,1,3.65625,46
|
||||||
|
61,k_means,32,1,3.65625,56
|
||||||
|
62,hierarchical,33,1,3.5454545454545454,46
|
||||||
|
63,k_means,33,1,3.5454545454545454,56
|
||||||
|
64,hierarchical,34,1,3.4411764705882355,46
|
||||||
|
65,k_means,34,1,3.4411764705882355,55
|
||||||
|
66,hierarchical,35,1,3.342857142857143,46
|
||||||
|
67,k_means,35,1,3.342857142857143,54
|
||||||
|
68,hierarchical,36,1,3.25,46
|
||||||
|
69,k_means,36,1,3.25,54
|
||||||
|
70,hierarchical,37,1,3.1621621621621623,46
|
||||||
|
71,k_means,37,1,3.1621621621621623,53
|
||||||
|
72,hierarchical,38,1,3.0789473684210527,46
|
||||||
|
73,k_means,38,1,3.0789473684210527,53
|
||||||
|
74,hierarchical,39,1,3.0,46
|
||||||
|
75,k_means,39,1,3.0,52
|
||||||
|
76,hierarchical,40,1,2.925,46
|
||||||
|
77,k_means,40,1,2.925,51
|
||||||
|
78,hierarchical,41,1,2.8536585365853657,46
|
||||||
|
79,k_means,41,1,2.8536585365853657,50
|
||||||
|
80,hierarchical,42,1,2.7857142857142856,46
|
||||||
|
81,k_means,42,1,2.7857142857142856,49
|
||||||
|
82,hierarchical,43,1,2.7209302325581395,46
|
||||||
|
83,k_means,43,1,2.7209302325581395,48
|
||||||
|
84,hierarchical,44,1,2.659090909090909,46
|
||||||
|
85,k_means,44,1,2.659090909090909,47
|
||||||
|
86,hierarchical,45,1,2.6,46
|
||||||
|
87,k_means,45,1,2.6,46
|
|
BIN
clustering/org.apache.xerces.dom.CoreDocumentImpl_stats.png
Normal file
BIN
clustering/org.apache.xerces.dom.CoreDocumentImpl_stats.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 98 KiB |
|
@ -1,92 +1,92 @@
|
||||||
,cluster
|
,cluster
|
||||||
getGrammarDescription,5
|
getGrammarDescription,0
|
||||||
getElementDeclIsExternal,0
|
getElementDeclIsExternal,0
|
||||||
getAttributeDeclIsExternal,0
|
getAttributeDeclIsExternal,0
|
||||||
getAttributeDeclIndex,42
|
getAttributeDeclIndex,0
|
||||||
startDTD,2
|
startDTD,0
|
||||||
startParameterEntity,6
|
startParameterEntity,0
|
||||||
startExternalSubset,41
|
startExternalSubset,0
|
||||||
endParameterEntity,6
|
endParameterEntity,0
|
||||||
endExternalSubset,41
|
endExternalSubset,0
|
||||||
elementDecl,31
|
elementDecl,1
|
||||||
attributeDecl,39
|
attributeDecl,1
|
||||||
internalEntityDecl,40
|
internalEntityDecl,0
|
||||||
externalEntityDecl,40
|
externalEntityDecl,0
|
||||||
unparsedEntityDecl,40
|
unparsedEntityDecl,0
|
||||||
notationDecl,38
|
notationDecl,0
|
||||||
endDTD,8
|
endDTD,0
|
||||||
setDTDSource,23
|
setDTDSource,0
|
||||||
getDTDSource,23
|
getDTDSource,0
|
||||||
textDecl,5
|
textDecl,0
|
||||||
comment,5
|
comment,0
|
||||||
processingInstruction,5
|
processingInstruction,0
|
||||||
startAttlist,5
|
startAttlist,0
|
||||||
endAttlist,5
|
endAttlist,0
|
||||||
startConditional,5
|
startConditional,0
|
||||||
ignoredCharacters,5
|
ignoredCharacters,0
|
||||||
endConditional,5
|
endConditional,0
|
||||||
setDTDContentModelSource,11
|
setDTDContentModelSource,0
|
||||||
getDTDContentModelSource,11
|
getDTDContentModelSource,0
|
||||||
startContentModel,33
|
startContentModel,0
|
||||||
startGroup,53
|
startGroup,0
|
||||||
pcdata,51
|
pcdata,0
|
||||||
element,34
|
element,0
|
||||||
separator,44
|
separator,0
|
||||||
occurrence,32
|
occurrence,0
|
||||||
endGroup,35
|
endGroup,0
|
||||||
any,5
|
any,0
|
||||||
empty,5
|
empty,0
|
||||||
endContentModel,5
|
endContentModel,0
|
||||||
isNamespaceAware,5
|
isNamespaceAware,0
|
||||||
getSymbolTable,52
|
getSymbolTable,0
|
||||||
getFirstElementDeclIndex,7
|
getFirstElementDeclIndex,0
|
||||||
getNextElementDeclIndex,7
|
getNextElementDeclIndex,0
|
||||||
getElementDeclIndex,55
|
getElementDeclIndex,0
|
||||||
getContentSpecType,37
|
getContentSpecType,0
|
||||||
getElementDecl,54
|
getElementDecl,0
|
||||||
getElementDeclName,8
|
getElementDeclName,0
|
||||||
getFirstAttributeDeclIndex,20
|
getFirstAttributeDeclIndex,0
|
||||||
getNextAttributeDeclIndex,57
|
getNextAttributeDeclIndex,0
|
||||||
getAttributeDecl,28
|
getAttributeDecl,0
|
||||||
isCDATAAttribute,47
|
isCDATAAttribute,0
|
||||||
getEntityDeclIndex,5
|
getEntityDeclIndex,0
|
||||||
getEntityDecl,1
|
getEntityDecl,0
|
||||||
getNotationDeclIndex,5
|
getNotationDeclIndex,0
|
||||||
getNotationDecl,10
|
getNotationDecl,0
|
||||||
getContentSpec,9
|
getContentSpec,0
|
||||||
getContentSpecIndex,19
|
getContentSpecIndex,0
|
||||||
getContentSpecAsString,43
|
getContentSpecAsString,0
|
||||||
printElements,50
|
printElements,0
|
||||||
printAttributes,49
|
printAttributes,0
|
||||||
addContentSpecToElement,29
|
addContentSpecToElement,0
|
||||||
getElementContentModelValidator,25
|
getElementContentModelValidator,0
|
||||||
createElementDecl,36
|
createElementDecl,0
|
||||||
setElementDecl,17
|
setElementDecl,0
|
||||||
putElementNameMapping,5
|
putElementNameMapping,0
|
||||||
setFirstAttributeDeclIndex,20
|
setFirstAttributeDeclIndex,0
|
||||||
setContentSpecIndex,19
|
setContentSpecIndex,0
|
||||||
createAttributeDecl,18
|
createAttributeDecl,0
|
||||||
setAttributeDecl,48
|
setAttributeDecl,0
|
||||||
createContentSpec,21
|
createContentSpec,0
|
||||||
setContentSpec,9
|
setContentSpec,0
|
||||||
createEntityDecl,16
|
createEntityDecl,0
|
||||||
setEntityDecl,1
|
setEntityDecl,0
|
||||||
createNotationDecl,56
|
createNotationDecl,0
|
||||||
setNotationDecl,10
|
setNotationDecl,0
|
||||||
addContentSpecNode,4
|
addContentSpecNode,0
|
||||||
addUniqueLeafNode,4
|
addUniqueLeafNode,0
|
||||||
initializeContentModelStack,2
|
initializeContentModelStack,0
|
||||||
isImmutable,24
|
isImmutable,0
|
||||||
appendContentSpec,45
|
appendContentSpec,0
|
||||||
printAttribute,26
|
printAttribute,0
|
||||||
createChildModel,3
|
createChildModel,0
|
||||||
buildSyntaxTree,3
|
buildSyntaxTree,0
|
||||||
contentSpecTree,46
|
contentSpecTree,0
|
||||||
ensureElementDeclCapacity,27
|
ensureElementDeclCapacity,0
|
||||||
ensureAttributeDeclCapacity,30
|
ensureAttributeDeclCapacity,0
|
||||||
ensureEntityDeclCapacity,14
|
ensureEntityDeclCapacity,0
|
||||||
ensureNotationDeclCapacity,13
|
ensureNotationDeclCapacity,0
|
||||||
ensureContentSpecCapacity,12
|
ensureContentSpecCapacity,0
|
||||||
resize,5
|
resize,0
|
||||||
isEntityDeclared,22
|
isEntityDeclared,0
|
||||||
isEntityUnparsed,15
|
isEntityUnparsed,0
|
||||||
|
|
|
|
@ -1,92 +1,92 @@
|
||||||
,cluster
|
,cluster
|
||||||
getGrammarDescription,1
|
getGrammarDescription,6
|
||||||
getElementDeclIsExternal,0
|
getElementDeclIsExternal,50
|
||||||
getAttributeDeclIsExternal,0
|
getAttributeDeclIsExternal,43
|
||||||
getAttributeDeclIndex,1
|
getAttributeDeclIndex,36
|
||||||
startDTD,1
|
startDTD,13
|
||||||
startParameterEntity,1
|
startParameterEntity,23
|
||||||
startExternalSubset,1
|
startExternalSubset,54
|
||||||
endParameterEntity,1
|
endParameterEntity,23
|
||||||
endExternalSubset,1
|
endExternalSubset,54
|
||||||
elementDecl,0
|
elementDecl,5
|
||||||
attributeDecl,0
|
attributeDecl,4
|
||||||
internalEntityDecl,1
|
internalEntityDecl,17
|
||||||
externalEntityDecl,1
|
externalEntityDecl,17
|
||||||
unparsedEntityDecl,1
|
unparsedEntityDecl,17
|
||||||
notationDecl,1
|
notationDecl,40
|
||||||
endDTD,0
|
endDTD,29
|
||||||
setDTDSource,1
|
setDTDSource,53
|
||||||
getDTDSource,1
|
getDTDSource,53
|
||||||
textDecl,1
|
textDecl,6
|
||||||
comment,1
|
comment,6
|
||||||
processingInstruction,1
|
processingInstruction,6
|
||||||
startAttlist,1
|
startAttlist,6
|
||||||
endAttlist,1
|
endAttlist,6
|
||||||
startConditional,1
|
startConditional,6
|
||||||
ignoredCharacters,1
|
ignoredCharacters,6
|
||||||
endConditional,1
|
endConditional,6
|
||||||
setDTDContentModelSource,1
|
setDTDContentModelSource,51
|
||||||
getDTDContentModelSource,1
|
getDTDContentModelSource,51
|
||||||
startContentModel,1
|
startContentModel,28
|
||||||
startGroup,1
|
startGroup,39
|
||||||
pcdata,1
|
pcdata,52
|
||||||
element,1
|
element,2
|
||||||
separator,1
|
separator,49
|
||||||
occurrence,1
|
occurrence,24
|
||||||
endGroup,1
|
endGroup,27
|
||||||
any,1
|
any,6
|
||||||
empty,1
|
empty,6
|
||||||
endContentModel,1
|
endContentModel,6
|
||||||
isNamespaceAware,1
|
isNamespaceAware,6
|
||||||
getSymbolTable,1
|
getSymbolTable,56
|
||||||
getFirstElementDeclIndex,1
|
getFirstElementDeclIndex,47
|
||||||
getNextElementDeclIndex,1
|
getNextElementDeclIndex,47
|
||||||
getElementDeclIndex,1
|
getElementDeclIndex,57
|
||||||
getContentSpecType,0
|
getContentSpecType,38
|
||||||
getElementDecl,0
|
getElementDecl,0
|
||||||
getElementDeclName,0
|
getElementDeclName,29
|
||||||
getFirstAttributeDeclIndex,0
|
getFirstAttributeDeclIndex,3
|
||||||
getNextAttributeDeclIndex,0
|
getNextAttributeDeclIndex,46
|
||||||
getAttributeDecl,0
|
getAttributeDecl,25
|
||||||
isCDATAAttribute,1
|
isCDATAAttribute,44
|
||||||
getEntityDeclIndex,1
|
getEntityDeclIndex,6
|
||||||
getEntityDecl,0
|
getEntityDecl,8
|
||||||
getNotationDeclIndex,1
|
getNotationDeclIndex,6
|
||||||
getNotationDecl,0
|
getNotationDecl,10
|
||||||
getContentSpec,0
|
getContentSpec,41
|
||||||
getContentSpecIndex,0
|
getContentSpecIndex,12
|
||||||
getContentSpecAsString,0
|
getContentSpecAsString,37
|
||||||
printElements,1
|
printElements,55
|
||||||
printAttributes,1
|
printAttributes,35
|
||||||
addContentSpecToElement,1
|
addContentSpecToElement,20
|
||||||
getElementContentModelValidator,0
|
getElementContentModelValidator,21
|
||||||
createElementDecl,0
|
createElementDecl,33
|
||||||
setElementDecl,0
|
setElementDecl,16
|
||||||
putElementNameMapping,1
|
putElementNameMapping,6
|
||||||
setFirstAttributeDeclIndex,0
|
setFirstAttributeDeclIndex,3
|
||||||
setContentSpecIndex,0
|
setContentSpecIndex,12
|
||||||
createAttributeDecl,0
|
createAttributeDecl,19
|
||||||
setAttributeDecl,0
|
setAttributeDecl,7
|
||||||
createContentSpec,0
|
createContentSpec,41
|
||||||
setContentSpec,0
|
setContentSpec,9
|
||||||
createEntityDecl,0
|
createEntityDecl,31
|
||||||
setEntityDecl,0
|
setEntityDecl,8
|
||||||
createNotationDecl,1
|
createNotationDecl,32
|
||||||
setNotationDecl,0
|
setNotationDecl,10
|
||||||
addContentSpecNode,1
|
addContentSpecNode,18
|
||||||
addUniqueLeafNode,1
|
addUniqueLeafNode,18
|
||||||
initializeContentModelStack,1
|
initializeContentModelStack,13
|
||||||
isImmutable,1
|
isImmutable,6
|
||||||
appendContentSpec,1
|
appendContentSpec,42
|
||||||
printAttribute,1
|
printAttribute,1
|
||||||
createChildModel,1
|
createChildModel,11
|
||||||
buildSyntaxTree,1
|
buildSyntaxTree,11
|
||||||
contentSpecTree,1
|
contentSpecTree,30
|
||||||
ensureElementDeclCapacity,1
|
ensureElementDeclCapacity,15
|
||||||
ensureAttributeDeclCapacity,1
|
ensureAttributeDeclCapacity,14
|
||||||
ensureEntityDeclCapacity,1
|
ensureEntityDeclCapacity,22
|
||||||
ensureNotationDeclCapacity,1
|
ensureNotationDeclCapacity,26
|
||||||
ensureContentSpecCapacity,1
|
ensureContentSpecCapacity,34
|
||||||
resize,1
|
resize,6
|
||||||
isEntityDeclared,1
|
isEntityDeclared,48
|
||||||
isEntityUnparsed,0
|
isEntityUnparsed,45
|
||||||
|
|
|
|
@ -1,64 +1,64 @@
|
||||||
,k_means,hierarchical
|
,k_means,hierarchical
|
||||||
2,0.43549549160206547,0.22916634455195753
|
2,0.22916634455195753,0.43549549160206547
|
||||||
3,0.3737398924595095,0.2246280732293034
|
3,0.2246280732293034,0.3737398924595095
|
||||||
4,0.3557451009153901,0.22489420158108267
|
4,0.22489420158108267,0.3557451009153901
|
||||||
5,0.23295505680144496,0.23659327576115802
|
5,0.23659327576115802,0.23295505680144496
|
||||||
6,0.262133112331066,0.1944787865029721
|
6,0.1944787865029721,0.262133112331066
|
||||||
7,0.2578980101543562,0.14449036253228517
|
7,0.14449036253228517,0.2578980101543562
|
||||||
8,0.2549368125378225,0.14148366678653188
|
8,0.14148366678653188,0.2549368125378225
|
||||||
9,0.2774793093993747,0.13842552961645824
|
9,0.13842552961645824,0.2774793093993747
|
||||||
10,0.29633149188806335,0.17251507022640497
|
10,0.17251507022640497,0.29633149188806335
|
||||||
11,0.28457149559807815,0.20347568890084347
|
11,0.20347568890084347,0.28457149559807815
|
||||||
12,0.2774764884391462,0.23906895503283213
|
12,0.23906895503283213,0.2774764884391462
|
||||||
13,0.2807117319594596,0.2433263434151139
|
13,0.2433263434151139,0.2807117319594596
|
||||||
14,0.2756438988231549,0.2378679295617759
|
14,0.2378679295617759,0.2756438988231549
|
||||||
15,0.2725133030686268,0.23691994972126937
|
15,0.23691994972126937,0.2725133030686268
|
||||||
16,0.26609972785171476,0.23116431400607626
|
16,0.23116431400607626,0.26609972785171476
|
||||||
17,0.2622978716191777,0.250626112587838
|
17,0.250626112587838,0.2622978716191777
|
||||||
18,0.2599277555662332,0.25367962227891766
|
18,0.25367962227891766,0.2599277555662332
|
||||||
19,0.2627008352505403,0.27152241207311917
|
19,0.27152241207311917,0.2627008352505403
|
||||||
20,0.27904812684322156,0.2937526253744639
|
20,0.2937526253744639,0.27904812684322156
|
||||||
21,0.2862853638532431,0.29866907908096096
|
21,0.29866907908096096,0.2862853638532431
|
||||||
22,0.28363618305324206,0.2982900685039696
|
22,0.2982900685039696,0.28363618305324206
|
||||||
23,0.27298124922178313,0.29267556171442216
|
23,0.29267556171442216,0.27298124922178313
|
||||||
24,0.2755401967064185,0.30932258932020334
|
24,0.30932258932020334,0.2755401967064185
|
||||||
25,0.2699256899168711,0.30931433471981734
|
25,0.30931433471981734,0.2699256899168711
|
||||||
26,0.27327610109462835,0.309284891816073
|
26,0.309284891816073,0.27327610109462835
|
||||||
27,0.2741779110906256,0.30820922828647973
|
27,0.30820922828647973,0.2741779110906256
|
||||||
28,0.2772726745209296,0.306394576589556
|
28,0.306394576589556,0.2772726745209296
|
||||||
29,0.2763152122041744,0.31629054291989955
|
29,0.31629054291989955,0.2763152122041744
|
||||||
30,0.27822954116587556,0.31889378927031037
|
30,0.31889378927031037,0.27822954116587556
|
||||||
31,0.2765547788352012,0.31793632695355517
|
31,0.31793632695355517,0.2765547788352012
|
||||||
32,0.2873045247363621,0.3198803243841521
|
32,0.3198803243841521,0.2873045247363621
|
||||||
33,0.28417974562649284,0.3322984924566154
|
33,0.3322984924566154,0.28417974562649284
|
||||||
34,0.2685472504040367,0.33205224383012144
|
34,0.33205224383012144,0.2685472504040367
|
||||||
35,0.2640970877653046,0.32946433944653786
|
35,0.32946433944653786,0.2640970877653046
|
||||||
36,0.26594127941463497,0.34064178452545657
|
36,0.34064178452545657,0.26594127941463497
|
||||||
37,0.2671662834055061,0.34279546744648637
|
37,0.34279546744648637,0.2671662834055061
|
||||||
38,0.26972862144514015,0.3520414342812306
|
38,0.3520414342812306,0.26972862144514015
|
||||||
39,0.2745566131731437,0.35684038034252413
|
39,0.35684038034252413,0.2745566131731437
|
||||||
40,0.3085760240111521,0.34927826706954956
|
40,0.34927826706954956,0.3085760240111521
|
||||||
41,0.32756637032777863,0.3490443084779255
|
41,0.3490443084779255,0.32756637032777863
|
||||||
42,0.3310796986888577,0.34372287471805796
|
42,0.34372287471805796,0.3310796986888577
|
||||||
43,0.32889480000768656,0.32421890240508233
|
43,0.32421890240508233,0.32889480000768656
|
||||||
44,0.31610864049926274,0.27315698867962007
|
44,0.27315698867962007,0.31610864049926274
|
||||||
45,0.3140921194105564,0.27051011105427114
|
45,0.27051011105427114,0.3140921194105564
|
||||||
46,0.3088953240503273,0.29627121773250714
|
46,0.29627121773250714,0.3088953240503273
|
||||||
47,0.2693097731576138,0.32261382027270064
|
47,0.32261382027270064,0.2693097731576138
|
||||||
48,0.2809797636777669,0.3299248655060567
|
48,0.3299248655060567,0.2809797636777669
|
||||||
49,0.29384518410058824,0.3171387059976329
|
49,0.3171387059976329,0.29384518410058824
|
||||||
50,0.29793575895571417,0.3442080317722919
|
50,0.3442080317722919,0.29793575895571417
|
||||||
51,0.3025569827442159,0.3408776851426114
|
51,0.3408776851426114,0.3025569827442159
|
||||||
52,0.32032808958922193,0.3408776851426114
|
52,0.3408776851426114,0.32032808958922193
|
||||||
53,0.33852852210954587,0.33765907834246356
|
53,0.33765907834246356,0.33852852210954587
|
||||||
54,0.339541278009214,0.36565310355269914
|
54,0.36565310355269914,0.339541278009214
|
||||||
55,0.35774171052953796,0.3818328805784584
|
55,0.3818328805784584,0.35774171052953796
|
||||||
56,0.37594214304986195,0.3866470678901348
|
56,0.3866470678901348,0.37594214304986195
|
||||||
57,0.4080257854586148,0.3919955336887361
|
57,0.3919955336887361,0.4080257854586148
|
||||||
58,0.4046954388289342,0.3979724365432809
|
58,0.3979724365432809,0.4046954388289342
|
||||||
59,0.4046954388289342,0.38857621891133143
|
59,0.38857621891133143,0.4046954388289342
|
||||||
60,0.3931263574608019,0.3953492191827632
|
60,0.3953492191827632,0.3931263574608019
|
||||||
61,0.38155727609266954,0.38748610984623766
|
61,0.38748610984623766,0.38155727609266954
|
||||||
62,0.37132316722174985,0.39516141319506437
|
62,0.39516141319506437,0.37132316722174985
|
||||||
63,0.36810456042160206,0.3850224051641811
|
63,0.3850224051641811,0.36810456042160206
|
||||||
64,0.3565354790534698,0.3785851915638855
|
64,0.3785851915638855,0.3565354790534698
|
||||||
|
|
|
127
clustering/org.apache.xerces.impl.dtd.DTDGrammar_stats.csv
Normal file
127
clustering/org.apache.xerces.impl.dtd.DTDGrammar_stats.csv
Normal file
|
@ -0,0 +1,127 @@
|
||||||
|
,algorithm,k,min,mean,max
|
||||||
|
0,hierarchical,2,2,45.5,89
|
||||||
|
1,k_means,2,29,45.5,62
|
||||||
|
2,hierarchical,3,2,30.333333333333332,85
|
||||||
|
3,k_means,3,3,30.333333333333332,62
|
||||||
|
4,hierarchical,4,2,22.75,82
|
||||||
|
5,k_means,4,3,22.75,56
|
||||||
|
6,hierarchical,5,2,18.2,64
|
||||||
|
7,k_means,5,3,18.2,55
|
||||||
|
8,hierarchical,6,2,15.166666666666666,57
|
||||||
|
9,k_means,6,3,15.166666666666666,51
|
||||||
|
10,hierarchical,7,1,13.0,57
|
||||||
|
11,k_means,7,2,13.0,50
|
||||||
|
12,hierarchical,8,1,11.375,57
|
||||||
|
13,k_means,8,1,11.375,57
|
||||||
|
14,hierarchical,9,1,10.11111111111111,57
|
||||||
|
15,k_means,9,1,10.11111111111111,57
|
||||||
|
16,hierarchical,10,1,9.1,50
|
||||||
|
17,k_means,10,1,9.1,51
|
||||||
|
18,hierarchical,11,1,8.272727272727273,46
|
||||||
|
19,k_means,11,1,8.272727272727273,50
|
||||||
|
20,hierarchical,12,1,7.583333333333333,46
|
||||||
|
21,k_means,12,1,7.583333333333333,47
|
||||||
|
22,hierarchical,13,1,7.0,46
|
||||||
|
23,k_means,13,1,7.0,46
|
||||||
|
24,hierarchical,14,1,6.5,46
|
||||||
|
25,k_means,14,1,6.5,46
|
||||||
|
26,hierarchical,15,1,6.066666666666666,46
|
||||||
|
27,k_means,15,1,6.066666666666666,46
|
||||||
|
28,hierarchical,16,1,5.6875,46
|
||||||
|
29,k_means,16,1,5.6875,46
|
||||||
|
30,hierarchical,17,1,5.352941176470588,39
|
||||||
|
31,k_means,17,1,5.352941176470588,44
|
||||||
|
32,hierarchical,18,1,5.055555555555555,39
|
||||||
|
33,k_means,18,1,5.055555555555555,44
|
||||||
|
34,hierarchical,19,1,4.7894736842105265,34
|
||||||
|
35,k_means,19,1,4.7894736842105265,43
|
||||||
|
36,hierarchical,20,1,4.55,34
|
||||||
|
37,k_means,20,1,4.55,41
|
||||||
|
38,hierarchical,21,1,4.333333333333333,34
|
||||||
|
39,k_means,21,1,4.333333333333333,40
|
||||||
|
40,hierarchical,22,1,4.136363636363637,34
|
||||||
|
41,k_means,22,1,4.136363636363637,40
|
||||||
|
42,hierarchical,23,1,3.9565217391304346,31
|
||||||
|
43,k_means,23,1,3.9565217391304346,40
|
||||||
|
44,hierarchical,24,1,3.7916666666666665,31
|
||||||
|
45,k_means,24,1,3.7916666666666665,38
|
||||||
|
46,hierarchical,25,1,3.64,31
|
||||||
|
47,k_means,25,1,3.64,38
|
||||||
|
48,hierarchical,26,1,3.5,31
|
||||||
|
49,k_means,26,1,3.5,38
|
||||||
|
50,hierarchical,27,1,3.3703703703703702,31
|
||||||
|
51,k_means,27,1,3.3703703703703702,38
|
||||||
|
52,hierarchical,28,1,3.25,30
|
||||||
|
53,k_means,28,1,3.25,38
|
||||||
|
54,hierarchical,29,1,3.1379310344827585,30
|
||||||
|
55,k_means,29,1,3.1379310344827585,36
|
||||||
|
56,hierarchical,30,1,3.033333333333333,30
|
||||||
|
57,k_means,30,1,3.033333333333333,35
|
||||||
|
58,hierarchical,31,1,2.935483870967742,30
|
||||||
|
59,k_means,31,1,2.935483870967742,35
|
||||||
|
60,hierarchical,32,1,2.84375,30
|
||||||
|
61,k_means,32,1,2.84375,35
|
||||||
|
62,hierarchical,33,1,2.757575757575758,30
|
||||||
|
63,k_means,33,1,2.757575757575758,33
|
||||||
|
64,hierarchical,34,1,2.676470588235294,30
|
||||||
|
65,k_means,34,1,2.676470588235294,33
|
||||||
|
66,hierarchical,35,1,2.6,30
|
||||||
|
67,k_means,35,1,2.6,33
|
||||||
|
68,hierarchical,36,1,2.5277777777777777,30
|
||||||
|
69,k_means,36,1,2.5277777777777777,33
|
||||||
|
70,hierarchical,37,1,2.4594594594594597,30
|
||||||
|
71,k_means,37,1,2.4594594594594597,33
|
||||||
|
72,hierarchical,38,1,2.3947368421052633,30
|
||||||
|
73,k_means,38,1,2.3947368421052633,33
|
||||||
|
74,hierarchical,39,1,2.3333333333333335,29
|
||||||
|
75,k_means,39,1,2.3333333333333335,32
|
||||||
|
76,hierarchical,40,1,2.275,29
|
||||||
|
77,k_means,40,1,2.275,32
|
||||||
|
78,hierarchical,41,1,2.2195121951219514,29
|
||||||
|
79,k_means,41,1,2.2195121951219514,32
|
||||||
|
80,hierarchical,42,1,2.1666666666666665,29
|
||||||
|
81,k_means,42,1,2.1666666666666665,32
|
||||||
|
82,hierarchical,43,1,2.116279069767442,29
|
||||||
|
83,k_means,43,1,2.116279069767442,31
|
||||||
|
84,hierarchical,44,1,2.0681818181818183,29
|
||||||
|
85,k_means,44,1,2.0681818181818183,31
|
||||||
|
86,hierarchical,45,1,2.022222222222222,29
|
||||||
|
87,k_means,45,1,2.022222222222222,31
|
||||||
|
88,hierarchical,46,1,1.9782608695652173,29
|
||||||
|
89,k_means,46,1,1.9782608695652173,29
|
||||||
|
90,hierarchical,47,1,1.9361702127659575,28
|
||||||
|
91,k_means,47,1,1.9361702127659575,27
|
||||||
|
92,hierarchical,48,1,1.8958333333333333,17
|
||||||
|
93,k_means,48,1,1.8958333333333333,27
|
||||||
|
94,hierarchical,49,1,1.8571428571428572,17
|
||||||
|
95,k_means,49,1,1.8571428571428572,27
|
||||||
|
96,hierarchical,50,1,1.82,17
|
||||||
|
97,k_means,50,1,1.82,25
|
||||||
|
98,hierarchical,51,1,1.7843137254901962,17
|
||||||
|
99,k_means,51,1,1.7843137254901962,25
|
||||||
|
100,hierarchical,52,1,1.75,17
|
||||||
|
101,k_means,52,1,1.75,25
|
||||||
|
102,hierarchical,53,1,1.7169811320754718,17
|
||||||
|
103,k_means,53,1,1.7169811320754718,25
|
||||||
|
104,hierarchical,54,1,1.6851851851851851,17
|
||||||
|
105,k_means,54,1,1.6851851851851851,23
|
||||||
|
106,hierarchical,55,1,1.6545454545454545,17
|
||||||
|
107,k_means,55,1,1.6545454545454545,21
|
||||||
|
108,hierarchical,56,1,1.625,17
|
||||||
|
109,k_means,56,1,1.625,20
|
||||||
|
110,hierarchical,57,1,1.5964912280701755,17
|
||||||
|
111,k_means,57,1,1.5964912280701755,19
|
||||||
|
112,hierarchical,58,1,1.5689655172413792,17
|
||||||
|
113,k_means,58,1,1.5689655172413792,18
|
||||||
|
114,hierarchical,59,1,1.5423728813559323,17
|
||||||
|
115,k_means,59,1,1.5423728813559323,18
|
||||||
|
116,hierarchical,60,1,1.5166666666666666,17
|
||||||
|
117,k_means,60,1,1.5166666666666666,17
|
||||||
|
118,hierarchical,61,1,1.4918032786885247,17
|
||||||
|
119,k_means,61,1,1.4918032786885247,17
|
||||||
|
120,hierarchical,62,1,1.467741935483871,17
|
||||||
|
121,k_means,62,1,1.467741935483871,16
|
||||||
|
122,hierarchical,63,1,1.4444444444444444,17
|
||||||
|
123,k_means,63,1,1.4444444444444444,16
|
||||||
|
124,hierarchical,64,1,1.421875,17
|
||||||
|
125,k_means,64,1,1.421875,16
|
|
BIN
clustering/org.apache.xerces.impl.dtd.DTDGrammar_stats.png
Normal file
BIN
clustering/org.apache.xerces.impl.dtd.DTDGrammar_stats.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 98 KiB |
|
@ -11,7 +11,7 @@ constructTrees,0
|
||||||
isExistingGrammar,0
|
isExistingGrammar,0
|
||||||
updateImportListFor,0
|
updateImportListFor,0
|
||||||
updateImportListWith,0
|
updateImportListWith,0
|
||||||
buildGlobalNameRegistries,0
|
buildGlobalNameRegistries,2
|
||||||
traverseSchemas,0
|
traverseSchemas,0
|
||||||
needReportTNSError,0
|
needReportTNSError,0
|
||||||
addGlobalAttributeDecl,0
|
addGlobalAttributeDecl,0
|
||||||
|
|
|
|
@ -36,7 +36,7 @@ getGrpOrAttrGrpRedefinedByRestriction,0
|
||||||
resolveKeyRefs,0
|
resolveKeyRefs,0
|
||||||
getIDRegistry,0
|
getIDRegistry,0
|
||||||
getIDRegistry_sub,0
|
getIDRegistry_sub,0
|
||||||
storeKeyRef,2
|
storeKeyRef,0
|
||||||
resolveSchema,0
|
resolveSchema,0
|
||||||
resolveSchemaSource,0
|
resolveSchemaSource,0
|
||||||
getSchemaDocument,0
|
getSchemaDocument,0
|
||||||
|
|
|
|
@ -1,64 +1,64 @@
|
||||||
,k_means,hierarchical
|
,k_means,hierarchical
|
||||||
2,0.598553678618089,0.598553678618089
|
2,0.598553678618089,0.598553678618089
|
||||||
3,0.5988635577957939,0.4340479638200015
|
3,0.4340479638200015,0.5988635577957939
|
||||||
4,0.5768968986366794,0.4446910360233003
|
4,0.4446910360233003,0.5768968986366794
|
||||||
5,0.5600582308059449,0.44970649031040394
|
5,0.44970649031040394,0.5600582308059449
|
||||||
6,0.4754961922118064,0.42291318953408236
|
6,0.42291318953408236,0.4754961922118064
|
||||||
7,0.4716129791423394,0.37302776265331616
|
7,0.37302776265331616,0.4716129791423394
|
||||||
8,0.4215235973451702,0.16585341129364783
|
8,0.16585341129364783,0.4215235973451702
|
||||||
9,0.4270412461769427,0.1805562766904707
|
9,0.1805562766904707,0.4270412461769427
|
||||||
10,0.42033460704259445,0.10021703881198853
|
10,0.10021703881198853,0.42033460704259445
|
||||||
11,0.3979172260202459,0.11227880527684016
|
11,0.11227880527684016,0.3979172260202459
|
||||||
12,0.3992377842624971,0.13834960978465374
|
12,0.13834960978465374,0.3992377842624971
|
||||||
13,0.36351812430049024,0.14637482631499601
|
13,0.14637482631499601,0.36351812430049024
|
||||||
14,0.36086605127470145,0.15339700393049752
|
14,0.15339700393049752,0.36086605127470145
|
||||||
15,0.27803229144747893,0.17519153912543511
|
15,0.17519153912543511,0.27803229144747893
|
||||||
16,0.24764306057751692,0.18163489682652323
|
16,0.18163489682652323,0.24764306057751692
|
||||||
17,0.2546247662068935,0.1920283064393974
|
17,0.1920283064393974,0.2546247662068935
|
||||||
18,0.27281600254442556,0.1968887014348958
|
18,0.1968887014348958,0.27281600254442556
|
||||||
19,0.2705186834360297,0.22320550740329767
|
19,0.22320550740329767,0.2705186834360297
|
||||||
20,0.29969231483298964,0.2278559856358303
|
20,0.2278559856358303,0.29969231483298964
|
||||||
21,0.31507175636228785,0.21631113790331308
|
21,0.21631113790331308,0.31507175636228785
|
||||||
22,0.3170839571491974,0.23240216910856668
|
22,0.23240216910856668,0.3170839571491974
|
||||||
23,0.3201062001500274,0.23590483919206368
|
23,0.23590483919206368,0.3201062001500274
|
||||||
24,0.24788100874579763,0.24413294581937137
|
24,0.24413294581937137,0.24788100874579763
|
||||||
25,0.24932439019964475,0.2514892577758059
|
25,0.2514892577758059,0.24932439019964475
|
||||||
26,0.25682838168308425,0.2573251636281981
|
26,0.2573251636281981,0.25682838168308425
|
||||||
27,0.25691108409617125,0.2629575908594159
|
27,0.2629575908594159,0.25691108409617125
|
||||||
28,0.2606141413445487,0.27452933746874875
|
28,0.27452933746874875,0.2606141413445487
|
||||||
29,0.2538973293819504,0.27138907448677696
|
29,0.27138907448677696,0.2538973293819504
|
||||||
30,0.26300677134410877,0.27608901099722993
|
30,0.27608901099722993,0.26300677134410877
|
||||||
31,0.25958787047274295,0.2734068655042204
|
31,0.2734068655042204,0.25958787047274295
|
||||||
32,0.2620577633391267,0.26668692055998694
|
32,0.26668692055998694,0.2620577633391267
|
||||||
33,0.2677665846189286,0.27190541883537933
|
33,0.27190541883537933,0.2677665846189286
|
||||||
34,0.2698493206362974,0.2737953942290021
|
34,0.2737953942290021,0.2698493206362974
|
||||||
35,0.26871523120875485,0.2792490491212266
|
35,0.2792490491212266,0.26871523120875485
|
||||||
36,0.2706224838853019,0.285255446778457
|
36,0.285255446778457,0.2706224838853019
|
||||||
37,0.27443698923839605,0.28791640737048424
|
37,0.28791640737048424,0.27443698923839605
|
||||||
38,0.27814176822064324,0.279340819685821
|
38,0.279340819685821,0.27814176822064324
|
||||||
39,0.27606460269195954,0.27183941054653343
|
39,0.27183941054653343,0.27606460269195954
|
||||||
40,0.27307694582354536,0.2763117563875985
|
40,0.2763117563875985,0.27307694582354536
|
||||||
41,0.27681213230462487,0.28090687018324295
|
41,0.28090687018324295,0.27681213230462487
|
||||||
42,0.2763401507651925,0.2823757748346625
|
42,0.2823757748346625,0.2763401507651925
|
||||||
43,0.2905756087008992,0.284106939165533
|
43,0.284106939165533,0.2905756087008992
|
||||||
44,0.2901305465431984,0.28892051024774673
|
44,0.28892051024774673,0.2901305465431984
|
||||||
45,0.2866682178814574,0.31067801214219776
|
45,0.31067801214219776,0.2866682178814574
|
||||||
46,0.2748147271365624,0.33271255209429573
|
46,0.33271255209429573,0.2748147271365624
|
||||||
47,0.2824570955002154,0.33809240352772785
|
47,0.33809240352772785,0.2824570955002154
|
||||||
48,0.28532199483886955,0.342466005901906
|
48,0.342466005901906,0.28532199483886955
|
||||||
49,0.3194897125271686,0.3472678571815208
|
49,0.3472678571815208,0.3194897125271686
|
||||||
50,0.32227149882332984,0.3494143489069156
|
50,0.3494143489069156,0.32227149882332984
|
||||||
51,0.32507315719109064,0.3553262507378467
|
51,0.3553262507378467,0.32507315719109064
|
||||||
52,0.3483927215781152,0.3577437943048381
|
52,0.3577437943048381,0.3483927215781152
|
||||||
53,0.35116863658039477,0.36265544445738723
|
53,0.36265544445738723,0.35116863658039477
|
||||||
54,0.35397195095412226,0.3652777392559547
|
54,0.3652777392559547,0.35397195095412226
|
||||||
55,0.35681293946359083,0.3635299110583668
|
55,0.3635299110583668,0.35681293946359083
|
||||||
56,0.35970774816697515,0.36657235082485046
|
56,0.36657235082485046,0.35970774816697515
|
||||||
57,0.36268328716123316,0.369801843033111
|
57,0.369801843033111,0.36268328716123316
|
||||||
58,0.365787994620889,0.373236109725014
|
58,0.373236109725014,0.365787994620889
|
||||||
59,0.36911874689499113,0.3768951711426859
|
59,0.3768951711426859,0.36911874689499113
|
||||||
60,0.3899012348681259,0.38080172755114144
|
60,0.38080172755114144,0.3899012348681259
|
||||||
61,0.3925538526988892,0.38498161884368615
|
61,0.38498161884368615,0.3925538526988892
|
||||||
62,0.3952064705296526,0.38946438071227807
|
62,0.38946438071227807,0.3952064705296526
|
||||||
63,0.40051170619117926,0.39428392137375445
|
63,0.39428392137375445,0.40051170619117926
|
||||||
64,0.4003435986724249,0.4003771346837245
|
64,0.4003771346837245,0.4003435986724249
|
||||||
|
|
|
|
@ -0,0 +1,127 @@
|
||||||
|
,algorithm,k,min,mean,max
|
||||||
|
0,hierarchical,2,1,53.0,105
|
||||||
|
1,k_means,2,1,53.0,105
|
||||||
|
2,hierarchical,3,1,35.333333333333336,104
|
||||||
|
3,k_means,3,1,35.333333333333336,104
|
||||||
|
4,hierarchical,4,1,26.5,102
|
||||||
|
5,k_means,4,1,26.5,102
|
||||||
|
6,hierarchical,5,1,21.2,102
|
||||||
|
7,k_means,5,1,21.2,101
|
||||||
|
8,hierarchical,6,1,17.666666666666668,99
|
||||||
|
9,k_means,6,1,17.666666666666668,99
|
||||||
|
10,hierarchical,7,1,15.142857142857142,98
|
||||||
|
11,k_means,7,1,15.142857142857142,98
|
||||||
|
12,hierarchical,8,1,13.25,96
|
||||||
|
13,k_means,8,1,13.25,91
|
||||||
|
14,hierarchical,9,1,11.777777777777779,96
|
||||||
|
15,k_means,9,1,11.777777777777779,90
|
||||||
|
16,hierarchical,10,1,10.6,95
|
||||||
|
17,k_means,10,1,10.6,86
|
||||||
|
18,hierarchical,11,1,9.636363636363637,94
|
||||||
|
19,k_means,11,1,9.636363636363637,84
|
||||||
|
20,hierarchical,12,1,8.833333333333334,93
|
||||||
|
21,k_means,12,1,8.833333333333334,82
|
||||||
|
22,hierarchical,13,1,8.153846153846153,91
|
||||||
|
23,k_means,13,1,8.153846153846153,81
|
||||||
|
24,hierarchical,14,1,7.571428571428571,91
|
||||||
|
25,k_means,14,1,7.571428571428571,80
|
||||||
|
26,hierarchical,15,1,7.066666666666666,83
|
||||||
|
27,k_means,15,1,7.066666666666666,76
|
||||||
|
28,hierarchical,16,1,6.625,83
|
||||||
|
29,k_means,16,1,6.625,75
|
||||||
|
30,hierarchical,17,1,6.235294117647059,78
|
||||||
|
31,k_means,17,1,6.235294117647059,74
|
||||||
|
32,hierarchical,18,1,5.888888888888889,78
|
||||||
|
33,k_means,18,1,5.888888888888889,73
|
||||||
|
34,hierarchical,19,1,5.578947368421052,78
|
||||||
|
35,k_means,19,1,5.578947368421052,71
|
||||||
|
36,hierarchical,20,1,5.3,71
|
||||||
|
37,k_means,20,1,5.3,70
|
||||||
|
38,hierarchical,21,1,5.0476190476190474,68
|
||||||
|
39,k_means,21,1,5.0476190476190474,69
|
||||||
|
40,hierarchical,22,1,4.818181818181818,68
|
||||||
|
41,k_means,22,1,4.818181818181818,65
|
||||||
|
42,hierarchical,23,1,4.608695652173913,68
|
||||||
|
43,k_means,23,1,4.608695652173913,65
|
||||||
|
44,hierarchical,24,1,4.416666666666667,64
|
||||||
|
45,k_means,24,1,4.416666666666667,64
|
||||||
|
46,hierarchical,25,1,4.24,64
|
||||||
|
47,k_means,25,1,4.24,62
|
||||||
|
48,hierarchical,26,1,4.076923076923077,64
|
||||||
|
49,k_means,26,1,4.076923076923077,61
|
||||||
|
50,hierarchical,27,1,3.925925925925926,64
|
||||||
|
51,k_means,27,1,3.925925925925926,60
|
||||||
|
52,hierarchical,28,1,3.7857142857142856,63
|
||||||
|
53,k_means,28,1,3.7857142857142856,55
|
||||||
|
54,hierarchical,29,1,3.6551724137931036,63
|
||||||
|
55,k_means,29,1,3.6551724137931036,55
|
||||||
|
56,hierarchical,30,1,3.533333333333333,63
|
||||||
|
57,k_means,30,1,3.533333333333333,54
|
||||||
|
58,hierarchical,31,1,3.4193548387096775,63
|
||||||
|
59,k_means,31,1,3.4193548387096775,54
|
||||||
|
60,hierarchical,32,1,3.3125,52
|
||||||
|
61,k_means,32,1,3.3125,54
|
||||||
|
62,hierarchical,33,1,3.212121212121212,52
|
||||||
|
63,k_means,33,1,3.212121212121212,53
|
||||||
|
64,hierarchical,34,1,3.1176470588235294,52
|
||||||
|
65,k_means,34,1,3.1176470588235294,52
|
||||||
|
66,hierarchical,35,1,3.0285714285714285,52
|
||||||
|
67,k_means,35,1,3.0285714285714285,51
|
||||||
|
68,hierarchical,36,1,2.9444444444444446,52
|
||||||
|
69,k_means,36,1,2.9444444444444446,50
|
||||||
|
70,hierarchical,37,1,2.864864864864865,52
|
||||||
|
71,k_means,37,1,2.864864864864865,50
|
||||||
|
72,hierarchical,38,1,2.789473684210526,51
|
||||||
|
73,k_means,38,1,2.789473684210526,50
|
||||||
|
74,hierarchical,39,1,2.717948717948718,51
|
||||||
|
75,k_means,39,1,2.717948717948718,50
|
||||||
|
76,hierarchical,40,1,2.65,51
|
||||||
|
77,k_means,40,1,2.65,49
|
||||||
|
78,hierarchical,41,1,2.5853658536585367,51
|
||||||
|
79,k_means,41,1,2.5853658536585367,48
|
||||||
|
80,hierarchical,42,1,2.5238095238095237,51
|
||||||
|
81,k_means,42,1,2.5238095238095237,47
|
||||||
|
82,hierarchical,43,1,2.4651162790697674,47
|
||||||
|
83,k_means,43,1,2.4651162790697674,47
|
||||||
|
84,hierarchical,44,1,2.409090909090909,47
|
||||||
|
85,k_means,44,1,2.409090909090909,46
|
||||||
|
86,hierarchical,45,1,2.3555555555555556,47
|
||||||
|
87,k_means,45,1,2.3555555555555556,44
|
||||||
|
88,hierarchical,46,1,2.3043478260869565,46
|
||||||
|
89,k_means,46,1,2.3043478260869565,42
|
||||||
|
90,hierarchical,47,1,2.25531914893617,25
|
||||||
|
91,k_means,47,1,2.25531914893617,41
|
||||||
|
92,hierarchical,48,1,2.2083333333333335,25
|
||||||
|
93,k_means,48,1,2.2083333333333335,41
|
||||||
|
94,hierarchical,49,1,2.163265306122449,25
|
||||||
|
95,k_means,49,1,2.163265306122449,41
|
||||||
|
96,hierarchical,50,1,2.12,25
|
||||||
|
97,k_means,50,1,2.12,40
|
||||||
|
98,hierarchical,51,1,2.0784313725490198,25
|
||||||
|
99,k_means,51,1,2.0784313725490198,39
|
||||||
|
100,hierarchical,52,1,2.0384615384615383,25
|
||||||
|
101,k_means,52,1,2.0384615384615383,38
|
||||||
|
102,hierarchical,53,1,2.0,25
|
||||||
|
103,k_means,53,1,2.0,38
|
||||||
|
104,hierarchical,54,1,1.962962962962963,25
|
||||||
|
105,k_means,54,1,1.962962962962963,37
|
||||||
|
106,hierarchical,55,1,1.9272727272727272,25
|
||||||
|
107,k_means,55,1,1.9272727272727272,35
|
||||||
|
108,hierarchical,56,1,1.8928571428571428,25
|
||||||
|
109,k_means,56,1,1.8928571428571428,34
|
||||||
|
110,hierarchical,57,1,1.8596491228070176,25
|
||||||
|
111,k_means,57,1,1.8596491228070176,33
|
||||||
|
112,hierarchical,58,1,1.8275862068965518,25
|
||||||
|
113,k_means,58,1,1.8275862068965518,32
|
||||||
|
114,hierarchical,59,1,1.7966101694915255,25
|
||||||
|
115,k_means,59,1,1.7966101694915255,31
|
||||||
|
116,hierarchical,60,1,1.7666666666666666,25
|
||||||
|
117,k_means,60,1,1.7666666666666666,30
|
||||||
|
118,hierarchical,61,1,1.7377049180327868,25
|
||||||
|
119,k_means,61,1,1.7377049180327868,29
|
||||||
|
120,hierarchical,62,1,1.7096774193548387,25
|
||||||
|
121,k_means,62,1,1.7096774193548387,28
|
||||||
|
122,hierarchical,63,1,1.6825396825396826,25
|
||||||
|
123,k_means,63,1,1.6825396825396826,27
|
||||||
|
124,hierarchical,64,1,1.65625,25
|
||||||
|
125,k_means,64,1,1.65625,27
|
|
Binary file not shown.
After Width: | Height: | Size: 100 KiB |
|
@ -1,64 +1,64 @@
|
||||||
,k_means,hierarchical
|
,k_means,hierarchical
|
||||||
2,0.6855584100867681,0.6979818296524081
|
2,0.6979818296524081,0.6855584100867681
|
||||||
3,0.6658312390685782,0.5363440260613704
|
3,0.5363440260613704,0.6658312390685782
|
||||||
4,0.5447405755407478,0.5447405755407478
|
4,0.5447405755407478,0.5447405755407478
|
||||||
5,0.49469855877597974,0.4950944104608897
|
5,0.4950944104608897,0.49469855877597974
|
||||||
6,0.49629928069605667,0.3167619075077442
|
6,0.3167619075077442,0.49629928069605667
|
||||||
7,0.4937183177275972,0.3273304877495634
|
7,0.3273304877495634,0.4937183177275972
|
||||||
8,0.4903327662796836,0.16989336921679118
|
8,0.16989336921679118,0.4903327662796836
|
||||||
9,0.33791118198002373,0.17626101482318196
|
9,0.17626101482318196,0.33791118198002373
|
||||||
10,0.2667320598048964,0.19790344008120894
|
10,0.19790344008120894,0.2667320598048964
|
||||||
11,0.2629948976926796,0.1943827895435377
|
11,0.1943827895435377,0.2629948976926796
|
||||||
12,0.25965211932671445,0.20556562870341602
|
12,0.20556562870341602,0.25965211932671445
|
||||||
13,0.26164323937367595,0.20144552653966163
|
13,0.20144552653966163,0.26164323937367595
|
||||||
14,0.25806076142240403,0.22548403695669203
|
14,0.22548403695669203,0.25806076142240403
|
||||||
15,0.26997893998401756,0.22918194758667895
|
15,0.22918194758667895,0.26997893998401756
|
||||||
16,0.27256451459055664,0.2404290571765335
|
16,0.2404290571765335,0.27256451459055664
|
||||||
17,0.2608837912623233,0.2345015455494567
|
17,0.2345015455494567,0.2608837912623233
|
||||||
18,0.241790230179569,0.2390816398182416
|
18,0.2390816398182416,0.241790230179569
|
||||||
19,0.24484234464495422,0.24094820256010968
|
19,0.24094820256010968,0.24484234464495422
|
||||||
20,0.2361050155539465,0.2435369787081999
|
20,0.2435369787081999,0.2361050155539465
|
||||||
21,0.23692336175194548,0.262463283756636
|
21,0.262463283756636,0.23692336175194548
|
||||||
22,0.23946566771940794,0.2742864390420934
|
22,0.2742864390420934,0.23946566771940794
|
||||||
23,0.24594283942153175,0.2979619533428987
|
23,0.2979619533428987,0.24594283942153175
|
||||||
24,0.24734609860636583,0.29936015461670856
|
24,0.29936015461670856,0.24734609860636583
|
||||||
25,0.2372755932074588,0.30224440986202594
|
25,0.30224440986202594,0.2372755932074588
|
||||||
26,0.24082497341896647,0.30600924875137986
|
26,0.30600924875137986,0.24082497341896647
|
||||||
27,0.24547723657004195,0.3147862783718484
|
27,0.3147862783718484,0.24547723657004195
|
||||||
28,0.2503460498700128,0.31887407003386015
|
28,0.31887407003386015,0.2503460498700128
|
||||||
29,0.26565769288673047,0.3204052924706567
|
29,0.3204052924706567,0.26565769288673047
|
||||||
30,0.2951470761811464,0.3106572702067674
|
30,0.3106572702067674,0.2951470761811464
|
||||||
31,0.30158824153259317,0.31330774028648145
|
31,0.31330774028648145,0.30158824153259317
|
||||||
32,0.3180875184494547,0.33210454757827634
|
32,0.33210454757827634,0.3180875184494547
|
||||||
33,0.32604023717225655,0.340503634089749
|
33,0.340503634089749,0.32604023717225655
|
||||||
34,0.3162922149083673,0.33568440892081625
|
34,0.33568440892081625,0.3162922149083673
|
||||||
35,0.31716183472339093,0.3545992807283562
|
35,0.3545992807283562,0.31716183472339093
|
||||||
36,0.3214298482703343,0.3575403386841057
|
36,0.3575403386841057,0.3214298482703343
|
||||||
37,0.32681546873349715,0.36503026576341707
|
37,0.36503026576341707,0.32681546873349715
|
||||||
38,0.32732304963529885,0.3738835074795801
|
38,0.3738835074795801,0.32732304963529885
|
||||||
39,0.32990135488218114,0.3929681262996284
|
39,0.3929681262996284,0.32990135488218114
|
||||||
40,0.32432743478528314,0.38848432563159185
|
40,0.38848432563159185,0.32432743478528314
|
||||||
41,0.3198436341172465,0.39163178463382314
|
41,0.39163178463382314,0.3198436341172465
|
||||||
42,0.32643375049241685,0.3860578645369252
|
42,0.3860578645369252,0.32643375049241685
|
||||||
43,0.32203761977337186,0.4053389048253979
|
43,0.4053389048253979,0.32203761977337186
|
||||||
44,0.3346466456087704,0.4217820126417848
|
44,0.4217820126417848,0.3346466456087704
|
||||||
45,0.33223356867673165,0.4257244699851672
|
45,0.4257244699851672,0.33223356867673165
|
||||||
46,0.33057959050289126,0.42988772845220063
|
46,0.42988772845220063,0.33057959050289126
|
||||||
47,0.34588619420359423,0.4342738007458362
|
47,0.4342738007458362,0.34588619420359423
|
||||||
48,0.3328354412937589,0.4416623097378058
|
48,0.4416623097378058,0.3328354412937589
|
||||||
49,0.33565227636252953,0.4426712673092602
|
49,0.4426712673092602,0.33565227636252953
|
||||||
50,0.35908519690010676,0.4440749104193141
|
50,0.4440749104193141,0.35908519690010676
|
||||||
51,0.3619904717574287,0.4456199850626709
|
51,0.4456199850626709,0.3619904717574287
|
||||||
52,0.36212543071422687,0.45074758794403463
|
52,0.45074758794403463,0.36212543071422687
|
||||||
53,0.3651696447072414,0.472336181881003
|
53,0.472336181881003,0.3651696447072414
|
||||||
54,0.3982691624564969,0.47730382490643575
|
54,0.47730382490643575,0.3982691624564969
|
||||||
55,0.4013572084387477,0.454200815600248
|
55,0.454200815600248,0.4013572084387477
|
||||||
56,0.4246326548929088,0.4565666319605046
|
56,0.4565666319605046,0.4246326548929088
|
||||||
57,0.44773608146478383,0.4522750157266781
|
57,0.4522750157266781,0.44773608146478383
|
||||||
58,0.45157671443203573,0.4549088773042353
|
58,0.4549088773042353,0.45157671443203573
|
||||||
59,0.4563204517812888,0.46145616231522185
|
59,0.46145616231522185,0.4563204517812888
|
||||||
60,0.46300257860702615,0.4641242358826516
|
60,0.4641242358826516,0.46300257860702615
|
||||||
61,0.4967563269695634,0.47006659021417746
|
61,0.47006659021417746,0.4967563269695634
|
||||||
62,0.4994732063134373,0.47352022985333136
|
62,0.47352022985333136,0.4994732063134373
|
||||||
63,0.49518159007961093,0.4775136217473302
|
63,0.4775136217473302,0.49518159007961093
|
||||||
64,0.4872143751031807,0.4821848224907804
|
64,0.4821848224907804,0.4872143751031807
|
||||||
|
|
|
127
clustering/org.apache.xerces.xinclude.XIncludeHandler_stats.csv
Normal file
127
clustering/org.apache.xerces.xinclude.XIncludeHandler_stats.csv
Normal file
|
@ -0,0 +1,127 @@
|
||||||
|
,algorithm,k,min,mean,max
|
||||||
|
0,hierarchical,2,2,54.0,106
|
||||||
|
1,k_means,2,1,54.0,107
|
||||||
|
2,hierarchical,3,1,36.0,106
|
||||||
|
3,k_means,3,1,36.0,103
|
||||||
|
4,hierarchical,4,1,27.0,102
|
||||||
|
5,k_means,4,1,27.0,102
|
||||||
|
6,hierarchical,5,1,21.6,101
|
||||||
|
7,k_means,5,1,21.6,102
|
||||||
|
8,hierarchical,6,1,18.0,100
|
||||||
|
9,k_means,6,1,18.0,93
|
||||||
|
10,hierarchical,7,1,15.428571428571429,99
|
||||||
|
11,k_means,7,1,15.428571428571429,91
|
||||||
|
12,hierarchical,8,1,13.5,99
|
||||||
|
13,k_means,8,1,13.5,71
|
||||||
|
14,hierarchical,9,1,12.0,90
|
||||||
|
15,k_means,9,1,12.0,68
|
||||||
|
16,hierarchical,10,1,10.8,80
|
||||||
|
17,k_means,10,1,10.8,62
|
||||||
|
18,hierarchical,11,1,9.818181818181818,80
|
||||||
|
19,k_means,11,1,9.818181818181818,62
|
||||||
|
20,hierarchical,12,1,9.0,80
|
||||||
|
21,k_means,12,1,9.0,61
|
||||||
|
22,hierarchical,13,1,8.307692307692308,79
|
||||||
|
23,k_means,13,1,8.307692307692308,63
|
||||||
|
24,hierarchical,14,1,7.714285714285714,77
|
||||||
|
25,k_means,14,1,7.714285714285714,57
|
||||||
|
26,hierarchical,15,1,7.2,77
|
||||||
|
27,k_means,15,1,7.2,56
|
||||||
|
28,hierarchical,16,1,6.75,76
|
||||||
|
29,k_means,16,1,6.75,55
|
||||||
|
30,hierarchical,17,1,6.352941176470588,76
|
||||||
|
31,k_means,17,1,6.352941176470588,55
|
||||||
|
32,hierarchical,18,1,6.0,65
|
||||||
|
33,k_means,18,1,6.0,54
|
||||||
|
34,hierarchical,19,1,5.684210526315789,65
|
||||||
|
35,k_means,19,1,5.684210526315789,54
|
||||||
|
36,hierarchical,20,1,5.4,65
|
||||||
|
37,k_means,20,1,5.4,53
|
||||||
|
38,hierarchical,21,1,5.142857142857143,65
|
||||||
|
39,k_means,21,1,5.142857142857143,53
|
||||||
|
40,hierarchical,22,1,4.909090909090909,64
|
||||||
|
41,k_means,22,1,4.909090909090909,51
|
||||||
|
42,hierarchical,23,1,4.695652173913044,64
|
||||||
|
43,k_means,23,1,4.695652173913044,47
|
||||||
|
44,hierarchical,24,1,4.5,64
|
||||||
|
45,k_means,24,1,4.5,47
|
||||||
|
46,hierarchical,25,1,4.32,34
|
||||||
|
47,k_means,25,1,4.32,46
|
||||||
|
48,hierarchical,26,1,4.153846153846154,34
|
||||||
|
49,k_means,26,1,4.153846153846154,45
|
||||||
|
50,hierarchical,27,1,4.0,34
|
||||||
|
51,k_means,27,1,4.0,42
|
||||||
|
52,hierarchical,28,1,3.857142857142857,34
|
||||||
|
53,k_means,28,1,3.857142857142857,41
|
||||||
|
54,hierarchical,29,1,3.7241379310344827,34
|
||||||
|
55,k_means,29,1,3.7241379310344827,41
|
||||||
|
56,hierarchical,30,1,3.6,34
|
||||||
|
57,k_means,30,1,3.6,41
|
||||||
|
58,hierarchical,31,1,3.4838709677419355,34
|
||||||
|
59,k_means,31,1,3.4838709677419355,40
|
||||||
|
60,hierarchical,32,1,3.375,34
|
||||||
|
61,k_means,32,1,3.375,38
|
||||||
|
62,hierarchical,33,1,3.272727272727273,34
|
||||||
|
63,k_means,33,1,3.272727272727273,36
|
||||||
|
64,hierarchical,34,1,3.176470588235294,34
|
||||||
|
65,k_means,34,1,3.176470588235294,36
|
||||||
|
66,hierarchical,35,1,3.085714285714286,34
|
||||||
|
67,k_means,35,1,3.085714285714286,34
|
||||||
|
68,hierarchical,36,1,3.0,34
|
||||||
|
69,k_means,36,1,3.0,33
|
||||||
|
70,hierarchical,37,1,2.918918918918919,34
|
||||||
|
71,k_means,37,1,2.918918918918919,31
|
||||||
|
72,hierarchical,38,1,2.8421052631578947,34
|
||||||
|
73,k_means,38,1,2.8421052631578947,31
|
||||||
|
74,hierarchical,39,1,2.769230769230769,33
|
||||||
|
75,k_means,39,1,2.769230769230769,29
|
||||||
|
76,hierarchical,40,1,2.7,33
|
||||||
|
77,k_means,40,1,2.7,29
|
||||||
|
78,hierarchical,41,1,2.6341463414634148,33
|
||||||
|
79,k_means,41,1,2.6341463414634148,28
|
||||||
|
80,hierarchical,42,1,2.5714285714285716,33
|
||||||
|
81,k_means,42,1,2.5714285714285716,28
|
||||||
|
82,hierarchical,43,1,2.511627906976744,33
|
||||||
|
83,k_means,43,1,2.511627906976744,26
|
||||||
|
84,hierarchical,44,1,2.4545454545454546,33
|
||||||
|
85,k_means,44,1,2.4545454545454546,26
|
||||||
|
86,hierarchical,45,1,2.4,33
|
||||||
|
87,k_means,45,1,2.4,25
|
||||||
|
88,hierarchical,46,1,2.347826086956522,33
|
||||||
|
89,k_means,46,1,2.347826086956522,24
|
||||||
|
90,hierarchical,47,1,2.297872340425532,33
|
||||||
|
91,k_means,47,1,2.297872340425532,23
|
||||||
|
92,hierarchical,48,1,2.25,21
|
||||||
|
93,k_means,48,1,2.25,23
|
||||||
|
94,hierarchical,49,1,2.204081632653061,20
|
||||||
|
95,k_means,49,1,2.204081632653061,23
|
||||||
|
96,hierarchical,50,1,2.16,18
|
||||||
|
97,k_means,50,1,2.16,22
|
||||||
|
98,hierarchical,51,1,2.1176470588235294,17
|
||||||
|
99,k_means,51,1,2.1176470588235294,21
|
||||||
|
100,hierarchical,52,1,2.076923076923077,16
|
||||||
|
101,k_means,52,1,2.076923076923077,20
|
||||||
|
102,hierarchical,53,1,2.0377358490566038,16
|
||||||
|
103,k_means,53,1,2.0377358490566038,18
|
||||||
|
104,hierarchical,54,1,2.0,16
|
||||||
|
105,k_means,54,1,2.0,18
|
||||||
|
106,hierarchical,55,1,1.9636363636363636,16
|
||||||
|
107,k_means,55,1,1.9636363636363636,19
|
||||||
|
108,hierarchical,56,1,1.9285714285714286,16
|
||||||
|
109,k_means,56,1,1.9285714285714286,18
|
||||||
|
110,hierarchical,57,1,1.894736842105263,16
|
||||||
|
111,k_means,57,1,1.894736842105263,18
|
||||||
|
112,hierarchical,58,1,1.8620689655172413,16
|
||||||
|
113,k_means,58,1,1.8620689655172413,18
|
||||||
|
114,hierarchical,59,1,1.8305084745762712,16
|
||||||
|
115,k_means,59,1,1.8305084745762712,18
|
||||||
|
116,hierarchical,60,1,1.8,16
|
||||||
|
117,k_means,60,1,1.8,17
|
||||||
|
118,hierarchical,61,1,1.7704918032786885,16
|
||||||
|
119,k_means,61,1,1.7704918032786885,17
|
||||||
|
120,hierarchical,62,1,1.7419354838709677,16
|
||||||
|
121,k_means,62,1,1.7419354838709677,17
|
||||||
|
122,hierarchical,63,1,1.7142857142857142,16
|
||||||
|
123,k_means,63,1,1.7142857142857142,17
|
||||||
|
124,hierarchical,64,1,1.6875,16
|
||||||
|
125,k_means,64,1,1.6875,17
|
|
BIN
clustering/org.apache.xerces.xinclude.XIncludeHandler_stats.png
Normal file
BIN
clustering/org.apache.xerces.xinclude.XIncludeHandler_stats.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 102 KiB |
|
@ -25,10 +25,16 @@ def intrapairs(path: str) -> set[set[str, str]]:
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
filelist = glob.glob(IN_DIR + '/*_groundtruth.csv')
|
filelist = glob.glob(IN_DIR + '/*_groundtruth.csv')
|
||||||
|
df_table = pd.DataFrame(columns=pd.MultiIndex.from_tuples([
|
||||||
|
('KMeans', 'Precision'),
|
||||||
|
('KMeans', 'Recall'),
|
||||||
|
('Agglomerative', 'Precision'),
|
||||||
|
('Agglomerative', 'Recall')]))
|
||||||
|
df_table.index.name = 'Class Name'
|
||||||
|
|
||||||
for f in filelist:
|
for f in filelist:
|
||||||
clazz_name = os.path.basename(f)
|
clazz_name = os.path.basename(f)
|
||||||
clazz_name = clazz_name[:clazz_name.rfind('_groundtruth.csv')]
|
clazz_name = clazz_name[:clazz_name.rfind('_groundtruth.csv')]
|
||||||
print(clazz_name)
|
|
||||||
|
|
||||||
ground_pairs = intrapairs(f)
|
ground_pairs = intrapairs(f)
|
||||||
for method in ['kmeans', 'hierarchical']:
|
for method in ['kmeans', 'hierarchical']:
|
||||||
|
@ -39,10 +45,15 @@ def main():
|
||||||
precision = n_common / len(cluster_pairs)
|
precision = n_common / len(cluster_pairs)
|
||||||
recall = n_common / len(ground_pairs)
|
recall = n_common / len(ground_pairs)
|
||||||
|
|
||||||
print(method + " precision: " + str(precision))
|
algo = 'KMeans' if method == 'kmeans' else 'Agglomerative'
|
||||||
print(method + " recall: " + str(recall))
|
|
||||||
|
|
||||||
print()
|
df_table.loc[clazz_name, [(algo, 'Precision'), (algo, 'Recall')]] = [
|
||||||
|
str(round(precision * 100, 2)) + '%',
|
||||||
|
str(round(recall * 100, 2)) + '%'
|
||||||
|
]
|
||||||
|
|
||||||
|
df_table.columns = [x[0] + ' ' + x[1] for x in df_table.columns]
|
||||||
|
print(df_table.to_markdown())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
8
report/build.sh
Executable file
8
report/build.sh
Executable file
|
@ -0,0 +1,8 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
SCRIPT_DIR=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
|
||||||
|
|
||||||
|
cd "$SCRIPT_DIR"
|
||||||
|
pandoc main.md -o main.pdf
|
255
report/main.md
255
report/main.md
|
@ -1,7 +1,7 @@
|
||||||
---
|
---
|
||||||
author: Claudio Maggioni
|
author: Claudio Maggioni
|
||||||
title: Information Modelling & Analysis -- Project 1
|
title: Information Modelling & Analysis -- Project 1
|
||||||
geometry: margin=2.5cm,bottom=3cm
|
geometry: margin=2cm,bottom=3cm
|
||||||
---
|
---
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
|
@ -9,59 +9,62 @@ The following shows a minimal submission report for project 1. If you
|
||||||
choose to use this template, replace all template instructions (the
|
choose to use this template, replace all template instructions (the
|
||||||
yellow bits) with your own values. In addition, for any section, if
|
yellow bits) with your own values. In addition, for any section, if
|
||||||
**and only if** anything was unclear or warnings were raised by the
|
**and only if** anything was unclear or warnings were raised by the
|
||||||
code, and you had to take assumptions about the correct implementation
|
code, and you had to take assumptions about the correct implementation (e.g.,
|
||||||
(e.g., about details of a metric), describe your assumptions in one or
|
about details of a metric), describe your assumptions in one or two sentences.
|
||||||
two sentences.
|
|
||||||
|
|
||||||
You may - at your own risk - also choose not to use this template. As
|
You may - at your own risk - also choose not to use this template. As long as
|
||||||
long as your submission is a latex-generated, English PDF containing all
|
your submission is a latex-generated, English PDF containing all expected info,
|
||||||
expected info, you'll be fine.
|
you'll be fine. -->
|
||||||
-->
|
|
||||||
|
|
||||||
# Code Repository
|
# Code Repository
|
||||||
|
|
||||||
The code and result files part of this submission can be found at:
|
The code and result files part of this submission can be found at:
|
||||||
|
|
||||||
::: center
|
::: center
|
||||||
Repository: \url{https://github.com/infoMA2023/project-01-god-classes-maggicl}
|
Repository: \url{https://github.com/infoMA2023/project-01-god-classes-maggicl}
|
||||||
|
|
||||||
Commit ID: **TBD**
|
Commit ID: **TBD**
:::
|
||||||
:::
|
|
||||||
|
|
||||||
# Data Pre-Processing
|
# Data Pre-Processing
|
||||||
|
|
||||||
## God Classes
|
## God Classes
|
||||||
|
|
||||||
The first part of the project requires labelling some classes of the _Xerces_
|
The first part of the project requires labelling some classes of the _Xerces_
|
||||||
project as "God classes" based on the number of methods each class has.
|
project as "God classes" based on the number of methods each class has. From
|
||||||
|
here onwards the Java package prefix `org.apache.xerces` is omitted when discussing
|
||||||
|
fully qualified names of classes for the sake of brevity.
|
||||||
|
|
||||||
Specifically, I label "God classes" the classes that have a number of methods
|
Specifically, I label "God classes" the classes that have a number of methods
|
||||||
six times the standard deviation above the the mean number of methods, i.e. where
|
six times the standard deviation above the mean number of methods, i.e.
|
||||||
the condition
|
where the condition
|
||||||
|
|
||||||
$$|M(C)| > \mu(M) + 6\sigma(M)$$
|
$$|M(C)| > \mu(M) + 6\sigma(M)$$
|
||||||
|
|
||||||
holds.
|
holds.
|
||||||
|
|
||||||
To scan and compute the number of methods of each class I use the Python library `javalang`, which implements the Java AST and parser. The Python script
|
To scan and compute the number of methods of each class I use the Python library
|
||||||
|
`javalang`, which implements the Java AST and parser. The Python script
|
||||||
`./find_god_classes.py` uses this library to parse each file in the project and
|
`./find_god_classes.py` uses this library to parse each file in the project and
|
||||||
compute the number of methods of each class. Note that only non-constructor methods are counted (specifically the code counts the number of `method` nodes in each `ClassDeclaration` node).
|
compute the number of methods of each class. Note that only non-constructor
|
||||||
|
methods are counted (specifically the code counts the number of `method` nodes
|
||||||
|
in each `ClassDeclaration` node).
|
||||||
|
|
||||||
Then, the script computes mean and standard
|
Then, the script computes mean and standard deviation of the number of methods
|
||||||
deviation of the number of methods and filters the list of classes according to the
|
and filters the list of classes according to the condition described above. The
|
||||||
condition described above. The file `god_classes/god_classes.csv` then is outputted
|
file `god_classes/god_classes.csv` is then written, listing all the god classes
|
||||||
listing all the god classes found.
|
found.
|
||||||
|
|
||||||
The god classes I identified, and their corresponding number of methods
|
The god classes I identified, and their corresponding number of methods can be
|
||||||
can be found in Table [1](#tab:god_classes){reference-type="ref"
|
found in Table [1](#tab:god_classes){reference-type="ref"
|
||||||
reference="tab:god_classes"}.
|
reference="tab:god_classes"}.
|
||||||
|
|
||||||
::: {#tab:god_classes}
|
::: {#tab:god_classes}
|
||||||
| **Class Name** | **# Methods** |
|
| **Class Name** | **# Methods** |
|
||||||
|:------------------------------------------------|------------:|
|
|:------------------------------------------------|------------:|
|
||||||
| org.apache.xerces.impl.xs.traversers.XSDHandler | 118 |
|
| impl.xs.traversers.XSDHandler | 118 |
|
||||||
| org.apache.xerces.impl.dtd.DTDGrammar | 101 |
|
| impl.dtd.DTDGrammar | 101 |
|
||||||
| org.apache.xerces.xinclude.XIncludeHandler | 116 |
|
| xinclude.XIncludeHandler | 116 |
|
||||||
| org.apache.xerces.dom.CoreDocumentImpl | 125 |
|
| dom.CoreDocumentImpl | 125 |
|
||||||
|
|
||||||
: Identified God Classes
|
: Identified God Classes
|
||||||
:::
|
:::
|
||||||
|
@ -70,84 +73,198 @@ reference="tab:god_classes"}.
|
||||||
## Feature Vectors
|
## Feature Vectors
|
||||||
|
|
||||||
In this part of the project we produce the feature vectors used to later cluster
|
In this part of the project we produce the feature vectors used to later cluster
|
||||||
the methods of each God class into separate clusters. We produce one feature method per
|
the methods of each God class into separate clusters. We produce one feature
|
||||||
non-constructor Java method in each god class.
|
vector per non-constructor Java method in each god class.
|
||||||
|
|
||||||
The columns of each vector represent
|
The columns of each vector represent fields and methods referenced by each
|
||||||
fields and methods referenced by each method, i.e. fields and methods actively used by the method in their method's body.
|
method, i.e. fields and methods actively used by the method in their method's
|
||||||
|
body.
|
||||||
|
|
||||||
When analyzing references to fields, additional constraints need to be specified to handle edge cases.
|
When analyzing references to fields, additional constraints need to be specified
|
||||||
Namely, a field's property may be referenced (e.g. an access to array `a` may fetch its `length` property, i.e. `a.length`). In this
|
to handle edge cases. Namely, a field's property may be referenced (e.g. an
|
||||||
cases I consider the qualifier (i.e. the field itself, `a`) itself and not its property. When the qualifier is a class (i.e.
|
access to array `a` may fetch its `length` property, i.e. `a.length`). In these
|
||||||
the code references a property of another class, e.g. `Integer.MAX_VALUE`) we consider the class name itself (i.e. `Integer`) and not
|
cases I consider the qualifier (i.e. the field itself, `a`) itself and not its
|
||||||
the name of the property. Should the qualifier be a subproperty itself (e.g. in `a.b.c`, where `a.b` would be the qualifier according to `javalang`)
|
property. When the qualifier is a class (i.e. the code references a property of
|
||||||
|
another class, e.g. `Integer.MAX_VALUE`) we consider the class name itself (i.e.
|
||||||
|
`Integer`) and not the name of the property. Should the qualifier be a
|
||||||
|
subproperty itself (e.g. in `a.b.c`, where `a.b` would be the qualifier
|
||||||
|
according to `javalang`)
|
||||||
|
|
||||||
For methods, I only consider calls to methods of the class itself where the qualifier is unspecified or `this`. Calls to parent methods
|
For methods, I only consider calls to methods of the class itself where the
|
||||||
(i.e. calls like `super.something()`) are not considered.
|
qualifier is unspecified or `this`. Calls to parent methods (i.e. calls like
|
||||||
|
`super.something()`) are not considered.
|
||||||
|
|
||||||
The feature vector extraction phase is performed by the Python script `extract_feature_vectors.py`. The script takes `god_classes/god_classes.csv` as input
|
The feature vector extraction phase is performed by the Python script
|
||||||
and loads the AST of each class listed in it. Then, a list of all the fields and methods in the class is built, and each method is scanned to see which fields
|
`extract_feature_vectors.py`. The script takes `god_classes/god_classes.csv` as
|
||||||
and methods it references in its body according to the previously described rules. Then, a CSV per class is built storing all feature vectors. Each file has a name matching to the FQDN (Fully-qualified domain name) of the class. Each CSV row refers to a method in the class, and each CSV column refers to a field, method or referenced class. A cell has the value of 1 when the method of that row references the field, method or class marked by that column, and it has the value 0 otherwise. Columns with only zeros are omitted.
|
input and loads the AST of each class listed in it. Then, a list of all the
|
||||||
|
fields and methods in the class is built, and each method is scanned to see
|
||||||
|
which fields and methods it references in its body according to the previously
|
||||||
|
described rules. Then, a CSV per class is built storing all feature vectors.
|
||||||
|
Each file has a name matching to the FQDN (Fully-qualified domain name) of the
|
||||||
|
class. Each CSV row refers to a method in the class, and each CSV column refers
|
||||||
|
to a field, method or referenced class. A cell has the value of 1 when the
|
||||||
|
method of that row references the field, method or class marked by that column,
|
||||||
|
and it has the value 0 otherwise. Columns with only zeros are omitted.
|
||||||
|
|
||||||
Table [2](#tab:feat_vec){reference-type="ref" reference="tab:feat_vec"}
|
Table [2](#tab:feat_vec){reference-type="ref" reference="tab:feat_vec"} shows
|
||||||
shows aggregate numbers regarding the extracted feature vectors for the
|
aggregate numbers regarding the extracted feature vectors for the god classes.
|
||||||
god classes. Note that the number of attributes refers to the number of fields, methods or classes actually references (i.e. the number of columns after omission of 0s).
|
Note that the number of attributes refers to the number of fields, methods or
|
||||||
|
classes actually referenced (i.e. the number of columns after omission of 0s).
|
||||||
|
|
||||||
::: {#tab:feat_vec}
|
::: {#tab:feat_vec}
|
||||||
| **Class Name** | **# Feature Vectors** | **# Attributes\*** |
|
| **Class Name** | **# Feature Vectors** | **# Attributes\*** |
|
||||||
|:------------------------------------------------|----------------------:|-----------------:|
|
|:------------------------------------------------|----------------------:|-----------------:|
|
||||||
| org.apache.xerces.impl.xs.traversers.XSDHandler | 106 | 183 |
|
| impl.xs.traversers.XSDHandler | 106 | 183 |
|
||||||
| org.apache.xerces.impl.dtd.DTDGrammar | 91 | 106 |
|
| impl.dtd.DTDGrammar | 91 | 106 |
|
||||||
| org.apache.xerces.xinclude.XIncludeHandler | 108 | 143 |
|
| xinclude.XIncludeHandler | 108 | 143 |
|
||||||
| org.apache.xerces.dom.CoreDocumentImpl | 117 | 63 |
|
| dom.CoreDocumentImpl | 117 | 63 |
|
||||||
|
|
||||||
: Feature vector summary (\*= used at least once)
|
: Feature vector summary (\*= used at least once)
|
||||||
:::
|
:::
|
||||||
|
|
||||||
# Clustering {#sec:clustering}
|
# Clustering {#sec:clustering}
|
||||||
|
|
||||||
|
In this section I cover the techniques to cluster the methods of each god
|
||||||
|
class. The project aims to use KMeans clustering and agglomerative hierarchical
|
||||||
|
clustering to group these methods together in cohesive units which could be
|
||||||
|
potentially refactored out of the god class they belong to.
|
||||||
|
|
||||||
## Algorithm Configurations
|
## Algorithm Configurations
|
||||||
|
|
||||||
Report/comment the algorithm configurations (distance function, linkage
|
To perform KMeans clustering, I use the `cluster.KMeans` Scikit-Learn
|
||||||
rule, etc.). You may do so in any form you feel suited, but a short
|
implementation of the algorithm. I use the default parameters: feature vectors
|
||||||
paragraph of text is probably sufficient.
|
are compared with Euclidean distance, centroids are used instead of medoids,
|
||||||
|
and the initial centroids are computed with the greedy algorithm `k-means++`. The
|
||||||
|
random seed is fixed to $0$ to allow for reproducibility between executions of
|
||||||
|
the clustering script.
|
||||||
|
|
||||||
|
To perform Hierarchical clustering, I use the `cluster.AgglomerativeClustering`
|
||||||
|
Scikit-Learn implementation of the algorithm. Again feature vectors are
|
||||||
|
compared with Euclidean distance, but as a linkage metric I choose to use
|
||||||
|
complete linkage. As agglomerative clustering is deterministic, no random seed
|
||||||
|
is needed for this algorithm.
|
||||||
|
|
||||||
|
I run the two algorithms for all $k \in [2,65]$, or if fewer than 65 feature
|
||||||
|
vectors with distinct values are assigned to the god class, the upper bound of
|
||||||
|
$k$ is such value.
|
||||||
|
|
||||||
## Testing Various K & Silhouette Scores
|
## Testing Various K & Silhouette Scores
|
||||||
|
|
||||||
\(1\) Report data about the clusters produced by the two algorithms at
|
To find the optimal value of $k$ for both algorithms, the distribution of
|
||||||
various k (#clusters, size of clusters, silhouette scores). You may use
|
cluster sizes and silhouette across values of $k$, and to apply the optimal
|
||||||
any suitable format (table, graph, \...).
|
clustering for each god class I run the command:
|
||||||
|
|
||||||
\(2\) Briefly comment your results. What is the best configuration, and
|
```shell
|
||||||
why? Anything else you observed?
|
./silhouette.py --validate --autorun
|
||||||
|
```
|
||||||
|
|
||||||
|
Feature vectors are read from the `feature_vectors` directory and all the
|
||||||
|
results are stored in the `clustering` directory.
|
||||||
|
|
||||||
|
Figures [1](#fig:xsd){reference-type="ref" reference="fig:xsd"},
|
||||||
|
[2](#fig:dtd){reference-type="ref" reference="fig:dtd"},
|
||||||
|
[3](#fig:xinc){reference-type="ref" reference="fig:xinc"}, and
|
||||||
|
[4](#fig:cimpl){reference-type="ref" reference="fig:cimpl"} show the
|
||||||
|
distributions of cluster sizes for each god class obtained by running the KMeans
|
||||||
|
and agglomerative clustering algorithm as described in the previous sections.
|
||||||
|
|
||||||
|
For all god classes, the mean number of elements in each cluster
|
||||||
|
exponentially decreases as $k$ increases. Aside from the first values of $k$ for
|
||||||
|
class `DTDGrammar` (where it was 2), the minimum cluster size was 1 for all
|
||||||
|
analyzed clusterings. Conversely, the maximum cluster size varies a lot, almost
|
||||||
|
always being monotonically non-increasing as $k$ increases, occasionally forming
|
||||||
|
wide plateaus. The silhouette metric distribution instead generally follows a
|
||||||
|
dogleg-like path, sharply decreasing for the first values of $k$ and slowly
|
||||||
|
increasing afterwards. This leads the choice of the optimal $k$ number of
|
||||||
|
clusters for each algorithm to be between really low and really high values.
|
||||||
|
|
||||||
|
The figures also show the distribution of the silhouette metric per algorithm
|
||||||
|
and per value of $k$. The optimal values of $k$ and the respective silhouette
|
||||||
|
values for each implementation are reported in Table
|
||||||
|
[3](#tab:sumup){reference-type="ref" reference="tab:sumup"}.
|
||||||
|
|
||||||
|
From the values we can gather that agglomerative clustering performs overall
|
||||||
|
better than KMeans for the god classes in the project. Almost all god classes are
|
||||||
|
optimally clustered with few clusters, with the exception of `CoreDocumentImpl`
|
||||||
|
being optimally clustered with unit clusters. This could indicate higher
|
||||||
|
cohesion between implementation details of the other classes, and lower cohesion
|
||||||
|
in `CoreDocumentImpl` (given the name it would not be surprising if this class
|
||||||
|
plays the role of a utility class of sorts, combining lots of implementation
|
||||||
|
details affecting different areas of the code).
|
||||||
|
|
||||||
|
Agglomerative clustering with complete linkage could perform better than KMeans
|
||||||
|
due to a more urgent need for separation rather than cohesion in the classes
|
||||||
|
that were analyzed. Given the high dimensionality of the feature vectors used,
|
||||||
|
and the fact that Euclidean distance is used to compare feature vectors, the
|
||||||
|
hyper-space of method features for each god class is likely sparse, with
|
||||||
|
occasional clusters of tightly-knit features. Given the prevailing sparsity,
|
||||||
|
complete linkage could be suitable here since it avoids agglomerating distant
|
||||||
|
clusters above all.
|
||||||
|
|
||||||
|
![Clustering metrics for class impl.xs.traversers.XSDHandler](../clustering/org.apache.xerces.impl.xs.traversers.XSDHandler_stats.png){#fig:xsd}
|
||||||
|
|
||||||
|
![Clustering metrics for class impl.dtd.DTDGrammar](../clustering/org.apache.xerces.impl.dtd.DTDGrammar_stats.png){#fig:dtd}
|
||||||
|
|
||||||
|
![Clustering metrics for class xinclude.XIncludeHandler](../clustering/org.apache.xerces.xinclude.XIncludeHandler_stats.png){#fig:xinc}
|
||||||
|
|
||||||
|
![Clustering metrics for class dom.CoreDocumentImpl](../clustering/org.apache.xerces.dom.CoreDocumentImpl_stats.png){#fig:cimpl}
|
||||||
|
|
||||||
|
::: {#tab:sumup}
|
||||||
|
| **Class Name** | **KMeans K** | **KMeans silhouette** | **Hierarchical K** | **Hierarchical silhouette** |
|
||||||
|
|:----------------------------|-----------:|--------------------:|-----------------:|--------------------------:|
|
||||||
|
| dom.CoreDocumentImpl | 45 |0.7290 | 45 | 0.7290 |
|
||||||
|
| impl.xs.traversers.XSDHandler | 2 |0.5986 | 3 | 0.5989 |
|
||||||
|
| impl.dtd.DTDGrammar | 58 |0.3980 | 2 | 0.4355 |
|
||||||
|
| xinclude.XIncludeHandler | 2 |0.6980 | 2 | 0.6856 |
|
||||||
|
|
||||||
|
: Optimal hyperparameters and corresponding silhouette metrics for KMeans and
|
||||||
|
Hierarchical clustering algorithm.
|
||||||
|
:::
|
||||||
|
|
||||||
# Evaluation
|
# Evaluation
|
||||||
|
|
||||||
## Ground Truth
|
## Ground Truth
|
||||||
|
|
||||||
I computed the ground truth using the command \.... The generated files
|
I computed the ground truth using the Python script `./ground_truth.py`. The
|
||||||
are checked into the repository with the names \....
|
generated files are checked into the repository with the names
|
||||||
|
`clustering/{className}_groundtruth.csv` where `{className}` is the FQDN of each
|
||||||
|
god class.
|
||||||
|
|
||||||
Comment briefly on the strengths & weaknesses of our ground truth.
|
The ground truth in this project is not given but generated according to simple
|
||||||
|
heuristics. Since no inherent structure or labelling from experts exists to
|
||||||
|
group the methods in each god class, the project requires labelling methods based
|
||||||
|
on keyword matching within each method name. The list of keywords used can be
|
||||||
|
found in `keyword_list.txt`. This approach allows to have a ground truth at all
|
||||||
|
with little computational cost and labelling effort, but it assumes the method
|
||||||
|
name and the chosen keywords are indeed of enough significance to form a
|
||||||
|
meaningful clustering of methods that form refactorable cohesive units of
|
||||||
|
functionality.
|
||||||
|
|
||||||
## Precision and Recall
|
## Precision and Recall
|
||||||
|
|
||||||
::: {#tab:eval}
|
::: {#tab:eval}
|
||||||
---------------- ------------------- -------- ------------- --------
|
| **Class Name** | **KMeans Precision** | **KMeans Recall** | **Agglomerative Precision** | **Agglomerative Recall** |
|
||||||
**Class Name** **Agglomerative** **K-Means**
|
|:------------------------------------------------|-------------------:|----------------:|--------------------------:|-----------------------:|
|
||||||
Prec. Recall Prec. Recall
|
| xinclude.XIncludeHandler | 69.83% | 97.80% | 69.58% | 95.65% |
|
||||||
\... \... \... \... \...
|
| dom.CoreDocumentImpl | 64.80% | 28.26% | 68.11% | 29.70% |
|
||||||
---------------- ------------------- -------- ------------- --------
|
| impl.xs.traversers.XSDHandler | 36.17% | 97.24% | 36.45% | 96.11% |
|
||||||
|
| impl.dtd.DTDGrammar | 87.65% | 6.87% | 52.21% | 94.28% |
|
||||||
|
|
||||||
: Evaluation Summary
|
: Evaluation Summary
|
||||||
:::
|
:::
|
||||||
|
|
||||||
Precision and Recall, for the optimal configurations found in Section
|
Precision and Recall, for the optimal configurations found in Section
|
||||||
[3](#sec:clustering){reference-type="ref" reference="sec:clustering"},
|
[3](#sec:clustering){reference-type="ref" reference="sec:clustering"}, are
|
||||||
are reported in Table [3](#tab:eval){reference-type="ref"
|
reported in Table [4](#tab:eval){reference-type="ref" reference="tab:eval"}.
|
||||||
reference="tab:eval"}.
|
|
||||||
|
\begin{center}
|
||||||
|
\color{red} comment precision and recall values
|
||||||
|
\end{center}
|
||||||
|
|
||||||
## Practical Usefulness
|
## Practical Usefulness
|
||||||
|
|
||||||
Discuss the practical usefulness of the obtained code refactoring
|
\begin{center}
|
||||||
assistant in a realistic setting (1 paragraph).
|
\color{red}Discuss the practical usefulness of the obtained code refactoring assistant in a
|
||||||
|
realistic setting (1 paragraph).
|
||||||
|
\end{center}
|
||||||
|
|
||||||
|
|
BIN
report/main.pdf
BIN
report/main.pdf
Binary file not shown.
|
@ -8,6 +8,9 @@ import pandas as pd
|
||||||
import argparse
|
import argparse
|
||||||
from k_means import cluster_kmeans
|
from k_means import cluster_kmeans
|
||||||
from hierarchical import cluster_hierarchical
|
from hierarchical import cluster_hierarchical
|
||||||
|
from collections import Counter
|
||||||
|
import seaborn as sns
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
DIR: str = os.path.dirname(os.path.realpath(__file__))
|
DIR: str = os.path.dirname(os.path.realpath(__file__))
|
||||||
OUT_DIR: str = DIR + '/clustering'
|
OUT_DIR: str = DIR + '/clustering'
|
||||||
|
@ -20,47 +23,91 @@ def clean_output():
|
||||||
filelist = glob.glob(OUT_DIR + '/*_silhouette.csv')
|
filelist = glob.glob(OUT_DIR + '/*_silhouette.csv')
|
||||||
for f in filelist:
|
for f in filelist:
|
||||||
os.remove(f)
|
os.remove(f)
|
||||||
|
filelist = glob.glob(OUT_DIR + '/*.png')
|
||||||
|
for f in filelist:
|
||||||
|
os.remove(f)
|
||||||
|
|
||||||
|
|
||||||
def validate(path: str, clazz_name: str, autorun: bool):
|
def validate(path: str, clazz_name: str, autorun: bool, df_table):
|
||||||
df = pd.DataFrame(columns=['k_means', 'hierarchical'], dtype=float)
|
df = pd.DataFrame(columns=['k_means', 'hierarchical'], dtype=float)
|
||||||
|
df_stats = pd.DataFrame(columns=['algorithm', 'k', 'min', 'mean', 'max'])
|
||||||
|
|
||||||
|
def add_stat(algo: str, k: int, Y: any, i: int):
|
||||||
|
y_occurs = list(Counter(Y).values()) # count number of elements in each cluster
|
||||||
|
df_stats.loc[i, :] = [algo, k, np.min(y_occurs), np.mean(y_occurs), np.max(y_occurs)]
|
||||||
|
|
||||||
# We bound the number of clusters by the number of distinct points in our dataset.
|
# We bound the number of clusters by the number of distinct points in our dataset.
|
||||||
# To count them, we compute the number of "distinct" feature vectors and we
|
# To count them, we compute the number of "distinct" feature vectors and we
|
||||||
# bound to the minimum of K_MAX and this number.
|
# bound to the minimum of K_MAX and this number.
|
||||||
nodup = pd.read_csv(path, index_col=0).drop_duplicates()
|
nodup = pd.read_csv(path, index_col=0).drop_duplicates()
|
||||||
max_distinct = len(nodup)
|
max_distinct = len(nodup)
|
||||||
print("Max distinct:", max_distinct)
|
limit = min(K_MAX, max_distinct)
|
||||||
|
|
||||||
for n in range(2, min(K_MAX, max_distinct)):
|
i: int = 0
|
||||||
|
for n in range(2, limit):
|
||||||
X_h, Y_h = cluster_hierarchical(path, n, save_to_disk=False)
|
X_h, Y_h = cluster_hierarchical(path, n, save_to_disk=False)
|
||||||
df.loc[n, 'k_means'] = silhouette_score(X_h, Y_h)
|
add_stat('hierarchical', n, Y_h, i)
|
||||||
|
i += 1
|
||||||
|
df.loc[n, 'hierarchical'] = silhouette_score(X_h, Y_h)
|
||||||
|
|
||||||
X_k, Y_k = cluster_kmeans(path, n, save_to_disk=False)
|
X_k, Y_k = cluster_kmeans(path, n, save_to_disk=False)
|
||||||
df.loc[n, 'hierarchical'] = silhouette_score(X_k, Y_k)
|
add_stat('k_means', n, Y_k, i)
|
||||||
|
i += 1
|
||||||
|
df.loc[n, 'k_means'] = silhouette_score(X_k, Y_k)
|
||||||
|
|
||||||
k_kmeans = df[['k_means']].idxmax()[0]
|
k_kmeans = df[['k_means']].idxmax()[0]
|
||||||
k_hierarchical = df[['hierarchical']].idxmax()[0]
|
k_hierarchical = df[['hierarchical']].idxmax()[0]
|
||||||
|
|
||||||
print("K_means optimal value: " + str(k_kmeans))
|
df_table.loc[clazz_name] = [k_kmeans, 0, k_hierarchical, 0]
|
||||||
print("Hierarchical optimal value: " + str(k_hierarchical))
|
|
||||||
|
|
||||||
df.to_csv(OUT_DIR + '/' + clazz_name + '_silhouette.csv')
|
df.to_csv(OUT_DIR + '/' + clazz_name + '_silhouette.csv')
|
||||||
|
df_stats.to_csv(OUT_DIR + '/' + clazz_name + '_stats.csv')
|
||||||
|
|
||||||
if autorun:
|
if autorun:
|
||||||
cluster_hierarchical(path, k_hierarchical)
|
cluster_hierarchical(path, k_hierarchical)
|
||||||
cluster_kmeans(path, k_kmeans)
|
cluster_kmeans(path, k_kmeans)
|
||||||
|
|
||||||
|
# Plot stats
|
||||||
|
sns.set_theme(palette="hls")
|
||||||
|
|
||||||
|
# Initialize the matplotlib figure
|
||||||
|
f = plt.figure(figsize=(14, 12))
|
||||||
|
gs = f.add_gridspec(2, 2)
|
||||||
|
ax1 = f.add_subplot(gs[0, 0])
|
||||||
|
ax2 = f.add_subplot(gs[0, 1])
|
||||||
|
ax3 = f.add_subplot(gs[1, :])
|
||||||
|
|
||||||
|
df_k = df_stats.loc[df_stats.algorithm == 'k_means', ['k', 'min', 'mean', 'max']].set_index('k', drop=True)
|
||||||
|
df_h = df_stats.loc[df_stats.algorithm == 'hierarchical', ['k', 'min', 'mean', 'max']].set_index('k', drop=True)
|
||||||
|
|
||||||
|
sns.lineplot(data=df_k, palette="tab10", ax=ax1)
|
||||||
|
sns.lineplot(data=df_h, palette="tab10", ax=ax2)
|
||||||
|
sns.lineplot(data=df, palette="tab10", ax=ax3)
|
||||||
|
|
||||||
|
# Add a legend and informative axis label
|
||||||
|
ax1.set(ylabel="# of elements", ylim=[0, 130], xlabel="# of clusters", xlim=[2, limit])
|
||||||
|
ax1.set_title("K-Means cluster sizes")
|
||||||
|
ax2.set(ylabel="# of elements", ylim=[0, 130], xlabel="# of clusters", xlim=[2, limit])
|
||||||
|
ax2.set_title("Hierarchical cluster sizes")
|
||||||
|
ax3.set(ylabel="Silhouette", ylim=[0, 1], xlabel="# of clusters", xlim=[2, limit])
|
||||||
|
ax3.set_title("Silhouette metrics per # of clusters")
|
||||||
|
|
||||||
|
sns.despine(left=True, bottom=True)
|
||||||
|
f.savefig(OUT_DIR + '/' + clazz_name + '_stats.png')
|
||||||
|
plt.clf()
|
||||||
|
|
||||||
|
|
||||||
def compute_silhouette(path: str, clazz_name: str, suffix: str):
|
def compute_silhouette(path: str, clazz_name: str, suffix: str) -> float:
|
||||||
df_y = pd.read_csv(OUT_DIR + '/' + clazz_name + '_' + suffix + '.csv')
|
df_y = pd.read_csv(OUT_DIR + '/' + clazz_name + '_' + suffix + '.csv')
|
||||||
Y = df_y.iloc[:, 1].values
|
Y = df_y.iloc[:, 1].values
|
||||||
|
|
||||||
df = pd.read_csv(path)
|
df = pd.read_csv(path)
|
||||||
X = df.drop(df.columns[0], axis=1).to_numpy()
|
X = df.drop(df.columns[0], axis=1).to_numpy()
|
||||||
|
|
||||||
print("Silhouette for " + suffix + ": " + str(silhouette_score(X, Y)))
|
s = round(silhouette_score(X, Y), 4)
|
||||||
|
|
||||||
|
print("Silhouette for " + suffix + ": " + str(s))
|
||||||
|
return s
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -70,26 +117,30 @@ def main():
|
||||||
parser.add_argument('--autorun', action='store_true',
|
parser.add_argument('--autorun', action='store_true',
|
||||||
help='if validating, computes CSV for optimal clustering automatically')
|
help='if validating, computes CSV for optimal clustering automatically')
|
||||||
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.validate:
|
if args.validate:
|
||||||
clean_output()
|
clean_output()
|
||||||
|
|
||||||
|
df_table = pd.DataFrame(columns=['KMeans K', 'KMeans silhouette', 'Hierarchical K', 'Hierarchical silhouette'])
|
||||||
|
|
||||||
filelist = glob.glob(IN_DIR + '/*.csv')
|
filelist = glob.glob(IN_DIR + '/*.csv')
|
||||||
for f in filelist:
|
for f in filelist:
|
||||||
clazz_name = os.path.basename(f)
|
clazz_name = os.path.basename(f)
|
||||||
clazz_name = clazz_name[:clazz_name.rfind('.')]
|
clazz_name = clazz_name[:clazz_name.rfind('.')]
|
||||||
|
|
||||||
print(clazz_name)
|
if args.validate:
|
||||||
|
validate(f, clazz_name, args.autorun, df_table)
|
||||||
|
|
||||||
|
sk = compute_silhouette(f, clazz_name, 'kmeans')
|
||||||
|
sh = compute_silhouette(f, clazz_name, 'hierarchical')
|
||||||
|
|
||||||
if args.validate:
|
if args.validate:
|
||||||
validate(f, clazz_name, args.autorun)
|
df_table.loc[clazz_name, 'KMeans silhouette'] = sk
|
||||||
|
df_table.loc[clazz_name, 'Hierarchical silhouette'] = sh
|
||||||
|
|
||||||
compute_silhouette(f, clazz_name, 'kmeans')
|
df_table.index.name = 'Class Name'
|
||||||
compute_silhouette(f, clazz_name, 'hierarchical')
|
print(df_table.to_markdown())
|
||||||
|
|
||||||
print()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Reference in a new issue