done part 3 and part 4
This commit is contained in:
parent
38704ac213
commit
703b77a39d
24 changed files with 1667 additions and 3 deletions
|
@ -0,0 +1,118 @@
|
||||||
|
,cluster
|
||||||
|
getOwnerDocument,0
|
||||||
|
getNodeType,0
|
||||||
|
getNodeName,0
|
||||||
|
cloneNode,0
|
||||||
|
insertBefore,0
|
||||||
|
removeChild,0
|
||||||
|
replaceChild,0
|
||||||
|
getTextContent,10
|
||||||
|
setTextContent,10
|
||||||
|
getFeature,0
|
||||||
|
createAttribute,1
|
||||||
|
createCDATASection,1
|
||||||
|
createComment,1
|
||||||
|
createDocumentFragment,1
|
||||||
|
createElement,1
|
||||||
|
createEntityReference,1
|
||||||
|
createProcessingInstruction,1
|
||||||
|
createTextNode,1
|
||||||
|
getDoctype,0
|
||||||
|
getDocumentElement,0
|
||||||
|
getElementsByTagName,0
|
||||||
|
getImplementation,0
|
||||||
|
setErrorChecking,9
|
||||||
|
setStrictErrorChecking,9
|
||||||
|
getErrorChecking,9
|
||||||
|
getStrictErrorChecking,9
|
||||||
|
getInputEncoding,6
|
||||||
|
setInputEncoding,6
|
||||||
|
setXmlEncoding,6
|
||||||
|
setEncoding,6
|
||||||
|
getXmlEncoding,6
|
||||||
|
getEncoding,6
|
||||||
|
setXmlVersion,0
|
||||||
|
setVersion,0
|
||||||
|
getXmlVersion,0
|
||||||
|
getVersion,0
|
||||||
|
setXmlStandalone,5
|
||||||
|
setStandalone,5
|
||||||
|
getXmlStandalone,5
|
||||||
|
getStandalone,5
|
||||||
|
getDocumentURI,4
|
||||||
|
canRenameElements,0
|
||||||
|
renameNode,0
|
||||||
|
replaceRenameElement,0
|
||||||
|
normalizeDocument,0
|
||||||
|
getDomConfig,0
|
||||||
|
getBaseURI,4
|
||||||
|
setDocumentURI,4
|
||||||
|
getAsync,0
|
||||||
|
setAsync,0
|
||||||
|
abort,0
|
||||||
|
load,0
|
||||||
|
loadXML,0
|
||||||
|
saveXML,0
|
||||||
|
setMutationEvents,0
|
||||||
|
getMutationEvents,0
|
||||||
|
createDocumentType,1
|
||||||
|
createEntity,1
|
||||||
|
createNotation,1
|
||||||
|
createElementDefinition,1
|
||||||
|
getNodeNumber,0
|
||||||
|
importNode,0
|
||||||
|
adoptNode,0
|
||||||
|
undeferChildren,0
|
||||||
|
getElementById,0
|
||||||
|
clearIdentifiers,7
|
||||||
|
putIdentifier,7
|
||||||
|
getIdentifier,7
|
||||||
|
removeIdentifier,7
|
||||||
|
getIdentifiers,7
|
||||||
|
createElementNS,1
|
||||||
|
createAttributeNS,1
|
||||||
|
getElementsByTagNameNS,0
|
||||||
|
clone,0
|
||||||
|
isXMLName,0
|
||||||
|
isValidQName,0
|
||||||
|
isKidOK,0
|
||||||
|
changed,0
|
||||||
|
changes,0
|
||||||
|
getNodeListCache,3
|
||||||
|
freeNodeListCache,3
|
||||||
|
setUserData,8
|
||||||
|
getUserData,8
|
||||||
|
getUserDataRecord,8
|
||||||
|
removeUserDataTable,8
|
||||||
|
setUserDataTable,8
|
||||||
|
callUserDataHandlers,8
|
||||||
|
checkNamespaceWF,0
|
||||||
|
checkDOMNSErr,0
|
||||||
|
checkQName,0
|
||||||
|
isXML11Version,0
|
||||||
|
isNormalizeDocRequired,0
|
||||||
|
isXMLVersionChanged,0
|
||||||
|
addEventListener,0
|
||||||
|
removeEventListener,0
|
||||||
|
copyEventListeners,0
|
||||||
|
dispatchEvent,0
|
||||||
|
replacedText,0
|
||||||
|
deletedText,0
|
||||||
|
insertedText,0
|
||||||
|
modifyingCharacterData,0
|
||||||
|
modifiedCharacterData,0
|
||||||
|
insertingNode,0
|
||||||
|
insertedNode,0
|
||||||
|
removingNode,0
|
||||||
|
removedNode,0
|
||||||
|
replacingNode,0
|
||||||
|
replacedNode,0
|
||||||
|
replacingData,0
|
||||||
|
replacedCharacterData,0
|
||||||
|
modifiedAttrValue,0
|
||||||
|
setAttrNode,0
|
||||||
|
removedAttrNode,0
|
||||||
|
renamedAttrNode,0
|
||||||
|
renamedElement,0
|
||||||
|
readObject,2
|
||||||
|
writeObject,2
|
|
|
@ -0,0 +1,118 @@
|
||||||
|
,cluster
|
||||||
|
getOwnerDocument,0
|
||||||
|
getNodeType,3
|
||||||
|
getNodeName,0
|
||||||
|
cloneNode,0
|
||||||
|
insertBefore,7
|
||||||
|
removeChild,3
|
||||||
|
replaceChild,7
|
||||||
|
getTextContent,0
|
||||||
|
setTextContent,0
|
||||||
|
getFeature,0
|
||||||
|
createAttribute,2
|
||||||
|
createCDATASection,0
|
||||||
|
createComment,0
|
||||||
|
createDocumentFragment,0
|
||||||
|
createElement,2
|
||||||
|
createEntityReference,2
|
||||||
|
createProcessingInstruction,2
|
||||||
|
createTextNode,0
|
||||||
|
getDoctype,0
|
||||||
|
getDocumentElement,0
|
||||||
|
getElementsByTagName,0
|
||||||
|
getImplementation,0
|
||||||
|
setErrorChecking,0
|
||||||
|
setStrictErrorChecking,0
|
||||||
|
getErrorChecking,0
|
||||||
|
getStrictErrorChecking,0
|
||||||
|
getInputEncoding,0
|
||||||
|
setInputEncoding,0
|
||||||
|
setXmlEncoding,0
|
||||||
|
setEncoding,0
|
||||||
|
getXmlEncoding,0
|
||||||
|
getEncoding,0
|
||||||
|
setXmlVersion,2
|
||||||
|
setVersion,0
|
||||||
|
getXmlVersion,0
|
||||||
|
getVersion,0
|
||||||
|
setXmlStandalone,0
|
||||||
|
setStandalone,0
|
||||||
|
getXmlStandalone,0
|
||||||
|
getStandalone,0
|
||||||
|
getDocumentURI,0
|
||||||
|
canRenameElements,0
|
||||||
|
renameNode,1
|
||||||
|
replaceRenameElement,6
|
||||||
|
normalizeDocument,0
|
||||||
|
getDomConfig,0
|
||||||
|
getBaseURI,0
|
||||||
|
setDocumentURI,0
|
||||||
|
getAsync,0
|
||||||
|
setAsync,2
|
||||||
|
abort,0
|
||||||
|
load,0
|
||||||
|
loadXML,0
|
||||||
|
saveXML,2
|
||||||
|
setMutationEvents,0
|
||||||
|
getMutationEvents,0
|
||||||
|
createDocumentType,0
|
||||||
|
createEntity,2
|
||||||
|
createNotation,2
|
||||||
|
createElementDefinition,2
|
||||||
|
getNodeNumber,0
|
||||||
|
importNode,8
|
||||||
|
adoptNode,1
|
||||||
|
undeferChildren,0
|
||||||
|
getElementById,0
|
||||||
|
clearIdentifiers,0
|
||||||
|
putIdentifier,0
|
||||||
|
getIdentifier,0
|
||||||
|
removeIdentifier,0
|
||||||
|
getIdentifiers,0
|
||||||
|
createElementNS,0
|
||||||
|
createAttributeNS,0
|
||||||
|
getElementsByTagNameNS,0
|
||||||
|
clone,0
|
||||||
|
isXMLName,0
|
||||||
|
isValidQName,0
|
||||||
|
isKidOK,3
|
||||||
|
changed,0
|
||||||
|
changes,0
|
||||||
|
getNodeListCache,4
|
||||||
|
freeNodeListCache,0
|
||||||
|
setUserData,0
|
||||||
|
getUserData,0
|
||||||
|
getUserDataRecord,0
|
||||||
|
removeUserDataTable,0
|
||||||
|
setUserDataTable,0
|
||||||
|
callUserDataHandlers,0
|
||||||
|
checkNamespaceWF,2
|
||||||
|
checkDOMNSErr,5
|
||||||
|
checkQName,2
|
||||||
|
isXML11Version,0
|
||||||
|
isNormalizeDocRequired,0
|
||||||
|
isXMLVersionChanged,0
|
||||||
|
addEventListener,0
|
||||||
|
removeEventListener,0
|
||||||
|
copyEventListeners,0
|
||||||
|
dispatchEvent,0
|
||||||
|
replacedText,0
|
||||||
|
deletedText,0
|
||||||
|
insertedText,0
|
||||||
|
modifyingCharacterData,0
|
||||||
|
modifiedCharacterData,0
|
||||||
|
insertingNode,0
|
||||||
|
insertedNode,0
|
||||||
|
removingNode,0
|
||||||
|
removedNode,0
|
||||||
|
replacingNode,0
|
||||||
|
replacedNode,0
|
||||||
|
replacingData,0
|
||||||
|
replacedCharacterData,0
|
||||||
|
modifiedAttrValue,0
|
||||||
|
setAttrNode,0
|
||||||
|
removedAttrNode,0
|
||||||
|
renamedAttrNode,0
|
||||||
|
renamedElement,0
|
||||||
|
readObject,0
|
||||||
|
writeObject,0
|
|
118
clustering/org.apache.xerces.dom.CoreDocumentImpl_kmeans.csv
Normal file
118
clustering/org.apache.xerces.dom.CoreDocumentImpl_kmeans.csv
Normal file
|
@ -0,0 +1,118 @@
|
||||||
|
,cluster
|
||||||
|
getOwnerDocument,0
|
||||||
|
getNodeType,3
|
||||||
|
getNodeName,0
|
||||||
|
cloneNode,8
|
||||||
|
insertBefore,2
|
||||||
|
removeChild,3
|
||||||
|
replaceChild,2
|
||||||
|
getTextContent,0
|
||||||
|
setTextContent,0
|
||||||
|
getFeature,0
|
||||||
|
createAttribute,1
|
||||||
|
createCDATASection,0
|
||||||
|
createComment,0
|
||||||
|
createDocumentFragment,0
|
||||||
|
createElement,1
|
||||||
|
createEntityReference,1
|
||||||
|
createProcessingInstruction,1
|
||||||
|
createTextNode,0
|
||||||
|
getDoctype,0
|
||||||
|
getDocumentElement,0
|
||||||
|
getElementsByTagName,0
|
||||||
|
getImplementation,0
|
||||||
|
setErrorChecking,0
|
||||||
|
setStrictErrorChecking,0
|
||||||
|
getErrorChecking,0
|
||||||
|
getStrictErrorChecking,0
|
||||||
|
getInputEncoding,0
|
||||||
|
setInputEncoding,0
|
||||||
|
setXmlEncoding,0
|
||||||
|
setEncoding,0
|
||||||
|
getXmlEncoding,0
|
||||||
|
getEncoding,0
|
||||||
|
setXmlVersion,1
|
||||||
|
setVersion,0
|
||||||
|
getXmlVersion,0
|
||||||
|
getVersion,0
|
||||||
|
setXmlStandalone,0
|
||||||
|
setStandalone,0
|
||||||
|
getXmlStandalone,0
|
||||||
|
getStandalone,0
|
||||||
|
getDocumentURI,0
|
||||||
|
canRenameElements,0
|
||||||
|
renameNode,4
|
||||||
|
replaceRenameElement,5
|
||||||
|
normalizeDocument,0
|
||||||
|
getDomConfig,0
|
||||||
|
getBaseURI,0
|
||||||
|
setDocumentURI,0
|
||||||
|
getAsync,0
|
||||||
|
setAsync,1
|
||||||
|
abort,0
|
||||||
|
load,0
|
||||||
|
loadXML,0
|
||||||
|
saveXML,1
|
||||||
|
setMutationEvents,0
|
||||||
|
getMutationEvents,0
|
||||||
|
createDocumentType,0
|
||||||
|
createEntity,1
|
||||||
|
createNotation,1
|
||||||
|
createElementDefinition,1
|
||||||
|
getNodeNumber,0
|
||||||
|
importNode,6
|
||||||
|
adoptNode,4
|
||||||
|
undeferChildren,0
|
||||||
|
getElementById,0
|
||||||
|
clearIdentifiers,0
|
||||||
|
putIdentifier,0
|
||||||
|
getIdentifier,0
|
||||||
|
removeIdentifier,0
|
||||||
|
getIdentifiers,0
|
||||||
|
createElementNS,0
|
||||||
|
createAttributeNS,0
|
||||||
|
getElementsByTagNameNS,0
|
||||||
|
clone,0
|
||||||
|
isXMLName,0
|
||||||
|
isValidQName,0
|
||||||
|
isKidOK,3
|
||||||
|
changed,0
|
||||||
|
changes,0
|
||||||
|
getNodeListCache,0
|
||||||
|
freeNodeListCache,0
|
||||||
|
setUserData,0
|
||||||
|
getUserData,0
|
||||||
|
getUserDataRecord,0
|
||||||
|
removeUserDataTable,0
|
||||||
|
setUserDataTable,0
|
||||||
|
callUserDataHandlers,0
|
||||||
|
checkNamespaceWF,1
|
||||||
|
checkDOMNSErr,7
|
||||||
|
checkQName,1
|
||||||
|
isXML11Version,0
|
||||||
|
isNormalizeDocRequired,0
|
||||||
|
isXMLVersionChanged,0
|
||||||
|
addEventListener,0
|
||||||
|
removeEventListener,0
|
||||||
|
copyEventListeners,0
|
||||||
|
dispatchEvent,0
|
||||||
|
replacedText,0
|
||||||
|
deletedText,0
|
||||||
|
insertedText,0
|
||||||
|
modifyingCharacterData,0
|
||||||
|
modifiedCharacterData,0
|
||||||
|
insertingNode,0
|
||||||
|
insertedNode,0
|
||||||
|
removingNode,0
|
||||||
|
removedNode,0
|
||||||
|
replacingNode,0
|
||||||
|
replacedNode,0
|
||||||
|
replacingData,0
|
||||||
|
replacedCharacterData,0
|
||||||
|
modifiedAttrValue,0
|
||||||
|
setAttrNode,0
|
||||||
|
removedAttrNode,0
|
||||||
|
renamedAttrNode,0
|
||||||
|
renamedElement,0
|
||||||
|
readObject,0
|
||||||
|
writeObject,0
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
,k_means,hierarchical
|
||||||
|
2,0.880893568779594,0.880893568779594
|
||||||
|
3,0.865058028908998,0.8832576727129332
|
||||||
|
4,0.8716739969277025,0.9044225339758202
|
||||||
|
5,0.8801678593155939,0.9033046629733426
|
||||||
|
6,0.9027364726730837,0.9090951948593619
|
||||||
|
7,0.9172474681026531,0.9392626024065982
|
||||||
|
8,0.9426743010154992,0.9453062501090475
|
||||||
|
9,0.9512213095625077,0.9512213095625078
|
|
|
@ -0,0 +1,92 @@
|
||||||
|
,cluster
|
||||||
|
getGrammarDescription,0
|
||||||
|
getElementDeclIsExternal,0
|
||||||
|
getAttributeDeclIsExternal,0
|
||||||
|
getAttributeDeclIndex,0
|
||||||
|
startDTD,0
|
||||||
|
startParameterEntity,11
|
||||||
|
startExternalSubset,12
|
||||||
|
endParameterEntity,11
|
||||||
|
endExternalSubset,12
|
||||||
|
elementDecl,0
|
||||||
|
attributeDecl,0
|
||||||
|
internalEntityDecl,0
|
||||||
|
externalEntityDecl,0
|
||||||
|
unparsedEntityDecl,0
|
||||||
|
notationDecl,0
|
||||||
|
endDTD,0
|
||||||
|
setDTDSource,0
|
||||||
|
getDTDSource,0
|
||||||
|
textDecl,0
|
||||||
|
comment,0
|
||||||
|
processingInstruction,0
|
||||||
|
startAttlist,0
|
||||||
|
endAttlist,0
|
||||||
|
startConditional,0
|
||||||
|
ignoredCharacters,0
|
||||||
|
endConditional,0
|
||||||
|
setDTDContentModelSource,10
|
||||||
|
getDTDContentModelSource,10
|
||||||
|
startContentModel,10
|
||||||
|
startGroup,0
|
||||||
|
pcdata,0
|
||||||
|
element,0
|
||||||
|
separator,0
|
||||||
|
occurrence,0
|
||||||
|
endGroup,0
|
||||||
|
any,0
|
||||||
|
empty,0
|
||||||
|
endContentModel,10
|
||||||
|
isNamespaceAware,0
|
||||||
|
getSymbolTable,0
|
||||||
|
getFirstElementDeclIndex,0
|
||||||
|
getNextElementDeclIndex,0
|
||||||
|
getElementDeclIndex,0
|
||||||
|
getContentSpecType,10
|
||||||
|
getElementDecl,0
|
||||||
|
getElementDeclName,0
|
||||||
|
getFirstAttributeDeclIndex,0
|
||||||
|
getNextAttributeDeclIndex,0
|
||||||
|
getAttributeDecl,0
|
||||||
|
isCDATAAttribute,0
|
||||||
|
getEntityDeclIndex,0
|
||||||
|
getEntityDecl,0
|
||||||
|
getNotationDeclIndex,0
|
||||||
|
getNotationDecl,0
|
||||||
|
getContentSpec,10
|
||||||
|
getContentSpecIndex,10
|
||||||
|
getContentSpecAsString,10
|
||||||
|
printElements,0
|
||||||
|
printAttributes,0
|
||||||
|
addContentSpecToElement,10
|
||||||
|
getElementContentModelValidator,10
|
||||||
|
createElementDecl,1
|
||||||
|
setElementDecl,0
|
||||||
|
putElementNameMapping,0
|
||||||
|
setFirstAttributeDeclIndex,0
|
||||||
|
setContentSpecIndex,10
|
||||||
|
createAttributeDecl,1
|
||||||
|
setAttributeDecl,0
|
||||||
|
createContentSpec,1
|
||||||
|
setContentSpec,10
|
||||||
|
createEntityDecl,1
|
||||||
|
setEntityDecl,0
|
||||||
|
createNotationDecl,1
|
||||||
|
setNotationDecl,0
|
||||||
|
addContentSpecNode,10
|
||||||
|
addUniqueLeafNode,0
|
||||||
|
initializeContentModelStack,10
|
||||||
|
isImmutable,0
|
||||||
|
appendContentSpec,10
|
||||||
|
printAttribute,0
|
||||||
|
createChildModel,1
|
||||||
|
buildSyntaxTree,0
|
||||||
|
contentSpecTree,10
|
||||||
|
ensureElementDeclCapacity,0
|
||||||
|
ensureAttributeDeclCapacity,0
|
||||||
|
ensureEntityDeclCapacity,0
|
||||||
|
ensureNotationDeclCapacity,0
|
||||||
|
ensureContentSpecCapacity,10
|
||||||
|
resize,0
|
||||||
|
isEntityDeclared,0
|
||||||
|
isEntityUnparsed,0
|
|
|
@ -0,0 +1,92 @@
|
||||||
|
,cluster
|
||||||
|
getGrammarDescription,4
|
||||||
|
getElementDeclIsExternal,4
|
||||||
|
getAttributeDeclIsExternal,4
|
||||||
|
getAttributeDeclIndex,0
|
||||||
|
startDTD,4
|
||||||
|
startParameterEntity,15
|
||||||
|
startExternalSubset,4
|
||||||
|
endParameterEntity,4
|
||||||
|
endExternalSubset,4
|
||||||
|
elementDecl,14
|
||||||
|
attributeDecl,13
|
||||||
|
internalEntityDecl,4
|
||||||
|
externalEntityDecl,4
|
||||||
|
unparsedEntityDecl,4
|
||||||
|
notationDecl,4
|
||||||
|
endDTD,4
|
||||||
|
setDTDSource,4
|
||||||
|
getDTDSource,4
|
||||||
|
textDecl,4
|
||||||
|
comment,4
|
||||||
|
processingInstruction,4
|
||||||
|
startAttlist,4
|
||||||
|
endAttlist,4
|
||||||
|
startConditional,4
|
||||||
|
ignoredCharacters,4
|
||||||
|
endConditional,4
|
||||||
|
setDTDContentModelSource,4
|
||||||
|
getDTDContentModelSource,4
|
||||||
|
startContentModel,4
|
||||||
|
startGroup,4
|
||||||
|
pcdata,4
|
||||||
|
element,2
|
||||||
|
separator,5
|
||||||
|
occurrence,5
|
||||||
|
endGroup,4
|
||||||
|
any,4
|
||||||
|
empty,4
|
||||||
|
endContentModel,4
|
||||||
|
isNamespaceAware,4
|
||||||
|
getSymbolTable,4
|
||||||
|
getFirstElementDeclIndex,4
|
||||||
|
getNextElementDeclIndex,4
|
||||||
|
getElementDeclIndex,4
|
||||||
|
getContentSpecType,4
|
||||||
|
getElementDecl,1
|
||||||
|
getElementDeclName,4
|
||||||
|
getFirstAttributeDeclIndex,4
|
||||||
|
getNextAttributeDeclIndex,4
|
||||||
|
getAttributeDecl,4
|
||||||
|
isCDATAAttribute,0
|
||||||
|
getEntityDeclIndex,4
|
||||||
|
getEntityDecl,4
|
||||||
|
getNotationDeclIndex,4
|
||||||
|
getNotationDecl,4
|
||||||
|
getContentSpec,4
|
||||||
|
getContentSpecIndex,4
|
||||||
|
getContentSpecAsString,2
|
||||||
|
printElements,4
|
||||||
|
printAttributes,4
|
||||||
|
addContentSpecToElement,11
|
||||||
|
getElementContentModelValidator,1
|
||||||
|
createElementDecl,4
|
||||||
|
setElementDecl,4
|
||||||
|
putElementNameMapping,4
|
||||||
|
setFirstAttributeDeclIndex,4
|
||||||
|
setContentSpecIndex,4
|
||||||
|
createAttributeDecl,8
|
||||||
|
setAttributeDecl,4
|
||||||
|
createContentSpec,4
|
||||||
|
setContentSpec,4
|
||||||
|
createEntityDecl,4
|
||||||
|
setEntityDecl,4
|
||||||
|
createNotationDecl,4
|
||||||
|
setNotationDecl,4
|
||||||
|
addContentSpecNode,4
|
||||||
|
addUniqueLeafNode,2
|
||||||
|
initializeContentModelStack,10
|
||||||
|
isImmutable,4
|
||||||
|
appendContentSpec,2
|
||||||
|
printAttribute,4
|
||||||
|
createChildModel,2
|
||||||
|
buildSyntaxTree,2
|
||||||
|
contentSpecTree,2
|
||||||
|
ensureElementDeclCapacity,7
|
||||||
|
ensureAttributeDeclCapacity,9
|
||||||
|
ensureEntityDeclCapacity,12
|
||||||
|
ensureNotationDeclCapacity,3
|
||||||
|
ensureContentSpecCapacity,6
|
||||||
|
resize,4
|
||||||
|
isEntityDeclared,4
|
||||||
|
isEntityUnparsed,4
|
|
92
clustering/org.apache.xerces.impl.dtd.DTDGrammar_kmeans.csv
Normal file
92
clustering/org.apache.xerces.impl.dtd.DTDGrammar_kmeans.csv
Normal file
|
@ -0,0 +1,92 @@
|
||||||
|
,cluster
|
||||||
|
getGrammarDescription,1
|
||||||
|
getElementDeclIsExternal,1
|
||||||
|
getAttributeDeclIsExternal,1
|
||||||
|
getAttributeDeclIndex,15
|
||||||
|
startDTD,1
|
||||||
|
startParameterEntity,12
|
||||||
|
startExternalSubset,1
|
||||||
|
endParameterEntity,1
|
||||||
|
endExternalSubset,1
|
||||||
|
elementDecl,9
|
||||||
|
attributeDecl,13
|
||||||
|
internalEntityDecl,1
|
||||||
|
externalEntityDecl,1
|
||||||
|
unparsedEntityDecl,1
|
||||||
|
notationDecl,1
|
||||||
|
endDTD,1
|
||||||
|
setDTDSource,1
|
||||||
|
getDTDSource,1
|
||||||
|
textDecl,1
|
||||||
|
comment,1
|
||||||
|
processingInstruction,1
|
||||||
|
startAttlist,1
|
||||||
|
endAttlist,1
|
||||||
|
startConditional,1
|
||||||
|
ignoredCharacters,1
|
||||||
|
endConditional,1
|
||||||
|
setDTDContentModelSource,1
|
||||||
|
getDTDContentModelSource,1
|
||||||
|
startContentModel,1
|
||||||
|
startGroup,1
|
||||||
|
pcdata,1
|
||||||
|
element,3
|
||||||
|
separator,8
|
||||||
|
occurrence,8
|
||||||
|
endGroup,1
|
||||||
|
any,1
|
||||||
|
empty,1
|
||||||
|
endContentModel,1
|
||||||
|
isNamespaceAware,1
|
||||||
|
getSymbolTable,1
|
||||||
|
getFirstElementDeclIndex,1
|
||||||
|
getNextElementDeclIndex,1
|
||||||
|
getElementDeclIndex,1
|
||||||
|
getContentSpecType,1
|
||||||
|
getElementDecl,0
|
||||||
|
getElementDeclName,1
|
||||||
|
getFirstAttributeDeclIndex,1
|
||||||
|
getNextAttributeDeclIndex,1
|
||||||
|
getAttributeDecl,1
|
||||||
|
isCDATAAttribute,7
|
||||||
|
getEntityDeclIndex,1
|
||||||
|
getEntityDecl,1
|
||||||
|
getNotationDeclIndex,1
|
||||||
|
getNotationDecl,1
|
||||||
|
getContentSpec,1
|
||||||
|
getContentSpecIndex,1
|
||||||
|
getContentSpecAsString,3
|
||||||
|
printElements,1
|
||||||
|
printAttributes,1
|
||||||
|
addContentSpecToElement,14
|
||||||
|
getElementContentModelValidator,0
|
||||||
|
createElementDecl,1
|
||||||
|
setElementDecl,1
|
||||||
|
putElementNameMapping,1
|
||||||
|
setFirstAttributeDeclIndex,1
|
||||||
|
setContentSpecIndex,1
|
||||||
|
createAttributeDecl,7
|
||||||
|
setAttributeDecl,1
|
||||||
|
createContentSpec,1
|
||||||
|
setContentSpec,1
|
||||||
|
createEntityDecl,1
|
||||||
|
setEntityDecl,1
|
||||||
|
createNotationDecl,1
|
||||||
|
setNotationDecl,1
|
||||||
|
addContentSpecNode,1
|
||||||
|
addUniqueLeafNode,3
|
||||||
|
initializeContentModelStack,11
|
||||||
|
isImmutable,1
|
||||||
|
appendContentSpec,3
|
||||||
|
printAttribute,1
|
||||||
|
createChildModel,3
|
||||||
|
buildSyntaxTree,3
|
||||||
|
contentSpecTree,3
|
||||||
|
ensureElementDeclCapacity,6
|
||||||
|
ensureAttributeDeclCapacity,2
|
||||||
|
ensureEntityDeclCapacity,4
|
||||||
|
ensureNotationDeclCapacity,5
|
||||||
|
ensureContentSpecCapacity,10
|
||||||
|
resize,1
|
||||||
|
isEntityDeclared,1
|
||||||
|
isEntityUnparsed,1
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
,k_means,hierarchical
|
||||||
|
2,0.7973480585031026,0.5874087027846128
|
||||||
|
3,0.7929515822141272,0.6083207410570212
|
||||||
|
4,0.7875327826881011,0.692671697230321
|
||||||
|
5,0.7432807419504763,0.7141629304171452
|
||||||
|
6,0.7211961130227403,0.7297457578156361
|
||||||
|
7,0.7108188520737106,0.7538651674386223
|
||||||
|
8,0.7521496486020739,0.7823024861034127
|
||||||
|
9,0.7975294826166544,0.8082189521577593
|
||||||
|
10,0.8045822208703368,0.8104671295723321
|
||||||
|
11,0.8091345582763917,0.826856236491233
|
||||||
|
12,0.8140357581974259,0.834060274351163
|
||||||
|
13,0.8468647347587057,0.8414782883217177
|
||||||
|
14,0.8416685816849977,0.8402352442946155
|
||||||
|
15,0.8494389858738608,0.8512242552836264
|
||||||
|
16,0.8571428571428571,0.8571428571428571
|
|
|
@ -0,0 +1,107 @@
|
||||||
|
,cluster
|
||||||
|
null2EmptyString,0
|
||||||
|
emptyString2Null,0
|
||||||
|
doc2SystemId,0
|
||||||
|
parseSchema,0
|
||||||
|
validateAnnotations,0
|
||||||
|
createAnnotationValidator,1
|
||||||
|
getGrammar,0
|
||||||
|
findGrammar,0
|
||||||
|
constructTrees,0
|
||||||
|
isExistingGrammar,0
|
||||||
|
updateImportListFor,0
|
||||||
|
updateImportListWith,0
|
||||||
|
buildGlobalNameRegistries,13
|
||||||
|
traverseSchemas,0
|
||||||
|
needReportTNSError,9
|
||||||
|
addGlobalAttributeDecl,13
|
||||||
|
addGlobalAttributeGroupDecl,13
|
||||||
|
addGlobalElementDecl,13
|
||||||
|
addGlobalGroupDecl,13
|
||||||
|
addGlobalNotationDecl,13
|
||||||
|
addGlobalTypeDecl,13
|
||||||
|
addIDConstraintDecl,0
|
||||||
|
getGlobalAttributeDecl,13
|
||||||
|
getGlobalAttributeGroupDecl,13
|
||||||
|
getGlobalElementDecl,13
|
||||||
|
getGlobalGroupDecl,13
|
||||||
|
getGlobalNotationDecl,13
|
||||||
|
getGlobalTypeDecl,13
|
||||||
|
getIDConstraintDecl,0
|
||||||
|
getGlobalDecl,13
|
||||||
|
getGlobalDeclFromGrammar,13
|
||||||
|
traverseGlobalDecl,13
|
||||||
|
schemaDocument2SystemId,0
|
||||||
|
getGrpOrAttrGrpRedefinedByRestriction,0
|
||||||
|
resolveKeyRefs,0
|
||||||
|
getIDRegistry,0
|
||||||
|
getIDRegistry_sub,0
|
||||||
|
storeKeyRef,0
|
||||||
|
resolveSchema,0
|
||||||
|
resolveSchemaSource,0
|
||||||
|
getSchemaDocument,0
|
||||||
|
getSchemaDocument0,0
|
||||||
|
getSchemaDocument1,0
|
||||||
|
expandGrammars,0
|
||||||
|
existingGrammars,0
|
||||||
|
canAddComponents,14
|
||||||
|
canAddComponent,14
|
||||||
|
addGrammars,0
|
||||||
|
addGrammarComponents,14
|
||||||
|
createGrammarFrom,1
|
||||||
|
addNewGrammarLocations,0
|
||||||
|
addNewImportedGrammars,0
|
||||||
|
updateImportList,0
|
||||||
|
addNewGrammarComponents,14
|
||||||
|
addGlobalElementDecls,13
|
||||||
|
addGlobalAttributeDecls,13
|
||||||
|
addGlobalAttributeGroupDecls,13
|
||||||
|
addGlobalNotationDecls,13
|
||||||
|
addGlobalGroupDecls,13
|
||||||
|
addGlobalTypeDecls,13
|
||||||
|
expandComponents,14
|
||||||
|
expandRelatedComponents,14
|
||||||
|
expandRelatedAttributeComponents,14
|
||||||
|
expandRelatedElementComponents,14
|
||||||
|
expandRelatedTypeComponents,14
|
||||||
|
expandRelatedModelGroupDefinitionComponents,14
|
||||||
|
expandRelatedAttributeGroupComponents,14
|
||||||
|
expandRelatedComplexTypeComponents,14
|
||||||
|
expandRelatedSimpleTypeComponents,14
|
||||||
|
expandRelatedAttributeUsesComponents,14
|
||||||
|
expandRelatedAttributeUseComponents,14
|
||||||
|
expandRelatedParticleComponents,14
|
||||||
|
expandRelatedModelGroupComponents,14
|
||||||
|
addRelatedType,0
|
||||||
|
addRelatedElement,0
|
||||||
|
addRelatedAttribute,0
|
||||||
|
addGlobalComponents,13
|
||||||
|
addGlobalComponent,13
|
||||||
|
updateImportDependencies,0
|
||||||
|
expandImportList,0
|
||||||
|
addImportList,0
|
||||||
|
containedImportedGrammar,0
|
||||||
|
getSchemaGrammar,0
|
||||||
|
findDependentNamespaces,0
|
||||||
|
addNamespaceDependency,0
|
||||||
|
reportSharingError,9
|
||||||
|
createTraversers,1
|
||||||
|
prepareForParse,0
|
||||||
|
prepareForTraverse,0
|
||||||
|
setDeclPool,0
|
||||||
|
setDVFactory,0
|
||||||
|
reset,0
|
||||||
|
traverseLocalElements,0
|
||||||
|
removeParticle,0
|
||||||
|
fillInLocalElemInfo,0
|
||||||
|
checkForDuplicateNames,0
|
||||||
|
renameRedefiningComponents,14
|
||||||
|
findQName,0
|
||||||
|
changeRedefineGroup,0
|
||||||
|
findXSDocumentForDecl,0
|
||||||
|
nonAnnotationContent,10
|
||||||
|
setSchemasVisible,0
|
||||||
|
element2Locator,0
|
||||||
|
reportSchemaError,9
|
||||||
|
reportSchemaWarning,0
|
||||||
|
setGenerateSyntheticAnnotations,0
|
|
|
@ -0,0 +1,107 @@
|
||||||
|
,cluster
|
||||||
|
null2EmptyString,6
|
||||||
|
emptyString2Null,6
|
||||||
|
doc2SystemId,3
|
||||||
|
parseSchema,7
|
||||||
|
validateAnnotations,3
|
||||||
|
createAnnotationValidator,3
|
||||||
|
getGrammar,3
|
||||||
|
findGrammar,3
|
||||||
|
constructTrees,2
|
||||||
|
isExistingGrammar,3
|
||||||
|
updateImportListFor,3
|
||||||
|
updateImportListWith,3
|
||||||
|
buildGlobalNameRegistries,0
|
||||||
|
traverseSchemas,0
|
||||||
|
needReportTNSError,3
|
||||||
|
addGlobalAttributeDecl,3
|
||||||
|
addGlobalAttributeGroupDecl,3
|
||||||
|
addGlobalElementDecl,3
|
||||||
|
addGlobalGroupDecl,3
|
||||||
|
addGlobalNotationDecl,3
|
||||||
|
addGlobalTypeDecl,3
|
||||||
|
addIDConstraintDecl,3
|
||||||
|
getGlobalAttributeDecl,3
|
||||||
|
getGlobalAttributeGroupDecl,3
|
||||||
|
getGlobalElementDecl,3
|
||||||
|
getGlobalGroupDecl,3
|
||||||
|
getGlobalNotationDecl,3
|
||||||
|
getGlobalTypeDecl,3
|
||||||
|
getIDConstraintDecl,3
|
||||||
|
getGlobalDecl,3
|
||||||
|
getGlobalDeclFromGrammar,3
|
||||||
|
traverseGlobalDecl,0
|
||||||
|
schemaDocument2SystemId,3
|
||||||
|
getGrpOrAttrGrpRedefinedByRestriction,6
|
||||||
|
resolveKeyRefs,3
|
||||||
|
getIDRegistry,3
|
||||||
|
getIDRegistry_sub,3
|
||||||
|
storeKeyRef,0
|
||||||
|
resolveSchema,3
|
||||||
|
resolveSchemaSource,3
|
||||||
|
getSchemaDocument,12
|
||||||
|
getSchemaDocument0,3
|
||||||
|
getSchemaDocument1,3
|
||||||
|
expandGrammars,3
|
||||||
|
existingGrammars,3
|
||||||
|
canAddComponents,3
|
||||||
|
canAddComponent,5
|
||||||
|
addGrammars,3
|
||||||
|
addGrammarComponents,3
|
||||||
|
createGrammarFrom,3
|
||||||
|
addNewGrammarLocations,3
|
||||||
|
addNewImportedGrammars,3
|
||||||
|
updateImportList,3
|
||||||
|
addNewGrammarComponents,3
|
||||||
|
addGlobalElementDecls,5
|
||||||
|
addGlobalAttributeDecls,5
|
||||||
|
addGlobalAttributeGroupDecls,5
|
||||||
|
addGlobalNotationDecls,5
|
||||||
|
addGlobalGroupDecls,5
|
||||||
|
addGlobalTypeDecls,5
|
||||||
|
expandComponents,3
|
||||||
|
expandRelatedComponents,5
|
||||||
|
expandRelatedAttributeComponents,3
|
||||||
|
expandRelatedElementComponents,3
|
||||||
|
expandRelatedTypeComponents,3
|
||||||
|
expandRelatedModelGroupDefinitionComponents,3
|
||||||
|
expandRelatedAttributeGroupComponents,3
|
||||||
|
expandRelatedComplexTypeComponents,3
|
||||||
|
expandRelatedSimpleTypeComponents,3
|
||||||
|
expandRelatedAttributeUsesComponents,3
|
||||||
|
expandRelatedAttributeUseComponents,3
|
||||||
|
expandRelatedParticleComponents,5
|
||||||
|
expandRelatedModelGroupComponents,3
|
||||||
|
addRelatedType,3
|
||||||
|
addRelatedElement,5
|
||||||
|
addRelatedAttribute,5
|
||||||
|
addGlobalComponents,3
|
||||||
|
addGlobalComponent,9
|
||||||
|
updateImportDependencies,3
|
||||||
|
expandImportList,3
|
||||||
|
addImportList,3
|
||||||
|
containedImportedGrammar,3
|
||||||
|
getSchemaGrammar,3
|
||||||
|
findDependentNamespaces,3
|
||||||
|
addNamespaceDependency,3
|
||||||
|
reportSharingError,3
|
||||||
|
createTraversers,3
|
||||||
|
prepareForParse,3
|
||||||
|
prepareForTraverse,3
|
||||||
|
setDeclPool,3
|
||||||
|
setDVFactory,3
|
||||||
|
reset,3
|
||||||
|
traverseLocalElements,10
|
||||||
|
removeParticle,4
|
||||||
|
fillInLocalElemInfo,8
|
||||||
|
checkForDuplicateNames,3
|
||||||
|
renameRedefiningComponents,2
|
||||||
|
findQName,6
|
||||||
|
changeRedefineGroup,11
|
||||||
|
findXSDocumentForDecl,3
|
||||||
|
nonAnnotationContent,3
|
||||||
|
setSchemasVisible,3
|
||||||
|
element2Locator,3
|
||||||
|
reportSchemaError,1
|
||||||
|
reportSchemaWarning,1
|
||||||
|
setGenerateSyntheticAnnotations,3
|
|
|
@ -0,0 +1,107 @@
|
||||||
|
,cluster
|
||||||
|
null2EmptyString,8
|
||||||
|
emptyString2Null,8
|
||||||
|
doc2SystemId,0
|
||||||
|
parseSchema,5
|
||||||
|
validateAnnotations,0
|
||||||
|
createAnnotationValidator,0
|
||||||
|
getGrammar,0
|
||||||
|
findGrammar,0
|
||||||
|
constructTrees,3
|
||||||
|
isExistingGrammar,0
|
||||||
|
updateImportListFor,0
|
||||||
|
updateImportListWith,0
|
||||||
|
buildGlobalNameRegistries,2
|
||||||
|
traverseSchemas,2
|
||||||
|
needReportTNSError,0
|
||||||
|
addGlobalAttributeDecl,0
|
||||||
|
addGlobalAttributeGroupDecl,0
|
||||||
|
addGlobalElementDecl,0
|
||||||
|
addGlobalGroupDecl,0
|
||||||
|
addGlobalNotationDecl,0
|
||||||
|
addGlobalTypeDecl,0
|
||||||
|
addIDConstraintDecl,0
|
||||||
|
getGlobalAttributeDecl,0
|
||||||
|
getGlobalAttributeGroupDecl,0
|
||||||
|
getGlobalElementDecl,0
|
||||||
|
getGlobalGroupDecl,0
|
||||||
|
getGlobalNotationDecl,0
|
||||||
|
getGlobalTypeDecl,0
|
||||||
|
getIDConstraintDecl,0
|
||||||
|
getGlobalDecl,0
|
||||||
|
getGlobalDeclFromGrammar,0
|
||||||
|
traverseGlobalDecl,2
|
||||||
|
schemaDocument2SystemId,0
|
||||||
|
getGrpOrAttrGrpRedefinedByRestriction,8
|
||||||
|
resolveKeyRefs,0
|
||||||
|
getIDRegistry,0
|
||||||
|
getIDRegistry_sub,0
|
||||||
|
storeKeyRef,9
|
||||||
|
resolveSchema,0
|
||||||
|
resolveSchemaSource,0
|
||||||
|
getSchemaDocument,0
|
||||||
|
getSchemaDocument0,0
|
||||||
|
getSchemaDocument1,0
|
||||||
|
expandGrammars,0
|
||||||
|
existingGrammars,0
|
||||||
|
canAddComponents,0
|
||||||
|
canAddComponent,1
|
||||||
|
addGrammars,0
|
||||||
|
addGrammarComponents,0
|
||||||
|
createGrammarFrom,0
|
||||||
|
addNewGrammarLocations,0
|
||||||
|
addNewImportedGrammars,0
|
||||||
|
updateImportList,0
|
||||||
|
addNewGrammarComponents,0
|
||||||
|
addGlobalElementDecls,1
|
||||||
|
addGlobalAttributeDecls,1
|
||||||
|
addGlobalAttributeGroupDecls,1
|
||||||
|
addGlobalNotationDecls,1
|
||||||
|
addGlobalGroupDecls,1
|
||||||
|
addGlobalTypeDecls,1
|
||||||
|
expandComponents,0
|
||||||
|
expandRelatedComponents,1
|
||||||
|
expandRelatedAttributeComponents,0
|
||||||
|
expandRelatedElementComponents,0
|
||||||
|
expandRelatedTypeComponents,0
|
||||||
|
expandRelatedModelGroupDefinitionComponents,0
|
||||||
|
expandRelatedAttributeGroupComponents,0
|
||||||
|
expandRelatedComplexTypeComponents,0
|
||||||
|
expandRelatedSimpleTypeComponents,0
|
||||||
|
expandRelatedAttributeUsesComponents,0
|
||||||
|
expandRelatedAttributeUseComponents,0
|
||||||
|
expandRelatedParticleComponents,1
|
||||||
|
expandRelatedModelGroupComponents,0
|
||||||
|
addRelatedType,0
|
||||||
|
addRelatedElement,1
|
||||||
|
addRelatedAttribute,1
|
||||||
|
addGlobalComponents,0
|
||||||
|
addGlobalComponent,6
|
||||||
|
updateImportDependencies,0
|
||||||
|
expandImportList,0
|
||||||
|
addImportList,0
|
||||||
|
containedImportedGrammar,0
|
||||||
|
getSchemaGrammar,0
|
||||||
|
findDependentNamespaces,0
|
||||||
|
addNamespaceDependency,0
|
||||||
|
reportSharingError,0
|
||||||
|
createTraversers,0
|
||||||
|
prepareForParse,0
|
||||||
|
prepareForTraverse,0
|
||||||
|
setDeclPool,0
|
||||||
|
setDVFactory,0
|
||||||
|
reset,0
|
||||||
|
traverseLocalElements,7
|
||||||
|
removeParticle,10
|
||||||
|
fillInLocalElemInfo,12
|
||||||
|
checkForDuplicateNames,0
|
||||||
|
renameRedefiningComponents,13
|
||||||
|
findQName,8
|
||||||
|
changeRedefineGroup,11
|
||||||
|
findXSDocumentForDecl,0
|
||||||
|
nonAnnotationContent,0
|
||||||
|
setSchemasVisible,0
|
||||||
|
element2Locator,0
|
||||||
|
reportSchemaError,4
|
||||||
|
reportSchemaWarning,4
|
||||||
|
setGenerateSyntheticAnnotations,0
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
,k_means,hierarchical
|
||||||
|
2,0.653710343152024,0.6897290995261953
|
||||||
|
3,0.7619679813132313,0.739093871180792
|
||||||
|
4,0.7984312146893348,0.8064687623601431
|
||||||
|
5,0.8435547562272465,0.8435547562272465
|
||||||
|
6,0.8611535046317008,0.842649115950788
|
||||||
|
7,0.8678669375642456,0.8504397238719222
|
||||||
|
8,0.8739125874487823,0.8174948896615486
|
||||||
|
9,0.8788481792716016,0.8747912741464551
|
||||||
|
10,0.8903718217266576,0.8775840532668583
|
||||||
|
11,0.8875387867792598,0.8867064376197864
|
||||||
|
12,0.8959671318660039,0.8952551515692575
|
||||||
|
13,0.902637950556443,0.9017527022417632
|
||||||
|
14,0.9084235209322024,0.8989895586680513
|
|
|
@ -0,0 +1,109 @@
|
||||||
|
,cluster
|
||||||
|
reset,0
|
||||||
|
getRecognizedFeatures,0
|
||||||
|
setFeature,0
|
||||||
|
getRecognizedProperties,0
|
||||||
|
setProperty,0
|
||||||
|
getFeatureDefault,0
|
||||||
|
getPropertyDefault,0
|
||||||
|
setDocumentHandler,0
|
||||||
|
getDocumentHandler,0
|
||||||
|
startDocument,0
|
||||||
|
xmlDecl,0
|
||||||
|
doctypeDecl,0
|
||||||
|
comment,0
|
||||||
|
processingInstruction,0
|
||||||
|
startElement,0
|
||||||
|
emptyElement,0
|
||||||
|
endElement,0
|
||||||
|
startGeneralEntity,0
|
||||||
|
textDecl,0
|
||||||
|
endGeneralEntity,0
|
||||||
|
characters,0
|
||||||
|
ignorableWhitespace,0
|
||||||
|
startCDATA,0
|
||||||
|
endCDATA,0
|
||||||
|
endDocument,0
|
||||||
|
setDocumentSource,0
|
||||||
|
getDocumentSource,0
|
||||||
|
attributeDecl,0
|
||||||
|
elementDecl,0
|
||||||
|
endAttlist,0
|
||||||
|
endConditional,0
|
||||||
|
endDTD,0
|
||||||
|
endExternalSubset,12
|
||||||
|
endParameterEntity,11
|
||||||
|
externalEntityDecl,0
|
||||||
|
getDTDSource,0
|
||||||
|
ignoredCharacters,0
|
||||||
|
internalEntityDecl,0
|
||||||
|
notationDecl,0
|
||||||
|
setDTDSource,0
|
||||||
|
startAttlist,0
|
||||||
|
startConditional,0
|
||||||
|
startDTD,0
|
||||||
|
startExternalSubset,12
|
||||||
|
startParameterEntity,11
|
||||||
|
unparsedEntityDecl,0
|
||||||
|
getDTDHandler,0
|
||||||
|
setDTDHandler,0
|
||||||
|
setErrorReporter,9
|
||||||
|
handleFallbackElement,0
|
||||||
|
handleIncludeElement,0
|
||||||
|
hasXIncludeNamespace,0
|
||||||
|
isIncludeElement,0
|
||||||
|
isFallbackElement,0
|
||||||
|
sameBaseURIAsIncludeParent,4
|
||||||
|
sameLanguageAsIncludeParent,0
|
||||||
|
setupCurrentBaseURI,4
|
||||||
|
searchForRecursiveIncludes,0
|
||||||
|
isTopLevelIncludedItem,0
|
||||||
|
isTopLevelIncludedItemViaInclude,0
|
||||||
|
isTopLevelIncludedItemViaFallback,0
|
||||||
|
processAttributes,0
|
||||||
|
getRelativeBaseURI,4
|
||||||
|
getIncludeParentBaseURI,4
|
||||||
|
getIncludeParentLanguage,0
|
||||||
|
getIncludeParentDepth,0
|
||||||
|
getResultDepth,0
|
||||||
|
modifyAugmentations,0
|
||||||
|
getState,0
|
||||||
|
setState,0
|
||||||
|
setSawFallback,0
|
||||||
|
getSawFallback,0
|
||||||
|
setSawInclude,0
|
||||||
|
getSawInclude,0
|
||||||
|
reportResourceError,9
|
||||||
|
reportFatalError,9
|
||||||
|
reportError,9
|
||||||
|
setParent,0
|
||||||
|
setHref,0
|
||||||
|
setXIncludeLocator,0
|
||||||
|
isRootDocument,0
|
||||||
|
addUnparsedEntity,0
|
||||||
|
addNotation,0
|
||||||
|
checkUnparsedEntity,0
|
||||||
|
checkNotation,0
|
||||||
|
checkAndSendUnparsedEntity,0
|
||||||
|
checkAndSendNotation,0
|
||||||
|
checkWhitespace,0
|
||||||
|
checkMultipleRootElements,0
|
||||||
|
setRootElementProcessed,0
|
||||||
|
getRootElementProcessed,0
|
||||||
|
copyFeatures,0
|
||||||
|
copyFeatures1,0
|
||||||
|
saveBaseURI,4
|
||||||
|
restoreBaseURI,4
|
||||||
|
saveLanguage,0
|
||||||
|
restoreLanguage,0
|
||||||
|
getBaseURI,4
|
||||||
|
getLanguage,0
|
||||||
|
getRelativeURI,4
|
||||||
|
scopeOfBaseURI,4
|
||||||
|
scopeOfLanguage,0
|
||||||
|
processXMLBaseAttributes,0
|
||||||
|
processXMLLangAttributes,0
|
||||||
|
isValidInHTTPHeader,0
|
||||||
|
createInputSource,1
|
||||||
|
isEqual,0
|
||||||
|
escapeHref,0
|
|
|
@ -0,0 +1,109 @@
|
||||||
|
,cluster
|
||||||
|
reset,13
|
||||||
|
getRecognizedFeatures,0
|
||||||
|
setFeature,0
|
||||||
|
getRecognizedProperties,0
|
||||||
|
setProperty,0
|
||||||
|
getFeatureDefault,12
|
||||||
|
getPropertyDefault,8
|
||||||
|
setDocumentHandler,0
|
||||||
|
getDocumentHandler,0
|
||||||
|
startDocument,9
|
||||||
|
xmlDecl,0
|
||||||
|
doctypeDecl,0
|
||||||
|
comment,0
|
||||||
|
processingInstruction,0
|
||||||
|
startElement,0
|
||||||
|
emptyElement,0
|
||||||
|
endElement,0
|
||||||
|
startGeneralEntity,7
|
||||||
|
textDecl,0
|
||||||
|
endGeneralEntity,0
|
||||||
|
characters,0
|
||||||
|
ignorableWhitespace,0
|
||||||
|
startCDATA,0
|
||||||
|
endCDATA,0
|
||||||
|
endDocument,0
|
||||||
|
setDocumentSource,0
|
||||||
|
getDocumentSource,0
|
||||||
|
attributeDecl,0
|
||||||
|
elementDecl,0
|
||||||
|
endAttlist,0
|
||||||
|
endConditional,0
|
||||||
|
endDTD,0
|
||||||
|
endExternalSubset,0
|
||||||
|
endParameterEntity,0
|
||||||
|
externalEntityDecl,0
|
||||||
|
getDTDSource,0
|
||||||
|
ignoredCharacters,0
|
||||||
|
internalEntityDecl,0
|
||||||
|
notationDecl,0
|
||||||
|
setDTDSource,0
|
||||||
|
startAttlist,0
|
||||||
|
startConditional,0
|
||||||
|
startDTD,0
|
||||||
|
startExternalSubset,0
|
||||||
|
startParameterEntity,0
|
||||||
|
unparsedEntityDecl,0
|
||||||
|
getDTDHandler,0
|
||||||
|
setDTDHandler,0
|
||||||
|
setErrorReporter,3
|
||||||
|
handleFallbackElement,0
|
||||||
|
handleIncludeElement,11
|
||||||
|
hasXIncludeNamespace,0
|
||||||
|
isIncludeElement,0
|
||||||
|
isFallbackElement,0
|
||||||
|
sameBaseURIAsIncludeParent,0
|
||||||
|
sameLanguageAsIncludeParent,0
|
||||||
|
setupCurrentBaseURI,0
|
||||||
|
searchForRecursiveIncludes,0
|
||||||
|
isTopLevelIncludedItem,0
|
||||||
|
isTopLevelIncludedItemViaInclude,0
|
||||||
|
isTopLevelIncludedItemViaFallback,0
|
||||||
|
processAttributes,14
|
||||||
|
getRelativeBaseURI,0
|
||||||
|
getIncludeParentBaseURI,0
|
||||||
|
getIncludeParentLanguage,0
|
||||||
|
getIncludeParentDepth,0
|
||||||
|
getResultDepth,0
|
||||||
|
modifyAugmentations,5
|
||||||
|
getState,0
|
||||||
|
setState,0
|
||||||
|
setSawFallback,10
|
||||||
|
getSawFallback,10
|
||||||
|
setSawInclude,4
|
||||||
|
getSawInclude,4
|
||||||
|
reportResourceError,1
|
||||||
|
reportFatalError,1
|
||||||
|
reportError,3
|
||||||
|
setParent,0
|
||||||
|
setHref,0
|
||||||
|
setXIncludeLocator,0
|
||||||
|
isRootDocument,0
|
||||||
|
addUnparsedEntity,0
|
||||||
|
addNotation,0
|
||||||
|
checkUnparsedEntity,0
|
||||||
|
checkNotation,0
|
||||||
|
checkAndSendUnparsedEntity,0
|
||||||
|
checkAndSendNotation,0
|
||||||
|
checkWhitespace,0
|
||||||
|
checkMultipleRootElements,0
|
||||||
|
setRootElementProcessed,0
|
||||||
|
getRootElementProcessed,0
|
||||||
|
copyFeatures,7
|
||||||
|
copyFeatures1,0
|
||||||
|
saveBaseURI,0
|
||||||
|
restoreBaseURI,0
|
||||||
|
saveLanguage,0
|
||||||
|
restoreLanguage,0
|
||||||
|
getBaseURI,0
|
||||||
|
getLanguage,0
|
||||||
|
getRelativeURI,0
|
||||||
|
scopeOfBaseURI,0
|
||||||
|
scopeOfLanguage,0
|
||||||
|
processXMLBaseAttributes,2
|
||||||
|
processXMLLangAttributes,2
|
||||||
|
isValidInHTTPHeader,0
|
||||||
|
createInputSource,6
|
||||||
|
isEqual,0
|
||||||
|
escapeHref,0
|
|
109
clustering/org.apache.xerces.xinclude.XIncludeHandler_kmeans.csv
Normal file
109
clustering/org.apache.xerces.xinclude.XIncludeHandler_kmeans.csv
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
,cluster
|
||||||
|
reset,2
|
||||||
|
getRecognizedFeatures,0
|
||||||
|
setFeature,0
|
||||||
|
getRecognizedProperties,0
|
||||||
|
setProperty,0
|
||||||
|
getFeatureDefault,0
|
||||||
|
getPropertyDefault,13
|
||||||
|
setDocumentHandler,0
|
||||||
|
getDocumentHandler,0
|
||||||
|
startDocument,9
|
||||||
|
xmlDecl,0
|
||||||
|
doctypeDecl,0
|
||||||
|
comment,0
|
||||||
|
processingInstruction,0
|
||||||
|
startElement,0
|
||||||
|
emptyElement,0
|
||||||
|
endElement,0
|
||||||
|
startGeneralEntity,1
|
||||||
|
textDecl,0
|
||||||
|
endGeneralEntity,0
|
||||||
|
characters,0
|
||||||
|
ignorableWhitespace,0
|
||||||
|
startCDATA,0
|
||||||
|
endCDATA,0
|
||||||
|
endDocument,0
|
||||||
|
setDocumentSource,0
|
||||||
|
getDocumentSource,0
|
||||||
|
attributeDecl,0
|
||||||
|
elementDecl,0
|
||||||
|
endAttlist,0
|
||||||
|
endConditional,0
|
||||||
|
endDTD,0
|
||||||
|
endExternalSubset,0
|
||||||
|
endParameterEntity,0
|
||||||
|
externalEntityDecl,0
|
||||||
|
getDTDSource,0
|
||||||
|
ignoredCharacters,0
|
||||||
|
internalEntityDecl,0
|
||||||
|
notationDecl,0
|
||||||
|
setDTDSource,0
|
||||||
|
startAttlist,0
|
||||||
|
startConditional,0
|
||||||
|
startDTD,0
|
||||||
|
startExternalSubset,0
|
||||||
|
startParameterEntity,0
|
||||||
|
unparsedEntityDecl,0
|
||||||
|
getDTDHandler,0
|
||||||
|
setDTDHandler,0
|
||||||
|
setErrorReporter,4
|
||||||
|
handleFallbackElement,0
|
||||||
|
handleIncludeElement,3
|
||||||
|
hasXIncludeNamespace,0
|
||||||
|
isIncludeElement,0
|
||||||
|
isFallbackElement,0
|
||||||
|
sameBaseURIAsIncludeParent,0
|
||||||
|
sameLanguageAsIncludeParent,0
|
||||||
|
setupCurrentBaseURI,0
|
||||||
|
searchForRecursiveIncludes,0
|
||||||
|
isTopLevelIncludedItem,0
|
||||||
|
isTopLevelIncludedItemViaInclude,0
|
||||||
|
isTopLevelIncludedItemViaFallback,0
|
||||||
|
processAttributes,12
|
||||||
|
getRelativeBaseURI,0
|
||||||
|
getIncludeParentBaseURI,0
|
||||||
|
getIncludeParentLanguage,0
|
||||||
|
getIncludeParentDepth,0
|
||||||
|
getResultDepth,0
|
||||||
|
modifyAugmentations,11
|
||||||
|
getState,0
|
||||||
|
setState,14
|
||||||
|
setSawFallback,8
|
||||||
|
getSawFallback,8
|
||||||
|
setSawInclude,7
|
||||||
|
getSawInclude,7
|
||||||
|
reportResourceError,6
|
||||||
|
reportFatalError,6
|
||||||
|
reportError,4
|
||||||
|
setParent,0
|
||||||
|
setHref,0
|
||||||
|
setXIncludeLocator,0
|
||||||
|
isRootDocument,0
|
||||||
|
addUnparsedEntity,0
|
||||||
|
addNotation,0
|
||||||
|
checkUnparsedEntity,0
|
||||||
|
checkNotation,0
|
||||||
|
checkAndSendUnparsedEntity,0
|
||||||
|
checkAndSendNotation,0
|
||||||
|
checkWhitespace,0
|
||||||
|
checkMultipleRootElements,0
|
||||||
|
setRootElementProcessed,0
|
||||||
|
getRootElementProcessed,0
|
||||||
|
copyFeatures,1
|
||||||
|
copyFeatures1,0
|
||||||
|
saveBaseURI,0
|
||||||
|
restoreBaseURI,0
|
||||||
|
saveLanguage,0
|
||||||
|
restoreLanguage,0
|
||||||
|
getBaseURI,0
|
||||||
|
getLanguage,0
|
||||||
|
getRelativeURI,0
|
||||||
|
scopeOfBaseURI,0
|
||||||
|
scopeOfLanguage,0
|
||||||
|
processXMLBaseAttributes,5
|
||||||
|
processXMLLangAttributes,5
|
||||||
|
isValidInHTTPHeader,0
|
||||||
|
createInputSource,10
|
||||||
|
isEqual,0
|
||||||
|
escapeHref,0
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
,k_means,hierarchical
|
||||||
|
2,0.7400737900068505,0.7450255723178434
|
||||||
|
3,0.750834341421787,0.7590675173051686
|
||||||
|
4,0.7679751766235376,0.7537209967731088
|
||||||
|
5,0.7694034011467328,0.7857193607872571
|
||||||
|
6,0.7912800923302977,0.7998705023701569
|
||||||
|
7,0.7945459689471787,0.8089785658259816
|
||||||
|
8,0.8182179715203751,0.8386247544828634
|
||||||
|
9,0.8417229111627736,0.8681233694906747
|
||||||
|
10,0.8647266620127783,0.879025772110854
|
||||||
|
11,0.8825387834587034,0.8849687834612566
|
||||||
|
12,0.8852199398177911,0.891045019202272
|
||||||
|
13,0.8905822525359668,0.8974662016344479
|
||||||
|
14,0.9036154755382922,0.9037208985767615
|
||||||
|
15,0.9101193816556802,0.9101193816556802
|
|
47
ground_truth.py
Executable file
47
ground_truth.py
Executable file
|
@ -0,0 +1,47 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import glob
|
||||||
|
|
||||||
|
|
||||||
|
DIR: str = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
IN_DIR: str = DIR + '/feature_vectors'
|
||||||
|
OUT_DIR: str = DIR + '/clustering'
|
||||||
|
|
||||||
|
|
||||||
|
def clean_output():
    """Delete every ``*_groundtruth.csv`` file currently present in OUT_DIR."""
    for stale_file in glob.glob(OUT_DIR + '/*_groundtruth.csv'):
        os.remove(stale_file)
|
||||||
|
|
||||||
|
|
||||||
|
def ground_truth(method_name: str, keywords: list[str]):
    """Return the ground-truth cluster label for a method name.

    The label is the 1-based position of the first keyword (in list order)
    that occurs as a substring of ``method_name``; 0 is returned when no
    keyword matches.
    """
    matching_labels = (
        position
        for position, keyword in enumerate(keywords, start=1)
        if keyword in method_name
    )
    return next(matching_labels, 0)
|
||||||
|
|
||||||
|
|
||||||
|
def create_ground_truth(path: str, keywords: list[str]):
    """Write the keyword-based ground-truth clustering for one feature-vector CSV.

    The output file is ``<OUT_DIR>/<class name>_groundtruth.csv``, where the
    class name is the basename of ``path`` up to its last dot. It contains the
    index of the input CSV plus a single ``cluster`` column.
    """
    file_name = os.path.basename(path)
    clazz_name = file_name[:file_name.rfind('.')]

    def label_for(method):
        # Matching is done on the lower-cased index value.
        return ground_truth(method.lower(), keywords)

    # filter([]) drops every column, leaving only the index read from column 0.
    labels = pd.read_csv(path, index_col=0).filter([])
    labels['cluster'] = labels.index.map(label_for)
    labels.to_csv(OUT_DIR + '/' + clazz_name + '_groundtruth.csv')
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Regenerate the ground-truth clustering CSV for every feature-vector file.

    Keywords are read from ``keyword_list.txt`` (one per line, lower-cased),
    old ``*_groundtruth.csv`` outputs are removed, and a new file is written
    for each ``*.csv`` found in IN_DIR.
    """
    with open(DIR + '/keyword_list.txt', 'r') as f:
        # Skip blank lines: an empty keyword is a substring of every method
        # name, so it would label every method with that keyword's index.
        keywords: list[str] = [
            keyword
            for keyword in (line.strip().lower() for line in f)
            if keyword
        ]

    clean_output()

    for feature_file in glob.glob(IN_DIR + '/*.csv'):
        create_ground_truth(feature_file, keywords)


if __name__ == '__main__':
    main()
|
49
hierarchical.py
Executable file
49
hierarchical.py
Executable file
|
@ -0,0 +1,49 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
from sklearn.cluster import AgglomerativeClustering
|
||||||
|
import numpy as np
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
DIR: str = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
OUT_DIR: str = DIR + '/clustering'
|
||||||
|
IN_DIR: str = DIR + '/feature_vectors'
|
||||||
|
|
||||||
|
|
||||||
|
def cluster_hierarchical(path: str, n_clusters: int, save_to_disk: bool = True) -> tuple[np.ndarray, np.ndarray]:
    """Cluster the rows of a feature-vector CSV with agglomerative clustering.

    Complete linkage is used. The first CSV column is taken as the row label
    (the method name) and all remaining columns as the feature vector.

    Args:
        path: path of the feature-vector CSV file.
        n_clusters: number of clusters to produce.
        save_to_disk: when true, write the assignment to
            ``<OUT_DIR>/<class name>_hierarchical.csv``.

    Returns:
        A ``(X, Y)`` tuple: the feature matrix and the cluster label assigned
        to each row.
    """
    clazz_name = os.path.basename(path)
    clazz_name = clazz_name[:clazz_name.rfind('.')]

    # Same loading convention as k_means.py: column 0 is the index.
    df = pd.read_csv(path, index_col=0)
    X = df.to_numpy()

    model = AgglomerativeClustering(
        n_clusters=n_clusters, linkage='complete').fit(X)

    Y = model.labels_  # array of cluster # assigned to each method

    # combine cluster labels with method name
    assigned = pd.DataFrame(Y, columns=['cluster']).set_axis(
        df.index.values)

    if save_to_disk:
        assigned.to_csv(OUT_DIR + '/' + clazz_name + '_hierarchical.csv')

    return (X, Y,)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: cluster one class with agglomerative clustering.

    Takes the class name and the number of clusters as positional arguments,
    reads ``<IN_DIR>/<class name>.csv`` and rewrites
    ``<OUT_DIR>/<class name>_hierarchical.csv``.
    """
    parser = argparse.ArgumentParser(
        description='Compute agglomerative clustering')
    parser.add_argument('class_name', type=str, help='name of the god class')
    parser.add_argument('n_clusters', type=int, help='number of clusters')

    args = parser.parse_args()
    path = IN_DIR + '/' + args.class_name + '.csv'

    # Drop any stale result first; on a first run there is nothing to delete,
    # which must not abort the clustering.
    try:
        os.remove(OUT_DIR + '/' + args.class_name + '_hierarchical.csv')
    except FileNotFoundError:
        pass

    cluster_hierarchical(path, args.n_clusters)


if __name__ == '__main__':
    main()
|
52
k_means.py
Executable file
52
k_means.py
Executable file
|
@ -0,0 +1,52 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
import numpy as np
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
DIR: str = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
OUT_DIR: str = DIR + '/clustering'
|
||||||
|
IN_DIR: str = DIR + '/feature_vectors'
|
||||||
|
|
||||||
|
RAND_SEED: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
def cluster_kmeans(path: str, n_clusters: int, save_to_disk: bool = True) -> tuple[np.ndarray, np.ndarray]:
    """Cluster the rows of a feature-vector CSV with k-means.

    The first CSV column is used as the row label (the method name) and all
    remaining columns as the feature vector. The estimator is seeded with
    RAND_SEED.

    Args:
        path: path of the feature-vector CSV file.
        n_clusters: number of clusters to produce.
        save_to_disk: when true, write the assignment to
            ``<OUT_DIR>/<class name>_kmeans.csv``.

    Returns:
        A ``(X, Y)`` tuple: the feature matrix and the cluster label assigned
        to each row.
    """
    clazz_name = os.path.basename(path)
    clazz_name = clazz_name[:clazz_name.rfind('.')]

    df = pd.read_csv(path, index_col=0)
    X = df.to_numpy()

    kmeans = KMeans(n_clusters=n_clusters,
                    random_state=RAND_SEED, n_init='auto').fit(X)

    Y = kmeans.labels_  # array of cluster # assigned to each method

    # combine cluster labels with method name
    assigned = pd.DataFrame(Y, columns=['cluster']).set_axis(
        df.index.values)

    if save_to_disk:
        assigned.to_csv(OUT_DIR + '/' + clazz_name + '_kmeans.csv')

    return (X, Y,)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: cluster one class with k-means.

    Takes the class name and the number of clusters as positional arguments,
    reads ``<IN_DIR>/<class name>.csv`` and rewrites
    ``<OUT_DIR>/<class name>_kmeans.csv``.
    """
    parser = argparse.ArgumentParser(
        description='Compute k-means clustering')
    parser.add_argument('class_name', type=str, help='name of the god class')
    parser.add_argument('n_clusters', type=int, help='number of clusters')

    args = parser.parse_args()
    path = IN_DIR + '/' + args.class_name + '.csv'

    # Drop any stale result first; on a first run there is nothing to delete,
    # which must not abort the clustering.
    try:
        os.remove(OUT_DIR + '/' + args.class_name + '_kmeans.csv')
    except FileNotFoundError:
        pass

    cluster_kmeans(path, args.n_clusters)


if __name__ == '__main__':
    main()
|
14
keyword_list.txt
Normal file
14
keyword_list.txt
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
create
|
||||||
|
object
|
||||||
|
cache
|
||||||
|
uri
|
||||||
|
standalone
|
||||||
|
encoding
|
||||||
|
identifier
|
||||||
|
user
|
||||||
|
error
|
||||||
|
content
|
||||||
|
parameter
|
||||||
|
subset
|
||||||
|
global
|
||||||
|
component
|
49
prec_recall.py
Executable file
49
prec_recall.py
Executable file
|
@ -0,0 +1,49 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import numpy as np
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
DIR: str = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
IN_DIR: str = DIR + '/clustering'
|
||||||
|
OUT_DIR: str = DIR + ''
|
||||||
|
|
||||||
|
|
||||||
|
def intrapairs(path: str) -> set[frozenset[str]]:
    """Return the set of intra-cluster pairs described by a clustering CSV.

    The CSV must have a ``cluster`` column; the values of the first remaining
    column are grouped by cluster, and every unordered pair of values that
    share a cluster is returned as a ``frozenset``.

    Args:
        path: path of the clustering CSV file.

    Returns:
        A set of frozensets, one per unordered pair of co-clustered entries.
        Clusters with a single member contribute no pairs.
    """
    # Local import: the module does not import itertools at file level.
    from itertools import combinations

    df = pd.read_csv(path)
    members_per_cluster = df.groupby('cluster').agg(list).iloc[:, 0]

    pairs: set[frozenset[str]] = set()
    for members in members_per_cluster:
        pairs.update(frozenset(pair) for pair in combinations(members, 2))
    return pairs
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Print pairwise precision and recall of each clustering vs. the ground truth.

    For every ``*_groundtruth.csv`` in IN_DIR, the intra-cluster pairs of the
    matching ``_kmeans.csv`` and ``_hierarchical.csv`` files are compared with
    the ground-truth intra-cluster pairs. Precision is the share of clustering
    pairs that are also ground-truth pairs; recall is the share of
    ground-truth pairs found by the clustering.
    """
    suffix = '_groundtruth.csv'
    for f in glob.glob(IN_DIR + '/*' + suffix):
        clazz_name = os.path.basename(f)[:-len(suffix)]
        print(clazz_name)

        ground_pairs = intrapairs(f)
        for method in ['kmeans', 'hierarchical']:
            cluster_pairs = intrapairs(
                IN_DIR + '/' + clazz_name + '_' + method + '.csv')

            n_common = len(ground_pairs.intersection(cluster_pairs))
            # A side made only of singleton clusters has no pairs; the ratio
            # is then undefined, so report NaN instead of dividing by zero.
            precision = (n_common / len(cluster_pairs)
                         if cluster_pairs else float('nan'))
            recall = (n_common / len(ground_pairs)
                      if ground_pairs else float('nan'))

            print(method + " precision: " + str(precision))
            print(method + " recall: " + str(recall))

        print()


if __name__ == '__main__':
    main()
|
28
readme.md
28
readme.md
|
@ -21,7 +21,7 @@ source env/bin/activate
|
||||||
pip3 install -r requirements.txt
|
pip3 install -r requirements.txt
|
||||||
```
|
```
|
||||||
|
|
||||||
## Run *find god classes*
|
## Running part 1: find god classes
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
./find_god_classes.py
|
./find_god_classes.py
|
||||||
|
@ -29,6 +29,32 @@ pip3 install -r requirements.txt
|
||||||
|
|
||||||
The resulting CSV file containing a list of God classes is generated in the `god_classes/god_classes.csv` path.
|
The resulting CSV file containing a list of God classes is generated in the `god_classes/god_classes.csv` path.
|
||||||
|
|
||||||
|
## Running part 3: clustering and silhouette metric
|
||||||
|
|
||||||
|
To compute optimal k-means and agglomerative clusterings using silhouette
|
||||||
|
validation for all classes, run:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
./silhouette.py --validate --autorun
|
||||||
|
```
|
||||||
|
|
||||||
|
To compute k-means or agglomerative clustering for a specific number of
|
||||||
|
clusters `K` and a specific class `KLASS` run respectively:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
./k_means.py KLASS K
|
||||||
|
```
|
||||||
|
|
||||||
|
```shell
|
||||||
|
./hierarchical.py KLASS K
|
||||||
|
```
|
||||||
|
|
||||||
|
Then, to check the silhouette metric of the resulting clusterings, run:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
./silhouette.py
|
||||||
|
```
|
||||||
|
|
||||||
## Compile report
|
## Compile report
|
||||||
|
|
||||||
- Install [Pandoc](https://pandoc.org/);
|
- Install [Pandoc](https://pandoc.org/);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
javalang==0.13.0
|
javalang==0.13.0
|
||||||
|
numpy==1.23.5
|
||||||
pandas==1.5.2
|
pandas==1.5.2
|
||||||
scikit_learn==1.2.1
|
scikit_learn==1.2.2
|
||||||
shrek==0.0.2
|
|
||||||
|
|
95
silhouette.py
Executable file
95
silhouette.py
Executable file
|
@ -0,0 +1,95 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
from sklearn.cluster import AgglomerativeClustering
|
||||||
|
from sklearn.metrics import silhouette_score
|
||||||
|
import numpy as np
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import argparse
|
||||||
|
from k_means import cluster_kmeans
|
||||||
|
from hierarchical import cluster_hierarchical
|
||||||
|
|
||||||
|
DIR: str = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
OUT_DIR: str = DIR + '/clustering'
|
||||||
|
IN_DIR: str = DIR + '/feature_vectors'
|
||||||
|
|
||||||
|
K_MAX: int = 65
|
||||||
|
|
||||||
|
|
||||||
|
def clean_output():
    """Delete every ``*_silhouette.csv`` file currently present in OUT_DIR."""
    for stale_file in glob.glob(OUT_DIR + '/*_silhouette.csv'):
        os.remove(stale_file)
|
||||||
|
|
||||||
|
|
||||||
|
def validate(path: str, clazz_name: str, autorun: bool):
    """Find the silhouette-optimal number of clusters for both algorithms.

    For each candidate ``k`` the silhouette score of the k-means and of the
    agglomerative clustering is computed. The scores are saved to
    ``<OUT_DIR>/<clazz_name>_silhouette.csv`` and the best ``k`` per algorithm
    is printed.

    Args:
        path: path of the feature-vector CSV file.
        clazz_name: class name used to build the output file name.
        autorun: when true, rerun both algorithms with their optimal ``k``
            and let them save the resulting clustering to disk.
    """
    df = pd.DataFrame(columns=['k_means', 'hierarchical'], dtype=float)

    # We bound the number of clusters by the number of distinct points in our dataset.
    # To count them, we compute the number of "distinct" feature vectors and we
    # bound to the minimum of K_MAX and this number.
    nodup = pd.read_csv(path, index_col=0).drop_duplicates()
    max_distinct = len(nodup)

    for n in range(2, min(K_MAX, max_distinct)):
        # Each score must land in the column of the algorithm that produced
        # it; the optimal k values below are read from these columns.
        X_k, Y_k = cluster_kmeans(path, n, save_to_disk=False)
        df.loc[n, 'k_means'] = silhouette_score(X_k, Y_k)

        X_h, Y_h = cluster_hierarchical(path, n, save_to_disk=False)
        df.loc[n, 'hierarchical'] = silhouette_score(X_h, Y_h)

    # idxmax on the column returns the index label (the k) of the best score.
    k_kmeans = int(df['k_means'].idxmax())
    k_hierarchical = int(df['hierarchical'].idxmax())

    print("K_means optimal value: " + str(k_kmeans))
    print("Hierarchical optimal value: " + str(k_hierarchical))

    df.to_csv(OUT_DIR + '/' + clazz_name + '_silhouette.csv')

    if autorun:
        cluster_hierarchical(path, k_hierarchical)
        cluster_kmeans(path, k_kmeans)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def compute_silhouette(path: str, clazz_name: str, suffix: str):
    """Print the silhouette score of a clustering previously saved to disk.

    Labels are read from the second column of
    ``<OUT_DIR>/<clazz_name>_<suffix>.csv``; feature vectors are every column
    but the first of the CSV at ``path``.
    """
    assignment = pd.read_csv(OUT_DIR + '/' + clazz_name + '_' + suffix + '.csv')
    Y = assignment.iloc[:, 1].values

    features = pd.read_csv(path)
    first_column = features.columns[0]
    X = features.drop(first_column, axis=1).to_numpy()

    score = silhouette_score(X, Y)
    print("Silhouette for " + suffix + ": " + str(score))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point for the silhouette evaluation.

    With ``--validate`` the old silhouette CSVs are removed and the optimal
    ``k`` is searched for every class (``--autorun`` is forwarded to that
    search). In every case the silhouette of the saved k-means and
    hierarchical clusterings is then printed for each class in IN_DIR.
    """
    parser = argparse.ArgumentParser(description='Compute silhouette metric.')
    parser.add_argument(
        '--validate',
        action='store_true',
        help='compute optimal k for each algorithm')
    parser.add_argument(
        '--autorun',
        action='store_true',
        help='if validating, computes CSV for optimal clustering automatically')
    args = parser.parse_args()

    if args.validate:
        clean_output()

    for feature_file in glob.glob(IN_DIR + '/*.csv'):
        base_name = os.path.basename(feature_file)
        clazz_name = base_name[:base_name.rfind('.')]
        print(clazz_name)

        if args.validate:
            validate(feature_file, clazz_name, args.autorun)

        for suffix in ('kmeans', 'hierarchical'):
            compute_silhouette(feature_file, clazz_name, suffix)

        print()


if __name__ == '__main__':
    main()
|
Reference in a new issue