report part 1 done

This commit is contained in:
Claudio Maggioni 2023-04-18 13:49:07 +02:00
parent 703b77a39d
commit 82a505b60a
4 changed files with 29 additions and 9 deletions

View file

@ -85,12 +85,16 @@ def parse(path: str):
# consider only the class matching the input file name, to skip inner classes
if path.endswith(node.name + '.java'):
fqdn = package_name + '.' + node.name
print(fqdn)
fields = get_fields(node)
methods = get_methods(node)
cols = sorted(fields.union(methods))
print("# Attributes", len(cols))
print("# Feature vectors", len(node.methods))
df = pd.DataFrame(columns=cols, dtype=int)
for m in node.methods:
# make sure method is included in csv file
@ -100,9 +104,6 @@ def parse(path: str):
m_methods = get_methods_accessed_by_method(m, methods)
for member in m_fields.union(m_methods):
if member not in fields and member not in methods:
print(member)
df.loc[m.name, member] = 1
df = df.fillna(0)

View file

@ -1,4 +1,4 @@
,abort,actualEncoding,addEventListener,adoptNode,allowGrammarAccess,callUserDataHandlers,canRenameElements,changed,changes,checkDOMNSErr,checkNamespaceWF,checkQName,clearIdentifiers,clone,cloneNode,copyEventListeners,createAttribute,createAttributeNS,createCDATASection,createComment,createDocumentFragment,createDocumentType,createElement,createElementDefinition,createElementNS,createEntity,createEntityReference,createNotation,createProcessingInstruction,createTextNode,deletedText,dispatchEvent,docElement,docType,documentNumber,domNormalizer,encoding,errorChecking,fConfiguration,fDocumentURI,fFreeNLCache,fXPathEvaluator,freeNodeListCache,getAsync,getBaseURI,getDoctype,getDocumentElement,getDocumentURI,getDomConfig,getElementById,getElementsByTagName,getElementsByTagNameNS,getEncoding,getErrorChecking,getFeature,getIdentifier,getIdentifiers,getImplementation,getInputEncoding,getMutationEvents,getNodeListCache,getNodeName,getNodeNumber,getNodeType,getOwnerDocument,getStandalone,getStrictErrorChecking,getTextContent,getUserData,getUserDataRecord,getVersion,getXmlEncoding,getXmlStandalone,getXmlVersion,identifiers,importNode,insertBefore,insertedNode,insertedText,insertingNode,isKidOK,isNormalizeDocRequired,isValidQName,isXML11Version,isXMLName,isXMLVersionChanged,kidOK,load,loadXML,modifiedAttrValue,modifiedCharacterData,modifyingCharacterData,nodeCounter,nodeTable,normalizeDocument,putIdentifier,readObject,removeChild,removeEventListener,removeIdentifier,removeUserDataTable,removedAttrNode,removedNode,removingNode,renameNode,renamedAttrNode,renamedElement,replaceChild,replaceRenameElement,replacedCharacterData,replacedNode,replacedText,replacingData,replacingNode,saveXML,serialVersionUID,setAsync,setAttrNode,setDocumentURI,setEncoding,setErrorChecking,setInputEncoding,setMutationEvents,setStandalone,setStrictErrorChecking,setTextContent,setUserData,setUserDataTable,setVersion,setXmlEncoding,setXmlStandalone,setXmlVersion,standalone,undeferChildren,userData,version,writ
eObject,xml11Version,xmlVersionChanged,Node,UserDataHandler,kid,DOMMessageFormatter,DOMException,NamespaceContext
,abort,actualEncoding,addEventListener,adoptNode,allowGrammarAccess,callUserDataHandlers,canRenameElements,changed,changes,checkDOMNSErr,checkNamespaceWF,checkQName,clearIdentifiers,clone,cloneNode,copyEventListeners,createAttribute,createAttributeNS,createCDATASection,createComment,createDocumentFragment,createDocumentType,createElement,createElementDefinition,createElementNS,createEntity,createEntityReference,createNotation,createProcessingInstruction,createTextNode,deletedText,dispatchEvent,docElement,docType,documentNumber,domNormalizer,encoding,errorChecking,fConfiguration,fDocumentURI,fFreeNLCache,fXPathEvaluator,freeNodeListCache,getAsync,getBaseURI,getDoctype,getDocumentElement,getDocumentURI,getDomConfig,getElementById,getElementsByTagName,getElementsByTagNameNS,getEncoding,getErrorChecking,getFeature,getIdentifier,getIdentifiers,getImplementation,getInputEncoding,getMutationEvents,getNodeListCache,getNodeName,getNodeNumber,getNodeType,getOwnerDocument,getStandalone,getStrictErrorChecking,getTextContent,getUserData,getUserDataRecord,getVersion,getXmlEncoding,getXmlStandalone,getXmlVersion,identifiers,importNode,insertBefore,insertedNode,insertedText,insertingNode,isKidOK,isNormalizeDocRequired,isValidQName,isXML11Version,isXMLName,isXMLVersionChanged,kidOK,load,loadXML,modifiedAttrValue,modifiedCharacterData,modifyingCharacterData,nodeCounter,nodeTable,normalizeDocument,putIdentifier,readObject,removeChild,removeEventListener,removeIdentifier,removeUserDataTable,removedAttrNode,removedNode,removingNode,renameNode,renamedAttrNode,renamedElement,replaceChild,replaceRenameElement,replacedCharacterData,replacedNode,replacedText,replacingData,replacingNode,saveXML,serialVersionUID,setAsync,setAttrNode,setDocumentURI,setEncoding,setErrorChecking,setInputEncoding,setMutationEvents,setStandalone,setStrictErrorChecking,setTextContent,setUserData,setUserDataTable,setVersion,setXmlEncoding,setXmlStandalone,setXmlVersion,standalone,undeferChildren,userData,version,writ
eObject,xml11Version,xmlVersionChanged,Node,UserDataHandler,kid,DOMException,DOMMessageFormatter,NamespaceContext
getOwnerDocument,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
getNodeType,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
getNodeName,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0

1 abort actualEncoding addEventListener adoptNode allowGrammarAccess callUserDataHandlers canRenameElements changed changes checkDOMNSErr checkNamespaceWF checkQName clearIdentifiers clone cloneNode copyEventListeners createAttribute createAttributeNS createCDATASection createComment createDocumentFragment createDocumentType createElement createElementDefinition createElementNS createEntity createEntityReference createNotation createProcessingInstruction createTextNode deletedText dispatchEvent docElement docType documentNumber domNormalizer encoding errorChecking fConfiguration fDocumentURI fFreeNLCache fXPathEvaluator freeNodeListCache getAsync getBaseURI getDoctype getDocumentElement getDocumentURI getDomConfig getElementById getElementsByTagName getElementsByTagNameNS getEncoding getErrorChecking getFeature getIdentifier getIdentifiers getImplementation getInputEncoding getMutationEvents getNodeListCache getNodeName getNodeNumber getNodeType getOwnerDocument getStandalone getStrictErrorChecking getTextContent getUserData getUserDataRecord getVersion getXmlEncoding getXmlStandalone getXmlVersion identifiers importNode insertBefore insertedNode insertedText insertingNode isKidOK isNormalizeDocRequired isValidQName isXML11Version isXMLName isXMLVersionChanged kidOK load loadXML modifiedAttrValue modifiedCharacterData modifyingCharacterData nodeCounter nodeTable normalizeDocument putIdentifier readObject removeChild removeEventListener removeIdentifier removeUserDataTable removedAttrNode removedNode removingNode renameNode renamedAttrNode renamedElement replaceChild replaceRenameElement replacedCharacterData replacedNode replacedText replacingData replacingNode saveXML serialVersionUID setAsync setAttrNode setDocumentURI setEncoding setErrorChecking setInputEncoding setMutationEvents setStandalone setStrictErrorChecking setTextContent setUserData setUserDataTable setVersion setXmlEncoding setXmlStandalone setXmlVersion standalone undeferChildren userData version 
writeObject xml11Version xmlVersionChanged Node UserDataHandler kid DOMException DOMMessageFormatter NamespaceContext
2 getOwnerDocument 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3 getNodeType 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
4 getNodeName 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

View file

@ -1,6 +1,7 @@
---
author: Claudio Maggioni
title: Information Modelling & Analysis -- Project 1
geometry: margin=2.5cm,bottom=3cm
---
<!--
@ -31,6 +32,29 @@ Commit ID: **TBD**
## God Classes
The first part of the project requires labelling some classes of the _Xerces_
project as "God classes" based on the number of methods each class has.
Specifically, I label as "God classes" the classes whose number of methods is more than
six standard deviations above the mean number of methods, i.e. where
the condition
$$|M(C)| > \mu(M) + 6\sigma(M)$$
holds.
To scan and compute the number of methods of each class I use the Python library `javalang`, which implements the Java AST and parser. The Python script
`./find_god_classes.py` uses this library to parse each file in the project and
compute the number of methods of each class. Note that only non-constructor methods are counted (specifically, the code counts the number of `method` nodes in each `ClassDeclaration` node).
Then, the script computes mean and standard
deviation of the number of methods and filters the list of classes according to the
condition described above. The file `god_classes/god_classes.csv` is then output,
listing all the god classes found.
The god classes I identified, and their corresponding number of methods,
can be found in Table [1](#tab:god_classes){reference-type="ref"
reference="tab:god_classes"}.
::: {#tab:god_classes}
---------------------------------------------- ---------------
**Class Name** **\# Methods**
@ -43,11 +67,6 @@ Commit ID: **TBD**
: Identified God Classes
:::
The god classes I identified, and their corresponding number of methods
can be found in Table [1](#tab:god_classes){reference-type="ref"
reference="tab:god_classes"}.
Note: the number of methods was computed by considering only methods and not constructors.
## Feature Vectors

Binary file not shown.