Add inductive decision tree learning functions

To build a decision tree, use the inductiveDecisionTreeLearning function.
parent 43b654a6a5
commit b0d8872c15
201 src/AI/DecisionTrees/InductiveDecisionTreeLearning.py Normal file
@@ -0,0 +1,201 @@
import math
from typing import List

from src.AI.DecisionTrees.AttributeDefinition import AttributeDefinition
from src.AI.DecisionTrees.DecisionTree import DecisionTree
from src.AI.DecisionTrees.DecisionTreeBranch import DecisionTreeBranch
from src.AI.DecisionTrees.DecisionTreeExample import DecisionTreeExample
def inductiveDecisionTreeLearning(examples: List[DecisionTreeExample], attributes: List[AttributeDefinition], default,
                                  classifications):
    """
    Builds a decision tree from the given examples, attributes and possible classifications.

    :param examples: Training examples.
    :param attributes: List of all attribute definitions.
    :param default: Default classification.
    :param classifications: Possible example classifications.
    :return: Decision tree.
    """

    # If there are no examples, return the default classification.
    if examples is None or len(examples) == 0:
        return DecisionTree(default)

    # If all examples belong to the same classification, return a node with that classification.
    elif checkIfAllExamplesHaveSameClassification(examples):
        return DecisionTree(examples[0].classification)

    # If there are no attributes left, return a node with the majority classification.
    elif attributes is None or len(attributes) == 0:
        return DecisionTree(majorityValue(examples))

    else:
        best = chooseAttribute(attributes, examples, classifications)
        tree = DecisionTree(best)
        # The chosen attribute must not be tested again further down the tree.
        remainingAttributes = [attribute for attribute in attributes if attribute is not best]

        for value in best.values:
            examples_i = getElementsWithAttributeValue(examples, best, value)
            subtree = inductiveDecisionTreeLearning(examples_i, remainingAttributes,
                                                    majorityValue(examples), classifications)
            tree.addBranch(DecisionTreeBranch(tree, value, subtree))

        return tree
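The recursion hinges on the split step: each value of the chosen attribute selects the subset of examples carrying that value, and the learner recurses on that subset with the remaining attributes. A minimal sketch of one such split, using plain dicts as hypothetical stand-ins for DecisionTreeExample:

# Hypothetical stand-ins for illustration only.
toyExamples = [
    {"outlook": "sunny", "classification": "stay_in"},
    {"outlook": "rainy", "classification": "stay_in"},
    {"outlook": "sunny", "classification": "go_out"},
]

# One level of the recursion: partition on the chosen attribute, as the
# `for value in best.values` loop above does via getElementsWithAttributeValue.
for value in ("sunny", "rainy"):
    subset = [e for e in toyExamples if e["outlook"] == value]
    print(value, [e["classification"] for e in subset])
# sunny ['stay_in', 'go_out']  -> still mixed, recurse on the subset
# rainy ['stay_in']            -> pure subset, becomes a leaf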
def majorityValue(examples: List[DecisionTreeExample]):
    """
    Returns the classification that most of the examples have.

    :param examples: Examples to inspect.
    :return: Classification.
    """
    classifications = []

    # Collect the distinct classification values.
    for example in examples:
        if example.classification not in classifications:
            classifications.append(example.classification)

    # Find the index of the most frequent classification.
    majorityValueInd = 0
    majorityValueCount = 0
    ind = 0
    for classification in classifications:
        count = 0
        for example in examples:
            if example.classification == classification:
                count += 1
        if count > majorityValueCount:
            majorityValueCount = count
            majorityValueInd = ind

        ind += 1

    return classifications[majorityValueInd]
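For reference, the same majority computation can be written more compactly with the standard library; a sketch, not part of this commit:

from collections import Counter

def majorityValueCompact(examples):
    # most_common(1) yields the (classification, count) pair with the highest
    # count; ties resolve in first-seen order, matching the loop above.
    return Counter(example.classification for example in examples).most_common(1)[0][0]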
def checkIfAllExamplesHaveSameClassification(examples: List[DecisionTreeExample]):
    return all(example.classification == examples[0].classification for example in examples)
def probOfExBeingClass(classification, allExamplesNum, classExamplesNum):
    """
    Calculates the probability of an example being classified as the given classification.
    Needed to calculate information entropy.

    :param classification: The classification in question.
    :param allExamplesNum: Number of all examples.
    :param classExamplesNum: Number of examples classified as the given classification.
    :return: Probability, or 0 if there are no examples.
    """
    if allExamplesNum == 0:
        return 0

    return classExamplesNum / allExamplesNum
def chooseAttribute(attributes: List[AttributeDefinition], examples: List[DecisionTreeExample], classifications):
    """
    Chooses the best attribute by calculating the information gain of each attribute.
    Returns the attribute with maximal gain.

    :param attributes: All attributes.
    :param examples: Examples to split.
    :param classifications: All possible classifications.
    :return: Attribute with maximal information gain.
    """
    bestAttribute = None
    bestAttributeGain = -1

    for attribute in attributes:
        attrInformationGain = calculateInformationGain(attribute, classifications, examples)
        if attrInformationGain > bestAttributeGain:
            bestAttribute = attribute
            bestAttributeGain = attrInformationGain

    return bestAttribute
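Since the loop is an argmax over information gain, an equivalent one-liner (a sketch, not part of this commit) is:

def chooseAttributeCompact(attributes, examples, classifications):
    # max() keeps the first attribute on ties, like the `>` comparison above.
    return max(attributes,
               key=lambda attribute: calculateInformationGain(attribute, classifications, examples))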
def calculateInformationGain(attribute: AttributeDefinition, classifications, examples: List[DecisionTreeExample]):
    """
    Calculates how much information is gained by checking the value of the given attribute.
    Needed to choose the best attribute.

    :param attribute: Attribute to evaluate.
    :param classifications: All possible classifications.
    :param examples: Examples to split.
    :return: Information gain of the attribute.
    """
    return calculateEntropy(classifications, examples) - calculateRemainder(attribute, examples, classifications)
def calculateRemainder(attribute: AttributeDefinition, examples: List[DecisionTreeExample], classifications):
    """
    Calculates how much information is still needed to classify an example after checking
    the value of the given attribute. Needed when calculating information gain.

    :param attribute: Attribute to evaluate.
    :param examples: Examples to split.
    :param classifications: All possible classifications.
    :return: Weighted entropy of the subsets induced by the attribute.
    """
    remainder = 0
    examplesNum = len(examples)

    # The attribute divides the examples into one subset per attribute value.
    examplesDividedByAttrValues = {}
    for value in attribute.values:
        examplesDividedByAttrValues[value] = getElementsWithAttributeValue(examples, attribute, value)

    # Each subset's entropy is weighted by the fraction of examples it contains.
    for value, examplesSubset in examplesDividedByAttrValues.items():
        remainder += (len(examplesSubset) / examplesNum) * calculateEntropy(classifications, examplesSubset)

    return remainder
def calculateEntropy(classifications, examples: List[DecisionTreeExample]):
    """
    Calculates the information entropy of the example set. Needed when calculating information gain.

    :param classifications: All possible classifications.
    :param examples: Examples to measure.
    :return: Information entropy.
    """
    examplesNum = len(examples)
    examplesNumByClassification = {}
    for classification in classifications:
        count = 0
        for example in examples:
            if example.classification == classification:
                count += 1
        examplesNumByClassification[classification] = count

    entropy = 0
    for classification in classifications:
        p = probOfExBeingClass(classification, examplesNum, examplesNumByClassification[classification])
        # Terms with p == 0 contribute nothing (and log2(0) is undefined).
        if p > 0:
            entropy += -p * math.log2(p)

    return entropy
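A quick numeric check of the entropy/remainder/gain pipeline (a standalone sketch, not part of this commit):

import math

def H(probabilities):
    # Shannon entropy in bits, mirroring calculateEntropy above.
    return -sum(p * math.log2(p) for p in probabilities if p > 0)

# Six examples, 3 classified "yes" and 3 "no": maximal uncertainty, 1.0 bit.
entropyBefore = H([3 / 6, 3 / 6])

# A hypothetical attribute splits them into {2 yes, 1 no} and {1 yes, 2 no}.
remainder = (3 / 6) * H([2 / 3, 1 / 3]) + (3 / 6) * H([1 / 3, 2 / 3])

gain = entropyBefore - remainder
print(round(gain, 3))  # 0.082 bits: a weak but positive split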
def getElementsWithAttributeValue(examples: List[DecisionTreeExample], attributeDefinition: AttributeDefinition, value):
    """
    Returns the subset of examples with the given attribute value.

    :param examples: Examples to filter.
    :param attributeDefinition: Attribute to check.
    :param value: Required attribute value.
    :return: Matching examples.
    """
    elements = []
    for example in examples:
        if example.getAttributeWithDefinition(attributeDefinition).value == value:
            elements.append(example)

    return elements
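A minimal driver sketch. The constructors of AttributeDefinition and DecisionTreeExample are not shown in this commit, so this sketch uses hypothetical stand-in classes exposing only the interface the learner actually touches: attribute.values, example.classification, and example.getAttributeWithDefinition(definition).value.

from src.AI.DecisionTrees.InductiveDecisionTreeLearning import inductiveDecisionTreeLearning

# Hypothetical stand-ins, for illustration only.
class StubAttribute:
    def __init__(self, name, values):
        self.name = name
        self.values = values

class StubValue:
    def __init__(self, value):
        self.value = value

class StubExample:
    def __init__(self, attributeValues, classification):
        self.attributeValues = attributeValues  # {StubAttribute: value}
        self.classification = classification

    def getAttributeWithDefinition(self, definition):
        return StubValue(self.attributeValues[definition])

outlook = StubAttribute("outlook", ["sunny", "rainy"])
examples = [
    StubExample({outlook: "sunny"}, "go_out"),
    StubExample({outlook: "sunny"}, "go_out"),
    StubExample({outlook: "rainy"}, "stay_in"),
]

# Splitting on outlook yields two pure subsets, so both branches become leaves.
tree = inductiveDecisionTreeLearning(examples, [outlook], "stay_in",
                                     ["go_out", "stay_in"])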