PotatoPlan/Game1/Sources/ML/MLModel.cs

169 lines
6.9 KiB
C#
Raw Normal View History

2020-05-06 16:27:14 +02:00
using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers.LightGbm;
2020-05-10 01:38:08 +02:00
class MLModel
2020-05-06 16:27:14 +02:00
{
2020-05-10 01:38:08 +02:00
private static MLContext mlContext = new MLContext(seed: 1);
2020-05-11 01:30:18 +02:00
private static string directory = Directory.GetCurrentDirectory();
private static string path = directory;
private static string modelpath = directory;
private static string report = directory;
private static string pathBig = directory;
private static string modelpathBig = directory;
private static string reportBig = directory;
2020-05-10 01:38:08 +02:00
// Loading data, creatin and saving ML model for smaller dataset (100)
public static void CreateModel()
2020-05-06 16:27:14 +02:00
{
2020-05-10 01:38:08 +02:00
IDataView trainingDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
path: path,
hasHeader: true,
separatorChar: ',',
allowQuoting: true,
allowSparse: false);
ModelInput sample = mlContext.Data.CreateEnumerable<ModelInput>(trainingDataView, false).ElementAt(0);
ITransformer MLModel = BuildAndTrain(mlContext, trainingDataView, sample, report);
SaveModel(mlContext, MLModel, modelpath, trainingDataView.Schema);
}
2020-05-06 16:27:14 +02:00
2020-05-10 01:38:08 +02:00
// ... for bigger dataset (1600)
public static void CreateBigModel()
{
IDataView trainingDataView = mlContext.Data.LoadFromTextFile<BigModelInput>(
path: pathBig,
hasHeader: true,
separatorChar: ',',
allowQuoting: true,
allowSparse: false);
BigModelInput sample = mlContext.Data.CreateEnumerable<BigModelInput>(trainingDataView, false).ElementAt(0);
ITransformer MLModel = BuildAndTrain(mlContext, trainingDataView, sample, reportBig);
SaveModel(mlContext, MLModel, modelpathBig, trainingDataView.Schema);
}
2020-05-06 16:27:14 +02:00
2020-05-10 01:38:08 +02:00
// Building and training ML model, very small dataset (100 entries)
public static ITransformer BuildAndTrain(MLContext mLContext, IDataView trainingDataView, ModelInput sample, string reportPath)
{
var options = new LightGbmMulticlassTrainer.Options
2020-05-06 16:27:14 +02:00
{
2020-05-10 01:38:08 +02:00
MaximumBinCountPerFeature = 8,
LearningRate = 0.00025,
NumberOfIterations = 40000,
NumberOfLeaves = 10,
LabelColumnName = "Fertilizer_NameF",
FeatureColumnName = "Features",
Booster = new DartBooster.Options()
{
MaximumTreeDepth = 10
}
};
var pipeline = mlContext.Transforms
.Text.FeaturizeText("Soil_TypeF", "Soil_Type")
.Append(mlContext.Transforms.Text.FeaturizeText("Crop_TypeF", "Crop_Type"))
.Append(mlContext.Transforms.Concatenate("Features", "Temperature", "Humidity", "Moisture", "Soil_TypeF", "Crop_TypeF", "Nitrogen", "Potassium", "Phosphorous"))
.Append(mlContext.Transforms.Conversion.MapValueToKey("Fertilizer_NameF", "Fertilizer_Name"), TransformerScope.TrainTest)
.AppendCacheCheckpoint(mLContext)
.Append(mLContext.MulticlassClassification.Trainers.LightGbm(options))
.Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));
Evaluate(mlContext, trainingDataView, pipeline, 10, reportPath, "Fertilizer_NameF");
ITransformer MLModel = pipeline.Fit(trainingDataView);
return MLModel;
}
//Building and training ML model, moderate size dataset (1600 entries)
public static ITransformer BuildAndTrain(MLContext mLContext, IDataView trainingDataView, BigModelInput sample, string reportPath)
{
2020-05-06 16:27:14 +02:00
2020-05-10 01:38:08 +02:00
var options = new LightGbmMulticlassTrainer.Options
2020-05-06 16:27:14 +02:00
{
2020-05-10 01:38:08 +02:00
MaximumBinCountPerFeature = 10,
LearningRate = 0.001,
NumberOfIterations = 10000,
NumberOfLeaves = 12,
LabelColumnName = "ClassF",
FeatureColumnName = "Features",
Booster = new DartBooster.Options()
{
MaximumTreeDepth = 12
}
};
var pipeline = mlContext.Transforms
.Concatenate("Features", "Ca", "Mg", "K", "S", "N", "Lime", "C", "P", "Moisture")
.Append(mLContext.Transforms.NormalizeMinMax("Features"))
.Append(mlContext.Transforms.Conversion.MapValueToKey("ClassF", "Class"), TransformerScope.TrainTest)
.AppendCacheCheckpoint(mLContext)
.Append(mLContext.MulticlassClassification.Trainers.LightGbm(options))
.Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));
Evaluate(mlContext, trainingDataView, pipeline, 8, reportPath, "ClassF");
ITransformer MLModel = pipeline.Fit(trainingDataView);
return MLModel;
}
2020-05-06 16:27:14 +02:00
2020-05-10 01:38:08 +02:00
public static ITransformer TrainModel(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> trainingPipeline)
{
ITransformer model = trainingPipeline.Fit(trainingDataView);
return model;
}
2020-05-06 16:27:14 +02:00
2020-05-10 01:38:08 +02:00
// Evaluate and save results to a text file
public static void Evaluate(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> trainingPipeline, int folds, string reportPath, string labelColumnName)
{
var crossVal = mlContext.MulticlassClassification.CrossValidate(trainingDataView, trainingPipeline, numberOfFolds: folds, labelColumnName: labelColumnName);
2020-05-06 16:27:14 +02:00
2020-05-10 01:38:08 +02:00
var metricsInMultipleFolds = crossVal.Select(r => r.Metrics);
2020-05-06 16:27:14 +02:00
2020-05-10 01:38:08 +02:00
var MicroAccuracyValues = metricsInMultipleFolds.Select(m => m.MicroAccuracy);
var LogLossValues = metricsInMultipleFolds.Select(m => m.LogLoss);
var LogLossReductionValues = metricsInMultipleFolds.Select(m => m.LogLossReduction);
string MicroAccuracyAverage = MicroAccuracyValues.Average().ToString("0.######");
string LogLossAvg = LogLossValues.Average().ToString("0.######");
string LogLossReductionAvg = LogLossReductionValues.Average().ToString("0.######");
2020-05-06 17:48:11 +02:00
2020-05-10 01:38:08 +02:00
var report = File.CreateText(reportPath);
report.Write("Micro Accuracy: " + MicroAccuracyAverage +'\n'+ "LogLoss Average: " + LogLossAvg +'\n'+ "LogLoss Reduction: " + LogLossReductionAvg, 0, 0);
report.Flush();
report.Close();
}
2020-05-06 16:27:14 +02:00
2020-05-10 01:38:08 +02:00
public static void SaveModel(MLContext mlContext, ITransformer Model, string modelPath, DataViewSchema modelInputSchema)
{
mlContext.Model.Save(Model, modelInputSchema, modelPath);
}
public static ITransformer LoadModel(bool isBig)
{
if (isBig)
return mlContext.Model.Load(modelpathBig, out DataViewSchema inputSchema);
else
return mlContext.Model.Load(modelpath, out DataViewSchema inputSchema);
}
2020-05-06 17:48:11 +02:00
2020-05-10 01:38:08 +02:00
public static Microsoft.ML.PredictionEngine<ModelInput, ModelOutput> CreateEngine()
{
ITransformer mlModel = LoadModel(false);
return mlContext.Model.CreatePredictionEngine<ModelInput, ModelOutput>(mlModel);
2020-05-06 16:27:14 +02:00
}
2020-05-10 01:38:08 +02:00
2020-05-06 16:27:14 +02:00
}
2020-05-10 01:38:08 +02:00