improve documentation
This commit is contained in:
parent
590b3726cc
commit
1073407760
24
README.md
24
README.md
@ -52,7 +52,29 @@ Let us download a Gonito.net challenge:
|
|||||||
|
|
||||||
The task is to predict the sentiment of a Polish short text -- whether
|
The task is to predict the sentiment of a Polish short text -- whether
|
||||||
it is positive or negative (or to be precise: to guess whether a
|
it is positive or negative (or to be precise: to guess whether a
|
||||||
positive or negative emoticon was used).
|
positive or negative emoticon was used). The train set is given
|
||||||
|
in the `train/train.tsv.xz` file, each item is given in a separate line,
|
||||||
|
have a look at the first 5 items:
|
||||||
|
|
||||||
|
xzcat train/train.tsv.xz | head -n 5
|
||||||
|
|
||||||
|
Now let's try to evaluate some solution to this challenge. Let's fetch it:
|
||||||
|
|
||||||
|
git fetch git://gonito.net/sentiment-by-emoticons submission-01865
|
||||||
|
git reset --hard FETCH_HEAD
|
||||||
|
|
||||||
|
and now run geval:
|
||||||
|
|
||||||
|
geval -t dev-0
|
||||||
|
|
||||||
|
(You need to run `dev-0` test as the expected results for the `test-A`
|
||||||
|
test are hidden from you.) The evaluation result is 0.47481. This might
|
||||||
|
be hard to interpret, so you could try other metrics.
|
||||||
|
|
||||||
|
geval -t dev-0 --metric Accuracy --metric Likelihood
|
||||||
|
|
||||||
|
So now you can see that the accuracy is over 78% and the likelihood
|
||||||
|
(i.e. geometric mean of probabilities of the correct classes) is 0.62.
|
||||||
|
|
||||||
## Preparing a Gonito challenge
|
## Preparing a Gonito challenge
|
||||||
|
|
||||||
|
@ -85,8 +85,10 @@ type MetricValue = Double
|
|||||||
defaultLogLossHashedSize :: Word32
|
defaultLogLossHashedSize :: Word32
|
||||||
defaultLogLossHashedSize = 10
|
defaultLogLossHashedSize = 10
|
||||||
|
|
||||||
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
|
-- | evaluation metric
|
||||||
| MAP | LogLoss | Likelihood | BIOF1 | BIOF1Labels | LikelihoodHashed Word32
|
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI
|
||||||
|
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
||||||
|
| BIOF1 | BIOF1Labels | LikelihoodHashed Word32
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
|
|
||||||
instance Show Metric where
|
instance Show Metric where
|
||||||
@ -141,6 +143,7 @@ instance Read Metric where
|
|||||||
|
|
||||||
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
|
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
|
||||||
|
|
||||||
|
-- | Returns what is preferred for a given metric: high values or low values.
|
||||||
getMetricOrdering :: Metric -> MetricOrdering
|
getMetricOrdering :: Metric -> MetricOrdering
|
||||||
getMetricOrdering RMSE = TheLowerTheBetter
|
getMetricOrdering RMSE = TheLowerTheBetter
|
||||||
getMetricOrdering MSE = TheLowerTheBetter
|
getMetricOrdering MSE = TheLowerTheBetter
|
||||||
@ -174,6 +177,7 @@ defaultMetric = RMSE
|
|||||||
configFileName :: FilePath
|
configFileName :: FilePath
|
||||||
configFileName = "config.txt"
|
configFileName = "config.txt"
|
||||||
|
|
||||||
|
-- | Specification of an evaluation task to be done.
|
||||||
data GEvalSpecification = GEvalSpecification
|
data GEvalSpecification = GEvalSpecification
|
||||||
{ gesOutDirectory :: FilePath,
|
{ gesOutDirectory :: FilePath,
|
||||||
gesExpectedDirectory :: Maybe FilePath,
|
gesExpectedDirectory :: Maybe FilePath,
|
||||||
@ -327,7 +331,15 @@ singleLineAsLineSource :: LineInFile -> LineSource (ResourceT IO)
|
|||||||
singleLineAsLineSource (LineInFile sourceSpec lineNo line) =
|
singleLineAsLineSource (LineInFile sourceSpec lineNo line) =
|
||||||
LineSource (CL.sourceList [line]) sourceSpec lineNo
|
LineSource (CL.sourceList [line]) sourceSpec lineNo
|
||||||
|
|
||||||
gevalCore :: Metric -> SourceSpec -> SourceSpec -> SourceSpec -> IO (MetricValue)
|
-- | Runs evaluation for a given metric using the sources specified
|
||||||
|
-- for input, expected output and output. Returns the metric value.
|
||||||
|
-- Throws @GEvalException@ if something was wrong in the data (e.g.
|
||||||
|
-- inconsistent number of lines in the sources).
|
||||||
|
gevalCore :: Metric -- ^ evaluation metric
|
||||||
|
-> SourceSpec -- ^ source specification for the input values
|
||||||
|
-> SourceSpec -- ^ source specification for the expected output
|
||||||
|
-> SourceSpec -- ^ source specification for the output
|
||||||
|
-> IO (MetricValue) -- ^ metric value for the output against the expected output
|
||||||
gevalCore metric inputSource expectedSource outSource = do
|
gevalCore metric inputSource expectedSource outSource = do
|
||||||
whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
|
whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
|
||||||
gevalCoreOnSources metric
|
gevalCoreOnSources metric
|
||||||
@ -341,11 +353,25 @@ isEmptyFileSource _ = return False
|
|||||||
|
|
||||||
logLossToLikehood logLoss = exp (-logLoss)
|
logLossToLikehood logLoss = exp (-logLoss)
|
||||||
|
|
||||||
gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric
|
-- | Runs evaluation for a given metric using the sources given
|
||||||
-> LineSource (ResourceT m)
|
-- for input, expected output and output. Returns the metric value.
|
||||||
-> LineSource (ResourceT m)
|
-- Throws @GEvalException@ if something was wrong in the data (e.g.
|
||||||
-> LineSource (ResourceT m)
|
-- inconsistent number of lines in the sources).
|
||||||
-> m (MetricValue)
|
--
|
||||||
|
-- The difference between this and @gevalCore@ is that it operates on Conduit
|
||||||
|
-- sources (rather than source specification).
|
||||||
|
--
|
||||||
|
-- This could be specialised for particular metrics, if they could be
|
||||||
|
-- calculated from other metrics in a trivial fashion (e.g. @RMSE@
|
||||||
|
-- which is just a square root of @MSE@). Otherwise a metric should be
|
||||||
|
-- defined in @gevalCore'@ and @gevalCoreWithoutInput@ helper
|
||||||
|
-- functions.
|
||||||
|
gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
|
||||||
|
Metric -- ^ evaluation metric
|
||||||
|
-> LineSource (ResourceT m) -- ^ source of the input values
|
||||||
|
-> LineSource (ResourceT m) -- ^ source to read the expected output
|
||||||
|
-> LineSource (ResourceT m) -- ^ source to read the output
|
||||||
|
-> m (MetricValue) -- ^ metric values for the output against the expected output
|
||||||
gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do
|
gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do
|
||||||
mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource
|
mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource
|
||||||
return $ mse ** 0.5
|
return $ mse ** 0.5
|
||||||
@ -363,7 +389,20 @@ gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
|
|||||||
|
|
||||||
data LineInFile = LineInFile SourceSpec Word32 Text
|
data LineInFile = LineInFile SourceSpec Word32 Text
|
||||||
|
|
||||||
gevalCore' :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
|
-- | Runs evaluation for a given metric using the sources given
|
||||||
|
-- for input, expected output and output. Returns the metric value.
|
||||||
|
-- Throws @GEvalException@ if something was wrong in the data (e.g.
|
||||||
|
-- inconsistent number of lines in the sources).
|
||||||
|
--
|
||||||
|
-- Metrics are starting to be really defined here, though when the
|
||||||
|
-- input is not needed for doing the evaluation (which is the case for
|
||||||
|
-- most metrics), the work is delegated to the @gevalCoreWithoutInput@ function.
|
||||||
|
gevalCore' :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
|
||||||
|
Metric -- ^ evaluation metric
|
||||||
|
-> LineSource (ResourceT m) -- ^ source of the input values
|
||||||
|
-> LineSource (ResourceT m) -- ^ source to read the expected output
|
||||||
|
-> LineSource (ResourceT m) -- ^ source to read the output
|
||||||
|
-> m (MetricValue) -- ^ metric values for the output against the expected output
|
||||||
gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id
|
gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id
|
||||||
where outParser = getValue . TR.double
|
where outParser = getValue . TR.double
|
||||||
|
|
||||||
@ -479,7 +518,19 @@ data SourceItem a = Got a | Wrong String | Done
|
|||||||
skipLineNumber :: (x -> c) -> ((Word32, x) -> c)
|
skipLineNumber :: (x -> c) -> ((Word32, x) -> c)
|
||||||
skipLineNumber fun = fun . snd
|
skipLineNumber fun = fun . snd
|
||||||
|
|
||||||
gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => (Text -> Either String a) -> (Text -> Either String b) -> ((a, b) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
|
-- | A helper function to run evaluation when the input is not needed to calculate the metric value.
|
||||||
|
gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
|
||||||
|
(Text -> Either String a) -> -- ^ parser for values in the expected output
|
||||||
|
(Text -> Either String b) -> -- ^ parser for values in the output
|
||||||
|
((a, b) -> c) -> -- ^ function which combines parsed values into a single value
|
||||||
|
-- (will be launched for each item, e.g. an error/cost function
|
||||||
|
-- could be calculated here)
|
||||||
|
(Sink c (ResourceT m) d) -> -- ^ a Conduit which aggregates all the combined values into
|
||||||
|
-- a "total" value
|
||||||
|
(d -> Double) -> -- ^ function to transform the "total" value into the final score
|
||||||
|
LineSource (ResourceT m) -> -- ^ source to read the expected output
|
||||||
|
LineSource (ResourceT m) -> -- ^ source to read the output
|
||||||
|
m (MetricValue) -- ^ metric values for the output against the expected output
|
||||||
gevalCoreWithoutInput expParser outParser itemStep aggregator finalStep expectedLineStream outLineStream =
|
gevalCoreWithoutInput expParser outParser itemStep aggregator finalStep expectedLineStream outLineStream =
|
||||||
gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep (WithoutInput expectedLineStream outLineStream)
|
gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep (WithoutInput expectedLineStream outLineStream)
|
||||||
where
|
where
|
||||||
@ -493,7 +544,18 @@ gevalCore''' parserSpec itemStep aggregator finalStep context =
|
|||||||
trans :: ((Word32, (a, b)) -> c) -> (Word32, ParsedRecord (WithoutInput m a b)) -> c
|
trans :: ((Word32, (a, b)) -> c) -> (Word32, ParsedRecord (WithoutInput m a b)) -> c
|
||||||
trans step (n, ParsedRecordWithoutInput x y) = step (n, (x, y))
|
trans step (n, ParsedRecordWithoutInput x y) = step (n, (x, y))
|
||||||
|
|
||||||
gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> (ParsedRecord ctxt -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
|
-- | General function to run the evaluation, no matter which particular metric
|
||||||
|
-- was used. It could be seen as the "engine" to run the evaluation.
|
||||||
|
-- If you are defining a new metric, you usually don't have to change anything
|
||||||
|
-- here.
|
||||||
|
gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) =>
|
||||||
|
ParserSpec ctxt -> -- ^ parsers to parse data
|
||||||
|
(ParsedRecord ctxt -> c) -> -- ^ function to go from the parsed value into
|
||||||
|
-- some "local" score calculated for each line (item)
|
||||||
|
(Sink c (ResourceT m) d) -> -- ^ a Conduit to aggregate score into a "total"
|
||||||
|
(d -> Double) -> -- ^ function to transform the "total" value into the final score
|
||||||
|
ctxt -> -- ^ "context", i.e. 2 or 3 sources needed to operate
|
||||||
|
m (MetricValue)
|
||||||
gevalCoreGeneralized parserSpec itemStep aggregator finalStep context =
|
gevalCoreGeneralized parserSpec itemStep aggregator finalStep context =
|
||||||
gevalCoreGeneralized' parserSpec (skipLineNumber itemStep) aggregator finalStep context
|
gevalCoreGeneralized' parserSpec (skipLineNumber itemStep) aggregator finalStep context
|
||||||
|
|
||||||
@ -502,9 +564,14 @@ gevalCoreGeneralized' parserSpec itemStep aggregator finalStep context = do
|
|||||||
v <- runResourceT $
|
v <- runResourceT $
|
||||||
(((getZipSource $ (,)
|
(((getZipSource $ (,)
|
||||||
<$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..])
|
<$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..])
|
||||||
<*> (ZipSource $ recordSource context parserSpec)) =$= CL.map (checkStep (Proxy :: Proxy m) itemStep)) $$ CL.catMaybes =$ aggregator)
|
<*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) $$ CL.catMaybes =$ aggregator)
|
||||||
return $ finalStep v
|
return $ finalStep v
|
||||||
|
|
||||||
|
-- | A type class (with associated data families) to handle all the evaluation "context".
|
||||||
|
--
|
||||||
|
-- This is needed as for some metrics the output and the expected output are enough
|
||||||
|
-- (see the @WithoutInput@ instance), but for some the input is also needed to do
|
||||||
|
-- the evaluation (see the @WithInput@ instance).
|
||||||
class EvaluationContext ctxt m where
|
class EvaluationContext ctxt m where
|
||||||
data ParserSpec ctxt :: *
|
data ParserSpec ctxt :: *
|
||||||
data WrappedParsedRecord ctxt :: *
|
data WrappedParsedRecord ctxt :: *
|
||||||
|
Loading…
Reference in New Issue
Block a user