Merge branch 'master' into bootstrap

This commit is contained in:
Filip Gralinski 2020-01-18 18:09:19 +01:00
commit 608b1f9d73
35 changed files with 642 additions and 80 deletions

View File

@ -1,4 +1,39 @@
## 1.28.0.0
* Add `s` flag for substitution
## 1.27.0.0
* Results are formatted in cross-tables (if possible)
## 1.26.0.0
* Change the meaning of WER (WER is calculated for the whole set now
- similar to the way BLEU is calculated)
* Use `Mean/WER` if you want the old meaning (average of per-item results)
## 1.25.0.0
* Add --oracle-item-based
## 1.24.0.0
* Introduce metric priorities
* Use "Cartesian" strings in metrics
## 1.23.0.0
* New style of train data is preferred
- `in.tsv` and `expected.tsv` instead of `train.tsv`
- though this is not required as sometimes training data look different than test data
- `--validate` option was changed accordingly
## 1.22.1.0
* Add "Mean/" meta-metric (for the time being working only with MultiLabel-F-measure)
* Add :S flag
## 1.22.0.0
* Add SegmentAccuracy

View File

@ -16,6 +16,8 @@ import System.Exit
import Data.Conduit.SmartSource
import Data.SplitIntoCrossTabs
import System.FilePath
import Data.List (intercalate, sort)
@ -23,6 +25,7 @@ import Data.List (intercalate, sort)
import qualified Data.Text as T
import Data.Map.Strict as M
import qualified Data.Map.Lazy as LM
import Data.Set as S
main :: IO ()
@ -79,7 +82,27 @@ showTheResult' opts [val] = putStrLn $ formatTheResult (gesPrecision $ geoSpec o
showTheResult' opts [] = do
hPutStrLn stderr "no metric given, use --metric option"
exitFailure
showTheResult' opts vals = mapM_ putStrLn $ Prelude.map (formatTheMetricAndResult (gesPrecision $ geoSpec opts)) $ zip (gesMetrics $ geoSpec opts) vals
showTheResult' opts vals = mapM_ putStrLn
$ intercalate [""]
$ Prelude.map (formatCrossTable (gesPrecision $ geoSpec opts))
$ splitIntoTablesWithValues (T.pack "metric") (T.pack "value") mapping metricLabels
where mapping = LM.fromList $ zip metricLabels vals
metricLabels = Prelude.map T.pack $ Prelude.map evaluationSchemeName $ gesMetrics $ geoSpec opts
formatCrossTable :: Maybe Int -> TableWithValues MetricResult -> [String]
formatCrossTable mPrecision (TableWithValues [_, _] body) =
-- actually we won't print metric/value header
-- (1) to keep backward-compatible with the previous version
-- (2) to be concise
Prelude.map (formatCrossTableLine mPrecision) body
formatCrossTable mPrecision (TableWithValues header body) =
(intercalate "\t" $ Prelude.map T.unpack header) : Prelude.map (formatCrossTableLine mPrecision) body
formatCrossTableLine :: Maybe Int -> (T.Text, [MetricResult]) -> String
formatCrossTableLine mPrecision (rowName, values) =
intercalate "\t" ((T.unpack rowName):Prelude.map (formatTheResult mPrecision) values)
formatSourceSpec :: SourceSpec -> String
formatSourceSpec (FilePathSpec fp) = dropExtensions $ takeFileName fp

View File

@ -26,7 +26,7 @@ let
stack2nix-script = import "${static-haskell-nix}/static-stack2nix-builder/stack2nix-script.nix" {
inherit pkgs;
stack-project-dir = toString ./.; # where stack.yaml is
hackageSnapshot = "2019-05-08T00:00:00Z"; # pins e.g. extra-deps without hashes or revisions
hackageSnapshot = "2020-01-03T00:00:00Z"; # pins e.g. extra-deps without hashes or revisions
};
static-stack2nix-builder = import "${static-haskell-nix}/static-stack2nix-builder/default.nix" {

View File

@ -1,5 +1,5 @@
name: geval
version: 1.22.0.0
version: 1.28.0.0
synopsis: Machine learning evaluation tools
description: Please see README.md
homepage: http://github.com/name/project
@ -49,6 +49,8 @@ library
, GEval.Selector
, Data.Statistics.Loess
, Data.Statistics.Calibration
, Data.CartesianStrings
, Data.SplitIntoCrossTabs
, Paths_geval
build-depends: base >= 4.7 && < 5
, cond
@ -102,6 +104,7 @@ library
, temporary
, utf8-string
, singletons
, ordered-containers
default-language: Haskell2010
executable geval

View File

@ -0,0 +1,42 @@
{-# LANGUAGE OverloadedStrings #-}
module Data.CartesianStrings
(parseCartesianString,
concatCartesianStrings,
CartesianStrings(..))
where
import Data.List (findIndex)
import Data.List.Split (splitOn)
-- A helper library for parsing strings representing sets of strings
-- obtained via a Cartesian product, e.g.:
-- - "foo" represents just ["foo"]
-- - "a-{foo,bar,baz}-b" represents ["a-foo-b", "a-bar-b", "a-baz-b"]
-- - "{foo,bar,baz}-{x,y}-{0,1,2}" represents a set containing 18 strings
cartProd :: [a] -> [b] -> [(a, b)]
cartProd xs ys = [(x,y) | x <- xs, y <- ys]
parseCartesianString :: String -> [String]
parseCartesianString s =
case findIndex (=='{') s of
Just begIx ->
let pref = take begIx s
c = drop (begIx + 1) s
in case findIndex (=='}') c of
Just endIx ->
let inf = take endIx c
current = splitOn "," inf
rest = parseCartesianString $ drop (endIx + 1) c
in map (uncurry (++)) $ cartProd (map (pref++) current) rest
Nothing -> [s]
data CartesianStrings a = CartesianStrings [a]
deriving (Eq)
instance Read a => Read (CartesianStrings a) where
readsPrec _ s = [(CartesianStrings (map read $ parseCartesianString s), "")]
concatCartesianStrings :: [CartesianStrings a] -> [a]
concatCartesianStrings = concat . map (\(CartesianStrings ss) -> ss)

View File

@ -0,0 +1,148 @@
{-# LANGUAGE OverloadedStrings #-}
module Data.SplitIntoCrossTabs
(splitIntoCrossTabs,
splitIntoTablesWithValues,
CrossTab(..),
TableWithValues(..),
TextFrag(..))
where
import qualified Data.Text as T
import Data.Text (Text)
import qualified Data.Set.Ordered as OS
import qualified Data.Map.Ordered as OM
import qualified Data.Set as S
import qualified Data.Foldable as F
import qualified Data.Map as M
import qualified Data.Map.Lazy as LM
import Debug.Trace
import Data.List (unfoldr, sortBy, maximumBy, minimumBy)
data TableWithValues a = TableWithValues [Text] [(Text, [a])]
data CrossTab = SingleItem Text | CrossTab [TextFrag] [TextFrag]
deriving (Show, Eq)
data TextFrag = Prefix Text | Suffix Text
deriving (Show, Eq, Ord)
splitIntoTablesWithValues :: Text
-> Text
-> LM.Map Text a -- ^ map from which values will be taken,
-- deliberately a lazy map so that
-- values could be shown one by one
-> [Text]
-> [TableWithValues a]
splitIntoTablesWithValues defaultMainHeader defaultSecondaryHeader mapping =
joinSingleItems . map (convertIntoTableWithValues defaultMainHeader defaultSecondaryHeader mapping) . splitIntoCrossTabs
where joinSingleItems (TableWithValues h@[_, _] arows : TableWithValues [_, _] brows : rest) =
joinSingleItems (TableWithValues h (arows ++ brows) : rest)
joinSingleItems (e : rest) = e : joinSingleItems rest
joinSingleItems [] = []
convertIntoTableWithValues :: Text -> Text -> LM.Map Text a -> CrossTab -> TableWithValues a
convertIntoTableWithValues defaultMainHeader defaultSecondaryHeader mapping (SingleItem t) =
TableWithValues [defaultMainHeader, defaultSecondaryHeader] [(t, [mapping LM.! t])]
convertIntoTableWithValues defaultMainHeader defaultSecondaryHeader mapping (CrossTab rowNames columnNames) =
TableWithValues (T.empty : (map toText columnNames)) (map processRow rowNames)
where processRow rowName = (toText rowName, map (\colName -> mapping LM.! (combineFrags rowName colName)) columnNames)
splitIntoCrossTabs :: [Text] -> [CrossTab]
splitIntoCrossTabs inputs =
map preferVertical
$ map snd
$ sortBy (\(r1,_) (r2, _) -> r1 `compare` r2)
$ map (getRank inputRanks)
$ unfoldr extractTheBestCrossTab inputs
where inputRanks = M.fromList $ zip inputs [1..]
preferVertical :: CrossTab -> CrossTab
preferVertical s@(SingleItem _) = s
preferVertical c@(CrossTab rowNames columnNames)
| length rowNames < length columnNames = CrossTab columnNames rowNames
| otherwise = c
getRank :: M.Map Text Int -> CrossTab -> (Int, CrossTab)
getRank ranks c = (bestRank, c)
where bestRank = minimum
$ map (ranks M.!)
$ S.toList
$ toSet c
extractTheBestCrossTab :: [Text] -> Maybe (CrossTab, [Text])
extractTheBestCrossTab [] = Nothing
extractTheBestCrossTab ts = Just (theBestCrossTab, rest)
where theBestCrossTab = findTheBestCrossTab ts
rest = filter (`S.notMember` (toSet theBestCrossTab)) ts
findTheBestCrossTab :: [Text] -> CrossTab
findTheBestCrossTab ts = case orderedEntries of
[] -> SingleItem defaultSingleton
_ -> maximumBy (\t1 t2 -> crossTabSize t1 `compare` crossTabSize t2)
$ map (findTheBestCrossTabForTextPart (SingleItem defaultSingleton) orderedEntries)
$ map snd orderedEntries
where mapping = gatherTextParts ts
orderedEntries = sortBy entryComparator
$ filter (\(_, (_, tset)) -> OS.size tset >= 2)
$ zip [1..] (OM.assocs mapping)
(defaultSingleton:_) = ts
thenCmp :: Ordering -> Ordering -> Ordering
thenCmp EQ o2 = o2
thenCmp o1 _ = o1
entryComparator (r1, (_, s1)) (r2, (_, s2)) = (OS.size s2 `compare` OS.size s1)
`thenCmp`
(r1 `compare` r2)
findTheBestCrossTabForTextPart :: CrossTab -> [(Int, (TextFrag, OS.OSet TextFrag))] -> (TextFrag, OS.OSet TextFrag) -> CrossTab
findTheBestCrossTabForTextPart defaultCrossTab entries chosenEntry@(t, tset) = if crossTabSize bestCrossTabFound > 1
then bestCrossTabFound
else defaultCrossTab
where bestCrossTabFound = foldr step (CrossTab [] (F.toList tset)) entriesOrderedByIntersection
entriesOrderedByIntersection =
sortBy entryComparator
$ filter (\(_, (_, tset')) -> OS.size tset' >= 2)
$ map (\(r, (t', tset')) -> (r, (t', tset' OS.|/\ tset))) entries
step (_, (t', tset')) currentTab@(CrossTab frags common) = selectedTab
where newTab = CrossTab newT (F.toList newCommon)
newT = t':frags
newCommon = (OS.fromList common) OS.|/\ tset'
selectedTab = if crossTabSize newTab >= crossTabSize currentTab
then newTab
else currentTab
crossTabSize :: CrossTab -> Int
crossTabSize (SingleItem _) = 1
crossTabSize (CrossTab [] _) = 0
crossTabSize (CrossTab _ []) = 0
-- tables really start from 2x2
crossTabSize (CrossTab [_] _) = 0
crossTabSize (CrossTab _ [_]) = 0
crossTabSize (CrossTab rows columns) = length rows * length columns
toSet :: CrossTab -> S.Set Text
toSet (SingleItem t) = S.singleton t
toSet (CrossTab rowNames columnNames) = S.fromList [rName `combineFrags` cName | rName <- rowNames, cName <- columnNames]
toText :: TextFrag -> Text
toText (Prefix prefix) = T.stripEnd prefix
toText (Suffix prefix) = T.stripStart prefix
combineFrags :: TextFrag -> TextFrag -> Text
combineFrags (Prefix prefix) (Suffix suffix) = prefix <> suffix
combineFrags (Suffix suffix) (Prefix prefix) = prefix <> suffix
combineFrags _ _ = error $ "incompatible text fragments"
getTextParts :: Text -> [(TextFrag, TextFrag)]
getTextParts t = [(Prefix (T.take ix t), Suffix (T.drop ix t)) | ix <- [1..(T.length t)-1]]
gatherTextParts :: [Text] -> OM.OMap TextFrag (OS.OSet TextFrag)
gatherTextParts = (gather OS.singleton (OS.|<>)) . concat . (map getTextParts)
gather :: Ord a => (b -> c) -> (c -> c -> c) -> [(a, b)] -> OM.OMap a c
gather createEntry combine = foldr extend OM.empty
where extend (k, v) m = OM.unionWithL (\_ v1 v2 -> combine v1 v2) (OM.singleton (k, (createEntry v))) m

View File

@ -39,6 +39,7 @@ module GEval.Core
checkMultipleOuts,
checkMultipleOutsCore,
gesMainMetric,
gesMainScheme,
gesPreprocess,
getDataDecoder,
threeLineSource,
@ -160,6 +161,7 @@ data GEvalSpecification = GEvalSpecification
gesTestName :: String,
gesSelector :: Maybe Selector,
gesOutFile :: String,
gesAltOutFiles :: Maybe [String],
gesExpectedFile :: String,
gesInputFile :: String,
gesMetrics :: [EvaluationScheme],
@ -177,6 +179,11 @@ gesMainMetric spec = case gesMetrics spec of
(scheme:_) -> evaluationSchemeMetric scheme
otherwise -> error "no metric given"
gesMainScheme :: GEvalSpecification -> EvaluationScheme
gesMainScheme spec = case gesMetrics spec of
(scheme:_) -> scheme
otherwise -> error "no metric given"
gesPreprocess :: GEvalSpecification -> (Text -> Text)
gesPreprocess spec = tokenizeTabSeparatedWithSpaces (gesTokenizer spec)
@ -191,6 +198,7 @@ data GEvalSpecialCommand = Init
| Diff FilePath | MostWorseningFeatures FilePath
| PrintVersion | JustTokenize | Submit
| Validate | ListMetrics
| OracleItemBased
data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
@ -209,6 +217,7 @@ defaultGEvalSpecification = GEvalSpecification {
gesTestName = defaultTestName,
gesSelector = Nothing,
gesOutFile = defaultOutFile,
gesAltOutFiles = Nothing,
gesExpectedFile = defaultExpectedFile,
gesInputFile = defaultInputFile,
gesMetrics = [EvaluationScheme defaultMetric []],
@ -453,6 +462,37 @@ gevalCoreOnSources CharMatch inputLineSource = helper inputLineSource
gevalCoreOnSources (LogLossHashed nbOfBits) _ = helperLogLossHashed nbOfBits id
gevalCoreOnSources (LikelihoodHashed nbOfBits) _ = helperLogLossHashed nbOfBits logLossToLikehood
gevalCoreOnSources (Mean (MultiLabelFMeasure beta)) _
= gevalCoreWithoutInputOnItemTargets (Right . intoWords)
(Right . getWords)
((fMeasureOnCounts beta) . (getCounts (==)))
averageC
id
noGraph
where
-- repeated as below, as it will be refactored into dependent types soon anyway
getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t
getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t
intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
gevalCoreOnSources (Mean WER) _
= gevalCoreWithoutInputOnItemTargets (Right . intoWords)
(Right . getWords)
((uncurry (/.)) . (uncurry werStep))
averageC
id
noGraph
where
-- repeated as below, as it will be refactored into dependent types soon anyway
getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t
getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t
intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
gevalCoreOnSources (Mean _) _ = error $ "Mean/ meta-metric defined only for MultiLabel-F1 and WER for the time being"
-- only MultiLabel-F1 handled for JSONs for the time being...
gevalCoreOnSources (MultiLabelFMeasure beta) _ = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
(Right . getWords)
@ -511,9 +551,12 @@ gevalCoreOnSources BLEU _ = gevalCoreWithoutInput SABLEU bleuAgg bleuFinal noGra
gevalCoreOnSources GLEU _ = gevalCoreWithoutInput SAGLEU gleuAgg gleuFinal noGraph
where gleuFinal (m, t) = m /. t
gleuAgg = CC.foldl gleuFuse (0, 0)
gleuFuse (a1, a2) (b1, b2) = (a1+b1, a2+b2)
gleuFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
gevalCoreOnSources WER _ = gevalCoreWithoutInput SAWER averageC id noGraph
gevalCoreOnSources WER _ = gevalCoreWithoutInput SAWER werAgg werFinal noGraph
where werAgg = CC.foldl werFuse (0, 0)
werFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
werFinal (errors, ref) = errors /. ref
gevalCoreOnSources Accuracy _ = gevalCoreWithoutInput SAAccuracy averageC id noGraph
@ -753,7 +796,10 @@ continueGEvalCalculations SAGLEU GLEU = defineContinuation gleuAgg gleuFinal noG
gleuAgg = CC.foldl gleuFuse (0, 0)
gleuFuse (a1, a2) (b1, b2) = (a1+b1, a2+b2)
continueGEvalCalculations SAWER WER = defineContinuation averageC id noGraph
continueGEvalCalculations SAWER WER = defineContinuation werAgg werFinal noGraph
where werAgg = CC.foldl werFuse (0, 0)
werFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
werFinal (errors, ref) = errors /. ref
continueGEvalCalculations SAAccuracy Accuracy = defineContinuation averageC id noGraph

View File

@ -19,6 +19,9 @@ import Control.Exception
import Control.Monad.Trans.Resource
import Data.String.Here
import Data.List (intercalate)
import Data.List.Split (splitOn)
createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO ()
createChallenge withDataFiles expectedDirectory spec = do
D.createDirectoryIfMissing False expectedDirectory
@ -31,7 +34,7 @@ createChallenge withDataFiles expectedDirectory spec = do
if withDataFiles
then
do
createFile (trainDirectory </> "train.tsv") $ trainContents metric
createTrainFiles metric trainDirectory expectedFile
createFile (devDirectory </> "in.tsv") $ devInContents metric
createFile (devDirectory </> expectedFile) $ devExpectedContents metric
@ -50,12 +53,23 @@ createChallenge withDataFiles expectedDirectory spec = do
testDirectory = expectedDirectory </> testName
expectedFile = gesExpectedFile spec
createTrainFiles :: Metric -> FilePath -> FilePath -> IO ()
createTrainFiles metric@(LogLossHashed _) trainDirectory _ = createSingleTrainFile metric trainDirectory
createTrainFiles metric@(LikelihoodHashed _) trainDirectory _ = createSingleTrainFile metric trainDirectory
createTrainFiles metric trainDirectory expectedFile = do
createFile (trainDirectory </> "in.tsv") $ trainInContents metric
createFile (trainDirectory </> expectedFile) $ trainExpectedContents metric
createSingleTrainFile metric trainDirectory =
createFile (trainDirectory </> "train.tsv") $ trainContents metric
createFile :: FilePath -> String -> IO ()
createFile filePath contents = do
whenM (D.doesFileExist filePath) $ throwM $ FileAlreadyThere filePath
writeFile filePath contents
readmeMDContents :: Metric -> String -> String
readmeMDContents (Mean metric) testName = readmeMDContents metric testName
readmeMDContents GLEU testName = readmeMDContents BLEU testName
readmeMDContents BLEU testName = [i|
GEval sample machine translation challenge
@ -413,7 +427,22 @@ configContents schemes precision testName = unwords (Prelude.map (\scheme -> ("-
where precisionOpt Nothing = ""
precisionOpt (Just p) = " --precision " ++ (show p)
-- Originally train content was in one file, to avoid large changes
-- for the time being we are using the original function.
trainInContents :: Metric -> String
trainInContents metric = unlines
$ map (intercalate "\t")
$ map tail
$ map (splitOn "\t")
$ lines
$ trainContents metric
trainExpectedContents :: Metric -> String
trainExpectedContents metric = unlines $ map head $ map (splitOn "\t") $ lines $ trainContents metric
trainContents :: Metric -> String
trainContents (Mean metric) = trainContents metric
trainContents GLEU = trainContents BLEU
trainContents BLEU = [hereLit|alussa loi jumala taivaan ja maan he mea hanga na te atua i te timatanga te rangi me te whenua
ja maa oli autio ja tyhjä , ja pimeys oli syvyyden päällä a kahore he ahua o te whenua , i takoto kau ; he pouri ano a runga i te mata o te hohonu
@ -482,7 +511,7 @@ trainContents LogLoss = [hereLit|0.0 Hell, no!!!
trainContents BIOF1Labels = trainContents BIOF1
trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND My name is Bond , James Bond
O O O O O There is no name here
B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
B-firstname/JOHN B-surname/VON I-surname/NEUMANN John von Nueman
|]
trainContents TokenAccuracy = [hereLit|* V N I like cats
* * V * N I can see the rainbow
@ -500,17 +529,20 @@ Love and hate LOVE HATE
I am sad SADNESS
I am so sad and hateful SADNESS HATE
|]
trainContents (Soft2DFMeasure _) = trainContents ClippEU
trainContents ClippEU = [hereLit|2/0,0,10,150 foo.djvu
trainContents (Soft2DFMeasure _) = [hereLit|2/0,0,10,150 foo.djvu
1/30,40,100,1000 bar.djvu
|]
trainContents _ = [hereLit|0.06 0.39 0 0.206
1.00 1.00 1 0.017
317.8 5.20 67 0.048
14.6 19.22 27 0.047
trainContents ClippEU = [hereLit|1/30,40,100,1000/10 bar.djvu
2/30,40,500,600/10 foo.djvu
|]
trainContents _ = [hereLit|0.06 0.39 0 0.206
1.00 1.00 1 0.017
317.8 5.20 67 0.048
14.6 19.22 27 0.047
|]
devInContents :: Metric -> String
devInContents (Mean metric) = devInContents metric
devInContents GLEU = devInContents BLEU
devInContents BLEU = [hereLit|ja jumala sanoi : " tulkoon valkeus " , ja valkeus tuli
ja jumala näki , että valkeus oli hyvä ; ja jumala erotti valkeuden pimeydestä
@ -578,6 +610,7 @@ devInContents _ = [hereLit|0.72 0 0.007
|]
devExpectedContents :: Metric -> String
devExpectedContents (Mean metric) = devExpectedContents metric
devExpectedContents GLEU = devExpectedContents BLEU
devExpectedContents BLEU = [hereLit|a ka ki te atua , kia marama : na ka marama
a ka kite te atua i te marama , he pai : a ka wehea e te atua te marama i te pouri
@ -647,6 +680,7 @@ devExpectedContents _ = [hereLit|0.82
|]
testInContents :: Metric -> String
testInContents (Mean metric) = testInContents metric
testInContents GLEU = [hereLit|Alice has a black
|]
testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi
@ -717,6 +751,7 @@ testInContents _ = [hereLit|0.72 0 0.007
|]
testExpectedContents :: Metric -> String
testExpectedContents (Mean metric) = testExpectedContents metric
testExpectedContents BLEU = [hereLit|na ka huaina e te atua te marama ko te awatea , a ko te pouri i huaina e ia ko te po
a ko te ahiahi , ko te ata , he ra kotahi
|]

View File

@ -1,22 +1,33 @@
module GEval.EvaluationScheme
(EvaluationScheme(..), evaluationSchemeMetric, applyPreprocessingOperations, evaluationSchemeName, PreprocessingOperation(..))
(EvaluationScheme(..),
evaluationSchemeMetric,
applyPreprocessingOperations,
evaluationSchemeName,
evaluationSchemePriority,
PreprocessingOperation(..))
where
import GEval.Metric
import Text.Regex.PCRE.Heavy
import Text.Regex.PCRE.Light.Base (Regex(..))
import Data.Text (Text(..), concat, toLower, toUpper, pack, unpack)
import Data.List (intercalate, break)
import Data.Text (Text(..), concat, toLower, toUpper, pack, unpack, words, unwords)
import Data.List (intercalate, break, sort)
import Data.Either
import Data.Maybe (fromMaybe)
import Data.Maybe (fromMaybe, catMaybes)
import qualified Data.ByteString.UTF8 as BSU
data EvaluationScheme = EvaluationScheme Metric [PreprocessingOperation]
deriving (Eq)
data PreprocessingOperation = RegexpMatch Regex | LowerCasing | UpperCasing | SetName Text
data PreprocessingOperation = RegexpMatch Regex
| LowerCasing
| UpperCasing
| Sorting
| SetName Text
| SetPriority Int
| RegexpSubstition Regex Text
deriving (Eq)
leftParameterBracket :: Char
@ -39,16 +50,37 @@ readOps ('l':theRest) = (LowerCasing:ops, theRest')
readOps ('u':theRest) = (UpperCasing:ops, theRest')
where (ops, theRest') = readOps theRest
readOps ('m':theRest) = handleParametrizedOp (RegexpMatch . (fromRight undefined) . ((flip compileM) []) . BSU.fromString) theRest
readOps ('S':theRest) = (Sorting:ops, theRest')
where (ops, theRest') = readOps theRest
readOps ('N':theRest) = handleParametrizedOp (SetName . pack) theRest
readOps ('P':theRest) = handleParametrizedOp (SetPriority . read) theRest
readOps ('s':theRest) = handleParametrizedBinaryOp (\a b -> RegexpSubstition (fromRight undefined $ compileM (BSU.fromString a) []) (pack b)) theRest
readOps s = ([], s)
handleParametrizedOp :: (String -> PreprocessingOperation) -> String -> ([PreprocessingOperation], String)
handleParametrizedOp constructor (leftParameterBracket:theRest) =
handleParametrizedOp constructor theRest =
case parseParameter theRest of
(Nothing, s) -> ([], s)
(Just param, theRest') -> let (ops, theRest'') = readOps theRest'
in ((constructor param):ops, theRest'')
handleParametrizedBinaryOp :: (String -> String -> PreprocessingOperation) -> String -> ([PreprocessingOperation], String)
handleParametrizedBinaryOp constructor theRest =
case parseParameter theRest of
(Nothing, s) -> ([], s)
(Just paramA, theRest') ->
case parseParameter theRest' of
(Nothing, s) -> ([], s)
(Just paramB, theRest'') -> let (ops, theRest''') = readOps theRest''
in ((constructor paramA paramB):ops, theRest''')
parseParameter :: String -> (Maybe String, String)
parseParameter (leftParameterBracket:theRest) =
case break (== rightParameterBracket) theRest of
(s, []) -> ([], s)
(param, (_:theRest')) -> let (ops, theRest'') = readOps theRest'
in ((constructor param):ops, theRest'')
handleParametrizedOp _ s = ([], s)
(s, []) -> (Nothing, s)
(param, (_:theRest')) -> (Just param, theRest')
parseParameter s = (Nothing, s)
instance Show EvaluationScheme where
show (EvaluationScheme metric operations) = (show metric) ++ (if null operations
@ -58,10 +90,21 @@ instance Show EvaluationScheme where
evaluationSchemeName :: EvaluationScheme -> String
evaluationSchemeName scheme@(EvaluationScheme metric operations) = fromMaybe (show scheme) (findNameSet operations)
evaluationSchemePriority scheme@(EvaluationScheme _ operations) = fromMaybe defaultPriority (findPrioritySet operations)
where defaultPriority = 1
findNameSet :: [PreprocessingOperation] -> Maybe String
findNameSet [] = Nothing
findNameSet ((SetName name):_) = Just (unpack name)
findNameSet (_:ops) = findNameSet ops
findNameSet ops = case names of
[] -> Nothing
_ -> Just $ intercalate " " names
where names = catMaybes $ map extractName ops
extractName (SetName n) = Just (unpack n)
extractName _ = Nothing
findPrioritySet :: [PreprocessingOperation] -> Maybe Int
findPrioritySet [] = Nothing
findPrioritySet ((SetPriority p):_) = Just p
findPrioritySet (_:ops) = findPrioritySet ops
evaluationSchemeMetric :: EvaluationScheme -> Metric
evaluationSchemeMetric (EvaluationScheme metric _) = metric
@ -70,10 +113,31 @@ instance Show PreprocessingOperation where
show (RegexpMatch (Regex _ regexp)) = parametrizedOperation "m" (BSU.toString regexp)
show LowerCasing = "l"
show UpperCasing = "u"
show Sorting = "S"
show (SetName t) = parametrizedOperation "N" (unpack t)
show (SetPriority p) = parametrizedOperation "P" (show p)
show (RegexpSubstition (Regex _ regexp) s) = "s" ++ (formatParameter $ BSU.toString regexp) ++ (formatParameter $ unpack s)
applySubstitution :: Regex -> Text -> Text -> Text
applySubstitution r substitution t =
gsub r (handleRefs substitution) t
handleRefs :: Text -> Text -> [Text] -> Text
handleRefs substitution mainMatch subMatches = gsub refRegexp handleRef substitution
where Right refRegexp = compileM (BSU.fromString "\\\\\\d+") []
indexables = mainMatch : subMatches
handleRef :: Text -> Text
handleRef ref =
let ix = (read $ tail $ unpack ref)
in if ix >= length indexables
then (pack "")
else indexables !! ix
parametrizedOperation :: String -> String -> String
parametrizedOperation opCode opArg = opCode ++ [leftParameterBracket] ++ opArg ++ [rightParameterBracket]
parametrizedOperation opCode opArg = opCode ++ (formatParameter opArg)
formatParameter :: String -> String
formatParameter p = [leftParameterBracket] ++ p ++ [rightParameterBracket]
applyPreprocessingOperations :: EvaluationScheme -> Text -> Text
applyPreprocessingOperations (EvaluationScheme _ operations) t = foldl (flip applyPreprocessingOperation) t operations
@ -82,4 +146,7 @@ applyPreprocessingOperation :: PreprocessingOperation -> Text -> Text
applyPreprocessingOperation (RegexpMatch regex) = Data.Text.concat . (map fst) . (scan regex)
applyPreprocessingOperation LowerCasing = toLower
applyPreprocessingOperation UpperCasing = toUpper
applyPreprocessingOperation Sorting = Data.Text.unwords . sort . Data.Text.words
applyPreprocessingOperation (SetName _) = id
applyPreprocessingOperation (SetPriority _) = id
applyPreprocessingOperation (RegexpSubstition regex substition) = applySubstitution regex substition

View File

@ -16,11 +16,14 @@ module GEval.LineByLine
runDiffGeneralized,
LineRecord(..),
ResultOrdering(..),
justTokenize
justTokenize,
worstFeaturesPipeline,
runOracleItemBased
) where
import GEval.Core
import GEval.Common
import GEval.EvaluationScheme
import Text.Tokenizer
import Data.Conduit.AutoDecompress (doNothing)
@ -33,10 +36,11 @@ import Data.Text
import Data.Text.Encoding
import Data.Conduit.Rank
import Data.Maybe (fromMaybe)
import Data.Either (rights)
import qualified Data.Vector as V
import Data.List (sortBy, sortOn, sort, concat)
import Data.List (sortBy, sortOn, sort, concat, maximumBy)
import Control.Monad.IO.Class
import Control.Monad.Trans.Resource
@ -83,7 +87,6 @@ parseReferenceEntry :: Text -> (Integer, Text)
parseReferenceEntry line = (read $ unpack refId, t)
where [refId, t] = splitOn "\t" line
runLineByLine :: ResultOrdering -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runLineByLine ordering featureFilter spec bbdo = runLineByLineGeneralized ordering spec consum
where consum :: Maybe References -> ConduitT LineRecord Void (ResourceT IO) ()
@ -107,28 +110,26 @@ runFeatureFilter (Just feature) spec bbdo mReferences = CC.map (\l -> (fakeRank,
checkFeature feature (_, LineWithFactors _ _ fs) = feature `elem` (Prelude.map show fs)
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences)
runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences consumFeatures)
where ordering' = forceSomeOrdering ordering
consumFeatures = CL.map (encodeUtf8 . formatFeatureWithPValue)
.| CC.unlinesAscii
.| CC.stdout
worstFeaturesPipeline :: Bool -> GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT LineRecord Void (ResourceT IO) ()
worstFeaturesPipeline reversed spec bbdo mReferences = rank (lessByMetric reversed $ gesMainMetric spec)
worstFeaturesPipeline :: Bool
-> GEvalSpecification
-> BlackBoxDebuggingOptions
-> Maybe References
-> ConduitT FeatureWithPValue Void (ResourceT IO) ()
-> ConduitT LineRecord Void (ResourceT IO) ()
worstFeaturesPipeline reversed spec bbdo mReferences consum = rank (lessByMetric reversed $ gesMainMetric spec)
.| evalStateC 0 (extractFeaturesAndPValues spec bbdo mReferences)
.| CC.filter (\(FeatureWithPValue _ p _ _) -> not $ isNaN p) -- NaN values would poison sorting
.| gobbleAndDo (sortBy featureOrder)
.| filtreCartesian (bbdoCartesian bbdo)
.| CL.map (encodeUtf8 . formatFeatureWithPValue)
.| CC.unlinesAscii
.| CC.stdout
where formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
formatScore score,
escapeTabs inp,
escapeTabs exp,
escapeTabs out]
formatScore :: MetricValue -> Text
formatScore = Data.Text.pack . printf "%f"
featureOrder (FeatureWithPValue _ p1 _ _) (FeatureWithPValue _ p2 _ _) =
.| consum
where featureOrder (FeatureWithPValue _ p1 _ _) (FeatureWithPValue _ p2 _ _) =
p1 `compare` p2
-- for commands like --worst-features we need some ordering (KeepTheOriginalOrder
@ -359,8 +360,9 @@ runLineByLineGeneralized ordering spec consum = do
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFilesSingleOut True spec
gevalLineByLineCore metric mSelector preprocess inputFilePath expectedFilePath outFilePath (sorter ordering .| consum mReferences)
where metric = gesMainMetric spec
scheme = gesMainScheme spec
mSelector = gesSelector spec
preprocess = gesPreprocess spec
preprocess = (gesPreprocess spec) . (applyPreprocessingOperations scheme)
sorter KeepTheOriginalOrder = doNothing
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
@ -393,6 +395,31 @@ runDiff ordering featureFilter otherOut spec bbdo = runDiffGeneralized ordering
formatScoreDiff :: Double -> Text
formatScoreDiff = Data.Text.pack . printf "%f"
runOracleItemBased :: GEvalSpecification -> IO ()
runOracleItemBased spec = runMultiOutputGeneralized spec consum
where consum = CL.map picker .| format
picker = maximumBy (\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) -> metricCompare metric scoreA scoreB)
format = CL.map (encodeUtf8 . formatOutput)
.| CC.unlinesAscii
.| CC.stdout
formatOutput (LineRecord _ _ out _ _) = out
metric = gesMainMetric spec
runMultiOutputGeneralized :: GEvalSpecification -> ConduitT [LineRecord] Void (ResourceT IO) () -> IO ()
runMultiOutputGeneralized spec consum = do
(inputSource, expectedSource, outSource) <- checkAndGetFilesSingleOut True spec
let (Just altOuts) = gesAltOutFiles spec
altSourceSpecs' <- mapM (getSmartSourceSpec ((gesOutDirectory spec) </> (gesTestName spec)) "out.tsv") altOuts
let altSourceSpecs = rights altSourceSpecs'
let sourceSpecs = (outSource:altSourceSpecs)
let sources = Prelude.map (gevalLineByLineSource metric mSelector preprocess inputSource expectedSource) sourceSpecs
runResourceT $ runConduit $
(sequenceSources sources .| consum)
where metric = gesMainMetric spec
scheme = gesMainScheme spec
preprocess = (gesPreprocess spec) . (applyPreprocessingOperations scheme)
mSelector = gesSelector spec
runMostWorseningFeatures :: ResultOrdering -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runMostWorseningFeatures ordering otherOut spec bbdo = runDiffGeneralized ordering' otherOut spec consum
where ordering' = forceSomeOrdering ordering
@ -402,7 +429,7 @@ runMostWorseningFeatures ordering otherOut spec bbdo = runDiffGeneralized order
FirstTheBest -> True
consum :: Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
consum = \mReferences -> CC.map prepareFakeLineRecord
.| (worstFeaturesPipeline reversed spec bbdo mReferences)
.| (worstFeaturesPipeline reversed spec bbdo mReferences consumFeatures)
prepareFakeLineRecord :: (LineRecord, LineRecord) -> LineRecord
prepareFakeLineRecord (LineRecord _ _ _ _ scorePrev, LineRecord inp exp out c score) =
LineRecord inp exp out c (score - scorePrev)

View File

@ -8,7 +8,8 @@ module GEval.Metric
bestPossibleValue,
perfectOutLineFromExpectedLine,
fixedNumberOfColumnsInExpected,
fixedNumberOfColumnsInInput)
fixedNumberOfColumnsInInput,
metricCompare)
where
import Data.Word
@ -28,7 +29,12 @@ data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | C
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
| BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
| MultiLabelLogLoss | MultiLabelLikelihood
| SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
| SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double
| ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
-- it would be better to avoid infinite recursion here
-- `Mean (Mean BLEU)` is not useful, but as it would mean
-- a larger refactor, we will postpone this
| Mean Metric
deriving (Eq)
instance Show Metric where
@ -73,8 +79,12 @@ instance Show Metric where
show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
show MultiLabelLogLoss = "MultiLabel-Logloss"
show MultiLabelLikelihood = "MultiLabel-Likelihood"
show (Mean metric) = "Mean/" ++ (show metric)
instance Read Metric where
readsPrec p ('M':'e':'a':'n':'/':theRest) = case readsPrec p theRest of
[(metric, theRest)] -> [(Mean metric, theRest)]
_ -> []
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
readsPrec _ ('P':'e':'a':'r':'s':'o':'n':theRest) = [(Pearson, theRest)]
@ -162,6 +172,12 @@ getMetricOrdering SMAPE = TheLowerTheBetter
getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter
getMetricOrdering MultiLabelLikelihood = TheHigherTheBetter
getMetricOrdering (Mean metric) = getMetricOrdering metric
metricCompare :: Metric -> MetricValue -> MetricValue -> Ordering
metricCompare metric a b = metricCompare' (getMetricOrdering metric) a b
where metricCompare' TheHigherTheBetter a b = a `compare` b
metricCompare' TheLowerTheBetter a b = b `compare` a
bestPossibleValue :: Metric -> MetricValue
bestPossibleValue metric = case getMetricOrdering metric of
@ -169,18 +185,21 @@ bestPossibleValue metric = case getMetricOrdering metric of
TheHigherTheBetter -> 1.0
fixedNumberOfColumnsInExpected :: Metric -> Bool
fixedNumberOfColumnsInExpected (Mean metric) = fixedNumberOfColumnsInExpected metric
fixedNumberOfColumnsInExpected MAP = False
fixedNumberOfColumnsInExpected BLEU = False
fixedNumberOfColumnsInExpected GLEU = False
fixedNumberOfColumnsInExpected _ = True
fixedNumberOfColumnsInInput :: Metric -> Bool
fixedNumberOfColumnsInInput (Mean metric) = fixedNumberOfColumnsInInput metric
fixedNumberOfColumnsInInput (SoftFMeasure _) = False
fixedNumberOfColumnsInInput (ProbabilisticSoftFMeasure _) = False
fixedNumberOfColumnsInInput (Soft2DFMeasure _) = False
fixedNumberOfColumnsInInput _ = True
perfectOutLineFromExpectedLine :: Metric -> Text -> Text
perfectOutLineFromExpectedLine (Mean metric) t = perfectOutLineFromExpectedLine metric t
perfectOutLineFromExpectedLine (LogLossHashed _) t = t <> ":1.0"
perfectOutLineFromExpectedLine (LikelihoodHashed _) t = t <> ":1.0"
perfectOutLineFromExpectedLine BLEU t = getFirstColumn t

View File

@ -222,6 +222,7 @@ type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
ItemIntermediateRepresentationType ALogLossHashed = (Text, Text)
ItemIntermediateRepresentationType ALikelihoodHashed = (Text, Text)
ItemIntermediateRepresentationType ACharMatch = (Text, Text)
ItemIntermediateRepresentationType AWER = (Int, Int)
ItemIntermediateRepresentationType t = Double
itemStep :: SAMetric t -> (ParsedExpectedType t, ParsedOutputType t) -> ItemIntermediateRepresentationType t

View File

@ -48,6 +48,7 @@ listOfAvailableMetrics = [RMSE,
MultiLabelFMeasure 1.0,
MultiLabelFMeasure 2.0,
MultiLabelFMeasure 0.25,
Mean (MultiLabelFMeasure 1.0),
ProbabilisticMultiLabelFMeasure 1.0,
ProbabilisticMultiLabelFMeasure 2.0,
ProbabilisticMultiLabelFMeasure 0.25,

View File

@ -39,6 +39,7 @@ import GEval.Validation
import Data.List (intercalate)
import Data.Conduit.SmartSource
import Data.CartesianStrings
fullOptionsParser = info (helper <*> optionsParser)
(fullDesc
@ -94,6 +95,10 @@ optionsParser = GEvalOptions
(flag' ListMetrics
( long "list-metrics"
<> help "List all metrics with their descriptions"))
<|>
(flag' OracleItemBased
( long "oracle-item-based"
<> help "Generate the best possible output considering outputs given by --out-file and --alt-out-file options (and peeking into the expected file)."))
)
<*> ((flag' FirstTheWorst
@ -151,6 +156,10 @@ specParser = GEvalSpecification
<> showDefault
<> metavar "OUT"
<> help "The name of the file to be evaluated" )
<*> (optional $ some $ strOption
( long "alt-out-file"
<> metavar "OUT"
<> help "Alternative output file, makes sense only for some options, e.g. --oracle-item-based"))
<*> strOption
( long "expected-file"
<> short 'e'
@ -249,14 +258,12 @@ sel (Just m) _ = m
metricReader :: Parser [EvaluationScheme]
metricReader = many $ option auto -- actually `some` should be used instead of `many`, the problem is that
( long "metric" -- --metric might be in the config.txt file...
<> short 'm'
<> metavar "METRIC"
<> help ("Metric to be used, e.g.:" ++ helpMetricParameterMetricsList))
-- RMSE, MSE, MAE, SMAPE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MultiLabel-Likelihood, MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, TokenAccuracy, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25), probabilistic soft F-measure (specify as Probabilistic-Soft-F1, Probabilistic-Soft-F2, Probabilistic-Soft-F0.25) or CharMatch" )
metricReader = concatCartesianStrings <$>
(many $ option auto -- actually `some` should be used instead of `many`, the problem is that
( long "metric" -- --metric might be in the config.txt file...
<> short 'm'
<> metavar "METRIC"
<> help ("Metric to be used, e.g.:" ++ helpMetricParameterMetricsList)))
altMetricReader :: Parser (Maybe EvaluationScheme)
altMetricReader = optional $ option auto
@ -362,6 +369,9 @@ runGEval''' (Just Validate) _ _ spec _ _ = do
runGEval''' (Just ListMetrics) _ _ _ _ _ = do
listMetrics
return Nothing
runGEval''' (Just OracleItemBased) _ _ spec _ _ = do
runOracleItemBased spec
return Nothing
getGraphFilename :: Int -> FilePath -> FilePath
getGraphFilename 0 fp = fp

View File

@ -76,16 +76,13 @@ validationChallenge challengeDirectory spec = do
checkCorrectFile gitignoreFile
checkCorrectFile readmeFile
testDirectories <- findTestDirs challengeDirectory
checkTestDirectories mainMetric testDirectories
checkTrainDirectory mainMetric challengeDirectory
mapM_ (runOnTest spec) testDirectories
checkTestDirectories spec testDirectories
checkTrainDirectory spec challengeDirectory
where
configFile = challengeDirectory </> "config.txt"
gitignoreFile = challengeDirectory </> ".gitignore"
readmeFile = challengeDirectory </> "README.md"
mainMetric = evaluationSchemeMetric $ head $ gesMetrics spec
checkCorrectFile :: FilePath -> IO ()
checkCorrectFile filePath = do
@ -147,7 +144,8 @@ never :: FindClause Bool
never = depth ==? 0
testDirFilter :: FindClause Bool
testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*" ||? SFF.fileName ~~? "test-*")
testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*"
||? SFF.fileName ~~? "test-*")
fileFilter :: String -> FindClause Bool
fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts)
@ -155,12 +153,12 @@ fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileN
exts = Prelude.concat [ "(", intercalate "|" compressedFilesHandled, ")" ]
checkTestDirectories :: Metric -> [FilePath] -> IO ()
checkTestDirectories :: GEvalSpecification -> [FilePath] -> IO ()
checkTestDirectories _ [] = throwM NoTestDirectories
checkTestDirectories metric directories = mapM_ (checkTestDirectory metric) directories
checkTestDirectories spec directories = mapM_ (checkTestDirectory spec) directories
checkTestDirectory :: Metric -> FilePath -> IO ()
checkTestDirectory metric directoryPath = do
checkTestDirectory :: GEvalSpecification -> FilePath -> IO ()
checkTestDirectory spec directoryPath = do
inputFiles <- findInputFiles directoryPath
when (null inputFiles) $ throw $ NoInputFile inputFile
when (length inputFiles > 1) $ throw $ TooManyInputFiles inputFiles
@ -180,21 +178,29 @@ checkTestDirectory metric directoryPath = do
outputFiles <- findOutputFiles directoryPath
unless (null outputFiles) $ throw $ OutputFileDetected outputFiles
runOnTest spec directoryPath
where
metric = evaluationSchemeMetric $ head $ gesMetrics spec
inputFile = directoryPath </> defaultInputFile
expectedFile = directoryPath </> defaultExpectedFile
checkTrainDirectory :: Metric -> FilePath -> IO ()
checkTrainDirectory metric challengeDirectory = do
checkTrainDirectory :: GEvalSpecification -> FilePath -> IO ()
checkTrainDirectory spec challengeDirectory = do
let trainDirectory = challengeDirectory </> "train"
whenM (doesDirectoryExist trainDirectory) $ do
trainFiles <- findTrainFiles trainDirectory
when (null trainFiles) $ throw $ NoInputFile "train.tsv"
when (length trainFiles > 1) $ throw $ TooManyTrainFiles trainFiles
let [trainFile] = trainFiles
checkCorrectFile trainFile
when (fixedNumberOfColumnsInInput metric && fixedNumberOfColumnsInExpected metric) $ do
checkColumns trainFile
if (not $ null trainFiles)
then
do
putStrLn "WARNING: Found old-style train file `train.tsv`, whereas the same convention as in"
putStrLn "WARNING: test directories if preferred (`in.tsv` and `expected.tsv`)."
putStrLn "WARNING: (Though, there might still be some cases when `train.tsv` is needed, e.g. for training LMs.)"
else
do
runOnTest spec trainDirectory
checkColumns :: FilePath -> IO ()
checkColumns filePath = do

View File

@ -5,8 +5,8 @@ module GEval.WER
import Data.Array
import GEval.Common
werStep :: Eq a => [a] -> [a] -> Double
werStep expected got = (fromIntegral $ distance expected got) `safeDoubleDiv` (fromIntegral $ length expected)
werStep :: Eq a => [a] -> [a] -> (Int, Int)
werStep expected got = (distance expected got, length expected)
-- see https://stackoverflow.com/questions/6718787/levenshtein-distance-cost
distance u v = memo ! (m, n)

View File

@ -1,5 +1,5 @@
flags: {}
packages:
- '.'
extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3,Munkres-0.1,numeric-tools-0.2.0.1,Chart-1.9.1,Chart-cairo-1.9.1,multiset-0.3.4.1]
extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3,Munkres-0.1,numeric-tools-0.2.0.1,Chart-1.9.1,Chart-cairo-1.9.1,multiset-0.3.4.1,'ordered-containers-0.2.2@sha256:ebf2be3f592d9cf148ea6b8375f8af97148d44f82d8d04476899285e965afdbf,810']
resolver: lts-12.26

View File

@ -64,6 +64,8 @@ import qualified Data.Vector.Unboxed as DVU
import qualified Statistics.Matrix.Types as SMT
import Data.Statistics.Loess (loess)
import Data.Statistics.Calibration (calibration)
import Data.CartesianStrings (parseCartesianString)
import Data.SplitIntoCrossTabs (splitIntoCrossTabs, CrossTab(..), TextFrag(..))
informationRetrievalBookExample :: [(String, Int)]
informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
@ -127,6 +129,8 @@ main = hspec $ do
runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6
it "with probs" $
runGEvalTest "accuracy-probs" `shouldReturnAlmost` 0.4
it "sorted" $
runGEvalTest "accuracy-on-sorted" `shouldReturnAlmost` 0.75
describe "F-measure" $ do
it "simple example" $
runGEvalTest "f-measure-simple" `shouldReturnAlmost` 0.57142857
@ -326,12 +330,17 @@ main = hspec $ do
runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615
it "labels given with probs and numbers" $ do
runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666
describe "Mean/MultiLabel-F" $ do
it "simple" $ do
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
describe "MultiLabel-Likelihood" $ do
it "simple" $ do
runGEvalTest "multilabel-likelihood-simple" `shouldReturnAlmost` 0.115829218528827
describe "Preprocessing operations" $ do
it "F1 with preprocessing" $ do
runGEvalTest "f1-with-preprocessing" `shouldReturnAlmost` 0.57142857142857
it "Regexp substition" $ do
runGEvalTest "accuracy-with-flags" `shouldReturnAlmost` 0.8
describe "evaluating single lines" $ do
it "RMSE" $ do
(MetricOutput (SimpleRun v) _) <- gevalCoreOnSingleLines RMSE id RawItemTarget
@ -439,6 +448,7 @@ main = hspec $ do
gesTestName = "test-A",
gesSelector = Nothing,
gesOutFile = "out.tsv",
gesAltOutFiles = Nothing,
gesExpectedFile = "expected.tsv",
gesInputFile = "in.tsv",
gesMetrics = [EvaluationScheme Likelihood []],
@ -666,6 +676,41 @@ main = hspec $ do
calibration [True, False] [0.0, 1.0] `shouldBeAlmost` 0.0
calibration [True, False, False, True, False] [0.0, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
calibration [False, True, True, True, True, False, False, True, False] [0.25, 0.25, 0.0, 0.25, 0.25, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
describe "Cartesian strings" $ do
it "singleton" $ do
(parseCartesianString "foo") `shouldBe` ["foo"]
it "simple" $ do
parseCartesianString "a-{foo,bar,baz}-b" `shouldBe` ["a-foo-b", "a-bar-b", "a-baz-b"]
it "3x2" $ do
parseCartesianString "a-{foo,bar,baz}-{b,c}" `shouldBe` ["a-foo-b", "a-foo-c", "a-bar-b",
"a-bar-c", "a-baz-b", "a-baz-c" ]
it "3x2x3" $ do
parseCartesianString "{foo,bar,ba}-{b,c}-{0,1,2}x" `shouldBe` ["foo-b-0x", "foo-b-1x", "foo-b-2x",
"foo-c-0x", "foo-c-1x", "foo-c-2x",
"bar-b-0x", "bar-b-1x", "bar-b-2x",
"bar-c-0x", "bar-c-1x", "bar-c-2x",
"ba-b-0x", "ba-b-1x", "ba-b-2x",
"ba-c-0x", "ba-c-1x", "ba-c-2x" ]
describe "cross-tabs" $ do
it "singleton" $ do
splitIntoCrossTabs ["abababab"] `shouldBe` [SingleItem "abababab"]
it "too small" $ do
splitIntoCrossTabs ["aabb", "aacc"] `shouldBe` [SingleItem "aabb", SingleItem "aacc"]
it "two tables" $ do
splitIntoCrossTabs ["yABC", "xx00", "yABD", "ZC", "xx11", "yy00", "yy11", "ZD"] `shouldBe` [
CrossTab [Prefix "yAB", Prefix "Z"] [Suffix "C", Suffix "D"],
CrossTab [Prefix "xx", Prefix "yy"] [Suffix "00", Suffix "11"]]
it "simple" $ do
splitIntoCrossTabs ["aabsolutely",
"aaafoo",
"other",
"aaabaz",
"aaabaq",
"bbbfoo",
"bbbbaz",
"bbbbaq"] `shouldBe` [SingleItem "aabsolutely",
CrossTab [Suffix "foo", Suffix "baz", Suffix "baq"] [Prefix "aaa", Prefix "bbb"],
SingleItem "other"]
checkConduitPure conduit inList expList = do
let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList

View File

@ -0,0 +1,4 @@
foo baz bar
xyz aaa
2 a:1 3
1 foo baz bar
2 xyz aaa
3 2 a:1 3

View File

@ -0,0 +1 @@
--metric Accuracy:S

View File

@ -0,0 +1,4 @@
bar baz foo
xyz
a:1 2 3
1 bar baz foo
2 xyz
3 a:1 2 3

View File

@ -0,0 +1,5 @@
b88 b901
a100
a93
t34
y23
1 b88 b901
2 a100
3 a93
4 t34
5 y23

View File

@ -0,0 +1 @@
--metric Accuracy:s<[abc](\d+)><!\1>

View File

@ -0,0 +1,5 @@
a88 b901
c100
b93
t34
z23
1 a88 b901
2 c100
3 b93
4 t34
5 z23

View File

@ -0,0 +1,4 @@
foo bar baz
uuu
foo bar baz
qqq aaa
1 foo bar baz
2 uuu
3 foo bar baz
4 qqq aaa

View File

@ -0,0 +1 @@
--metric Mean/MultiLabel-F1

View File

@ -0,0 +1,4 @@
foo bar baz
foo
qqq qqq
1 foo bar baz
2 foo
3 qqq qqq

View File

@ -0,0 +1,4 @@
A
C
D
D
1 A
2 C
3 D
4 D

View File

@ -0,0 +1,4 @@
D
C
B
A
1 D
2 C
3 B
4 A

View File

@ -0,0 +1,4 @@
B
A
C
A
1 B
2 A
3 C
4 A

View File

@ -0,0 +1 @@
--metric Accuracy

View File

@ -0,0 +1,4 @@
A
B
C
D
1 A
2 B
3 C
4 D

View File

@ -0,0 +1,4 @@
1
2
3
4
1 1
2 2
3 3
4 4

View File

@ -0,0 +1,4 @@
A
C
C
D
1 A
2 C
3 C
4 D

View File

@ -1 +1 @@
--metric WER
--metric Mean/WER