Merge branch 'master' into bootstrap
This commit is contained in:
commit
608b1f9d73
35
CHANGELOG.md
35
CHANGELOG.md
@ -1,4 +1,39 @@
|
|||||||
|
|
||||||
|
## 1.28.0.0
|
||||||
|
|
||||||
|
* Add `s` flag for substitution
|
||||||
|
|
||||||
|
## 1.27.0.0
|
||||||
|
|
||||||
|
* Results are formatted in cross-tables (if possible)
|
||||||
|
|
||||||
|
## 1.26.0.0
|
||||||
|
|
||||||
|
* Change the meaning of WER (WER is calculated for the whole set now
|
||||||
|
- similar to the way BLEU is calculated)
|
||||||
|
* Use `Mean/WER` if you want the old meaning (average of per-item results)
|
||||||
|
|
||||||
|
## 1.25.0.0
|
||||||
|
|
||||||
|
* Add --oracle-item-based
|
||||||
|
|
||||||
|
## 1.24.0.0
|
||||||
|
|
||||||
|
* Introduce metric priorities
|
||||||
|
* Use "Cartesian" strings in metrics
|
||||||
|
|
||||||
|
## 1.23.0.0
|
||||||
|
|
||||||
|
* New style of train data is preferred
|
||||||
|
- `in.tsv` and `expected.tsv` instead of `train.tsv`
|
||||||
|
- though this is not required as sometimes training data look different than test data
|
||||||
|
- `--validate` option was changed accordingly
|
||||||
|
|
||||||
|
## 1.22.1.0
|
||||||
|
|
||||||
|
* Add "Mean/" meta-metric (for the time being working only with MultiLabel-F-measure)
|
||||||
|
* Add :S flag
|
||||||
|
|
||||||
## 1.22.0.0
|
## 1.22.0.0
|
||||||
|
|
||||||
* Add SegmentAccuracy
|
* Add SegmentAccuracy
|
||||||
|
25
app/Main.hs
25
app/Main.hs
@ -16,6 +16,8 @@ import System.Exit
|
|||||||
|
|
||||||
import Data.Conduit.SmartSource
|
import Data.Conduit.SmartSource
|
||||||
|
|
||||||
|
import Data.SplitIntoCrossTabs
|
||||||
|
|
||||||
import System.FilePath
|
import System.FilePath
|
||||||
|
|
||||||
import Data.List (intercalate, sort)
|
import Data.List (intercalate, sort)
|
||||||
@ -23,6 +25,7 @@ import Data.List (intercalate, sort)
|
|||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
|
|
||||||
import Data.Map.Strict as M
|
import Data.Map.Strict as M
|
||||||
|
import qualified Data.Map.Lazy as LM
|
||||||
import Data.Set as S
|
import Data.Set as S
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
@ -79,7 +82,27 @@ showTheResult' opts [val] = putStrLn $ formatTheResult (gesPrecision $ geoSpec o
|
|||||||
showTheResult' opts [] = do
|
showTheResult' opts [] = do
|
||||||
hPutStrLn stderr "no metric given, use --metric option"
|
hPutStrLn stderr "no metric given, use --metric option"
|
||||||
exitFailure
|
exitFailure
|
||||||
showTheResult' opts vals = mapM_ putStrLn $ Prelude.map (formatTheMetricAndResult (gesPrecision $ geoSpec opts)) $ zip (gesMetrics $ geoSpec opts) vals
|
showTheResult' opts vals = mapM_ putStrLn
|
||||||
|
$ intercalate [""]
|
||||||
|
$ Prelude.map (formatCrossTable (gesPrecision $ geoSpec opts))
|
||||||
|
$ splitIntoTablesWithValues (T.pack "metric") (T.pack "value") mapping metricLabels
|
||||||
|
where mapping = LM.fromList $ zip metricLabels vals
|
||||||
|
metricLabels = Prelude.map T.pack $ Prelude.map evaluationSchemeName $ gesMetrics $ geoSpec opts
|
||||||
|
|
||||||
|
formatCrossTable :: Maybe Int -> TableWithValues MetricResult -> [String]
|
||||||
|
formatCrossTable mPrecision (TableWithValues [_, _] body) =
|
||||||
|
-- actually we won't print metric/value header
|
||||||
|
-- (1) to keep backward-compatible with the previous version
|
||||||
|
-- (2) to be concise
|
||||||
|
Prelude.map (formatCrossTableLine mPrecision) body
|
||||||
|
formatCrossTable mPrecision (TableWithValues header body) =
|
||||||
|
(intercalate "\t" $ Prelude.map T.unpack header) : Prelude.map (formatCrossTableLine mPrecision) body
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
formatCrossTableLine :: Maybe Int -> (T.Text, [MetricResult]) -> String
|
||||||
|
formatCrossTableLine mPrecision (rowName, values) =
|
||||||
|
intercalate "\t" ((T.unpack rowName):Prelude.map (formatTheResult mPrecision) values)
|
||||||
|
|
||||||
formatSourceSpec :: SourceSpec -> String
|
formatSourceSpec :: SourceSpec -> String
|
||||||
formatSourceSpec (FilePathSpec fp) = dropExtensions $ takeFileName fp
|
formatSourceSpec (FilePathSpec fp) = dropExtensions $ takeFileName fp
|
||||||
|
@ -26,7 +26,7 @@ let
|
|||||||
stack2nix-script = import "${static-haskell-nix}/static-stack2nix-builder/stack2nix-script.nix" {
|
stack2nix-script = import "${static-haskell-nix}/static-stack2nix-builder/stack2nix-script.nix" {
|
||||||
inherit pkgs;
|
inherit pkgs;
|
||||||
stack-project-dir = toString ./.; # where stack.yaml is
|
stack-project-dir = toString ./.; # where stack.yaml is
|
||||||
hackageSnapshot = "2019-05-08T00:00:00Z"; # pins e.g. extra-deps without hashes or revisions
|
hackageSnapshot = "2020-01-03T00:00:00Z"; # pins e.g. extra-deps without hashes or revisions
|
||||||
};
|
};
|
||||||
|
|
||||||
static-stack2nix-builder = import "${static-haskell-nix}/static-stack2nix-builder/default.nix" {
|
static-stack2nix-builder = import "${static-haskell-nix}/static-stack2nix-builder/default.nix" {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
name: geval
|
name: geval
|
||||||
version: 1.22.0.0
|
version: 1.28.0.0
|
||||||
synopsis: Machine learning evaluation tools
|
synopsis: Machine learning evaluation tools
|
||||||
description: Please see README.md
|
description: Please see README.md
|
||||||
homepage: http://github.com/name/project
|
homepage: http://github.com/name/project
|
||||||
@ -49,6 +49,8 @@ library
|
|||||||
, GEval.Selector
|
, GEval.Selector
|
||||||
, Data.Statistics.Loess
|
, Data.Statistics.Loess
|
||||||
, Data.Statistics.Calibration
|
, Data.Statistics.Calibration
|
||||||
|
, Data.CartesianStrings
|
||||||
|
, Data.SplitIntoCrossTabs
|
||||||
, Paths_geval
|
, Paths_geval
|
||||||
build-depends: base >= 4.7 && < 5
|
build-depends: base >= 4.7 && < 5
|
||||||
, cond
|
, cond
|
||||||
@ -102,6 +104,7 @@ library
|
|||||||
, temporary
|
, temporary
|
||||||
, utf8-string
|
, utf8-string
|
||||||
, singletons
|
, singletons
|
||||||
|
, ordered-containers
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
executable geval
|
executable geval
|
||||||
|
42
src/Data/CartesianStrings.hs
Normal file
42
src/Data/CartesianStrings.hs
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
module Data.CartesianStrings
|
||||||
|
(parseCartesianString,
|
||||||
|
concatCartesianStrings,
|
||||||
|
CartesianStrings(..))
|
||||||
|
where
|
||||||
|
|
||||||
|
import Data.List (findIndex)
|
||||||
|
import Data.List.Split (splitOn)
|
||||||
|
|
||||||
|
-- A helper library for parsing strings representing sets of strings
|
||||||
|
-- obtained via a Cartesian product, e.g.:
|
||||||
|
-- - "foo" represents just ["foo"]
|
||||||
|
-- - "a-{foo,bar,baz}-b" represents ["a-foo-b", "a-bar-b", "a-baz-b"]
|
||||||
|
-- - "{foo,bar,baz}-{x,y}-{0,1,2}" represents a set containing 18 strings
|
||||||
|
|
||||||
|
cartProd :: [a] -> [b] -> [(a, b)]
|
||||||
|
cartProd xs ys = [(x,y) | x <- xs, y <- ys]
|
||||||
|
|
||||||
|
parseCartesianString :: String -> [String]
|
||||||
|
parseCartesianString s =
|
||||||
|
case findIndex (=='{') s of
|
||||||
|
Just begIx ->
|
||||||
|
let pref = take begIx s
|
||||||
|
c = drop (begIx + 1) s
|
||||||
|
in case findIndex (=='}') c of
|
||||||
|
Just endIx ->
|
||||||
|
let inf = take endIx c
|
||||||
|
current = splitOn "," inf
|
||||||
|
rest = parseCartesianString $ drop (endIx + 1) c
|
||||||
|
in map (uncurry (++)) $ cartProd (map (pref++) current) rest
|
||||||
|
Nothing -> [s]
|
||||||
|
|
||||||
|
data CartesianStrings a = CartesianStrings [a]
|
||||||
|
deriving (Eq)
|
||||||
|
|
||||||
|
instance Read a => Read (CartesianStrings a) where
|
||||||
|
readsPrec _ s = [(CartesianStrings (map read $ parseCartesianString s), "")]
|
||||||
|
|
||||||
|
concatCartesianStrings :: [CartesianStrings a] -> [a]
|
||||||
|
concatCartesianStrings = concat . map (\(CartesianStrings ss) -> ss)
|
148
src/Data/SplitIntoCrossTabs.hs
Normal file
148
src/Data/SplitIntoCrossTabs.hs
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
module Data.SplitIntoCrossTabs
|
||||||
|
(splitIntoCrossTabs,
|
||||||
|
splitIntoTablesWithValues,
|
||||||
|
CrossTab(..),
|
||||||
|
TableWithValues(..),
|
||||||
|
TextFrag(..))
|
||||||
|
where
|
||||||
|
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import Data.Text (Text)
|
||||||
|
import qualified Data.Set.Ordered as OS
|
||||||
|
import qualified Data.Map.Ordered as OM
|
||||||
|
import qualified Data.Set as S
|
||||||
|
import qualified Data.Foldable as F
|
||||||
|
import qualified Data.Map as M
|
||||||
|
import qualified Data.Map.Lazy as LM
|
||||||
|
|
||||||
|
import Debug.Trace
|
||||||
|
|
||||||
|
import Data.List (unfoldr, sortBy, maximumBy, minimumBy)
|
||||||
|
|
||||||
|
data TableWithValues a = TableWithValues [Text] [(Text, [a])]
|
||||||
|
|
||||||
|
data CrossTab = SingleItem Text | CrossTab [TextFrag] [TextFrag]
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
data TextFrag = Prefix Text | Suffix Text
|
||||||
|
deriving (Show, Eq, Ord)
|
||||||
|
|
||||||
|
splitIntoTablesWithValues :: Text
|
||||||
|
-> Text
|
||||||
|
-> LM.Map Text a -- ^ map from which values will be taken,
|
||||||
|
-- deliberately a lazy map so that
|
||||||
|
-- values could be shown one by one
|
||||||
|
-> [Text]
|
||||||
|
-> [TableWithValues a]
|
||||||
|
splitIntoTablesWithValues defaultMainHeader defaultSecondaryHeader mapping =
|
||||||
|
joinSingleItems . map (convertIntoTableWithValues defaultMainHeader defaultSecondaryHeader mapping) . splitIntoCrossTabs
|
||||||
|
where joinSingleItems (TableWithValues h@[_, _] arows : TableWithValues [_, _] brows : rest) =
|
||||||
|
joinSingleItems (TableWithValues h (arows ++ brows) : rest)
|
||||||
|
joinSingleItems (e : rest) = e : joinSingleItems rest
|
||||||
|
joinSingleItems [] = []
|
||||||
|
|
||||||
|
convertIntoTableWithValues :: Text -> Text -> LM.Map Text a -> CrossTab -> TableWithValues a
|
||||||
|
convertIntoTableWithValues defaultMainHeader defaultSecondaryHeader mapping (SingleItem t) =
|
||||||
|
TableWithValues [defaultMainHeader, defaultSecondaryHeader] [(t, [mapping LM.! t])]
|
||||||
|
convertIntoTableWithValues defaultMainHeader defaultSecondaryHeader mapping (CrossTab rowNames columnNames) =
|
||||||
|
TableWithValues (T.empty : (map toText columnNames)) (map processRow rowNames)
|
||||||
|
where processRow rowName = (toText rowName, map (\colName -> mapping LM.! (combineFrags rowName colName)) columnNames)
|
||||||
|
|
||||||
|
splitIntoCrossTabs :: [Text] -> [CrossTab]
|
||||||
|
splitIntoCrossTabs inputs =
|
||||||
|
map preferVertical
|
||||||
|
$ map snd
|
||||||
|
$ sortBy (\(r1,_) (r2, _) -> r1 `compare` r2)
|
||||||
|
$ map (getRank inputRanks)
|
||||||
|
$ unfoldr extractTheBestCrossTab inputs
|
||||||
|
where inputRanks = M.fromList $ zip inputs [1..]
|
||||||
|
|
||||||
|
preferVertical :: CrossTab -> CrossTab
|
||||||
|
preferVertical s@(SingleItem _) = s
|
||||||
|
preferVertical c@(CrossTab rowNames columnNames)
|
||||||
|
| length rowNames < length columnNames = CrossTab columnNames rowNames
|
||||||
|
| otherwise = c
|
||||||
|
|
||||||
|
getRank :: M.Map Text Int -> CrossTab -> (Int, CrossTab)
|
||||||
|
getRank ranks c = (bestRank, c)
|
||||||
|
where bestRank = minimum
|
||||||
|
$ map (ranks M.!)
|
||||||
|
$ S.toList
|
||||||
|
$ toSet c
|
||||||
|
|
||||||
|
extractTheBestCrossTab :: [Text] -> Maybe (CrossTab, [Text])
|
||||||
|
extractTheBestCrossTab [] = Nothing
|
||||||
|
extractTheBestCrossTab ts = Just (theBestCrossTab, rest)
|
||||||
|
where theBestCrossTab = findTheBestCrossTab ts
|
||||||
|
rest = filter (`S.notMember` (toSet theBestCrossTab)) ts
|
||||||
|
|
||||||
|
findTheBestCrossTab :: [Text] -> CrossTab
|
||||||
|
findTheBestCrossTab ts = case orderedEntries of
|
||||||
|
[] -> SingleItem defaultSingleton
|
||||||
|
_ -> maximumBy (\t1 t2 -> crossTabSize t1 `compare` crossTabSize t2)
|
||||||
|
$ map (findTheBestCrossTabForTextPart (SingleItem defaultSingleton) orderedEntries)
|
||||||
|
$ map snd orderedEntries
|
||||||
|
where mapping = gatherTextParts ts
|
||||||
|
orderedEntries = sortBy entryComparator
|
||||||
|
$ filter (\(_, (_, tset)) -> OS.size tset >= 2)
|
||||||
|
$ zip [1..] (OM.assocs mapping)
|
||||||
|
(defaultSingleton:_) = ts
|
||||||
|
|
||||||
|
thenCmp :: Ordering -> Ordering -> Ordering
|
||||||
|
thenCmp EQ o2 = o2
|
||||||
|
thenCmp o1 _ = o1
|
||||||
|
|
||||||
|
entryComparator (r1, (_, s1)) (r2, (_, s2)) = (OS.size s2 `compare` OS.size s1)
|
||||||
|
`thenCmp`
|
||||||
|
(r1 `compare` r2)
|
||||||
|
|
||||||
|
findTheBestCrossTabForTextPart :: CrossTab -> [(Int, (TextFrag, OS.OSet TextFrag))] -> (TextFrag, OS.OSet TextFrag) -> CrossTab
|
||||||
|
findTheBestCrossTabForTextPart defaultCrossTab entries chosenEntry@(t, tset) = if crossTabSize bestCrossTabFound > 1
|
||||||
|
then bestCrossTabFound
|
||||||
|
else defaultCrossTab
|
||||||
|
where bestCrossTabFound = foldr step (CrossTab [] (F.toList tset)) entriesOrderedByIntersection
|
||||||
|
entriesOrderedByIntersection =
|
||||||
|
sortBy entryComparator
|
||||||
|
$ filter (\(_, (_, tset')) -> OS.size tset' >= 2)
|
||||||
|
$ map (\(r, (t', tset')) -> (r, (t', tset' OS.|/\ tset))) entries
|
||||||
|
step (_, (t', tset')) currentTab@(CrossTab frags common) = selectedTab
|
||||||
|
where newTab = CrossTab newT (F.toList newCommon)
|
||||||
|
newT = t':frags
|
||||||
|
newCommon = (OS.fromList common) OS.|/\ tset'
|
||||||
|
selectedTab = if crossTabSize newTab >= crossTabSize currentTab
|
||||||
|
then newTab
|
||||||
|
else currentTab
|
||||||
|
|
||||||
|
crossTabSize :: CrossTab -> Int
|
||||||
|
crossTabSize (SingleItem _) = 1
|
||||||
|
crossTabSize (CrossTab [] _) = 0
|
||||||
|
crossTabSize (CrossTab _ []) = 0
|
||||||
|
-- tables really start from 2x2
|
||||||
|
crossTabSize (CrossTab [_] _) = 0
|
||||||
|
crossTabSize (CrossTab _ [_]) = 0
|
||||||
|
crossTabSize (CrossTab rows columns) = length rows * length columns
|
||||||
|
|
||||||
|
toSet :: CrossTab -> S.Set Text
|
||||||
|
toSet (SingleItem t) = S.singleton t
|
||||||
|
toSet (CrossTab rowNames columnNames) = S.fromList [rName `combineFrags` cName | rName <- rowNames, cName <- columnNames]
|
||||||
|
|
||||||
|
toText :: TextFrag -> Text
|
||||||
|
toText (Prefix prefix) = T.stripEnd prefix
|
||||||
|
toText (Suffix prefix) = T.stripStart prefix
|
||||||
|
|
||||||
|
combineFrags :: TextFrag -> TextFrag -> Text
|
||||||
|
combineFrags (Prefix prefix) (Suffix suffix) = prefix <> suffix
|
||||||
|
combineFrags (Suffix suffix) (Prefix prefix) = prefix <> suffix
|
||||||
|
combineFrags _ _ = error $ "incompatible text fragments"
|
||||||
|
|
||||||
|
getTextParts :: Text -> [(TextFrag, TextFrag)]
|
||||||
|
getTextParts t = [(Prefix (T.take ix t), Suffix (T.drop ix t)) | ix <- [1..(T.length t)-1]]
|
||||||
|
|
||||||
|
gatherTextParts :: [Text] -> OM.OMap TextFrag (OS.OSet TextFrag)
|
||||||
|
gatherTextParts = (gather OS.singleton (OS.|<>)) . concat . (map getTextParts)
|
||||||
|
|
||||||
|
gather :: Ord a => (b -> c) -> (c -> c -> c) -> [(a, b)] -> OM.OMap a c
|
||||||
|
gather createEntry combine = foldr extend OM.empty
|
||||||
|
where extend (k, v) m = OM.unionWithL (\_ v1 v2 -> combine v1 v2) (OM.singleton (k, (createEntry v))) m
|
@ -39,6 +39,7 @@ module GEval.Core
|
|||||||
checkMultipleOuts,
|
checkMultipleOuts,
|
||||||
checkMultipleOutsCore,
|
checkMultipleOutsCore,
|
||||||
gesMainMetric,
|
gesMainMetric,
|
||||||
|
gesMainScheme,
|
||||||
gesPreprocess,
|
gesPreprocess,
|
||||||
getDataDecoder,
|
getDataDecoder,
|
||||||
threeLineSource,
|
threeLineSource,
|
||||||
@ -160,6 +161,7 @@ data GEvalSpecification = GEvalSpecification
|
|||||||
gesTestName :: String,
|
gesTestName :: String,
|
||||||
gesSelector :: Maybe Selector,
|
gesSelector :: Maybe Selector,
|
||||||
gesOutFile :: String,
|
gesOutFile :: String,
|
||||||
|
gesAltOutFiles :: Maybe [String],
|
||||||
gesExpectedFile :: String,
|
gesExpectedFile :: String,
|
||||||
gesInputFile :: String,
|
gesInputFile :: String,
|
||||||
gesMetrics :: [EvaluationScheme],
|
gesMetrics :: [EvaluationScheme],
|
||||||
@ -177,6 +179,11 @@ gesMainMetric spec = case gesMetrics spec of
|
|||||||
(scheme:_) -> evaluationSchemeMetric scheme
|
(scheme:_) -> evaluationSchemeMetric scheme
|
||||||
otherwise -> error "no metric given"
|
otherwise -> error "no metric given"
|
||||||
|
|
||||||
|
gesMainScheme :: GEvalSpecification -> EvaluationScheme
|
||||||
|
gesMainScheme spec = case gesMetrics spec of
|
||||||
|
(scheme:_) -> scheme
|
||||||
|
otherwise -> error "no metric given"
|
||||||
|
|
||||||
gesPreprocess :: GEvalSpecification -> (Text -> Text)
|
gesPreprocess :: GEvalSpecification -> (Text -> Text)
|
||||||
gesPreprocess spec = tokenizeTabSeparatedWithSpaces (gesTokenizer spec)
|
gesPreprocess spec = tokenizeTabSeparatedWithSpaces (gesTokenizer spec)
|
||||||
|
|
||||||
@ -191,6 +198,7 @@ data GEvalSpecialCommand = Init
|
|||||||
| Diff FilePath | MostWorseningFeatures FilePath
|
| Diff FilePath | MostWorseningFeatures FilePath
|
||||||
| PrintVersion | JustTokenize | Submit
|
| PrintVersion | JustTokenize | Submit
|
||||||
| Validate | ListMetrics
|
| Validate | ListMetrics
|
||||||
|
| OracleItemBased
|
||||||
|
|
||||||
data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
|
data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
|
||||||
|
|
||||||
@ -209,6 +217,7 @@ defaultGEvalSpecification = GEvalSpecification {
|
|||||||
gesTestName = defaultTestName,
|
gesTestName = defaultTestName,
|
||||||
gesSelector = Nothing,
|
gesSelector = Nothing,
|
||||||
gesOutFile = defaultOutFile,
|
gesOutFile = defaultOutFile,
|
||||||
|
gesAltOutFiles = Nothing,
|
||||||
gesExpectedFile = defaultExpectedFile,
|
gesExpectedFile = defaultExpectedFile,
|
||||||
gesInputFile = defaultInputFile,
|
gesInputFile = defaultInputFile,
|
||||||
gesMetrics = [EvaluationScheme defaultMetric []],
|
gesMetrics = [EvaluationScheme defaultMetric []],
|
||||||
@ -453,6 +462,37 @@ gevalCoreOnSources CharMatch inputLineSource = helper inputLineSource
|
|||||||
gevalCoreOnSources (LogLossHashed nbOfBits) _ = helperLogLossHashed nbOfBits id
|
gevalCoreOnSources (LogLossHashed nbOfBits) _ = helperLogLossHashed nbOfBits id
|
||||||
gevalCoreOnSources (LikelihoodHashed nbOfBits) _ = helperLogLossHashed nbOfBits logLossToLikehood
|
gevalCoreOnSources (LikelihoodHashed nbOfBits) _ = helperLogLossHashed nbOfBits logLossToLikehood
|
||||||
|
|
||||||
|
|
||||||
|
gevalCoreOnSources (Mean (MultiLabelFMeasure beta)) _
|
||||||
|
= gevalCoreWithoutInputOnItemTargets (Right . intoWords)
|
||||||
|
(Right . getWords)
|
||||||
|
((fMeasureOnCounts beta) . (getCounts (==)))
|
||||||
|
averageC
|
||||||
|
id
|
||||||
|
noGraph
|
||||||
|
where
|
||||||
|
-- repeated as below, as it will be refactored into dependent types soon anyway
|
||||||
|
getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t
|
||||||
|
getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
|
||||||
|
intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t
|
||||||
|
intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
|
||||||
|
|
||||||
|
gevalCoreOnSources (Mean WER) _
|
||||||
|
= gevalCoreWithoutInputOnItemTargets (Right . intoWords)
|
||||||
|
(Right . getWords)
|
||||||
|
((uncurry (/.)) . (uncurry werStep))
|
||||||
|
averageC
|
||||||
|
id
|
||||||
|
noGraph
|
||||||
|
where
|
||||||
|
-- repeated as below, as it will be refactored into dependent types soon anyway
|
||||||
|
getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t
|
||||||
|
getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
|
||||||
|
intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t
|
||||||
|
intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
|
||||||
|
|
||||||
|
gevalCoreOnSources (Mean _) _ = error $ "Mean/ meta-metric defined only for MultiLabel-F1 and WER for the time being"
|
||||||
|
|
||||||
-- only MultiLabel-F1 handled for JSONs for the time being...
|
-- only MultiLabel-F1 handled for JSONs for the time being...
|
||||||
gevalCoreOnSources (MultiLabelFMeasure beta) _ = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
|
gevalCoreOnSources (MultiLabelFMeasure beta) _ = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
|
||||||
(Right . getWords)
|
(Right . getWords)
|
||||||
@ -511,9 +551,12 @@ gevalCoreOnSources BLEU _ = gevalCoreWithoutInput SABLEU bleuAgg bleuFinal noGra
|
|||||||
gevalCoreOnSources GLEU _ = gevalCoreWithoutInput SAGLEU gleuAgg gleuFinal noGraph
|
gevalCoreOnSources GLEU _ = gevalCoreWithoutInput SAGLEU gleuAgg gleuFinal noGraph
|
||||||
where gleuFinal (m, t) = m /. t
|
where gleuFinal (m, t) = m /. t
|
||||||
gleuAgg = CC.foldl gleuFuse (0, 0)
|
gleuAgg = CC.foldl gleuFuse (0, 0)
|
||||||
gleuFuse (a1, a2) (b1, b2) = (a1+b1, a2+b2)
|
gleuFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
|
||||||
|
|
||||||
gevalCoreOnSources WER _ = gevalCoreWithoutInput SAWER averageC id noGraph
|
gevalCoreOnSources WER _ = gevalCoreWithoutInput SAWER werAgg werFinal noGraph
|
||||||
|
where werAgg = CC.foldl werFuse (0, 0)
|
||||||
|
werFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
|
||||||
|
werFinal (errors, ref) = errors /. ref
|
||||||
|
|
||||||
gevalCoreOnSources Accuracy _ = gevalCoreWithoutInput SAAccuracy averageC id noGraph
|
gevalCoreOnSources Accuracy _ = gevalCoreWithoutInput SAAccuracy averageC id noGraph
|
||||||
|
|
||||||
@ -753,7 +796,10 @@ continueGEvalCalculations SAGLEU GLEU = defineContinuation gleuAgg gleuFinal noG
|
|||||||
gleuAgg = CC.foldl gleuFuse (0, 0)
|
gleuAgg = CC.foldl gleuFuse (0, 0)
|
||||||
gleuFuse (a1, a2) (b1, b2) = (a1+b1, a2+b2)
|
gleuFuse (a1, a2) (b1, b2) = (a1+b1, a2+b2)
|
||||||
|
|
||||||
continueGEvalCalculations SAWER WER = defineContinuation averageC id noGraph
|
continueGEvalCalculations SAWER WER = defineContinuation werAgg werFinal noGraph
|
||||||
|
where werAgg = CC.foldl werFuse (0, 0)
|
||||||
|
werFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
|
||||||
|
werFinal (errors, ref) = errors /. ref
|
||||||
|
|
||||||
continueGEvalCalculations SAAccuracy Accuracy = defineContinuation averageC id noGraph
|
continueGEvalCalculations SAAccuracy Accuracy = defineContinuation averageC id noGraph
|
||||||
|
|
||||||
|
@ -19,6 +19,9 @@ import Control.Exception
|
|||||||
import Control.Monad.Trans.Resource
|
import Control.Monad.Trans.Resource
|
||||||
import Data.String.Here
|
import Data.String.Here
|
||||||
|
|
||||||
|
import Data.List (intercalate)
|
||||||
|
import Data.List.Split (splitOn)
|
||||||
|
|
||||||
createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO ()
|
createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO ()
|
||||||
createChallenge withDataFiles expectedDirectory spec = do
|
createChallenge withDataFiles expectedDirectory spec = do
|
||||||
D.createDirectoryIfMissing False expectedDirectory
|
D.createDirectoryIfMissing False expectedDirectory
|
||||||
@ -31,7 +34,7 @@ createChallenge withDataFiles expectedDirectory spec = do
|
|||||||
if withDataFiles
|
if withDataFiles
|
||||||
then
|
then
|
||||||
do
|
do
|
||||||
createFile (trainDirectory </> "train.tsv") $ trainContents metric
|
createTrainFiles metric trainDirectory expectedFile
|
||||||
|
|
||||||
createFile (devDirectory </> "in.tsv") $ devInContents metric
|
createFile (devDirectory </> "in.tsv") $ devInContents metric
|
||||||
createFile (devDirectory </> expectedFile) $ devExpectedContents metric
|
createFile (devDirectory </> expectedFile) $ devExpectedContents metric
|
||||||
@ -50,12 +53,23 @@ createChallenge withDataFiles expectedDirectory spec = do
|
|||||||
testDirectory = expectedDirectory </> testName
|
testDirectory = expectedDirectory </> testName
|
||||||
expectedFile = gesExpectedFile spec
|
expectedFile = gesExpectedFile spec
|
||||||
|
|
||||||
|
createTrainFiles :: Metric -> FilePath -> FilePath -> IO ()
|
||||||
|
createTrainFiles metric@(LogLossHashed _) trainDirectory _ = createSingleTrainFile metric trainDirectory
|
||||||
|
createTrainFiles metric@(LikelihoodHashed _) trainDirectory _ = createSingleTrainFile metric trainDirectory
|
||||||
|
createTrainFiles metric trainDirectory expectedFile = do
|
||||||
|
createFile (trainDirectory </> "in.tsv") $ trainInContents metric
|
||||||
|
createFile (trainDirectory </> expectedFile) $ trainExpectedContents metric
|
||||||
|
|
||||||
|
createSingleTrainFile metric trainDirectory =
|
||||||
|
createFile (trainDirectory </> "train.tsv") $ trainContents metric
|
||||||
|
|
||||||
createFile :: FilePath -> String -> IO ()
|
createFile :: FilePath -> String -> IO ()
|
||||||
createFile filePath contents = do
|
createFile filePath contents = do
|
||||||
whenM (D.doesFileExist filePath) $ throwM $ FileAlreadyThere filePath
|
whenM (D.doesFileExist filePath) $ throwM $ FileAlreadyThere filePath
|
||||||
writeFile filePath contents
|
writeFile filePath contents
|
||||||
|
|
||||||
readmeMDContents :: Metric -> String -> String
|
readmeMDContents :: Metric -> String -> String
|
||||||
|
readmeMDContents (Mean metric) testName = readmeMDContents metric testName
|
||||||
readmeMDContents GLEU testName = readmeMDContents BLEU testName
|
readmeMDContents GLEU testName = readmeMDContents BLEU testName
|
||||||
readmeMDContents BLEU testName = [i|
|
readmeMDContents BLEU testName = [i|
|
||||||
GEval sample machine translation challenge
|
GEval sample machine translation challenge
|
||||||
@ -413,7 +427,22 @@ configContents schemes precision testName = unwords (Prelude.map (\scheme -> ("-
|
|||||||
where precisionOpt Nothing = ""
|
where precisionOpt Nothing = ""
|
||||||
precisionOpt (Just p) = " --precision " ++ (show p)
|
precisionOpt (Just p) = " --precision " ++ (show p)
|
||||||
|
|
||||||
|
-- Originally train content was in one file, to avoid large changes
|
||||||
|
-- for the time being we are using the original function.
|
||||||
|
|
||||||
|
trainInContents :: Metric -> String
|
||||||
|
trainInContents metric = unlines
|
||||||
|
$ map (intercalate "\t")
|
||||||
|
$ map tail
|
||||||
|
$ map (splitOn "\t")
|
||||||
|
$ lines
|
||||||
|
$ trainContents metric
|
||||||
|
|
||||||
|
trainExpectedContents :: Metric -> String
|
||||||
|
trainExpectedContents metric = unlines $ map head $ map (splitOn "\t") $ lines $ trainContents metric
|
||||||
|
|
||||||
trainContents :: Metric -> String
|
trainContents :: Metric -> String
|
||||||
|
trainContents (Mean metric) = trainContents metric
|
||||||
trainContents GLEU = trainContents BLEU
|
trainContents GLEU = trainContents BLEU
|
||||||
trainContents BLEU = [hereLit|alussa loi jumala taivaan ja maan he mea hanga na te atua i te timatanga te rangi me te whenua
|
trainContents BLEU = [hereLit|alussa loi jumala taivaan ja maan he mea hanga na te atua i te timatanga te rangi me te whenua
|
||||||
ja maa oli autio ja tyhjä , ja pimeys oli syvyyden päällä a kahore he ahua o te whenua , i takoto kau ; he pouri ano a runga i te mata o te hohonu
|
ja maa oli autio ja tyhjä , ja pimeys oli syvyyden päällä a kahore he ahua o te whenua , i takoto kau ; he pouri ano a runga i te mata o te hohonu
|
||||||
@ -482,7 +511,7 @@ trainContents LogLoss = [hereLit|0.0 Hell, no!!!
|
|||||||
trainContents BIOF1Labels = trainContents BIOF1
|
trainContents BIOF1Labels = trainContents BIOF1
|
||||||
trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND My name is Bond , James Bond
|
trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND My name is Bond , James Bond
|
||||||
O O O O O There is no name here
|
O O O O O There is no name here
|
||||||
B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
|
B-firstname/JOHN B-surname/VON I-surname/NEUMANN John von Nueman
|
||||||
|]
|
|]
|
||||||
trainContents TokenAccuracy = [hereLit|* V N I like cats
|
trainContents TokenAccuracy = [hereLit|* V N I like cats
|
||||||
* * V * N I can see the rainbow
|
* * V * N I can see the rainbow
|
||||||
@ -500,17 +529,20 @@ Love and hate LOVE HATE
|
|||||||
I am sad SADNESS
|
I am sad SADNESS
|
||||||
I am so sad and hateful SADNESS HATE
|
I am so sad and hateful SADNESS HATE
|
||||||
|]
|
|]
|
||||||
trainContents (Soft2DFMeasure _) = trainContents ClippEU
|
trainContents (Soft2DFMeasure _) = [hereLit|2/0,0,10,150 foo.djvu
|
||||||
trainContents ClippEU = [hereLit|2/0,0,10,150 foo.djvu
|
|
||||||
1/30,40,100,1000 bar.djvu
|
1/30,40,100,1000 bar.djvu
|
||||||
|]
|
|]
|
||||||
trainContents _ = [hereLit|0.06 0.39 0 0.206
|
trainContents ClippEU = [hereLit|1/30,40,100,1000/10 bar.djvu
|
||||||
1.00 1.00 1 0.017
|
2/30,40,500,600/10 foo.djvu
|
||||||
317.8 5.20 67 0.048
|
|]
|
||||||
14.6 19.22 27 0.047
|
trainContents _ = [hereLit|0.06 0.39 0 0.206
|
||||||
|
1.00 1.00 1 0.017
|
||||||
|
317.8 5.20 67 0.048
|
||||||
|
14.6 19.22 27 0.047
|
||||||
|]
|
|]
|
||||||
|
|
||||||
devInContents :: Metric -> String
|
devInContents :: Metric -> String
|
||||||
|
devInContents (Mean metric) = devInContents metric
|
||||||
devInContents GLEU = devInContents BLEU
|
devInContents GLEU = devInContents BLEU
|
||||||
devInContents BLEU = [hereLit|ja jumala sanoi : " tulkoon valkeus " , ja valkeus tuli
|
devInContents BLEU = [hereLit|ja jumala sanoi : " tulkoon valkeus " , ja valkeus tuli
|
||||||
ja jumala näki , että valkeus oli hyvä ; ja jumala erotti valkeuden pimeydestä
|
ja jumala näki , että valkeus oli hyvä ; ja jumala erotti valkeuden pimeydestä
|
||||||
@ -578,6 +610,7 @@ devInContents _ = [hereLit|0.72 0 0.007
|
|||||||
|]
|
|]
|
||||||
|
|
||||||
devExpectedContents :: Metric -> String
|
devExpectedContents :: Metric -> String
|
||||||
|
devExpectedContents (Mean metric) = devExpectedContents metric
|
||||||
devExpectedContents GLEU = devExpectedContents BLEU
|
devExpectedContents GLEU = devExpectedContents BLEU
|
||||||
devExpectedContents BLEU = [hereLit|a ka ki te atua , kia marama : na ka marama
|
devExpectedContents BLEU = [hereLit|a ka ki te atua , kia marama : na ka marama
|
||||||
a ka kite te atua i te marama , he pai : a ka wehea e te atua te marama i te pouri
|
a ka kite te atua i te marama , he pai : a ka wehea e te atua te marama i te pouri
|
||||||
@ -647,6 +680,7 @@ devExpectedContents _ = [hereLit|0.82
|
|||||||
|]
|
|]
|
||||||
|
|
||||||
testInContents :: Metric -> String
|
testInContents :: Metric -> String
|
||||||
|
testInContents (Mean metric) = testInContents metric
|
||||||
testInContents GLEU = [hereLit|Alice has a black
|
testInContents GLEU = [hereLit|Alice has a black
|
||||||
|]
|
|]
|
||||||
testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi
|
testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi
|
||||||
@ -717,6 +751,7 @@ testInContents _ = [hereLit|0.72 0 0.007
|
|||||||
|]
|
|]
|
||||||
|
|
||||||
testExpectedContents :: Metric -> String
|
testExpectedContents :: Metric -> String
|
||||||
|
testExpectedContents (Mean metric) = testExpectedContents metric
|
||||||
testExpectedContents BLEU = [hereLit|na ka huaina e te atua te marama ko te awatea , a ko te pouri i huaina e ia ko te po
|
testExpectedContents BLEU = [hereLit|na ka huaina e te atua te marama ko te awatea , a ko te pouri i huaina e ia ko te po
|
||||||
a ko te ahiahi , ko te ata , he ra kotahi
|
a ko te ahiahi , ko te ata , he ra kotahi
|
||||||
|]
|
|]
|
||||||
|
@ -1,22 +1,33 @@
|
|||||||
module GEval.EvaluationScheme
|
module GEval.EvaluationScheme
|
||||||
(EvaluationScheme(..), evaluationSchemeMetric, applyPreprocessingOperations, evaluationSchemeName, PreprocessingOperation(..))
|
(EvaluationScheme(..),
|
||||||
|
evaluationSchemeMetric,
|
||||||
|
applyPreprocessingOperations,
|
||||||
|
evaluationSchemeName,
|
||||||
|
evaluationSchemePriority,
|
||||||
|
PreprocessingOperation(..))
|
||||||
where
|
where
|
||||||
|
|
||||||
import GEval.Metric
|
import GEval.Metric
|
||||||
|
|
||||||
import Text.Regex.PCRE.Heavy
|
import Text.Regex.PCRE.Heavy
|
||||||
import Text.Regex.PCRE.Light.Base (Regex(..))
|
import Text.Regex.PCRE.Light.Base (Regex(..))
|
||||||
import Data.Text (Text(..), concat, toLower, toUpper, pack, unpack)
|
import Data.Text (Text(..), concat, toLower, toUpper, pack, unpack, words, unwords)
|
||||||
import Data.List (intercalate, break)
|
import Data.List (intercalate, break, sort)
|
||||||
import Data.Either
|
import Data.Either
|
||||||
import Data.Maybe (fromMaybe)
|
import Data.Maybe (fromMaybe, catMaybes)
|
||||||
import qualified Data.ByteString.UTF8 as BSU
|
import qualified Data.ByteString.UTF8 as BSU
|
||||||
|
|
||||||
|
|
||||||
data EvaluationScheme = EvaluationScheme Metric [PreprocessingOperation]
|
data EvaluationScheme = EvaluationScheme Metric [PreprocessingOperation]
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
|
|
||||||
data PreprocessingOperation = RegexpMatch Regex | LowerCasing | UpperCasing | SetName Text
|
data PreprocessingOperation = RegexpMatch Regex
|
||||||
|
| LowerCasing
|
||||||
|
| UpperCasing
|
||||||
|
| Sorting
|
||||||
|
| SetName Text
|
||||||
|
| SetPriority Int
|
||||||
|
| RegexpSubstition Regex Text
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
|
|
||||||
leftParameterBracket :: Char
|
leftParameterBracket :: Char
|
||||||
@ -39,16 +50,37 @@ readOps ('l':theRest) = (LowerCasing:ops, theRest')
|
|||||||
readOps ('u':theRest) = (UpperCasing:ops, theRest')
|
readOps ('u':theRest) = (UpperCasing:ops, theRest')
|
||||||
where (ops, theRest') = readOps theRest
|
where (ops, theRest') = readOps theRest
|
||||||
readOps ('m':theRest) = handleParametrizedOp (RegexpMatch . (fromRight undefined) . ((flip compileM) []) . BSU.fromString) theRest
|
readOps ('m':theRest) = handleParametrizedOp (RegexpMatch . (fromRight undefined) . ((flip compileM) []) . BSU.fromString) theRest
|
||||||
|
readOps ('S':theRest) = (Sorting:ops, theRest')
|
||||||
|
where (ops, theRest') = readOps theRest
|
||||||
readOps ('N':theRest) = handleParametrizedOp (SetName . pack) theRest
|
readOps ('N':theRest) = handleParametrizedOp (SetName . pack) theRest
|
||||||
|
readOps ('P':theRest) = handleParametrizedOp (SetPriority . read) theRest
|
||||||
|
readOps ('s':theRest) = handleParametrizedBinaryOp (\a b -> RegexpSubstition (fromRight undefined $ compileM (BSU.fromString a) []) (pack b)) theRest
|
||||||
readOps s = ([], s)
|
readOps s = ([], s)
|
||||||
|
|
||||||
handleParametrizedOp :: (String -> PreprocessingOperation) -> String -> ([PreprocessingOperation], String)
|
handleParametrizedOp :: (String -> PreprocessingOperation) -> String -> ([PreprocessingOperation], String)
|
||||||
handleParametrizedOp constructor (leftParameterBracket:theRest) =
|
handleParametrizedOp constructor theRest =
|
||||||
|
case parseParameter theRest of
|
||||||
|
(Nothing, s) -> ([], s)
|
||||||
|
(Just param, theRest') -> let (ops, theRest'') = readOps theRest'
|
||||||
|
in ((constructor param):ops, theRest'')
|
||||||
|
|
||||||
|
handleParametrizedBinaryOp :: (String -> String -> PreprocessingOperation) -> String -> ([PreprocessingOperation], String)
|
||||||
|
handleParametrizedBinaryOp constructor theRest =
|
||||||
|
case parseParameter theRest of
|
||||||
|
(Nothing, s) -> ([], s)
|
||||||
|
(Just paramA, theRest') ->
|
||||||
|
case parseParameter theRest' of
|
||||||
|
(Nothing, s) -> ([], s)
|
||||||
|
(Just paramB, theRest'') -> let (ops, theRest''') = readOps theRest''
|
||||||
|
in ((constructor paramA paramB):ops, theRest''')
|
||||||
|
|
||||||
|
parseParameter :: String -> (Maybe String, String)
|
||||||
|
parseParameter (leftParameterBracket:theRest) =
|
||||||
case break (== rightParameterBracket) theRest of
|
case break (== rightParameterBracket) theRest of
|
||||||
(s, []) -> ([], s)
|
(s, []) -> (Nothing, s)
|
||||||
(param, (_:theRest')) -> let (ops, theRest'') = readOps theRest'
|
(param, (_:theRest')) -> (Just param, theRest')
|
||||||
in ((constructor param):ops, theRest'')
|
parseParameter s = (Nothing, s)
|
||||||
handleParametrizedOp _ s = ([], s)
|
|
||||||
|
|
||||||
instance Show EvaluationScheme where
|
instance Show EvaluationScheme where
|
||||||
show (EvaluationScheme metric operations) = (show metric) ++ (if null operations
|
show (EvaluationScheme metric operations) = (show metric) ++ (if null operations
|
||||||
@ -58,10 +90,21 @@ instance Show EvaluationScheme where
|
|||||||
evaluationSchemeName :: EvaluationScheme -> String
|
evaluationSchemeName :: EvaluationScheme -> String
|
||||||
evaluationSchemeName scheme@(EvaluationScheme metric operations) = fromMaybe (show scheme) (findNameSet operations)
|
evaluationSchemeName scheme@(EvaluationScheme metric operations) = fromMaybe (show scheme) (findNameSet operations)
|
||||||
|
|
||||||
|
evaluationSchemePriority scheme@(EvaluationScheme _ operations) = fromMaybe defaultPriority (findPrioritySet operations)
|
||||||
|
where defaultPriority = 1
|
||||||
|
|
||||||
findNameSet :: [PreprocessingOperation] -> Maybe String
|
findNameSet :: [PreprocessingOperation] -> Maybe String
|
||||||
findNameSet [] = Nothing
|
findNameSet ops = case names of
|
||||||
findNameSet ((SetName name):_) = Just (unpack name)
|
[] -> Nothing
|
||||||
findNameSet (_:ops) = findNameSet ops
|
_ -> Just $ intercalate " " names
|
||||||
|
where names = catMaybes $ map extractName ops
|
||||||
|
extractName (SetName n) = Just (unpack n)
|
||||||
|
extractName _ = Nothing
|
||||||
|
|
||||||
|
findPrioritySet :: [PreprocessingOperation] -> Maybe Int
|
||||||
|
findPrioritySet [] = Nothing
|
||||||
|
findPrioritySet ((SetPriority p):_) = Just p
|
||||||
|
findPrioritySet (_:ops) = findPrioritySet ops
|
||||||
|
|
||||||
evaluationSchemeMetric :: EvaluationScheme -> Metric
|
evaluationSchemeMetric :: EvaluationScheme -> Metric
|
||||||
evaluationSchemeMetric (EvaluationScheme metric _) = metric
|
evaluationSchemeMetric (EvaluationScheme metric _) = metric
|
||||||
@ -70,10 +113,31 @@ instance Show PreprocessingOperation where
|
|||||||
show (RegexpMatch (Regex _ regexp)) = parametrizedOperation "m" (BSU.toString regexp)
|
show (RegexpMatch (Regex _ regexp)) = parametrizedOperation "m" (BSU.toString regexp)
|
||||||
show LowerCasing = "l"
|
show LowerCasing = "l"
|
||||||
show UpperCasing = "u"
|
show UpperCasing = "u"
|
||||||
|
show Sorting = "S"
|
||||||
show (SetName t) = parametrizedOperation "N" (unpack t)
|
show (SetName t) = parametrizedOperation "N" (unpack t)
|
||||||
|
show (SetPriority p) = parametrizedOperation "P" (show p)
|
||||||
|
show (RegexpSubstition (Regex _ regexp) s) = "s" ++ (formatParameter $ BSU.toString regexp) ++ (formatParameter $ unpack s)
|
||||||
|
|
||||||
|
applySubstitution :: Regex -> Text -> Text -> Text
|
||||||
|
applySubstitution r substitution t =
|
||||||
|
gsub r (handleRefs substitution) t
|
||||||
|
|
||||||
|
handleRefs :: Text -> Text -> [Text] -> Text
|
||||||
|
handleRefs substitution mainMatch subMatches = gsub refRegexp handleRef substitution
|
||||||
|
where Right refRegexp = compileM (BSU.fromString "\\\\\\d+") []
|
||||||
|
indexables = mainMatch : subMatches
|
||||||
|
handleRef :: Text -> Text
|
||||||
|
handleRef ref =
|
||||||
|
let ix = (read $ tail $ unpack ref)
|
||||||
|
in if ix >= length indexables
|
||||||
|
then (pack "")
|
||||||
|
else indexables !! ix
|
||||||
|
|
||||||
parametrizedOperation :: String -> String -> String
|
parametrizedOperation :: String -> String -> String
|
||||||
parametrizedOperation opCode opArg = opCode ++ [leftParameterBracket] ++ opArg ++ [rightParameterBracket]
|
parametrizedOperation opCode opArg = opCode ++ (formatParameter opArg)
|
||||||
|
|
||||||
|
formatParameter :: String -> String
|
||||||
|
formatParameter p = [leftParameterBracket] ++ p ++ [rightParameterBracket]
|
||||||
|
|
||||||
applyPreprocessingOperations :: EvaluationScheme -> Text -> Text
|
applyPreprocessingOperations :: EvaluationScheme -> Text -> Text
|
||||||
applyPreprocessingOperations (EvaluationScheme _ operations) t = foldl (flip applyPreprocessingOperation) t operations
|
applyPreprocessingOperations (EvaluationScheme _ operations) t = foldl (flip applyPreprocessingOperation) t operations
|
||||||
@ -82,4 +146,7 @@ applyPreprocessingOperation :: PreprocessingOperation -> Text -> Text
|
|||||||
applyPreprocessingOperation (RegexpMatch regex) = Data.Text.concat . (map fst) . (scan regex)
|
applyPreprocessingOperation (RegexpMatch regex) = Data.Text.concat . (map fst) . (scan regex)
|
||||||
applyPreprocessingOperation LowerCasing = toLower
|
applyPreprocessingOperation LowerCasing = toLower
|
||||||
applyPreprocessingOperation UpperCasing = toUpper
|
applyPreprocessingOperation UpperCasing = toUpper
|
||||||
|
applyPreprocessingOperation Sorting = Data.Text.unwords . sort . Data.Text.words
|
||||||
applyPreprocessingOperation (SetName _) = id
|
applyPreprocessingOperation (SetName _) = id
|
||||||
|
applyPreprocessingOperation (SetPriority _) = id
|
||||||
|
applyPreprocessingOperation (RegexpSubstition regex substition) = applySubstitution regex substition
|
||||||
|
@ -16,11 +16,14 @@ module GEval.LineByLine
|
|||||||
runDiffGeneralized,
|
runDiffGeneralized,
|
||||||
LineRecord(..),
|
LineRecord(..),
|
||||||
ResultOrdering(..),
|
ResultOrdering(..),
|
||||||
justTokenize
|
justTokenize,
|
||||||
|
worstFeaturesPipeline,
|
||||||
|
runOracleItemBased
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import GEval.Core
|
import GEval.Core
|
||||||
import GEval.Common
|
import GEval.Common
|
||||||
|
import GEval.EvaluationScheme
|
||||||
import Text.Tokenizer
|
import Text.Tokenizer
|
||||||
|
|
||||||
import Data.Conduit.AutoDecompress (doNothing)
|
import Data.Conduit.AutoDecompress (doNothing)
|
||||||
@ -33,10 +36,11 @@ import Data.Text
|
|||||||
import Data.Text.Encoding
|
import Data.Text.Encoding
|
||||||
import Data.Conduit.Rank
|
import Data.Conduit.Rank
|
||||||
import Data.Maybe (fromMaybe)
|
import Data.Maybe (fromMaybe)
|
||||||
|
import Data.Either (rights)
|
||||||
|
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
|
|
||||||
import Data.List (sortBy, sortOn, sort, concat)
|
import Data.List (sortBy, sortOn, sort, concat, maximumBy)
|
||||||
|
|
||||||
import Control.Monad.IO.Class
|
import Control.Monad.IO.Class
|
||||||
import Control.Monad.Trans.Resource
|
import Control.Monad.Trans.Resource
|
||||||
@ -83,7 +87,6 @@ parseReferenceEntry :: Text -> (Integer, Text)
|
|||||||
parseReferenceEntry line = (read $ unpack refId, t)
|
parseReferenceEntry line = (read $ unpack refId, t)
|
||||||
where [refId, t] = splitOn "\t" line
|
where [refId, t] = splitOn "\t" line
|
||||||
|
|
||||||
|
|
||||||
runLineByLine :: ResultOrdering -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
runLineByLine :: ResultOrdering -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
||||||
runLineByLine ordering featureFilter spec bbdo = runLineByLineGeneralized ordering spec consum
|
runLineByLine ordering featureFilter spec bbdo = runLineByLineGeneralized ordering spec consum
|
||||||
where consum :: Maybe References -> ConduitT LineRecord Void (ResourceT IO) ()
|
where consum :: Maybe References -> ConduitT LineRecord Void (ResourceT IO) ()
|
||||||
@ -107,28 +110,26 @@ runFeatureFilter (Just feature) spec bbdo mReferences = CC.map (\l -> (fakeRank,
|
|||||||
checkFeature feature (_, LineWithFactors _ _ fs) = feature `elem` (Prelude.map show fs)
|
checkFeature feature (_, LineWithFactors _ _ fs) = feature `elem` (Prelude.map show fs)
|
||||||
|
|
||||||
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
||||||
runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences)
|
runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences consumFeatures)
|
||||||
where ordering' = forceSomeOrdering ordering
|
where ordering' = forceSomeOrdering ordering
|
||||||
|
|
||||||
|
consumFeatures = CL.map (encodeUtf8 . formatFeatureWithPValue)
|
||||||
|
.| CC.unlinesAscii
|
||||||
|
.| CC.stdout
|
||||||
|
|
||||||
|
worstFeaturesPipeline :: Bool
|
||||||
worstFeaturesPipeline :: Bool -> GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT LineRecord Void (ResourceT IO) ()
|
-> GEvalSpecification
|
||||||
worstFeaturesPipeline reversed spec bbdo mReferences = rank (lessByMetric reversed $ gesMainMetric spec)
|
-> BlackBoxDebuggingOptions
|
||||||
|
-> Maybe References
|
||||||
|
-> ConduitT FeatureWithPValue Void (ResourceT IO) ()
|
||||||
|
-> ConduitT LineRecord Void (ResourceT IO) ()
|
||||||
|
worstFeaturesPipeline reversed spec bbdo mReferences consum = rank (lessByMetric reversed $ gesMainMetric spec)
|
||||||
.| evalStateC 0 (extractFeaturesAndPValues spec bbdo mReferences)
|
.| evalStateC 0 (extractFeaturesAndPValues spec bbdo mReferences)
|
||||||
.| CC.filter (\(FeatureWithPValue _ p _ _) -> not $ isNaN p) -- NaN values would poison sorting
|
.| CC.filter (\(FeatureWithPValue _ p _ _) -> not $ isNaN p) -- NaN values would poison sorting
|
||||||
.| gobbleAndDo (sortBy featureOrder)
|
.| gobbleAndDo (sortBy featureOrder)
|
||||||
.| filtreCartesian (bbdoCartesian bbdo)
|
.| filtreCartesian (bbdoCartesian bbdo)
|
||||||
.| CL.map (encodeUtf8 . formatFeatureWithPValue)
|
.| consum
|
||||||
.| CC.unlinesAscii
|
where featureOrder (FeatureWithPValue _ p1 _ _) (FeatureWithPValue _ p2 _ _) =
|
||||||
.| CC.stdout
|
|
||||||
where formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
|
|
||||||
formatScore score,
|
|
||||||
escapeTabs inp,
|
|
||||||
escapeTabs exp,
|
|
||||||
escapeTabs out]
|
|
||||||
formatScore :: MetricValue -> Text
|
|
||||||
formatScore = Data.Text.pack . printf "%f"
|
|
||||||
featureOrder (FeatureWithPValue _ p1 _ _) (FeatureWithPValue _ p2 _ _) =
|
|
||||||
p1 `compare` p2
|
p1 `compare` p2
|
||||||
|
|
||||||
-- for commands like --worst-features we need some ordering (KeepTheOriginalOrder
|
-- for commands like --worst-features we need some ordering (KeepTheOriginalOrder
|
||||||
@ -359,8 +360,9 @@ runLineByLineGeneralized ordering spec consum = do
|
|||||||
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFilesSingleOut True spec
|
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFilesSingleOut True spec
|
||||||
gevalLineByLineCore metric mSelector preprocess inputFilePath expectedFilePath outFilePath (sorter ordering .| consum mReferences)
|
gevalLineByLineCore metric mSelector preprocess inputFilePath expectedFilePath outFilePath (sorter ordering .| consum mReferences)
|
||||||
where metric = gesMainMetric spec
|
where metric = gesMainMetric spec
|
||||||
|
scheme = gesMainScheme spec
|
||||||
mSelector = gesSelector spec
|
mSelector = gesSelector spec
|
||||||
preprocess = gesPreprocess spec
|
preprocess = (gesPreprocess spec) . (applyPreprocessingOperations scheme)
|
||||||
sorter KeepTheOriginalOrder = doNothing
|
sorter KeepTheOriginalOrder = doNothing
|
||||||
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
|
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
|
||||||
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
|
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
|
||||||
@ -393,6 +395,31 @@ runDiff ordering featureFilter otherOut spec bbdo = runDiffGeneralized ordering
|
|||||||
formatScoreDiff :: Double -> Text
|
formatScoreDiff :: Double -> Text
|
||||||
formatScoreDiff = Data.Text.pack . printf "%f"
|
formatScoreDiff = Data.Text.pack . printf "%f"
|
||||||
|
|
||||||
|
runOracleItemBased :: GEvalSpecification -> IO ()
|
||||||
|
runOracleItemBased spec = runMultiOutputGeneralized spec consum
|
||||||
|
where consum = CL.map picker .| format
|
||||||
|
picker = maximumBy (\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) -> metricCompare metric scoreA scoreB)
|
||||||
|
format = CL.map (encodeUtf8 . formatOutput)
|
||||||
|
.| CC.unlinesAscii
|
||||||
|
.| CC.stdout
|
||||||
|
formatOutput (LineRecord _ _ out _ _) = out
|
||||||
|
metric = gesMainMetric spec
|
||||||
|
|
||||||
|
runMultiOutputGeneralized :: GEvalSpecification -> ConduitT [LineRecord] Void (ResourceT IO) () -> IO ()
|
||||||
|
runMultiOutputGeneralized spec consum = do
|
||||||
|
(inputSource, expectedSource, outSource) <- checkAndGetFilesSingleOut True spec
|
||||||
|
let (Just altOuts) = gesAltOutFiles spec
|
||||||
|
altSourceSpecs' <- mapM (getSmartSourceSpec ((gesOutDirectory spec) </> (gesTestName spec)) "out.tsv") altOuts
|
||||||
|
let altSourceSpecs = rights altSourceSpecs'
|
||||||
|
let sourceSpecs = (outSource:altSourceSpecs)
|
||||||
|
let sources = Prelude.map (gevalLineByLineSource metric mSelector preprocess inputSource expectedSource) sourceSpecs
|
||||||
|
runResourceT $ runConduit $
|
||||||
|
(sequenceSources sources .| consum)
|
||||||
|
where metric = gesMainMetric spec
|
||||||
|
scheme = gesMainScheme spec
|
||||||
|
preprocess = (gesPreprocess spec) . (applyPreprocessingOperations scheme)
|
||||||
|
mSelector = gesSelector spec
|
||||||
|
|
||||||
runMostWorseningFeatures :: ResultOrdering -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
runMostWorseningFeatures :: ResultOrdering -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
||||||
runMostWorseningFeatures ordering otherOut spec bbdo = runDiffGeneralized ordering' otherOut spec consum
|
runMostWorseningFeatures ordering otherOut spec bbdo = runDiffGeneralized ordering' otherOut spec consum
|
||||||
where ordering' = forceSomeOrdering ordering
|
where ordering' = forceSomeOrdering ordering
|
||||||
@ -402,7 +429,7 @@ runMostWorseningFeatures ordering otherOut spec bbdo = runDiffGeneralized order
|
|||||||
FirstTheBest -> True
|
FirstTheBest -> True
|
||||||
consum :: Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
|
consum :: Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
|
||||||
consum = \mReferences -> CC.map prepareFakeLineRecord
|
consum = \mReferences -> CC.map prepareFakeLineRecord
|
||||||
.| (worstFeaturesPipeline reversed spec bbdo mReferences)
|
.| (worstFeaturesPipeline reversed spec bbdo mReferences consumFeatures)
|
||||||
prepareFakeLineRecord :: (LineRecord, LineRecord) -> LineRecord
|
prepareFakeLineRecord :: (LineRecord, LineRecord) -> LineRecord
|
||||||
prepareFakeLineRecord (LineRecord _ _ _ _ scorePrev, LineRecord inp exp out c score) =
|
prepareFakeLineRecord (LineRecord _ _ _ _ scorePrev, LineRecord inp exp out c score) =
|
||||||
LineRecord inp exp out c (score - scorePrev)
|
LineRecord inp exp out c (score - scorePrev)
|
||||||
|
@ -8,7 +8,8 @@ module GEval.Metric
|
|||||||
bestPossibleValue,
|
bestPossibleValue,
|
||||||
perfectOutLineFromExpectedLine,
|
perfectOutLineFromExpectedLine,
|
||||||
fixedNumberOfColumnsInExpected,
|
fixedNumberOfColumnsInExpected,
|
||||||
fixedNumberOfColumnsInInput)
|
fixedNumberOfColumnsInInput,
|
||||||
|
metricCompare)
|
||||||
where
|
where
|
||||||
|
|
||||||
import Data.Word
|
import Data.Word
|
||||||
@ -28,7 +29,12 @@ data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | C
|
|||||||
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
||||||
| BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
|
| BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
|
||||||
| MultiLabelLogLoss | MultiLabelLikelihood
|
| MultiLabelLogLoss | MultiLabelLikelihood
|
||||||
| SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
|
| SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double
|
||||||
|
| ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
|
||||||
|
-- it would be better to avoid infinite recursion here
|
||||||
|
-- `Mean (Mean BLEU)` is not useful, but as it would mean
|
||||||
|
-- a larger refactor, we will postpone this
|
||||||
|
| Mean Metric
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
|
|
||||||
instance Show Metric where
|
instance Show Metric where
|
||||||
@ -73,8 +79,12 @@ instance Show Metric where
|
|||||||
show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
|
show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
|
||||||
show MultiLabelLogLoss = "MultiLabel-Logloss"
|
show MultiLabelLogLoss = "MultiLabel-Logloss"
|
||||||
show MultiLabelLikelihood = "MultiLabel-Likelihood"
|
show MultiLabelLikelihood = "MultiLabel-Likelihood"
|
||||||
|
show (Mean metric) = "Mean/" ++ (show metric)
|
||||||
|
|
||||||
instance Read Metric where
|
instance Read Metric where
|
||||||
|
readsPrec p ('M':'e':'a':'n':'/':theRest) = case readsPrec p theRest of
|
||||||
|
[(metric, theRest)] -> [(Mean metric, theRest)]
|
||||||
|
_ -> []
|
||||||
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
||||||
readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
|
readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
|
||||||
readsPrec _ ('P':'e':'a':'r':'s':'o':'n':theRest) = [(Pearson, theRest)]
|
readsPrec _ ('P':'e':'a':'r':'s':'o':'n':theRest) = [(Pearson, theRest)]
|
||||||
@ -162,6 +172,12 @@ getMetricOrdering SMAPE = TheLowerTheBetter
|
|||||||
getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
|
getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
|
||||||
getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter
|
getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter
|
||||||
getMetricOrdering MultiLabelLikelihood = TheHigherTheBetter
|
getMetricOrdering MultiLabelLikelihood = TheHigherTheBetter
|
||||||
|
getMetricOrdering (Mean metric) = getMetricOrdering metric
|
||||||
|
|
||||||
|
metricCompare :: Metric -> MetricValue -> MetricValue -> Ordering
|
||||||
|
metricCompare metric a b = metricCompare' (getMetricOrdering metric) a b
|
||||||
|
where metricCompare' TheHigherTheBetter a b = a `compare` b
|
||||||
|
metricCompare' TheLowerTheBetter a b = b `compare` a
|
||||||
|
|
||||||
bestPossibleValue :: Metric -> MetricValue
|
bestPossibleValue :: Metric -> MetricValue
|
||||||
bestPossibleValue metric = case getMetricOrdering metric of
|
bestPossibleValue metric = case getMetricOrdering metric of
|
||||||
@ -169,18 +185,21 @@ bestPossibleValue metric = case getMetricOrdering metric of
|
|||||||
TheHigherTheBetter -> 1.0
|
TheHigherTheBetter -> 1.0
|
||||||
|
|
||||||
fixedNumberOfColumnsInExpected :: Metric -> Bool
|
fixedNumberOfColumnsInExpected :: Metric -> Bool
|
||||||
|
fixedNumberOfColumnsInExpected (Mean metric) = fixedNumberOfColumnsInExpected metric
|
||||||
fixedNumberOfColumnsInExpected MAP = False
|
fixedNumberOfColumnsInExpected MAP = False
|
||||||
fixedNumberOfColumnsInExpected BLEU = False
|
fixedNumberOfColumnsInExpected BLEU = False
|
||||||
fixedNumberOfColumnsInExpected GLEU = False
|
fixedNumberOfColumnsInExpected GLEU = False
|
||||||
fixedNumberOfColumnsInExpected _ = True
|
fixedNumberOfColumnsInExpected _ = True
|
||||||
|
|
||||||
fixedNumberOfColumnsInInput :: Metric -> Bool
|
fixedNumberOfColumnsInInput :: Metric -> Bool
|
||||||
|
fixedNumberOfColumnsInInput (Mean metric) = fixedNumberOfColumnsInInput metric
|
||||||
fixedNumberOfColumnsInInput (SoftFMeasure _) = False
|
fixedNumberOfColumnsInInput (SoftFMeasure _) = False
|
||||||
fixedNumberOfColumnsInInput (ProbabilisticSoftFMeasure _) = False
|
fixedNumberOfColumnsInInput (ProbabilisticSoftFMeasure _) = False
|
||||||
fixedNumberOfColumnsInInput (Soft2DFMeasure _) = False
|
fixedNumberOfColumnsInInput (Soft2DFMeasure _) = False
|
||||||
fixedNumberOfColumnsInInput _ = True
|
fixedNumberOfColumnsInInput _ = True
|
||||||
|
|
||||||
perfectOutLineFromExpectedLine :: Metric -> Text -> Text
|
perfectOutLineFromExpectedLine :: Metric -> Text -> Text
|
||||||
|
perfectOutLineFromExpectedLine (Mean metric) t = perfectOutLineFromExpectedLine metric t
|
||||||
perfectOutLineFromExpectedLine (LogLossHashed _) t = t <> ":1.0"
|
perfectOutLineFromExpectedLine (LogLossHashed _) t = t <> ":1.0"
|
||||||
perfectOutLineFromExpectedLine (LikelihoodHashed _) t = t <> ":1.0"
|
perfectOutLineFromExpectedLine (LikelihoodHashed _) t = t <> ":1.0"
|
||||||
perfectOutLineFromExpectedLine BLEU t = getFirstColumn t
|
perfectOutLineFromExpectedLine BLEU t = getFirstColumn t
|
||||||
|
@ -222,6 +222,7 @@ type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
|
|||||||
ItemIntermediateRepresentationType ALogLossHashed = (Text, Text)
|
ItemIntermediateRepresentationType ALogLossHashed = (Text, Text)
|
||||||
ItemIntermediateRepresentationType ALikelihoodHashed = (Text, Text)
|
ItemIntermediateRepresentationType ALikelihoodHashed = (Text, Text)
|
||||||
ItemIntermediateRepresentationType ACharMatch = (Text, Text)
|
ItemIntermediateRepresentationType ACharMatch = (Text, Text)
|
||||||
|
ItemIntermediateRepresentationType AWER = (Int, Int)
|
||||||
ItemIntermediateRepresentationType t = Double
|
ItemIntermediateRepresentationType t = Double
|
||||||
|
|
||||||
itemStep :: SAMetric t -> (ParsedExpectedType t, ParsedOutputType t) -> ItemIntermediateRepresentationType t
|
itemStep :: SAMetric t -> (ParsedExpectedType t, ParsedOutputType t) -> ItemIntermediateRepresentationType t
|
||||||
|
@ -48,6 +48,7 @@ listOfAvailableMetrics = [RMSE,
|
|||||||
MultiLabelFMeasure 1.0,
|
MultiLabelFMeasure 1.0,
|
||||||
MultiLabelFMeasure 2.0,
|
MultiLabelFMeasure 2.0,
|
||||||
MultiLabelFMeasure 0.25,
|
MultiLabelFMeasure 0.25,
|
||||||
|
Mean (MultiLabelFMeasure 1.0),
|
||||||
ProbabilisticMultiLabelFMeasure 1.0,
|
ProbabilisticMultiLabelFMeasure 1.0,
|
||||||
ProbabilisticMultiLabelFMeasure 2.0,
|
ProbabilisticMultiLabelFMeasure 2.0,
|
||||||
ProbabilisticMultiLabelFMeasure 0.25,
|
ProbabilisticMultiLabelFMeasure 0.25,
|
||||||
|
@ -39,6 +39,7 @@ import GEval.Validation
|
|||||||
import Data.List (intercalate)
|
import Data.List (intercalate)
|
||||||
|
|
||||||
import Data.Conduit.SmartSource
|
import Data.Conduit.SmartSource
|
||||||
|
import Data.CartesianStrings
|
||||||
|
|
||||||
fullOptionsParser = info (helper <*> optionsParser)
|
fullOptionsParser = info (helper <*> optionsParser)
|
||||||
(fullDesc
|
(fullDesc
|
||||||
@ -94,6 +95,10 @@ optionsParser = GEvalOptions
|
|||||||
(flag' ListMetrics
|
(flag' ListMetrics
|
||||||
( long "list-metrics"
|
( long "list-metrics"
|
||||||
<> help "List all metrics with their descriptions"))
|
<> help "List all metrics with their descriptions"))
|
||||||
|
<|>
|
||||||
|
(flag' OracleItemBased
|
||||||
|
( long "oracle-item-based"
|
||||||
|
<> help "Generate the best possible output considering outputs given by --out-file and --alt-out-file options (and peeking into the expected file)."))
|
||||||
)
|
)
|
||||||
|
|
||||||
<*> ((flag' FirstTheWorst
|
<*> ((flag' FirstTheWorst
|
||||||
@ -151,6 +156,10 @@ specParser = GEvalSpecification
|
|||||||
<> showDefault
|
<> showDefault
|
||||||
<> metavar "OUT"
|
<> metavar "OUT"
|
||||||
<> help "The name of the file to be evaluated" )
|
<> help "The name of the file to be evaluated" )
|
||||||
|
<*> (optional $ some $ strOption
|
||||||
|
( long "alt-out-file"
|
||||||
|
<> metavar "OUT"
|
||||||
|
<> help "Alternative output file, makes sense only for some options, e.g. --oracle-item-based"))
|
||||||
<*> strOption
|
<*> strOption
|
||||||
( long "expected-file"
|
( long "expected-file"
|
||||||
<> short 'e'
|
<> short 'e'
|
||||||
@ -249,14 +258,12 @@ sel (Just m) _ = m
|
|||||||
|
|
||||||
|
|
||||||
metricReader :: Parser [EvaluationScheme]
|
metricReader :: Parser [EvaluationScheme]
|
||||||
metricReader = many $ option auto -- actually `some` should be used instead of `many`, the problem is that
|
metricReader = concatCartesianStrings <$>
|
||||||
( long "metric" -- --metric might be in the config.txt file...
|
(many $ option auto -- actually `some` should be used instead of `many`, the problem is that
|
||||||
<> short 'm'
|
( long "metric" -- --metric might be in the config.txt file...
|
||||||
<> metavar "METRIC"
|
<> short 'm'
|
||||||
<> help ("Metric to be used, e.g.:" ++ helpMetricParameterMetricsList))
|
<> metavar "METRIC"
|
||||||
|
<> help ("Metric to be used, e.g.:" ++ helpMetricParameterMetricsList)))
|
||||||
|
|
||||||
-- RMSE, MSE, MAE, SMAPE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MultiLabel-Likelihood, MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, TokenAccuracy, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25), probabilistic soft F-measure (specify as Probabilistic-Soft-F1, Probabilistic-Soft-F2, Probabilistic-Soft-F0.25) or CharMatch" )
|
|
||||||
|
|
||||||
altMetricReader :: Parser (Maybe EvaluationScheme)
|
altMetricReader :: Parser (Maybe EvaluationScheme)
|
||||||
altMetricReader = optional $ option auto
|
altMetricReader = optional $ option auto
|
||||||
@ -362,6 +369,9 @@ runGEval''' (Just Validate) _ _ spec _ _ = do
|
|||||||
runGEval''' (Just ListMetrics) _ _ _ _ _ = do
|
runGEval''' (Just ListMetrics) _ _ _ _ _ = do
|
||||||
listMetrics
|
listMetrics
|
||||||
return Nothing
|
return Nothing
|
||||||
|
runGEval''' (Just OracleItemBased) _ _ spec _ _ = do
|
||||||
|
runOracleItemBased spec
|
||||||
|
return Nothing
|
||||||
|
|
||||||
getGraphFilename :: Int -> FilePath -> FilePath
|
getGraphFilename :: Int -> FilePath -> FilePath
|
||||||
getGraphFilename 0 fp = fp
|
getGraphFilename 0 fp = fp
|
||||||
|
@ -76,16 +76,13 @@ validationChallenge challengeDirectory spec = do
|
|||||||
checkCorrectFile gitignoreFile
|
checkCorrectFile gitignoreFile
|
||||||
checkCorrectFile readmeFile
|
checkCorrectFile readmeFile
|
||||||
testDirectories <- findTestDirs challengeDirectory
|
testDirectories <- findTestDirs challengeDirectory
|
||||||
checkTestDirectories mainMetric testDirectories
|
checkTestDirectories spec testDirectories
|
||||||
checkTrainDirectory mainMetric challengeDirectory
|
checkTrainDirectory spec challengeDirectory
|
||||||
|
|
||||||
mapM_ (runOnTest spec) testDirectories
|
|
||||||
|
|
||||||
where
|
where
|
||||||
configFile = challengeDirectory </> "config.txt"
|
configFile = challengeDirectory </> "config.txt"
|
||||||
gitignoreFile = challengeDirectory </> ".gitignore"
|
gitignoreFile = challengeDirectory </> ".gitignore"
|
||||||
readmeFile = challengeDirectory </> "README.md"
|
readmeFile = challengeDirectory </> "README.md"
|
||||||
mainMetric = evaluationSchemeMetric $ head $ gesMetrics spec
|
|
||||||
|
|
||||||
checkCorrectFile :: FilePath -> IO ()
|
checkCorrectFile :: FilePath -> IO ()
|
||||||
checkCorrectFile filePath = do
|
checkCorrectFile filePath = do
|
||||||
@ -147,7 +144,8 @@ never :: FindClause Bool
|
|||||||
never = depth ==? 0
|
never = depth ==? 0
|
||||||
|
|
||||||
testDirFilter :: FindClause Bool
|
testDirFilter :: FindClause Bool
|
||||||
testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*" ||? SFF.fileName ~~? "test-*")
|
testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*"
|
||||||
|
||? SFF.fileName ~~? "test-*")
|
||||||
|
|
||||||
fileFilter :: String -> FindClause Bool
|
fileFilter :: String -> FindClause Bool
|
||||||
fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts)
|
fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts)
|
||||||
@ -155,12 +153,12 @@ fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileN
|
|||||||
exts = Prelude.concat [ "(", intercalate "|" compressedFilesHandled, ")" ]
|
exts = Prelude.concat [ "(", intercalate "|" compressedFilesHandled, ")" ]
|
||||||
|
|
||||||
|
|
||||||
checkTestDirectories :: Metric -> [FilePath] -> IO ()
|
checkTestDirectories :: GEvalSpecification -> [FilePath] -> IO ()
|
||||||
checkTestDirectories _ [] = throwM NoTestDirectories
|
checkTestDirectories _ [] = throwM NoTestDirectories
|
||||||
checkTestDirectories metric directories = mapM_ (checkTestDirectory metric) directories
|
checkTestDirectories spec directories = mapM_ (checkTestDirectory spec) directories
|
||||||
|
|
||||||
checkTestDirectory :: Metric -> FilePath -> IO ()
|
checkTestDirectory :: GEvalSpecification -> FilePath -> IO ()
|
||||||
checkTestDirectory metric directoryPath = do
|
checkTestDirectory spec directoryPath = do
|
||||||
inputFiles <- findInputFiles directoryPath
|
inputFiles <- findInputFiles directoryPath
|
||||||
when (null inputFiles) $ throw $ NoInputFile inputFile
|
when (null inputFiles) $ throw $ NoInputFile inputFile
|
||||||
when (length inputFiles > 1) $ throw $ TooManyInputFiles inputFiles
|
when (length inputFiles > 1) $ throw $ TooManyInputFiles inputFiles
|
||||||
@ -180,21 +178,29 @@ checkTestDirectory metric directoryPath = do
|
|||||||
|
|
||||||
outputFiles <- findOutputFiles directoryPath
|
outputFiles <- findOutputFiles directoryPath
|
||||||
unless (null outputFiles) $ throw $ OutputFileDetected outputFiles
|
unless (null outputFiles) $ throw $ OutputFileDetected outputFiles
|
||||||
|
|
||||||
|
runOnTest spec directoryPath
|
||||||
|
|
||||||
where
|
where
|
||||||
|
metric = evaluationSchemeMetric $ head $ gesMetrics spec
|
||||||
inputFile = directoryPath </> defaultInputFile
|
inputFile = directoryPath </> defaultInputFile
|
||||||
|
|
||||||
expectedFile = directoryPath </> defaultExpectedFile
|
expectedFile = directoryPath </> defaultExpectedFile
|
||||||
|
|
||||||
checkTrainDirectory :: Metric -> FilePath -> IO ()
|
checkTrainDirectory :: GEvalSpecification -> FilePath -> IO ()
|
||||||
checkTrainDirectory metric challengeDirectory = do
|
checkTrainDirectory spec challengeDirectory = do
|
||||||
let trainDirectory = challengeDirectory </> "train"
|
let trainDirectory = challengeDirectory </> "train"
|
||||||
whenM (doesDirectoryExist trainDirectory) $ do
|
whenM (doesDirectoryExist trainDirectory) $ do
|
||||||
trainFiles <- findTrainFiles trainDirectory
|
trainFiles <- findTrainFiles trainDirectory
|
||||||
when (null trainFiles) $ throw $ NoInputFile "train.tsv"
|
if (not $ null trainFiles)
|
||||||
when (length trainFiles > 1) $ throw $ TooManyTrainFiles trainFiles
|
then
|
||||||
let [trainFile] = trainFiles
|
do
|
||||||
checkCorrectFile trainFile
|
putStrLn "WARNING: Found old-style train file `train.tsv`, whereas the same convention as in"
|
||||||
when (fixedNumberOfColumnsInInput metric && fixedNumberOfColumnsInExpected metric) $ do
|
putStrLn "WARNING: test directories if preferred (`in.tsv` and `expected.tsv`)."
|
||||||
checkColumns trainFile
|
putStrLn "WARNING: (Though, there might still be some cases when `train.tsv` is needed, e.g. for training LMs.)"
|
||||||
|
else
|
||||||
|
do
|
||||||
|
runOnTest spec trainDirectory
|
||||||
|
|
||||||
checkColumns :: FilePath -> IO ()
|
checkColumns :: FilePath -> IO ()
|
||||||
checkColumns filePath = do
|
checkColumns filePath = do
|
||||||
|
@ -5,8 +5,8 @@ module GEval.WER
|
|||||||
import Data.Array
|
import Data.Array
|
||||||
import GEval.Common
|
import GEval.Common
|
||||||
|
|
||||||
werStep :: Eq a => [a] -> [a] -> Double
|
werStep :: Eq a => [a] -> [a] -> (Int, Int)
|
||||||
werStep expected got = (fromIntegral $ distance expected got) `safeDoubleDiv` (fromIntegral $ length expected)
|
werStep expected got = (distance expected got, length expected)
|
||||||
|
|
||||||
-- see https://stackoverflow.com/questions/6718787/levenshtein-distance-cost
|
-- see https://stackoverflow.com/questions/6718787/levenshtein-distance-cost
|
||||||
distance u v = memo ! (m, n)
|
distance u v = memo ! (m, n)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
flags: {}
|
flags: {}
|
||||||
packages:
|
packages:
|
||||||
- '.'
|
- '.'
|
||||||
extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3,Munkres-0.1,numeric-tools-0.2.0.1,Chart-1.9.1,Chart-cairo-1.9.1,multiset-0.3.4.1]
|
extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3,Munkres-0.1,numeric-tools-0.2.0.1,Chart-1.9.1,Chart-cairo-1.9.1,multiset-0.3.4.1,'ordered-containers-0.2.2@sha256:ebf2be3f592d9cf148ea6b8375f8af97148d44f82d8d04476899285e965afdbf,810']
|
||||||
resolver: lts-12.26
|
resolver: lts-12.26
|
||||||
|
45
test/Spec.hs
45
test/Spec.hs
@ -64,6 +64,8 @@ import qualified Data.Vector.Unboxed as DVU
|
|||||||
import qualified Statistics.Matrix.Types as SMT
|
import qualified Statistics.Matrix.Types as SMT
|
||||||
import Data.Statistics.Loess (loess)
|
import Data.Statistics.Loess (loess)
|
||||||
import Data.Statistics.Calibration (calibration)
|
import Data.Statistics.Calibration (calibration)
|
||||||
|
import Data.CartesianStrings (parseCartesianString)
|
||||||
|
import Data.SplitIntoCrossTabs (splitIntoCrossTabs, CrossTab(..), TextFrag(..))
|
||||||
|
|
||||||
informationRetrievalBookExample :: [(String, Int)]
|
informationRetrievalBookExample :: [(String, Int)]
|
||||||
informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
|
informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
|
||||||
@ -127,6 +129,8 @@ main = hspec $ do
|
|||||||
runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6
|
runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6
|
||||||
it "with probs" $
|
it "with probs" $
|
||||||
runGEvalTest "accuracy-probs" `shouldReturnAlmost` 0.4
|
runGEvalTest "accuracy-probs" `shouldReturnAlmost` 0.4
|
||||||
|
it "sorted" $
|
||||||
|
runGEvalTest "accuracy-on-sorted" `shouldReturnAlmost` 0.75
|
||||||
describe "F-measure" $ do
|
describe "F-measure" $ do
|
||||||
it "simple example" $
|
it "simple example" $
|
||||||
runGEvalTest "f-measure-simple" `shouldReturnAlmost` 0.57142857
|
runGEvalTest "f-measure-simple" `shouldReturnAlmost` 0.57142857
|
||||||
@ -326,12 +330,17 @@ main = hspec $ do
|
|||||||
runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615
|
runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615
|
||||||
it "labels given with probs and numbers" $ do
|
it "labels given with probs and numbers" $ do
|
||||||
runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666
|
runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666
|
||||||
|
describe "Mean/MultiLabel-F" $ do
|
||||||
|
it "simple" $ do
|
||||||
|
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
|
||||||
describe "MultiLabel-Likelihood" $ do
|
describe "MultiLabel-Likelihood" $ do
|
||||||
it "simple" $ do
|
it "simple" $ do
|
||||||
runGEvalTest "multilabel-likelihood-simple" `shouldReturnAlmost` 0.115829218528827
|
runGEvalTest "multilabel-likelihood-simple" `shouldReturnAlmost` 0.115829218528827
|
||||||
describe "Preprocessing operations" $ do
|
describe "Preprocessing operations" $ do
|
||||||
it "F1 with preprocessing" $ do
|
it "F1 with preprocessing" $ do
|
||||||
runGEvalTest "f1-with-preprocessing" `shouldReturnAlmost` 0.57142857142857
|
runGEvalTest "f1-with-preprocessing" `shouldReturnAlmost` 0.57142857142857
|
||||||
|
it "Regexp substition" $ do
|
||||||
|
runGEvalTest "accuracy-with-flags" `shouldReturnAlmost` 0.8
|
||||||
describe "evaluating single lines" $ do
|
describe "evaluating single lines" $ do
|
||||||
it "RMSE" $ do
|
it "RMSE" $ do
|
||||||
(MetricOutput (SimpleRun v) _) <- gevalCoreOnSingleLines RMSE id RawItemTarget
|
(MetricOutput (SimpleRun v) _) <- gevalCoreOnSingleLines RMSE id RawItemTarget
|
||||||
@ -439,6 +448,7 @@ main = hspec $ do
|
|||||||
gesTestName = "test-A",
|
gesTestName = "test-A",
|
||||||
gesSelector = Nothing,
|
gesSelector = Nothing,
|
||||||
gesOutFile = "out.tsv",
|
gesOutFile = "out.tsv",
|
||||||
|
gesAltOutFiles = Nothing,
|
||||||
gesExpectedFile = "expected.tsv",
|
gesExpectedFile = "expected.tsv",
|
||||||
gesInputFile = "in.tsv",
|
gesInputFile = "in.tsv",
|
||||||
gesMetrics = [EvaluationScheme Likelihood []],
|
gesMetrics = [EvaluationScheme Likelihood []],
|
||||||
@ -666,6 +676,41 @@ main = hspec $ do
|
|||||||
calibration [True, False] [0.0, 1.0] `shouldBeAlmost` 0.0
|
calibration [True, False] [0.0, 1.0] `shouldBeAlmost` 0.0
|
||||||
calibration [True, False, False, True, False] [0.0, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
|
calibration [True, False, False, True, False] [0.0, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
|
||||||
calibration [False, True, True, True, True, False, False, True, False] [0.25, 0.25, 0.0, 0.25, 0.25, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
|
calibration [False, True, True, True, True, False, False, True, False] [0.25, 0.25, 0.0, 0.25, 0.25, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
|
||||||
|
describe "Cartesian strings" $ do
|
||||||
|
it "singleton" $ do
|
||||||
|
(parseCartesianString "foo") `shouldBe` ["foo"]
|
||||||
|
it "simple" $ do
|
||||||
|
parseCartesianString "a-{foo,bar,baz}-b" `shouldBe` ["a-foo-b", "a-bar-b", "a-baz-b"]
|
||||||
|
it "3x2" $ do
|
||||||
|
parseCartesianString "a-{foo,bar,baz}-{b,c}" `shouldBe` ["a-foo-b", "a-foo-c", "a-bar-b",
|
||||||
|
"a-bar-c", "a-baz-b", "a-baz-c" ]
|
||||||
|
it "3x2x3" $ do
|
||||||
|
parseCartesianString "{foo,bar,ba}-{b,c}-{0,1,2}x" `shouldBe` ["foo-b-0x", "foo-b-1x", "foo-b-2x",
|
||||||
|
"foo-c-0x", "foo-c-1x", "foo-c-2x",
|
||||||
|
"bar-b-0x", "bar-b-1x", "bar-b-2x",
|
||||||
|
"bar-c-0x", "bar-c-1x", "bar-c-2x",
|
||||||
|
"ba-b-0x", "ba-b-1x", "ba-b-2x",
|
||||||
|
"ba-c-0x", "ba-c-1x", "ba-c-2x" ]
|
||||||
|
describe "cross-tabs" $ do
|
||||||
|
it "singleton" $ do
|
||||||
|
splitIntoCrossTabs ["abababab"] `shouldBe` [SingleItem "abababab"]
|
||||||
|
it "too small" $ do
|
||||||
|
splitIntoCrossTabs ["aabb", "aacc"] `shouldBe` [SingleItem "aabb", SingleItem "aacc"]
|
||||||
|
it "two tables" $ do
|
||||||
|
splitIntoCrossTabs ["yABC", "xx00", "yABD", "ZC", "xx11", "yy00", "yy11", "ZD"] `shouldBe` [
|
||||||
|
CrossTab [Prefix "yAB", Prefix "Z"] [Suffix "C", Suffix "D"],
|
||||||
|
CrossTab [Prefix "xx", Prefix "yy"] [Suffix "00", Suffix "11"]]
|
||||||
|
it "simple" $ do
|
||||||
|
splitIntoCrossTabs ["aabsolutely",
|
||||||
|
"aaafoo",
|
||||||
|
"other",
|
||||||
|
"aaabaz",
|
||||||
|
"aaabaq",
|
||||||
|
"bbbfoo",
|
||||||
|
"bbbbaz",
|
||||||
|
"bbbbaq"] `shouldBe` [SingleItem "aabsolutely",
|
||||||
|
CrossTab [Suffix "foo", Suffix "baz", Suffix "baq"] [Prefix "aaa", Prefix "bbb"],
|
||||||
|
SingleItem "other"]
|
||||||
|
|
||||||
checkConduitPure conduit inList expList = do
|
checkConduitPure conduit inList expList = do
|
||||||
let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList
|
let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList
|
||||||
|
@ -0,0 +1,4 @@
|
|||||||
|
foo baz bar
|
||||||
|
|
||||||
|
xyz aaa
|
||||||
|
2 a:1 3
|
|
1
test/accuracy-on-sorted/accuracy-on-sorted/config.txt
Normal file
1
test/accuracy-on-sorted/accuracy-on-sorted/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric Accuracy:S
|
@ -0,0 +1,4 @@
|
|||||||
|
bar baz foo
|
||||||
|
|
||||||
|
xyz
|
||||||
|
a:1 2 3
|
|
@ -0,0 +1,5 @@
|
|||||||
|
b88 b901
|
||||||
|
a100
|
||||||
|
a93
|
||||||
|
t34
|
||||||
|
y23
|
|
1
test/accuracy-with-flags/accuracy-with-flags/config.txt
Normal file
1
test/accuracy-with-flags/accuracy-with-flags/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric Accuracy:s<[abc](\d+)><!\1>
|
@ -0,0 +1,5 @@
|
|||||||
|
a88 b901
|
||||||
|
c100
|
||||||
|
b93
|
||||||
|
t34
|
||||||
|
z23
|
|
@ -0,0 +1,4 @@
|
|||||||
|
foo bar baz
|
||||||
|
uuu
|
||||||
|
foo bar baz
|
||||||
|
qqq aaa
|
|
@ -0,0 +1 @@
|
|||||||
|
--metric Mean/MultiLabel-F1
|
@ -0,0 +1,4 @@
|
|||||||
|
foo bar baz
|
||||||
|
|
||||||
|
foo
|
||||||
|
qqq qqq
|
|
@ -0,0 +1,4 @@
|
|||||||
|
A
|
||||||
|
C
|
||||||
|
D
|
||||||
|
D
|
|
@ -0,0 +1,4 @@
|
|||||||
|
D
|
||||||
|
C
|
||||||
|
B
|
||||||
|
A
|
|
@ -0,0 +1,4 @@
|
|||||||
|
B
|
||||||
|
A
|
||||||
|
C
|
||||||
|
A
|
|
1
test/oracle-item-based/oracle-item-based/config.txt
Normal file
1
test/oracle-item-based/oracle-item-based/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric Accuracy
|
@ -0,0 +1,4 @@
|
|||||||
|
A
|
||||||
|
B
|
||||||
|
C
|
||||||
|
D
|
|
4
test/oracle-item-based/oracle-item-based/test-A/in.tsv
Normal file
4
test/oracle-item-based/oracle-item-based/test-A/in.tsv
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
4
|
|
@ -0,0 +1,4 @@
|
|||||||
|
A
|
||||||
|
C
|
||||||
|
C
|
||||||
|
D
|
|
@ -1 +1 @@
|
|||||||
--metric WER
|
--metric Mean/WER
|
||||||
|
Loading…
Reference in New Issue
Block a user