add --just-tokenize option
This commit is contained in:
parent
3a68324a6e
commit
0871b57bbc
@ -230,10 +230,12 @@ getExpectedDirectory :: GEvalSpecification -> FilePath
|
||||
getExpectedDirectory spec = fromMaybe outDirectory $ gesExpectedDirectory spec
|
||||
where outDirectory = gesOutDirectory spec
|
||||
|
||||
-- | Special command, not just running the regular evaluation.
|
||||
-- See OptionsParser.hs for more information.
|
||||
data GEvalSpecialCommand = Init
|
||||
| LineByLine | WorstFeatures
|
||||
| Diff FilePath | MostWorseningFeatures FilePath
|
||||
| PrintVersion
|
||||
| PrintVersion | JustTokenize
|
||||
|
||||
data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
|
||||
|
||||
|
@ -15,16 +15,19 @@ module GEval.LineByLine
|
||||
runMostWorseningFeatures,
|
||||
runDiffGeneralized,
|
||||
LineRecord(..),
|
||||
ResultOrdering(..)
|
||||
ResultOrdering(..),
|
||||
justTokenize
|
||||
) where
|
||||
|
||||
import GEval.Core
|
||||
import Text.Tokenizer
|
||||
|
||||
import Data.Conduit.AutoDecompress (doNothing)
|
||||
|
||||
import Data.Conduit
|
||||
import qualified Data.Conduit.List as CL
|
||||
import qualified Data.Conduit.Combinators as CC
|
||||
import qualified Data.Conduit.Text as CT
|
||||
import Data.Text
|
||||
import Data.Text.Encoding
|
||||
import Data.Conduit.Rank
|
||||
@ -288,3 +291,16 @@ gevalLineByLineSource metric preprocess inputSource expectedSource outSource =
|
||||
(LineInFile expectedSource lineNo exp)
|
||||
(LineInFile outSource lineNo out)
|
||||
return $ LineRecord inp exp out lineNo s
|
||||
|
||||
-- | Tokenize standard input line by line and write the result to standard
-- output, without running any evaluation.
--
-- The argument is the tokenizer selected by the user (callers pass
-- @gesTokenizer spec@). When it is 'Nothing' the function aborts via 'error',
-- since there is nothing sensible to do without a tokenizer.
--
-- Input bytes are decoded as UTF-8 leniently, split into lines, each line is
-- passed through 'tokenizeWithSpaces', and the resulting lines are
-- newline-terminated, re-encoded as UTF-8 and streamed to stdout.
-- NOTE(review): 'tokenizeWithSpaces' comes from "Text.Tokenizer"; presumably
-- it returns the tokens of a line joined by single spaces -- confirm there.
justTokenize :: Maybe Tokenizer -> IO ()
|
||||
-- No tokenizer was given on the command line: fail with a hint for the user.
justTokenize Nothing = error "a tokenizer must be specified with --tokenizer option"
|
||||
justTokenize (Just tokenizer) =
|
||||
-- Streaming conduit pipeline: stdin -> text lines -> tokenized lines -> stdout.
runResourceT
|
||||
$ runConduit
|
||||
$ CC.stdin
|
||||
.| CC.decodeUtf8Lenient
|
||||
.| CT.lines
|
||||
.| CC.map (tokenizeWithSpaces (Just tokenizer))
|
||||
.| CC.unlines
|
||||
.| CC.encodeUtf8
|
||||
.| CC.stdout
|
||||
|
@ -58,12 +58,17 @@ optionsParser = GEvalOptions
|
||||
( long "diff"
|
||||
<> short 'd'
|
||||
<> metavar "OTHER-OUT"
|
||||
<> help "compare results"))
|
||||
<> help "Compare results of evaluations (line by line) for two outputs."))
|
||||
<|>
|
||||
(MostWorseningFeatures <$> strOption
|
||||
( long "most-worsening-features"
|
||||
<> short 'm'
|
||||
<> help "Print a ranking of the \"most worsening\" features, i.e. features that worsen the score the most when comparing outputs from two systems.")))
|
||||
<> help "Print a ranking of the \"most worsening\" features, i.e. features that worsen the score the most when comparing outputs from two systems."))
|
||||
<|>
|
||||
(flag' JustTokenize
|
||||
( long "just-tokenize"
|
||||
<> short 'j'
|
||||
<> help "Just tokenise standard input and print out the tokens (separated by spaces) on the standard output. rather than do any evaluation. The --tokenizer option must be given.")))
|
||||
|
||||
<*> ((flag' FirstTheWorst
|
||||
(long "sort"
|
||||
@ -127,7 +132,7 @@ specParser = GEvalSpecification
|
||||
<*> ((flip fromMaybe) <$> (singletonMaybe <$> altMetricReader) <*> metricReader)
|
||||
<*> optional precisionArgParser
|
||||
<*> (optional $ option auto
|
||||
( long "tokenize"
|
||||
( long "tokenizer"
|
||||
<> short 'T'
|
||||
<> metavar "TOKENIZER"
|
||||
<> help "Tokenizer on expected and actual output before running evaluation (makes sense mostly for metrics such BLEU), only 13a tokenizer is implemented so far" ))
|
||||
@ -222,6 +227,9 @@ runGEval''' (Just (Diff otherOut)) ordering spec = do
|
||||
runGEval''' (Just (MostWorseningFeatures otherOut)) ordering spec = do
|
||||
runMostWorseningFeatures ordering otherOut spec
|
||||
return Nothing
|
||||
runGEval''' (Just JustTokenize) _ spec = do
|
||||
justTokenize (gesTokenizer spec)
|
||||
return Nothing
|
||||
|
||||
initChallenge :: GEvalSpecification -> IO ()
|
||||
initChallenge spec = case gesExpectedDirectory spec of
|
||||
|
@ -1 +1 @@
|
||||
--metric BLEU --tokenize 13a
|
||||
--metric BLEU --tokenizer 13a
|
||||
|
Loading…
Reference in New Issue
Block a user