66 lines
1.9 KiB
Haskell
66 lines
1.9 KiB
Haskell
|
{-# LANGUAGE OverloadedStrings #-}
|
||
|
|
||
|
module GEval.ProbList
|
||
|
(parseIntoProbList, selectByStandardThreshold)
|
||
|
where
|
||
|
|
||
|
import qualified Data.Text as T
|
||
|
|
||
|
import GEval.Common
|
||
|
|
||
|
|
||
|
newtype Probability = P { getP :: Double }
|
||
|
deriving (Eq,Ord,Show)
|
||
|
|
||
|
isProbability :: Double -> Bool
|
||
|
isProbability p = 0.0 <= p && p <= 1.0
|
||
|
|
||
|
mkProbability :: Double -> Probability
|
||
|
mkProbability p
|
||
|
| isProbability p = P p
|
||
|
| otherwise = error $ show p ++ " is not in [0, 1]"
|
||
|
|
||
|
probabilityOne :: Probability
|
||
|
probabilityOne = mkProbability 1.0
|
||
|
|
||
|
data ProbList = ProbList [WordWithProb]
|
||
|
deriving (Show)
|
||
|
|
||
|
data WordWithProb = WordWithProb T.Text Probability
|
||
|
deriving (Show)
|
||
|
|
||
|
parseIntoWordWithProb :: T.Text -> WordWithProb
|
||
|
parseIntoWordWithProb t =
|
||
|
-- If anything is wrong the whole word is treated as a label,
|
||
|
-- even if it contains colons, digits, dots, etc.
|
||
|
if T.null wordSpecPart
|
||
|
then wordWithoutProb
|
||
|
else
|
||
|
if "." `T.isInfixOf` numberPart
|
||
|
then case textToDouble numberPart of
|
||
|
Right p -> if isProbability p
|
||
|
then WordWithProb wordSpecPart (mkProbability p)
|
||
|
else wordWithoutProb
|
||
|
Left _ -> wordWithoutProb
|
||
|
else wordWithoutProb
|
||
|
where
|
||
|
(wordSpecPart', numberPart) = T.breakOnEnd ":" t
|
||
|
wordWithoutProb = WordWithProb t probabilityOne
|
||
|
wordSpecPart = if T.null wordSpecPart'
|
||
|
then wordSpecPart'
|
||
|
else T.init wordSpecPart'
|
||
|
|
||
|
parseIntoProbList :: T.Text -> ProbList
|
||
|
parseIntoProbList = ProbList . map parseIntoWordWithProb . T.words
|
||
|
|
||
|
selectByThreshold :: Probability -> ProbList -> [T.Text]
|
||
|
selectByThreshold threshold (ProbList l) =
|
||
|
map (\(WordWithProb w _) -> w)
|
||
|
$ filter (\(WordWithProb _ p) -> p >= threshold) l
|
||
|
|
||
|
standardThreshold :: Double
|
||
|
standardThreshold = 0.5
|
||
|
|
||
|
selectByStandardThreshold :: ProbList -> [T.Text]
|
||
|
selectByStandardThreshold = selectByThreshold (mkProbability standardThreshold)
|