WIP
This commit is contained in:
parent
cea084c789
commit
bdcd26cddc
@ -26,6 +26,7 @@ library
|
|||||||
, GEval.LogLossHashed
|
, GEval.LogLossHashed
|
||||||
, GEval.CharMatch
|
, GEval.CharMatch
|
||||||
, GEval.LineByLine
|
, GEval.LineByLine
|
||||||
|
, Data.Conduit.SmartSource
|
||||||
build-depends: base >= 4.7 && < 5
|
build-depends: base >= 4.7 && < 5
|
||||||
, cond
|
, cond
|
||||||
, conduit
|
, conduit
|
||||||
@ -48,6 +49,9 @@ library
|
|||||||
, vector
|
, vector
|
||||||
, mtl
|
, mtl
|
||||||
, edit-distance
|
, edit-distance
|
||||||
|
, bytestring
|
||||||
|
, http-conduit
|
||||||
|
, transformers
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
executable geval
|
executable geval
|
||||||
@ -72,6 +76,9 @@ test-suite geval-test
|
|||||||
, text
|
, text
|
||||||
, attoparsec
|
, attoparsec
|
||||||
, edit-distance
|
, edit-distance
|
||||||
|
, resourcet
|
||||||
|
, conduit
|
||||||
|
, conduit-extra
|
||||||
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
|
108
src/Data/Conduit/SmartSource.hs
Normal file
108
src/Data/Conduit/SmartSource.hs
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
{-# LANGUAGE RankNTypes #-}
|
||||||
|
|
||||||
|
module Data.Conduit.SmartSource
|
||||||
|
where
|
||||||
|
|
||||||
|
import Data.List (isInfixOf, isPrefixOf, isSuffixOf, elemIndex, elem)
|
||||||
|
import Data.Char (ord)
|
||||||
|
|
||||||
|
import System.FilePath
|
||||||
|
|
||||||
|
import Control.Monad.Trans.Resource (MonadResource)
|
||||||
|
import Data.Conduit
|
||||||
|
import qualified Data.ByteString as S
|
||||||
|
import Data.Conduit.Binary (sourceFile)
|
||||||
|
import Network.HTTP.Conduit
|
||||||
|
import Control.Monad.IO.Class
|
||||||
|
import Control.Monad.Trans.Class (lift)
|
||||||
|
|
||||||
|
data SmartSpec = NoSpec
|
||||||
|
| Stdin
|
||||||
|
| FileNameSpec FilePath
|
||||||
|
| FilePathSpec FilePath
|
||||||
|
| Http String
|
||||||
|
| Https String
|
||||||
|
| GitSpec String (Maybe FilePath)
|
||||||
|
| PossiblyGitSpec String
|
||||||
|
deriving (Eq, Show)
|
||||||
|
|
||||||
|
--smartSource :: (MonadIO m, MonadResource m) => [FilePath] -> Maybe FilePath -> SmartSpec -> Producer m S.ByteString
|
||||||
|
smartSource defaultDirs defaultFile spec = pureSmartSource defaultDirs spec
|
||||||
|
|
||||||
|
--pureSmartSource :: (MonadIO m, MonadResource m) => [FilePath] -> SmartSpec -> Producer m S.ByteString
|
||||||
|
pureSmartSource _ NoSpec = error "No source specification given"
|
||||||
|
pureSmartSource _ (FileNameSpec fileName) = sourceFile fileName
|
||||||
|
pureSmartSource _ (FilePathSpec fileName) = sourceFile fileName
|
||||||
|
pureSmartSource [] (PossiblyGitSpec spec) = sourceFile spec
|
||||||
|
pureSmartSource (firstDir:_) (PossiblyGitSpec spec) = sourceFile (firstDir </> spec)
|
||||||
|
pureSmartSource _ (Https url) = httpSource url
|
||||||
|
pureSmartSource _ (Http url) = httpSource url
|
||||||
|
|
||||||
|
httpSource :: MonadResource m => String -> ConduitM () S.ByteString m ()
|
||||||
|
httpSource url = do
|
||||||
|
request <- liftIO $ parseRequest url
|
||||||
|
manager <- liftIO $ newManager tlsManagerSettings
|
||||||
|
response <- lift $ http request manager
|
||||||
|
(httpsource, finalizer) <- lift $ unwrapResumable (responseBody response)
|
||||||
|
httpsource
|
||||||
|
lift finalizer
|
||||||
|
|
||||||
|
parseSmartSpec :: FilePath -> SmartSpec
|
||||||
|
parseSmartSpec "" = NoSpec
|
||||||
|
parseSmartSpec "-" = Stdin
|
||||||
|
parseSmartSpec spec
|
||||||
|
| "http://" `isPrefixOf` spec = Http spec
|
||||||
|
| "https://" `isPrefixOf` spec = Https spec
|
||||||
|
| otherwise = case elemIndex ':' spec of
|
||||||
|
Just ix -> let ref = take ix spec in
|
||||||
|
if checkRefFormat ref
|
||||||
|
then
|
||||||
|
GitSpec ref (if ix == length spec - 1
|
||||||
|
then
|
||||||
|
Nothing
|
||||||
|
else
|
||||||
|
Just $ drop (ix+1) spec)
|
||||||
|
else
|
||||||
|
fileSpec
|
||||||
|
Nothing -> if checkRefFormat spec && not ('/' `elem` spec) && not ('.' `elem` spec)
|
||||||
|
then
|
||||||
|
PossiblyGitSpec spec
|
||||||
|
else
|
||||||
|
fileSpec
|
||||||
|
where fileSpec = (if '/' `elem` spec then FilePathSpec else FileNameSpec) spec
|
||||||
|
|
||||||
|
parseSmartSpecInContext :: [FilePath] -> Maybe FilePath -> String -> Maybe SmartSpec
|
||||||
|
parseSmartSpecInContext defaultDirs defaultFile spec = parseSmartSpecInContext' defaultDirs defaultFile $ parseSmartSpec spec
|
||||||
|
where parseSmartSpecInContext' _ Nothing NoSpec = Nothing
|
||||||
|
parseSmartSpecInContext' [] (Just defaultFile) NoSpec = Just $ FileNameSpec defaultFile
|
||||||
|
parseSmartSpecInContext' (firstDir:_) (Just defaultFile) NoSpec = Just $ FilePathSpec (firstDir </> defaultFile)
|
||||||
|
|
||||||
|
parseSmartSpecInContext' (firstDir:_) _ (FileNameSpec fileName) = Just $ FilePathSpec (firstDir </> fileName)
|
||||||
|
|
||||||
|
parseSmartSpecInContext' _ Nothing (GitSpec branch Nothing) = Nothing
|
||||||
|
parseSmartSpecInContext' [] (Just defaultFile) (GitSpec branch Nothing) = Just $ GitSpec branch $ Just defaultFile
|
||||||
|
parseSmartSpecInContext' (firstDir:_) (Just defaultFile) (GitSpec branch Nothing)
|
||||||
|
= Just $ GitSpec branch $ Just (firstDir </> defaultFile)
|
||||||
|
|
||||||
|
parseSmartSpecInContext' _ _ parsedSpec = Just parsedSpec
|
||||||
|
|
||||||
|
checkRefFormat :: String -> Bool
|
||||||
|
checkRefFormat ref =
|
||||||
|
not ("./" `isInfixOf` ref) &&
|
||||||
|
not (".lock" `isSuffixOf` ref) &&
|
||||||
|
not (".lock/" `isInfixOf` ref) &&
|
||||||
|
not (".." `isInfixOf` ref) &&
|
||||||
|
not (any isUnwantedChar ref) &&
|
||||||
|
not ("/" `isSuffixOf` ref) &&
|
||||||
|
not ("//" `isInfixOf` ref) &&
|
||||||
|
not ("@{" `isInfixOf` ref) &&
|
||||||
|
ref /= "@"
|
||||||
|
where isUnwantedChar ':' = True
|
||||||
|
isUnwantedChar '?' = True
|
||||||
|
isUnwantedChar '*' = True
|
||||||
|
isUnwantedChar '[' = True
|
||||||
|
isUnwantedChar '~' = True
|
||||||
|
isUnwantedChar '^' = True
|
||||||
|
isUnwantedChar '\\' = True
|
||||||
|
isUnwantedChar '\177' = True
|
||||||
|
isUnwantedChar c = ord c < 32
|
@ -60,6 +60,8 @@ import Control.Monad ((<=<))
|
|||||||
|
|
||||||
import Data.Attoparsec.Text (parseOnly)
|
import Data.Attoparsec.Text (parseOnly)
|
||||||
|
|
||||||
|
import Data.Conduit.SmartSource
|
||||||
|
|
||||||
import GEval.BLEU
|
import GEval.BLEU
|
||||||
import GEval.Common
|
import GEval.Common
|
||||||
import GEval.ClippEU
|
import GEval.ClippEU
|
||||||
@ -259,7 +261,7 @@ checkInputFileIfNeeded _ _ = return ()
|
|||||||
|
|
||||||
fileAsLineSource :: FilePath -> LineSource (ResourceT IO)
|
fileAsLineSource :: FilePath -> LineSource (ResourceT IO)
|
||||||
fileAsLineSource filePath =
|
fileAsLineSource filePath =
|
||||||
LineSource (CB.sourceFile filePath $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1
|
LineSource (smartSource [] Nothing (parseSmartSpec filePath) $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1
|
||||||
|
|
||||||
gevalCoreOnSingleLines :: Metric -> LineInFile -> LineInFile -> LineInFile -> IO (MetricValue)
|
gevalCoreOnSingleLines :: Metric -> LineInFile -> LineInFile -> LineInFile -> IO (MetricValue)
|
||||||
gevalCoreOnSingleLines metric inpLine expLine outLine =
|
gevalCoreOnSingleLines metric inpLine expLine outLine =
|
||||||
|
38
test/Spec.hs
38
test/Spec.hs
@ -15,6 +15,12 @@ import Text.EditDistance
|
|||||||
|
|
||||||
import qualified Test.HUnit as HU
|
import qualified Test.HUnit as HU
|
||||||
|
|
||||||
|
import Data.Conduit.SmartSource
|
||||||
|
import qualified Data.Conduit.Text as CT
|
||||||
|
import Data.Conduit
|
||||||
|
import Control.Monad.Trans.Resource
|
||||||
|
import qualified Data.Conduit.List as CL
|
||||||
|
|
||||||
informationRetrievalBookExample :: [(String, Int)]
|
informationRetrievalBookExample :: [(String, Int)]
|
||||||
informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
|
informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
|
||||||
("x", 1), ("o", 1), ("x", 1), ( "x", 1), ("x", 1), ("x", 1),
|
("x", 1), ("o", 1), ("x", 1), ( "x", 1), ("x", 1), ("x", 1),
|
||||||
@ -187,7 +193,39 @@ main = hspec $ do
|
|||||||
gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla")
|
gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla")
|
||||||
(LineInFile "stub2" 1 "3.4")
|
(LineInFile "stub2" 1 "3.4")
|
||||||
(LineInFile "stub3" 1 "2.6") `shouldReturnAlmost` 0.8
|
(LineInFile "stub3" 1 "2.6") `shouldReturnAlmost` 0.8
|
||||||
|
describe "smart sources" $ do
|
||||||
|
it "smart specs are parsed" $ do
|
||||||
|
parseSmartSpec "" `shouldBe` NoSpec
|
||||||
|
parseSmartSpec "-" `shouldBe` Stdin
|
||||||
|
parseSmartSpec "http://gonito.net/foo" `shouldBe` Http "http://gonito.net/foo"
|
||||||
|
parseSmartSpec "https://gonito.net" `shouldBe` Https "https://gonito.net"
|
||||||
|
parseSmartSpec "branch:" `shouldBe` GitSpec "branch" Nothing
|
||||||
|
parseSmartSpec "37be:foo/bar.tsv" `shouldBe` GitSpec "37be" (Just "foo/bar.tsv")
|
||||||
|
parseSmartSpec "bla/xyz:foo/bar.tsv" `shouldBe` GitSpec "bla/xyz" (Just "foo/bar.tsv")
|
||||||
|
parseSmartSpec "out.tsv" `shouldBe` FileNameSpec "out.tsv"
|
||||||
|
parseSmartSpec "dev-1/out.tsv" `shouldBe` FilePathSpec "dev-1/out.tsv"
|
||||||
|
parseSmartSpec "../out.tsv" `shouldBe` FilePathSpec "../out.tsv"
|
||||||
|
parseSmartSpec "4a5f" `shouldBe` PossiblyGitSpec "4a5f"
|
||||||
|
parseSmartSpec "!!" `shouldBe` PossiblyGitSpec "!!"
|
||||||
|
parseSmartSpec "branch" `shouldBe` PossiblyGitSpec "branch"
|
||||||
|
it "smart specs are parsed in context" $ do
|
||||||
|
parseSmartSpecInContext [] Nothing "xyz" `shouldBe` Just (PossiblyGitSpec "xyz")
|
||||||
|
parseSmartSpecInContext ["foo", "bar"] Nothing "out.tsv" `shouldBe` Just (FilePathSpec "foo/out.tsv")
|
||||||
|
parseSmartSpecInContext [] (Just "default") "" `shouldBe` Just (FileNameSpec "default")
|
||||||
|
parseSmartSpecInContext ["foo"] (Just "default") "" `shouldBe` Just (FilePathSpec "foo/default")
|
||||||
|
parseSmartSpecInContext ["foo/bar"] (Just "default") "http://gonito.net" `shouldBe` Just (Http "http://gonito.net")
|
||||||
|
parseSmartSpecInContext ["foo/bar"] Nothing "" `shouldBe` Nothing
|
||||||
|
it "sources are accessed" $ do
|
||||||
|
readFromSmartSource [] Nothing "test/files/foo.txt" `shouldReturn` ["foo\n"]
|
||||||
|
readFromSmartSource [] Nothing "https://httpbin.org/robots.txt" `shouldReturn`
|
||||||
|
["User-agent: *\nDisallow: /deny\n"]
|
||||||
|
|
||||||
|
readFromSmartSource :: [FilePath] -> Maybe FilePath -> String -> IO [String]
|
||||||
|
readFromSmartSource defaultDirs defaultFile specS = do
|
||||||
|
let (Just spec) = parseSmartSpecInContext defaultDirs defaultFile specS
|
||||||
|
let source = smartSource defaultDirs defaultFile spec
|
||||||
|
contents <- runResourceT (source $$ CT.decodeUtf8Lenient =$ CL.consume)
|
||||||
|
return $ Prelude.map unpack contents
|
||||||
|
|
||||||
neverMatch :: Char -> Int -> Bool
|
neverMatch :: Char -> Int -> Bool
|
||||||
neverMatch _ _ = False
|
neverMatch _ _ = False
|
||||||
|
1
test/files/foo.txt
Normal file
1
test/files/foo.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
foo
|
Loading…
Reference in New Issue
Block a user