WIP
This commit is contained in:
parent
cea084c789
commit
bdcd26cddc
@ -26,6 +26,7 @@ library
|
||||
, GEval.LogLossHashed
|
||||
, GEval.CharMatch
|
||||
, GEval.LineByLine
|
||||
, Data.Conduit.SmartSource
|
||||
build-depends: base >= 4.7 && < 5
|
||||
, cond
|
||||
, conduit
|
||||
@ -48,6 +49,9 @@ library
|
||||
, vector
|
||||
, mtl
|
||||
, edit-distance
|
||||
, bytestring
|
||||
, http-conduit
|
||||
, transformers
|
||||
default-language: Haskell2010
|
||||
|
||||
executable geval
|
||||
@ -72,6 +76,9 @@ test-suite geval-test
|
||||
, text
|
||||
, attoparsec
|
||||
, edit-distance
|
||||
, resourcet
|
||||
, conduit
|
||||
, conduit-extra
|
||||
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
||||
default-language: Haskell2010
|
||||
|
||||
|
108
src/Data/Conduit/SmartSource.hs
Normal file
108
src/Data/Conduit/SmartSource.hs
Normal file
@ -0,0 +1,108 @@
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
|
||||
module Data.Conduit.SmartSource
|
||||
where
|
||||
|
||||
import Data.List (isInfixOf, isPrefixOf, isSuffixOf, elemIndex, elem)
|
||||
import Data.Char (ord)
|
||||
|
||||
import System.FilePath
|
||||
|
||||
import Control.Monad.Trans.Resource (MonadResource)
|
||||
import Data.Conduit
|
||||
import qualified Data.ByteString as S
|
||||
import Data.Conduit.Binary (sourceFile)
|
||||
import Network.HTTP.Conduit
|
||||
import Control.Monad.IO.Class
|
||||
import Control.Monad.Trans.Class (lift)
|
||||
|
||||
data SmartSpec = NoSpec
|
||||
| Stdin
|
||||
| FileNameSpec FilePath
|
||||
| FilePathSpec FilePath
|
||||
| Http String
|
||||
| Https String
|
||||
| GitSpec String (Maybe FilePath)
|
||||
| PossiblyGitSpec String
|
||||
deriving (Eq, Show)
|
||||
|
||||
--smartSource :: (MonadIO m, MonadResource m) => [FilePath] -> Maybe FilePath -> SmartSpec -> Producer m S.ByteString
|
||||
smartSource defaultDirs defaultFile spec = pureSmartSource defaultDirs spec
|
||||
|
||||
--pureSmartSource :: (MonadIO m, MonadResource m) => [FilePath] -> SmartSpec -> Producer m S.ByteString
|
||||
pureSmartSource _ NoSpec = error "No source specification given"
|
||||
pureSmartSource _ (FileNameSpec fileName) = sourceFile fileName
|
||||
pureSmartSource _ (FilePathSpec fileName) = sourceFile fileName
|
||||
pureSmartSource [] (PossiblyGitSpec spec) = sourceFile spec
|
||||
pureSmartSource (firstDir:_) (PossiblyGitSpec spec) = sourceFile (firstDir </> spec)
|
||||
pureSmartSource _ (Https url) = httpSource url
|
||||
pureSmartSource _ (Http url) = httpSource url
|
||||
|
||||
httpSource :: MonadResource m => String -> ConduitM () S.ByteString m ()
|
||||
httpSource url = do
|
||||
request <- liftIO $ parseRequest url
|
||||
manager <- liftIO $ newManager tlsManagerSettings
|
||||
response <- lift $ http request manager
|
||||
(httpsource, finalizer) <- lift $ unwrapResumable (responseBody response)
|
||||
httpsource
|
||||
lift finalizer
|
||||
|
||||
parseSmartSpec :: FilePath -> SmartSpec
|
||||
parseSmartSpec "" = NoSpec
|
||||
parseSmartSpec "-" = Stdin
|
||||
parseSmartSpec spec
|
||||
| "http://" `isPrefixOf` spec = Http spec
|
||||
| "https://" `isPrefixOf` spec = Https spec
|
||||
| otherwise = case elemIndex ':' spec of
|
||||
Just ix -> let ref = take ix spec in
|
||||
if checkRefFormat ref
|
||||
then
|
||||
GitSpec ref (if ix == length spec - 1
|
||||
then
|
||||
Nothing
|
||||
else
|
||||
Just $ drop (ix+1) spec)
|
||||
else
|
||||
fileSpec
|
||||
Nothing -> if checkRefFormat spec && not ('/' `elem` spec) && not ('.' `elem` spec)
|
||||
then
|
||||
PossiblyGitSpec spec
|
||||
else
|
||||
fileSpec
|
||||
where fileSpec = (if '/' `elem` spec then FilePathSpec else FileNameSpec) spec
|
||||
|
||||
parseSmartSpecInContext :: [FilePath] -> Maybe FilePath -> String -> Maybe SmartSpec
|
||||
parseSmartSpecInContext defaultDirs defaultFile spec = parseSmartSpecInContext' defaultDirs defaultFile $ parseSmartSpec spec
|
||||
where parseSmartSpecInContext' _ Nothing NoSpec = Nothing
|
||||
parseSmartSpecInContext' [] (Just defaultFile) NoSpec = Just $ FileNameSpec defaultFile
|
||||
parseSmartSpecInContext' (firstDir:_) (Just defaultFile) NoSpec = Just $ FilePathSpec (firstDir </> defaultFile)
|
||||
|
||||
parseSmartSpecInContext' (firstDir:_) _ (FileNameSpec fileName) = Just $ FilePathSpec (firstDir </> fileName)
|
||||
|
||||
parseSmartSpecInContext' _ Nothing (GitSpec branch Nothing) = Nothing
|
||||
parseSmartSpecInContext' [] (Just defaultFile) (GitSpec branch Nothing) = Just $ GitSpec branch $ Just defaultFile
|
||||
parseSmartSpecInContext' (firstDir:_) (Just defaultFile) (GitSpec branch Nothing)
|
||||
= Just $ GitSpec branch $ Just (firstDir </> defaultFile)
|
||||
|
||||
parseSmartSpecInContext' _ _ parsedSpec = Just parsedSpec
|
||||
|
||||
checkRefFormat :: String -> Bool
|
||||
checkRefFormat ref =
|
||||
not ("./" `isInfixOf` ref) &&
|
||||
not (".lock" `isSuffixOf` ref) &&
|
||||
not (".lock/" `isInfixOf` ref) &&
|
||||
not (".." `isInfixOf` ref) &&
|
||||
not (any isUnwantedChar ref) &&
|
||||
not ("/" `isSuffixOf` ref) &&
|
||||
not ("//" `isInfixOf` ref) &&
|
||||
not ("@{" `isInfixOf` ref) &&
|
||||
ref /= "@"
|
||||
where isUnwantedChar ':' = True
|
||||
isUnwantedChar '?' = True
|
||||
isUnwantedChar '*' = True
|
||||
isUnwantedChar '[' = True
|
||||
isUnwantedChar '~' = True
|
||||
isUnwantedChar '^' = True
|
||||
isUnwantedChar '\\' = True
|
||||
isUnwantedChar '\177' = True
|
||||
isUnwantedChar c = ord c < 32
|
@ -60,6 +60,8 @@ import Control.Monad ((<=<))
|
||||
|
||||
import Data.Attoparsec.Text (parseOnly)
|
||||
|
||||
import Data.Conduit.SmartSource
|
||||
|
||||
import GEval.BLEU
|
||||
import GEval.Common
|
||||
import GEval.ClippEU
|
||||
@ -259,7 +261,7 @@ checkInputFileIfNeeded _ _ = return ()
|
||||
|
||||
fileAsLineSource :: FilePath -> LineSource (ResourceT IO)
|
||||
fileAsLineSource filePath =
|
||||
LineSource (CB.sourceFile filePath $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1
|
||||
LineSource (smartSource [] Nothing (parseSmartSpec filePath) $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1
|
||||
|
||||
gevalCoreOnSingleLines :: Metric -> LineInFile -> LineInFile -> LineInFile -> IO (MetricValue)
|
||||
gevalCoreOnSingleLines metric inpLine expLine outLine =
|
||||
|
38
test/Spec.hs
38
test/Spec.hs
@ -15,6 +15,12 @@ import Text.EditDistance
|
||||
|
||||
import qualified Test.HUnit as HU
|
||||
|
||||
import Data.Conduit.SmartSource
|
||||
import qualified Data.Conduit.Text as CT
|
||||
import Data.Conduit
|
||||
import Control.Monad.Trans.Resource
|
||||
import qualified Data.Conduit.List as CL
|
||||
|
||||
informationRetrievalBookExample :: [(String, Int)]
|
||||
informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
|
||||
("x", 1), ("o", 1), ("x", 1), ( "x", 1), ("x", 1), ("x", 1),
|
||||
@ -187,7 +193,39 @@ main = hspec $ do
|
||||
gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla")
|
||||
(LineInFile "stub2" 1 "3.4")
|
||||
(LineInFile "stub3" 1 "2.6") `shouldReturnAlmost` 0.8
|
||||
describe "smart sources" $ do
|
||||
it "smart specs are parsed" $ do
|
||||
parseSmartSpec "" `shouldBe` NoSpec
|
||||
parseSmartSpec "-" `shouldBe` Stdin
|
||||
parseSmartSpec "http://gonito.net/foo" `shouldBe` Http "http://gonito.net/foo"
|
||||
parseSmartSpec "https://gonito.net" `shouldBe` Https "https://gonito.net"
|
||||
parseSmartSpec "branch:" `shouldBe` GitSpec "branch" Nothing
|
||||
parseSmartSpec "37be:foo/bar.tsv" `shouldBe` GitSpec "37be" (Just "foo/bar.tsv")
|
||||
parseSmartSpec "bla/xyz:foo/bar.tsv" `shouldBe` GitSpec "bla/xyz" (Just "foo/bar.tsv")
|
||||
parseSmartSpec "out.tsv" `shouldBe` FileNameSpec "out.tsv"
|
||||
parseSmartSpec "dev-1/out.tsv" `shouldBe` FilePathSpec "dev-1/out.tsv"
|
||||
parseSmartSpec "../out.tsv" `shouldBe` FilePathSpec "../out.tsv"
|
||||
parseSmartSpec "4a5f" `shouldBe` PossiblyGitSpec "4a5f"
|
||||
parseSmartSpec "!!" `shouldBe` PossiblyGitSpec "!!"
|
||||
parseSmartSpec "branch" `shouldBe` PossiblyGitSpec "branch"
|
||||
it "smart specs are parsed in context" $ do
|
||||
parseSmartSpecInContext [] Nothing "xyz" `shouldBe` Just (PossiblyGitSpec "xyz")
|
||||
parseSmartSpecInContext ["foo", "bar"] Nothing "out.tsv" `shouldBe` Just (FilePathSpec "foo/out.tsv")
|
||||
parseSmartSpecInContext [] (Just "default") "" `shouldBe` Just (FileNameSpec "default")
|
||||
parseSmartSpecInContext ["foo"] (Just "default") "" `shouldBe` Just (FilePathSpec "foo/default")
|
||||
parseSmartSpecInContext ["foo/bar"] (Just "default") "http://gonito.net" `shouldBe` Just (Http "http://gonito.net")
|
||||
parseSmartSpecInContext ["foo/bar"] Nothing "" `shouldBe` Nothing
|
||||
it "sources are accessed" $ do
|
||||
readFromSmartSource [] Nothing "test/files/foo.txt" `shouldReturn` ["foo\n"]
|
||||
readFromSmartSource [] Nothing "https://httpbin.org/robots.txt" `shouldReturn`
|
||||
["User-agent: *\nDisallow: /deny\n"]
|
||||
|
||||
readFromSmartSource :: [FilePath] -> Maybe FilePath -> String -> IO [String]
|
||||
readFromSmartSource defaultDirs defaultFile specS = do
|
||||
let (Just spec) = parseSmartSpecInContext defaultDirs defaultFile specS
|
||||
let source = smartSource defaultDirs defaultFile spec
|
||||
contents <- runResourceT (source $$ CT.decodeUtf8Lenient =$ CL.consume)
|
||||
return $ Prelude.map unpack contents
|
||||
|
||||
neverMatch :: Char -> Int -> Bool
|
||||
neverMatch _ _ = False
|
||||
|
1
test/files/foo.txt
Normal file
1
test/files/foo.txt
Normal file
@ -0,0 +1 @@
|
||||
foo
|
Loading…
Reference in New Issue
Block a user