gonito/Gonito/ExtractMetadata.hs

264 lines
9.7 KiB
Haskell
Raw Normal View History

2018-11-03 10:41:49 +01:00
{-# LANGUAGE PackageImports #-}
module Gonito.ExtractMetadata (
extractMetadataFromRepoDir,
GonitoMetadata(..),
ExtractionOptions(..),
parseCommitMessage,
getLastCommitMessage,
2018-11-12 20:41:46 +01:00
parseTags,
Link(..))
where
import Import
import Data.Attoparsec.Text
import Data.Text
2018-11-03 10:41:49 +01:00
import qualified Data.Text.Encoding as DTE
import Data.Aeson
import qualified Data.Yaml as Y
import System.Exit
import System.Process
import qualified Data.Set as S
2018-11-03 10:56:58 +01:00
import qualified Data.Map.Strict as M
import Handler.Shared (gitPath)
2018-11-03 10:41:49 +01:00
import "Glob" System.FilePath.Glob as G
2018-11-16 12:43:44 +01:00
import PersistSHA1
-- | Optional metadata settings. Every field may be left unset ('Nothing');
-- 'extractMetadataFromRepoDir' merges these with the repository's
-- @gonito.yaml@ and the last commit message.
data ExtractionOptions = ExtractionOptions
  { extractionOptionsDescription :: Maybe Text
    -- ^ Description; when set it is used instead of the one parsed from
    -- the commit message.
  , extractionOptionsTags :: Maybe (S.Set Text)
    -- ^ Tags; unioned with the tags parsed from the commit message.
  , extractionOptionsGeneralParams :: Maybe (M.Map Text Text)
    -- ^ Explicitly given parameter name/value pairs.
  , extractionOptionsUnwantedParams :: Maybe [Text]
    -- ^ Parameter names deleted from the parameters read from param files.
  , extractionOptionsParamFiles :: Maybe [String]
    -- ^ Glob patterns selecting parameter files inside the repository
    -- directory.
  , extractionOptionsMLRunPath :: Maybe FilePath
    -- ^ Value of the @mlrun-path@ key; not consumed by the code visible
    -- in this module.
  , extractionOptionsExternalLinks :: Maybe [Link]
    -- ^ External links attached to the metadata.
  , extractionOptionsDependencies :: Maybe [SHA1]
    -- ^ Explicitly declared dependencies, as SHA1 hashes.
  }
-- | Decode 'ExtractionOptions' from an object. All keys are optional:
-- @description@, @tags@, @params@, @unwanted-params@, @param-files@,
-- @mlrun-path@, @links@ and @dependencies@.
instance FromJSON ExtractionOptions where
  parseJSON = withObject "ExtractionOptions" $ \obj -> do
    mDescription <- obj .:? "description"
    mTags <- obj .:? "tags"
    mRawParams <- obj .:? "params"
    mUnwantedParams <- obj .:? "unwanted-params"
    mParamFiles <- obj .:? "param-files"
    mMLRunPath <- obj .:? "mlrun-path"
    mLinks <- obj .:? "links"
    mRawDependencies <- obj .:? "dependencies"
    pure ExtractionOptions
      { extractionOptionsDescription = mDescription
      , extractionOptionsTags = mTags
        -- Parameter values may be arbitrary YAML values; flatten them to text.
      , extractionOptionsGeneralParams = enforceTextHash <$> mRawParams
      , extractionOptionsUnwantedParams = mUnwantedParams
      , extractionOptionsParamFiles = mParamFiles
      , extractionOptionsMLRunPath = mMLRunPath
      , extractionOptionsExternalLinks = mLinks
        -- Dependencies are given as text and converted to SHA1 values.
      , extractionOptionsDependencies = Import.map fromTextToSHA1 <$> mRawDependencies
      }
-- | The default options leave every field unset.
instance Default ExtractionOptions where
  def = ExtractionOptions
    { extractionOptionsDescription = Nothing
    , extractionOptionsTags = Nothing
    , extractionOptionsGeneralParams = Nothing
    , extractionOptionsUnwantedParams = Nothing
    , extractionOptionsParamFiles = Nothing
    , extractionOptionsMLRunPath = Nothing
    , extractionOptionsExternalLinks = Nothing
    , extractionOptionsDependencies = Nothing
    }
2018-11-12 20:41:46 +01:00
-- | An external link: a mandatory URL with an optional title.
data Link = Link
  { linkTitle :: Maybe Text
    -- ^ Optional title of the link.
  , linkUrl :: Text
    -- ^ Target URL.
  }
  deriving (Eq, Show)
-- | Decode a 'Link' from an object with a required @url@ key and an
-- optional @title@ key.
instance FromJSON Link where
  parseJSON = withObject "Link" $ \obj -> do
    mTitle <- obj .:? "title"
    url <- obj .: "url"
    pure Link
      { linkTitle = mTitle
      , linkUrl = url
      }
-- | The fully resolved metadata produced by 'extractMetadataFromRepoDir'.
data GonitoMetadata = GonitoMetadata
  { gonitoMetadataDescription :: Text
    -- ^ Final description (@"???"@ when none could be determined).
  , gonitoMetadataTags :: S.Set Text
    -- ^ Tags from the commit message unioned with the tags from the options.
  , gonitoMetadataGeneralParams :: M.Map Text Text
    -- ^ Parameters from param files combined with explicitly given ones.
  , gonitoMetadataExternalLinks :: [Link]
    -- ^ External links (empty when none were given).
  , gonitoMetadataDependencies :: [SHA1]
    -- ^ Declared dependencies followed by those found via git submodules.
  }
  deriving (Eq, Show)
-- | Name of the options file looked up directly inside the repository
-- directory. It is also excluded from the globbed parameter files.
gonitoYamlFile :: FilePath
gonitoYamlFile = "gonito.yaml"
-- | Keep the 'Right' value, discarding whatever a 'Left' carried.
eitherToMaybe :: Either a b -> Maybe b
eitherToMaybe = either (const Nothing) Just
-- | Merge two sets of options; the second argument (@options@) takes
-- precedence over the optional first one (@otherOptions@).
--
-- * description, param files, MLRun path: the value from @options@ if
--   set, otherwise the one from @otherOptions@;
-- * tags: union of both when both are set, otherwise whichever is set;
-- * general params: left-biased union favouring @options@ (always 'Just');
-- * unwanted params: concatenation, @options@ first (always 'Just');
-- * external links, dependencies: concatenation (@options@ first) when
--   both are set, otherwise whichever is set.
--
-- When the first argument is 'Nothing', @options@ is returned unchanged.
combineExtractionOptions :: Maybe ExtractionOptions -> ExtractionOptions -> ExtractionOptions
combineExtractionOptions Nothing options = options
combineExtractionOptions (Just otherOptions) options = ExtractionOptions
  { extractionOptionsDescription = preferred extractionOptionsDescription
  , extractionOptionsTags = combined S.union extractionOptionsTags
  , extractionOptionsGeneralParams =
      Just $ M.union (fromMaybe M.empty (extractionOptionsGeneralParams options))
                     (fromMaybe M.empty (extractionOptionsGeneralParams otherOptions))
  , extractionOptionsUnwantedParams =
      Just $ fromMaybe [] (extractionOptionsUnwantedParams options)
          ++ fromMaybe [] (extractionOptionsUnwantedParams otherOptions)
  , extractionOptionsParamFiles = preferred extractionOptionsParamFiles
  , extractionOptionsMLRunPath = preferred extractionOptionsMLRunPath
  , extractionOptionsExternalLinks = combined (++) extractionOptionsExternalLinks
  , extractionOptionsDependencies = combined (++) extractionOptionsDependencies
  }
  where
    -- First set value wins, looking at @options@ before @otherOptions@.
    preferred :: (ExtractionOptions -> Maybe a) -> Maybe a
    preferred field = field options <|> field otherOptions

    -- Merge both values with the given function when both are set;
    -- otherwise return whichever one is set (if any).
    combined :: (a -> a -> a) -> (ExtractionOptions -> Maybe a) -> Maybe a
    combined merge field = case (field options, field otherOptions) of
      (Nothing, theirs) -> theirs
      (ours, Nothing) -> ours
      (Just ours, Just theirs) -> Just (merge ours theirs)
-- | Build the 'GonitoMetadata' for the repository checked out at @repoDir@.
--
-- Sources, in order of use:
--
-- 1. the last commit message (description and tags line);
-- 2. @gonito.yaml@ in the repository root (silently ignored when it is
--    missing or cannot be decoded);
-- 3. the options passed in, which take precedence over @gonito.yaml@;
-- 4. parameter files matched by the configured glob patterns;
-- 5. git submodules, which contribute additional dependencies.
--
-- The description falls back to @"???"@ when neither the options nor the
-- commit message provide one.
extractMetadataFromRepoDir :: FilePath -> ExtractionOptions -> IO GonitoMetadata
extractMetadataFromRepoDir repoDir formExtractionOptions = do
  lastMessage <- getLastCommitMessage repoDir
  let (mCommitDescription, mCommitTags) = parseCommitMessage lastMessage

  mYamlOptions <- eitherToMaybe <$> Y.decodeFileEither yamlPath
  let opts = combineExtractionOptions mYamlOptions formExtractionOptions
      description =
        fromMaybe "???" (extractionOptionsDescription opts <|> mCommitDescription)
      allTags =
        union (parseTags mCommitTags) (fromMaybe S.empty (extractionOptionsTags opts))

  -- One list of matching files per glob pattern.
  globbedFiles <- maybe (pure [])
                        (\patterns -> G.globDir (Import.map G.compile patterns) repoDir)
                        (extractionOptionsParamFiles opts)
  -- gonito.yaml holds options, not parameters, so never read it as a param file.
  fileParams <- M.unions
    <$> mapM parseParamFile (Import.filter (/= yamlPath) (Import.concat globbedFiles))

  let unwantedNames = fromMaybe [] (extractionOptionsUnwantedParams opts)
      explicitParams = fromMaybe M.empty (extractionOptionsGeneralParams opts)
      -- Unwanted names are removed from the file parameters only; the
      -- left-biased union then lets file parameters win over explicit ones.
      params = Import.foldl' (flip M.delete) fileParams unwantedNames
               `M.union` explicitParams

  submoduleDependencies <- extractDependenciesFromGitSubmodules repoDir
  pure GonitoMetadata
    { gonitoMetadataDescription = description
    , gonitoMetadataTags = allTags
    , gonitoMetadataGeneralParams = params
    , gonitoMetadataExternalLinks = fromMaybe [] (extractionOptionsExternalLinks opts)
    , gonitoMetadataDependencies =
        fromMaybe [] (extractionOptionsDependencies opts) ++ submoduleDependencies
    }
  where
    yamlPath = repoDir </> gonitoYamlFile
2018-11-16 12:43:44 +01:00
-- | Run @git submodule@ in @repoDir@ and turn every output line into a
-- 'SHA1': the first character is dropped and the following 40 characters
-- are taken as the hash. Returns an empty list when git exits with a
-- failure code.
extractDependenciesFromGitSubmodules :: FilePath -> IO [SHA1]
extractDependenciesFromGitSubmodules repoDir =
  hashesFrom <$> runProgram repoDir gitPath ["submodule"]
  where
    -- Number of hexadecimal characters in a SHA1 hash.
    sha1Length = 40

    hashesFrom (ExitSuccess, out) = Import.map lineToSHA1 (Data.Text.lines out)
    hashesFrom (ExitFailure _, _) = []

    -- NOTE(review): the dropped leading character is presumably the status
    -- flag git prints before each hash — confirm against git's output format.
    lineToSHA1 = fromTextToSHA1 . Data.Text.take sha1Length . Data.Text.drop 1
2018-11-03 10:41:49 +01:00
2018-11-03 10:56:58 +01:00
-- | Read a YAML file as a flat map of parameter names to textual values.
-- A file that cannot be decoded contributes no parameters.
parseParamFile :: FilePath -> IO (M.Map Text Text)
parseParamFile yamlFile =
  either (const M.empty) enforceTextHash <$> Y.decodeFileEither yamlFile
2018-11-03 10:56:58 +01:00
-- | Render every value of the map as text: each value is YAML-encoded,
-- decoded as UTF-8 and stripped of surrounding whitespace. Keys are
-- left untouched.
enforceTextHash :: M.Map Text Value -> M.Map Text Text
enforceTextHash = M.map renderValue
  where
    renderValue = strip . DTE.decodeUtf8 . Y.encode
2018-11-03 10:41:49 +01:00
-- | Fetch the message of the most recent commit by running
-- @git log -1 --pretty=%B@ in @repoDir@. Yields 'Nothing' when git exits
-- with a failure code.
getLastCommitMessage :: FilePath -> IO (Maybe Text)
getLastCommitMessage repoDir =
  messageFrom <$> runProgram repoDir gitPath ["log", "-1", "--pretty=%B"]
  where
    messageFrom (ExitSuccess, out) = Just out
    messageFrom (ExitFailure _, _) = Nothing
-- | Run @prog@ with @args@ in working directory @dir@ and return its exit
-- code together with everything it wrote to standard output, decoded as
-- UTF-8.
--
-- The child gets an already-closed standard input, so a program that
-- tries to read from it sees end-of-file instead of blocking forever.
-- Standard error is not collected.
--
-- NOTE(review): standard error is a pipe that is never read; a child
-- writing more than the pipe buffer to it would block. The git commands
-- run from this module are not expected to do that.
runProgram :: FilePath -> FilePath -> [String] -> IO (ExitCode, Text)
runProgram dir prog args = do
  (inH, outH, errH, procH) <- runInteractiveProcess prog args (Just dir) Nothing
  -- Nothing is ever sent to the child: close our end of its stdin at once
  -- rather than leaving the handle open until it is garbage-collected.
  hClose inH
  hSetBuffering outH NoBuffering
  out <- hGetContents outH
  -- Force the whole output before waiting, so the child can never block
  -- on a full stdout pipe while we wait for it to exit.
  exitCode <- Import.length out `seq` waitForProcess procH
  -- The process is gone; release the stderr pipe as well.
  hClose errH
  return (exitCode, decodeUtf8 out)
-- | Split a comma-separated tag list into a set of tags.
--
-- Each tag is stripped of surrounding whitespace. Empty segments (as in
-- @""@, @"a,,b"@ or a trailing comma) are dropped, so the result never
-- contains an empty tag. 'Nothing' yields the empty set.
parseTags :: Maybe Text -> S.Set Text
parseTags Nothing = S.empty
parseTags (Just tags) =
  S.fromList
    $ Import.filter (not . Data.Text.null)
    $ Import.map Data.Text.strip
    $ Data.Text.split (== ',') tags
-- | Extract @(description, raw tags text)@ from a commit message.
-- Both components are 'Nothing' when there is no message or when the
-- message cannot be parsed by 'commitMessageParser'.
parseCommitMessage :: Maybe Text -> (Maybe Text, Maybe Text)
parseCommitMessage = maybe nothingFound parseMessage
  where
    nothingFound = (Nothing, Nothing)
    parseMessage = either (const nothingFound) id . parseOnly commitMessageParser
-- | Parse a commit message: skip leading empty lines, take the first
-- non-empty line as the description, then look for a tags line in the
-- rest of the message. The description is always 'Just' on success; the
-- tags are 'Nothing' when no tags line is found.
commitMessageParser :: Data.Attoparsec.Text.Parser (Maybe Text, Maybe Text)
commitMessageParser =
  (,) <$> (Just <$> descriptionLine) <*> optionalTags
  where
    descriptionLine = skipMany emptyLine *> nonEmptyLine
    optionalTags = (Just <$> findTagsLine) <|> pure Nothing
-- | Return the contents of the first tags line found at or after the
-- current position, skipping one whole line at a time until one matches.
-- Fails when the input runs out before a tags line is seen.
findTagsLine :: Data.Attoparsec.Text.Parser Text
findTagsLine = tagsLine <|> (anyLine *> findTagsLine)
-- | Parse a line of the form @tags: ...@ (also accepted: @labels:@,
-- @Tags:@, @Labels:@) and return the text after the colon, without the
-- blanks immediately following the colon.
--
-- The keyword must start at the current position. The line may be ended
-- by a line terminator or by the end of the input.
tagsLine :: Data.Attoparsec.Text.Parser Text
tagsLine = do
  _ <- string "tags" <|> string "labels" <|> string "Tags" <|> string "Labels"
  _ <- char ':'
  -- Only spaces and tabs are skipped here. Skipping any whitespace would
  -- also swallow the line terminator of an empty tags line and make the
  -- *next* line be read as the tag list.
  Data.Attoparsec.Text.skipWhile Data.Attoparsec.Text.isHorizontalSpace
  s <- many notEndOfLine
  -- A tags line at the very end of a message need not be newline-terminated.
  endOfLine <|> Data.Attoparsec.Text.endOfInput
  return $ Data.Text.pack s
-- | Parse the next line that has visible content and return it from its
-- first non-blank character up to (not including) the line terminator.
--
-- Leading whitespace is skipped with 'space', which also matches line
-- terminators, so any blank lines in front are skipped as well. The line
-- may be ended by a line terminator or by the end of the input, so a
-- message without a trailing newline is still accepted.
nonEmptyLine :: Data.Attoparsec.Text.Parser Text
nonEmptyLine = do
  skipMany space
  l1 <- notSpace
  l <- many notEndOfLine
  endOfLine <|> Data.Attoparsec.Text.endOfInput
  return $ Data.Text.pack (l1 : l)
-- | Consume one whole line, including its line terminator.
--
-- The terminator is mandatory, so this fails at the end of the input.
-- 'findTagsLine' relies on that failure to stop its recursion.
anyLine :: Data.Attoparsec.Text.Parser ()
anyLine = skipMany notEndOfLine *> endOfLine
-- | Match a single character that is neither a space, a tab, a carriage
-- return nor a newline.
notSpace :: Data.Attoparsec.Text.Parser Char
notSpace = satisfy isVisible
  where
    isVisible '\r' = False
    isVisible '\n' = False
    isVisible ' ' = False
    isVisible '\t' = False
    isVisible _ = True
-- | Match a single character that is neither a carriage return nor a
-- newline.
notEndOfLine :: Data.Attoparsec.Text.Parser Char
notEndOfLine = satisfy insideLine
  where
    insideLine '\r' = False
    insideLine '\n' = False
    insideLine _ = True
-- | Consume one line that contains nothing but spaces and tabs,
-- including its line terminator.
--
-- Only horizontal whitespace is skipped before the terminator. Skipping
-- with 'space' would consume the terminator itself (it matches any
-- whitespace, newlines included), leaving nothing for 'endOfLine' to
-- match, so the parser could never succeed.
emptyLine :: Data.Attoparsec.Text.Parser ()
emptyLine =
  Data.Attoparsec.Text.skipWhile Data.Attoparsec.Text.isHorizontalSpace *> endOfLine