fdp2/Parse.hs

89 lines
2.8 KiB
Haskell

module Parse -- (Step (Step),Parse,Ind,Arc,(+<-),(+->),(<<),nextId,size,len,trees,ep,eps,arcs)
where
-- import Data.List (intercalate)
import Base
import Step
import Data.List
import GHC.Exts
type Parse τ c = [Step τ c]
ep = []
eps = [ep]
infixl 4 <<, +->, +<-
(<<) :: Parse τ κ -> Step τ κ -> Parse τ κ
p << s = s : p
addNode = (<<)
(+->),(+<-) :: Parse τ κ -> Arc τ -> Parse τ κ
(Step i c [] d : p) +-> a = Step i c [a] d : p
(Step i c h d : p) +<- a = Step i c h (a:d) : p
linkHead = (+->)
linkDep = (+<-)
nextId :: Parse τ κ -> Ind
nextId [] = 1
nextId (Step i _ _ _:_) = i + 1
len :: Parse τ κ -> Int
len p = length p
size :: Parse τ κ -> Int
size p = sum (map stepSize p) where stepSize (Step _ _ h ds) = length (h ++ ds)
trees :: Parse τ κ -> Int
trees p = len p - size p
showParse :: (Show τ, Show κ) => Parse τ κ -> String
showParse = concatMap shortStep
where
shortStep (Step i c h ds) = "<" ++ show i ++ "|" ++ showArcs h ++ "|" ++ showArcs ds ++ ">"
showArcs = intercalate "," . map showArc
showArc (Head r i) = show r ++ ":" ++ show i
showArc (Dep r i) = show r ++ ":" ++ show i
-- arclist :: Parse τ κ -> [(Int,Arc τ)]
-- arclist p = concatMap stepArcs p
-- where
-- indexes = [1..(length p)]
-- stepArcs (Step i _ hs ds) = [(i,Head t j) | Head t j <- hs ] ++ [(j,Head t i) | Dep t j <- ds ]
-- complete [] _ = []
-- complete (i:is) (a@(i',_):as) | i == i' = a : complete is as
-- | otherwise = (i,Root) : complete is (a:as)
arclist :: Parse τ κ -> [(Int,Arc τ)]
arclist p = complete indexes $ sortWith (\(i,_)->i) $ concatMap stepArcs p
where
indexes = [1..(length p)]
stepArcs (Step i _ hs ds) = [(i,Head t j) | Head t j <- hs ] ++ [(j,Head t i) | Dep t j <- ds ]
complete [] _ = []
complete (i:is) (a@(i',_):as) | i == i' = a : complete is as
| otherwise = (i,Root) : complete is (a:as)
complete (i:is) [] = (i,Root) : complete is []
conll :: [String] -> Parse τ κ -> [Conll τ κ]
conll words parse = let steps = reverse parse
arcs = arclist parse
in
conll' words steps arcs
where
conll' [] [] [] = []
conll' (w:ws) (Step _ c _ _ : ss) ((i,a):as) = Conll i w c a : conll' ws ss as
data Conll τ κ = Conll Int String κ (Arc τ)
instance (Show τ, Show κ) => Show (Conll τ κ) where
show (Conll i w c Root) = intercalate "\t" [show i,w,show c,"0","Root"]
show (Conll i w c (Head t j)) = intercalate "\t" [show i,w,show c,show j,show t]