Fix bug in cross-tabs
It was due to a huge bug in ordered-containers.
This commit is contained in:
parent
593871f19b
commit
e627106dd6
@ -8,6 +8,7 @@ module Data.SplitIntoCrossTabs
|
|||||||
TextFrag(..))
|
TextFrag(..))
|
||||||
where
|
where
|
||||||
|
|
||||||
|
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Set.Ordered as OS
|
import qualified Data.Set.Ordered as OS
|
||||||
@ -16,12 +17,12 @@ import qualified Data.Set as S
|
|||||||
import qualified Data.Foldable as F
|
import qualified Data.Foldable as F
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import qualified Data.Map.Lazy as LM
|
import qualified Data.Map.Lazy as LM
|
||||||
|
import Data.Maybe (fromMaybe)
|
||||||
import Debug.Trace
|
|
||||||
|
|
||||||
import Data.List (unfoldr, sortBy, maximumBy, minimumBy)
|
import Data.List (unfoldr, sortBy, maximumBy, minimumBy)
|
||||||
|
|
||||||
data TableWithValues a = TableWithValues [Text] [(Text, [a])]
|
data TableWithValues a = TableWithValues [Text] [(Text, [a])]
|
||||||
|
deriving (Show)
|
||||||
|
|
||||||
data CrossTab = SingleItem Text | CrossTab [TextFrag] [TextFrag]
|
data CrossTab = SingleItem Text | CrossTab [TextFrag] [TextFrag]
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
@ -29,7 +30,12 @@ data CrossTab = SingleItem Text | CrossTab [TextFrag] [TextFrag]
|
|||||||
data TextFrag = Prefix Text | Suffix Text
|
data TextFrag = Prefix Text | Suffix Text
|
||||||
deriving (Show, Eq, Ord)
|
deriving (Show, Eq, Ord)
|
||||||
|
|
||||||
splitIntoTablesWithValues :: Text
|
|
||||||
|
informativeLookup m e = case m LM.!? e of
|
||||||
|
Just x -> x
|
||||||
|
Nothing -> error ("Cannot find " ++ (show e) ++ " in " ++ (show m))
|
||||||
|
|
||||||
|
splitIntoTablesWithValues :: Show a => Text
|
||||||
-> Text
|
-> Text
|
||||||
-> LM.Map Text a -- ^ map from which values will be taken,
|
-> LM.Map Text a -- ^ map from which values will be taken,
|
||||||
-- deliberately a lazy map so that
|
-- deliberately a lazy map so that
|
||||||
@ -43,12 +49,12 @@ splitIntoTablesWithValues defaultMainHeader defaultSecondaryHeader mapping =
|
|||||||
joinSingleItems (e : rest) = e : joinSingleItems rest
|
joinSingleItems (e : rest) = e : joinSingleItems rest
|
||||||
joinSingleItems [] = []
|
joinSingleItems [] = []
|
||||||
|
|
||||||
convertIntoTableWithValues :: Text -> Text -> LM.Map Text a -> CrossTab -> TableWithValues a
|
convertIntoTableWithValues :: Show a => Text -> Text -> LM.Map Text a -> CrossTab -> TableWithValues a
|
||||||
convertIntoTableWithValues defaultMainHeader defaultSecondaryHeader mapping (SingleItem t) =
|
convertIntoTableWithValues defaultMainHeader defaultSecondaryHeader mapping (SingleItem t) =
|
||||||
TableWithValues [defaultMainHeader, defaultSecondaryHeader] [(t, [mapping LM.! t])]
|
TableWithValues [defaultMainHeader, defaultSecondaryHeader] [(t, [mapping LM.! t])]
|
||||||
convertIntoTableWithValues defaultMainHeader defaultSecondaryHeader mapping (CrossTab rowNames columnNames) =
|
convertIntoTableWithValues defaultMainHeader defaultSecondaryHeader mapping (CrossTab rowNames columnNames) =
|
||||||
TableWithValues (T.empty : (map toText columnNames)) (map processRow rowNames)
|
TableWithValues (T.empty : (map toText columnNames)) (map processRow rowNames)
|
||||||
where processRow rowName = (toText rowName, map (\colName -> mapping LM.! (combineFrags rowName colName)) columnNames)
|
where processRow rowName = (toText rowName, map (\colName -> mapping `informativeLookup` (combineFrags rowName colName)) columnNames)
|
||||||
|
|
||||||
splitIntoCrossTabs :: [Text] -> [CrossTab]
|
splitIntoCrossTabs :: [Text] -> [CrossTab]
|
||||||
splitIntoCrossTabs inputs =
|
splitIntoCrossTabs inputs =
|
||||||
@ -98,6 +104,9 @@ entryComparator (r1, (_, s1)) (r2, (_, s2)) = (OS.size s2 `compare` OS.size s1)
|
|||||||
`thenCmp`
|
`thenCmp`
|
||||||
(r1 `compare` r2)
|
(r1 `compare` r2)
|
||||||
|
|
||||||
|
-- OS.|/\ is broken
|
||||||
|
(|/\) osetA osetB = OS.filter ((flip OS.member) osetB) osetA
|
||||||
|
|
||||||
findTheBestCrossTabForTextPart :: CrossTab -> [(Int, (TextFrag, OS.OSet TextFrag))] -> (TextFrag, OS.OSet TextFrag) -> CrossTab
|
findTheBestCrossTabForTextPart :: CrossTab -> [(Int, (TextFrag, OS.OSet TextFrag))] -> (TextFrag, OS.OSet TextFrag) -> CrossTab
|
||||||
findTheBestCrossTabForTextPart defaultCrossTab entries chosenEntry@(t, tset) = if crossTabSize bestCrossTabFound > 1
|
findTheBestCrossTabForTextPart defaultCrossTab entries chosenEntry@(t, tset) = if crossTabSize bestCrossTabFound > 1
|
||||||
then bestCrossTabFound
|
then bestCrossTabFound
|
||||||
@ -106,11 +115,11 @@ findTheBestCrossTabForTextPart defaultCrossTab entries chosenEntry@(t, tset) = i
|
|||||||
entriesOrderedByIntersection =
|
entriesOrderedByIntersection =
|
||||||
sortBy entryComparator
|
sortBy entryComparator
|
||||||
$ filter (\(_, (_, tset')) -> OS.size tset' >= 2)
|
$ filter (\(_, (_, tset')) -> OS.size tset' >= 2)
|
||||||
$ map (\(r, (t', tset')) -> (r, (t', tset' OS.|/\ tset))) entries
|
$ map (\(r, (t', tset')) -> (r, (t', tset' |/\ tset))) entries
|
||||||
step (_, (t', tset')) currentTab@(CrossTab frags common) = selectedTab
|
step (_, (t', tset')) currentTab@(CrossTab frags common) = selectedTab
|
||||||
where newTab = CrossTab newT (F.toList newCommon)
|
where newTab = CrossTab newT (F.toList newCommon)
|
||||||
newT = t':frags
|
newT = t':frags
|
||||||
newCommon = (OS.fromList common) OS.|/\ tset'
|
newCommon = (OS.fromList common) |/\ tset'
|
||||||
selectedTab = if crossTabSize newTab >= crossTabSize currentTab
|
selectedTab = if crossTabSize newTab >= crossTabSize currentTab
|
||||||
then newTab
|
then newTab
|
||||||
else currentTab
|
else currentTab
|
||||||
|
@ -17,6 +17,7 @@ data MetricResult = SimpleRun MetricValue | BootstrapResampling [MetricValue]
|
|||||||
|
|
||||||
instance Show MetricResult where
|
instance Show MetricResult where
|
||||||
show (SimpleRun val) = show val
|
show (SimpleRun val) = show val
|
||||||
|
show (BootstrapResampling vals) = show vals
|
||||||
|
|
||||||
data MetricOutput = MetricOutput MetricResult (Maybe GraphSeries)
|
data MetricOutput = MetricOutput MetricResult (Maybe GraphSeries)
|
||||||
|
|
||||||
|
@ -716,6 +716,12 @@ main = hspec $ do
|
|||||||
"ba-b-0x", "ba-b-1x", "ba-b-2x",
|
"ba-b-0x", "ba-b-1x", "ba-b-2x",
|
||||||
"ba-c-0x", "ba-c-1x", "ba-c-2x" ]
|
"ba-c-0x", "ba-c-1x", "ba-c-2x" ]
|
||||||
describe "cross-tabs" $ do
|
describe "cross-tabs" $ do
|
||||||
|
it "tricky" $ do
|
||||||
|
splitIntoCrossTabs ["AAAfoo",
|
||||||
|
"AAAbar", "BBBbar", "CCCbar",
|
||||||
|
"AAAbaz", "BBBbaz", "CCCbaz" ] `shouldBe ` [
|
||||||
|
SingleItem "AAAfoo",
|
||||||
|
CrossTab [Prefix "AAAba", Prefix "BBBba", Prefix "CCCba"] [Suffix "r", Suffix "z"]]
|
||||||
it "singleton" $ do
|
it "singleton" $ do
|
||||||
splitIntoCrossTabs ["abababab"] `shouldBe` [SingleItem "abababab"]
|
splitIntoCrossTabs ["abababab"] `shouldBe` [SingleItem "abababab"]
|
||||||
it "too small" $ do
|
it "too small" $ do
|
||||||
|
Loading…
Reference in New Issue
Block a user