Add Haversine metric
This commit is contained in:
parent
61eb437909
commit
1dfe3ce7f0
@ -23,6 +23,7 @@ library
|
||||
, GEval.CreateChallenge
|
||||
, GEval.OptionsParser
|
||||
, GEval.BLEU
|
||||
, GEval.Haversine
|
||||
, GEval.Clippings
|
||||
, GEval.PrecisionRecall
|
||||
, GEval.ClusteringMetrics
|
||||
|
@ -185,6 +185,7 @@ isPreprocessable (MultiLabelFMeasure _ _) = True
|
||||
isPreprocessable MultiLabelLogLoss = False
|
||||
isPreprocessable MultiLabelLikelihood = False
|
||||
isPreprocessable (Mean metric) = isPreprocessable metric
|
||||
isPreprocessable Haversine = False
|
||||
|
||||
defaultOutDirectory = "."
|
||||
defaultTestName = "test-A"
|
||||
@ -1001,6 +1002,7 @@ continueGEvalCalculations SAMultiLabelLogLoss MultiLabelLogLoss = defineContinua
|
||||
id
|
||||
noGraph
|
||||
|
||||
continueGEvalCalculations SAHaversine Haversine = defineContinuation averageC id noGraph
|
||||
|
||||
defineContinuation :: (ConduitT c Void (ResourceT m) d) -- ^ a Conduit which aggregates all the combined values into
|
||||
-- a "total" value
|
||||
|
@ -115,6 +115,15 @@ Do OCR.
|
||||
This is a sample fake challenge for Gonito framework. Replace it with
|
||||
the description of your challenge.|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents Haversine testName = [i|
|
||||
GEval simple sphere distance
|
||||
==========================
|
||||
|
||||
Compute distance between two points on a sphere given their longitudes and latitudes.
|
||||
|
||||
This is a sample fake challenge for Gonito framework. Replace it with
|
||||
the description of your challenge.|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents Accuracy testName = [i|
|
||||
GEval sample classification challenge
|
||||
=====================================
|
||||
@ -462,6 +471,11 @@ configContents schemes format testName =
|
||||
-- for the time being we are using the original function.
|
||||
|
||||
trainInContents :: Metric -> String
|
||||
trainInContents Haversine = unlines
|
||||
$ map last
|
||||
$ map (splitOn "\t")
|
||||
$ lines
|
||||
$ trainContents Haversine
|
||||
trainInContents metric = unlines
|
||||
$ map (intercalate "\t")
|
||||
$ map tail
|
||||
@ -470,6 +484,12 @@ trainInContents metric = unlines
|
||||
$ trainContents metric
|
||||
|
||||
trainExpectedContents :: Metric -> String
|
||||
trainExpectedContents Haversine = unlines
|
||||
$ map (intercalate "\t")
|
||||
$ map (take 2)
|
||||
$ map (splitOn "\t")
|
||||
$ lines
|
||||
$ trainContents Haversine
|
||||
trainExpectedContents metric = unlines $ map head $ map (splitOn "\t") $ lines $ trainContents metric
|
||||
|
||||
trainContents :: Metric -> String
|
||||
@ -484,6 +504,9 @@ trainContents CER = [hereLit|Hannibal ad portas train1.pdf
|
||||
equo ne credite train2.pdf
|
||||
errare humanum est train3.pdf
|
||||
|]
|
||||
trainContents Haversine = [hereLit|30.47547 -90.100911 some text
|
||||
33.399478 -110.87095 Another text
|
||||
|]
|
||||
|
||||
trainContents Accuracy = [hereLit|Y 10 none yes
|
||||
N -2 strong no
|
||||
@ -645,6 +668,8 @@ devInContents (Soft2DFMeasure _) = devInContents ClippEU
|
||||
devInContents ClippEU = [hereLit|file1.djvu
|
||||
file2.djvu
|
||||
|]
|
||||
devInContents Haversine = [hereLit|Some dev text
|
||||
|]
|
||||
devInContents _ = [hereLit|0.72 0 0.007
|
||||
9.54 62 0.054
|
||||
|]
|
||||
@ -719,6 +744,8 @@ devExpectedContents (Soft2DFMeasure _) = [hereLit|
|
||||
devExpectedContents ClippEU = [hereLit|
|
||||
10/10,20,30,100/5 3/0,50,500,500/5
|
||||
|]
|
||||
devExpectedContents Haversine = [hereLit|32.812883 -109.625582
|
||||
|]
|
||||
devExpectedContents _ = [hereLit|0.82
|
||||
95.2
|
||||
|]
|
||||
@ -794,6 +821,9 @@ testInContents (Soft2DFMeasure _) = testInContents ClippEU
|
||||
testInContents ClippEU = [hereLit|file3.djvu
|
||||
file4.djvu
|
||||
|]
|
||||
testInContents Haversine = [hereLit|Some test text
|
||||
Another test text
|
||||
|]
|
||||
testInContents _ = [hereLit|0.72 0 0.007
|
||||
9.54 62 0.054
|
||||
|]
|
||||
@ -871,6 +901,9 @@ testExpectedContents ClippEU = [hereLit|3/0,0,100,100/10
|
||||
|]
|
||||
testExpectedContents GLEU = [hereLit|Alice has a black cat
|
||||
|]
|
||||
testExpectedContents Haversine = [hereLit|39.575264 -76.995928
|
||||
29.949932 -90.070116
|
||||
|]
|
||||
testExpectedContents _ = [hereLit|0.11
|
||||
17.2
|
||||
|]
|
||||
@ -921,6 +954,7 @@ inHeaderContents MultiLabelLikelihood = inHeaderContents MultiLabelLogLoss
|
||||
inHeaderContents MultiLabelLogLoss = Just ["Utterance"]
|
||||
inHeaderContents (Soft2DFMeasure _) = inHeaderContents ClippEU
|
||||
inHeaderContents ClippEU = Just ["DjvuFilePath"]
|
||||
inHeaderContents Haversine = Just ["Text"]
|
||||
inHeaderContents _ = Just ["OrbitalPeriod", "OrbitalEccentricity", "NumberOfMoons"]
|
||||
|
||||
outHeaderContents :: Metric -> Maybe [String]
|
||||
@ -951,6 +985,7 @@ outHeaderContents MultiLabelLikelihood = outHeaderContents MultiLabelLogLoss
|
||||
outHeaderContents MultiLabelLogLoss = Just ["Emotion"]
|
||||
outHeaderContents (Soft2DFMeasure _) = Just ["Rectangle"]
|
||||
outHeaderContents ClippEU = Just ["Rectangle"]
|
||||
outHeaderContents Haversine = Just ["Longitude", "Latitude"]
|
||||
outHeaderContents _ = Just ["Mass"]
|
||||
|
||||
gitignoreContents :: String
|
||||
|
28
src/GEval/Haversine.hs
Normal file
28
src/GEval/Haversine.hs
Normal file
@ -0,0 +1,28 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module GEval.Haversine
|
||||
(haversine)
|
||||
where
|
||||
|
||||
|
||||
haversine :: ((Double, Double), (Double, Double)) -> Double
|
||||
haversine ((longitude_1, latitude_1), (longitude_2, latitude_2)) = hav longitude_1_rad latitude_1_rad longitude_2_rad latitude_2_rad
|
||||
where
|
||||
longitude_1_rad = toRadians longitude_1
|
||||
latitude_1_rad = toRadians latitude_1
|
||||
longitude_2_rad = toRadians longitude_2
|
||||
latitude_2_rad = toRadians latitude_2
|
||||
|
||||
hav :: Double -> Double -> Double -> Double -> Double
|
||||
hav longitude_1 latitude_1 longitude_2 latitude_2 = 2 * asin (sqrt h) * r
|
||||
where
|
||||
r = 6371.0 -- Radius of earth in kilometers. Use 3956 for miles
|
||||
longitude = longitude_2 - longitude_1
|
||||
latitude = latitude_2 - latitude_1
|
||||
h = hav_ longitude + cos latitude * cos latitude * hav_ latitude
|
||||
|
||||
hav_ :: Double -> Double
|
||||
hav_ x = sin(x / 2) ** 2
|
||||
|
||||
toRadians :: Double -> Double
|
||||
toRadians degrees = degrees * pi / 180.0
|
@ -34,6 +34,7 @@ data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | CER | Accura
|
||||
| SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double
|
||||
| ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
|
||||
| FLCFMeasure Double
|
||||
| Haversine
|
||||
-- it would be better to avoid infinite recursion here
|
||||
-- `Mean (Mean BLEU)` is not useful, but as it would mean
|
||||
-- a larger refactor, we will postpone this
|
||||
@ -88,6 +89,7 @@ instance Show Metric where
|
||||
show (MultiLabelFMeasure beta (Harden matchSpec)) = "Harden/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
||||
show MultiLabelLogLoss = "MultiLabel-Logloss"
|
||||
show MultiLabelLikelihood = "MultiLabel-Likelihood"
|
||||
show Haversine = "Haversine"
|
||||
show (Mean metric) = "Mean/" ++ (show metric)
|
||||
|
||||
applyMatchingSpecification :: (MatchingSpecification -> MatchingSpecification)
|
||||
@ -166,6 +168,7 @@ instance Read Metric where
|
||||
readsPrec _ ('S':'M':'A':'P':'E':theRest) = [(SMAPE, theRest)]
|
||||
readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)]
|
||||
readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(MultiLabelLikelihood, theRest)]
|
||||
readsPrec _ ('H':'a':'v':'e':'r':'s':'i':'n':'e':theRest) = [(Haversine, theRest)]
|
||||
|
||||
|
||||
|
||||
@ -206,6 +209,7 @@ getMetricOrdering SMAPE = TheLowerTheBetter
|
||||
getMetricOrdering (MultiLabelFMeasure _ _) = TheHigherTheBetter
|
||||
getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter
|
||||
getMetricOrdering MultiLabelLikelihood = TheHigherTheBetter
|
||||
getMetricOrdering Haversine = TheLowerTheBetter
|
||||
getMetricOrdering (Mean metric) = getMetricOrdering metric
|
||||
|
||||
metricCompare :: Metric -> MetricValue -> MetricValue -> Ordering
|
||||
|
@ -43,6 +43,7 @@ import GEval.BIO (TaggedEntity, parseBioSequenceIntoEntities, parseBioSequenceIn
|
||||
import GEval.LogLossHashed (parseWordSpecs, wordSpecToPair)
|
||||
import GEval.ProbList (ProbList(..), parseIntoProbList, WordWithProb(..), countLogLossOnProbList)
|
||||
import GEval.MatchingSpecification
|
||||
import GEval.Haversine
|
||||
|
||||
-- | Helper type so that singleton can be used.
|
||||
-- | (The problem is that some metrics are parametrized by Double
|
||||
@ -53,7 +54,7 @@ singletons [d|data AMetric = ARMSE | AMSE | APearson | ASpearman | ABLEU | AGLEU
|
||||
| ABIOF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification
|
||||
| AMultiLabelLogLoss | AMultiLabelLikelihood
|
||||
| ASoftFMeasure | AProbabilisticMultiLabelFMeasure | AProbabilisticSoftFMeasure | ASoft2DFMeasure
|
||||
| AFLCFMeasure
|
||||
| AFLCFMeasure | AHaversine
|
||||
deriving (Eq)
|
||||
|]
|
||||
|
||||
@ -92,6 +93,7 @@ toHelper (FLCFMeasure _) = AFLCFMeasure
|
||||
toHelper (ProbabilisticMultiLabelFMeasure _) = AProbabilisticMultiLabelFMeasure
|
||||
toHelper (ProbabilisticSoftFMeasure _) = AProbabilisticSoftFMeasure
|
||||
toHelper (Soft2DFMeasure _) = ASoft2DFMeasure
|
||||
toHelper Haversine = AHaversine
|
||||
|
||||
type family ParsedInputType (t :: AMetric) :: * where
|
||||
ParsedInputType ACharMatch = Text
|
||||
@ -131,6 +133,7 @@ type family ParsedExpectedType (t :: AMetric) :: * where
|
||||
ParsedExpectedType (AMultiLabelFMeasure _) = [Text]
|
||||
ParsedExpectedType AMultiLabelLogLoss = [Text]
|
||||
ParsedExpectedType AMultiLabelLikelihood = [Text]
|
||||
ParsedExpectedType AHaversine = (Double, Double)
|
||||
|
||||
expectedParser :: SAMetric t -> Text -> Either String (ParsedExpectedType t)
|
||||
expectedParser SARMSE = doubleParser
|
||||
@ -166,6 +169,7 @@ expectedParser SASMAPE = doubleParser
|
||||
expectedParser (SAMultiLabelFMeasure _) = intoWords
|
||||
expectedParser SAMultiLabelLogLoss = intoWords
|
||||
expectedParser SAMultiLabelLikelihood = intoWords
|
||||
expectedParser SAHaversine = parseSpherePoints
|
||||
|
||||
type family ParsedOutputType (t :: AMetric) :: * where
|
||||
ParsedOutputType ABLEU = [String]
|
||||
@ -178,6 +182,7 @@ type family ParsedOutputType (t :: AMetric) :: * where
|
||||
ParsedOutputType AProbabilisticMultiLabelFMeasure = [WordWithProb]
|
||||
ParsedOutputType AMultiLabelLikelihood = ProbList
|
||||
ParsedOutputType AMultiLabelLogLoss = ProbList
|
||||
ParsedOutputType AHaversine = (Double, Double)
|
||||
ParsedOutputType t = ParsedExpectedType t
|
||||
|
||||
outputParser :: SAMetric t -> Text -> Either String (ParsedOutputType t)
|
||||
@ -214,6 +219,7 @@ outputParser SASMAPE = doubleParser
|
||||
outputParser (SAMultiLabelFMeasure _) = intoWords
|
||||
outputParser SAMultiLabelLogLoss = Right . parseIntoProbList
|
||||
outputParser SAMultiLabelLikelihood = Right . parseIntoProbList
|
||||
outputParser SAHaversine = parseSpherePoints
|
||||
|
||||
type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
|
||||
ItemIntermediateRepresentationType ABLEU = (Int, Int, Int, Int, Int, Int, Int, Int, Int)
|
||||
@ -241,6 +247,7 @@ type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
|
||||
ItemIntermediateRepresentationType ACharMatch = (Text, Text)
|
||||
ItemIntermediateRepresentationType AWER = (Int, Int)
|
||||
ItemIntermediateRepresentationType ACER = (Int, Int)
|
||||
ItemIntermediateRepresentationType AHaversine = Double
|
||||
ItemIntermediateRepresentationType t = Double
|
||||
|
||||
itemStep :: SAMetric t -> (ParsedExpectedType t, ParsedOutputType t) -> ItemIntermediateRepresentationType t
|
||||
@ -278,6 +285,7 @@ itemStep SASMAPE = smape
|
||||
itemStep (SAMultiLabelFMeasure smatchSpec) = getWeightedCounts (getMatchingFunctionForText $ fromSing smatchSpec)
|
||||
itemStep SAMultiLabelLogLoss = uncurry countLogLossOnProbList
|
||||
itemStep SAMultiLabelLikelihood = uncurry countLogLossOnProbList
|
||||
itemStep SAHaversine = haversine
|
||||
|
||||
|
||||
doubleParser = getValue . TR.double
|
||||
@ -400,3 +408,12 @@ countHitsAndTotals (es, os) =
|
||||
| e == (pack "*") = (h, t)
|
||||
| o `Prelude.elem` (splitOn (pack ";") e) = (h + 1, t + 1)
|
||||
| otherwise = (h, t + 1)
|
||||
|
||||
parseSpherePoints :: Text -> Either String (Double, Double)
|
||||
parseSpherePoints t = case DLS.splitOn "\t" (unpack t) of
|
||||
[longitudeStr, latitudeStr] -> case doubleParser (pack longitudeStr) of
|
||||
Right longitude -> case doubleParser (pack latitudeStr) of
|
||||
Right latitude -> Right (longitude, latitude)
|
||||
Left _ -> Left "cannot parse line with latitude of sphere"
|
||||
Left _ -> Left "cannot parse line with longitude of sphere"
|
||||
_ -> Left "cannot parse line with longitude and latitude of sphere"
|
||||
|
@ -76,6 +76,7 @@ listOfAvailableMetrics = [RMSE,
|
||||
Soft2DFMeasure 1.0,
|
||||
Soft2DFMeasure 2.0,
|
||||
Soft2DFMeasure 0.25,
|
||||
Haversine,
|
||||
CharMatch]
|
||||
|
||||
extraInfo :: EvaluationScheme -> Maybe String
|
||||
@ -103,6 +104,7 @@ isMetricDescribed GLEU = True
|
||||
isMetricDescribed WER = True
|
||||
isMetricDescribed CER = True
|
||||
isMetricDescribed SegmentAccuracy = True
|
||||
isMetricDescribed Haversine = True
|
||||
isMetricDescribed _ = False
|
||||
|
||||
getEvaluationSchemeDescription :: EvaluationScheme -> String
|
||||
@ -170,6 +172,10 @@ getMetricDescription SegmentAccuracy =
|
||||
The percentage of labels in the ground truth retrieved in the actual output is returned.
|
||||
Accuracy is calculated separately for each item and then averaged.
|
||||
|]
|
||||
getMetricDescription Haversine =
|
||||
[i|The haversine formula determines the great-circle distance between
|
||||
two points on a sphere given their longitudes and latitudes (in degrees).
|
||||
|]
|
||||
|
||||
outContents :: Metric -> String
|
||||
outContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 first-name/3
|
||||
@ -197,6 +203,9 @@ tabula rasai
|
||||
outContents SegmentAccuracy = [hereLit|N:1-4 V:5-6 N:8-10 V:12-13 A:15-17
|
||||
N:1-4 V:6-7 A:9-13
|
||||
|]
|
||||
outContents Haversine = [hereLit|39.575264 -56.995928
|
||||
29.949932 -90.070116
|
||||
|]
|
||||
|
||||
expectedScore :: EvaluationScheme -> MetricValue
|
||||
expectedScore (EvaluationScheme (MultiLabelFMeasure 1.0 ExactMatch) []) = 0.6666
|
||||
@ -220,6 +229,8 @@ expectedScore (EvaluationScheme WER [])
|
||||
= 0.08571
|
||||
expectedScore (EvaluationScheme CER [])
|
||||
= 0.14814
|
||||
expectedScore (EvaluationScheme Haversine [])
|
||||
= 1044.2633358563135
|
||||
|
||||
helpMetricParameterMetricsList :: String
|
||||
helpMetricParameterMetricsList = intercalate ", " $ map (\s -> (show s) ++ (case extraInfo s of
|
||||
@ -283,6 +294,9 @@ such a case).
|
||||
formatDescription WER = formatDescription GLEU
|
||||
formatDescription CER = [hereLit|Any text, whitespace and punctuation marks are also considered.
|
||||
|]
|
||||
formatDescription Haversine = [hereLit|Each line is a latitude and longitude of sphere separated by tabulation,
|
||||
e.g. "41.558153 -73.051497".
|
||||
|]
|
||||
|
||||
scoreExplanation :: EvaluationScheme -> Maybe String
|
||||
scoreExplanation (EvaluationScheme (MultiLabelFMeasure _ ExactMatch) [])
|
||||
|
@ -135,6 +135,9 @@ main = hspec $ do
|
||||
runGEvalTest "cer-mean-simple" `shouldReturnAlmost` 0.277777777777778
|
||||
it "space escaping" $
|
||||
runGEvalTest "cer-space-escaping" `shouldReturnAlmost` 0.0555555
|
||||
describe "Haversine" $ do
|
||||
it "simple example" $
|
||||
runGEvalTest "haversine" `shouldReturnAlmost` 1951.9351057250876
|
||||
describe "Accuracy" $ do
|
||||
it "simple example" $
|
||||
runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6
|
||||
|
3
test/haversine/haversine-solution/test-A/out.tsv
Normal file
3
test/haversine/haversine-solution/test-A/out.tsv
Normal file
@ -0,0 +1,3 @@
|
||||
40.735657 -74.172367
|
||||
21.124312 -73.051497
|
||||
10.232421 -105.734523
|
|
1
test/haversine/haversine/config.txt
Normal file
1
test/haversine/haversine/config.txt
Normal file
@ -0,0 +1 @@
|
||||
--metric Haversine
|
3
test/haversine/haversine/test-A/expected.tsv
Normal file
3
test/haversine/haversine/test-A/expected.tsv
Normal file
@ -0,0 +1,3 @@
|
||||
40.735657 -74.172367
|
||||
41.558153 -73.051497
|
||||
40.832421 -115.763123
|
|
Loading…
Reference in New Issue
Block a user