From e1282b4ffe62cc505eff18d11417ab9580135bae Mon Sep 17 00:00:00 2001 From: Rob Stewart Date: Sun, 14 Dec 2014 15:31:57 +0000 Subject: [PATCH] Initial patricia tree implementation --- rdf4h.cabal | 4 + src/Data/RDF/FglGraph.hs | 21 -- src/Data/RDF/PatriciaTreeGraph.hs | 188 ++++++++++++++++++ src/Data/RDF/Query.hs | 14 +- testsuite/tests/Data/RDF/GraphTestUtils.hs | 9 +- .../tests/Data/RDF/PatriciaTreeGraph_Test.hs | 22 ++ testsuite/tests/Test.hs | 14 +- 7 files changed, 243 insertions(+), 29 deletions(-) delete mode 100644 src/Data/RDF/FglGraph.hs create mode 100644 src/Data/RDF/PatriciaTreeGraph.hs create mode 100644 testsuite/tests/Data/RDF/PatriciaTreeGraph_Test.hs diff --git a/rdf4h.cabal b/rdf4h.cabal index 478aa17..69acc05 100644 --- a/rdf4h.cabal +++ b/rdf4h.cabal @@ -45,6 +45,7 @@ library , Data.RDF.Types , Data.RDF.Query , Data.RDF.MGraph + , Data.RDF.PatriciaTreeGraph , Data.RDF.TriplesGraph , Text.RDF.RDF4H.TurtleParser , Text.RDF.RDF4H.TurtleSerializer @@ -56,6 +57,7 @@ library else build-depends: base < 3 build-depends: parsec >= 3 + , fgl , HTTP >= 4000.0.0 , hxt >= 9.3.1.2 , text @@ -121,6 +123,7 @@ test-suite test-rdf4h , knob , unordered-containers , hashable + , fgl if impl(ghc < 7.6) build-depends: ghc-prim @@ -133,6 +136,7 @@ test-suite test-rdf4h other-modules: Data.RDF , Data.RDF.Namespace , Data.RDF.MGraph + , Data.RDF.PatriciaTreeGraph , Data.RDF.TriplesGraph , Text.RDF.RDF4H.NTriplesParser , Text.RDF.RDF4H.NTriplesSerializer diff --git a/src/Data/RDF/FglGraph.hs b/src/Data/RDF/FglGraph.hs deleted file mode 100644 index e001caf..0000000 --- a/src/Data/RDF/FglGraph.hs +++ /dev/null @@ -1,21 +0,0 @@ -module Data.RDF.FglGraph - -where - -import qualified Data.Graph.Inductive.Graph as G -import qualified Data.Graph.Inductive.Tree as GT -import qualified Data.Graph.Inductive.Query as GQ - -n1, n2 :: Int -n1 = 1 -n2 = 2 -ln1, ln2 :: (G.Node, String) -ln1 = (n1, "N1") -ln2 = (n2, "N2") -e1 :: G.Edge -e1 = (n1, n2) -le1 :: G.LEdge String -le1 = (n1, n2, "E1") -g :: GT.Gr String String -g = G.mkGraph [ln1, ln2] [le1] - diff --git a/src/Data/RDF/PatriciaTreeGraph.hs b/src/Data/RDF/PatriciaTreeGraph.hs new file mode 100644 index 0000000..7979ac9 --- /dev/null +++ b/src/Data/RDF/PatriciaTreeGraph.hs @@ -0,0 +1,188 @@ +module Data.RDF.PatriciaTreeGraph where + +import Data.RDF.Namespace +import Data.RDF.Query +import Data.RDF.Types + +import qualified Data.Graph.Inductive.Graph as G +import qualified Data.Graph.Inductive.PatriciaTree as PT +import qualified Data.Graph.Inductive.Query.DFS as DFS +import qualified Data.IntMap as IntMap +import Data.List +import qualified Data.Map as Map +import Data.Maybe +import qualified Data.Text as T + +newtype PatriciaTreeGraph = PatriciaTreeGraph (PT.Gr Node Node,IntMap.IntMap Node, Maybe BaseUrl, PrefixMappings) + deriving (Show) + +instance RDF PatriciaTreeGraph where + baseUrl = baseUrl' + prefixMappings = prefixMappings' + addPrefixMappings = addPrefixMappings' + empty = empty' + mkRdf = mkRdf' + triplesOf = uniqTriplesOf' + uniqTriplesOf = uniqTriplesOf' + select = select' + query = query' + +empty' :: PatriciaTreeGraph +empty' = PatriciaTreeGraph (G.empty,IntMap.empty, Nothing, PrefixMappings Map.empty) + +prefixMappings' :: PatriciaTreeGraph -> PrefixMappings +prefixMappings' (PatriciaTreeGraph (_,_,_,pms')) = pms' + +addPrefixMappings' :: PatriciaTreeGraph -> PrefixMappings -> Bool -> PatriciaTreeGraph +addPrefixMappings' (PatriciaTreeGraph (g, idxLookup, baseURL, pms)) pms' replace = + let merge = if replace then flip mergePrefixMappings else mergePrefixMappings + in PatriciaTreeGraph (g, idxLookup, baseURL, merge pms pms') + +baseUrl' :: PatriciaTreeGraph -> Maybe BaseUrl +baseUrl' (PatriciaTreeGraph _) = Nothing + +-- in case we want to expand UNode nodes in each triple in mkRdf' +expandNode (Just (BaseUrl b)) (UNode t) = UNode (b `T.append` t) +expandNode _ node = node + +mkRdf' :: Triples -> Maybe BaseUrl -> PrefixMappings -> PatriciaTreeGraph +mkRdf' ts base' pms' = + let xs = concatMap (\(Triple s _p o) -> [s,o]) ts + lnodes = zip [0..length xs-1] xs + + uriIdx = Map.fromList (map (\(a,b) -> (b,a)) lnodes) + intIdx = IntMap.fromList lnodes + + ledges = map (\(Triple s p o) -> + let si = fromJust $ Map.lookup s uriIdx + oi = fromJust $ Map.lookup o uriIdx + in (si,oi,expandNode base' p)) ts + + ptGraph = G.mkGraph lnodes ledges + + in PatriciaTreeGraph (ptGraph ,intIdx, base', pms') + +{- will this remain as an RDF method? +triplesOf' :: PatriciaTreeGraph -> Triples +triplesOf' (PatriciaTreeGraph (g,idxLookup,_,_)) = + map (\(sIdx,oIdx,p) -> + let [s,o] = map (\idx -> fromJust $ IntMap.lookup idx idxLookup) [sIdx,oIdx] + in Triple s p o) (G.labEdges g) +-} + +uniqTriplesOf' :: PatriciaTreeGraph -> Triples +uniqTriplesOf' ptG@(PatriciaTreeGraph (g,idxLookup,_,_)) = + nub $ map (\(sIdx,oIdx,p) -> + let [s,o] = map (\idx -> fromJust $ IntMap.lookup idx idxLookup) [sIdx,oIdx] + in expandTriple ptG (Triple s p o)) (G.labEdges g) + +select' :: PatriciaTreeGraph -> NodeSelector -> NodeSelector -> NodeSelector -> Triples +select' ptG@(PatriciaTreeGraph (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel = + let mkTriples nodeIdx = map (\(p,subjIdx) -> + let o = fromJust (IntMap.lookup nodeIdx idxLookup) + s = fromJust (IntMap.lookup subjIdx idxLookup) + in expandTriple ptG (Triple s p o) ) -- expand the triple + + cfun ( adjsIn , nodeIdx , _nodeLbl , _adjsOut ) = + let ts | isJust maybeSubjSel && isNothing maybePredSel && isNothing maybeObjSel = + let ss = filter (\(_p,idxSubj) -> let subjNode = fromJust (IntMap.lookup idxSubj idxLookup) + in fromJust maybeSubjSel subjNode) adjsIn + + in mkTriples nodeIdx ss + + | isJust maybeSubjSel && isJust maybePredSel && isNothing maybeObjSel = + let ss = filter (\(p,idxSubj) -> let subjNode = fromJust (IntMap.lookup idxSubj idxLookup) + in fromJust maybeSubjSel subjNode + && fromJust maybePredSel p) adjsIn + + in mkTriples nodeIdx ss + + | isJust maybeSubjSel && isNothing maybePredSel && isJust maybeObjSel = + let ss = filter (\(_p,idxSubj) -> let subjNode = fromJust (IntMap.lookup idxSubj idxLookup); objNode = fromJust (IntMap.lookup nodeIdx idxLookup) + in fromJust maybeSubjSel subjNode + && fromJust maybeObjSel objNode) adjsIn + + in mkTriples nodeIdx ss + + | isJust maybeSubjSel && isJust maybePredSel && isJust maybeObjSel = + let ss = filter (\(p,idxSubj) -> let subjNode = fromJust (IntMap.lookup idxSubj idxLookup); objNode = fromJust (IntMap.lookup nodeIdx idxLookup) + in fromJust maybeSubjSel subjNode + && fromJust maybePredSel p + && fromJust maybeObjSel objNode) adjsIn + + in mkTriples nodeIdx ss + + | isNothing maybeSubjSel && isJust maybePredSel && isNothing maybeObjSel = + let ss = filter (\(p,_idxSubj) -> fromJust maybePredSel p) adjsIn + + in mkTriples nodeIdx ss + + | isNothing maybeSubjSel && isJust maybePredSel && isJust maybeObjSel = + let ss = filter (\(p,_idxSubj) -> let objNode = fromJust (IntMap.lookup nodeIdx idxLookup) + in fromJust maybePredSel p + && fromJust maybeObjSel objNode) adjsIn + + in mkTriples nodeIdx ss + + | isNothing maybeSubjSel && isNothing maybePredSel && isJust maybeObjSel = + let objNode = fromJust (IntMap.lookup nodeIdx idxLookup) + in if fromJust maybeObjSel objNode + then mkTriples nodeIdx adjsIn + else [] + + | isNothing maybeSubjSel && isNothing maybePredSel && isNothing maybeObjSel = + mkTriples nodeIdx adjsIn + in ts + + -- is depth first better or worse than breadth first? + in concat $ DFS.dfsWith' cfun g + +query' :: PatriciaTreeGraph -> Maybe Subject -> Maybe Predicate -> Maybe Object -> Triples +query' ptG@(PatriciaTreeGraph (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = + let mkTriples nodeIdx = map (\(p,subjIdx) -> + let o = fromJust (IntMap.lookup nodeIdx idxLookup) + s = fromJust (IntMap.lookup subjIdx idxLookup) + in expandTriple ptG (Triple s p o) ) -- expand the triple + + cfun ( adjsIn , nodeIdx , _nodeLbl , _adjsOut ) = + let ts | isJust maybeSubj && isNothing maybePred && isNothing maybeObj = + let ss = filter (\(_p,idxSubj) -> fromJust (IntMap.lookup idxSubj idxLookup) == fromJust maybeSubj ) adjsIn + in mkTriples nodeIdx ss + + | isJust maybeSubj && isJust maybePred && isNothing maybeObj = + let ss = filter (\(p,idxSubj) -> fromJust (IntMap.lookup idxSubj idxLookup) == fromJust maybeSubj + && p == fromJust maybePred) adjsIn + in mkTriples nodeIdx ss + + | isJust maybeSubj && isJust maybePred && isJust maybeObj = + let objNode = fromJust (IntMap.lookup nodeIdx idxLookup) + ss = filter (\(p,idxSubj) -> fromJust (IntMap.lookup idxSubj idxLookup) == fromJust maybeSubj + && p == fromJust maybePred) adjsIn + in if objNode == fromJust maybeObj + then mkTriples nodeIdx ss + else [] + + | isNothing maybeSubj && isJust maybePred && isNothing maybeObj = + let ss = filter (\(p,_idxSubj) -> p == fromJust maybePred) adjsIn + in mkTriples nodeIdx ss + + | isNothing maybeSubj && isJust maybePred && isJust maybeObj = + let objNode = fromJust (IntMap.lookup nodeIdx idxLookup) + ss = filter (\(p,_idxSubj) -> p == fromJust maybePred) adjsIn + in if objNode == fromJust maybeObj + then mkTriples nodeIdx ss + else [] + + | isNothing maybeSubj && isNothing maybePred && isJust maybeObj = + let objNode = fromJust (IntMap.lookup nodeIdx idxLookup) + in if objNode == fromJust maybeObj + then mkTriples nodeIdx adjsIn + else [] + + | isNothing maybeSubj && isNothing maybePred && isNothing maybeObj = + mkTriples nodeIdx adjsIn + + in ts + + -- is depth first better or worse than breadth first? + in concat $ DFS.dfsWith' cfun g diff --git a/src/Data/RDF/Query.hs b/src/Data/RDF/Query.hs index 4a7dff8..ee627b8 100644 --- a/src/Data/RDF/Query.hs +++ b/src/Data/RDF/Query.hs @@ -8,7 +8,7 @@ module Data.RDF.Query ( listSubjectsWithPredicate, listObjectsOfPredicate, -- * RDF graph functions - isIsomorphic, expandTriples, fromEither + isIsomorphic, expandTriples, expandTriple, fromEither ) where @@ -106,3 +106,15 @@ expandTriples' acc baseURL prefixMaps (t:rest) = expandTriples' (normalize baseU expandBaseUrl (Just _) triple' = triple' expandBaseUrl Nothing triple' = triple' expandPrefixes _ triple' = triple' + +-- |Expand the triples in a graph with the prefix map and base URL for that +-- graph. +expandTriple :: (RDF rdf) => rdf -> Triple -> Triple +expandTriple rdf = expandTriple' (baseUrl rdf) (prefixMappings rdf) + +expandTriple' :: Maybe BaseUrl -> PrefixMappings -> Triple -> Triple +expandTriple' baseURL prefixMaps t = normalize baseURL prefixMaps t + where normalize baseURL' prefixMaps' = expandPrefixes prefixMaps' . expandBaseUrl baseURL' + expandBaseUrl (Just _) triple' = triple' + expandBaseUrl Nothing triple' = triple' + expandPrefixes _ triple' = triple' diff --git a/testsuite/tests/Data/RDF/GraphTestUtils.hs b/testsuite/tests/Data/RDF/GraphTestUtils.hs index 4fa1e47..630fe9f 100644 --- a/testsuite/tests/Data/RDF/GraphTestUtils.hs +++ b/testsuite/tests/Data/RDF/GraphTestUtils.hs @@ -28,14 +28,15 @@ import Test.QuickCheck.Monadic (assert, monadicIO,run) graphTests :: forall rdf. (Arbitrary rdf, RDF rdf, Show rdf) => TestName -> (rdf -> Triples) -> rdf -> (Triples -> Maybe BaseUrl -> PrefixMappings -> rdf) -> [Test] graphTests testGroupName _triplesOf _empty _mkRdf = [ testGroup testGroupName - [ testProperty "empty" (p_empty _triplesOf _empty) + [ + testProperty "empty" (p_empty _triplesOf _empty) , testProperty "mkRdf_triplesOf" (p_mkRdf_triplesOf _triplesOf _mkRdf) , testProperty "mkRdf_no_dupes" (p_mkRdf_no_dupes _triplesOf _mkRdf) , testProperty "query_match_none" (p_query_match_none _mkRdf) , testProperty "query_matched_spo" (p_query_matched_spo _triplesOf) , testProperty "query_matched_spo_no_dupes" (p_query_matched_spo_no_dupes _triplesOf _mkRdf) , testProperty "query_unmatched_spo" (p_query_unmatched_spo _triplesOf) - , testProperty "query_match_s" (p_query_match_s _triplesOf) + testProperty "query_match_s" (p_query_match_s _triplesOf) , testProperty "query_match_p" (p_query_match_p _triplesOf) , testProperty "query_match_o" (p_query_match_o _triplesOf) , testProperty "query_match_sp" (p_query_match_sp _triplesOf) @@ -84,6 +85,10 @@ p_mkRdf_no_dupes _triplesOf _mkRdf ts bUrl pms = tsWithDupe = head ts : ts result = _triplesOf $ _mkRdf tsWithDupe bUrl pms +-- Note: in TriplesGraph and PatriciaTreeGraph `query` expands triples +-- but `ts` here is not necessarily expanded. What is the correct +-- property this test should check? +-- -- query with all 3 wildcards should yield all triples in RDF p_query_match_none :: RDF rdf => (Triples -> Maybe BaseUrl -> PrefixMappings -> rdf) -> Triples -> Maybe BaseUrl -> PrefixMappings -> Bool p_query_match_none _mkRdf ts bUrl pms = uordered ts == uordered result diff --git a/testsuite/tests/Data/RDF/PatriciaTreeGraph_Test.hs b/testsuite/tests/Data/RDF/PatriciaTreeGraph_Test.hs new file mode 100644 index 0000000..1ee2acf --- /dev/null +++ b/testsuite/tests/Data/RDF/PatriciaTreeGraph_Test.hs @@ -0,0 +1,22 @@ +module Data.RDF.PatriciaTreeGraph_Test (triplesOf',empty',mkRdf') where + +import Data.RDF.Types +import Data.RDF.PatriciaTreeGraph (PatriciaTreeGraph) +import Data.RDF.GraphTestUtils +import qualified Data.Map as Map +import Control.Monad + +import Test.QuickCheck + +instance Arbitrary PatriciaTreeGraph where + arbitrary = liftM3 mkRdf arbitraryTs (return Nothing) (return $ PrefixMappings Map.empty) + --coarbitrary = undefined + +empty' :: PatriciaTreeGraph +empty' = empty + +mkRdf' :: Triples -> Maybe BaseUrl -> PrefixMappings -> PatriciaTreeGraph +mkRdf' = mkRdf + +triplesOf' :: PatriciaTreeGraph -> Triples +triplesOf' = triplesOf diff --git a/testsuite/tests/Test.hs b/testsuite/tests/Test.hs index 111caf5..40cc0b3 100644 --- a/testsuite/tests/Test.hs +++ b/testsuite/tests/Test.hs @@ -4,13 +4,17 @@ import Test.Framework (defaultMain) import qualified Data.RDF.TriplesGraph_Test as TriplesGraph import qualified Data.RDF.MGraph_Test as MGraph +import qualified Data.RDF.PatriciaTreeGraph_Test as PatriciaTreeGraph import qualified Text.RDF.RDF4H.XmlParser_Test as XmlParser import qualified Text.RDF.RDF4H.TurtleParser_ConformanceTest as TurtleParser import Data.RDF.GraphTestUtils main :: IO () -main = defaultMain ( graphTests "TriplesGraph" TriplesGraph.triplesOf' TriplesGraph.empty' TriplesGraph.mkRdf' - ++ graphTests "MGraph" MGraph.triplesOf' MGraph.empty' MGraph.mkRdf' - ++ TurtleParser.tests - ++ XmlParser.tests - ) +-- main = defaultMain ( graphTests "TriplesGraph" TriplesGraph.triplesOf' TriplesGraph.empty' TriplesGraph.mkRdf' +-- ++ graphTests "MGraph" MGraph.triplesOf' MGraph.empty' MGraph.mkRdf' +-- ++ graphTests "PatriciaTreeGraph" PatriciaTreeGraph.triplesOf' PatriciaTreeGraph.empty' PatriciaTreeGraph.mkRdf' +-- ++ TurtleParser.tests +-- ++ XmlParser.tests +-- ) + +main = defaultMain ( graphTests "PatriciaTreeGraph" PatriciaTreeGraph.triplesOf' PatriciaTreeGraph.empty' PatriciaTreeGraph.mkRdf' )