never executed always true always false
    1 {-
    2 (c) Bartosz Nitka, Facebook, 2015
    3 
    4 UniqDFM: Specialised deterministic finite maps, for things with @Uniques@.
    5 
    6 Basically, the things need to be in class @Uniquable@, and we use the
    7 @getUnique@ method to grab their @Uniques@.
    8 
    9 This is very similar to @UniqFM@, the major difference being that the order of
   10 folding is not dependent on @Unique@ ordering, giving determinism.
   11 Currently the ordering is determined by insertion order.
   12 
   13 See Note [Unique Determinism] in GHC.Types.Unique for explanation why @Unique@ ordering
   14 is not deterministic.
   15 -}
   16 
   17 {-# LANGUAGE DeriveDataTypeable #-}
   18 {-# LANGUAGE DeriveTraversable #-}
   19 {-# LANGUAGE DerivingStrategies #-}
   20 {-# LANGUAGE FlexibleContexts #-}
   21 {-# LANGUAGE ScopedTypeVariables #-}
   22 {-# LANGUAGE TupleSections #-}
   23 {-# OPTIONS_GHC -Wall #-}
   24 
   25 module GHC.Types.Unique.DFM (
   26         -- * Unique-keyed deterministic mappings
   27         UniqDFM,       -- abstract type
   28 
   29         -- ** Manipulating those mappings
   30         emptyUDFM,
   31         unitUDFM,
   32         addToUDFM,
   33         addToUDFM_C,
   34         addToUDFM_C_Directly,
   35         addToUDFM_Directly,
   36         addListToUDFM,
   37         delFromUDFM,
   38         delListFromUDFM,
   39         adjustUDFM,
   40         adjustUDFM_Directly,
   41         alterUDFM,
   42         mapUDFM,
   43         mapMaybeUDFM,
   44         plusUDFM,
   45         plusUDFM_C,
   46         lookupUDFM, lookupUDFM_Directly,
   47         elemUDFM,
   48         foldUDFM,
   49         eltsUDFM,
   50         filterUDFM, filterUDFM_Directly,
   51         isNullUDFM,
   52         sizeUDFM,
   53         intersectUDFM, udfmIntersectUFM,
   54         disjointUDFM, disjointUdfmUfm,
   55         equalKeysUDFM,
   56         minusUDFM,
   57         listToUDFM, listToUDFM_Directly,
   58         udfmMinusUFM, ufmMinusUDFM,
   59         partitionUDFM,
   60         anyUDFM, allUDFM,
   61         pprUniqDFM, pprUDFM,
   62 
   63         udfmToList,
   64         udfmToUfm,
   65         nonDetStrictFoldUDFM,
   66         unsafeCastUDFMKey,
   67         alwaysUnsafeUfmToUdfm,
   68     ) where
   69 
   70 import GHC.Prelude
   71 
   72 import GHC.Types.Unique ( Uniquable(..), Unique, getKey )
   73 import GHC.Utils.Outputable
   74 
   75 import qualified Data.IntMap.Strict as MS
   76 import qualified Data.IntMap as M
   77 import Data.Data
   78 import Data.Functor.Classes (Eq1 (..))
   79 import Data.List (sortBy)
   80 import Data.Function (on)
   81 import GHC.Types.Unique.FM (UniqFM, nonDetUFMToList, ufmToIntMap, unsafeIntMapToUFM)
   82 import Unsafe.Coerce
   83 
   84 -- Note [Deterministic UniqFM]
   85 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
   86 -- A @UniqDFM@ is just like @UniqFM@ with the following additional
   87 -- property: the function `udfmToList` returns the elements in some
   88 -- deterministic order not depending on the Unique key for those elements.
   89 --
   90 -- If the client of the map performs operations on the map in deterministic
   91 -- order then `udfmToList` returns them in deterministic order.
   92 --
   93 -- There is an implementation cost: each element is given a serial number
   94 -- as it is added, and `udfmToList` sorts it's result by this serial
   95 -- number. So you should only use `UniqDFM` if you need the deterministic
   96 -- property.
   97 --
   98 -- `foldUDFM` also preserves determinism.
   99 --
  100 -- Normal @UniqFM@ when you turn it into a list will use
  101 -- Data.IntMap.toList function that returns the elements in the order of
  102 -- the keys. The keys in @UniqFM@ are always @Uniques@, so you end up with
  103 -- with a list ordered by @Uniques@.
  104 -- The order of @Uniques@ is known to be not stable across rebuilds.
  105 -- See Note [Unique Determinism] in GHC.Types.Unique.
  106 --
  107 --
  108 -- There's more than one way to implement this. The implementation here tags
  109 -- every value with the insertion time that can later be used to sort the
  110 -- values when asked to convert to a list.
  111 --
  112 -- An alternative would be to have
  113 --
  114 --   data UniqDFM ele = UDFM (M.IntMap ele) [ele]
  115 --
  116 -- where the list determines the order. This makes deletion tricky as we'd
  117 -- only accumulate elements in that list, but makes merging easier as you
  118 -- can just merge both structures independently.
  119 -- Deletion can probably be done in amortized fashion when the size of the
  120 -- list is twice the size of the set.
  121 
  122 -- | A type of values tagged with insertion time
  123 data TaggedVal val =
  124   TaggedVal
  125     !val
  126     {-# UNPACK #-} !Int -- ^ insertion time
  127   deriving stock (Data, Functor, Foldable, Traversable)
  128 
  129 taggedFst :: TaggedVal val -> val
  130 taggedFst (TaggedVal v _) = v
  131 
  132 taggedSnd :: TaggedVal val -> Int
  133 taggedSnd (TaggedVal _ i) = i
  134 
  135 instance Eq val => Eq (TaggedVal val) where
  136   (TaggedVal v1 _) == (TaggedVal v2 _) = v1 == v2
  137 
  138 -- | Type of unique deterministic finite maps
  139 --
  140 -- The key is just here to keep us honest. It's always safe
  141 -- to use a single type as key.
  142 -- If two types don't overlap in their uniques it's also safe
  143 -- to index the same map at multiple key types. But this is
  144 -- very much discouraged.
  145 data UniqDFM key ele =
  146   UDFM
  147     !(M.IntMap (TaggedVal ele)) -- A map where keys are Unique's values and
  148                                 -- values are tagged with insertion time.
  149                                 -- The invariant is that all the tags will
  150                                 -- be distinct within a single map
  151     {-# UNPACK #-} !Int         -- Upper bound on the values' insertion
  152                                 -- time. See Note [Overflow on plusUDFM]
  153   deriving (Data, Functor)
  154 
  155 -- | Deterministic, in O(n log n).
  156 instance Foldable (UniqDFM key) where
  157   foldr = foldUDFM
  158 
  159 -- | Deterministic, in O(n log n).
  160 instance Traversable (UniqDFM key) where
  161   traverse f = fmap listToUDFM_Directly
  162              . traverse (\(u,a) -> (u,) <$> f a)
  163              . udfmToList
  164 
  165 emptyUDFM :: UniqDFM key elt
  166 emptyUDFM = UDFM M.empty 0
  167 
  168 unitUDFM :: Uniquable key => key -> elt -> UniqDFM key elt
  169 unitUDFM k v = UDFM (M.singleton (getKey $ getUnique k) (TaggedVal v 0)) 1
  170 
  171 -- The new binding always goes to the right of existing ones
  172 addToUDFM :: Uniquable key => UniqDFM key elt -> key -> elt  -> UniqDFM key elt
  173 addToUDFM m k v = addToUDFM_Directly m (getUnique k) v
  174 
  175 -- The new binding always goes to the right of existing ones
  176 addToUDFM_Directly :: UniqDFM key elt -> Unique -> elt -> UniqDFM key elt
  177 addToUDFM_Directly (UDFM m i) u v
  178   = UDFM (MS.insertWith tf (getKey u) (TaggedVal v i) m) (i + 1)
  179   where
  180     tf (TaggedVal new_v _) (TaggedVal _ old_i) = TaggedVal new_v old_i
  181       -- Keep the old tag, but insert the new value
  182       -- This means that udfmToList typically returns elements
  183       -- in the order of insertion, rather than the reverse
  184 
  185       -- It is quite critical that the strict insertWith is used as otherwise
  186       -- the combination function 'tf' is not forced and both old values are retained
  187       -- in the map.
  188 
  189 addToUDFM_C_Directly
  190   :: (elt -> elt -> elt)   -- old -> new -> result
  191   -> UniqDFM key elt
  192   -> Unique -> elt
  193   -> UniqDFM key elt
  194 addToUDFM_C_Directly f (UDFM m i) u v
  195   = UDFM (MS.insertWith tf (getKey u) (TaggedVal v i) m) (i + 1)
  196     where
  197       tf (TaggedVal new_v _) (TaggedVal old_v old_i)
  198          = TaggedVal (f old_v new_v) old_i
  199           -- Flip the arguments, because M.insertWith uses  (new->old->result)
  200           --                         but f            needs (old->new->result)
  201           -- Like addToUDFM_Directly, keep the old tag
  202 
  203 addToUDFM_C
  204   :: Uniquable key => (elt -> elt -> elt) -- old -> new -> result
  205   -> UniqDFM key elt -- old
  206   -> key -> elt -- new
  207   -> UniqDFM key elt -- result
  208 addToUDFM_C f m k v = addToUDFM_C_Directly f m (getUnique k) v
  209 
  210 addListToUDFM :: Uniquable key => UniqDFM key elt -> [(key,elt)] -> UniqDFM key elt
  211 addListToUDFM = foldl' (\m (k, v) -> addToUDFM m k v)
  212 
  213 addListToUDFM_Directly :: UniqDFM key elt -> [(Unique,elt)] -> UniqDFM key elt
  214 addListToUDFM_Directly = foldl' (\m (k, v) -> addToUDFM_Directly m k v)
  215 
  216 addListToUDFM_Directly_C
  217   :: (elt -> elt -> elt) -> UniqDFM key elt -> [(Unique,elt)] -> UniqDFM key elt
  218 addListToUDFM_Directly_C f = foldl' (\m (k, v) -> addToUDFM_C_Directly f m k v)
  219 
  220 delFromUDFM :: Uniquable key => UniqDFM key elt -> key -> UniqDFM key elt
  221 delFromUDFM (UDFM m i) k = UDFM (M.delete (getKey $ getUnique k) m) i
  222 
  223 plusUDFM_C :: (elt -> elt -> elt) -> UniqDFM key elt -> UniqDFM key elt -> UniqDFM key elt
  224 plusUDFM_C f udfml@(UDFM _ i) udfmr@(UDFM _ j)
  225   -- we will use the upper bound on the tag as a proxy for the set size,
  226   -- to insert the smaller one into the bigger one
  227   | i > j = insertUDFMIntoLeft_C f udfml udfmr
  228   | otherwise = insertUDFMIntoLeft_C f udfmr udfml
  229 
  230 -- Note [Overflow on plusUDFM]
  231 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  232 -- There are multiple ways of implementing plusUDFM.
  233 -- The main problem that needs to be solved is overlap on times of
  234 -- insertion between different keys in two maps.
  235 -- Consider:
  236 --
  237 -- A = fromList [(a, (x, 1))]
  238 -- B = fromList [(b, (y, 1))]
  239 --
  240 -- If you merge them naively you end up with:
  241 --
  242 -- C = fromList [(a, (x, 1)), (b, (y, 1))]
  243 --
  244 -- Which loses information about ordering and brings us back into
  245 -- non-deterministic world.
  246 --
  247 -- The solution I considered before would increment the tags on one of the
  248 -- sets by the upper bound of the other set. The problem with this approach
  249 -- is that you'll run out of tags for some merge patterns.
  250 -- Say you start with A with upper bound 1, you merge A with A to get A' and
  251 -- the upper bound becomes 2. You merge A' with A' and the upper bound
  252 -- doubles again. After 64 merges you overflow.
  253 -- This solution would have the same time complexity as plusUFM, namely O(n+m).
  254 --
  255 -- The solution I ended up with has time complexity of
  256 -- O(m log m + m * min (n+m, W)) where m is the smaller set.
  257 -- It simply inserts the elements of the smaller set into the larger
  258 -- set in the order that they were inserted into the smaller set. That's
  259 -- O(m log m) for extracting the elements from the smaller set in the
  260 -- insertion order and O(m * min(n+m, W)) to insert them into the bigger
  261 -- set.
  262 
  263 plusUDFM :: UniqDFM key elt -> UniqDFM key elt -> UniqDFM key elt
  264 plusUDFM udfml@(UDFM _ i) udfmr@(UDFM _ j)
  265   -- we will use the upper bound on the tag as a proxy for the set size,
  266   -- to insert the smaller one into the bigger one
  267   | i > j = insertUDFMIntoLeft udfml udfmr
  268   | otherwise = insertUDFMIntoLeft udfmr udfml
  269 
  270 insertUDFMIntoLeft :: UniqDFM key elt -> UniqDFM key elt -> UniqDFM key elt
  271 insertUDFMIntoLeft udfml udfmr = addListToUDFM_Directly udfml $ udfmToList udfmr
  272 
  273 insertUDFMIntoLeft_C
  274   :: (elt -> elt -> elt) -> UniqDFM key elt -> UniqDFM key elt -> UniqDFM key elt
  275 insertUDFMIntoLeft_C f udfml udfmr =
  276   addListToUDFM_Directly_C f udfml $ udfmToList udfmr
  277 
  278 lookupUDFM :: Uniquable key => UniqDFM key elt -> key -> Maybe elt
  279 lookupUDFM (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey $ getUnique k) m
  280 
  281 lookupUDFM_Directly :: UniqDFM key elt -> Unique -> Maybe elt
  282 lookupUDFM_Directly (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey k) m
  283 
  284 elemUDFM :: Uniquable key => key -> UniqDFM key elt -> Bool
  285 elemUDFM k (UDFM m _i) = M.member (getKey $ getUnique k) m
  286 
  287 -- | Performs a deterministic fold over the UniqDFM.
  288 -- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
  289 foldUDFM :: (elt -> a -> a) -> a -> UniqDFM key elt -> a
  290 foldUDFM k z m = foldr k z (eltsUDFM m)
  291 
  292 -- | Performs a nondeterministic strict fold over the UniqDFM.
  293 -- It's O(n), same as the corresponding function on `UniqFM`.
  294 -- If you use this please provide a justification why it doesn't introduce
  295 -- nondeterminism.
  296 nonDetStrictFoldUDFM :: (elt -> a -> a) -> a -> UniqDFM key elt -> a
  297 nonDetStrictFoldUDFM k z (UDFM m _i) = foldl' k' z m
  298   where
  299     k' acc (TaggedVal v _) = k v acc
  300 
  301 eltsUDFM :: UniqDFM key elt -> [elt]
  302 eltsUDFM (UDFM m _i) =
  303   map taggedFst $ sortBy (compare `on` taggedSnd) $ M.elems m
  304 
  305 filterUDFM :: (elt -> Bool) -> UniqDFM key elt -> UniqDFM key elt
  306 filterUDFM p (UDFM m i) = UDFM (M.filter (\(TaggedVal v _) -> p v) m) i
  307 
  308 filterUDFM_Directly :: (Unique -> elt -> Bool) -> UniqDFM key elt -> UniqDFM key elt
  309 filterUDFM_Directly p (UDFM m i) = UDFM (M.filterWithKey p' m) i
  310   where
  311   p' k (TaggedVal v _) = p (getUnique k) v
  312 
  313 -- | Converts `UniqDFM` to a list, with elements in deterministic order.
  314 -- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
  315 udfmToList :: UniqDFM key elt -> [(Unique, elt)]
  316 udfmToList (UDFM m _i) =
  317   [ (getUnique k, taggedFst v)
  318   | (k, v) <- sortBy (compare `on` (taggedSnd . snd)) $ M.toList m ]
  319 
  320 -- Determines whether two 'UniqDFM's contain the same keys.
  321 equalKeysUDFM :: UniqDFM key a -> UniqDFM key b -> Bool
  322 equalKeysUDFM (UDFM m1 _) (UDFM m2 _) = liftEq (\_ _ -> True) m1 m2
  323 
  324 isNullUDFM :: UniqDFM key elt -> Bool
  325 isNullUDFM (UDFM m _) = M.null m
  326 
  327 sizeUDFM :: UniqDFM key elt -> Int
  328 sizeUDFM (UDFM m _i) = M.size m
  329 
  330 intersectUDFM :: UniqDFM key elt -> UniqDFM key elt -> UniqDFM key elt
  331 intersectUDFM (UDFM x i) (UDFM y _j) = UDFM (M.intersection x y) i
  332   -- M.intersection is left biased, that means the result will only have
  333   -- a subset of elements from the left set, so `i` is a good upper bound.
  334 
  335 udfmIntersectUFM :: UniqDFM key elt1 -> UniqFM key elt2 -> UniqDFM key elt1
  336 udfmIntersectUFM (UDFM x i) y = UDFM (M.intersection x (ufmToIntMap y)) i
  337   -- M.intersection is left biased, that means the result will only have
  338   -- a subset of elements from the left set, so `i` is a good upper bound.
  339 
  340 disjointUDFM :: UniqDFM key elt -> UniqDFM key elt -> Bool
  341 disjointUDFM (UDFM x _i) (UDFM y _j) = M.disjoint x y
  342 
  343 disjointUdfmUfm :: UniqDFM key elt -> UniqFM key elt2 -> Bool
  344 disjointUdfmUfm (UDFM x _i) y = M.disjoint x (ufmToIntMap y)
  345 
  346 minusUDFM :: UniqDFM key elt1 -> UniqDFM key elt2 -> UniqDFM key elt1
  347 minusUDFM (UDFM x i) (UDFM y _j) = UDFM (M.difference x y) i
  348   -- M.difference returns a subset of a left set, so `i` is a good upper
  349   -- bound.
  350 
  351 udfmMinusUFM :: UniqDFM key elt1 -> UniqFM key elt2 -> UniqDFM key elt1
  352 udfmMinusUFM (UDFM x i) y = UDFM (M.difference x (ufmToIntMap y)) i
  353   -- M.difference returns a subset of a left set, so `i` is a good upper
  354   -- bound.
  355 
  356 ufmMinusUDFM :: UniqFM key elt1 -> UniqDFM key elt2 -> UniqFM key elt1
  357 ufmMinusUDFM x (UDFM y _i) = unsafeIntMapToUFM (M.difference (ufmToIntMap x) y)
  358 
  359 -- | Partition UniqDFM into two UniqDFMs according to the predicate
  360 partitionUDFM :: (elt -> Bool) -> UniqDFM key elt -> (UniqDFM key elt, UniqDFM key elt)
  361 partitionUDFM p (UDFM m i) =
  362   case M.partition (p . taggedFst) m of
  363     (left, right) -> (UDFM left i, UDFM right i)
  364 
  365 -- | Delete a list of elements from a UniqDFM
  366 delListFromUDFM  :: Uniquable key => UniqDFM key elt -> [key] -> UniqDFM key elt
  367 delListFromUDFM = foldl' delFromUDFM
  368 
  369 -- | This allows for lossy conversion from UniqDFM to UniqFM
  370 udfmToUfm :: UniqDFM key elt -> UniqFM key elt
  371 udfmToUfm (UDFM m _i) = unsafeIntMapToUFM (M.map taggedFst m)
  372 
  373 listToUDFM :: Uniquable key => [(key,elt)] -> UniqDFM key elt
  374 listToUDFM = foldl' (\m (k, v) -> addToUDFM m k v) emptyUDFM
  375 
  376 listToUDFM_Directly :: [(Unique, elt)] -> UniqDFM key elt
  377 listToUDFM_Directly = foldl' (\m (u, v) -> addToUDFM_Directly m u v) emptyUDFM
  378 
  379 -- | Apply a function to a particular element
  380 adjustUDFM :: Uniquable key => (elt -> elt) -> UniqDFM key elt -> key -> UniqDFM key elt
  381 adjustUDFM f (UDFM m i) k = UDFM (M.adjust (fmap f) (getKey $ getUnique k) m) i
  382 
  383 -- | Apply a function to a particular element
  384 adjustUDFM_Directly :: (elt -> elt) -> UniqDFM key elt -> Unique -> UniqDFM key elt
  385 adjustUDFM_Directly f (UDFM m i) k = UDFM (M.adjust (fmap f) (getKey k) m) i
  386 
  387 -- | The expression (alterUDFM f k map) alters value x at k, or absence
  388 -- thereof. alterUDFM can be used to insert, delete, or update a value in
  389 -- UniqDFM. Use addToUDFM, delFromUDFM or adjustUDFM when possible, they are
  390 -- more efficient.
  391 alterUDFM
  392   :: Uniquable key
  393   => (Maybe elt -> Maybe elt)  -- How to adjust
  394   -> UniqDFM key elt               -- old
  395   -> key                       -- new
  396   -> UniqDFM key elt               -- result
  397 alterUDFM f (UDFM m i) k =
  398   UDFM (M.alter alterf (getKey $ getUnique k) m) (i + 1)
  399   where
  400   alterf Nothing = inject $ f Nothing
  401   alterf (Just (TaggedVal v _)) = inject $ f (Just v)
  402   inject Nothing = Nothing
  403   inject (Just v) = Just $ TaggedVal v i
  404 
  405 -- | Map a function over every value in a UniqDFM
  406 mapUDFM :: (elt1 -> elt2) -> UniqDFM key elt1 -> UniqDFM key elt2
  407 mapUDFM f (UDFM m i) = UDFM (MS.map (fmap f) m) i
  408 -- Critical this is strict map, otherwise you get a big space leak when reloading
  409 -- in GHCi because all old ModDetails are retained (see pruneHomePackageTable).
  410 -- Modify with care.
  411 
  412 mapMaybeUDFM :: forall elt1 elt2 key.
  413                 (elt1 -> Maybe elt2) -> UniqDFM key elt1 -> UniqDFM key elt2
  414 mapMaybeUDFM f (UDFM m i) = UDFM (M.mapMaybe (traverse f) m) i
  415 
  416 anyUDFM :: (elt -> Bool) -> UniqDFM key elt -> Bool
  417 anyUDFM p (UDFM m _i) = M.foldr ((||) . p . taggedFst) False m
  418 
  419 allUDFM :: (elt -> Bool) -> UniqDFM key elt -> Bool
  420 allUDFM p (UDFM m _i) = M.foldr ((&&) . p . taggedFst) True m
  421 
  422 -- This should not be used in committed code, provided for convenience to
  423 -- make ad-hoc conversions when developing
  424 alwaysUnsafeUfmToUdfm :: UniqFM key elt -> UniqDFM key elt
  425 alwaysUnsafeUfmToUdfm = listToUDFM_Directly . nonDetUFMToList
  426 
  427 -- | Cast the key domain of a UniqFM.
  428 --
  429 -- As long as the domains don't overlap in their uniques
  430 -- this is safe.
  431 unsafeCastUDFMKey :: UniqDFM key1 elt -> UniqDFM key2 elt
  432 unsafeCastUDFMKey = unsafeCoerce -- Only phantom parameter changes so
  433                                  -- this is safe and avoids reallocation.
  434 
  435 -- Output-ery
  436 
  437 instance Outputable a => Outputable (UniqDFM key a) where
  438     ppr ufm = pprUniqDFM ppr ufm
  439 
  440 pprUniqDFM :: (a -> SDoc) -> UniqDFM key a -> SDoc
  441 pprUniqDFM ppr_elt ufm
  442   = brackets $ fsep $ punctuate comma $
  443     [ ppr uq <+> text ":->" <+> ppr_elt elt
  444     | (uq, elt) <- udfmToList ufm ]
  445 
  446 pprUDFM :: UniqDFM key a    -- ^ The things to be pretty printed
  447        -> ([a] -> SDoc) -- ^ The pretty printing function to use on the elements
  448        -> SDoc          -- ^ 'SDoc' where the things have been pretty
  449                         -- printed
  450 pprUDFM ufm pp = pp (eltsUDFM ufm)