Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collect reverse dependencies #802

Closed
wants to merge 33 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
4bf8dbb
Collect reverse dependencies
pepeiborra Mar 14, 2021
7cd0112
don't use return
pepeiborra Mar 16, 2021
c150aea
backwards compat.
pepeiborra Mar 16, 2021
7b39567
redundant pragma
pepeiborra Mar 16, 2021
e864c62
export
pepeiborra Mar 16, 2021
7e56674
alwaysRerun
pepeiborra Mar 16, 2021
9a85eaa
Add SCC pragmas, needed because not exported
pepeiborra Mar 17, 2021
7f2d282
clarify error call
pepeiborra Mar 17, 2021
d5693f8
Unbreak early cutoff
pepeiborra Mar 17, 2021
1867be0
Add some more comments
pepeiborra Mar 18, 2021
d4b49f6
compatibility with older base
pepeiborra Mar 19, 2021
5c8c2a8
Masking async exceptions
pepeiborra Mar 19, 2021
6649576
redundant import
pepeiborra Mar 19, 2021
c3983fa
add more diagnostics
pepeiborra Mar 19, 2021
3d47a41
fix another bug
pepeiborra Mar 19, 2021
ba0fbe1
avoid copying Result records
pepeiborra Mar 19, 2021
32a4f6c
do not mark AlwaysRerun as dirty by default
pepeiborra Mar 19, 2021
d01c40c
Existential wrapper for ShakeValue
pepeiborra Mar 19, 2021
0dfbd0a
fix typo
pepeiborra Mar 19, 2021
834a764
microopt: store the RDeps in an IORef
pepeiborra Mar 19, 2021
d357f9f
use getResult
pepeiborra Mar 21, 2021
ee46bc2
Add stateful dirty set to keep track across runs
pepeiborra Mar 21, 2021
67d2501
Increase diagnostic output
pepeiborra Mar 21, 2021
0f29380
mask until the dirty set is updated
pepeiborra Mar 21, 2021
52ed87e
redundant import
pepeiborra Mar 21, 2021
e0ee243
comments
pepeiborra Mar 21, 2021
4270b2e
Store the reverse dependencies in an Ids array
pepeiborra Mar 21, 2021
0313892
imports
pepeiborra Mar 21, 2021
e6c0199
Compat. with GHC 8.0
pepeiborra Mar 22, 2021
6669768
shakeReverseDependencies
pepeiborra Apr 2, 2021
0a7ce83
Special handling of alwaysRerun
pepeiborra May 13, 2021
ec620ac
Visit only the dirty dependencies
pepeiborra May 15, 2021
35cdf3a
fix imports
pepeiborra May 15, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 3 additions & 2 deletions model/Model.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ For those who like concrete details, which might change at any point in the futu
,built :: Step -- when it was actually run
,changed :: Step -- when the result last changed
,depends :: [[Id]] -- dependencies
,rdepends :: [Id] -- reverse dependencies
,execution :: Float -- duration of last run
,traces :: [Trace] -- a trace of the expensive operations
} deriving Show
Expand Down Expand Up @@ -180,13 +181,13 @@ isn't the case, and "output" would still be clean.
>
> In you rule `File -(ModTime, [(File, ModTime)]`. Is the time stored for a dependency
>
> 1 - the time the dependency has been last used
> 1 - the time the dependency has been last used
>
> 2 - the dependency last modification when the dependency has been used?
>
> For example. Let's say B depends on A and A has been modified yesterday.
>
> If I'm building B today: scenario (1) would be store
> If I'm building B today: scenario (1) would be store
>
> database B = (Today, [(A, Today)])
>
Expand Down
2 changes: 1 addition & 1 deletion src/Development/Shake.hs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ import Development.Shake.Internal.Value
import Development.Shake.Internal.Options
import Development.Shake.Internal.Core.Types
import Development.Shake.Internal.Core.Action
import Development.Shake.Internal.Core.Build
import Development.Shake.Internal.Core.Rules
import Development.Shake.Internal.Resource
import Development.Shake.Internal.Derived
Expand All @@ -137,7 +138,6 @@ import Development.Shake.Internal.Rules.File
import Development.Shake.Internal.Rules.Files
import Development.Shake.Internal.Rules.Oracle
import Development.Shake.Internal.Rules.OrderOnly
import Development.Shake.Internal.Rules.Rerun

-- $writing
--
Expand Down
55 changes: 46 additions & 9 deletions src/Development/Shake/Database.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE RecordWildCards #-}

Expand All @@ -20,6 +21,8 @@ module Development.Shake.Database(
shakeWithDatabase,
shakeOneShotDatabase,
shakeRunDatabase,
shakeRunDatabaseForKeys,
SomeShakeValue(..),
shakeLiveFilesDatabase,
shakeProfileDatabase,
shakeErrorsDatabase,
Expand All @@ -29,15 +32,20 @@ module Development.Shake.Database(
import Control.Concurrent.Extra
import Control.Exception
import Control.Monad
import Control.Monad.Extra (whenJust)
import Control.Monad.IO.Class
import qualified Data.HashSet as HashSet
import Data.IORef
import Data.Maybe
import General.Cleanup
import Development.Shake.Internal.Errors
import Development.Shake.Internal.Options
import Development.Shake.Internal.Core.Rules
import Development.Shake.Internal.Core.Run
import Development.Shake.Internal.Core.Types
import Development.Shake.Internal.Rules.Default
import Development.Shake.Internal.Value (SomeShakeValue(..), newKey)
import Development.Shake.Internal.Core.Database (flushDirty, markDirty, getIdFromKey, runLocked)


data UseState
Expand Down Expand Up @@ -135,17 +143,46 @@ shakeErrorsDatabase (ShakeDatabase use s) =
-- actions along with a list of actions to run after the database was closed, as added with
-- 'Development.Shake.runAfter' and 'Development.Shake.removeFilesAfter'.
shakeRunDatabase :: ShakeDatabase -> [Action a] -> IO ([a], [IO ()])
shakeRunDatabase (ShakeDatabase use s) as =
shakeRunDatabase = shakeRunDatabaseForKeys Nothing

-- | Given an open 'ShakeDatabase', run both whatever actions were added to the 'Rules',
-- plus the list of 'Action' given here.
--
-- Requires 'shakeReverseDependencies', otherwise it falls back to 'shakeRunDatabase'.
--
-- If a set of dirty keys is given, only the reverse dependencies of these keys
-- will be considered potentially changed; all other keys will be assumed unchanged.
-- This includes the 'AlwaysRerunQ' key which is by default always dirty, but
-- will not here, unless it is included in the input.
--
-- Returns the results from the explicitly passed actions along with a list
-- of actions to run after the database was closed, as added with
-- 'Development.Shake.runAfter' and 'Development.Shake.removeFilesAfter'.
shakeRunDatabaseForKeys
:: Maybe [SomeShakeValue] -- ^ Set of keys changed since last run
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't find the caller where Just value is used. Am I missing something?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an external API. See for example how it is used in HLS:

https://github.com/pepeiborra/ide/blob/keysChanged/ghcide/src/Development/IDE/Core/Shake.hs#L758

-> ShakeDatabase
-> [Action a]
-> IO ([a], [IO ()])
shakeRunDatabaseForKeys keysChanged (ShakeDatabase use s) as = uninterruptibleMask $ \continue ->
withOpen use "shakeRunDatabase" (\o -> o{openRequiresReset=True}) $ \Open{..} -> do
when openRequiresReset $ do
when openOneShot $
throwM $ errorStructured "Error when calling shakeRunDatabase twice, after calling shakeOneShotDatabase" [] ""
reset s
(refs, as) <- fmap unzip $ forM as $ \a -> do
ref <- newIORef Nothing
pure (ref, liftIO . writeIORef ref . Just =<< a)
after <- run s openOneShot $ map void as
results <- mapM readIORef refs
case sequence results of
Just result -> pure (result, after)
Nothing -> throwM $ errorInternal "Expected all results were written, but some where not"
runLocked (database s) $ flushDirty (database s)

-- record the keys changed and continue
whenJust keysChanged $ \kk -> do
getId <- getIdFromKey (database s)
let ids = mapMaybe (\(SomeShakeValue x) -> getId $ newKey x) kk
markDirty (database s) $ HashSet.fromList ids

continue $ do
(refs, as) <- fmap unzip $ forM as $ \a -> do
ref <- newIORef Nothing
pure (ref, liftIO . writeIORef ref . Just =<< a)
after <- run s openOneShot (isJust keysChanged) $ map void as
results <- mapM readIORef refs
case sequence results of
Just result -> pure (result, after)
Nothing -> throwM $ errorInternal "Expected all results were written, but some where not"
101 changes: 88 additions & 13 deletions src/Development/Shake/Internal/Core/Build.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ module Development.Shake.Internal.Core.Build(
historyIsEnabled, historySave, historyLoad,
applyKeyValue,
apply, apply1,
alwaysRerun
) where

import Development.Shake.Classes
Expand All @@ -19,6 +20,7 @@ import Development.Shake.Internal.Core.Action
import Development.Shake.Internal.History.Shared
import Development.Shake.Internal.History.Cloud
import Development.Shake.Internal.Options
import Development.Shake.Internal.Rules.Rerun
import Development.Shake.Internal.Core.Monad
import General.Wait
import qualified Data.ByteString.Char8 as BS
Expand All @@ -30,6 +32,7 @@ import Control.Exception
import Control.Monad.Extra
import Numeric.Extra
import qualified Data.HashMap.Strict as Map
import qualified Data.HashSet as HashSet
import Development.Shake.Internal.Core.Rules
import Data.Typeable
import Data.Maybe
Expand Down Expand Up @@ -101,41 +104,91 @@ buildOne global@Global{..} stack database i k r = case addStack i k stack of
pure $ Left e
Right stack -> Later $ \continue -> do
setIdKeyStatus global database i k (Running (NoShow continue) r)
let go = buildRunMode global stack database r
let go = buildRunMode global stack database i r
fromLater go $ \mode -> liftIO $ addPool PoolStart globalPool $
runKey global stack k r mode $ \res -> do
runKey global stack k r mode $ \result -> do
runLocked database $ do
let val = fmap runValue res
let val = fmap runValue result
res <- liftIO $ getKeyValueFromId database i
w <- case res of
Just (_, Running (NoShow w) _) -> pure w
-- We used to be able to hit here, but we fixed it by ensuring the thread pool workers are all
-- dead _before_ any exception bubbles up
_ -> throwM $ errorInternal $ "expected Waiting but got " ++ maybe "nothing" (statusType . snd) res ++ ", key " ++ show k
setIdKeyStatus global database i k $ either mkError Ready val

-- Make sure that the reverse dependencies are marked to avoid unsoundness
maskLocked $ do
setIdKeyStatus global database i k $ either mkError Ready val
liftIO $ unmarkDirty database i

-- update reverse dependencies efficiently - have they changed since last time?
case result of
Right RunResult{..}
| shakeReverseDependencies globalOptions &&
runChanged `elem` [ChangedRecomputeDiff, ChangedRecomputeSame ] ->
updateReverseDeps i database (depends <$> r) (depends runValue)
_ -> pure ()

w val
case res of
case result of
Right RunResult{..} | runChanged /= ChangedNothing -> setDisk database i k $ Loaded runValue{result=runStore}
_ -> pure ()
where
mkError e = Failed e $ if globalOneShot then Nothing else r



-- | Refresh all the reverse dependencies of an id
updateReverseDeps :: Id -> Database -> Maybe [Depends] -> [Depends] -> Locked ()
updateReverseDeps myId db prev new = {-# SCC "updateReverseDeps" #-} do
let added = foldMap fromDepends new
deleted = [] -- an efficient impl. is expensive in space, so we overestimate for now
forM_ added $ doOne (HashSet.insert myId)
forM_ deleted $ doOne (HashSet.delete myId)
where
doOne f id = do
rdeps <- liftIO $ getReverseDependencies db id
setReverseDependencies db id (f $ fromMaybe mempty rdeps)

-- | Compute the value for a given RunMode and a restore function to run
buildRunMode :: Global -> Stack -> Database -> Maybe (Result a) -> Wait Locked RunMode
buildRunMode global stack database me = do
changed <- case me of
buildRunMode :: Global -> Stack -> Database -> Id -> Maybe (Result a) -> Wait Locked RunMode
buildRunMode global stack database i r = do
changed <- case r of
Nothing -> pure True
Just me -> buildRunDependenciesChanged global stack database me
Just me -> do
isDirty <- liftIO $ if globalUseDirtySet global then isDirty database i else pure True
if isDirty
-- Event if I am dirty, it is still possible that all my dependencies are unchanged
-- thanks to early cutoff, and therefore we must check to avoid redundant work
then buildRunDependenciesChanged global stack database i me
-- If I am not dirty then none of my dependencies are, so they must be unchanged
else do
-- The only exception is rules with a direct dependency on alwaysRerun
lookup <- liftIO $ getIdFromKey database
let alwaysRerunId = lookup $ newKey $ AlwaysRerunQ ()
pure $ case alwaysRerunId of
Nothing -> False
Just id -> any (\(Depends x) -> id `elem` x) (depends me)
pure $ if changed then RunDependenciesChanged else RunDependenciesSame

isDirtyOrAlwaysRerun :: MonadIO m => DatabasePoly Key v -> m (Id -> Bool)
isDirtyOrAlwaysRerun database = do
lookup <- liftIO $ getIdFromKey database
dirtySet <- liftIO $ getDirtySet database
let alwaysRerunId = lookup $ newKey $ AlwaysRerunQ ()
pure $ \id -> Just id == alwaysRerunId || id `HashSet.member` dirtySet

-- | Have the dependencies changed
buildRunDependenciesChanged :: Global -> Stack -> Database -> Result a -> Wait Locked Bool
buildRunDependenciesChanged global stack database me = isJust <$> firstJustM id
[firstJustWaitUnordered (fmap test . lookupOne global stack database) x | Depends x <- depends me]
buildRunDependenciesChanged :: Global -> Stack -> Database -> Id -> Result a -> Wait Locked Bool
buildRunDependenciesChanged global stack database i r = do
isDirty <- isDirtyOrAlwaysRerun database
isJust <$> firstJustM id
[firstJustWaitUnordered (fmap test . lookupOne global stack database) x'
| Depends x <- depends r
, let x' = if globalUseDirtySet global then filter isDirty x else x
]
where
test (Right dep) | changed dep <= built me = Nothing
test (Right dep) | changed dep <= built r = Nothing
test _ = Just ()


Expand Down Expand Up @@ -341,3 +394,25 @@ runIdentify :: Map.HashMap TypeRep BuiltinRule -> Key -> Value -> Maybe BS.ByteS
runIdentify mp k v
| Just BuiltinRule{..} <- Map.lookup (typeKey k) mp = builtinIdentity k v
| otherwise = throwImpure $ errorInternal "runIdentify can't find rule"

-------------------------------------------------------------------------------
-- SPECIAL RULES

-- | Always rerun the associated action. Useful for defining rules that query
-- the environment. For example:
--
-- @
-- \"ghcVersion.txt\" 'Development.Shake.%>' \\out -> do
-- 'alwaysRerun'
-- 'Development.Shake.Stdout' stdout <- 'Development.Shake.cmd' \"ghc --numeric-version\"
-- 'Development.Shake.writeFileChanged' out stdout
-- @
--
-- In @make@, the @.PHONY@ attribute on file-producing rules has a similar effect.
--
-- Note that 'alwaysRerun' is applied when a rule is executed. Modifying an existing rule
-- to insert 'alwaysRerun' will /not/ cause that rule to rerun next time.
alwaysRerun :: Action ()
alwaysRerun = do
historyDisable
apply1 $ AlwaysRerunQ ()
42 changes: 40 additions & 2 deletions src/Development/Shake/Internal/Core/Database.hs
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE GeneralizedNewtypeDeriving, RecordWildCards #-}

{-# LANGUAGE TupleSections #-}
module Development.Shake.Internal.Core.Database(
Locked, runLocked,
maskLocked,
DatabasePoly, createDatabase,
mkId,
getValueFromKey, getIdFromKey, getKeyValues, getKeyValueFromId, getKeyValuesFromId,
setMem, setDisk, modifyAllMem
setMem, setDisk, modifyAllMem,
isDirty, getDirtySet, markDirty, unmarkDirty, flushDirty,
getReverseDependencies, setReverseDependencies
) where

import Data.Tuple.Extra
Expand All @@ -20,6 +24,9 @@ import Control.Monad.IO.Class
import qualified General.Ids as Ids
import Control.Monad.Fail
import Prelude
import Control.Exception (mask_)
import Data.HashSet (HashSet)
import qualified Data.HashSet as HSet


newtype Locked a = Locked (IO a)
Expand All @@ -28,6 +35,8 @@ newtype Locked a = Locked (IO a)
runLocked :: DatabasePoly k v -> Locked b -> IO b
runLocked db (Locked act) = withLock (lock db) act

maskLocked :: Locked a -> Locked a
maskLocked (Locked act) = Locked $ mask_ act

-- | Invariant: The database does not have any cycles where a Key depends on itself.
-- Everything is mutable. intern and status must form a bijecttion.
Expand All @@ -37,21 +46,27 @@ data DatabasePoly k v = Database
{lock :: Lock
,intern :: IORef (Intern k) -- ^ Key |-> Id mapping
,status :: Ids.Ids (k, v) -- ^ Id |-> (Key, Status) mapping
,rdeps :: Ids.Ids (HashSet Id) -- ^ Id |-> reverse dependencies
,journal :: Id -> k -> v -> IO () -- ^ Record all changes to status
,vDefault :: v
,clean,dirty :: IORef (HashSet Id)
-- ^ An approximation of the dirty set across runs of 'shakeRunDatabaseForKeys'
}


createDatabase
:: (Eq k, Hashable k)
=> Ids.Ids (k, v)
-> Ids.Ids (HashSet Id)
-> (Id -> k -> v -> IO ())
-> v
-> IO (DatabasePoly k v)
createDatabase status journal vDefault = do
createDatabase status rdeps journal vDefault = do
xs <- Ids.toList status
intern <- newIORef $ Intern.fromList [(k, i) | (i, (k,_)) <- xs]
lock <- newLock
dirty <- newIORef mempty
clean <- newIORef mempty
pure Database{..}


Expand Down Expand Up @@ -80,6 +95,14 @@ getIdFromKey Database{..} = do
is <- readIORef intern
pure $ flip Intern.lookup is

isDirty :: DatabasePoly k v -> Id -> IO Bool
isDirty Database{..} i = HSet.member i <$> readIORef dirty

getDirtySet :: DatabasePoly k v -> IO (HashSet Id)
getDirtySet Database{..} = readIORef dirty

getReverseDependencies :: DatabasePoly k v -> Id -> IO (Maybe (HashSet Id))
getReverseDependencies Database{..} = Ids.lookup rdeps

---------------------------------------------------------------------
-- MUTATING
Expand All @@ -101,10 +124,25 @@ mkId Database{..} k = liftIO $ do
setMem :: DatabasePoly k v -> Id -> k -> v -> Locked ()
setMem Database{..} i k v = liftIO $ Ids.insert status i (k,v)

setReverseDependencies :: DatabasePoly k v -> Id -> HashSet Id -> Locked ()
setReverseDependencies Database{..} = (liftIO.) . Ids.insert rdeps

modifyAllMem :: DatabasePoly k v -> (v -> v) -> Locked ()
modifyAllMem Database{..} f = liftIO $ Ids.forMutate status $ \(k,v) ->
let !v' = f v
in (k, v')

setDisk :: DatabasePoly k v -> Id -> k -> v -> IO ()
setDisk = journal

markDirty :: DatabasePoly k v -> HashSet Id -> IO ()
markDirty Database{..} ids = atomicModifyIORef'_ dirty $ HSet.union ids

unmarkDirty :: DatabasePoly k v -> Id -> IO ()
unmarkDirty Database{..} i = do
atomicModifyIORef'_ clean (HSet.insert i)

flushDirty :: DatabasePoly k v -> Locked ()
flushDirty Database{..} = liftIO $ do
cleanIds <- atomicModifyIORef' clean (mempty,)
atomicModifyIORef'_ dirty (`HSet.difference` cleanIds)