From c4c8bf873028fa788e258b9635e9a6fe826c694a Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Wed, 8 Jan 2025 13:43:24 +0300 Subject: [PATCH 1/6] Add global lock on haskell side This way we can run python in python in multithreaded code without deadlocking GC is now performed in separate thread. --- inline-python.cabal | 1 + src/Python/Inline/Literal.hs | 2 +- src/Python/Inline/QQ.hs | 1 - src/Python/Internal/Eval.hs | 192 ++++++++++++++++++++++++++++------- test/TST/Run.hs | 2 + 5 files changed, 159 insertions(+), 39 deletions(-) diff --git a/inline-python.cabal b/inline-python.cabal index 3f6bba4..5588bc6 100644 --- a/inline-python.cabal +++ b/inline-python.cabal @@ -53,6 +53,7 @@ Library , process , transformers >=0.4 , inline-c >=0.9.1 + , stm >=2.4 , template-haskell -any , text >=2 , bytestring diff --git a/src/Python/Inline/Literal.hs b/src/Python/Inline/Literal.hs index f314347..ca86b02 100644 --- a/src/Python/Inline/Literal.hs +++ b/src/Python/Inline/Literal.hs @@ -464,7 +464,7 @@ instance (FromPy a1, FromPy a2, ToPy b) => ToPy (a1 -> a2 -> IO b) where -- | Execute haskell callback function pyCallback :: Program (Ptr PyObject) (Ptr PyObject) -> IO (Ptr PyObject) -pyCallback io = unPy $ ensureGIL $ evalContT io `catch` convertHaskell2Py +pyCallback io = grabPyLock $ unPy $ ensureGIL $ evalContT io `catch` convertHaskell2Py -- | Load argument from python object for haskell evaluation loadArg diff --git a/src/Python/Inline/QQ.hs b/src/Python/Inline/QQ.hs index e8bfa8c..2b82bde 100644 --- a/src/Python/Inline/QQ.hs +++ b/src/Python/Inline/QQ.hs @@ -13,7 +13,6 @@ module Python.Inline.QQ import Language.Haskell.TH.Quote import Python.Internal.EvalQQ -import Python.Internal.Eval -- | Evaluate python code in context of main module. All variables diff --git a/src/Python/Internal/Eval.hs b/src/Python/Internal/Eval.hs index 0b3e14b..f6c1639 100644 --- a/src/Python/Internal/Eval.hs +++ b/src/Python/Internal/Eval.hs @@ -8,6 +8,9 @@ module Python.Internal.Eval ( -- * Evaluator runPy , unPy + -- * Locks + , ensurePyLock + , grabPyLock -- * Initialization , initializePython , finalizePython @@ -30,9 +33,12 @@ module Python.Internal.Eval ) where import Control.Concurrent +import Control.Concurrent.STM +import Control.Monad import Control.Monad.Catch import Control.Monad.IO.Class import Control.Monad.Trans.Cont +import Foreign.Concurrent qualified as GHC import Foreign.Ptr import Foreign.ForeignPtr import Foreign.C.Types @@ -40,6 +46,7 @@ import Foreign.C.String import Foreign.Marshal.Array import Foreign.Storable import System.Environment +import System.IO.Unsafe import Language.C.Inline qualified as C import Language.C.Inline.Unsafe qualified as CU @@ -122,6 +129,74 @@ C.include "" -- Execution of Py monad ---------------------------------------------------------------- +data PyState + = NotInitialized + | InInitialization + | InitFailed + | Running !(Chan (Ptr PyObject)) !ThreadId + | InFinalization + | Finalized + +data PyLock + = LockUninialized + | LockUnlocked + | Locked !ThreadId [ThreadId] + | LockedByGC + | LockFinalized + +globalPyState :: TVar PyState +globalPyState = unsafePerformIO $ newTVarIO NotInitialized +{-# NOINLINE globalPyState #-} + +globalPyLock :: TVar PyLock +globalPyLock = unsafePerformIO $ newTVarIO LockUninialized +{-# NOINLINE globalPyLock #-} + +acquireLock :: ThreadId -> STM () +acquireLock tid = readTVar globalPyLock >>= \case + LockUninialized -> error "Python is not started" + LockFinalized -> error "Python is already stopped" + LockedByGC -> retry + LockUnlocked -> writeTVar globalPyLock $ Locked tid [] + Locked t xs + | t == tid -> writeTVar globalPyLock $ Locked t (t : xs) + | otherwise -> retry + +grabLock :: ThreadId -> STM () +grabLock tid = readTVar globalPyLock >>= \case + LockUninialized -> error "Python is not started" + LockFinalized -> error "Python is already stopped" + LockedByGC -> retry + LockUnlocked -> writeTVar globalPyLock $ Locked tid [] + Locked t xs -> writeTVar globalPyLock $ Locked tid (t : xs) + +releaseLock :: ThreadId -> STM () +releaseLock tid = readTVar globalPyLock >>= \case + LockUninialized -> error "Python is not started" + LockFinalized -> error "Python is already stopped" + LockUnlocked -> error "INTERNAL ERROR releasing unlocked" + LockedByGC -> error "INTERNAL ERROR lock held by GC" + Locked t xs + | t /= tid -> error "INTERNAL ERROR releasing wrong lock" + | otherwise -> writeTVar globalPyLock $! case xs of + [] -> LockUnlocked + t':ts -> Locked t' ts + +ensurePyLock :: IO a -> IO a +ensurePyLock action = do + tid <- myThreadId + bracket_ (atomically $ acquireLock tid) + (atomically $ releaseLock tid) + action + +grabPyLock :: IO a -> IO a +grabPyLock action = do + tid <- myThreadId + bracket_ (atomically $ grabLock tid) + (atomically $ releaseLock tid) + action + + -- | Execute python action. It will be executed with GIL held and -- async exceptions masked. runPy :: Py a -> IO a @@ -133,7 +208,7 @@ runPy py -- We check whether interpreter is initialized. Throw exception if -- it wasn't. Better than segfault isn't it? go = mask_ $ isInitialized >>= \case - True -> unPy (ensureGIL py) + True -> ensurePyLock $ unPy (ensureGIL py) False -> error "Python is not initialized" -- | Execute python action. This function is unsafe and should be only @@ -157,30 +232,17 @@ isInitialized = do -- initialized it's a noop. initializePython :: IO () -- See NOTE: [Python and threading] -initializePython - | rtsSupportsBoundThreads = runInBoundThread $ mask_ $ do - -- In multithreaded RTS we need to release GIL so other threads - -- may take it. - [CU.exp| int { Py_IsInitialized() } |] >>= \case - 0 -> do doInializePython - [CU.exp| void { PyEval_SaveThread() } |] - _ -> pure () - | otherwise = mask_ $ - [CU.exp| int { Py_IsInitialized() } |] >>= \case - 0 -> do doInializePython - [CU.exp| void { PyEval_SaveThread() } |] - _ -> pure () +initializePython = [CU.exp| int { Py_IsInitialized() } |] >>= \case + 0 | rtsSupportsBoundThreads -> runInBoundThread $ mask_ $ doInializePython + | otherwise -> mask_ $ doInializePython + _ -> pure () -- | Destroy python interpreter. finalizePython :: IO () -- See NOTE: [Python and threading] finalizePython - | rtsSupportsBoundThreads = runInBoundThread $ do - [CU.exp| void { PyGILState_Ensure() } |] - mask_ doFinalizePython - | otherwise = mask_ $ do - [CU.exp| void { PyGILState_Ensure() } |] - doFinalizePython + | rtsSupportsBoundThreads = runInBoundThread $ mask_ doFinalizePython + | otherwise = mask_ $ doFinalizePython -- | Bracket which ensures that action is executed with properly -- initialized interpreter @@ -190,7 +252,30 @@ withPython = bracket_ initializePython finalizePython doInializePython :: IO () doInializePython = do - -- NOTE: I'd like more direct access to argv + -- First we need to grab global python lock on haskell side + join $ atomically $ do + readTVar globalPyState >>= \case + Finalized -> error "Python was already finalized" + InitFailed -> error "Python was unable to initialize" + InInitialization -> retry + InFinalization -> retry + Running{} -> pure $ pure () + NotInitialized -> do + writeTVar globalPyState InInitialization + pure $ + (do doInializePythonIO + gc_chan <- newChan + gc_tid <- if + | rtsSupportsBoundThreads -> forkOS $ gcThread gc_chan + | otherwise -> forkIO $ gcThread gc_chan + atomically $ do + writeTVar globalPyState $ Running gc_chan gc_tid + writeTVar globalPyLock $ LockUnlocked + ) `onException` atomically (writeTVar globalPyState InitFailed) + +doInializePythonIO :: IO () +doInializePythonIO = do + -- FIXME: I'd like more direct access to argv argv0 <- getProgName argv <- getArgs let n_argv = fromIntegral $ length argv + 1 @@ -226,6 +311,8 @@ doInializePython = do goto error; }; PyConfig_Clear(&cfg); + // Release GIL so other threads may take it + PyEval_SaveThread(); return 0; // Error case error: @@ -236,12 +323,27 @@ doInializePython = do _ -> error "Failed to initialize interpreter" doFinalizePython :: IO () -doFinalizePython = [C.block| void { - if( Py_IsInitialized() ) { - Py_Finalize(); - } - } |] - +doFinalizePython = join $ atomically $ readTVar globalPyState >>= \case + NotInitialized -> error "Python is not initialized" + InitFailed -> error "Python failed to initialize" + Finalized -> pure $ pure () + InInitialization -> retry + InFinalization -> retry + Running _ gc_tid -> do + readTVar globalPyLock >>= \case + LockUninialized -> error "Internal error: Lock not initialized" + LockFinalized -> error "Internal error: Lock is already finalized" + Locked{} -> retry + LockedByGC -> retry + LockUnlocked -> do + writeTVar globalPyLock LockFinalized + writeTVar globalPyState Finalized + pure $ do + killThread gc_tid + [C.block| void { + PyGILState_Ensure(); + Py_Finalize(); + } |] ---------------------------------------------------------------- -- Creation of PyObject @@ -280,16 +382,32 @@ takeOwnership p = ContT $ \c -> c p `finally` decref p newPyObject :: Ptr PyObject -> Py PyObject -- See NOTE: [GC] newPyObject p = Py $ do - PyObject <$> newForeignPtr fptrXDECREF p - -fptrXDECREF :: FunPtr (Ptr PyObject -> IO ()) -fptrXDECREF = [C.funPtr| void inline_py_fptr_XDECREF(PyObject* p) { - if( Py_IsFinalizing() || !Py_IsInitialized () ) - return; - PyGILState_STATE st = PyGILState_Ensure(); - Py_XDECREF(p); - PyGILState_Release(st); - } |] + fptr <- newForeignPtr_ p + GHC.addForeignPtrFinalizer fptr $ + readTVarIO globalPyState >>= \case + Running ch _ -> writeChan ch p + _ -> pure () + pure $ PyObject fptr + +gcThread :: Chan (Ptr PyObject) -> IO () +gcThread ch = forever $ do + decrefGC =<< readChan ch + +decrefGC :: Ptr PyObject -> IO () +decrefGC p = join $ atomically $ readTVar globalPyLock >>= \case + LockUninialized -> pure $ pure () + LockFinalized -> pure $ pure () + LockedByGC -> pure $ pure () + Locked{} -> retry + LockUnlocked -> do + writeTVar globalPyLock LockedByGC + pure $ do + [CU.block| void { + PyGILState_STATE st = PyGILState_Ensure(); + Py_XDECREF( $(PyObject* p) ); + PyGILState_Release(st); + } |] + atomically $ writeTVar globalPyLock LockUnlocked ---------------------------------------------------------------- -- Conversion of exceptions diff --git a/test/TST/Run.hs b/test/TST/Run.hs index c468e0c..07c0833 100644 --- a/test/TST/Run.hs +++ b/test/TST/Run.hs @@ -3,6 +3,7 @@ module TST.Run(tests) where import Control.Monad +import Control.Monad.IO.Class import Test.Tasty import Test.Tasty.HUnit import Python.Inline @@ -13,6 +14,7 @@ tests :: TestTree tests = testGroup "Run python" [ testCase "Empty QQ" $ runPy [py_| |] , testCase "Second init is noop" $ initializePython + , testCase "Nested runPy" $ runPy $ liftIO $ runPy $ pure () , testCase "Python exceptions are converted" $ runPy $ throwsPy [py_| 1 / 0 |] , testCase "Scope pymain->any" $ runPy $ do [pymain| From ec631d23549bddbc48f0999ab67479097749e6ef Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Tue, 7 Jan 2025 23:18:55 +0300 Subject: [PATCH 2/6] DRAFT --- src/Python/Internal/Eval.hs | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/Python/Internal/Eval.hs b/src/Python/Internal/Eval.hs index f6c1639..4e011c2 100644 --- a/src/Python/Internal/Eval.hs +++ b/src/Python/Internal/Eval.hs @@ -63,14 +63,29 @@ C.include "" -- NOTE: [Python and threading] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- --- Python support threading to some extent and it interacts with --- haskell threading in interesting and generally unpleasant way. +-- Python (cpython to be precise) support threading to some extent and +-- it interacts with haskell threading in interesting and generally +-- unpleasant ways. So python's threads are: -- --- 1. Any thread interacting with python interpreter must hold +-- 1. They're OS threads. Python is designed to be embeddable and can +-- live with threads scheduled by outside entity. +-- +-- 2. Any OS thread interacting with python interpreter must hold -- global interpreter lock (GIL) -- --- 2. GIL uses thread local state. +-- 3. GIL uses thread local state. +-- +-- Haskell has two runtimes. Single threaded one doesn't cause any +-- troubles and won't be discussed further. Multithreaded one +-- implement N-M threading and schedules N green thread over M OS +-- threads as it see fit. -- +-- One could think that running python code in bound threads and +-- making sure that GIL is held would suffice. It doesn't. +-- RTS m + +-- + -- This means python must run in bound threads. Or in case of -- single-threaded RTS we could just make safe FFI calls. There's only -- one thread anyway. From a9f3f843c6d0922e0ba8923d9ac1aedb5e35e6dd Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Wed, 8 Jan 2025 19:21:31 +0300 Subject: [PATCH 3/6] Document threading Also specialcase GC in single-threaded case --- src/Python/Inline/Literal.hs | 2 +- src/Python/Internal/Eval.hs | 300 +++++++++++++++++++++-------------- 2 files changed, 179 insertions(+), 123 deletions(-) diff --git a/src/Python/Inline/Literal.hs b/src/Python/Inline/Literal.hs index ca86b02..613f7a7 100644 --- a/src/Python/Inline/Literal.hs +++ b/src/Python/Inline/Literal.hs @@ -464,7 +464,7 @@ instance (FromPy a1, FromPy a2, ToPy b) => ToPy (a1 -> a2 -> IO b) where -- | Execute haskell callback function pyCallback :: Program (Ptr PyObject) (Ptr PyObject) -> IO (Ptr PyObject) -pyCallback io = grabPyLock $ unPy $ ensureGIL $ evalContT io `catch` convertHaskell2Py +pyCallback io = callbackEnsurePyLock $ unPy $ ensureGIL $ evalContT io `catch` convertHaskell2Py -- | Load argument from python object for haskell evaluation loadArg diff --git a/src/Python/Internal/Eval.hs b/src/Python/Internal/Eval.hs index 4e011c2..b6f3a9d 100644 --- a/src/Python/Internal/Eval.hs +++ b/src/Python/Internal/Eval.hs @@ -5,18 +5,19 @@ -- | -- Evaluation of python expressions. module Python.Internal.Eval - ( -- * Evaluator - runPy - , unPy - -- * Locks - , ensurePyLock - , grabPyLock + ( -- * Locks + ensurePyLock + , callbackEnsurePyLock -- * Initialization , initializePython , finalizePython , withPython - -- * PyObject wrapper + -- * Evaluator + , runPy + , unPy + -- * GC-related , newPyObject + -- * C-API wrappers , decref , incref , takeOwnership @@ -63,60 +64,68 @@ C.include "" -- NOTE: [Python and threading] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- --- Python (cpython to be precise) support threading to some extent and --- it interacts with haskell threading in interesting and generally --- unpleasant ways. So python's threads are: +-- Python (cpython to be precise) support threading to and it +-- interacts with haskell threading in interesting and generally +-- unpleasant ways. In short python's threads are: -- --- 1. They're OS threads. Python is designed to be embeddable and can --- live with threads scheduled by outside entity. +-- 1. OS threads. Python is designed to be embeddable and can +-- live with threads scheduled by outside python's runtime. -- -- 2. Any OS thread interacting with python interpreter must hold --- global interpreter lock (GIL) +-- global interpreter lock (GIL). -- -- 3. GIL uses thread local state. -- -- Haskell has two runtimes. Single threaded one doesn't cause any -- troubles and won't be discussed further. Multithreaded one --- implement N-M threading and schedules N green thread over M OS +-- implement N-M threading and schedules N green thread on M OS -- threads as it see fit. -- -- One could think that running python code in bound threads and --- making sure that GIL is held would suffice. It doesn't. --- RTS m - +-- making sure that GIL is held would suffice. It doesn't. Doing so +-- would quickly results in deadlock. Exact reason for that is not +-- understood. -- - --- This means python must run in bound threads. Or in case of --- single-threaded RTS we could just make safe FFI calls. There's only --- one thread anyway. +-- Another problem is GHC may schedule two threads each running python +-- code on same capability. They won't have any problems taking GIL +-- and will run concurrently stepping on each other's toes. +-- +-- Only way to solve this problem is to introduce another lock on +-- haskell side. It's visible to haskell RTS so we won't get deadlocks +-- and it makes sure that only one haskell thread interacts with +-- python at a time. -- --- In order to track code with such requirements `Py` monad (just --- newtype over `IO`). All code interacting with Python must live in --- it. In addition it should run with async exceptions masked since --- writing code which works with bare pointers AND safe in presence of --- async exceptions is almost impossible. -- -- --- And this is not end of our problems with threading. Python --- designate thread in which it was initialized as a main thread. It --- has special status but if don't take precautions we don't know --- which haskell thread it is. +-- Also python designate thread in which python interpreter was +-- initialized as a main thread. It has special status for example +-- some libraries may run only in main thread (e.g. tkinter). But if +-- we don't take special precautions we won't know which thread it +-- is. +-- +-- +-- +-- There's of course question how well python threading interacts with +-- haskell. No one knows, probably it won't work well. -- NOTE: [GC] -- ~~~~~~~~~~ -- --- CPython uses reference counting which works very well with +-- CPython uses reference counting which should work very well with -- ForeignPtr. But there's a catch: decrementing counter is only --- possible if one holds GIL. And one could not touch GIL if --- interpreter is not initialized or being finalized. +-- possible if one holds GIL. Taking GIL may block and doing so during +-- GC may eventually will block GC thread and the whole program. -- --- We do not need to care whether thread is bound or not since this is --- single C call which will not getting migrated. +-- Current solution is not quite satisfactory: finalizer writes +-- pointer to `Chan` which delivers it to thread which decrements +-- counter. It's not very good solution since we need to take locks +-- for each DECREF which is relatively costly (O(1μs)). But better +-- solutions are not obvious. -- --- Still it's a question whether it's OK to call blocking code in --- ForeignPtr's finalizers. +-- Problem above is only relevant for multithreaded RTS there's no +-- other threads that could hold lock and taking GIL can't fail. @@ -141,31 +150,73 @@ C.include "" ---------------------------------------------------------------- --- Execution of Py monad +-- Lock and global state ---------------------------------------------------------------- +globalPyState :: TVar PyState +globalPyState = unsafePerformIO $ newTVarIO NotInitialized +{-# NOINLINE globalPyState #-} + +globalPyLock :: TVar PyLock +globalPyLock = unsafePerformIO $ newTVarIO LockUninialized +{-# NOINLINE globalPyLock #-} + + +-- | State of python interpreter data PyState = NotInitialized + -- ^ Initialization is not done. Initial state. | InInitialization + -- ^ Interpreter is being initialized. | InitFailed - | Running !(Chan (Ptr PyObject)) !ThreadId + -- ^ Initialization was attempted but failed for whatever reason. + | Running !(Chan (Ptr PyObject)) !(Maybe ThreadId) + -- ^ Interpreter is running | InFinalization + -- ^ Interpreter is being finalized. | Finalized + -- ^ Interpreter was shut down. + +-- | Lock. It's necessary for lock to reentrant since thread may take +-- it several times for example by nesting 'runPy'. We use +-- 'ThreadId' as a key to figure out whether thread may retake lock +-- or not. +-- +-- Another special case is callbacks. Callback (via 'FunPtr') will +-- start new haskell thread so we need to add primitive for grabbing +-- lock regardless of current 'ThreadId' data PyLock = LockUninialized + -- ^ There's no interpreter and lock does not exist. | LockUnlocked + -- ^ Lock could be taked | Locked !ThreadId [ThreadId] | LockedByGC | LockFinalized + deriving Show -globalPyState :: TVar PyState -globalPyState = unsafePerformIO $ newTVarIO NotInitialized -{-# NOINLINE globalPyState #-} +-- | Execute code ensuring that python lock is held by current thread. +ensurePyLock :: IO a -> IO a +ensurePyLock action = do + tid <- myThreadId + bracket_ (atomically $ acquireLock tid) + (atomically $ releaseLock tid) + action + +-- | Retake lock regardless of thread which hold lock. Lock must be +-- already taken. Caller must make sure that thread holding lock is +-- block for duration of action. +-- +-- This is very unsafe. It must be used only in callbacks from +-- python to haskell +callbackEnsurePyLock :: IO a -> IO a +callbackEnsurePyLock action = do + tid <- myThreadId + bracket_ (atomically $ grabLock tid) + (atomically $ releaseLock tid) + action -globalPyLock :: TVar PyLock -globalPyLock = unsafePerformIO $ newTVarIO LockUninialized -{-# NOINLINE globalPyLock #-} acquireLock :: ThreadId -> STM () acquireLock tid = readTVar globalPyLock >>= \case @@ -197,54 +248,15 @@ releaseLock tid = readTVar globalPyLock >>= \case [] -> LockUnlocked t':ts -> Locked t' ts -ensurePyLock :: IO a -> IO a -ensurePyLock action = do - tid <- myThreadId - bracket_ (atomically $ acquireLock tid) - (atomically $ releaseLock tid) - action - -grabPyLock :: IO a -> IO a -grabPyLock action = do - tid <- myThreadId - bracket_ (atomically $ grabLock tid) - (atomically $ releaseLock tid) - action - - --- | Execute python action. It will be executed with GIL held and --- async exceptions masked. -runPy :: Py a -> IO a --- See NOTE: [Python and threading] -runPy py - | rtsSupportsBoundThreads = runInBoundThread go -- Multithreaded RTS - | otherwise = go -- Single-threaded RTS - where - -- We check whether interpreter is initialized. Throw exception if - -- it wasn't. Better than segfault isn't it? - go = mask_ $ isInitialized >>= \case - True -> ensurePyLock $ unPy (ensureGIL py) - False -> error "Python is not initialized" - --- | Execute python action. This function is unsafe and should be only --- called in thread of interpreter. -unPy :: Py a -> IO a -unPy (Py io) = io - - -isInitialized :: IO Bool -isInitialized = do - i <- [CU.exp| int { !Py_IsFinalizing() && Py_IsInitialized() } |] - pure $! i /= 0 - ---------------------------------------------------------------- --- Initialization of interpreter +-- Initialization and finalization ---------------------------------------------------------------- -- | Initialize python interpreter. If interpreter is already --- initialized it's a noop. +-- initialized it's a noop. Calling after python was shut down will +-- result in error. initializePython :: IO () -- See NOTE: [Python and threading] initializePython = [CU.exp| int { Py_IsInitialized() } |] >>= \case @@ -281,8 +293,8 @@ doInializePython = do (do doInializePythonIO gc_chan <- newChan gc_tid <- if - | rtsSupportsBoundThreads -> forkOS $ gcThread gc_chan - | otherwise -> forkIO $ gcThread gc_chan + | rtsSupportsBoundThreads -> Just <$> forkOS (gcThread gc_chan) + | otherwise -> pure Nothing atomically $ do writeTVar globalPyState $ Running gc_chan gc_tid writeTVar globalPyLock $ LockUnlocked @@ -354,44 +366,40 @@ doFinalizePython = join $ atomically $ readTVar globalPyState >>= \case writeTVar globalPyLock LockFinalized writeTVar globalPyState Finalized pure $ do - killThread gc_tid + mapM_ killThread gc_tid [C.block| void { PyGILState_Ensure(); Py_Finalize(); } |] + ---------------------------------------------------------------- --- Creation of PyObject +-- Running Py monad ---------------------------------------------------------------- -decref :: Ptr PyObject -> Py () -decref p = Py [CU.exp| void { Py_DECREF($(PyObject* p)) } |] - -incref :: Ptr PyObject -> Py () -incref p = Py [CU.exp| void { Py_INCREF($(PyObject* p)) } |] +-- | Execute python action. It will take global lock and no other +-- python action could start execution until one currently running +-- finished execution normally or with exception. +runPy :: Py a -> IO a +-- See NOTE: [Python and threading] +runPy py + | rtsSupportsBoundThreads = runInBoundThread go -- Multithreaded RTS + | otherwise = go -- Single-threaded RTS + where + -- We check whether interpreter is initialized. Throw exception if + -- it wasn't. Better than segfault isn't it? + go = ensurePyLock $ unPy (ensureGIL py) --- | Ensure that we hold GIL for duration of action -ensureGIL :: Py a -> Py a -ensureGIL action = do - -- NOTE: We're cheating here and looking behind the veil. - -- PyGILState_STATE is defined as enum. Let hope it will stay - -- this way. - gil_state <- Py [CU.exp| int { PyGILState_Ensure() } |] - action `finally` Py [CU.exp| void { PyGILState_Release($(int gil_state)) } |] +-- | Execute python action. This function is unsafe and should be only +-- called in thread of interpreter. +unPy :: Py a -> IO a +unPy (Py io) = io --- | Drop GIL temporarily -dropGIL :: IO a -> Py a -dropGIL action = do - -- NOTE: We're cheating here and looking behind the veil. - -- PyGILState_STATE is defined as enum. Let hope it will stay - -- this way. - st <- Py [CU.exp| PyThreadState* { PyEval_SaveThread() } |] - Py $ action `finally` [CU.exp| void { PyEval_RestoreThread($(PyThreadState *st)) } |] --- | Decrement reference counter at end of ContT block -takeOwnership :: Ptr PyObject -> Program r (Ptr PyObject) -takeOwnership p = ContT $ \c -> c p `finally` decref p +---------------------------------------------------------------- +-- GC-related functions +---------------------------------------------------------------- -- | Wrap raw python object into newPyObject :: Ptr PyObject -> Py PyObject @@ -400,10 +408,14 @@ newPyObject p = Py $ do fptr <- newForeignPtr_ p GHC.addForeignPtrFinalizer fptr $ readTVarIO globalPyState >>= \case - Running ch _ -> writeChan ch p + Running ch _ + | rtsSupportsBoundThreads -> writeChan ch p + | otherwise -> singleThreadedDecrefCG p _ -> pure () pure $ PyObject fptr +-- | Thread doing garbage collection for python object in +-- multithreaded runtime. gcThread :: Chan (Ptr PyObject) -> IO () gcThread ch = forever $ do decrefGC =<< readChan ch @@ -417,12 +429,56 @@ decrefGC p = join $ atomically $ readTVar globalPyLock >>= \case LockUnlocked -> do writeTVar globalPyLock LockedByGC pure $ do - [CU.block| void { - PyGILState_STATE st = PyGILState_Ensure(); - Py_XDECREF( $(PyObject* p) ); - PyGILState_Release(st); - } |] - atomically $ writeTVar globalPyLock LockUnlocked + gcDecref p `finally` atomically (writeTVar globalPyLock LockUnlocked) + +singleThreadedDecrefCG :: Ptr PyObject -> IO () +singleThreadedDecrefCG p = readTVarIO globalPyLock >>= \case + LockUninialized -> pure () + LockFinalized -> pure () + LockedByGC -> gcDecref p + Locked{} -> gcDecref p + LockUnlocked -> gcDecref p + +gcDecref :: Ptr PyObject -> IO () +gcDecref p = [CU.block| void { + PyGILState_STATE st = PyGILState_Ensure(); + Py_XDECREF( $(PyObject* p) ); + PyGILState_Release(st); + } |] + + +---------------------------------------------------------------- +-- C-API wrappers +---------------------------------------------------------------- + +decref :: Ptr PyObject -> Py () +decref p = Py [CU.exp| void { Py_DECREF($(PyObject* p)) } |] + +incref :: Ptr PyObject -> Py () +incref p = Py [CU.exp| void { Py_INCREF($(PyObject* p)) } |] + +-- | Ensure that we hold GIL for duration of action +ensureGIL :: Py a -> Py a +ensureGIL action = do + -- NOTE: We're cheating here and looking behind the veil. + -- PyGILState_STATE is defined as enum. Let hope it will stay + -- this way. + gil_state <- Py [CU.exp| int { PyGILState_Ensure() } |] + action `finally` Py [CU.exp| void { PyGILState_Release($(int gil_state)) } |] + +-- | Drop GIL temporarily +dropGIL :: IO a -> Py a +dropGIL action = do + -- NOTE: We're cheating here and looking behind the veil. + -- PyGILState_STATE is defined as enum. Let hope it will stay + -- this way. + st <- Py [CU.exp| PyThreadState* { PyEval_SaveThread() } |] + Py $ action `finally` [CU.exp| void { PyEval_RestoreThread($(PyThreadState *st)) } |] + +-- | Decrement reference counter at end of ContT block +takeOwnership :: Ptr PyObject -> Program r (Ptr PyObject) +takeOwnership p = ContT $ \c -> c p `finally` decref p + ---------------------------------------------------------------- -- Conversion of exceptions From b5c2146d1f09ed3ccec796070d5e8714f13c47d2 Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Wed, 8 Jan 2025 20:07:25 +0300 Subject: [PATCH 4/6] Update haddocks --- README.md | 3 +- src/Python/Inline.hs | 78 ++++++++++++++++++++++++++++++++++-- src/Python/Inline/Literal.hs | 9 ++++- test/TST/FromPy.hs | 1 + test/TST/Util.hs | 1 + 5 files changed, 85 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 2181894..a4968c0 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,8 @@ main = withPython $ do let input = [1..10] :: [Int] let square :: Int -> IO Int square x = pure (x * x) - print =<< fromPy' @[Int] =<< [pye| [ square_hs(x) for x in input_hs ] |] + print =<< runPy $ do + fromPy' @[Int] =<< [pye| [ square_hs(x) for x in input_hs ] |] ``` it would output: diff --git a/src/Python/Inline.hs b/src/Python/Inline.hs index cbe6942..2d20549 100644 --- a/src/Python/Inline.hs +++ b/src/Python/Inline.hs @@ -1,6 +1,42 @@ --- | +-- | This library allows to embed as quasiquotes and execute arbitrary +-- python code in haskell programs. Take for example following program: +-- +-- > {-# LANGUAGE QuasiQuotes #-} +-- > import Python.Inline +-- > import Python.Inline.QQ +-- > +-- > main :: IO () +-- > main = withPython $ do +-- > let input = [1..10] :: [Int] +-- > let square :: Int -> Py Int +-- > square x = pure (x * x) +-- > print =<< runPy $ do +-- > fromPy' @[Int] =<< [pye| [ square_hs(x) for x in input_hs ] |] +-- +-- Quasiquotation 'Python.Inline.QQ.pye' captures variables @input@ +-- and @square@ from environment and produces python object which +-- `fromPy'` converts to haskell list. As one expect it would output: +-- +-- > [1,4,9,16,25,36,49,64,81,100] +-- +-- Module "Python.Inline.QQ" provides several quasiquoters with +-- different semantics but general rules are: +-- +-- 1. All python variables ending with @_hs@ are captured from +-- environment and converted to python objects according to their +-- 'ToPy' instance. +-- +-- 2. Syntax errors in embedded python will be caught during +-- compilation. +-- +-- 3. All code interacting with python must be in 'Py' monad which +-- could be run using 'runPy'. +-- +-- 4. Python interpreter must be initialized before calling any +-- python code. module Python.Inline ( -- * Interpreter initialization + -- $initialization initializePython , finalizePython , withPython @@ -8,8 +44,8 @@ module Python.Inline , Py , runPy , PyObject - , PyError(..) -- * Conversion between haskell and python + -- $conversion , toPy , fromPyEither , fromPy @@ -18,8 +54,42 @@ module Python.Inline , FromPy ) where - import Python.Types import Python.Inline.Literal - import Python.Internal.Eval + + +-- $initialization +-- +-- Python supports being initialized and shut down multiple times. +-- This however has caveats. Quoting it documentation: +-- +-- > Bugs and caveats: The destruction of modules and objects in +-- > modules is done in random order; this may cause destructors +-- > (__del__() methods) to fail when they depend on other objects +-- > (even functions) or modules. Dynamically loaded extension +-- > modules loaded by Python are not unloaded. Small amounts of +-- > memory allocated by the Python interpreter may not be freed (if +-- > you find a leak, please report it). Memory tied up in circular +-- > references between objects is not freed. Some memory allocated +-- > by extension modules may not be freed. Some extensions may not +-- > work properly if their initialization routine is called more +-- > than once. +-- +-- More importantly for this library. All pointers held by 'PyObject' +-- becomes invalid after interpreter is shut down. If GC tries to run +-- finalizers after interpreter is intialized again program will +-- surely segfault. +-- +-- For that reason it's only possible to initialize python once and +-- attempts to initialize python after is was shut down will raise +-- exceptions. + + +-- $conversion +-- +-- Python objects are opaque blobs and accessing them may involve +-- running arbitrary python code. Most notable iteration protocol or +-- any of dunder methods. For that reason conversion from python to +-- haskell must happen in 'Py' monad. Conversion also always performs +-- full copy. Conversion from haskell to python is stateful as well. diff --git a/src/Python/Inline/Literal.hs b/src/Python/Inline/Literal.hs index 613f7a7..c1acd8d 100644 --- a/src/Python/Inline/Literal.hs +++ b/src/Python/Inline/Literal.hs @@ -294,6 +294,7 @@ instance (ToPy a, ToPy b) => ToPy (a,b) where p_b <- takeOwnership =<< checkNull (basicToPy b) liftIO [CU.exp| PyObject* { PyTuple_Pack(2, $(PyObject* p_a), $(PyObject* p_b)) } |] +-- | Will accept any iterable instance (FromPy a, FromPy b) => FromPy (a,b) where basicFromPy p_tup = evalContT $ do -- Unpack 2-tuple. @@ -318,6 +319,7 @@ instance (ToPy a, ToPy b, ToPy c) => ToPy (a,b,c) where liftIO [CU.exp| PyObject* { PyTuple_Pack(3, $(PyObject *p_a), $(PyObject *p_b), $(PyObject *p_c)) } |] +-- | Will accept any iterable instance (FromPy a, FromPy b, FromPy c) => FromPy (a,b,c) where basicFromPy p_tup = evalContT $ do -- Unpack 3-tuple. @@ -345,6 +347,7 @@ instance (ToPy a, ToPy b, ToPy c, ToPy d) => ToPy (a,b,c,d) where liftIO [CU.exp| PyObject* { PyTuple_Pack(4, $(PyObject *p_a), $(PyObject *p_b), $(PyObject *p_c), $(PyObject *p_d)) } |] +-- | Will accept any iterable instance (FromPy a, FromPy b, FromPy c, FromPy d) => FromPy (a,b,c,d) where basicFromPy p_tup = evalContT $ do -- Unpack 3-tuple. @@ -368,6 +371,7 @@ instance (FromPy a, FromPy b, FromPy c, FromPy d) => FromPy (a,b,c,d) where instance (ToPy a) => ToPy [a] where basicToPy = basicListToPy +-- | Will accept any iterable instance (FromPy a) => FromPy [a] where basicFromPy p_list = do p_iter <- Py [CU.block| PyObject* { @@ -427,6 +431,7 @@ instance (FromPy a) => FromPy [a] where -- with async exception out of the blue +-- | Converted to 0-ary function instance (ToPy b) => ToPy (IO b) where basicToPy f = Py $ do -- @@ -436,6 +441,7 @@ instance (ToPy b) => ToPy (IO b) where [CU.exp| PyObject* { inline_py_callback_METH_NOARGS($(PyCFunction f_ptr)) } |] +-- | Only accepts positional parameters instance (FromPy a, Show a, ToPy b) => ToPy (a -> IO b) where basicToPy f = Py $ do -- @@ -445,7 +451,7 @@ instance (FromPy a, Show a, ToPy b) => ToPy (a -> IO b) where -- [CU.exp| PyObject* { inline_py_callback_METH_O($(PyCFunction f_ptr)) } |] - +-- | Only accepts positional parameters instance (FromPy a1, FromPy a2, ToPy b) => ToPy (a1 -> a2 -> IO b) where basicToPy f = Py $ do -- @@ -507,7 +513,6 @@ raiseBadNArgs expected got = Py [CU.block| PyObject* { } |] - type FunWrapper a = a -> IO (FunPtr a) foreign import ccall "wrapper" wrapCFunction diff --git a/test/TST/FromPy.hs b/test/TST/FromPy.hs index 5f3279b..17bfed8 100644 --- a/test/TST/FromPy.hs +++ b/test/TST/FromPy.hs @@ -7,6 +7,7 @@ import Test.Tasty import Test.Tasty.HUnit import Python.Inline import Python.Inline.QQ +import Python.Types tests :: TestTree tests = testGroup "FromPy" diff --git a/test/TST/Util.hs b/test/TST/Util.hs index dad133f..8843f2b 100644 --- a/test/TST/Util.hs +++ b/test/TST/Util.hs @@ -6,6 +6,7 @@ import Control.Monad.Catch import Test.Tasty.HUnit import Python.Inline +import Python.Types throwsPy :: Py () -> Py () throwsPy io = (io >> liftIO (assertFailure "Evaluation should raise python exception")) From 4cc72e22298247c3a7187ce84f89a1c4063fabb2 Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Wed, 8 Jan 2025 20:09:43 +0300 Subject: [PATCH 5/6] Run threaded tests using 2 threads --- inline-python.cabal | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/inline-python.cabal b/inline-python.cabal index 5588bc6..9e8df87 100644 --- a/inline-python.cabal +++ b/inline-python.cabal @@ -96,10 +96,12 @@ library test TST.Roundtrip TST.Util +-- Running tests using several threads does very good job at finding threading +-- bugs. Especially deadlocks test-suite inline-python-tests import: language type: exitcode-stdio-1.0 - Ghc-options: -threaded + Ghc-options: -threaded -with-rtsopts=-N2 hs-source-dirs: test/exe main-is: main.hs build-depends: base From fc8b0003843ceb9835f6a2fe7fe3843c742d3262 Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Wed, 8 Jan 2025 20:11:03 +0300 Subject: [PATCH 6/6] Attempt to add timeout of test --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ffa05dd..8e9a92f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -70,4 +70,4 @@ jobs: # ---------------- - name: Test run: | - if [ "${{ matrix.skip-test }}" == "" ]; then cabal test all; fi + if [ "${{ matrix.skip-test }}" == "" ]; then timeout 3m cabal test all; fi