{-# LANGUAGE CPP, BangPatterns, ForeignFunctionInterface #-}
--------------------------------------------------------------------
-- |
-- Module    :  System.IO.Posix.MMap
-- Copyright :  (c) Galois, Inc. 2007
-- License   :  BSD3
--
-- Maintainer:  Don Stewart <dons@galois.com>
-- Stability :  provisional
-- Portability: non-portable -- posix only
--
-- Lazy, chunk-wise memory mapping.
--
-- Memory map a file as a lazy ByteString. Finalisers are associated
-- cached-sized portions of the file, which will be deallocated as
-- those chunks go out of scope.
--
-- Unlike strict Bytestrings, mmapFile for Lazy ByteStrings will
-- deallocate chunks of the file.
--
-- The storage manager is used to free chunks of the mapped memory. When
-- the garbage collector notices there are no further references to 
-- a chunk, a call to munmap is made.
--
-- In effect, the file is mmapped once, lazily, then covered with finalizers
-- for each chunk. When any chunk goes out of scope, that part is
-- deallocated. We must allocate the spine of the structure strictly
-- though, to ensure finalizers are registered for the entire file.
--
-- The Haskell garbage collector decides when to run based on heap
-- pressure, however the mmap stores memory outside the Haskell heap, 
-- so those resources are not counted when deciding to run the garbage
-- collect. The result is that finalizers run less often than you might
-- expect, and it is possible to write a lazy bytestring mmap program 
-- that never deallocates (and thus doesn't run in constant space).
-- 'performGC' or 'finalizerForeignPtr' can be used to trigger collection
-- at sensible points.
--
-- Note: this operation may break referential transparency! If 
-- any other process on the system changes the file when it is mapped
-- into Haskell, the contents of your 'ByteString' will change.
--
module System.IO.Posix.MMap.Lazy (

      unsafeMMapFile -- :: FilePath -> IO ByteString

    ) where

import System.IO.Posix.MMap.Internal

-- import System.IO
import Foreign.C.Types
import Foreign.Ptr
-- import Control.Monad

import Control.Exception
import Data.Word
import Data.ByteString.Lazy.Internal

import System.Posix

--
-- | The 'unsafeMMapFile' function maps a file or device into memory as
-- a lazy ByteString, made of 64*pagesize unmappable chunks of bytes.
--
-- Memory mapped files will behave as if they were read lazily -- 
-- pages from the file will be loaded into memory on demand.
-- 
-- The storage manager is used to free chunks that go out of scope,
-- and unlike strict bytestrings, memory mapped lazy ByteStrings will
-- be deallocated in chunks (so you can write traversals that run in
-- constant space).
--
-- However, the size of the mmapped resource is not known by the Haskell
-- GC, it appears only as a small ForeignPtr. This means that the
-- Haskell GC may not not run as often as you'd like, leading to delays
-- in unmapping chunks.
-- 
-- Appropriate use of performGC or finalizerForeignPtr may be required
-- to ensure deallocation, as resources allocated by mmap are not
-- tracked by the Haskell garbage collector.
--
-- For example, when writing out a lazy bytestring allocated with mmap,
-- you may wish to finalizeForeignPtr when each chunk is written, as the 
-- chunk goes out of scope, rather than relying on the garbage collector
-- to notice the chunk has gone.
--
-- This operation is unsafe: if the file is written to by any other
-- process on the system, the 'ByteString' contents will change in
-- Haskell.
--
unsafeMMapFile :: FilePath -> IO ByteString
unsafeMMapFile :: FilePath -> IO ByteString
unsafeMMapFile FilePath
path = do
    Fd
fd   <- FilePath -> OpenMode -> Maybe FileMode -> OpenFileFlags -> IO Fd
openFd FilePath
path OpenMode
ReadOnly Maybe FileMode
forall a. Maybe a
Nothing OpenFileFlags
defaultFileFlags
    IO () -> IO ByteString -> IO ByteString
forall {b} {a}. IO b -> IO a -> IO a
always (Fd -> IO ()
closeFd Fd
fd) (IO ByteString -> IO ByteString) -> IO ByteString -> IO ByteString
forall a b. (a -> b) -> a -> b
$ do
        FileStatus
stat <- Fd -> IO FileStatus
getFdStatus Fd
fd
        let size :: CSize
size = FileOffset -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral (FileStatus -> FileOffset
fileSize FileStatus
stat)
        Ptr Word8
ptr  <- CSize -> CInt -> IO (Ptr Word8)
c_mmap CSize
size (Fd -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Fd
fd)
        if Ptr Word8
ptr Ptr Word8 -> Ptr Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Ptr Word8
forall a. Ptr a
nullPtr
          then FilePath -> IO ByteString
forall a. HasCallStack => FilePath -> a
error FilePath
"System.IO.Posix.MMap.Lazy: unable to mmap file!"
          else CSize -> Ptr Word8 -> CSize -> IO ByteString
chunks CSize
chunk_size Ptr Word8
ptr (CSize -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
size)
  where
    always :: IO b -> IO a -> IO a
always = (IO a -> IO b -> IO a) -> IO b -> IO a -> IO a
forall a b c. (a -> b -> c) -> b -> a -> c
flip IO a -> IO b -> IO a
forall a b. IO a -> IO b -> IO a
finally

    -- must be page aligned.
    chunk_size :: CSize
chunk_size = CSize
64 CSize -> CSize -> CSize
forall a. Num a => a -> a -> a
* CInt -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral CInt
pagesize -- empircally derived

--
-- Break the file up into chunks.
    -- Have separate munmap finalizers for each chunk.
--
chunks :: CSize -> Ptr Word8 -> CSize -> IO ByteString
chunks :: CSize -> Ptr Word8 -> CSize -> IO ByteString
chunks CSize
chunk_size Ptr Word8
p CSize
bytes = Ptr Word8 -> CSize -> IO ByteString
loop Ptr Word8
p CSize
bytes
#ifndef __HADDOCK__
  where
    loop :: Ptr Word8 -> CSize -> IO ByteString
loop !Ptr Word8
ptr !CSize
rest
          | CSize
rest CSize -> CSize -> Bool
forall a. Ord a => a -> a -> Bool
<= CSize
0 = ByteString -> IO ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
Empty
          | Bool
otherwise = let s :: CSize
s     = CSize -> CSize -> CSize
forall a. Ord a => a -> a -> a
min CSize
chunk_size CSize
rest
                            ptr' :: Ptr b
ptr'  = Ptr Word8
ptr Ptr Word8 -> Int -> Ptr b
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
s
                            rest' :: CSize
rest' = CSize
rest CSize -> CSize -> CSize
forall a. Num a => a -> a -> a
- CSize
s
                        in do ByteString
c  <- Ptr Word8 -> CSize -> IO ByteString
unsafePackMMapPtr Ptr Word8
ptr CSize
s
                              ByteString
cs <- Ptr Word8 -> CSize -> IO ByteString
loop Ptr Word8
forall a. Ptr a
ptr' CSize
rest' -- need to be strict
                              ByteString -> IO ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> ByteString -> ByteString
chunk ByteString
c ByteString
cs)   -- to ensure we cover the whole file
                                                    -- with finalizers
#endif

foreign import ccall unsafe "unistd.h getpagesize"
    pagesize :: CInt