-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit de91738
Showing
8 changed files
with
1,081 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
import Distribution.Simple | ||
main = defaultMain |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
module Main where | ||
|
||
import Criterion.Main | ||
|
||
main :: IO () | ||
main = defaultMain [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,218 @@ | ||
{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances #-} | ||
-- Consider splitting off as a separate package | ||
-- Copyright (c) 2008 Gustav Munkby | ||
-- Copyright (c) 2008 Jean-Philippe Bernardy | ||
|
||
-- | This module defines a Rope representation. | ||
|
||
-- While the representation are ByteStrings stored in a finger tree, the indices | ||
-- are actually in number of characters. | ||
|
||
-- This is currently based on utf8-string, but a couple of other packages might be | ||
-- better: text, compact-string. | ||
|
||
-- At the moment none of them has a lazy | ||
-- implementation, which forces us to always export plain Strings. | ||
-- (Utf8-string does not have a proper newtype) | ||
|
||
module Yi.OldRope ( | ||
Rope, | ||
|
||
-- * Conversions to Rope | ||
fromString, | ||
|
||
-- * Conversions from Rope | ||
toString, toReverseString, | ||
|
||
-- * List-like functions | ||
null, empty, take, drop, length, reverse, countNewLines, | ||
|
||
split, splitAt, splitAtLine, | ||
|
||
append, concat, | ||
|
||
-- * IO | ||
readFile, writeFile, | ||
|
||
-- * Low level functions | ||
splitAtChunkBefore | ||
) where | ||
|
||
import Prelude hiding (null, head, tail, length, take, drop, splitAt, head, tail, foldl, reverse, readFile, writeFile, concat) | ||
import qualified Data.List as L | ||
|
||
import qualified Data.ByteString.UTF8 as B | ||
import qualified Data.ByteString as B (append, concat) | ||
import qualified Data.ByteString as Byte | ||
import Data.ByteString (ByteString) | ||
import qualified Data.ByteString.Lazy as LB (toChunks, fromChunks, null, readFile, split) | ||
import qualified Data.ByteString.Lazy.UTF8 as LB | ||
|
||
import qualified Data.FingerTree as T | ||
import Data.FingerTree hiding (null, empty, reverse, split) | ||
|
||
import Data.Binary | ||
import Data.Char (ord) | ||
import Data.Monoid | ||
import Data.String (IsString(..)) | ||
|
||
import System.IO.Cautious (writeFileL) | ||
|
||
defaultChunkSize :: Int | ||
defaultChunkSize = 128 -- in chars! (chunkSize requires this to be <= 256) | ||
|
||
-- The FingerTree does not store measurements for single chunks, which | ||
-- means that the length of chunks often have to be recomputed. | ||
mkChunk :: ByteString -> Chunk | ||
mkChunk s = Chunk (fromIntegral $ B.length s) s | ||
data Chunk = Chunk { chunkSize :: {-# UNPACK #-} !Word8, fromChunk :: {-# UNPACK #-} !ByteString } | ||
deriving (Eq, Show) | ||
|
||
data Size = Indices {charIndex :: {-# UNPACK #-} !Int, lineIndex :: {-# UNPACK #-} !Int} -- lineIndex is lazy because we do not often want the line count. However, we need this to avoid stack overflows on large files! | ||
deriving Show | ||
|
||
instance Monoid Size where | ||
mempty = Indices 0 0 | ||
mappend (Indices c1 l1) (Indices c2 l2) = Indices (c1+c2) (l1+l2) | ||
|
||
newtype Rope = Rope { fromRope :: FingerTree Size Chunk } | ||
deriving (Eq, Show) | ||
|
||
(-|) :: Chunk -> FingerTree Size Chunk -> FingerTree Size Chunk | ||
b -| t | chunkSize b == 0 = t | ||
| otherwise = b <| t | ||
|
||
(|-) :: FingerTree Size Chunk -> Chunk -> FingerTree Size Chunk | ||
t |- b | chunkSize b == 0 = t | ||
| otherwise = t |> b | ||
|
||
-- Newlines are preserved by UTF8 encoding and decoding | ||
newline :: Word8 | ||
newline = fromIntegral (ord '\n') | ||
|
||
instance Measured Size Chunk where | ||
measure (Chunk l s) = Indices (fromIntegral l) -- note that this is the length in characters, not bytes. | ||
(Byte.count newline s) | ||
|
||
-- | The 'Foldable' instance of 'FingerTree' only defines 'foldMap', so the 'foldr' needed for 'toList' is inefficient, | ||
-- and can cause stack overflows. So, we roll our own (somewhat inefficient) version of 'toList' to avoid this. | ||
toList :: Measured v a => FingerTree v a -> [a] | ||
toList t = case viewl t of | ||
c :< cs -> c : toList cs | ||
EmptyL -> [] | ||
|
||
toLazyByteString :: Rope -> LB.ByteString | ||
toLazyByteString = LB.fromChunks . fmap fromChunk . toList . fromRope | ||
|
||
reverse :: Rope -> Rope | ||
reverse = Rope . fmap' (mkChunk . B.fromString . L.reverse . B.toString . fromChunk) . T.reverse . fromRope | ||
|
||
toReverseString :: Rope -> String | ||
toReverseString = concatMap (L.reverse . B.toString . fromChunk) . toList . T.reverse . fromRope | ||
|
||
toString :: Rope -> String | ||
toString = LB.toString . toLazyByteString | ||
|
||
fromLazyByteString :: LB.ByteString -> Rope | ||
fromLazyByteString = Rope . toTree T.empty | ||
where | ||
toTree acc b | LB.null b = acc | ||
| otherwise = let (h,t) = LB.splitAt (fromIntegral defaultChunkSize) b | ||
chunk = mkChunk $ B.concat $ LB.toChunks h | ||
in acc `seq` chunk `seq` toTree (acc |> chunk) t | ||
|
||
instance IsString Rope where | ||
fromString = Rope . toTree T.empty | ||
where | ||
toTree acc [] = acc | ||
toTree acc b = let (h,t) = L.splitAt defaultChunkSize b | ||
chunk = mkChunk $ B.fromString h | ||
in acc `seq` chunk `seq` toTree (acc |> chunk) t | ||
|
||
null :: Rope -> Bool | ||
null (Rope a) = T.null a | ||
|
||
empty :: Rope | ||
empty = Rope T.empty | ||
|
||
-- | Get the length of the string. (This information cached, so O(1) amortized runtime.) | ||
length :: Rope -> Int | ||
length = charIndex . measure . fromRope | ||
|
||
-- | Count the number of newlines in the strings. (This information cached, so O(1) amortized runtime.) | ||
countNewLines :: Rope -> Int | ||
countNewLines = lineIndex . measure . fromRope | ||
|
||
-- | Append two strings by merging the two finger trees. | ||
append :: Rope -> Rope -> Rope | ||
append (Rope a) (Rope b) = Rope $ | ||
case T.viewr a of | ||
EmptyR -> b | ||
l :> Chunk len x -> case T.viewl b of | ||
EmptyL -> a | ||
Chunk len' x' :< r -> if fromIntegral len + fromIntegral len' < defaultChunkSize | ||
then l >< singleton (Chunk (len + len') (x `B.append` x')) >< r | ||
else a >< b | ||
|
||
concat :: [Rope] -> Rope | ||
concat = L.foldl' append empty | ||
|
||
take, drop :: Int -> Rope -> Rope | ||
take n = fst . splitAt n | ||
drop n = snd . splitAt n | ||
|
||
-- | Split the string at the specified position. | ||
splitAt :: Int -> Rope -> (Rope, Rope) | ||
splitAt n (Rope t) = | ||
case T.viewl c of | ||
Chunk len x :< r | n' /= 0 -> | ||
let (lx, rx) = B.splitAt n' x in (Rope $ l |> Chunk (fromIntegral n') lx, Rope $ Chunk (len - fromIntegral n') rx -| r) | ||
_ -> (Rope l, Rope c) | ||
where | ||
(l, c) = T.split ((> n) . charIndex) t | ||
n' = n - charIndex (measure l) | ||
|
||
-- | Split the rope on a chunk, so that the desired | ||
-- position lies within the first chunk of the second rope. | ||
splitAtChunkBefore :: Int -> Rope -> (Rope, Rope) | ||
splitAtChunkBefore n (Rope t) = | ||
let (l, c) = T.split ((> n) . charIndex) t in (Rope l, Rope c) | ||
|
||
-- | Split before the specified line. Lines are indexed from 0. | ||
splitAtLine :: Int -> Rope -> (Rope, Rope) | ||
splitAtLine n | n <= 0 = \r -> (empty, r) | ||
| otherwise = splitAtLine' (n-1) | ||
|
||
-- | Split after the specified line. Lines are indexed from 0. | ||
splitAtLine' :: Int -> Rope -> (Rope, Rope) | ||
splitAtLine' n (Rope t) = | ||
case T.viewl c of | ||
ch@(Chunk _ x) :< r -> | ||
let (lx, rx) = cutExcess excess x | ||
excess = lineIndex (measure l) + lineIndex (measure ch) - n - 1 | ||
in (Rope $ l |- mkChunk lx, Rope $ mkChunk rx -| r) | ||
_ -> (Rope l, Rope c) | ||
where | ||
(l, c) = T.split ((n <) . lineIndex) t | ||
|
||
split :: Word8 -> Rope -> [Rope] | ||
split c = map fromLazyByteString . LB.split c . toLazyByteString | ||
|
||
cutExcess :: Int -> ByteString -> (ByteString, ByteString) | ||
cutExcess i s = let idx = gt i $ L.reverse $ Byte.elemIndices newline s | ||
in Byte.splitAt (idx+1) s -- take one extra byte to that the newline is found on the left. | ||
where gt _ [] = Byte.length s | ||
gt 0 (x:_ ) = x | ||
gt n (_:xs) = gt (n-1) xs | ||
|
||
|
||
instance Binary Rope where | ||
put = put . toString | ||
get = fromString `fmap` get | ||
|
||
|
||
writeFile :: FilePath -> Rope -> IO () | ||
writeFile f = writeFileL f . toLazyByteString | ||
|
||
readFile :: FilePath -> IO Rope | ||
readFile f = fromLazyByteString `fmap` LB.readFile f |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
module Yi.Rope (Rope, fromString, toString, toReverseString, null, empty, | ||
take, drop, Data.Rope.length, reverse, countNewLines, | ||
Yi.Rope.split, Yi.Rope.splitAt, Yi.Rope.splitAtLine, | ||
Yi.Rope.append, Yi.Rope.concat, Yi.Rope.readFile, | ||
Yi.Rope.writeFile, Yi.Rope.splitAtChunkBefore) where | ||
|
||
import qualified Codec.Binary.UTF8.Generic as G | ||
import Data.Binary | ||
import qualified Data.ByteString.Lazy as LB (readFile, split, count, reverse) | ||
import Data.Monoid | ||
import Data.Rope | ||
import qualified Prelude as P | ||
import Prelude hiding (null, take, drop, reverse) | ||
import System.IO.Cautious (writeFileL) | ||
|
||
toReverseString :: Rope -> String | ||
toReverseString = P.reverse . toString | ||
|
||
reverse :: Rope -> Rope | ||
reverse = fromLazyByteString . LB.reverse . toLazyByteString | ||
|
||
countNewLines :: Rope -> Int | ||
countNewLines = fromIntegral . LB.count 10 . toLazyByteString | ||
|
||
split :: Word8 -> Rope -> [Rope] | ||
split c = map fromLazyByteString . LB.split c . toLazyByteString | ||
|
||
splitAt :: Int -> Rope -> (Rope, Rope) | ||
splitAt = G.splitAt | ||
|
||
splitAtChunkBefore :: Int -> Rope -> (Rope, Rope) | ||
splitAtChunkBefore = Yi.Rope.splitAt | ||
|
||
-- | Split before the specified line. Lines are indexed from 0. | ||
splitAtLine :: Int -> Rope -> (Rope, Rope) | ||
splitAtLine n r | n <= 0 = (mempty, r) | ||
| otherwise = splitAtLine' (n - 1) r | ||
|
||
-- | Split after the specified line. Lines are indexed from 0. | ||
splitAtLine' :: Int -> Rope -> (Rope, Rope) | ||
splitAtLine' n r = let ls = P.take (n + 1) (G.lines' r) | ||
in G.splitAt (sum $ map G.length ls) r | ||
|
||
append :: Rope -> Rope -> Rope | ||
append = (<>) | ||
|
||
concat :: [Rope] -> Rope | ||
concat = mconcat | ||
|
||
writeFile :: FilePath -> Rope -> IO () | ||
writeFile f = writeFileL f . toLazyByteString | ||
|
||
readFile :: FilePath -> IO Rope | ||
readFile f = fromLazyByteString `fmap` LB.readFile f |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{-# OPTIONS_GHC -F -pgmF hspec-discover #-} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
{-# LANGUAGE UnicodeSyntax #-} | ||
{-# LANGUAGE ScopedTypeVariables #-} | ||
module Yi.RopeSpec (main, spec) where | ||
|
||
import Control.Applicative | ||
import Data.Binary | ||
import qualified Data.ByteString as BS | ||
import qualified Yi.Rope as R | ||
import qualified Yi.OldRope as O | ||
import Test.Hspec | ||
import Test.Hspec.QuickCheck | ||
import Test.QuickCheck (Arbitrary(..)) | ||
import Test.QuickCheck.Instances () | ||
|
||
main ∷ IO () | ||
main = hspec spec | ||
|
||
infixr 2 `isLike` | ||
isLike :: (Show a, Eq a) => (R.Rope -> a) -> (O.Rope -> a) | ||
-> String -> Expectation | ||
f `isLike` g = \s -> f (R.fromString s) `shouldBe` g (O.fromString s) | ||
|
||
infixr 2 `stringIsLike` | ||
stringIsLike :: (String -> R.Rope) -> (String -> O.Rope) | ||
-> String -> Expectation | ||
f `stringIsLike` g = \s -> (R.toString . f $ s) `shouldBe` (O.toString . g $ s) | ||
|
||
infixr 2 `sIsLike` | ||
sIsLike :: (R.Rope -> R.Rope) -> (O.Rope -> O.Rope) -> String -> Expectation | ||
f `sIsLike` g = R.toString . f `isLike` O.toString . g | ||
|
||
infixr 2 `ssIsLike` | ||
|
||
ssIsLike :: (R.Rope -> R.Rope) -> (O.Rope -> O.Rope) -> String -> Expectation | ||
f `ssIsLike` g = \s -> | ||
(R.toString . f . R.fromString) s `shouldBe` (O.toString . g . O.fromString) s | ||
|
||
spec ∷ Spec | ||
spec = do | ||
describe "Comparisons" $ do | ||
prop "toString" $ R.toString `isLike` O.toString | ||
prop "toReverseString" $ R.toReverseString `isLike` O.toReverseString | ||
prop "null" $ R.null `isLike` O.null | ||
prop "empty" $ const R.empty `stringIsLike` const O.empty | ||
prop "take" $ \i -> R.take i `sIsLike` O.take i | ||
prop "drop" $ \i -> R.drop i `sIsLike` O.drop i | ||
prop "length" $ R.length `isLike` O.length | ||
prop "reverse" $ R.reverse `sIsLike` O.reverse | ||
prop "countNewLines" $ R.countNewLines `isLike` O.countNewLines | ||
prop "split" | ||
$ \i -> map R.toString . R.split i `isLike` map O.toString . O.split i | ||
prop "fst . splitAt" $ \i -> fst . R.splitAt i `sIsLike` fst . O.splitAt i | ||
prop "snd . splitAt" $ \i -> snd . R.splitAt i `sIsLike` snd . O.splitAt i | ||
prop "fst . splitAtLine" | ||
$ \i -> fst . R.splitAtLine i `sIsLike` fst . O.splitAtLine i | ||
prop "snd . splitAtLine" | ||
$ \i -> snd . R.splitAtLine i `sIsLike` snd . O.splitAtLine i | ||
prop "append" | ||
$ \s -> R.append (R.fromString s) `ssIsLike` O.append (O.fromString s) | ||
prop "concat" $ \s -> (R.toString . R.concat . map R.fromString) s | ||
`shouldBe` | ||
(O.toString . O.concat . map O.fromString) s |
Oops, something went wrong.