Set up comparison env

yi-editor · Sep 6, 2014 · de91738 · de91738
commit de91738
Show file tree

Hide file tree

Showing 8 changed files with 1,081 additions and 0 deletions.
diff --git a/LICENSE b/LICENSE
diff --git a/Setup.hs b/Setup.hs
@@ -0,0 +1,2 @@
+import Distribution.Simple
+main = defaultMain
diff --git a/bench/MainBenchmarkSuite.hs b/bench/MainBenchmarkSuite.hs
@@ -0,0 +1,6 @@
+module Main where
+
+import Criterion.Main
+
+main :: IO ()
+main = defaultMain []
diff --git a/src/Yi/OldRope.hs b/src/Yi/OldRope.hs
@@ -0,0 +1,218 @@
+{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances #-}
+-- Consider splitting off as a separate package
+-- Copyright (c) 2008 Gustav Munkby
+-- Copyright (c) 2008 Jean-Philippe Bernardy
+
+-- | This module defines a Rope representation.
+
+-- While the representation are ByteStrings stored in a finger tree, the indices
+-- are actually in number of characters.
+
+-- This is currently based on utf8-string, but a couple of other packages might be
+-- better: text, compact-string.
+
+-- At the moment none of them has a lazy
+-- implementation, which forces us to always export plain Strings.
+-- (Utf8-string does not have a proper newtype)
+
+module Yi.OldRope (
+   Rope,
+
+   -- * Conversions to Rope
+   fromString,
+
+   -- * Conversions from Rope
+   toString, toReverseString,
+
+   -- * List-like functions
+   null, empty, take, drop,  length, reverse, countNewLines,
+
+   split, splitAt, splitAtLine,
+
+   append, concat,
+
+   -- * IO
+   readFile, writeFile,
+
+   -- * Low level functions
+   splitAtChunkBefore
+  ) where
+
+import Prelude hiding (null, head, tail, length, take, drop, splitAt, head, tail, foldl, reverse, readFile, writeFile, concat)
+import qualified Data.List as L
+
+import qualified Data.ByteString.UTF8 as B
+import qualified Data.ByteString as B (append, concat)
+import qualified Data.ByteString as Byte
+import Data.ByteString (ByteString)
+import qualified Data.ByteString.Lazy as LB (toChunks, fromChunks, null, readFile, split)
+import qualified Data.ByteString.Lazy.UTF8 as LB
+
+import qualified Data.FingerTree as T
+import Data.FingerTree hiding (null, empty, reverse, split)
+
+import Data.Binary
+import Data.Char (ord)
+import Data.Monoid
+import Data.String (IsString(..))
+
+import System.IO.Cautious (writeFileL)
+
+defaultChunkSize :: Int
+defaultChunkSize = 128 -- in chars! (chunkSize requires this to be <= 256)
+
+-- The FingerTree does not store measurements for single chunks, which
+-- means that the length of chunks often have to be recomputed.
+mkChunk :: ByteString -> Chunk
+mkChunk s = Chunk (fromIntegral $ B.length s) s
+data Chunk = Chunk { chunkSize :: {-# UNPACK #-} !Word8, fromChunk :: {-# UNPACK #-} !ByteString }
+  deriving (Eq, Show)
+
+data Size = Indices {charIndex :: {-# UNPACK #-} !Int, lineIndex :: {-# UNPACK #-} !Int} -- lineIndex is lazy because we do not often want the line count. However, we need this to avoid stack overflows on large files!
+  deriving Show
+
+instance Monoid Size where
+    mempty = Indices 0 0
+    mappend (Indices c1 l1) (Indices c2 l2) = Indices (c1+c2) (l1+l2)
+
+newtype Rope = Rope { fromRope :: FingerTree Size Chunk }
+   deriving (Eq, Show)
+
+(-|) :: Chunk -> FingerTree Size Chunk -> FingerTree Size Chunk
+b -| t | chunkSize b == 0 = t
+       | otherwise        = b <| t
+
+(|-) :: FingerTree Size Chunk -> Chunk -> FingerTree Size Chunk
+t |- b | chunkSize b == 0 = t
+       | otherwise        = t |> b
+
+-- Newlines are preserved by UTF8 encoding and decoding
+newline :: Word8
+newline = fromIntegral (ord '\n')
+
+instance Measured Size Chunk where
+   measure (Chunk l s) = Indices (fromIntegral l)  -- note that this is the length in characters, not bytes.
+                                 (Byte.count newline s)
+
+-- | The 'Foldable' instance of 'FingerTree' only defines 'foldMap', so the 'foldr' needed for 'toList' is inefficient,
+-- and can cause stack overflows. So, we roll our own (somewhat inefficient) version of 'toList' to avoid this.
+toList :: Measured v a => FingerTree v a -> [a]
+toList t = case viewl t of
+              c :< cs -> c : toList cs
+              EmptyL -> []
+
+toLazyByteString :: Rope -> LB.ByteString
+toLazyByteString = LB.fromChunks . fmap fromChunk . toList . fromRope
+
+reverse :: Rope -> Rope
+reverse = Rope . fmap' (mkChunk . B.fromString . L.reverse . B.toString . fromChunk) . T.reverse . fromRope
+
+toReverseString :: Rope -> String
+toReverseString = concatMap (L.reverse . B.toString . fromChunk) . toList . T.reverse . fromRope
+
+toString :: Rope -> String
+toString = LB.toString . toLazyByteString
+
+fromLazyByteString :: LB.ByteString -> Rope
+fromLazyByteString = Rope . toTree T.empty
+   where
+     toTree acc b | LB.null b = acc
+                  | otherwise = let (h,t) = LB.splitAt (fromIntegral defaultChunkSize) b
+                                    chunk = mkChunk $ B.concat $ LB.toChunks h
+                                in acc `seq` chunk `seq` toTree (acc |> chunk) t
+
+instance IsString Rope where
+    fromString = Rope . toTree T.empty
+       where
+         toTree acc [] = acc
+         toTree acc b  = let (h,t) = L.splitAt defaultChunkSize b
+                             chunk = mkChunk $ B.fromString h
+                         in acc `seq` chunk `seq` toTree (acc |> chunk) t
+
+null :: Rope -> Bool
+null (Rope a) = T.null a
+
+empty :: Rope
+empty = Rope T.empty
+
+-- | Get the length of the string. (This information cached, so O(1) amortized runtime.)
+length :: Rope -> Int
+length = charIndex . measure . fromRope
+
+-- | Count the number of newlines in the strings. (This information cached, so O(1) amortized runtime.)
+countNewLines :: Rope -> Int
+countNewLines = lineIndex . measure . fromRope
+
+-- | Append two strings by merging the two finger trees.
+append :: Rope -> Rope -> Rope
+append (Rope a) (Rope b) = Rope $
+     case T.viewr a of
+       EmptyR -> b
+       l :> Chunk len x -> case T.viewl b of
+                   EmptyL  -> a
+                   Chunk len' x' :< r -> if fromIntegral len + fromIntegral len' < defaultChunkSize
+                                then l >< singleton (Chunk (len + len') (x `B.append` x')) >< r
+                                else a >< b
+
+concat :: [Rope] -> Rope
+concat = L.foldl' append empty
+
+take, drop :: Int -> Rope -> Rope
+take n = fst . splitAt n
+drop n = snd . splitAt n
+
+-- | Split the string at the specified position.
+splitAt :: Int -> Rope -> (Rope, Rope)
+splitAt n (Rope t) =
+   case T.viewl c of
+     Chunk len x :< r | n' /= 0 ->
+       let (lx, rx) = B.splitAt n' x in (Rope $ l |> Chunk (fromIntegral n') lx, Rope $ Chunk (len - fromIntegral n') rx -| r)
+     _ -> (Rope l, Rope c)
+   where
+     (l, c) = T.split ((> n) . charIndex) t
+     n' = n - charIndex (measure l)
+
+-- | Split the rope on a chunk, so that the desired
+--   position lies within the first chunk of the second rope.
+splitAtChunkBefore :: Int -> Rope -> (Rope, Rope)
+splitAtChunkBefore n (Rope t) =
+  let (l, c) = T.split ((> n) . charIndex) t in (Rope l, Rope c)
+
+-- | Split before the specified line. Lines are indexed from 0.
+splitAtLine :: Int -> Rope -> (Rope, Rope)
+splitAtLine n | n <= 0     = \r -> (empty, r)
+              | otherwise = splitAtLine' (n-1)
+
+-- | Split after the specified line. Lines are indexed from 0.
+splitAtLine' :: Int -> Rope -> (Rope, Rope)
+splitAtLine' n (Rope t) =
+   case T.viewl c of
+     ch@(Chunk _ x) :< r ->
+       let (lx, rx) = cutExcess excess x
+           excess = lineIndex (measure l) + lineIndex (measure ch) - n - 1
+       in (Rope $ l |- mkChunk lx, Rope $ mkChunk rx -| r)
+     _ -> (Rope l, Rope c)
+   where
+     (l, c) = T.split ((n <) . lineIndex) t
+
+split :: Word8 -> Rope -> [Rope]
+split c = map fromLazyByteString . LB.split c . toLazyByteString
+
+cutExcess :: Int -> ByteString -> (ByteString, ByteString)
+cutExcess i s = let idx = gt i $ L.reverse $ Byte.elemIndices newline s
+                in Byte.splitAt (idx+1) s -- take one extra byte to that the newline is found on the left.
+    where gt _ []     = Byte.length s
+          gt 0 (x:_ ) = x
+          gt n (_:xs) = gt (n-1) xs
+
+
+instance Binary Rope where
+     put = put . toString
+     get = fromString `fmap` get
+
+
+writeFile :: FilePath -> Rope -> IO ()
+writeFile f = writeFileL f . toLazyByteString
+
+readFile :: FilePath -> IO Rope
+readFile f = fromLazyByteString `fmap` LB.readFile f
diff --git a/src/Yi/Rope.hs b/src/Yi/Rope.hs
@@ -0,0 +1,54 @@
+module Yi.Rope (Rope, fromString, toString, toReverseString, null, empty,
+                take, drop, Data.Rope.length, reverse, countNewLines,
+                Yi.Rope.split, Yi.Rope.splitAt, Yi.Rope.splitAtLine,
+                Yi.Rope.append, Yi.Rope.concat, Yi.Rope.readFile,
+                Yi.Rope.writeFile, Yi.Rope.splitAtChunkBefore) where
+
+import qualified Codec.Binary.UTF8.Generic as G
+import           Data.Binary
+import qualified Data.ByteString.Lazy as LB (readFile, split, count, reverse)
+import           Data.Monoid
+import           Data.Rope
+import qualified Prelude as P
+import           Prelude hiding (null, take, drop, reverse)
+import           System.IO.Cautious (writeFileL)
+
+toReverseString :: Rope -> String
+toReverseString = P.reverse . toString
+
+reverse :: Rope -> Rope
+reverse = fromLazyByteString . LB.reverse . toLazyByteString
+
+countNewLines :: Rope -> Int
+countNewLines = fromIntegral . LB.count 10 . toLazyByteString
+
+split :: Word8 -> Rope -> [Rope]
+split c = map fromLazyByteString . LB.split c . toLazyByteString
+
+splitAt :: Int -> Rope -> (Rope, Rope)
+splitAt = G.splitAt
+
+splitAtChunkBefore :: Int -> Rope -> (Rope, Rope)
+splitAtChunkBefore = Yi.Rope.splitAt
+
+-- | Split before the specified line. Lines are indexed from 0.
+splitAtLine :: Int -> Rope -> (Rope, Rope)
+splitAtLine n r | n <= 0     = (mempty, r)
+                | otherwise = splitAtLine' (n - 1) r
+
+-- | Split after the specified line. Lines are indexed from 0.
+splitAtLine' :: Int -> Rope -> (Rope, Rope)
+splitAtLine' n r = let ls = P.take (n + 1) (G.lines' r)
+                   in G.splitAt (sum $ map G.length ls) r
+
+append :: Rope -> Rope -> Rope
+append = (<>)
+
+concat :: [Rope] -> Rope
+concat = mconcat
+
+writeFile :: FilePath -> Rope -> IO ()
+writeFile f = writeFileL f . toLazyByteString
+
+readFile :: FilePath -> IO Rope
+readFile f = fromLazyByteString `fmap` LB.readFile f
diff --git a/test/Spec.hs b/test/Spec.hs
@@ -0,0 +1 @@
+{-# OPTIONS_GHC -F -pgmF hspec-discover #-}
diff --git a/test/Yi/RopeSpec.hs b/test/Yi/RopeSpec.hs
@@ -0,0 +1,62 @@
+{-# LANGUAGE UnicodeSyntax #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+module Yi.RopeSpec (main, spec) where
+
+import           Control.Applicative
+import           Data.Binary
+import qualified Data.ByteString as BS
+import qualified Yi.Rope as R
+import qualified Yi.OldRope as O
+import           Test.Hspec
+import           Test.Hspec.QuickCheck
+import           Test.QuickCheck (Arbitrary(..))
+import           Test.QuickCheck.Instances ()
+
+main ∷ IO ()
+main = hspec spec
+
+infixr 2 `isLike`
+isLike :: (Show a, Eq a) => (R.Rope -> a) -> (O.Rope -> a)
+       -> String -> Expectation
+f `isLike` g = \s -> f (R.fromString s) `shouldBe` g (O.fromString s)
+
+infixr 2 `stringIsLike`
+stringIsLike :: (String -> R.Rope) -> (String -> O.Rope)
+             -> String -> Expectation
+f `stringIsLike` g = \s -> (R.toString . f $ s) `shouldBe` (O.toString . g $ s)
+
+infixr 2 `sIsLike`
+sIsLike :: (R.Rope -> R.Rope) -> (O.Rope -> O.Rope) -> String -> Expectation
+f `sIsLike` g = R.toString . f `isLike` O.toString . g
+
+infixr 2 `ssIsLike`
+
+ssIsLike :: (R.Rope -> R.Rope) -> (O.Rope -> O.Rope) -> String -> Expectation
+f `ssIsLike` g = \s ->
+  (R.toString . f . R.fromString) s `shouldBe` (O.toString . g . O.fromString) s
+
+spec ∷ Spec
+spec = do
+  describe "Comparisons" $ do
+    prop "toString" $ R.toString `isLike` O.toString
+    prop "toReverseString" $ R.toReverseString `isLike` O.toReverseString
+    prop "null" $ R.null `isLike` O.null
+    prop "empty" $ const R.empty `stringIsLike` const O.empty
+    prop "take" $ \i -> R.take i `sIsLike` O.take i
+    prop "drop" $ \i -> R.drop i `sIsLike` O.drop i
+    prop "length" $ R.length `isLike` O.length
+    prop "reverse" $ R.reverse `sIsLike` O.reverse
+    prop "countNewLines" $ R.countNewLines `isLike` O.countNewLines
+    prop "split"
+      $ \i -> map R.toString . R.split i `isLike` map O.toString . O.split i
+    prop "fst . splitAt" $ \i -> fst . R.splitAt i `sIsLike` fst . O.splitAt i
+    prop "snd . splitAt" $ \i -> snd . R.splitAt i `sIsLike` snd . O.splitAt i
+    prop "fst . splitAtLine"
+      $ \i -> fst . R.splitAtLine i `sIsLike` fst . O.splitAtLine i
+    prop "snd . splitAtLine"
+      $ \i -> snd . R.splitAtLine i `sIsLike` snd . O.splitAtLine i
+    prop "append"
+      $ \s -> R.append (R.fromString s) `ssIsLike` O.append (O.fromString s)
+    prop "concat" $ \s -> (R.toString . R.concat . map R.fromString) s
+                          `shouldBe`
+                          (O.toString . O.concat . map O.fromString) s