Skip to content

Changes encodeChar to return a NonEmpty list #31

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 49 additions & 19 deletions Codec/Binary/UTF8/String.hs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ module Codec.Binary.UTF8.String (
, utf8Encode
) where

#if __GLASGOW_HASKELL__ > 710
import qualified Data.List.NonEmpty as NE
#endif
import Data.Word (Word8,Word32)
import Data.Bits ((.|.),(.&.),shiftL,shiftR)
import Data.Char (chr,ord)
Expand All @@ -46,30 +49,57 @@ replacement_character :: Char
replacement_character = '\xfffd'

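-- | Encode a single Haskell 'Char' as a non-empty sequence of 'Word8' values
-- in UTF-8 format: a @NonEmpty@ list on GHC 8.2 and later, or a
-- @(first byte, remaining bytes)@ pair on older compilers.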
encodeChar :: Char -> [Word8]
encodeChar = map fromIntegral . go . ord
#if __GLASGOW_HASKELL__ < 802
encodeChar :: Char -> (Word8, [Word8])
encodeChar = (\(x, xs) -> (fromIntegral x, fmap fromIntegral xs)) . go . ord
where
go oc
| oc <= 0x7f = [oc]

| oc <= 0x7ff = [ 0xc0 + (oc `shiftR` 6)
, 0x80 + oc .&. 0x3f
]

| oc <= 0xffff = [ 0xe0 + (oc `shiftR` 12)
, 0x80 + ((oc `shiftR` 6) .&. 0x3f)
, 0x80 + oc .&. 0x3f
]
| otherwise = [ 0xf0 + (oc `shiftR` 18)
, 0x80 + ((oc `shiftR` 12) .&. 0x3f)
, 0x80 + ((oc `shiftR` 6) .&. 0x3f)
, 0x80 + oc .&. 0x3f
]

| oc <= 0x7f = ( oc
, [])

| oc <= 0x7ff = ( 0xc0 + (oc `shiftR` 6)
, [ 0x80 + oc .&. 0x3f ])

| oc <= 0xffff = ( 0xe0 + (oc `shiftR` 12)
, [ 0x80 + ((oc `shiftR` 6) .&. 0x3f)
, 0x80 + oc .&. 0x3f
])

| otherwise = ( 0xf0 + (oc `shiftR` 18)
, [ 0x80 + ((oc `shiftR` 12) .&. 0x3f)
, 0x80 + ((oc `shiftR` 6) .&. 0x3f)
, 0x80 + oc .&. 0x3f
])
#else
encodeChar :: Char -> NE.NonEmpty Word8
encodeChar ch = fmap fromIntegral units
  where
    -- Code point of the input character. All arithmetic below is done on
    -- 'Int' and only converted to 'Word8' by the final 'fromIntegral'.
    cp = ord ch

    -- Trailing byte: the 0x80 marker plus the six bits of @cp@ that sit
    -- @s@ bits above the least significant bit.
    trail s = 0x80 + ((cp `shiftR` s) .&. 0x3f)

    -- Lead byte followed by zero to three trailing bytes; the number of
    -- bytes is selected by the magnitude of the code point.
    units
      | cp <= 0x7f   = cp NE.:| []
      | cp <= 0x7ff  = (0xc0 + (cp `shiftR` 6)) NE.:| [trail 0]
      | cp <= 0xffff = (0xe0 + (cp `shiftR` 12)) NE.:| [trail 6, trail 0]
      | otherwise    = (0xf0 + (cp `shiftR` 18)) NE.:| [trail 12, trail 6, trail 0]
#endif

-- | Encode a Haskell 'String' to a list of 'Word8' values, in UTF8 format.
encode :: String -> [Word8]
encode = concatMap encodeChar
#if __GLASGOW_HASKELL__ < 802
encode = concatMap ((\(x, xs) -> x:xs) . encodeChar)
#else
encode = concatMap (NE.toList . encodeChar)
#endif

--
-- | Decode a UTF8 string packed into a list of 'Word8' values, directly to 'String'
Expand Down