{-# LANGUAGE PatternGuards #-}
module Text.CharRanges
  ( Range(..)
  , range
  , single
  , CharSet
  , toSet
  , member
  ) where

import           Data.List
import           Data.Set (Set)
import qualified Data.Set as Set

data Range = Single {-# UNPACK #-} !Char
           | Range  {-# UNPACK #-} !Char {-# UNPACK #-} !Char
             deriving (Range -> Range -> Bool
(Range -> Range -> Bool) -> (Range -> Range -> Bool) -> Eq Range
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Range -> Range -> Bool
$c/= :: Range -> Range -> Bool
== :: Range -> Range -> Bool
$c== :: Range -> Range -> Bool
Eq, Int -> Range -> ShowS
[Range] -> ShowS
Range -> String
(Int -> Range -> ShowS)
-> (Range -> String) -> ([Range] -> ShowS) -> Show Range
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Range] -> ShowS
$cshowList :: [Range] -> ShowS
show :: Range -> String
$cshow :: Range -> String
showsPrec :: Int -> Range -> ShowS
$cshowsPrec :: Int -> Range -> ShowS
Show)

newtype CharRange = CR { CharRange -> Range
unCR :: Range }

-- | A rather hacked-up instance.
--   This is to support fast lookups using 'Data.Set' (see 'toSet').
--   x == y iff x and y overlap
instance Eq CharRange where
    CR (Single Char
x)    == :: CharRange -> CharRange -> Bool
== CR (Single Char
y) = Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
y
    CR (Single Char
a)    == CR (Range Char
x Char
y) = Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
a Bool -> Bool -> Bool
&& Char
a Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
y
    CR (Range Char
x Char
y)   == CR (Single Char
a) = Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
a Bool -> Bool -> Bool
&& Char
a Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
y
    CR (Range Char
lx Char
ux) == CR (Range Char
ly Char
uy) = (Char
lx Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
uy Bool -> Bool -> Bool
&& Char
ly Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
ux)
                                        Bool -> Bool -> Bool
|| (Char
lx Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
uy Bool -> Bool -> Bool
&& Char
ly Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
ux) -- INTENTIONAL
                                           -- For some strange reason GHC
                                           -- (7.6.3) seems to have problems
                                           -- optimizing this expressions
                                           -- without the additional or

instance Ord CharRange where
    CR (Single Char
x)  <= :: CharRange -> CharRange -> Bool
<= CR (Single Char
y) = Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
y
    CR (Single Char
x)  <= CR (Range Char
y Char
_) = Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
y
    CR (Range Char
_ Char
x) <= CR (Single Char
y) = Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
y
    CR (Range Char
_ Char
x) <= CR (Range Char
y Char
_) = Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
y

newtype CharSet = CharSet (Set CharRange)

-- | Allows quick lookups using ranges.
toSet :: [Range] -> CharSet
toSet :: [Range] -> CharSet
toSet = Set CharRange -> CharSet
CharSet (Set CharRange -> CharSet)
-> ([Range] -> Set CharRange) -> [Range] -> CharSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [CharRange] -> Set CharRange
forall a. [a] -> Set a
Set.fromDistinctAscList ([CharRange] -> Set CharRange)
-> ([Range] -> [CharRange]) -> [Range] -> Set CharRange
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Range] -> [CharRange]
prepareRanges
  where prepareRanges :: [Range] -> [CharRange]
        prepareRanges :: [Range] -> [CharRange]
prepareRanges =  [CharRange] -> [CharRange]
go ([CharRange] -> [CharRange])
-> ([Range] -> [CharRange]) -> [Range] -> [CharRange]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [CharRange] -> [CharRange]
forall a. Ord a => [a] -> [a]
sort ([CharRange] -> [CharRange])
-> ([Range] -> [CharRange]) -> [Range] -> [CharRange]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Range -> CharRange) -> [Range] -> [CharRange]
forall a b. (a -> b) -> [a] -> [b]
map Range -> CharRange
CR -- we could use unsafeCoerce to
                                            -- avoid the cost of mapping
        go :: [CharRange] -> [CharRange]
go (CharRange
r1:CharRange
r2:[CharRange]
rs) | Just CharRange
r' <- CharRange -> CharRange -> Maybe CharRange
maybeMergeRanges CharRange
r1 CharRange
r2 = [CharRange] -> [CharRange]
go (CharRange
r'CharRange -> [CharRange] -> [CharRange]
forall a. a -> [a] -> [a]
:[CharRange]
rs)
                      | rss :: [CharRange]
rss@(CharRange
r3:[CharRange]
rs') <- [CharRange] -> [CharRange]
go (CharRange
r2CharRange -> [CharRange] -> [CharRange]
forall a. a -> [a] -> [a]
:[CharRange]
rs) =
            case CharRange -> CharRange -> Maybe CharRange
maybeMergeRanges CharRange
r1 CharRange
r3 of
                Maybe CharRange
Nothing -> CharRange
r1CharRange -> [CharRange] -> [CharRange]
forall a. a -> [a] -> [a]
:[CharRange]
rss
                Just CharRange
r' -> CharRange
r'CharRange -> [CharRange] -> [CharRange]
forall a. a -> [a] -> [a]
:[CharRange]
rs'
        go [CharRange]
rs = [CharRange]
rs

maybeMergeRanges :: CharRange -> CharRange -> Maybe CharRange
maybeMergeRanges :: CharRange -> CharRange -> Maybe CharRange
maybeMergeRanges CharRange
x CharRange
y = if CharRange
x CharRange -> CharRange -> Bool
forall a. Eq a => a -> a -> Bool
== CharRange
y -- overlap
                       then CharRange -> Maybe CharRange
forall a. a -> Maybe a
Just (CharRange -> Maybe CharRange)
-> (Range -> CharRange) -> Range -> Maybe CharRange
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Range -> CharRange
CR (Range -> Maybe CharRange) -> Range -> Maybe CharRange
forall a b. (a -> b) -> a -> b
$ Range -> Range -> Range
minMax (CharRange -> Range
unCR CharRange
x) (CharRange -> Range
unCR CharRange
y)
                       else Maybe CharRange
forall a. Maybe a
Nothing
{-# INLINE maybeMergeRanges #-}

minMax :: Range -> Range -> Range
minMax :: Range -> Range -> Range
minMax (Range Char
lx Char
ux) (Range Char
ly Char
uy) = Char -> Char -> Range
Range (Char -> Char -> Char
forall a. Ord a => a -> a -> a
min Char
lx Char
ly) (Char -> Char -> Char
forall a. Ord a => a -> a -> a
max Char
ux Char
uy)
minMax (Single Char
_) Range
y = Range
y
minMax Range
x (Single Char
_) = Range
x
{-# INLINE minMax #-}

range :: Char -> Char -> Range
range :: Char -> Char -> Range
range Char
x Char
y = if Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
y then Char -> Char -> Range
Range Char
x Char
y
            else String -> Range
forall a. HasCallStack => String -> a
error String
"range: x not smaller than y"
{-# INLINE range #-}

single :: Char -> Range
single :: Char -> Range
single = Char -> Range
Single
{-# INLINE single #-}

member :: Char -> CharSet -> Bool
member :: Char -> CharSet -> Bool
member Char
x (CharSet Set CharRange
cs) = CharRange -> Set CharRange -> Bool
forall a. Ord a => a -> Set a -> Bool
Set.member (Range -> CharRange
CR (Range -> CharRange) -> Range -> CharRange
forall a b. (a -> b) -> a -> b
$ Char -> Range
Single Char
x) Set CharRange
cs