Skip to content

Commit 747df48

Browse files
authored
Script extensions (#91)
* Download files * Generate scripts extensions
1 parent b8c618c commit 747df48

9 files changed

Lines changed: 804 additions & 183 deletions

File tree

ucd.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@ UCD_FILES="\
1919
DerivedCoreProperties.txt:e3eddd7d469cd1b0feed7528defad1a1cc7c6a9ceb0ae4446a6d10921ed2e7bc \
2020
DerivedNormalizationProps.txt:b2c444c20730b097787fdf50bd7d6dd3fc5256ab8084f5b35b11c8776eca674c \
2121
NameAliases.txt:14b3b677d33f95c51423dce6eef4a6a28b4b160451ecedee4b91edb6745cf4a3 \
22+
PropertyValueAliases.txt:eb755757e20b72b330b2948df3cf2ff7adb0e31bb060140dc09dafb132ace2cd \
2223
PropList.txt:6bddfdb850417a5bee6deff19290fd1b138589909afb50f5a049f343bf2c6722 \
2324
Scripts.txt:52db475c4ec445e73b0b16915448c357614946ad7062843c563e00d7535c6510 \
25+
ScriptExtensions.txt:d37eedf63ff9c48bac863d5f76862373d6cf5269fd21253d499e2430d638c01d \
2426
SpecialCasing.txt:c667b45908fd269af25fd55d2fc5bbc157fb1b77675936e25c513ce32e080334 \
2527
UnicodeData.txt:36018e68657fdcb3485f636630ffe8c8532e01c977703d2803f5b89d6c5feafb \
2628
extracted/DerivedCombiningClass.txt:12b0c3af9b600b49488d66545a3e7844ea980809627201bf9afeebe1c9f16f4e \

unicode-data-scripts/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# README
22

33
`unicode-data-scripts` provides Haskell APIs to efficiently access the Unicode
4-
character scripts from the
4+
character [scripts](https://www.unicode.org/reports/tr24/) from the
55
[Unicode character database](https://www.unicode.org/ucd/).
66

77
The Haskell data structures are generated programmatically from the

unicode-data-scripts/bench/Main.hs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ main = defaultMain
1616
, bgroup "scriptDefinition"
1717
[ benchNF "unicode-data" (show . S.scriptDefinition)
1818
]
19+
, bgroup "scriptExtensions"
20+
[ benchChars "unicode-data" (show . S.scriptExtensions)
21+
]
1922
]
2023
]
2124
where

unicode-data-scripts/lib/Unicode/Char/General/Scripts.hs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,27 @@
11
{-# LANGUAGE CPP #-}
22

33
-- |
4-
-- Module : Unicode.Char.General
4+
-- Module : Unicode.Char.General.Scripts
55
-- Copyright : (c) 2020 Composewell Technologies and Contributors
66
-- License : Apache-2.0
77
-- Maintainer : streamly@composewell.com
88
-- Stability : experimental
99
--
10-
-- Unicode scripts related functions.
10+
-- Unicode [scripts](https://www.unicode.org/reports/tr24/) related functions.
1111
--
1212
-- @since 0.1.0
1313
--
1414

1515
module Unicode.Char.General.Scripts
1616
( S.Script(..)
1717
, script
18+
, scriptExtensions
1819
, scriptDefinition
1920
)
2021
where
2122

2223
import Data.Char (chr)
24+
import Data.List.NonEmpty (NonEmpty)
2325
import GHC.Exts
2426
(Ptr(..), Char(..), Int(..),
2527
indexWord32OffAddr#, word2Int#,
@@ -32,8 +34,9 @@ import GHC.Exts (byteSwap32#)
3234
#endif
3335

3436
import qualified Unicode.Internal.Char.Scripts as S
37+
import qualified Unicode.Internal.Char.ScriptExtensions as S
3538

36-
-- | Character script.
39+
-- | Character [script](https://www.unicode.org/reports/tr24/).
3740
--
3841
-- @since 0.1.0
3942
{-# INLINE script #-}
@@ -82,3 +85,11 @@ scriptDefinition = unpack . S.scriptDefinition
8285
} in addRange (k# -# 2#) acc'
8386
else addRange (k# -# 1#) (C# (chr# c1#) : acc)
8487
} in addRange (n# -# 1#) mempty
88+
89+
-- | Character
90+
-- [script extensions](https://www.unicode.org/reports/tr24/#Script_Extensions).
91+
--
92+
-- @since 0.1.0
93+
{-# INLINE scriptExtensions #-}
94+
scriptExtensions :: Char -> NonEmpty S.Script
95+
scriptExtensions = S.decodeScriptExtensions . S.scriptExtensions

unicode-data-scripts/lib/Unicode/Internal/Char/ScriptExtensions.hs

Lines changed: 249 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)