Repo created

2025-11-21 15:13:05 +01:00 · 2025-11-21 15:13:05 +01:00 · 3ecd57d1b2
commit 3ecd57d1b2
parent f2d952b743
475 changed files with 37130 additions and 2 deletions
--- a/srcs/compose/README.md
+++ b/srcs/compose/README.md
@ -0,0 +1,10 @@
+# Compose sequences
+
+The `compile.py` program parses the compose sequences found in this directory
+and generates `srcs/juloo.keyboard2/ComposeKeyData.java`.
+
+## `compose/en_US_UTF_8_Compose.pre`
+
+This file is copied from the `xorg` project. Copyright applies.
+
+## `compose/extra.json`
--- a/srcs/compose/accent_aigu.json
+++ b/srcs/compose/accent_aigu.json
@ -0,0 +1,60 @@
+{
+  // latin
+  "a": "á",
+  "c": "ć",
+  "e": "é",
+  "g": "ǵ",
+  "i": "í",
+  "k": "ḱ",
+  "l": "ĺ",
+  "m": "ḿ", 
+  "n": "ń",
+  "o": "ó",
+  "p": "ṕ",
+  "r": "ŕ",
+  "s": "ś",
+  "u": "ú",
+  "w": "ẃ",
+  "y": "ý",
+  "z": "ź",
+  // extended latin (multiple diacritics)
+  "â": "ấ",
+  "ă": "ắ",
+  "å": "ǻ",
+  "æ": "ǽ",
+  "ç": "ḉ",
+  "ê": "ế",
+  "ē": "ḗ",
+  "ï": "ḯ",
+  "ô": "ố",
+  "ơ": "ớ",
+  "õ": "ṍ",
+  "ō": "ṓ",
+  "ø": "ǿ",
+  "ṡ": "ṥ",
+  "ü": "ǘ",
+  "ư": "ứ",
+  "ũ": "ṹ",
+  // greek
+  "α": "ά",
+  "ε": "έ",
+  "η": "ή",
+  "ι": "ί",
+  "ο": "ό",
+  "υ": "ύ",
+  // cyrillic
+  "к": "ќ",
+  "г": "ѓ",
+  // combining character
+  "ą": "ą\u0301",
+  "j": "j\u0301",
+  "у": "у\u0301",
+  "е": "е\u0301",
+  "а": "а\u0301",
+  "о": "о\u0301",
+  "и": "и\u0301",
+  "ы": "ы\u0301",
+  "э": "э\u0301",
+  "ю": "ю\u0301",
+  "я": "я\u0301"
+}
--- a/srcs/compose/accent_arrows.json
+++ b/srcs/compose/accent_arrows.json
@ -0,0 +1,13 @@
+{
+  "0": "↔",
+  "1": "↙",
+  "2": "↓",
+  "3": "↘",
+  "4": "←",
+  "5": "↕",
+  "6": "→",
+  "7": "↖",
+  "8": "↑",
+  "9": "↗",
+  ".": "↵"
+}
--- a/srcs/compose/accent_bar.json
+++ b/srcs/compose/accent_bar.json
@ -0,0 +1,30 @@
+{
+  // latin
+  "2": "ƻ",
+  "b": "ƀ",
+  "c": "ꞓ",
+  "d": "đ",
+  "f": "ꞙ",
+  "g": "ǥ",
+  "h": "ħ",
+  "i": "ɨ",
+  "j": "ɉ",
+  "k": "ꝁ",
+  "l": "ƚ",
+  "o": "ɵ",
+  "p": "ᵽ",
+  "q": "ꝗ",
+  "r": "ɍ",
+  "t": "ŧ",
+  "u": "ʉ",
+  "y": "ɏ",
+  "z": "ƶ",
+  // extended latin
+  "ȷ": "ɟ",
+  // cyrillic
+  "о": "ө",
+  "ӧ": "ӫ",
+  "ү": "ұ",
+  "ь": "ҍ",
+  "х": "ӿ"
+}
--- a/srcs/compose/accent_box.json
+++ b/srcs/compose/accent_box.json
@ -0,0 +1,13 @@
+{
+  "1": "└",
+  "2": "┴",
+  "3": "┘",
+  "4": "├",
+  "5": "┼",
+  "6": "┤",
+  "7": "┌",
+  "8": "┬",
+  "9": "┐",
+  "0": "─",
+  ".": "│"
+}
--- a/srcs/compose/accent_caron.json
+++ b/srcs/compose/accent_caron.json
@ -0,0 +1,33 @@
+{
+  // latin
+  "a": "ǎ",
+  "c": "č",
+  "d": "ď",
+  "e": "ě",
+  "g": "ǧ",
+  "h": "ȟ",
+  "i": "ǐ",
+  "j": "ǰ", // no uppercase
+  "k": "ǩ",
+  "l": "ľ",
+  "n": "ň",
+  "o": "ǒ",
+  "r": "ř",
+  "s": "š",
+  "t": "ť",
+  "u": "ǔ",
+  "z": "ž",
+  // extended latin
+  "ṡ": "ṧ",
+  "ü": "ǚ",
+  "ʒ": "ǯ",
+  // combining character
+  "в": "в\u030C",
+  "г": "г\u030C",
+  "ғ": "ғ\u030C",
+  "д": "д\u030C",
+  "з": "з\u030C",
+  "р": "р\u030C",
+  "т": "т\u030C",
+  "х": "х\u030C"
+}
--- a/srcs/compose/accent_cedille.json
+++ b/srcs/compose/accent_cedille.json
@ -0,0 +1,17 @@
+{
+  // latin
+  "c": "ç",
+  "d": "ḑ",
+  "e": "ȩ",
+  "g": "ģ",
+  "h": "ḩ",
+  "k": "ķ",
+  "l": "ļ",
+  "n": "ņ",
+  "r": "ŗ",
+  "s": "ş",
+  "t": "ţ",
+  // extended latin
+  "ć": "ḉ",
+  "ĕ": "ḝ"
+}
--- a/srcs/compose/accent_circonflexe.json
+++ b/srcs/compose/accent_circonflexe.json
@ -0,0 +1,41 @@
+{
+  "+": "⨣",
+  "≈": "⩯",
+  // latin
+  "a": "â",
+  "c": "ĉ",
+  "e": "ê",
+  "g": "ĝ",
+  "h": "ĥ",
+  "i": "î",
+  "j": "ĵ",
+  "o": "ô",
+  "s": "ŝ",
+  "u": "û",
+  "w": "ŵ",
+  "x": "x̂",
+  "y": "ŷ",
+  "z": "ẑ",
+  // extended latin
+  "á": "ấ",
+  "à": "ầ",
+  "ã": "ẫ",
+  "ạ": "ậ",
+  "ả": "ẩ",
+  "é": "ế",
+  "è": "ề",
+  "ẽ": "ễ",
+  "ẹ": "ệ",
+  "ẻ": "ể",
+  "ó": "ố",
+  "ò": "ồ",
+  "ỏ": "ổ",
+  "õ": "ỗ",
+  "ọ": "ộ",
+  // combining characters
+  "а": "а\u0302",
+  "е": "е\u0302",
+  "и": "и\u0302",
+  "о": "о\u0302",
+  "у": "у\u0302"
+}
--- a/srcs/compose/accent_dot_above.json
+++ b/srcs/compose/accent_dot_above.json
@ -0,0 +1,56 @@
+{
+  "a": "ȧ",
+  "b": "ḃ",
+  "c": "ċ",
+  "d": "ḋ",
+  "e": "ė",
+  "f": "ḟ",
+  "g": "ġ",
+  "h": "ḣ",
+  "m": "ṁ",
+  "n": "ṅ",
+  "o": "ȯ",
+  "p": "ṗ",
+  "r": "ṙ",
+  "s": "ṡ",
+  "t": "ṫ",
+  "w": "ẇ",
+  "x": "ẋ",
+  "y": "ẏ",
+  "z": "ż",
+  // remove dot since i and j already have one
+  "i": "ı",
+  "j": "ȷ",
+  // extended latin
+  "ā": "ǡ",
+  "ō": "ȱ",
+  "ś": "ṥ",
+  "ṣ": "ṩ",
+  "š": "ṧ",
+  "ſ": "ẛ",
+  // combining character
+  "k": "k\u0307",
+  "l": "l\u0307",
+  "q": "q\u0307",
+  "u": "u\u0307",
+  "v": "v\u0307",
+  "0": "0\u0307",
+  "1": "1\u0307",
+  "2": "2\u0307",
+  "3": "3\u0307",
+  "4": "4\u0307",
+  "5": "5\u0307",
+  "6": "6\u0307",
+  "7": "7\u0307",
+  "8": "8\u0307",
+  "9": "9\u0307",
+  // math
+  "∈": "⋵",
+  "⨯": "⨰",
+  "∧": "⩑",
+  "∨": "⩒",
+  "≡": "⩧",
+  "~": "⩪",
+  "⊆": "⫃",
+  "⊇": "⫄"
+}
--- a/srcs/compose/accent_dot_below.json
+++ b/srcs/compose/accent_dot_below.json
@ -0,0 +1,34 @@
+{
+  // latin
+  "a": "ạ",
+  "b": "ḅ",
+  "d": "ḍ",
+  "e": "ẹ",
+  "h": "ḥ",
+  "i": "ị",
+  "k": "ḳ",
+  "l": "ḷ",
+  "m": "ṃ",
+  "n": "ṇ",
+  "o": "ọ",
+  "r": "ṛ",
+  "s": "ṣ",
+  "t": "ṭ",
+  "u": "ụ",
+  "v": "ṿ",
+  "w": "ẉ",
+  "y": "ỵ",
+  "z": "ẓ",
+  // extended latin
+  "ă": "ặ",
+  "â": "ậ",
+  "ê": "ệ",
+  "ô": "ộ",
+  "ơ": "ợ",
+  "ṡ": "ṩ",
+  "ư": "ự",
+  // math
+  "-": "⨪",
+  "+": "⨥",
+  "=": "⩦"
+}
--- a/srcs/compose/accent_double_aigu.json
+++ b/srcs/compose/accent_double_aigu.json
@ -0,0 +1,14 @@
+{
+  " ": "˝",
+  // latin
+  "o": "ő",
+  "u": "ű",
+  // cyrillic
+  "у": "ӳ",
+  // combining character
+  "a": "a\u030b",
+  "e": "e\u030b",
+  "i": "i\u030b",
+  "m": "m\u030b",
+  "y": "y\u030b"
+}
--- a/srcs/compose/accent_double_grave.json
+++ b/srcs/compose/accent_double_grave.json
@ -0,0 +1,17 @@
+{
+    // latin
+    "a": "ȁ",
+    "e": "ȅ",
+    "i": "ȉ",
+    "o": "ȍ",
+    "r": "ȑ",
+    "u": "ȕ",
+    //cyrillic
+    "ѵ": "ѷ",
+    "а": "а\u030f",
+    "е": "е\u030f",
+    "и": "и\u030f",
+    "о": "о\u030f",
+    "р": "р\u030f",
+    "у": "у\u030f"
+}
--- a/srcs/compose/accent_grave.json
+++ b/srcs/compose/accent_grave.json
@ -0,0 +1,38 @@
+{
+  // latin
+  "a": "à",
+  "e": "è",
+  "i": "ì",
+  "n": "ǹ",
+  "o": "ò",
+  "u": "ù",
+  "w": "ẁ",
+  "y": "ỳ",
+  // extended latin
+  "â": "ầ",
+  "ă": "ằ",
+  "ê": "ề",
+  "ē": "ḕ",
+  "ơ": "ờ",
+  "ô": "ồ",
+  "ō": "ṑ",
+  "ü": "ǜ",
+  "ư": "ừ",
+  // greek (technically not a grave, but a varia)
+  "α": "ὰ",
+  "ε": "ὲ",
+  "η": "ὴ",
+  "ι": "ὶ",
+  "ο": "ὸ",
+  "υ": "ὺ",
+  "ω": "ὼ",
+  // there are more, like ἒ, ᾣ, etc.
+  // cyrillic
+  "е": "ѐ",
+  "и": "ѝ",
+  // combining character
+  "ɔ": "ɔ\u0300",
+  "s": "s\u0300",
+  "ʌ": "ʌ\u0300",
+  "z": "z\u0300"
+}
--- a/srcs/compose/accent_hook_above.json
+++ b/srcs/compose/accent_hook_above.json
@ -0,0 +1,14 @@
+{
+  "a": "ả",
+  "ă": "ẳ",
+  "â": "ẩ",
+  "e": "ẻ",
+  "ê": "ể",
+  "i": "ỉ",
+  "o": "ỏ",
+  "ô": "ổ",
+  "ơ": "ở",
+  "u": "ủ",
+  "ư": "ử",
+  "y": "ỷ"
+}
--- a/srcs/compose/accent_horn.json
+++ b/srcs/compose/accent_horn.json
@ -0,0 +1,14 @@
+{
+  "o": "ơ",
+  "ó": "ớ",
+  "ò": "ờ",
+  "ỏ": "ở",
+  "õ": "ỡ",
+  "ọ": "ợ",
+  "u": "ư",
+  "ú": "ứ",
+  "ù": "ừ",
+  "ủ": "ử",
+  "ũ": "ữ",
+  "ụ": "ự"
+}
--- a/srcs/compose/accent_macron.json
+++ b/srcs/compose/accent_macron.json
@ -0,0 +1,35 @@
+{
+  // latin
+  "a": "ā",
+  "e": "ē",
+  "g": "ḡ",
+  "i": "ī",
+  "o": "ō",
+  "u": "ū",
+  "y": "ȳ",
+  // extended latin
+  "æ": "ǣ",
+  "ä": "ǟ",
+  "ȧ": "ǡ",
+  "è": "ḕ",
+  "é": "ḗ",
+  "ḷ": "ḹ",
+  "ṛ": "ṝ",
+  "ö": "ȫ",
+  "ȯ": "ȱ",
+  "ǫ": "ǭ",
+  "õ": "ȭ",
+  "ò": "ṑ",
+  "ó": "ṓ",
+  "ü": "ǖ", // there is also ṻ
+  // cyrillic
+  "и": "ӣ",
+  "у": "ӯ",
+  // greek
+  "α": "ᾱ",
+  "ι": "ῑ",
+  "υ": "ῡ",
+  // combining characters
+  "l": "l\u0304",
+  "r": "r\u0304"
+}
--- a/srcs/compose/accent_ogonek.json
+++ b/srcs/compose/accent_ogonek.json
@ -0,0 +1,10 @@
+{
+  // latin
+  "a": "ą",
+  "e": "ę",
+  "i": "į",
+  "o": "ǫ",
+  "u": "ų",
+  // extended latin
+  "ō": "ǭ"
+}
--- a/srcs/compose/accent_ordinal.json
+++ b/srcs/compose/accent_ordinal.json
@ -0,0 +1,14 @@
+{
+  "a": "ª",
+  "o": "º",
+  "1": "ª",
+  "2": "º",
+  "3": "ⁿ",
+  "4": "ᵈ",
+  "5": "ᵉ",
+  "6": "ʳ",
+  "7": "ˢ",
+  "8": "ᵗ",
+  "9": "ʰ",
+  "*": "°"
+}
--- a/srcs/compose/accent_ring.json
+++ b/srcs/compose/accent_ring.json
@ -0,0 +1,11 @@
+{
+  // latin
+  "a": "å",
+  "u": "ů",
+  "w": "ẘ", // no uppercase
+  "y": "ẙ", // no uppercase
+  // extended latin
+  "á": "ǻ",
+  // extra
+  "~": "⸛"
+}
--- a/srcs/compose/accent_slash.json
+++ b/srcs/compose/accent_slash.json
@ -0,0 +1,18 @@
+{
+  "a": "ⱥ",
+  "b": "␢",
+  "c": "ȼ",
+  "e": "ɇ",
+  "g": "ꞡ",
+  "k": "ꝃ",
+  "l": "ł",
+  "n": "ꞥ",
+  "o": "ø",
+  "ó": "ǿ",
+  "ɔ": "ꬿ",
+  "r": "ꞧ",
+  "s": "ꞩ",
+  "t": "ⱦ",
+  "u": "ꞹ",
+  "v": "ꝟ"
+}
--- a/srcs/compose/accent_subscript.json
+++ b/srcs/compose/accent_subscript.json
@ -0,0 +1,45 @@
+{
+  // arabic numbers
+  "0": "₀",
+  "1": "₁",
+  "2": "₂",
+  "3": "₃",
+  "4": "₄",
+  "5": "₅",
+  "6": "₆",
+  "7": "₇",
+  "8": "₈",
+  "9": "₉",
+  // math operators
+  "+": "₊",
+  "-": "₋",
+  "=": "₌",
+  "(": "₍",
+  ")": "₎",
+  // latin
+  "a": "ₐ",
+  "e": "ₑ",
+  "h": "ₕ",
+  "i": "ᵢ",
+  "j": "ⱼ",
+  "k": "ₖ",
+  "l": "ₗ",
+  "m": "ₘ",
+  "n": "ₙ",
+  "o": "ₒ",
+  "p": "ₚ",
+  "r": "ᵣ",
+  "s": "ₛ",
+  "t": "ₜ",
+  "u": "ᵤ",
+  "v": "ᵥ",
+  "x": "ₓ",
+  // extended latin
+  "ə": "ₔ",
+  // greek
+  "β": "ᵦ",
+  "γ": "ᵧ",
+  "ρ": "ᵨ",
+  "φ": "ᵩ",
+  "χ": "ᵪ"
+}
--- a/srcs/compose/accent_superscript.json
+++ b/srcs/compose/accent_superscript.json
@ -0,0 +1,93 @@
+{
+  // numbers
+  "0": "⁰",
+  "1": "¹",
+  "2": "²",
+  "3": "³",
+  "4": "⁴",
+  "5": "⁵",
+  "6": "⁶",
+  "7": "⁷",
+  "8": "⁸",
+  "9": "⁹",
+  // math operators
+  "+": "⁺",
+  "-": "⁻",
+  "=": "⁼",
+  "(": "⁽",
+  ")": "⁾",
+  // latin
+  "n": "ⁿ",
+  
+  // since there are no more "superscript" characters,
+  // we substitute with "modifier letter small"s which looks the same
+  // latin
+  "a": "ᵃ",
+  "b": "ᵇ",
+  "c": "ᶜ",
+  "d": "ᵈ",
+  "e": "ᵉ",
+  "f": "ᶠ",
+  "g": "ᵍ",
+  "h": "ʰ",
+  "i": "ⁱ",
+  "j": "ʲ",
+  "k": "ᵏ",
+  "l": "ˡ",
+  // see above for n
+  "m": "ᵐ",
+  "o": "ᵒ",
+  "p": "ᵖ",
+  "q": "ꟴ", // there is no proper lowercase superscript q
+  "r": "ʳ",
+  "s": "ˢ",
+  "t": "ᵗ",
+  "u": "ᵘ",
+  "v": "ᵛ",
+  "w": "ʷ",
+  "x": "ˣ",
+  "y": "ʸ",
+  "z": "ᶻ",
+  // extended latin
+  "ɐ": "ᵄ",
+  "ᴂ": "ᵆ",
+  "ɕ": "ᶝ",
+  "ə": "ᵊ",
+  "ɛ": "ᵋ",
+  "ɜ": "ᶟ", // reversed open e, ↓ not the same
+  "ᴈ": "ᵌ", // turned open e
+  "ɥ": "ᶣ",
+  "ɦ": "ʱ",
+  "ᴉ": "ᵎ",
+  "ɨ": "ᶤ",
+  "ɟ": "ᶡ",
+  "ɱ": "ᶬ",
+  "ɯ": "ᵚ",
+  "ɰ": "ᶭ",
+  "ŋ": "ᵑ",
+  "ᴝ": "ᵙ",
+  "ɵ": "ᶱ",
+  "œ": "ꟹ",
+  "ɔ": "ᵓ",
+  "ɹ": "ʴ",
+  "ɻ": "ʵ",
+  "ʁ": "ʶ",
+  "ʂ": "ᶳ",
+  "ʉ": "ᶶ",
+  "ʃ": "ᶴ",
+  "ʒ": "ᶾ",
+  "ʍ": "ꭩ",
+  // greek
+  "ɒ": "ᶛ",
+  "β": "ᵝ",
+  "ɣ": "ˠ",
+  "δ": "ᵟ",
+  "φ": "ᵠ",
+  "χ": "ᵡ",
+  "ι": "ᶥ",
+  "ʊ": "ᶷ",
+  "ʌ": "ᶺ",
+  "θ": "ᶿ",
+  // cyrillic
+  "ө": "ᶱ"
+}
--- a/srcs/compose/accent_tilde.json
+++ b/srcs/compose/accent_tilde.json
@ -0,0 +1,21 @@
+{
+  // latin
+  "a": "ã",
+  "e": "ẽ",
+  "i": "ĩ",
+  "n": "ñ",
+  "o": "õ",
+  "u": "ũ",
+  "v": "ṽ",
+  "y": "ỹ",
+  // extended latin
+  "ă": "ẵ",
+  "â": "ẫ",
+  "ê": "ễ",
+  "ơ": "ỡ",
+  "ō": "ȭ",
+  "ó": "ṍ",
+  "ö": "ṏ",
+  "ư": "ữ",
+  "ú": "ṹ"
+}
--- a/srcs/compose/accent_trema.json
+++ b/srcs/compose/accent_trema.json
@ -0,0 +1,54 @@
+{
+  // fun
+  "~": "⍨",
+  "*": "⍣",
+  "∇": "⍢",
+  "°": "⍤",
+  // latin
+  "a": "ä",
+  "e": "ë",
+  "h": "ḧ",
+  "i": "ï",
+  "o": "ö",
+  "t": "ẗ",
+  "u": "ü",
+  "w": "ẅ",
+  "x": "ẍ",
+  "y": "ÿ",
+  // extended latin
+  "ā": "ǟ",
+  "ō": "ȫ",
+  "õ": "ṏ",
+  "í": "ḯ",
+  "ū": "ǖ", // there is also ṻ
+  "ú": "ǘ",
+  "ù": "ǜ",
+  "ǔ": "ǚ",
+  // greek
+  "ι": "ϊ",
+  "υ": "ϋ",
+  "ὺ": "ῢ",
+  "ύ": "ΰ",
+  "ῦ": "ῧ",
+  "ϒ": "ϔ",
+  // cyrillic
+  "а": "ӓ",
+  "ә": "ӛ",
+  "ж": "ӝ",
+  "з": "ӟ",
+  "и": "ӥ",
+  "о": "ӧ",
+  "ө": "ӫ",
+  "э": "ӭ",
+  "у": "ӱ",
+  "ч": "ӵ",
+  "ы": "ӹ",
+  // combining character
+  "c": "c\u0308",
+  "j": "j\u0308",
+  "k": "k\u0308",
+  "l": "l\u0308",
+  "m": "m\u0308",
+  "n": "n\u0308",
+  "s": "s\u0308"
+}
--- a/srcs/compose/compile.py
+++ b/srcs/compose/compile.py
@ -0,0 +1,339 @@
+import textwrap, sys, re, string, json, os, string
+from array import array
+
+# Compile compose sequences from Xorg's format or from JSON files into an
+# efficient state machine.
+# See [ComposeKey.java] for the interpreter.
+#
+# Takes input files as arguments and generates a Java file.
+# The initial state for each input is generated as a constant named after the
+# input file.
+
+# Read symbol names from keysymdef.h. Many compose sequences in
+# en_US_UTF_8_Compose.pre reference these. For example, all the sequences on
+# the Greek, Cyrillic and Hebrew scripts need these symbols.
+# Yields a (name, character) pair for every '#define XK_name' line that
+# carries a 'U+XXXX' code point in its comment.
+def parse_keysymdef_h(fname):
+    with open(fname, "r") as inp:
+        keysym_re = re.compile(r'^#define XK_(\S+)\s+\S+\s*/\*.U\+([0-9a-fA-F]+)\s')
+        for line in inp:
+            found = keysym_re.match(line)
+            if found is None:
+                continue
+            name, codepoint = found.group(1), found.group(2)
+            yield (name, chr(int(codepoint, 16)))
+
+# Number of sequences that were dropped, either because they could not be
+# interpreted or because they collide with an already known sequence.
+dropped_sequences = 0
+# Number of warnings printed by [warn].
+warning_count = 0
+
+# Format a sequence for messages. [s] is a list of strings, which are joined
+# with '+'. When [result] is given, " = result" is appended.
+def seq_to_str(s, result=None):
+    joined = "+".join(s)
+    if result is None:
+        return joined
+    return joined + " = " + result
+
+# Print a warning on stderr and count it in [warning_count]. If [seq] is
+# passed, the message is prefixed by the sequence (and its [result], if any).
+def warn(msg, seq=None, result=None):
+    global warning_count
+    text = msg if seq is None else f"Sequence {seq_to_str(seq, result=result)} {msg}"
+    print(f"Warning: {text}", file=sys.stderr)
+    warning_count += 1
+
+# Parse XKB's Compose.pre files.
+#
+# [fname] is the path of the file to read and [xkb_char_extra_names] maps
+# extra symbol names (for example those parsed from keysymdef.h) to
+# characters. Returns a list of (sequence, result) pairs, where [sequence] is
+# a list of single-character strings and [result] is the produced string.
+# Sequences that cannot be interpreted are skipped and counted in
+# [dropped_sequences].
+def parse_sequences_file_xkb(fname, xkb_char_extra_names):
+    # Parse a line of the form:
+    #     <Multi_key> <minus> <space>		: "~"	asciitilde # TILDE
+    # Sequences not starting with <Multi_key> are ignored.
+    line_re = re.compile(r'^((?:\s*<[^>]+>)+)\s*:\s*"((?:[^"\\]+|\\.)+)"\s*(\S+)?\s*(?:#.+)?$')
+    char_re = re.compile(r'\s*<(?:U([a-fA-F0-9]{4,6})|([^>]+))>')
+    def parse_seq_line(line):
+        global dropped_sequences
+        prefix = "<Multi_key>"
+        if not line.startswith(prefix):
+            return None
+        m = line_re.match(line[len(prefix):])
+        if m is None:
+            return None
+        try:
+            def_ = parse_seq_chars(m.group(1))
+            result = parse_seq_result(m.group(2))
+        except Exception as e:
+            # print(str(e) + ". Sequence dropped: " + line.strip(), file=sys.stderr)
+            dropped_sequences += 1
+            return None
+        return def_, result
+    # Names known so far. Copied to avoid modifying the caller's dictionary.
+    char_names = { **xkb_char_extra_names }
+    # Interpret character names of the form "U0000" or using [char_names].
+    def parse_seq_char(sc):
+        uchar, named_char = sc
+        if uchar != "":
+            c = chr(int(uchar, 16))
+        elif len(named_char) == 1:
+            c = named_char
+        else:
+            if named_char not in char_names:
+                raise Exception("Unknown char: " + named_char)
+            c = char_names[named_char]
+        # The state machine can't represent sequence characters that do not fit
+        # in a 16-bit char.
+        if len(c) != 1 or ord(c) > 65535:
+            raise Exception("Char out of range: " + c)
+        return c
+    # Interpret the left hand side of a sequence.
+    def parse_seq_chars(def_):
+        return list(map(parse_seq_char, re.findall(char_re, def_)))
+    # Interpret the result of a sequence, as output by [line_re]. A result
+    # made of a single escaped character is unescaped.
+    def parse_seq_result(r):
+        if len(r) == 2 and r[0] == '\\':
+            return r[1]
+        return r
+    # The file is needed for two passes, read it only once.
+    with open(fname, "r") as inp:
+        lines = inp.readlines()
+    # First pass: populate [char_names] with the names present in the file,
+    # these can be referenced by sequences appearing anywhere in it.
+    for line in lines:
+        m = line_re.match(line)
+        if m is None or m.group(3) is None:
+            continue
+        char_names[m.group(3)] = parse_seq_result(m.group(2))
+    # Second pass: parse the sequences
+    seqs = []
+    for line in lines:
+        s = parse_seq_line(line)
+        if s is not None:
+            seqs.append(s)
+    return seqs
+
+# Basic support for comments in json files. [inp] is an iterable of lines,
+# for example an open file. Returns the whole text as a single string in which
+# every '//' comment is removed up to the end of its line. A '//' appearing
+# inside a double-quoted string is not a comment and is kept.
+def strip_cstyle_comments(inp):
+    # Index of the '//' starting a comment in [line] or -1. JSON strings cannot
+    # span several lines so the string state is tracked per line.
+    def comment_start(line):
+        in_string = False
+        escaped = False
+        for i, ch in enumerate(line):
+            if in_string:
+                if escaped:
+                    escaped = False
+                elif ch == "\\":
+                    escaped = True
+                elif ch == '"':
+                    in_string = False
+            elif ch == '"':
+                in_string = True
+            elif ch == "/" and line.startswith("//", i):
+                return i
+        return -1
+    def strip_line(line):
+        i = comment_start(line)
+        # Keep the end of line so that line numbers in error messages match.
+        return line[:i] + "\n" if i >= 0 else line
+    return "".join(map(strip_line, inp))
+
+# Parse from a json file containing a dictionary sequence → result string.
+# Nested dictionaries describe longer sequences: each key is the next
+# character of the sequence and each string value is a result.
+# Returns a list of (sequence, result) pairs. If the file cannot be read or
+# parsed, a warning is emitted and an empty list is returned so that callers
+# can keep processing the other files.
+def parse_sequences_file_json(fname):
+    def tree_to_seqs(tree, prefix):
+        for c, r in tree.items():
+            if isinstance(r, str):
+                yield prefix + [c], r
+            else:
+                yield from tree_to_seqs(r, prefix + [c])
+    # json.loads() silently keeps the last value of a repeated key, which hides
+    # sequences. Keep that behavior but report it.
+    def warn_on_duplicates(pairs):
+        obj = {}
+        for key, value in pairs:
+            if key in obj:
+                warn("Duplicate key '%s' in '%s'" % (key, fname))
+            obj[key] = value
+        return obj
+    try:
+        # JSON text is UTF-8, do not depend on the locale's default encoding.
+        with open(fname, "r", encoding="utf-8") as inp:
+            tree = json.loads(strip_cstyle_comments(inp),
+                    object_pairs_hook=warn_on_duplicates)
+        return list(tree_to_seqs(tree, []))
+    except Exception as e:
+        warn("Failed parsing '%s': %s" % (fname, str(e)))
+        # Callers iterate over the result, None would make them crash.
+        return []
+
+# Parse the sequences file [fname] with the parser matching its extension:
+# '.pre' for XKB's Compose files and '.json' for JSON dictionaries.
+# [xkb_char_extra_names] is only used for '.pre' files.
+# Raises an exception for any other extension.
+def parse_sequences_file(fname, xkb_char_extra_names={}):
+    if fname.endswith(".json"):
+        return parse_sequences_file_json(fname)
+    if fname.endswith(".pre"):
+        return parse_sequences_file_xkb(fname, xkb_char_extra_names)
+    raise Exception(fname + ": Unsupported format")
+
+# A sequence directory can contain several sequence files as well as
+# 'keysymdef.h'. Returns the sequences of every file of the directory [dname]
+# in a single list.
+def parse_sequences_dir(dname):
+    compose_files = []
+    xkb_char_extra_names = {}
+    # Parse keysymdef.h first if present.
+    # os.listdir() returns entries in arbitrary order. Sort them so that the
+    # sequence kept when two files collide, and so the generated code, does not
+    # depend on the file system.
+    for fbasename in sorted(os.listdir(dname)):
+        fname = os.path.join(dname, fbasename)
+        if fbasename == "keysymdef.h":
+            xkb_char_extra_names = dict(parse_keysymdef_h(fname))
+        else:
+            compose_files.append(fname)
+    sequences = []
+    for fname in compose_files:
+        sequences.extend(parse_sequences_file(fname, xkb_char_extra_names))
+    return sequences
+
+# Insert the (sequence, result) pairs of [seqs] into [trie], a nested
+# dictionary indexed by the successive characters of a sequence and whose
+# leaves are result strings. A sequence colliding with an already inserted one
+# is dropped with a warning and counted in [dropped_sequences].
+def add_sequences_to_trie(seqs, trie):
+    global dropped_sequences
+    # Returns False if the sequence collides with an existing one.
+    def insert(seq, result):
+        node = trie
+        for depth in range(len(seq) - 1):
+            node = node.setdefault(seq[depth], {})
+            if isinstance(node, str):
+                # A shorter sequence already ends here
+                return False
+        last = seq[-1]
+        if last in node:
+            return False
+        node[last] = result
+        return True
+    # Describe what [seq] collides with. Used in error message
+    def describe_existing(seq):
+        node = trie
+        consumed = 0
+        for c in seq:
+            if c not in node:
+                break # No collision ?
+            node = node[c]
+            consumed += 1
+            if isinstance(node, str):
+                break
+        return "".join(seq[:consumed]) + " = " + str(node)
+    for seq, result in seqs:
+        if insert(seq, result):
+            continue
+        dropped_sequences += 1
+        warn("Sequence collide: '%s' and '%s = %s'" % (
+            describe_existing(seq),
+            "".join(seq), result))
+
+# Compile the tries into a state machine.
+# [tries] maps a name to a trie. Returns [entry_states, states] where
+# [entry_states] maps each name to the index of its initial state and [states]
+# is a list of pairs, one per cell of the state machine.
+def make_automata(tries):
+    leaf_index = {} # Deduplicate leafs
+    states = []
+    def emit_tree(node):
+        start = len(states)
+        keys = sorted(node.keys())
+        # Node header, its second field is the size of the node, header included
+        states.append(("\0", len(keys) + 1))
+        # Reserve the cells of the current node before adding nested nodes
+        states.extend([(None, None)] * len(keys))
+        # Add nested nodes and fill the current node
+        for offset, key in enumerate(keys, start=1):
+            states[start + offset] = (key, emit_node(node[key]))
+        return start
+    def emit_leaf(result):
+        if result in leaf_index:
+            return leaf_index[result]
+        index = len(states)
+        leaf_index[result] = index
+        # There are two encoding for leafs: character final state for 15-bit
+        # characters and string final state for the rest.
+        needs_string = len(result) > 1 or ord(result[0]) > 32767
+        if not needs_string: # Character final state
+            states.append((result, 1))
+            return index
+        # String final state.
+        # A ':' can be added to the result of a sequence to force a string
+        # final state. For example, to go through KeyValue lookup.
+        text = result[1:] if result.startswith(":") else result
+        units = array('H', text.encode("UTF-16-LE"))
+        states.append((-1, len(units) + 1))
+        states.extend((unit, 0) for unit in units)
+        return index
+    def emit_node(n):
+        return emit_leaf(n) if type(n) is str else emit_tree(n)
+    states.append((1, 1)) # Add an empty state at the beginning.
+    entry_states = {}
+    for name, root in tries.items():
+        entry_states[name] = emit_tree(root)
+    return entry_states, states
+
+# Debug: print each cell of [automata] on stderr, one per line, prefixed by
+# its index. Integers are printed in hexadecimal and characters in quotes.
+def print_automata(automata):
+    for index, (state, edge) in enumerate(automata):
+        if isinstance(state, int):
+            shown = "%#06x" % state
+        else:
+            shown = '"%s"' % str(state)
+        print("%3d %8s %d" % (index, shown, edge), file=sys.stderr)
+
+# Report warnings about the compose sequences of the trie [tries].
+def check_for_warnings(tries):
+    # Result of the sequence [seq] or None if there is no such sequence.
+    def get(seq):
+        t = tries
+        for c in seq:
+            # [t] is a string when a shorter sequence ends here, it must not be
+            # searched or indexed like a node.
+            if not isinstance(t, dict) or c not in t:
+                return None
+            t = t[c]
+        return t if type(t) == str else None
+    # Check that compose+Upper+Upper have an equivalent compose+Upper+Lower or compose+Lower+Lower
+    for c1 in string.ascii_uppercase:
+        for c2 in string.ascii_uppercase:
+            seq = [c1, c2]
+            seq_l = [c1, c2.lower()]
+            seq_ll = [c1.lower(), c2.lower()]
+            r = get(seq)
+            r_l = get(seq_l)
+            r_ll = get(seq_ll)
+            if r is not None:
+                ll_warning = f" (but {seq_to_str(seq_ll)} = {r_ll} exists)" if r_ll is not None else ""
+                if r_l is None:
+                    # Having the same result with both letters in lower case is
+                    # accepted as an equivalent.
+                    if r != r_ll:
+                        warn(f"has no lower case equivalent {seq_to_str(seq_l)}{ll_warning}", seq=seq, result=r)
+                elif r != r_l:
+                    warn(f"is not the same as {seq_to_str(seq_l)} = {r_l}{ll_warning}", seq=seq, result=r)
+
+# Yield the successive slices of [ar] of length [n]. The last slice is shorter
+# when the length of [ar] is not a multiple of [n].
+def batched(ar, n):
+    total = len(ar)
+    start = 0
+    while start < total:
+        end = start + n
+        if end < total:
+            yield ar[start:end]
+            start = end
+        else:
+            yield ar[start:]
+            return
+
+# Print on stdout the Java class [ComposeKeyData] holding the state machine
+# compiled by make_automata. [entry_states] maps names to initial states, each
+# becoming an int constant, and [machine] is the list of cells, written as the
+# two char arrays 'states' and 'edges'.
+def gen_java(entry_states, machine):
+    chars_map = {
+            # Java's parser unescapes unicode sequences before parsing, these
+            # characters must use their dedicated escape instead.
+            -1: "\\uFFFF",
+            "\"": "\\\"",
+            "\\": "\\\\",
+            "\n": "\\n",
+            "\r": "\\r",
+            ord("\""): "\\\"",
+            ord("\\"): "\\\\",
+            ord("\n"): "\\n",
+            ord("\r"): "\\r",
+            }
+    # Java source representation of a cell value, either a character or an int
+    def char_repr(c):
+        escaped = chars_map.get(c)
+        if escaped is not None:
+            return escaped
+        if type(c) == int: # The edges array contains ints
+            return "\\u%04x" % c
+        return c if c in string.printable else "\\u%04x" % ord(c)
+    # Break the array every few characters using string concatenation.
+    def gen_array(array):
+        chars = [char_repr(c) for c in array]
+        pieces = ["".join(chunk) for chunk in batched(chars, 72)]
+        return "\" +\n    \"".join(pieces)
+    def gen_entry_state(s):
+        name, state = s
+        return "  public static final int %s = %d;" % (name, state)
+    template = """package juloo.keyboard2;
+
+/** This file is generated, see [srcs/compose/compile.py]. */
+
+public final class ComposeKeyData
+{
+  public static final char[] states =
+    ("%s").toCharArray();
+
+  public static final char[] edges =
+    ("%s").toCharArray();
+
+%s
+}"""
+    states_src = gen_array([s[0] for s in machine])
+    edges_src = gen_array([s[1] for s in machine])
+    entries_src = "\n".join(gen_entry_state(item) for item in entry_states.items())
+    print(template % (states_src, edges_src, entries_src))
+
+# Entry point: compile every input given on the command line. Each input is a
+# sequences file or a directory of sequences files and is compiled into its
+# own trie, named after the input without its extension.
+total_sequences = 0
+tries = {} # Ordered dict, entry states are generated in insertion order
+for fname in sorted(sys.argv[1:]):
+    tname, _ = os.path.splitext(os.path.basename(fname))
+    if os.path.isdir(fname):
+        sequences = parse_sequences_dir(fname)
+    else:
+        sequences = parse_sequences_file(fname)
+    add_sequences_to_trie(sequences, tries.setdefault(tname, {}))
+    total_sequences += len(sequences)
+
+# The checks apply to the trie named "compose" only, which is absent when the
+# script is run on other inputs.
+if "compose" in tries:
+    check_for_warnings(tries["compose"])
+entry_states, automata = make_automata(tries)
+gen_java(entry_states, automata)
+
+print("Compiled %d sequences into %d states. Dropped %d sequences. Generated %d warnings." % (total_sequences, len(automata), dropped_sequences, warning_count), file=sys.stderr)
+# print_automata(automata)
--- a/srcs/compose/compose/arabic.json
+++ b/srcs/compose/compose/arabic.json
@ -0,0 +1,149 @@
+{
+  "ا": {
+    "ا": "combining_alef_above",
+    "ع": "أ",
+    "و": "ۉ",
+    "ي": "ؽ",
+    "ی": "ؽ",
+    "۷": "combining_alef_below",
+    "٧": "combining_alef_below"
+  },
+  "ت": {
+    "د": "ط",
+    "ر": "ڑ",
+    "ش": "ث",
+    "ن": "ٹ"
+  },
+  "ج": {
+    "ش": "چ"
+  },
+  "ح": {
+    "ح": "combining_sukun"
+  },
+  "د": {
+    "ت": "ڈ",
+    "ز": "ذ"
+  },
+  "ر": {
+    "ت": "ڑ",
+    "۷": "ڕ"
+  },
+  "س": {
+    "ش": "ص"
+  },
+  "ش": {
+    "ت": "ث"
+  },
+  "ع": {
+    "ا": "إ",
+    "ه": "ۀ",
+    "و": "ؤ",
+    "ي": "ئ",
+    "ی": "ئ",
+    "۷": "combining_hamza_below",
+    "۸": "combining_hamza_above",
+    "٧": "combining_hamza_below",
+    "٨": "combining_hamza_above"
+  },
+  "غ": {
+    "ك": "گ",
+    "ک": "گ"
+  },
+  "ف": {
+    "و": "ڡ"
+  },
+  "ق": {
+    "و": "ۊ"
+  },
+  "ل": {
+    "ل": "combining_shaddah",
+    "۷": "ڵ",
+    "٧": "ڵ"
+  },
+  "ن": {
+    "ت": "ٹ",
+    "ه": "combining_fathatan",
+    "و": "combining_dammatan",
+    "ی": "combining_kasratan",
+    "ي": "combining_kasratan"
+  },
+  "ه": {
+    " ": "ە",
+    "ت": "ة",
+    "ع": "ۀ",
+    "ن": "combining_fathatan",
+    "ه": "combining_fatha",
+    "و": "ۆ",
+    "ي": "ێ",
+    "ی": "ێ"
+  },
+  "و": {
+    "ث": "ۋ",
+    "ع": "ؤ",
+    "ف": "ڡ",
+    "ن": "combining_dammatan",
+    "و": "combining_dammah",
+    "۷": "ۆ",
+    "۸": "ۉ",
+    "٧": "ۆ",
+    "٨": "ۉ"
+  },
+  "ي": {
+    " ": "ے",
+    "ا": "ى",
+    "ع": "ئ",
+    "ي": "combining_kasra",
+    "۷": "ێ",
+    "۸": "ؽ",
+    "ن": "combining_kasratan",
+    "٧": "ێ",
+    "٨": "ؽ"
+  },
+  "ی": {
+    " ": "ے",
+    "ا": "ى",
+    "ع": "ئ",
+    "ن": "combining_kasratan",
+    "ی": "combining_kasra",
+    "۷": "ێ",
+    "۸": "ؽ",
+    "٧": "ێ",
+    "٨": "ؽ"
+  },
+  "۷": {
+    "ا": "combining_alef_below",
+    "ر": "ڕ",
+    "ع": "combining_hamza_below",
+    "ل": "ڵ",
+    "و": "ۆ",
+    "ي": "ێ",
+    "ی": "ێ",
+    "۷": "combining_arabic_v"
+  },
+  "۸": {
+    "ع": "combining_hamza_above",
+    "و": "ۉ",
+    "ي": "ؽ",
+    "ی": "ؽ",
+    "۸": "combining_arabic_inverted_v"
+  },
+  "٧": {
+    "ا": "combining_alef_below",
+    "ر": "ڕ",
+    "ع": "combining_hamza_below",
+    "ل": "ڵ",
+    "و": "ۆ",
+    "ي": "ێ",
+    "٧": "combining_arabic_v",
+    "ی": "ێ"
+  },
+  "٨": {
+    "ع": "combining_hamza_above",
+    "و": "ۉ",
+    "ي": "ؽ",
+    "٨": "combining_arabic_inverted_v",
+    "ی": "ؽ"
+  }
+}
--- a/srcs/compose/compose/cyrillic.json
+++ b/srcs/compose/compose/cyrillic.json
@ -0,0 +1,165 @@
+{
+  ",": {
+    "г": "ӻ",
+    "к": "ӄ",
+    "л": "ԓ",
+    "н": "ӈ",
+    "х": "ӽ",
+    "ѧ": "ӊ"
+  },
+  ".": {
+    "г": "ӷ",
+    "ж": "җ",
+    "й": "ҋ",
+    "к": "қ",
+    "л": "ԯ",
+    "м": "ӎ",
+    "н": "ӊ",
+    "х": "ҳ",
+    "ч": "ҷ",
+    "і": "ї"
+  },
+  "а": {
+    "е": "ѣ",
+    "у": "ѡ",
+    "ч": "combining_aigu",
+    "ы": "ѣ",
+    "ь": "ꙙ",
+    "ꙋ": "ꙍ",
+    "ꙑ": "ѣ"
+  },
+  "б": {
+    "ч": "combining_slavonic_psili"
+  },
+  "г": {
+    ",": "ӻ",
+    ".": "ӷ",
+    "й": "ғ",
+    "к": "ґ",
+    "х": "ҁ",
+    "ј": "ғ"
+  },
+  "д": {
+    "е": "ꙉ",
+    "ж": "џ",
+    "з": "ꙃ",
+    "й": "ꙉ",
+    "ј": "ꙉ",
+    "ѥ": "ђ"
+  },
+  "е": {
+    "ч": "combining_trema"
+  },
+  "ж": {
+    ".": "җ"
+  },
+  "з": {
+    "ф": "ҙ"
+  },
+  "и": {
+    "и": "ӣ",
+    "у": "ѵ"
+  },
+  "й": {
+    ".": "ҋ",
+    "ч": "combining_breve"
+  },
+  "к": {
+    ",": "ӄ",
+    ".": "қ",
+    "г": "ґ",
+    "с": "ѯ",
+    "х": "ҁ",
+    "ш": "ѯ"
+  },
+  "л": {
+    ",": "ԓ",
+    ".": "ԯ",
+    "ь": "љ"
+  },
+  "м": {
+    ".": "ӎ"
+  },
+  "н": {
+    ",": "ӈ",
+    "·": "ԩ",
+    "ч": "combining_titlo",
+    "ь": "њ"
+  },
+  "о": {
+    "т": "ѿ",
+    "у": "ѹ",
+    "ч": "combining_inverted_breve"
+  },
+  "п": {
+    "с": "ѱ"
+  },
+  "т": {
+    "й": "ћ",
+    "ф": "ѳ",
+    "ј": "ћ"
+  },
+  "у": {
+    "и": "ѵ",
+    "й": "ў",
+    "у": "ӯ",
+    "ч": "combining_pokrytie",
+    "і": "ѵ",
+    "ј": "ў"
+  },
+  "х": {
+    ",": "ӽ",
+    ".": "ҳ",
+    "ч": "combining_slavonic_dasia"
+  },
+  "ч": {
+    ".": "ҷ",
+    "а": "combining_aigu",
+    "б": "combining_slavonic_psili",
+    "е": "combining_trema",
+    "й": "combining_breve",
+    "н": "combining_titlo",
+    "о": "combining_inverted_breve",
+    "у": "combining_pokrytie",
+    "х": "combining_slavonic_dasia",
+    "ч": "combining_payerok",
+    "ъ": "combining_vertical_tilde",
+    "ю": "combining_grave",
+    "ј": "combining_breve",
+    "ѧ": "combining_vzmet"
+  },
+  "ш": {
+    "т": "щ"
+  },
+  "ъ": {
+    "ч": "combining_vertical_tilde"
+  },
+  "ю": {
+    "а": "ꙓ",
+    "е": "ё",
+    "м": "ѭ",
+    "н": "ѩ",
+    "ч": "combining_grave"
+  },
+  "я": {
+    "ь": "ꙝ"
+  },
+  "і": {
+    "\"": "ї",
+    ".": "ї",
+    "у": "ѵ",
+    "і": "ӣ"
+  },
+  "ј": {
+    "а": "ꙗ",
+    "ч": "combining_breve",
+    "ѣ": "ꙝ"
+  },
+  "ѡ": {
+    "т": "ѿ"
+  },
+  "ѧ": {
+    "ч": "combining_vzmet"
+  }
+}
--- a/srcs/compose/compose/en_US_UTF_8_Compose.pre
+++ b/srcs/compose/compose/en_US_UTF_8_Compose.pre
--- a/srcs/compose/compose/extra.json
+++ b/srcs/compose/compose/extra.json
@ -0,0 +1,62 @@
+{
+  "V": {
+    "s": "Š",
+    "c": "Č",
+    "z": "Ž"
+  },
+  "\\": {
+    "n": "\\n",
+    "t": "\\t"
+  },
+  "n": {
+    "g": {
+      "~": "n͠g"
+    }
+  },
+  "N": {
+    "g": {
+      "~": "N͠g"
+    },
+    "g": "Ŋ",
+    "n": "ℕ"
+  },
+  "g": {
+    "~": "g̃",
+    "u": "Ğ"
+  },
+  "A": {
+    "a": "Å",
+    "e": "Æ",
+    "t": "@"
+  },
+  "a": {
+    "E": "Æ"
+  },
+  "O": {
+    "e": "Œ"
+  },
+  "S": {
+    "s": "ẞ"
+  },
+  "I": {
+    "j": "Ĳ"
+  },
+  "D": {
+    "h": "Ð"
+  },
+  "E": {
+    "e": "Ə"
+  },
+  "Q": {
+    "q": "ℚ"
+  },
+  "R": {
+    "r": "ℝ"
+  },
+  "T": {
+    "h": "Þ"
+  },
+  "Z": {
+    "z": "ℤ"
+  }
+}
--- a/srcs/compose/compose/keysymdef.h
+++ b/srcs/compose/compose/keysymdef.h
--- a/srcs/compose/fn.json
+++ b/srcs/compose/fn.json
@ -0,0 +1,268 @@
+{
+  "1": "f1",
+  "2": "f2",
+  "3": "f3",
+  "4": "f4",
+  "5": "f5",
+  "6": "f6",
+  "7": "f7",
+  "8": "f8",
+  "9": "f9",
+  "0": "f10",
+  "<": "«",
+  ">": "»",
+  "{": "‹",
+  "}": "›",
+  "[": "‘",
+  "]": "’",
+  "(": "“",
+  ")": "”",
+  "'": "‚",
+  "\"": "„",
+  "-": "–",
+  "_": "—",
+  "^": "¬",
+  "%": "‰",
+  "=": "≈",
+  "u": "µ",
+  "a": "æ",
+  "o": "œ",
+  "*": "°",
+  ".": "…",
+  ",": "·",
+  "!": "¡",
+  "?": "¿",
+  "|": "¦",
+  "§": "¶",
+  "†": "‡",
+  "×": "∙",
+  " ": "nbsp",
+
+  // arrows
+  "↖": "⇖",
+  "↑": "⇑",
+  "↗": "⇗",
+  "←": "⇐",
+  "→": "⇒",
+  "↙": "⇙",
+  "↓": "⇓",
+  "↘": "⇘",
+  "↔": "⇔",
+  "↕": "⇕",
+  // Currency symbols
+  "e": "€",
+  "l": "£",
+  "r": "₹",
+  "y": "¥",
+  "c": "¢",
+  "p": "₽",
+  "b": "₱",
+  "h": "₴",
+  "z": "₿",
+  // avoid showing these twice
+  "€": "removed",
+  "£": "removed",
+  // alternative greek letters
+  "π": "ϖ",
+  "θ": "ϑ",
+  "Θ": "ϴ",
+  "ε": "ϵ",
+  "β": "ϐ",
+  "ρ": "ϱ",
+  "σ": "ς",
+  "γ": "ɣ",
+  "φ": "ϕ",
+  "υ": "ϒ",
+  "κ": "ϰ",
+  // alternative math characters
+  "∪": "⋃",
+  "∩": "⋂",
+  "∃": "∄",
+  "∫": "∮",
+  "Π": "∏",
+  "Σ": "∑",
+  "∨": "⋁",
+  "∧": "⋀",
+  "⊷": "⊶",
+  "⊂": "⊆",
+  "⊃": "⊇",
+  "±": "∓",
+
+  // APL
+  "⍺": "⍶",
+  "⍵": "⍹",
+  "⋄": "⌺",
+  "⍝": "⍧",
+  "∆": "⍙",
+  "∇": "⍢",
+  "⊤": "⍡",
+  "⎕": "⍞",
+
+  // hebrew niqqud
+  "ק": "qamats", // kamatz
+  "ר": "hataf_qamats", // reduced kamatz
+  "ו": "holam",
+  "ם": "rafe",
+  "פ": "patah", // patach
+  "ש": "sheva",
+  "ד": "dagesh", // or mapiq
+  "ח": "hiriq",
+  "ף": "hataf_patah", // reduced patach
+  "ז": "qubuts", // kubuts
+  "ס": "segol",
+  "ב": "hataf_segol", // reduced segol
+  "צ": "tsere",
+
+  // Devanagari symbols
+  "ए": "ऍ",
+  "े": "ॅ",
+  "ऐ": "ऎ",
+  "ै": "ॆ",
+  "ऋ": "ॠ",
+  "ृ": "ॄ",
+  "ळ": "ऴ",
+  "र": "ऱ",
+  "क": "क़",
+  "ख": "ख़",
+  "ग": "ग़",
+  "घ": "ॻ",
+  "ढ": "ढ़",
+  "न": "ऩ",
+  "ड": "ड़",
+  "ट": "ॸ",
+  "ण": "ॾ",
+  "फ": "फ़",
+  "ऌ": "ॡ",
+  "ॢ": "ॣ",
+  "औ": "ॵ",
+  "ौ": "ॏ",
+  "ओ": "ऒ",
+  "ो": "ॊ",
+  "च": "ॼ",
+  "ज": "ज़",
+  "ब": "ॿ",
+  "व": "ॺ",
+  "य": "य़",
+  "अ": "ॲ",
+  "आ": "ऑ",
+  "ा": "ॉ",
+  "झ": "ॹ",
+  "ई": "ॴ",
+  "ी": "ऻ",
+  "इ": "ॳ",
+  "ि": "ऺ",
+  "उ": "ॶ",
+  "ऊ": "ॷ",
+  "ु": "ऄ",
+  "ष": "क्ष",
+  "थ": "त्र",
+  "द": "द्र",
+  "प": "प्र",
+  "श": "श्र",
+  "छ": "श्च",
+  "ँ": "ऀ",
+  "₹": "₨",
+  "ॖ": "ॗ",
+  "॓": "॔",
+  "॰": "ॱ",
+  "।": "॥",
+  "ं": "ॕ",
+  "़": "ॎ",
+  "ऽ": "ॽ",
+
+  // Persian numbers
+  "۱": "f1",
+  "۲": "f2",
+  "۳": "f3",
+  "۴": "f4",
+  "۵": "f5",
+  "۶": "f6",
+  "۷": "f7",
+  "۸": "f8",
+  "۹": "f9",
+  "۰": "f10",
+  // Arabic numbers
+  "١": "f1",
+  "٢": "f2",
+  "٣": "f3",
+  "٤": "f4",
+  "٥": "f5",
+  "٦": "f6",
+  "٧": "f7",
+  "٨": "f8",
+  "٩": "f9",
+  "٠": "f10",
+
+  // Cyrillic
+  "ꙑ": "ы",
+  "ы": "ꙑ",
+  "ш": "ѱ",
+  "з": "ꙁ",
+  "и": "і",
+  "і": "и",
+  "я": "ꙗ",
+  "е": "ѥ",
+  "ѡ": "ꙍ",
+  "о": "ѻ",
+  "а": "ѣ",
+  "э": "є",
+  "ъ": "ь",
+  "ь": "ъ",
+  "й": "ј",
+  "ꙉ": "ђ",
+  "ч": "ћ",
+  "ҁ": "қ",
+  "қ": "ҁ",
+  "џ": "ҷ",
+  "ҷ": "џ",
+  "ј": "й",
+  "у": "ꙋ",
+  "м": "ѫ",
+  "н": "ѧ",
+  "с": "ѕ",
+  "л": "ԯ",
+  "ԓ": "ԯ",
+  "\ua67d": "\u0483",
+  "\u0487": "\ua66f",
+  "ӈ": "ԩ",
+
+  // Arabic
+  ":": "zwnj",
+  "ل": "ڵ",
+  "\u064F": "ۆ", // combining_dammah
+  "\u0650": "ێ", // combining_kasra
+  "ر": "ڕ",
+  "ب": "ٮ",
+  "ه": "ھ",
+  "ث": "پ",
+  "ز": "ژ",
+  "غ": "گ",
+  "ك": "ک",
+  "ا": "آ",
+  "ي": "ی",
+  "ک": "ك",
+  "ط": "ظ",
+  "ص": "ض",
+  "ی": "ي",
+  "ق": "غ",
+  "ع": "ء",
+  "ح": "ہ",
+  "ێ": "combining_kasra",
+  "ئ": "combining_hamza_above",
+  "ؽ": "combining_arabic_inverted_v",
+  "ۉ": "combining_arabic_inverted_v",
+  "ڡ": "combining_dammah",
+  "ة": "combining_fatha",
+  "إ": "combining_hamza_below",
+  "ۆ": "combining_arabic_v",
+  "س": "ـ",
+  "ف": "ڤ",
+  "ن": "ں",
+
+  // Tamil
+  "ய": ":௰",
+  "ஒ": ":ௐ",
+  "ள": ":௱",
+  "ச": ":௲",
+  "வ": ":௳"
+}
--- a/srcs/compose/numpad_bengali.json
+++ b/srcs/compose/numpad_bengali.json
@ -0,0 +1,12 @@
+{
+  "0": "০",
+  "1": "১",
+  "2": "২",
+  "3": "৩",
+  "4": "৪",
+  "5": "৫",
+  "6": "৬",
+  "7": "৭",
+  "8": "৮",
+  "9": "৯"
+}
--- a/srcs/compose/numpad_devanagari.json
+++ b/srcs/compose/numpad_devanagari.json
@ -0,0 +1,12 @@
+{
+  "0": "०",
+  "1": "१",
+  "2": "२",
+  "3": "३",
+  "4": "४",
+  "5": "५",
+  "6": "६",
+  "7": "७",
+  "8": "८",
+  "9": "९"
+}
--- a/srcs/compose/numpad_gujarati.json
+++ b/srcs/compose/numpad_gujarati.json
@ -0,0 +1,12 @@
+{
+  "0": "૦",
+  "1": "૧",
+  "2": "૨",
+  "3": "૩",
+  "4": "૪",
+  "5": "૫",
+  "6": "૬",
+  "7": "૭",
+  "8": "૮",
+  "9": "૯"
+}
--- a/srcs/compose/numpad_hindu.json
+++ b/srcs/compose/numpad_hindu.json
@ -0,0 +1,14 @@
+// Used with Arabic despite the name: these digits are called "Hindi numerals" in Arabic.
+// For Hindi itself, numpad_devanagari is used.
+{
+  "0": "٠",
+  "1": "١",
+  "2": "٢",
+  "3": "٣",
+  "4": "٤",
+  "5": "٥",
+  "6": "٦",
+  "7": "٧",
+  "8": "٨",
+  "9": "٩"
+}
--- a/srcs/compose/numpad_kannada.json
+++ b/srcs/compose/numpad_kannada.json
@ -0,0 +1,12 @@
+{
+  "0": "೦",
+  "1": "೧",
+  "2": "೨",
+  "3": "೩",
+  "4": "೪",
+  "5": "೫",
+  "6": "೬",
+  "7": "೭",
+  "8": "೮",
+  "9": "೯"
+}
--- a/srcs/compose/numpad_persian.json
+++ b/srcs/compose/numpad_persian.json
@ -0,0 +1,12 @@
+{
+  "0": "۰",
+  "1": "۱",
+  "2": "۲",
+  "3": "۳",
+  "4": "۴",
+  "5": "۵",
+  "6": "۶",
+  "7": "۷",
+  "8": "۸",
+  "9": "۹"
+}
--- a/srcs/compose/numpad_tamil.json
+++ b/srcs/compose/numpad_tamil.json
@ -0,0 +1,12 @@
+{
+  "0": "௦",
+  "1": "௧",
+  "2": "௨",
+  "3": "௩",
+  "4": "௪",
+  "5": "௫",
+  "6": "௬",
+  "7": "௭",
+  "8": "௮",
+  "9": "௯"
+}
--- a/srcs/compose/shift.json
+++ b/srcs/compose/shift.json
@ -0,0 +1,138 @@
+{
+  "↙": "⇙",
+  "↓": "⇓",
+  "↘": "⇘",
+  "←": "⇐",
+  "→": "⇒",
+  "↖": "⇖",
+  "↑": "⇑",
+  "↗": "⇗",
+  "└": "╚",
+  "┴": "╩",
+  "┘": "╝",
+  "├": "╠",
+  "┼": "╬",
+  "┤": "╣",
+  "┌": "╔",
+  "┬": "╦",
+  "┐": "╗",
+  "─": "═",
+  "│": "║",
+  "∈": "∉",
+  "∋": "∌",
+  "⊂": "⊄",
+  "⊃": "⊅",
+  "⊆": "⊈",
+  "⊇": "⊉",
+
+  // superscript
+  "ᵃ": "ᴬ",
+  "ᵇ": "ᴮ",
+  "ᶜ": "ꟲ",
+  "ᵈ": "ᴰ",
+  "ᵉ": "ᴱ",
+  "ᶠ": "ꟳ",
+  "ᵍ": "ᴳ",
+  "ʰ": "ᴴ",
+  "ⁱ": "ᴵ",
+  "ʲ": "ᴶ",
+  "ᵏ": "ᴷ",
+  "ˡ": "ᴸ",
+  "ᵐ": "ᴹ",
+  "ⁿ": "ᴺ",
+  "ᵒ": "ᴼ",
+  "ᵖ": "ᴾ",
+  "ʳ": "ᴿ",
+  "ᵗ": "ᵀ",
+  "ᵘ": "ᵁ",
+  "ᵛ": "ⱽ",
+  "ʷ": "ᵂ",
+  "ᶾ": "ᴣ",
+  "ᵠ": "ᶲ",
+
+  // German eszett has an uppercase form, but because it is uncommon, Java's case conversion doesn't produce it
+  "ß": "ẞ",
+
+  // These characters have no precomposed uppercase form, so the uppercase is built with a combining character
+  "ẗ": "T\u0308",
+  "ẘ": "W\u030A",
+  "ẙ": "Y\u030A",
+  "ǰ": "J\u030C",
+  "ȷ": "J\u0307",
+
+  // In Turkish, upper case of 'iı' is 'İI' but Java's toUpperCase will
+  // return 'II'. To make 'İ' accessible, make it the shift of 'ı'. This
+  // has the drawback of swapping i and ı on the keyboard.
+  "ı": "İ",
+
+  "₹": "₨",
+  // Gujarati alternate characters
+  "અ": "આ",
+  "ઇ": "ઈ",
+  "િ": "ી",
+  "ઉ": "ઊ",
+  "ુ": "ૂ",
+  "એ": "ઐ",
+  "ે": "ૈ",
+  "ઓ": "ઔ",
+  "ો": "ૌ",
+  "ક": "ખ",
+  "ગ": "ઘ",
+  "ચ": "છ",
+  "જ": "ઝ",
+  "ટ": "ઠ",
+  "ડ": "ઢ",
+  "ન": "ણ",
+  "ત": "થ",
+  "દ": "ધ",
+  "પ": "ફ",
+  "બ": "ભ",
+  "મ": "ં",
+  "લ": "ળ",
+  "સ": "શ",
+  "હ": "ઃ",
+
+  // Tamil alternate characters
+  "௹": "₨",
+
+  // Modern Hindi and Sanskrit
+  "अ": "आ",
+  "इ": "ई",
+  "ि": "ी",
+  "उ": "ऊ",
+  "ु": "ू",
+  "ए": "ऐ",
+  "े": "ै",
+  "ओ": "औ",
+  "ो": "ौ",
+  "क": "ख",
+  "ग": "घ",
+  "च": "छ",
+  "ज": "झ",
+  "ट": "ठ",
+  "ड": "ढ",
+  "न": "ण",
+  "त": "थ",
+  "द": "ध",
+  "ब": "भ",
+  "म": "ं",
+  "ल": "ळ",
+  "स": "श",
+  "ह": "ः",
+  "ऋ": "ॠ",
+  "ृ": "ॄ",
+  "ऌ": "ॡ",
+  "ॢ": "ॣ",
+  "॒": "॑",
+  "ॅ": "ॲ",
+  "ॉ": "ऑ",
+
+  // Mathematical symbols
+  "\uD835": {
+    "\uDD68": "𝕎", // 𝕨 → 𝕎
+    "\uDD69": "𝕏", // 𝕩 → 𝕏
+    "\uDD57": "𝔽", // 𝕗 → 𝔽
+    "\uDD58": "𝔾", // 𝕘 → 𝔾
+    "\uDD64": "𝕊"  // 𝕤 → 𝕊
+  }
+}