Repo created

This commit is contained in:
Fr4nz D13trich 2025-11-21 15:13:05 +01:00
parent f2d952b743
commit 3ecd57d1b2
475 changed files with 37130 additions and 2 deletions

10
srcs/compose/README.md Normal file
View file

@ -0,0 +1,10 @@
# Compose sequences
The `compile.py` program parses the compose sequences found in this directory
and generates `srcs/juloo.keyboard2/ComposeKeyData.java`.
## `compose/en_US_UTF_8_Compose.pre`
This file is copied from the `xorg` project. Copyright applies.
## `compose/extra.json`

View file

@ -0,0 +1,60 @@
{
// latin
"a": "á",
"c": "ć",
"e": "é",
"g": "ǵ",
"i": "í",
"k": "ḱ",
"l": "ĺ",
"m": "ḿ",
"n": "ń",
"o": "ó",
"p": "ṕ",
"r": "ŕ",
"s": "ś",
"u": "ú",
"w": "ẃ",
"y": "ý",
"z": "ź",
// extended latin (multiple diacritics)
"â": "ấ",
"ă": "ắ",
"å": "ǻ",
"æ": "ǽ",
"ç": "ḉ",
"ê": "ế",
"ē": "ḗ",
"ï": "ḯ",
"ô": "ố",
"ơ": "ớ",
"õ": "ṍ",
"ō": "ṓ",
"ø": "ǿ",
"ṡ": "ṥ",
"ü": "ǘ",
"ư": "ứ",
"ũ": "ṹ",
// greek
"α": "ά",
"ε": "έ",
"η": "ή",
"ι": "ί",
"ο": "ό",
"υ": "ύ",
// cyrillic
"к": "ќ",
"г": "ѓ",
// combining character
"ą": "ą\u0301",
"j": "j\u0301",
"у": "у\u0301",
"е": "е\u0301",
"а": "а\u0301",
"о": "о\u0301",
"и": "и\u0301",
"ы": "ы\u0301",
"э": "э\u0301",
"ю": "ю\u0301",
"я": "я\u0301"
}

View file

@ -0,0 +1,13 @@
{
"0": "↔",
"1": "↙",
"2": "↓",
"3": "↘",
"4": "←",
"5": "↕",
"6": "→",
"7": "↖",
"8": "↑",
"9": "↗",
".": "↵"
}

View file

@ -0,0 +1,30 @@
{
// latin
"2": "ƻ",
"b": "ƀ",
"c": "ꞓ",
"d": "đ",
"f": "",
"g": "ǥ",
"h": "ħ",
"i": "ɨ",
"j": "ɉ",
"k": "ꝁ",
"l": "ƚ",
"o": "ɵ",
"p": "ᵽ",
"q": "ꝗ",
"r": "ɍ",
"t": "ŧ",
"u": "ʉ",
"y": "ɏ",
"z": "ƶ",
// extended latin
"ȷ": "ɟ",
// cyrillic
"о": "ө",
"ӧ": "ӫ",
"ү": "ұ",
"ь": "ҍ",
"х": "ӿ"
}

View file

@ -0,0 +1,13 @@
{
"1": "└",
"2": "┴",
"3": "┘",
"4": "├",
"5": "┼",
"6": "┤",
"7": "┌",
"8": "┬",
"9": "┐",
"0": "─",
".": "│"
}

View file

@ -0,0 +1,33 @@
{
// latin
"a": "ǎ",
"c": "č",
"d": "ď",
"e": "ě",
"g": "ǧ",
"h": "ȟ",
"i": "ǐ",
"j": "ǰ", // no uppercase
"k": "ǩ",
"l": "ľ",
"n": "ň",
"o": "ǒ",
"r": "ř",
"s": "š",
"t": "ť",
"u": "ǔ",
"z": "ž",
// extended latin
"ṡ": "ṧ",
"ü": "ǚ",
"ʒ": "ǯ",
// combining character
"в": "в\u030C",
"г": "г\u030C",
"ғ": "ғ\u030C",
"д": "д\u030C",
"з": "з\u030C",
"р": "р\u030C",
"т": "т\u030C",
"х": "х\u030C"
}

View file

@ -0,0 +1,17 @@
{
// latin
"c": "ç",
"d": "ḑ",
"e": "ȩ",
"g": "ģ",
"h": "ḩ",
"k": "ķ",
"l": "ļ",
"n": "ņ",
"r": "ŗ",
"s": "ş",
"t": "ţ",
// extended latin
"ć": "ḉ",
"ĕ": "ḝ"
}

View file

@ -0,0 +1,41 @@
{
"+": "⨣",
"≈": "⩯",
// latin
"a": "â",
"c": "ĉ",
"e": "ê",
"g": "ĝ",
"h": "ĥ",
"i": "î",
"j": "ĵ",
"o": "ô",
"ŝ": "ŝ",
"u": "û",
"w": "ŵ",
"x": "x̂",
"y": "ŷ",
"z": "ẑ",
// extended latin
"á": "ấ",
"à": "ầ",
"ã": "ẫ",
"ạ": "ậ",
"ả": "ẩ",
"é": "ế",
"è": "ề",
"ẽ": "ễ",
"ẹ": "ệ",
"ẻ": "ể",
"ó": "ố",
"ò": "ồ",
"ơ": "ổ",
"õ": "ỗ",
"ọ": "ộ",
// combining characters
"а": "а\u0302",
"е": "е\u0302",
"и": "и\u0302",
"о": "о\u0302",
"у": "у\u0302"
}

View file

@ -0,0 +1,56 @@
{
"a": "ȧ",
"b": "ḃ",
"c": "ċ",
"d": "ḋ",
"e": "ė",
"f": "ḟ",
"g": "ġ",
"h": "ḣ",
"m": "ṁ",
"n": "ṅ",
"o": "ȯ",
"p": "ṗ",
"r": "ṙ",
"s": "ṡ",
"t": "ṫ",
"w": "ẇ",
"x": "ẋ",
"y": "ẏ",
"z": "ż",
// remove dot since i and j already have one
"i": "ı",
"j": "ȷ",
// extended latin
"ā": "ǡ",
"ō": "ȱ",
"ś": "ṥ",
"ṣ": "ṩ",
"š": "ṧ",
"ſ": "ẛ",
// combining character
"k": "k\u0307",
"l": "l\u0307",
"q": "q\u0307",
"u": "u\u0307",
"v": "v\u0307",
"0": "0\u0307",
"1": "1\u0307",
"2": "2\u0307",
"3": "3\u0307",
"4": "4\u0307",
"5": "5\u0307",
"6": "6\u0307",
"7": "7\u0307",
"8": "8\u0307",
"9": "9\u0307",
// math
"∈": "⋵",
"": "⨰",
"∧": "⩑",
"": "⩒",
"≡": "⩧",
"~": "⩪",
"⊆": "⫃",
"⊇": "⫄"
}

View file

@ -0,0 +1,34 @@
{
// latin
"a": "ạ",
"b": "ḅ",
"d": "ḍ",
"e": "ẹ",
"h": "ḥ",
"i": "ị",
"k": "ḳ",
"l": "ḷ",
"m": "ṃ",
"n": "ṇ",
"o": "ọ",
"r": "ṛ",
"s": "ṣ",
"t": "ṭ",
"u": "ụ",
"v": "ṿ",
"w": "ẉ",
"y": "ỵ",
"z": "ẓ",
// extended latin
"ă": "ặ",
"â": "ậ",
"ê": "ệ",
"ô": "ộ",
"ơ": "ợ",
"ṡ": "ṩ",
"ư": "ự",
// math
"-": "⨪",
"+": "⨥",
"=": "⩦"
}

View file

@ -0,0 +1,14 @@
{
" ": "˝",
// latin
"o": "ő",
"u": "ű",
// cyrillic
"у": "ӳ",
// combining character
"a": "a\u030b",
"e": "e\u030b",
"i": "i\u030b",
"m": "m\u030b",
"y": "y\u030b"
}

View file

@ -0,0 +1,17 @@
{
// latin
"a": "ȁ",
"e": "ȅ",
"i": "ȉ",
"o": "ȍ",
"r": "ȑ",
"u": "ȕ",
//cyrillic
"ѵ": "ѷ",
"а": "а\u030f",
"е": "е\u030f",
"и": "и\u030f",
"о": "о\u030f",
"р": "р\u030f",
"у": "у\u030f"
}

View file

@ -0,0 +1,38 @@
{
// latin
"a": "à",
"e": "è",
"i": "ì",
"n": "ǹ",
"o": "ò",
"u": "ù",
"w": "ẁ",
"y": "ỳ",
// extended latin
"â": "ầ",
"ă": "ằ",
"ê": "ề",
"ē": "ḕ",
"ơ": "ờ",
"ô": "ồ",
"ō": "ṑ",
"ü": "ǜ",
"ư": "ừ",
// greek (technically not a grave, but a varia)
"α": "ὰ",
"ε": "ὲ",
"η": "ὴ",
"ι": "ὶ",
"ο": "ὸ",
"υ": "ὺ",
"ω": "ὼ",
// there is more like , , etc
// cyrillic
"е": "ѐ",
"и": "ѝ",
// combining character
"ɔ": "ɔ\u0300",
"s": "s\u0300",
"ʌ": "ʌ\u0300",
"z": "z\u0300"
}

View file

@ -0,0 +1,14 @@
{
"a": "ả",
"ă": "ẳ",
"â": "ẩ",
"e": "ẻ",
"ê": "ể",
"i": "ỉ",
"o": "ỏ",
"ô": "ổ",
"ơ": "ở",
"u": "ủ",
"ư": "ử",
"y": "ỷ"
}

View file

@ -0,0 +1,14 @@
{
"o": "ơ",
"ó": "ớ",
"ò": "ờ",
"ỏ": "ở",
"õ": "ỡ",
"ọ": "ợ",
"u": "ư",
"ú": "ứ",
"ù": "ừ",
"ủ": "ử",
"ũ": "ữ",
"ụ": "ự"
}

View file

@ -0,0 +1,35 @@
{
// latin
"a": "ā",
"e": "ē",
"g": "ḡ",
"i": "ī",
"o": "ō",
"u": "ū",
"y": "ȳ",
// extended latin
"æ": "ǣ",
"ä": "ǟ",
"ȧ": "ǡ",
"è": "ḕ",
"é": "ḗ",
"ḷ": "ḹ",
"ṛ": "ṝ",
"ö": "ȫ",
"ȯ": "ȱ",
"ǫ": "ǭ",
"õ": "ȭ",
"ò": "ṑ",
"ó": "ṓ",
"ü": "ǖ", // there is also
// cyrillic
"и": "ӣ",
"у": "ӯ",
// greek
"α": "ᾱ",
"ι": "ῑ",
"υ": "ῡ",
// combining characters
"l": "l\u0304",
"r": "r\u0304"
}

View file

@ -0,0 +1,10 @@
{
// latin
"a": "ą",
"e": "ę",
"i": "į",
"o": "ǫ",
"u": "ų",
// extended latin
"ō": "ǭ"
}

View file

@ -0,0 +1,14 @@
{
"a": "ª",
"o": "º",
"1": "ª",
"2": "º",
"3": "ⁿ",
"4": "ᵈ",
"5": "ᵉ",
"6": "ʳ",
"7": "ˢ",
"8": "ᵗ",
"9": "ʰ",
"*": "°"
}

View file

@ -0,0 +1,11 @@
{
// latin
"a": "å",
"u": "ů",
"w": "ẘ", // no uppercase
"y": "ẙ", // no uppercase
// extended latin
"á": "ǻ",
// extra
"~": "⸛"
}

View file

@ -0,0 +1,18 @@
{
"a": "ⱥ",
"b": "␢",
"c": "ȼ",
"e": "ɇ",
"g": "ꞡ",
"k": "ꝃ",
"l": "ł",
"n": "ꞥ",
"o": "ø",
"ó": "ǿ",
"ɔ": "ꬿ",
"r": "ꞧ",
"s": "ꞩ",
"t": "ⱦ",
"u": "ꞹ",
"v": "ꝟ"
}

View file

@ -0,0 +1,45 @@
{
// arabic numbers
"0": "₀",
"1": "₁",
"2": "₂",
"3": "₃",
"4": "₄",
"5": "₅",
"6": "₆",
"7": "₇",
"8": "₈",
"9": "₉",
// math operators
"+": "₊",
"-": "₋",
"=": "₌",
"(": "₍",
")": "₎",
// latin
"a": "ₐ",
"e": "ₑ",
"h": "ₕ",
"i": "ᵢ",
"j": "ⱼ",
"k": "ₖ",
"l": "ₗ",
"m": "ₘ",
"n": "ₙ",
"o": "ₒ",
"p": "ₚ",
"r": "ᵣ",
"s": "ₛ",
"t": "ₜ",
"u": "ᵤ",
"v": "ᵥ",
"x": "ₓ",
// extended latin
"ə": "ₔ",
// greek
"β": "ᵦ",
"γ": "ᵧ",
"ρ": "ᵨ",
"φ": "ᵩ",
"χ": "ᵪ"
}

View file

@ -0,0 +1,93 @@
{
// numbers
"0": "⁰",
"1": "¹",
"2": "²",
"3": "³",
"4": "⁴",
"5": "⁵",
"6": "⁶",
"7": "⁷",
"8": "⁸",
"9": "⁹",
// math operators
"+": "⁺",
"-": "⁻",
"=": "⁼",
"(": "⁽",
")": "⁾",
// latin
"n": "ⁿ",
// since there are no more "superscript" characters,
// we substitute with "modifier letter small"s which looks the same
// latin
"a": "ᵃ",
"b": "ᵇ",
"c": "ᶜ",
"d": "ᵈ",
"e": "ᵉ",
"f": "ᶠ",
"g": "ᵍ",
"h": "ʰ",
"i": "ⁱ",
"j": "ʲ",
"k": "ᵏ",
"l": "ˡ",
// see above for n
"m": "ᵐ",
"o": "ᵒ",
"p": "ᵖ",
"q": "ꟴ", // there is no proper lowercase superscript q
"r": "ʳ",
"s": "ˢ",
"t": "ᵗ",
"u": "ᵘ",
"v": "ᵛ",
"w": "ʷ",
"x": "ˣ",
"y": "ʸ",
"z": "ᶻ",
// extended latin
"ɐ": "ᵄ",
"ᴂ": "ᵆ",
"ɕ": "ᶝ",
"ə": "ᵊ",
"ɛ": "ᵋ",
"ɜ": "ᶟ", // turned open e, not the same
"ᴈ": "ᵌ", // reversed open e
"ɥ": "ᶣ",
"ɦ": "ʱ",
"ᴉ": "ᵎ",
"ɨ": "ᶤ",
"ɟ": "ᶡ",
"ɱ": "ᶬ",
"ɯ": "ᵚ",
"ɰ": "ᶭ",
"ŋ": "ᵑ",
"ᴝ": "ᵙ",
"ɵ": "ᶱ",
"œ": "ꟹ",
"ɔ": "ᵓ",
"ɹ": "ʴ",
"ɻ": "ʵ",
"ʁ": "ʶ",
"ʂ": "ᶳ",
"ʉ": "ᶶ",
"ʃ": "ᶴ",
"ʒ": "ᶾ",
"ʍ": "ꭩ",
// greek
"ɒ": "ᶛ",
"β": "ᵝ",
"ɣ": "ˠ",
"δ": "ᵟ",
"φ": "ᵠ",
"χ": "ᵡ",
"ι": "ᶥ",
"ʊ": "ᶷ",
"ʌ": "ᶺ",
"θ": "ᶿ",
// cyrillic
"ө": "ᶱ"
}

View file

@ -0,0 +1,21 @@
{
// latin
"a": "ã",
"e": "ẽ",
"i": "ĩ",
"n": "ñ",
"o": "õ",
"u": "ũ",
"v": "ṽ",
"y": "ỹ",
// extended latin
"ă": "ẵ",
"â": "ẫ",
"ê": "ễ",
"ơ": "ỡ",
"ō": "ȭ",
"ó": "ṍ",
"ö": "ṏ",
"ư": "ữ",
"ú": "ṹ"
}

View file

@ -0,0 +1,54 @@
{
// fun
"~": "⍨",
"*": "⍣",
"∇": "⍢",
"°": "⍤",
// latin
"a": "ä",
"e": "ë",
"h": "ḧ",
"i": "ï",
"o": "ö",
"t": "ẗ",
"u": "ü",
"w": "ẅ",
"x": "ẍ",
"y": "ÿ",
// extended latin
"ā": "ǟ",
"ō": "ȫ",
"õ": "ṏ",
"í": "ḯ",
"ū": "ǖ", // there is also
"ú": "ǘ",
"ù": "ǜ",
"ǔ": "ǚ",
// greek
"ι": "ϊ",
"υ": "ϋ",
"ὺ": "ῢ",
"ύ": "ΰ",
"ῦ": "ῧ",
"ϒ": "ϔ",
// cyrillic
"а": "ӓ",
"ә": "ӛ",
"ж": "ӝ",
"з": "ӟ",
"и": "ӥ",
"о": "ӧ",
"ө": "ӫ",
"э": "ӭ",
"у": "ӱ",
"ч": "ӵ",
"ы": "ӹ",
// combining character
"c": "c\u0308",
"j": "j\u0308",
"k": "k\u0308",
"l": "l\u0308",
"m": "m\u0308",
"n": "n\u0308",
"s": "s\u0308"
}

339
srcs/compose/compile.py Normal file
View file

@ -0,0 +1,339 @@
import textwrap, sys, re, string, json, os, string
from array import array
# Compile compose sequences from Xorg's format or from JSON files into an
# efficient state machine.
# See [ComposeKey.java] for the interpreter.
#
# Takes input files as arguments and generate a Java file.
# The initial state for each input is generated as a constant named after the
# input file.
# Parse symbol names from keysymdef.h. Many compose sequences in
# en_US_UTF_8_Compose.pre reference these. For example, all the sequences on
# the Greek, Cyrillic and Hebrew scripts need these symbols.
def parse_keysymdef_h(fname):
    """Yield `(name, char)` pairs for the keysyms declared in [fname].

    Only `#define XK_<name> <value> /* U+<hex> ...` lines are recognised; the
    character is built from the hexadecimal code point found in the comment.
    Every other line is skipped.
    """
    pattern = re.compile(r'^#define XK_(\S+)\s+\S+\s*/\*.U\+([0-9a-fA-F]+)\s')
    with open(fname, "r") as header:
        for text in header:
            found = pattern.match(text)
            if found is None:
                continue
            name, codepoint = found.groups()
            yield (name, chr(int(codepoint, 16)))
# Number of sequences discarded so far, either because they could not be
# parsed or because they collide with a sequence already in a trie.
dropped_sequences = 0
# Number of warnings printed by [warn].
warning_count = 0
# [s] is a list of strings
def seq_to_str(s, result=None):
    """Format the sequence [s] (a list of strings) as `a+b+c`.

    When [result] is given, ` = result` is appended.
    """
    joined = "+".join(s)
    if result is None:
        return joined
    return joined + " = " + result
# Print a warning. If [seq] is passed, it is prepended to the message.
def warn(msg, seq=None, result=None):
    """Print `Warning: <msg>` on stderr and count it in [warning_count].

    If [seq] is given, the message is prefixed with `Sequence <seq> `, where
    the sequence (and the optional [result]) is formatted by [seq_to_str].
    """
    global warning_count
    if seq is not None:
        msg = "Sequence {} {}".format(seq_to_str(seq, result=result), msg)
    print("Warning: {}".format(msg), file=sys.stderr)
    warning_count += 1
# Parse XKB's Compose.pre files
def parse_sequences_file_xkb(fname, xkb_char_extra_names):
    """Parse an XKB `Compose.pre` file into a list of `(keys, result)` pairs.

    [fname] is read as UTF-8. [xkb_char_extra_names] maps extra symbol names
    to characters (for example the names parsed from keysymdef.h); it is
    copied, not modified. Symbol names declared by the file itself (the
    optional word following the quoted result) are added to that copy before
    any sequence is interpreted.

    Only lines starting with `<Multi_key>` are considered. A sequence that
    references an unknown symbol, or a character that does not fit in a
    16-bit char, is skipped and counted in [dropped_sequences].
    """
    # Parse a line of the form:
    #     <Multi_key> <minus> <space> : "~" asciitilde # TILDE
    # Sequences not starting with <Multi_key> are ignored.
    line_re = re.compile(r'^((?:\s*<[^>]+>)+)\s*:\s*"((?:[^"\\]+|\\.)+)"\s*(\S+)?\s*(?:#.+)?$')
    char_re = re.compile(r'\s*<(?:U([a-fA-F0-9]{4,6})|([^>]+))>')
    char_names = { **xkb_char_extra_names }

    # Interpret character names of the form "U0000" or using [char_names].
    def parse_seq_char(sc):
        uchar, named_char = sc
        if uchar != "":
            c = chr(int(uchar, 16))
        elif len(named_char) == 1:
            c = named_char
        else:
            if named_char not in char_names:
                raise Exception("Unknown char: " + named_char)
            c = char_names[named_char]
        # The state machine can't represent sequence characters that do not fit
        # in a 16-bit char.
        if len(c) > 1 or ord(c[0]) > 65535:
            raise Exception("Char out of range: " + c)
        return c

    # Interpret the left hand side of a sequence.
    def parse_seq_chars(def_):
        return list(map(parse_seq_char, re.findall(char_re, def_)))

    # Interpret the result of a sequence, as output by [line_re]. A
    # two-character backslash escape is reduced to the escaped character.
    def parse_seq_result(r):
        if len(r) == 2 and r[0] == '\\':
            return r[1]
        return r

    def parse_seq_line(line):
        global dropped_sequences
        prefix = "<Multi_key>"
        if not line.startswith(prefix):
            return None
        m = line_re.match(line[len(prefix):])
        if m is None:
            return None
        try:
            def_ = parse_seq_chars(m.group(1))
            result = parse_seq_result(m.group(2))
        except Exception:
            # Unknown or unrepresentable character: drop the whole sequence.
            dropped_sequences += 1
            return None
        return def_, result

    # The file is needed twice, read it only once.
    with open(fname, "r", encoding="utf-8") as inp:
        lines = inp.readlines()
    # Populate [char_names] with the information present in the file. This
    # must be complete before parsing as sequences can reference names that
    # are declared further down.
    for line in lines:
        m = line_re.match(line)
        if m is None or m.group(3) is None:
            continue
        char_names[m.group(3)] = parse_seq_result(m.group(2))
    # Parse the sequences
    seqs = []
    for line in lines:
        s = parse_seq_line(line)
        if s is not None:
            seqs.append(s)
    return seqs
# Basic support for comments in json files. Takes an iterable of lines (for
# example an open file) and returns the text with '//' comments removed.
def strip_cstyle_comments(inp):
    """Remove `//` comments from [inp] and return the remaining text.

    [inp] is an iterable of lines (for example an open file). Everything from
    a `//` to the end of its line is replaced by a single newline. A `//`
    located inside a double-quoted JSON string is part of the data and is
    left untouched. Block comments (`/* */`) are not supported.
    """
    # Index of the first '//' that is outside of a string, or -1.
    def comment_start(line):
        in_string = False
        i = 0
        while i < len(line):
            ch = line[i]
            if in_string:
                if ch == "\\":
                    i += 1 # Skip the escaped character, it can't end the string
                elif ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif line.startswith("//", i):
                return i
            i += 1
        return -1
    def strip_line(line):
        i = comment_start(line)
        return line[:i] + "\n" if i >= 0 else line
    return "".join(map(strip_line, inp))
# Parse from a json file containing a dictionary sequence → result string.
def parse_sequences_file_json(fname):
    """Parse a JSON file into a list of `(keys, result)` pairs.

    The file is read as UTF-8 and may contain `//` comments. It holds a
    dictionary in which a string value ends a sequence and a nested
    dictionary continues it with one more key.

    A key repeated inside the same object triggers a warning, as the JSON
    parser keeps only the last value. If the file cannot be read or parsed,
    a warning is printed and an empty list is returned so that callers can
    always iterate over the result.
    """
    def tree_to_seqs(tree, prefix):
        for c, r in tree.items():
            if isinstance(r, str):
                yield prefix + [c], r
            else:
                yield from tree_to_seqs(r, prefix + [c])
    # Build each JSON object by hand to notice repeated keys.
    def checked_object(pairs):
        obj = {}
        for key, value in pairs:
            if key in obj:
                warn("Duplicate key '%s' in '%s', only the last value is used" % (key, fname))
            obj[key] = value
        return obj
    try:
        with open(fname, "r", encoding="utf-8") as inp:
            tree = json.loads(strip_cstyle_comments(inp), object_pairs_hook=checked_object)
        return list(tree_to_seqs(tree, []))
    except Exception as e:
        warn("Failed parsing '%s': %s" % (fname, str(e)))
        return []
# Format of the sequences file is determined by its extension
def parse_sequences_file(fname, xkb_char_extra_names={}):
    """Parse [fname] with the parser matching its extension.

    `.pre` files go through [parse_sequences_file_xkb], which also receives
    [xkb_char_extra_names]; `.json` files go through
    [parse_sequences_file_json]. Any other name raises an Exception.
    """
    # The parsers are wrapped in lambdas so that only the selected one runs.
    parsers = (
        (".pre", lambda: parse_sequences_file_xkb(fname, xkb_char_extra_names)),
        (".json", lambda: parse_sequences_file_json(fname)),
    )
    for extension, run_parser in parsers:
        if fname.endswith(extension):
            return run_parser()
    raise Exception(fname + ": Unsupported format")
# A sequence directory can contain several sequence files as well as
# 'keysymdef.h'.
def parse_sequences_dir(dname):
    """Parse every sequence file found in the directory [dname].

    `keysymdef.h`, if present, is not a sequence file: it is parsed first with
    [parse_keysymdef_h] and the resulting symbol names are handed to the
    parser of every other file. Returns the concatenated list of sequences.

    Files are processed in sorted order. [os.listdir] returns entries in
    arbitrary order, and the first of two colliding sequences wins when they
    are added to a trie, so the output would otherwise depend on the
    filesystem.
    """
    compose_files = []
    xkb_char_extra_names = {}
    # Parse keysymdef.h first if present
    for fbasename in sorted(os.listdir(dname)):
        fname = os.path.join(dname, fbasename)
        if fbasename == "keysymdef.h":
            xkb_char_extra_names = dict(parse_keysymdef_h(fname))
        else:
            compose_files.append(fname)
    sequences = []
    for fname in compose_files:
        sequences.extend(parse_sequences_file(fname, xkb_char_extra_names))
    return sequences
# Turn a list of sequences into a trie.
def add_sequences_to_trie(seqs, trie):
    """Insert every `(keys, result)` pair of [seqs] into [trie], in place.

    [trie] is a nested dictionary: a string value is the result of a complete
    sequence, a dictionary value holds the continuations of a prefix. A
    sequence that collides with one already present (it is a prefix of it, an
    extension of it, or identical to it) is not inserted: it is counted in
    [dropped_sequences] and reported with [warn].
    """
    global dropped_sequences

    # Returns whether [path] could be inserted.
    def insert(path, value):
        node = trie
        for key in path[:-1]:
            node = node.setdefault(key, {})
            if isinstance(node, str):
                # A shorter sequence already ends here
                return False
        last = path[-1]
        if last in node:
            return False
        node[last] = value
        return True

    # Describe what is already in the trie along [path]. Used in error message
    def describe_existing(path):
        node = trie
        depth = 0
        for key in path:
            if key not in node:
                break # No collision ?
            node = node[key]
            depth += 1
            if isinstance(node, str):
                break
        return "".join(path[:depth]) + " = " + str(node)

    for seq, result in seqs:
        if insert(seq, result):
            continue
        dropped_sequences += 1
        warn("Sequence collide: '%s' and '%s = %s'" % (
            describe_existing(seq),
            "".join(seq), result))
# Compile the trie into a state machine.
def make_automata(tries):
    """Compile the tries into a single state machine.

    [tries] maps a name to a trie (nested dictionaries with string leaves).
    Returns `(entry_states, states)` where [entry_states] maps each name to
    the index of its root node and [states] is a list of `(state, edge)`
    pairs laid out as follows:

    - Index 0 is an empty state, `(1, 1)`.
    - A node with `s` transitions is a header `("\\0", s + 1)` followed by one
      `(char, index of the target)` pair per transition, sorted by char.
    - A character final state is `(char, 1)`.
    - A string final state is a header `(-1, length + 1)` followed by one
      `(UTF-16 code unit, 0)` pair per code unit of the string.

    Identical leafs are emitted once and shared.
    """
    previous_leafs = {} # Deduplicate leafs
    states = []
    def add_tree(t):
        this_node_index = len(states)
        s = len(t.keys())
        # Add node header
        states.append(("\0", s + 1))
        # Reserve space for the current node so that its transitions stay
        # contiguous, the nested nodes are appended after it.
        states.extend([(None, None)] * s)
        # Add nested nodes and fill the current node
        i = this_node_index + 1
        for c in sorted(t.keys()):
            states[i] = (c, add_node(t[c]))
            i += 1
        return this_node_index
    def add_leaf(c):
        if c in previous_leafs:
            return previous_leafs[c]
        this_node_index = len(states)
        previous_leafs[c] = this_node_index
        # There are two encoding for leafs: character final state for 15-bit
        # characters and string final state for the rest.
        if len(c) > 1 or ord(c[0]) > 32767: # String final state
            # A ':' can be added to the result of a sequence to force a string
            # final state. For example, to go through KeyValue lookup.
            if c.startswith(":"): c = c[1:]
            # Decode the little-endian code units explicitly. Reinterpreting
            # the bytes with array('H') uses the byte order of the host and
            # gives swapped values on big-endian machines.
            data = c.encode("UTF-16-LE")
            javachars = [ data[k] | (data[k + 1] << 8) for k in range(0, len(data), 2) ]
            states.append((-1, len(javachars) + 1))
            for unit in javachars:
                states.append((unit, 0))
        else: # Character final state
            states.append((c, 1))
        return this_node_index
    def add_node(n):
        if type(n) == str:
            return add_leaf(n)
        else:
            return add_tree(n)
    states.append((1, 1)) # Add an empty state at the beginning.
    entry_states = { n: add_tree(root) for n, root in tries.items() }
    return entry_states, states
# Debug
def print_automata(automata):
    """Dump [automata] on stderr, one `index state edge` line per entry.

    Integer states are printed in hexadecimal, other states between double
    quotes.
    """
    for index, (state, edge) in enumerate(automata):
        if isinstance(state, int):
            label = "%#06x" % state
        else:
            label = '"%s"' % str(state)
        print("%3d %8s %d" % (index, label, edge), file=sys.stderr)
# Report warnings about the compose sequences
def check_for_warnings(tries):
    """Report suspicious sequences found in the trie [tries] using [warn].

    For every pair of ASCII upper case letters that forms a sequence, the
    Upper+lower variant is expected to give the same result. When that
    variant does not exist, the lower+lower variant is expected to give the
    same result instead.
    """
    # Result of the complete sequence [seq], or None if there is none.
    def get(seq):
        t = tries
        for c in seq:
            # [t] is a string when a shorter sequence ends here. It must not
            # be searched or indexed like a dictionary.
            if not isinstance(t, dict) or c not in t:
                return None
            t = t[c]
        return t if type(t) == str else None
    # Check that compose+Upper+Upper have an equivalent compose+Upper+Lower or compose+Lower+Lower
    for c1 in string.ascii_uppercase:
        for c2 in string.ascii_uppercase:
            seq = [c1, c2]
            seq_l = [c1, c2.lower()]
            seq_ll = [c1.lower(), c2.lower()]
            r = get(seq)
            if r is None:
                continue
            r_l = get(seq_l)
            r_ll = get(seq_ll)
            ll_warning = f" (but {seq_to_str(seq_ll)} = {r_ll} exists)" if r_ll is not None else ""
            if r_l is None:
                if r != r_ll:
                    warn(f"has no lower case equivalent {seq_to_str(seq_l)}{ll_warning}", seq=seq, result=r)
            elif r != r_l:
                warn(f"is not the same as {seq_to_str(seq_l)} = {r_l}{ll_warning}", seq=seq, result=r)
def batched(ar, n):
    """Yield successive slices of [ar] of length [n].

    The last slice holds the remaining elements and can be shorter. Nothing
    is yielded for an empty [ar].
    """
    total = len(ar)
    start = 0
    while total - start > n:
        yield ar[start:start + n]
        start += n
    if start < total:
        yield ar[start:]
# Print the state machine compiled by make_automata into java code that can be
# used by [ComposeKeyData.java].
def gen_java(entry_states, machine):
    """Print the Java source of the `ComposeKeyData` class on stdout.

    [machine] is the list of `(state, edge)` pairs built by [make_automata].
    The first components are emitted as the `states` char array and the
    second components as the `edges` char array, both written as Java string
    literals. [entry_states] maps names to state indexes; each entry becomes
    a `public static final int` constant.
    """
    chars_map = {
            # These characters cannot be used in unicode form as Java's parser
            # unescape unicode sequences before parsing.
            # Both the character and the integer form are listed because the
            # arrays contain strings as well as ints.
            -1: "\\uFFFF",
            "\"": "\\\"",
            "\\": "\\\\",
            "\n": "\\n",
            "\r": "\\r",
            ord("\""): "\\\"",
            ord("\\"): "\\\\",
            ord("\n"): "\\n",
            ord("\r"): "\\r",
    }
    # Representation of [c] inside a Java string literal.
    def char_repr(c):
        if c in chars_map:
            return chars_map[c]
        if type(c) == int: # The edges array contains ints
            return "\\u%04x" % c
        if c in string.printable:
            return c
        return "\\u%04x" % ord(c)
    # Java string literal content for [array], split into concatenated
    # literals of 72 elements each.
    def gen_array(array):
        chars = list(map(char_repr, array))
        return "\" +\n \"".join(map(lambda b: "".join(b), batched(chars, 72)))
    # Java constant declaration for one `(name, state index)` entry.
    def gen_entry_state(s):
        name, state = s
        return " public static final int %s = %d;" % (name, state)
    print("""package juloo.keyboard2;
/** This file is generated, see [srcs/compose/compile.py]. */
public final class ComposeKeyData
{
public static final char[] states =
("%s").toCharArray();
public static final char[] edges =
("%s").toCharArray();
%s
}""" % (
        # Break the edges array every few characters using string concatenation.
        gen_array(map(lambda s: s[0], machine)),
        gen_array(map(lambda s: s[1], machine)),
        "\n".join(map(gen_entry_state, entry_states.items())),
    ))
# Entry point: every argument is a sequence file or a directory of sequence
# files. Each argument is compiled into its own trie, named after the
# argument's base name without extension.
total_sequences = 0
tries = {} # Ordered dict
for fname in sorted(sys.argv[1:]):
    # normpath removes a trailing separator, which would otherwise give an
    # empty base name for directories passed as "dir/".
    tname, _ = os.path.splitext(os.path.basename(os.path.normpath(fname)))
    if os.path.isdir(fname):
        sequences = parse_sequences_dir(fname)
    else:
        sequences = parse_sequences_file(fname)
    add_sequences_to_trie(sequences, tries.setdefault(tname, {}))
    total_sequences += len(sequences)
# The upper/lower case checks are only made on the input named "compose".
# Skip them instead of failing with a KeyError when it is not part of the
# arguments.
if "compose" in tries:
    check_for_warnings(tries["compose"])
entry_states, automata = make_automata(tries)
gen_java(entry_states, automata)
print("Compiled %d sequences into %d states. Dropped %d sequences. Generated %d warnings." % (total_sequences, len(automata), dropped_sequences, warning_count), file=sys.stderr)
# print_automata(automata)

View file

@ -0,0 +1,149 @@
{
"ا": {
"ا": "combining_alef_above",
"ع": "أ",
"و": "ۉ",
"ي": "ؽ",
"ی": "ؽ",
"۷": "combining_alef_below",
"٧": "combining_alef_below"
},
"ت": {
"د": "ط",
"ر": "ڑ",
"ش": "ث",
"ن": "ٹ"
},
"ج": {
"ش": "چ"
},
"ح": {
"ح": "combining_sukun"
},
"د": {
"ت": "ڈ",
"ز": "ذ",
"ت": "ڑ",
"۷": "ڕ"
},
"س": {
"ش": "ص"
},
"ش": {
"ت": "ث"
},
"ع": {
"ا": "إ",
"ه": "ۀ",
"و": "ؤ",
"ي": "ئ",
"ی": "ئ",
"۷": "combining_hamza_below",
"۸": "combining_hamza_above",
"٧": "combining_hamza_below",
"٨": "combining_hamza_above"
},
"غ": {
"ك": "گ",
"ک": "گ"
},
"ف": {
"و": "ڡ"
},
"ق": {
"و": "ۊ"
},
"ل": {
"ل": "combining_shaddah",
"۷": "ڵ",
"٧": "ڵ"
},
"ن": {
"ت": "ٹ",
"ه": "combining_fathatan",
"و": "combining_dammatan",
"ی": "combining_kasratan",
"ي": "combining_kasratan"
},
"ه": {
" ": "ە",
"ت": "ة",
"ع": "ۀ",
"ن": "combining_fathatan",
"ه": "combining_fatha",
"و": "ۆ",
"ي": "ێ",
"ی": "ێ"
},
"و": {
"ث": "ۋ",
"ع": "ؤ",
"ف": "ڡ",
"ن": "combining_dammatan",
"و": "combining_dammah",
"۷": "ۆ",
"۸": "ۉ",
"۸": "ۉ",
"٧": "ۆ",
"٨": "ۉ",
"٨": "ۉ"
},
"ي": {
" ": "ے",
"ا": "ى",
"ع": "ئ",
"ي": "combining_kasra",
"۷": "ێ",
"۸": "ؽ",
"ن": "combining_kasratan",
"٧": "ێ",
"٨": "ؽ"
},
"ی": {
" ": "ے",
"ا": "ى",
"ع": "ئ",
"ن": "combining_kasratan",
"ی": "combining_kasra",
"۷": "ێ",
"۸": "ؽ",
"٧": "ێ",
"٨": "ؽ"
},
"۷": {
"ا": "combining_alef_below",
"ر": "ڕ",
"ع": "combining_hamza_below",
"ل": "ڵ",
"و": "ۆ",
"ي": "ێ",
"ی": "ێ",
"۷": "combining_arabic_v"
},
"۸": {
"ع": "combining_hamza_above",
"و": "ۉ",
"و": "ۉ",
"ي": "ؽ",
"ی": "ؽ",
"۸": "combining_arabic_inverted_v"
},
"٧": {
"ا": "combining_alef_below",
"ر": "ڕ",
"ع": "combining_hamza_below",
"ل": "ڵ",
"و": "ۆ",
"ي": "ێ",
"٧": "combining_arabic_v",
"ی": "ێ"
},
"٨": {
"ع": "combining_hamza_above",
"و": "ۉ",
"و": "ۉ",
"ي": "ؽ",
"٨": "combining_arabic_inverted_v",
"ی": "ؽ"
}
}

View file

@ -0,0 +1,165 @@
{
",": {
"г": "ӻ",
"к": "ӄ",
"л": "ԓ",
"н": "ӈ",
"х": "ӽ",
"ѧ": "ӊ"
},
".": {
"г": "ӷ",
"ж": "җ",
"й": "ҋ",
"к": "қ",
"л": "ԯ",
"м": "ӎ",
"н": "ӊ",
"х": "ҳ",
"ч": "ҷ",
"і": "ї"
},
"а": {
"е": "ѣ",
"у": "ѡ",
"ч": "combining_aigu",
"ы": "ѣ",
"ь": "ꙙ",
"ꙋ": "ꙍ",
"ꙑ": "ѣ"
},
"б": {
"ч": "combining_slavonic_psili"
},
"г": {
",": "ӻ",
".": "ӷ",
"й": "ғ",
"к": "ґ",
"х": "ҁ",
"ј": "ғ"
},
"д": {
"е": "ꙉ",
"ж": "џ",
"з": "ꙃ",
"й": "ꙉ",
"ј": "ꙉ",
"ѥ": "ђ"
},
"е": {
"ч": "combining_trema"
},
"ж": {
".": "җ"
},
"з": {
"ф": "ҙ"
},
"и": {
"и": "ӣ",
"у": "ѵ"
},
"й": {
".": "ҋ",
"ч": "combining_breve"
},
"к": {
",": "ӄ",
".": "қ",
"г": "ґ",
"с": "ѯ",
"х": "ҁ",
"ш": "ѯ"
},
"л": {
",": "ԓ",
".": "ԯ",
"ь": "љ"
},
"м": {
".": "ӎ"
},
"н": {
",": "ӈ",
"·": "ԩ",
"ч": "combining_titlo",
"ь": "њ"
},
"о": {
"т": "ѿ",
"у": "ѹ",
"ч": "combining_inverted_breve"
},
"п": {
"с": "ѱ"
},
"т": {
"й": "ћ",
"ф": "ѳ",
"ј": "ћ"
},
"у": {
"и": "ѵ",
"й": "ў",
"у": "ӯ",
"ч": "combining_pokrytie",
"і": "ѵ",
"ј": "ў"
},
"х": {
",": "ӽ",
".": "ҳ",
"ч": "combining_slavonic_dasia"
},
"ч": {
".": "ҷ",
"а": "combining_aigu",
"б": "combining_slavonic_psili",
"е": "combining_trema",
"й": "combining_breve",
"н": "combining_titlo",
"о": "combining_inverted_breve",
"у": "combining_pokrytie",
"х": "combining_slavonic_dasia",
"ч": "combining_payerok",
"ч": "combining_payerok",
"ъ": "combining_vertical_tilde",
"ю": "combining_grave",
"ј": "combining_breve",
"ѧ": "combining_vzmet"
},
"ш": {
"т": "щ"
},
"ъ": {
"ч": "combining_vertical_tilde"
},
"ю": {
"а": "ꙓ",
"е": "ё",
"м": "ѭ",
"н": "ѩ",
"ч": "combining_grave"
},
"я": {
"ь": "ꙝ"
},
"і": {
"\"": "ї",
".": "ї",
"у": "ѵ",
"і": "ӣ"
},
"ј": {
"а": "ꙗ",
"ч": "combining_breve",
"ѣ": "ꙝ"
},
"ѡ": {
"т": "ѿ"
},
"ѧ": {
"ч": "combining_vzmet"
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,62 @@
{
"V": {
"s": "Š",
"c": "Č",
"z": "Ž"
},
"\\": {
"n": "\\n",
"t": "\\t"
},
"n": {
"g": {
"~": "n͠g"
}
},
"N": {
"g": {
"~": "N͠g"
},
"g": "Ŋ",
"n": ""
},
"g": {
"~": "g̃",
"u": "Ğ"
},
"A": {
"a": "Å",
"e": "Æ",
"t": "@"
},
"a": {
"E": "Æ"
},
"O": {
"e": "Œ"
},
"S": {
"s": "ẞ"
},
"I": {
"j": "IJ"
},
"D": {
"h": "Ð"
},
"E": {
"e": "Ə"
},
"Q": {
"q": ""
},
"R": {
"r": ""
},
"T": {
"h": "Þ"
},
"Z": {
"z": ""
}
}

File diff suppressed because it is too large Load diff

268
srcs/compose/fn.json Normal file
View file

@ -0,0 +1,268 @@
{
"1": "f1",
"2": "f2",
"3": "f3",
"4": "f4",
"5": "f5",
"6": "f6",
"7": "f7",
"8": "f8",
"9": "f9",
"0": "f10",
"<": "«",
">": "»",
"{": "",
"}": "",
"[": "",
"]": "",
"(": "“",
")": "”",
"'": "",
"\"": "„",
"-": "",
"_": "—",
"^": "¬",
"%": "‰",
"=": "≈",
"u": "µ",
"a": "æ",
"o": "œ",
"*": "°",
".": "…",
",": "·",
"!": "¡",
"?": "¿",
"|": "¦",
"§": "¶",
"†": "‡",
"×": "∙",
" ": "nbsp",
// arrows
"↖": "⇖",
"↑": "⇑",
"↗": "⇗",
"←": "⇐",
"→": "⇒",
"↙": "⇙",
"↓": "⇓",
"↘": "⇘",
"↔": "⇔",
"↕": "⇕",
// Currency symbols
"e": "€",
"l": "£",
"r": "₹",
"y": "¥",
"c": "¢",
"p": "₽",
"b": "₱",
"h": "₴",
"z": "₿",
// avoid showing these twice
"€": "removed",
"£": "removed",
// alternative greek letters
"π": "ϖ",
"θ": "ϑ",
"Θ": "ϴ",
"ε": "ϵ",
"β": "ϐ",
"ρ": "ϱ",
"σ": "ς",
"γ": "ɣ",
"φ": "ϕ",
"υ": "ϒ",
"κ": "ϰ",
// alternative math characters
"": "",
"∩": "⋂",
"∃": "∄",
"∫": "∮",
"Π": "∏",
"Σ": "∑",
"": "",
"∧": "⋀",
"⊷": "⊶",
"⊂": "⊆",
"⊃": "⊇",
"±": "∓",
// APL
"": "⍶",
"⍵": "⍹",
"⋄": "⌺",
"⍝": "⍧",
"∆": "⍙",
"∇": "⍢",
"": "⍡",
"⎕": "⍞",
// hebrew niqqud
"ק": "qamats", // kamatz
"ר": "hataf_qamats", // reduced kamatz
"ו": "holam",
"ם": "rafe",
"פ": "patah", // patach
"ש": "sheva",
"ד": "dagesh", // or mapiq
"ח": "hiriq",
"ף": "hataf_patah", // reduced patach
"ז": "qubuts", // kubuts
"ס": "segol",
"ב": "hataf_segol", // reduced segol
"צ": "tsere",
// Devanagari symbols
"ए": "ऍ",
"े": "ॅ",
"ऐ": "ऎ",
"ै": "ॆ",
"ऋ": "ॠ",
"ृ": "ॄ",
"ळ": "ऴ",
"र": "ऱ",
"क": "क़",
"ख": "ख़",
"ग": "ग़",
"घ": "ॻ",
"ढ": "ढ़",
"न": "ऩ",
"ड": "ड़",
"ट": "ॸ",
"ण": "ॾ",
"फ": "फ़",
"ऌ": "ॡ",
"ॢ": "ॣ",
"औ": "ॵ",
"ौ": "ॏ",
"ओ": "ऒ",
"ो": "ॊ",
"च": "ॼ",
"ज": "ज़",
"ब": "ॿ",
"व": "ॺ",
"य": "य़",
"अ": "ॲ",
"आ": "ऑ",
"ा": "ॉ",
"झ": "ॹ",
"ई": "ॴ",
"ी": "ऻ",
"इ": "ॳ",
"ि": "ऺ",
"उ": "ॶ",
"ऊ": "ॷ",
"ु": "ऄ",
"ष": "क्ष",
"थ": "त्र",
"द": "द्र",
"प": "प्र",
"श": "श्र",
"छ": "श्च",
"ँ": "ऀ",
"₹": "₨",
"ॖ": "ॗ",
"॓": "॔",
"॰": "ॱ",
"।": "॥",
"ं": "ॕ",
"़": "ॎ",
"ऽ": "",
// Persian numbers
"۱": "f1",
"۲": "f2",
"۳": "f3",
"۴": "f4",
"۵": "f5",
"۶": "f6",
"۷": "f7",
"۸": "f8",
"۹": "f9",
"۰": "f10",
// Arabic numbers
"١": "f1",
"٢": "f2",
"٣": "f3",
"٤": "f4",
"٥": "f5",
"٦": "f6",
"٧": "f7",
"٨": "f8",
"٩": "f9",
"٠": "f10",
// Cyrillic
"ꙑ": "ы",
"ы": "ꙑ",
"ш": "ѱ",
"з": "ꙁ",
"и": "і",
"і": "и",
"я": "ꙗ",
"е": "ѥ",
"ѡ": "ꙍ",
"о": "ѻ",
"а": "ѣ",
"э": "є",
"ъ": "ь",
"ь": "ъ",
"й": "ј",
"ꙉ": "ђ",
"ч": "ћ",
"ҁ": "қ",
"қ": "ҁ",
"џ": "ҷ",
"ҷ": "џ",
"ј": "й",
"у": "ꙋ",
"м": "ѫ",
"н": "ѧ",
"с": "ѕ",
"л": "ԯ",
"ԓ": "ԯ",
"\ua67d": "\u0483",
"\u0487": "\ua66f",
"ӈ": "ԩ",
// Arabic
":": "zwnj",
"ل": "ڵ",
"\u064F": "ۆ", // combining_dammah
"\u0650": "ێ", // combining_kasra
"ر": "ڕ",
"ب": "ٮ",
"ه": "ھ",
"ث": "پ",
"ز": "ژ",
"غ": "گ",
"ك": "ک",
"ا": "آ",
"ي": "ی",
"ک": "ك",
"ط": "ظ",
"ص": "ض",
"ی": "ي",
"ق": "غ",
"ع": "ء",
"ح": "ہ",
"ێ": "combining_kasra",
"ئ": "combining_hamza_above",
"ؽ": "combining_arabic_inverted_v",
"ۉ": "combining_arabic_inverted_v",
"ڡ": "combining_dammah",
"ة": "combining_fatha",
"إ": "combining_hamza_below",
"ۆ": "combining_arabic_v",
"س": "ـ",
"ف": "ڤ",
"ن": "ں",
// Tamil
"ய": ":௰",
"ஒ": ":ௐ",
"ள": ":௱",
"ச": ":௲",
"வ": ":௳"
}

View file

@ -0,0 +1,12 @@
{
"0": "",
"1": "১",
"2": "২",
"3": "৩",
"4": "",
"5": "৫",
"6": "৬",
"7": "",
"8": "৮",
"9": "৯"
}

View file

@ -0,0 +1,12 @@
{
"0": "",
"1": "१",
"2": "२",
"3": "३",
"4": "४",
"5": "५",
"6": "६",
"7": "७",
"8": "८",
"9": "९"
}

View file

@ -0,0 +1,12 @@
{
"0": "",
"1": "૧",
"2": "૨",
"3": "૩",
"4": "૪",
"5": "૫",
"6": "૬",
"7": "૭",
"8": "૮",
"9": "૯"
}

View file

@ -0,0 +1,14 @@
// Used with Arabic despite the name; called "Hindi numerals" in Arabic
// numpad_devanagari is used in Hindi
{
"0": "٠",
"1": "١",
"2": "٢",
"3": "٣",
"4": "٤",
"5": "٥",
"6": "٦",
"7": "٧",
"8": "٨",
"9": "٩"
}

View file

@ -0,0 +1,12 @@
{
"0": "",
"1": "೧",
"2": "೨",
"3": "೩",
"4": "೪",
"5": "೫",
"6": "೬",
"7": "೭",
"8": "೮",
"9": "೯"
}

View file

@ -0,0 +1,12 @@
{
"0": "۰",
"1": "۱",
"2": "۲",
"3": "۳",
"4": "۴",
"5": "۵",
"6": "۶",
"7": "۷",
"8": "۸",
"9": "۹"
}

View file

@ -0,0 +1,12 @@
{
"0": "",
"1": "௧",
"2": "௨",
"3": "௩",
"4": "௪",
"5": "௫",
"6": "௬",
"7": "௭",
"8": "௮",
"9": "௯"
}

138
srcs/compose/shift.json Normal file
View file

@ -0,0 +1,138 @@
{
"↙": "⇙",
"↓": "⇓",
"↘": "⇘",
"←": "⇐",
"→": "⇒",
"↖": "⇖",
"↑": "⇑",
"↗": "⇗",
"└": "╚",
"┴": "╩",
"┘": "╝",
"├": "╠",
"┼": "╬",
"┤": "╣",
"┌": "╔",
"┬": "╦",
"┐": "╗",
"─": "═",
"│": "║",
"∈": "∉",
"∋": "∌",
"⊂": "⊄",
"⊃": "⊅",
"⊆": "⊈",
"⊇": "⊉",
// superscript
"ᵃ": "ᴬ",
"ᵇ": "ᴮ",
"ᶜ": "ꟲ",
"ᵈ": "ᴰ",
"ᵉ": "ᴱ",
"ᶠ": "ꟳ",
"ᵍ": "ᴳ",
"ʰ": "ᴴ",
"ⁱ": "ᴵ",
"ʲ": "ᴶ",
"ᵏ": "ᴷ",
"ˡ": "ᴸ",
"ᵐ": "ᴹ",
"ⁿ": "ᴺ",
"ᵒ": "ᴼ",
"ᵖ": "ᴾ",
"ʳ": "ᴿ",
"ᵗ": "ᵀ",
"ᵘ": "ᵁ",
"ᵛ": "ⱽ",
"ʷ": "ᵂ",
"ᶾ": "ᴣ",
"ᵠ": "ᶲ",
// german eszett has an uppercase, but because it is uncommon, java doesn't know about it
"ß": "ẞ",
// these characters don't have a preapplied uppercase version, so we use combining characters
"ẗ": "T\u0308",
"ẘ": "W\u030A",
"ẙ": "Y\u030A",
"ǰ": "J\u030C",
"ȷ": "J\u0307",
// In Turkish, upper case of 'iı' is 'İI' but Java's toUpperCase will
// return 'II'. To make 'İ' accessible, make it the shift of 'ı'. This
// has the inconvenience of swapping i and ı on the keyboard.
"ı": "İ",
"₹": "₨",
// Gujarati alternate characters
"અ": "આ",
"ઇ": "ઈ",
"િ": "ી",
"ઉ": "ઊ",
"ુ": "ૂ",
"એ": "ઐ",
"ે": "ૈ",
"ઓ": "ઔ",
"ો": "ૌ",
"ક": "ખ",
"ગ": "ઘ",
"ચ": "છ",
"જ": "ઝ",
"ટ": "ઠ",
"ડ": "ઢ",
"ન": "ણ",
"ત": "થ",
"દ": "ધ",
"પ": "ફ",
"બ": "ભ",
"મ": "ં",
"લ": "ળ",
"સ": "શ",
"હ": "",
// Tamil alternate characters
"௹": "₨",
// Modern Hindi and Sanskrit
"अ": "आ",
"इ": "ई",
"ि": "ी",
"उ": "ऊ",
"ु": "ू",
"ए": "ऐ",
"े": "ै",
"ओ": "औ",
"ो": "ौ",
"क": "ख",
"ग": "घ",
"च": "छ",
"ज": "झ",
"ट": "ठ",
"ड": "ढ",
"न": "ण",
"त": "थ",
"द": "ध",
"ब": "भ",
"म": "ं",
"ल": "ळ",
"स": "श",
"ह": "",
"ऋ": "ॠ",
"ृ": "ॄ",
"ऌ": "ॡ",
"ॢ": "ॣ",
"॒": "॑",
"ॅ": "ॲ",
"ॉ": "ऑ",
// Mathematical symbols
"\uD835": {
"\uDD68": "𝕎", // 𝕨 𝕎
"\uDD69": "𝕏", // 𝕩 𝕏
"\uDD57": "𝔽", // 𝕗 𝔽
"\uDD58": "𝔾", // 𝕘 𝔾
"\uDD64": "𝕊" // 𝕤 𝕊
}
}