[RULES]
%include url
%include e-mail

# Word carrying both a bracketed prefix and a bracketed suffix.
# Shape: opening bracket, letters, optional dash/underscore, closing bracket,
# optional dash/underscore, then a word (optionally dash/underscore-joined),
# then optional dash/underscore, opening bracket, optional dash/underscore,
# letters, closing bracket.
# Example (Dutch): (oud)-studente(s)
WORD-PARPREFIX-PARSUFFIX=(?:\p{Ps}\p{L}+[\p{Pc}\p{Pd}]?\p{Pe}[\p{Pc}\p{Pd}]?)\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*(?:[\p{Pc}\p{Pd}]?\p{Ps}[\p{Pc}\p{Pd}]?\p{L}+\p{Pe})

# Word carrying only a bracketed prefix.
# Examples (Dutch): (oud)-studente, (on)zin
WORD-PARPREFIX=(?:\p{Ps}\p{L}+[\p{Pc}\p{Pd}]?\p{Pe}[\p{Pc}\p{Pd}]?)\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*

# Word carrying only a bracketed suffix.
# Example (German): könig(in)
WORD-PARSUFFIX=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*(?:[\p{Pc}\p{Pd}]?\p{Ps}[\p{Pc}\p{Pd}]?\p{L}+\p{Pe})

# Two or more letter sequences, each joined to the next by exactly one dash
# (\p{Pd}) or connector such as underscore (\p{Pc}).
# Original note: the parts are kept connected even when they are in parentheses.
WORD-COMPOUND=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)+

# Abbreviations with multiple periods: groups of one to three letters separated
# by periods, with an optional final period (e.g. t.ex. or fr.o.m).
ABBREVIATION=\p{L}{1,3}(?:\.\p{L}{1,3})+\.?

# Retain initials: the pattern is anchored with ^ and $, so the whole string
# must be exactly one uppercase or titlecase letter followed by a period.
INITIAL=^(?:\p{Lt}|\p{Lu})\.$

# Runs of two or more characters drawn from . - ! ? (ellipsis, "--", "?!" etc.).
# NOTE(review): the original comment called this "homogeneous" punctuation, but
# the pattern also accepts mixed runs such as ".-!" -- confirm which is intended.
PUNCTUATION-MULTI=(?:\.|\-|[!\?]){2,}

# Dates
# DATE: year-first form -- four digits, a hyphen, one or two digits, a hyphen or
# period, one or two digits, and an optional trailing period (e.g. 2012-03-25).
DATE=\p{N}{4}-\p{N}{1,2}[\.-]\p{N}{1,2}\.?
# DATE-SHORT: hyphen-separated form ending in a two- to four-digit number
# (e.g. 25-03-2012).
# NOTE(review): an optional opening bracket (\p{Ps}) is accepted after each
# hyphen; this looks unintentional -- confirm before relying on it.
DATE-SHORT=\p{N}{1,2}[-]\p{Ps}?\p{N}{1,2}[-]\p{Ps}?\p{N}{2,4}

# Abbreviated year: an apostrophe plus exactly two digits (e.g. '98), which must
# be followed by a non-digit or by the end of the input. Only the apostrophe and
# the two digits are inside the capture group.
NUMBER-YEAR=('\p{N}{2})(?:\P{N}|\z)
# Disabled earlier variant: it required a following non-digit character and so
# could not match at the very end of the input.
#NUMBER-YEAR=('\p{N}{2})\P{N}

# Times
# Clock times written with periods: one or two digits, a period, one or two
# digits, optionally followed by a period plus a seconds field and a
# case-insensitive am/pm marker (e.g. 9.30, 12.30.45, 7.15pm).
# The seconds field accepts one or two digits so that a time such as 12.30.45
# is matched whole instead of being cut off after the first seconds digit.
TIME=\p{N}{1,2}\.\p{N}{1,2}(?:\.\p{N}{1,2})?(?i:am|pm)?

# Retain digit sequences, including those starting with a period (.22) and
# negative numbers. The pattern is an optional minus sign followed by one or
# more groups of "optional period or comma, then digits".
NUMBER=-?(?:[\.,]?\p{N}+)+

# A single currency symbol (Unicode general category Sc).
CURRENCY=\p{Sc}

# One or more letters and/or nonspacing combining marks.
WORD=[\p{L}\p{Mn}]+

# A single punctuation character (any Unicode P category).
PUNCTUATION=\p{P}

# Catch-all: matches any single character.
UNKNOWN=.

[PREFIXES]
# No prefixes are defined.

[SUFFIXES]
# The apostrophe ending 's plus endings introduced by a colon.
# presumably the colon endings are Swedish inflections written after
# abbreviations and numerals (e.g. tv:n, tv:ar) -- TODO confirm how the
# tokenizer applies this list
's
:n
:a
:ar
:arna
:s

[ORDINALS]
# Colon endings; presumably the Swedish ordinal markers as in 1:a and 3:e
# -- TODO confirm
:a
:e

[TOKENS]
# NOTE(review): 's is listed here as well as under SUFFIXES -- confirm both
# listings are needed
's

[UNITS]
# Unit symbols, one per line (length, mass, volume, time, data size).
# NOTE(review): C and St are capitalised while every other entry is lower
# case -- confirm whether matching against this list is case-sensitive
km
m
cm
mm
g
kg
hg
cg
C
l
s
sec
min
gb
mb
kb
St


[CURRENCY]
# Currency codes written with letters, one per line. Single currency symbols
# are matched separately by the CURRENCY pattern (\p{Sc}) in the rules section.
SEK
EUR
DM
USD

[ABBREVIATIONS]
# Known abbreviations, one per line, written without the final period.
# Each entry should appear only once in this list.
adr
aug
bl.a
ca
cg
dec
d.v.s
dvs
dr
dyl
el
enl
etc
&c
ex
febr
f.d
fg.å
f.k
f.n
f.v
forts
fr.o.m
gm
h
ha
hg
hr
# NOTE(review): the next entry contains a space -- confirm that the tokenizer
# can match an abbreviation spanning two whitespace-separated words
i st.f
jfr
kg
kl
km
lr
m.fl
m
min
m.m
mm
mån
ngn
ngt
nr
o.dyl
o.likn
o.s.v
p.g.a
s.a.s
s.k
skn
st
t.ex
t.o.m
tfn
trpt
u.a
vard
v.g.v

[EOSMARKERS]
# Each entry below is a single character written as a \uXXXX escape.
# The comment lines above an entry give the character itself, its Unicode
# name, and its code point in decimal followed by hexadecimal.

# Character: !
# Name: EXCLAMATION MARK
# Code: 33 (0x21)
\u0021

# Character: ?
# Name: QUESTION MARK
# Code: 63 (0x3f)
\u003F

# Character: ;
# Name: GREEK QUESTION MARK
# Code: 894 (0x37e)
\u037e

# Character: ؟
# Name: ARABIC QUESTION MARK
# Code: 1567 (0x61f)
\u061f

# Character: 。
# Name: IDEOGRAPHIC FULL STOP
# Code: 12290 (0x3002)
\u3002

# Character: ｡
# Name: HALFWIDTH IDEOGRAPHIC FULL STOP
# Code: 65377 (0xff61)
\uff61

# Character: ？
# Name: FULLWIDTH QUESTION MARK
# Code: 65311 (0xff1f)
\uff1f

# Character: ！
# Name: FULLWIDTH EXCLAMATION MARK
# Code: 65281 (0xff01)
\uff01

# Character: ।
# Name: DEVANAGARI DANDA
# Code: 2404 (0x964)
\u0964

# Character: ։
# Name: ARMENIAN FULL STOP
# Code: 1417 (0x589)
\u0589

# Character: ՞
# Name: ARMENIAN QUESTION MARK
# Code: 1374 (0x55e)
\u055e

# Character: ።
# Name: ETHIOPIC FULL STOP
# Code: 4962 (0x1362)
\u1362

# Character: ᙮
# Name: CANADIAN SYLLABICS FULL STOP
# Code: 5742 (0x166e)
\u166e

# Character: ។
# Name: KHMER SIGN KHAN
# Code: 6100 (0x17d4)
\u17d4

# Character: ៕
# Name: KHMER SIGN BARIYOOSAN
# Code: 6101 (0x17d5)
\u17d5

# Character: ᠃
# Name: MONGOLIAN FULL STOP
# Code: 6147 (0x1803)
\u1803

# Character: ᠉
# Name: MONGOLIAN MANCHU FULL STOP
# Code: 6153 (0x1809)
\u1809
