From 81de4e4ccd86d8e5276061abb8bd93440f1fbd66 Mon Sep 17 00:00:00 2001 From: n9k Date: Sat, 16 Jul 2022 05:50:37 +0000 Subject: [PATCH] Emotes: expand regex for [a-zA-Z0-9_]-ended emotes Emote names with non-word characters at an end are now always matched. Previously they were only matched when adjacent to non-word characters. This means that an emote named ":joy:" will now be matched in "aaa:joy:zzz" where previously it wouldn't have been. --- anonstream/utils/chat.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/anonstream/utils/chat.py b/anonstream/utils/chat.py index f496852..6a50fd0 100644 --- a/anonstream/utils/chat.py +++ b/anonstream/utils/chat.py @@ -37,9 +37,15 @@ def schema_to_emotes(schema): assert not re.search(r'\s', name), \ 'whitespace is not allowed in emote names' name_markup = escape(name) - regex = re.compile( - r'(?:^|(?<=\s|\W))%s(?:$|(?=\s|\W))' % re.escape(name_markup) - ) + # If the emote name begins with a word character [a-zA-Z0-9_], + # match only if preceded by a non-word character or the empty + # string. Similarly for the end of the emote name. + # Examples: + # * ":joy:" matches "abc :joy:~xyz" and "abc:joy:xyz" + # * "JoySi" matches "abc JoySi~xyz" but NOT "abcJoySiabc" + onset = r'(?:^|(?<=\W))' if re.fullmatch(r'\w', name[0]) else r'' + finish = r'(?:$|(?=\W))' if re.fullmatch(r'\w', name[-1]) else r'' + regex = re.compile(''.join((onset, re.escape(name_markup), finish))) position, size = tuple(coords['position']), tuple(coords['size']) emotes.append((name, regex, position, size)) return emotes