Fix ping removal for @Username and people on the notified users list.

master
float-trip 2023-07-23 14:52:00 +00:00
parent a06c66dff4
commit 42b06c8bfc
2 changed files with 35 additions and 19 deletions

View File

@ -30,7 +30,7 @@ class StopAfterPlusIsGenerated(LogitsProcessor):
class Model:
def __init__(self):
name = f"{config['data_dir']}/mpt-30b-drama"
name = "float-trip/mpt-30b-drama"
self.tokenizer = GPTNeoXTokenizerFast.from_pretrained(
name, pad_token="<|endoftext|>"
)

View File

@ -11,41 +11,56 @@ URL_REGEX = (
r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
)
tokenizer = GPTNeoXTokenizerFast.from_pretrained(f"{config['data_dir']}/mpt-30b-drama")
tokenizer = GPTNeoXTokenizerFast.from_pretrained("float-trip/mpt-30b-drama")
def remove_notifications(text):
"""Change @float-trip to @<i></i>float-trip and carp to c<i></i>arp."""
text = re.sub(rf"@(?!{config['username']}\b)", "@<i></i>", text)
"""Change @float-trip to <span>@</span>float-trip and carp to <span>c</span>arp."""
text = re.sub(rf"@(?!{config['username']}\b)", "<span>@</span>", text)
notified_users = [
"aevan",
"avean",
"joan",
"pewkie",
" capy",
"the rodent",
"carp",
"idio3",
"idio ",
"the_homocracy",
"schizocel",
"scitzocel",
"clit",
"snakes",
"sneks",
"snekky",
"snekchad",
"jc",
"justcool",
"clit",
"geese",
"kippy",
"mccox",
"chiobu",
"lawlz",
"transgirltradwife",
"impassionata",
"pizzashill",
"idio3",
"idio ",
"telegram ",
"schizo",
"joan",
"pewkie",
"homocracy",
"donger",
"geese",
"soren",
"marseyismywaifu",
"mimw",
"heymoon",
"gaypoon",
"jollymoon",
"chiobu",
"mccox",
"august",
"marco",
"klen",
]
def replace(match):
# Insert <i></i> after the first character of the matched string.
# Insert <span></span> around the first character of the matched string.
user = match.group()
return f"{user[:1]}<i></i>{user[1:]}"
return f"<span>{user[:1]}</span>{user[1:]}"
for user in notified_users:
text = re.sub(user, replace, text, flags=re.IGNORECASE)
@ -57,10 +72,11 @@ def format_reply(text):
for username in config["fake_usernames"]:
text.replace(username, config["username"])
text = replace_rdrama_images(text)
text = remove_notifications(text)
return text.strip()
def is_low_quality(reply, post, comments):
def is_low_quality(reply, _post, comments):
"""
Label the reply as low quality if:
- The Levenshtein distance determines it's similar to a previous comment in the thread.