Compare commits

No commits in common. "3a7eaafe9a97da9e10bf9bd86611a85565bdfc7c" and "6b5c5314758ed3e9072a00e62bd2043e9b3d63f3" have entirely different histories.

4 changed files with 45 additions and 167 deletions

.gitignore vendored (2 changes)

@@ -1,7 +1,5 @@
/cuda*/
/python/
/venv/
__pycache__
model.json
token.txt
*.gguf

app.py (140 changes)

@@ -1,8 +1,3 @@
import sys
sys.path.append(".")
sys.path.append("./lib")
import discord
import re
import requests
@@ -11,9 +6,6 @@ import time
import re
import asyncio
import functools
import os
import json
import importlib
from llama_cpp import Llama
@@ -24,58 +16,50 @@ attention = {}
message_cache = {}
lock = False
praise = 0
print("Loading model...", end=" ")
model_settings_path = "model.json"
model_settings = {
"model_path": None,
"formatter": "chatml",
"n_gpu_layers": -1,
"n_ctx": 32768,
"n_threads": 8,
"max_tokens": 16384,
"stop": ["<|im_end|>", "</s>", "<|im_start|>"],
"repeat_penalty": 1.1,
"temperature": 0.75,
"default_context": "You are a nameless AI assistant with the programmed personality of Lain from the anime \"Serial Experiments Lain.\" You are to answer all of the user's questions as quickly and briefly as possible using advanced English and cryptic messaging. You are not to go into full length detail unless asked."
}
if not os.path.isfile(model_settings_path):
    with open(model_settings_path, "w") as f:
        f.write(json.dumps(model_settings, indent=4))
with open(model_settings_path) as f:
    model_settings = json.loads(f.read())
if model_settings["model_path"] is None:
    for f in os.scandir("."):
        if re.search(r"\.gguf$", f.path):
            model_settings["model_path"] = f.path
            break
if model_settings["model_path"] is None:
    raise Exception("No .gguf model was found in the program directory. Please specify a model's relative or absolute path using the generated model.json configuration file.")
formatter = importlib.import_module(model_settings["formatter"])
LLM = Llama(
    model_path = model_settings["model_path"],
    n_gpu_layers = model_settings["n_gpu_layers"],
    n_ctx = model_settings["n_ctx"],
    model_path = "capybarahermes-2.5-mistral-7b.Q4_K_S.gguf",
    n_gpu_layers = -1,
    n_ctx = 32768,
    verbose = False,
    n_threads = model_settings["n_threads"])
    n_threads = 8)
print("Loaded model {model_path}".format(model_path=model_settings["model_path"]))
def get_messages_as_text(context, query, for_completion=False):
    # ChatML format:
    user_id = "user"
    assistant_id = "assistant"
    context_declaration = "<|im_start|>system\n"
    message_declaration = "<|im_start|>{author}\n"
    end_of_message = "<|im_end|>\n"
    stop_tokens = ["<|im_end|>", "</s>", "<|im_start|>"]
    output = ""
    if isinstance(query, str):
        query = [{"author": "user", "body": query}]
    if isinstance(query, list):
        for message in query:
            author = message["author"]
            body = message["body"]
            if "nickname" in message.keys():
                nickname = message["nickname"]
                author = nickname
            output = f"{output}{message_declaration.format(author=author)}{body}{end_of_message}"
    append = ""
    if for_completion:
        append = message_declaration.format(author=assistant_id)
    output = f"""{context_declaration}{context}{end_of_message}{output}{append}"""
    return output
def get_response(text):
    global lock
    global model_settings
    while lock == True:
        time.sleep(0.1)
        time.sleep(1)
    try:
        lock = True
@@ -83,11 +67,11 @@ def get_response(text):
        response = LLM(
            text,
            max_tokens = model_settings["max_tokens"],
            stop = model_settings["stop"],
            max_tokens = 16384,
            stop = ["<|im_end|>", "</s>", "<|im_start|>"],
            echo = False,
            repeat_penalty = model_settings["repeat_penalty"],
            temperature = model_settings["temperature"],
            repeat_penalty = 1.1,
            temperature = 0.75,
            stream = True)
        # Stream a buffered response
@@ -115,35 +99,6 @@ async def get_message(channel, message_id):
    message_cache[message_id] = reference
    return reference
async def y_or_n(user_input, question):
    global formatter
    context = "Analyze the conversation and answer the question as accurately as possible. Do not provide any commentary or extra help, you are programmed to respond with a Y or N."
    messages = []
    if isinstance(user_input, list):
        for i in user_input:
            messages.append(i)
    if isinstance(user_input, str):
        messages.append({"author": "user", "body": user_input})
    messages.append({"author": "user", "body": question})
    messages.append({"author": "user", "body": "Answer with Y or N only, no explanation is wanted."})
    f_body = formatter.format(context, messages, for_completion=True)
    f_resp = await get_response_wrapper(f_body)
    if f_resp[0].lower() == "y":
        return True
    if f_resp[0].lower() == "n":
        return False
    raise Exception("Answer provided does not begin with Y or N.")
# When the Discord bot starts up successfully:
@client.event
async def on_ready():
@@ -152,18 +107,15 @@ async def on_ready():
# When the Discord bot sees a new message anywhere:
@client.event
async def on_message(msg):
    global praise
    if msg.author.id == client.user.id:
        return
    messages = []
    msg_history = [message async for message in msg.channel.history(limit=10)]
    msg_history = [message async for message in msg.channel.history(limit=50)]
    msg_history.reverse()
    for m in msg_history:
        reference = None
        if m.reference is not None:
            reference = await get_message(msg.channel, m.reference.message_id)
@@ -177,7 +129,6 @@ async def on_message(msg):
        if reference is None or reference.author.id != msg.author.id:
            continue
        now = datetime.datetime.now(datetime.timezone.utc)
        then = m.created_at
        age = now - then
@@ -260,7 +211,7 @@ async def on_message(msg):
    if paying_attention:
        attention[session_name] = time.perf_counter()
    context = model_settings["default_context"]
    context = f"You are {bot_name}, an AI assistant with the programmed personality of Lain from the anime \"Serial Experiments Lain.\" You are to answer all of the user's questions as quickly and briefly as possible using advanced English and cryptic messaging. You are not to go into full length detail unless asked."
    if chl.topic is not None:
        context = chl.topic
@@ -268,13 +219,12 @@ async def on_message(msg):
print(f"{user_name}: {msg.content}")
print(f"{bot_name}: ", end="")
async with chl.typing():
f_body = formatter.format(context, messages, for_completion=True)
f_body = get_messages_as_text(context, messages, for_completion=True)
f_resp = await get_response_wrapper(f_body)
print("")
await chl.send(f_resp, reference=msg)
#await chl.send(f_resp)
if __name__ == "__main__":
# Read the token:
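
For reference, a sketch of what the inlined get_messages_as_text produces for a short exchange; the context string and message list below are illustrative, not taken from the repository:

# Illustrative call; the context and messages are hypothetical.
prompt = get_messages_as_text(
    "You are a helpful assistant.",
    [{"author": "user", "body": "Hello"},
     {"author": "assistant", "body": "Hi."},
     {"author": "user", "body": "Roll a die", "nickname": "alice"}],
    for_completion=True)
# prompt now reads, line by line:
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# Hello<|im_end|>
# <|im_start|>assistant
# Hi.<|im_end|>
# <|im_start|>alice
# Roll a die<|im_end|>
# <|im_start|>assistant

The trailing <|im_start|>assistant line is the completion hook: the model continues from there, and generation is cut at the hardcoded "<|im_end|>" stop token.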

chatml.py (33 changes)

@@ -1,33 +0,0 @@
# The ChatML format
# Takes messages in and provides a text body to autocomplete on
def format(context, query, for_completion=False):
    # ChatML format:
    user_id = "user"
    assistant_id = "assistant"
    context_declaration = "<|im_start|>system\n"
    message_declaration = "<|im_start|>{author}\n"
    end_of_message = "<|im_end|>\n"
    output = ""
    if isinstance(query, str):
        query = [{"author": "user", "body": query}]
    if isinstance(query, list):
        for message in query:
            author = message["author"]
            body = message["body"]
            if "nickname" in message.keys():
                nickname = message["nickname"]
                author = nickname
            output = f"{output}{message_declaration.format(author=author)}{body}{end_of_message}"
    append = ""
    if for_completion:
        append = message_declaration.format(author=assistant_id)
    output = f"""{context_declaration}{context}{end_of_message}{output}{append}"""
    return output
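
Before this change, app.py resolved this module at runtime via importlib.import_module(model_settings["formatter"]), so any module on sys.path exposing the same format(context, query, for_completion=False) signature could be swapped in through model.json. A minimal sketch of such a drop-in, assuming a hypothetical lib/alpaca.py that emits Alpaca-style prompts instead:

# Hypothetical lib/alpaca.py: same interface as chatml.format, different template.
def format(context, query, for_completion=False):
    if isinstance(query, str):
        query = [{"author": "user", "body": query}]
    output = f"{context}\n\n"
    for message in query:
        author = message.get("nickname", message["author"])
        header = "### Response:" if author == "assistant" else "### Instruction:"
        output = f"{output}{header}\n{message['body']}\n\n"
    if for_completion:
        output = f"{output}### Response:\n"
    return output

Selecting it would then only require setting "formatter": "alpaca" in model.json.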

commands.py (37 changes)

@@ -1,37 +0,0 @@
# You are a command executing assistant. Your duty is to interpret the user's request and identify the matching command from the Python code below. Read the comments of each function definition to determine how to best apply the user's request to the JSON template below. READ CAREFULLY AND RESPOND WITH JSON INTELLIGENTLY.
"""
JSON Template:
{
"command": "some_command",
"query": "some_query",
"arg1": "some_arg1",
"arg2": "some_arg2"
}
"""
# OBEY THE FOLLOWING RULES:
# 1. Your response must be entirely JSON with no markdown formatting.
# 2. Your JSON response must be a flat JSON object with no nested JSON objects.
# 3. Every key's value must be a string.
# 4. Do not hallucinate any additional keys.
# 5. Do NOT include result in answer.
# 6. Do NOT include keys with a null value.
# 7. Do NOT include null values.
# 8. If a value is null, do not include the key.
# 9. If you do not know the command, admit it in English.
import asyncio
class CommandList:
    # If the user is asking you to roll a random number, replace:
    # some_command with "roll_random"
    # some_query with null
    async def roll_random(self, json_blob):
        import random
        random_number = random.randint(1, 20)
        #await self.chl.send("### Inside commands.py")
        await self.chl.send(f"-# You rolled a {random_number}")
    # If you cannot match the user's request to a command, simply reply in ENGLISH stating you do not know how to perform the requested command.
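
Per the rules above, a request such as "roll a d20" should come back as the flat object {"command": "roll_random"} and nothing else. A minimal sketch of the receiving side under that assumption; this dispatch helper is hypothetical and not part of the deleted file:

import json

# Hypothetical dispatcher: parse the model's flat JSON reply and invoke the
# matching CommandList coroutine, falling back to the model's English reply.
async def dispatch(command_list, reply):
    try:
        blob = json.loads(reply)  # rule 1: the reply must be pure JSON
    except json.JSONDecodeError:
        return reply  # the model admitted in English that no command matched
    if not isinstance(blob, dict):
        return reply  # rule 2: only a flat JSON object counts
    handler = getattr(command_list, blob.get("command", ""), None)
    if handler is None:
        return "No matching command."
    await handler(blob)  # e.g. CommandList.roll_random(json_blob)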