Added support for a model at a remote address; see the mllm-streamlit project for this
This commit is contained in:
parent 3a7eaafe9a
commit 1aa7cc0a1e

app.py | 50
@@ -26,10 +26,9 @@ lock = False
 
 praise = 0
 
-print("Loading model...", end=" ")
-
 model_settings_path = "model.json"
 model_settings = {
+    "remote_address": None,
     "model_path": None,
     "formatter": "chatml",
     "n_gpu_layers": -1,
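For reference, a model.json configured for the new remote mode might look like the following sketch. The remote_address value reuses the example URL from the comment in the next hunk; the model_path, n_ctx, and n_threads values are placeholders, not values from this commit:

{
    "remote_address": "http://127.0.0.1:11434/",
    "model_path": "model.gguf",
    "formatter": "chatml",
    "n_gpu_layers": -1,
    "n_ctx": 2048,
    "n_threads": 8
}

Note that the unchanged model_path check at the top of the next hunk still runs before the remote branch, so model_path apparently needs a non-null value even when a remote address is set.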
@@ -58,22 +57,52 @@ if model_settings["model_path"] is None:
 if model_settings["model_path"] is None:
     raise Exception("No .gguf model was found in the program directory. Please specify a model's relative or absolute path using the generated model.json configuration file.")
 
 formatter = importlib.import_module(model_settings["formatter"])
 
-LLM = Llama(
-    model_path = model_settings["model_path"],
-    n_gpu_layers = model_settings["n_gpu_layers"],
-    n_ctx = model_settings["n_ctx"],
-    verbose = False,
-    n_threads = model_settings["n_threads"])
-
-print("Loaded model {model_path}".format(model_path=model_settings["model_path"]))
+LLM = None
+
+# Enable loading the model only if the remote address is unspecified:
+if model_settings["remote_address"] is None:
+    print("Loading model...", end=" ")
+
+    LLM = Llama(
+        model_path = model_settings["model_path"],
+        n_gpu_layers = model_settings["n_gpu_layers"],
+        n_ctx = model_settings["n_ctx"],
+        verbose = False,
+        n_threads = model_settings["n_threads"])
+
+    print("Loaded model {model_path}".format(model_path=model_settings["model_path"]))
+
+def get_response_remote(text):
+    global model_settings
+
+    remote_address = model_settings["remote_address"]
+
+    # e.g. http://127.0.0.1:11434/
+    # The project mllm-streamlit has a built-in webserver that runs inference on POSTed text:
+    response = requests.post(
+        remote_address,
+        data=text.encode("utf-8"),
+        headers={"Content-Type": "text/plain"},
+        stream=True)
+
+    response.raise_for_status()
+
+    for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
+        if chunk:
+            chunk_text = json.loads(chunk)
+            print(chunk_text, end="")
+            yield chunk_text
 
 def get_response(text):
     global lock
     global model_settings
+
+    # If the remote address is specified, use this routine:
+    if model_settings["remote_address"] is not None:
+        return "".join(get_response_remote(text))
 
     while lock == True:
         time.sleep(0.1)
 
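The client above calls json.loads on every streamed chunk, so whatever server sits at remote_address has to frame one JSON document per HTTP chunk. mllm-streamlit's built-in webserver is not part of this commit, so as a rough illustration only, here is a minimal sketch of a compatible endpoint; the handler shape, fake_generate, and the port are assumptions, not mllm-streamlit's code.

# A minimal sketch of an endpoint compatible with get_response_remote above,
# assuming the protocol that function implies. This is NOT mllm-streamlit's
# actual server; fake_generate stands in for real inference.
import json
from http.server import BaseHTTPRequestHandler, HTTPServer

def fake_generate(prompt):
    # Placeholder for token-by-token model output.
    for piece in ("You", " said: ", prompt):
        yield piece

class Handler(BaseHTTPRequestHandler):
    protocol_version = "HTTP/1.1"  # required for chunked transfer encoding

    def do_POST(self):
        length = int(self.headers.get("Content-Length", 0))
        prompt = self.rfile.read(length).decode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.send_header("Transfer-Encoding", "chunked")
        self.end_headers()
        for piece in fake_generate(prompt):
            payload = json.dumps(piece).encode("utf-8")
            # Frame one JSON document per HTTP chunk so the client's
            # iter_content(chunk_size=None) yields whole JSON strings.
            self.wfile.write(b"%x\r\n%s\r\n" % (len(payload), payload))
        self.wfile.write(b"0\r\n\r\n")  # terminating chunk

if __name__ == "__main__":
    # Matches the example address in the diff comment above.
    HTTPServer(("127.0.0.1", 11434), Handler).serve_forever()

Any server that accepts a plain-text POST body and streams JSON-encoded string fragments this way should work with get_response_remote unchanged.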
@@ -268,7 +297,6 @@ async def on_message(msg):
     print(f"{user_name}: {msg.content}")
     print(f"{bot_name}: ", end="")
 
-
     async with chl.typing():
         f_body = formatter.format(context, messages, for_completion=True)
         f_resp = await get_response_wrapper(f_body)
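Finally, independent of the bot, the exchange that get_response_remote performs can be reproduced against the sketch server above in a few lines; the URL is again the example value from the diff comment, not a value from this commit.

# Hypothetical smoke test, mirroring the request get_response_remote makes.
import json
import requests

resp = requests.post(
    "http://127.0.0.1:11434/",  # assumed remote_address
    data="Hello".encode("utf-8"),
    headers={"Content-Type": "text/plain"},
    stream=True)
resp.raise_for_status()

# Each HTTP chunk is expected to be one JSON-encoded string fragment:
text = "".join(json.loads(chunk)
               for chunk in resp.iter_content(chunk_size=None, decode_unicode=True)
               if chunk)
print(text)  # e.g. "You said: Hello" against the sketch server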