move to ollama instead of llama-cpp-python

This commit is contained in:
zongor 2025-02-08 12:57:42 -05:00
parent 31dc90187a
commit e8dd751bea
2 changed files with 27 additions and 13 deletions
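In effect, the bot stops loading GGML weights in-process via llama_cpp and instead sends chat requests to a locally running Ollama server. A minimal sketch of the new call shape, using the 'marx' model name that appears in the diff below (assumes the Ollama daemon is running on its default port and the model has already been pulled or created):

import asyncio
from ollama import AsyncClient

async def ask(prompt: str) -> str:
    # AsyncClient talks to http://localhost:11434 by default
    client = AsyncClient()
    response = await client.chat('marx', messages=[{'role': 'user', 'content': prompt}])
    return response['message']['content']

print(asyncio.run(ask('Hello, who are you?')))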

bot.py

@@ -2,10 +2,11 @@
 import sys
 import os
 import random
 import re
 import discord
+from ollama import AsyncClient
 from discord.ext import commands
-from llama_cpp import Llama
 from dotenv import load_dotenv
 
 description = """
@@ -14,7 +15,7 @@ There are a number of utility commands being showcased here.
 """
 
 load_dotenv()
-llm = Llama(model_path="./models/gpt4all-7B/gpt4all-lora-quantized.bin")
+client = AsyncClient()
 
 bot = commands.Bot(
     command_prefix=commands.when_mentioned_or("!"),
@@ -22,11 +23,21 @@ bot = commands.Bot(
     intents=discord.Intents.all(),
 )
 
 
 @bot.event
 async def on_ready():
     print(f"Logged in as {bot.user} (ID: {bot.user.id})")
 
+def extract_response(input_string):
+    # Find the position of the closing tag
+    end_index = input_string.find('</think>')
+    if end_index == -1:
+        raise ValueError("Closing tag </think> not found")
+
+    # Extract the response part of the string
+    response = input_string[end_index + len('</think>'):].strip()
+
+    return response
 
 @bot.event
 async def on_message(message: discord.Message):
@@ -37,14 +48,16 @@ async def on_message(message: discord.Message):
     if f"""<@{bot.user.id}>""" in message.content:
         async with message.channel.typing():
-            question = f"""Text transcript of a never ending dialog, where {message.author} interacts with Karl AI.
-Karl is helpful, kind, honest, friendly, good at writing and never fails to answer {message.author}s requests immediately and with details and precision.
-There are no annotations like (30 seconds passed...) or (to himself), just what {message.author} and Karl say aloud to each other.
-The dialog lasts for years, the entirety of it is shared below. It's 10000 pages long.
-
-{message.author}: {message.content}"""
-            msg = llm(question, max_tokens=256, stop=[f"""{message.author}"""], echo=True)[
-                "choices"][0]["text"][len(question):]
+            messages = [
+                {
+                    'role': 'user',
+                    'content': message.content
+                }
+            ]
+            response = await client.chat('marx', messages=messages)
+            print(response['message']['content'])
+            msg = extract_response(response['message']['content'])
             await message.channel.send(msg)
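The new extract_response helper exists because the model's replies apparently arrive with the chain-of-thought wrapped in <think>...</think> tags, as reasoning models (e.g. the DeepSeek-R1 family) emit when served through Ollama; only the text after the closing tag is posted to Discord. A quick illustration with a made-up reply:

# Assumes this runs next to the bot.py from the diff above.
from bot import extract_response

# Hypothetical model output, for illustration only.
raw = "<think>The user greeted me, so greet them back.</think>Hello there!"
print(extract_response(raw))  # -> Hello there!

# A reply with no closing </think> tag raises ValueError, so callers that
# cannot guarantee the tags may want to catch it and fall back to the raw text.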

requirements.txt

@@ -1,4 +1,5 @@
-llama-cpp-python
-langchain
+#llama-cpp-python
+#langchain
+ollama
 discord.py
-python-dotenv
+python-dotenv
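With llama-cpp-python and langchain commented out rather than removed, a fresh environment only needs the three active lines. A tiny pre-flight check along these lines (not part of the commit; the 'marx' model name is taken from bot.py) fails fast if the dependency is missing or the daemon is down:

# preflight.py - sanity check for the Ollama setup, separate from bot.py.
from ollama import Client

client = Client()  # default host http://localhost:11434
reply = client.chat('marx', messages=[{'role': 'user', 'content': 'ping'}])
print(reply['message']['content'])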