move to ollama instead of llama-cpp-python

zongor 2025-02-08 12:57:42 -05:00
parent 31dc90187a
commit e8dd751bea
2 changed files with 27 additions and 13 deletions

bot.py (33 changes)

@@ -2,10 +2,11 @@
 import sys
 import os
 import random
+import re
 import discord
+from ollama import AsyncClient
 from discord.ext import commands
-from llama_cpp import Llama
 from dotenv import load_dotenv
 
 
 description = """
@@ -14,7 +15,7 @@ There are a number of utility commands being showcased here.
 """
 
 load_dotenv()
-llm = Llama(model_path="./models/gpt4all-7B/gpt4all-lora-quantized.bin")
+client = AsyncClient()
 
 bot = commands.Bot(
     command_prefix=commands.when_mentioned_or("!"),
@@ -22,11 +23,21 @@ bot = commands.Bot(
     intents=discord.Intents.all(),
 )
 
 
 @bot.event
 async def on_ready():
     print(f"Logged in as {bot.user} (ID: {bot.user.id})")
 
 
+def extract_response(input_string):
+    # Find the position of the closing tag
+    end_index = input_string.find('</think>')
+    if end_index == -1:
+        raise ValueError("Closing tag </think> not found")
+    # Extract the response part of the string
+    response = input_string[end_index + len('</think>'):].strip()
+    return response
+
+
 @bot.event
 async def on_message(message: discord.Message):
@@ -37,14 +48,16 @@ async def on_message(message: discord.Message):
     if f"""<@{bot.user.id}>""" in message.content:
         async with message.channel.typing():
-            question = f"""Text transcript of a never ending dialog, where {message.author} interacts with Karl AI.
-Karl is helpful, kind, honest, friendly, good at writing and never fails to answer {message.author}s requests immediately and with details and precision.
-There are no annotations like (30 seconds passed...) or (to himself), just what {message.author} and Karl say aloud to each other.
-The dialog lasts for years, the entirety of it is shared below. It's 10000 pages long.
-
-{message.author}: {message.content}"""
-            msg = llm(question, max_tokens=256, stop=[f"""{message.author}"""], echo=True)[
-                "choices"][0]["text"][len(question):]
+            messages = [
+                {
+                    'role': 'user',
+                    'content': message.content
+                }
+            ]
+            response = await client.chat('marx', messages=messages)
+            print(response['message']['content'])
+            msg = extract_response(response['message']['content'])
             await message.channel.send(msg)
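
For reference, a minimal standalone sketch of the new call path (assumptions: an Ollama server is running on its default port, a model named 'marx' exists locally, and its replies open with a DeepSeek-R1-style <think>...</think> reasoning block, which is why extract_response strips everything up to the closing tag):

import asyncio
from ollama import AsyncClient

def extract_response(input_string):
    # Drop everything up to and including the model's </think> block.
    end_index = input_string.find('</think>')
    if end_index == -1:
        raise ValueError("Closing tag </think> not found")
    return input_string[end_index + len('</think>'):].strip()

async def main():
    client = AsyncClient()  # connects to http://localhost:11434 by default
    response = await client.chat('marx', messages=[
        {'role': 'user', 'content': 'Say hello.'},
    ])
    # e.g. "<think>The user wants a greeting.</think>Hello!" -> "Hello!"
    print(extract_response(response['message']['content']))

asyncio.run(main())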

requirements.txt

@@ -1,4 +1,5 @@
-llama-cpp-python
-langchain
+#llama-cpp-python
+#langchain
+ollama
 discord.py
 python-dotenv
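
Unlike llama-cpp-python, which loaded the model weights in-process, the ollama package is only a thin HTTP client; inference happens in a separately installed Ollama daemon. A quick smoke test for the new dependency (a sketch, assuming the daemon is up and a 'marx' model has been created):

import ollama

# One-shot synchronous call; raises ollama.ResponseError if the model is missing.
reply = ollama.chat(model='marx', messages=[{'role': 'user', 'content': 'ping'}])
print(reply['message']['content'])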