move to ollama instead of llama-cpp-python
This commit is contained in:
		
							parent
							
								
									31dc90187a
								
							
						
					
					
						commit
						e8dd751bea
					
				
							
								
								
									
										33
									
								
								bot.py
								
								
								
								
							
							
						
						
									
										33
									
								
								bot.py
								
								
								
								
							| 
						 | 
					@ -2,10 +2,11 @@
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
import random
 | 
					import random
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import discord
 | 
					import discord
 | 
				
			||||||
 | 
					from ollama import AsyncClient
 | 
				
			||||||
from discord.ext import commands
 | 
					from discord.ext import commands
 | 
				
			||||||
from llama_cpp import Llama
 | 
					 | 
				
			||||||
from dotenv import load_dotenv
 | 
					from dotenv import load_dotenv
 | 
				
			||||||
 | 
					
 | 
				
			||||||
description = """
 | 
					description = """
 | 
				
			||||||
| 
						 | 
					@ -14,7 +15,7 @@ There are a number of utility commands being showcased here.
 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
load_dotenv()
 | 
					load_dotenv()
 | 
				
			||||||
llm = Llama(model_path="./models/gpt4all-7B/gpt4all-lora-quantized.bin")
 | 
					client = AsyncClient()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bot = commands.Bot(
 | 
					bot = commands.Bot(
 | 
				
			||||||
    command_prefix=commands.when_mentioned_or("!"),
 | 
					    command_prefix=commands.when_mentioned_or("!"),
 | 
				
			||||||
| 
						 | 
					@ -22,11 +23,21 @@ bot = commands.Bot(
 | 
				
			||||||
    intents=discord.Intents.all(),
 | 
					    intents=discord.Intents.all(),
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
@bot.event
async def on_ready():
    """Announce on stdout that the bot has finished connecting to Discord."""
    user = bot.user
    print(f"Logged in as {user} (ID: {user.id})")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def extract_response(input_string):
 | 
				
			||||||
 | 
					    # Find the position of the closing tag
 | 
				
			||||||
 | 
					    end_index = input_string.find('</think>')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if end_index == -1:
 | 
				
			||||||
 | 
					        raise ValueError("Closing tag </think> not found")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Extract the response part of the string
 | 
				
			||||||
 | 
					    response = input_string[end_index + len('</think>'):].strip()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return response
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@bot.event
 | 
					@bot.event
 | 
				
			||||||
async def on_message(message: discord.Message):
 | 
					async def on_message(message: discord.Message):
 | 
				
			||||||
| 
						 | 
					@ -37,14 +48,16 @@ async def on_message(message: discord.Message):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if f"""<@{bot.user.id}>""" in message.content:
 | 
					    if f"""<@{bot.user.id}>""" in message.content:
 | 
				
			||||||
        async with message.channel.typing():
 | 
					        async with message.channel.typing():
 | 
				
			||||||
            question = f"""Text transcript of a never ending dialog, where {message.author} interacts with Karl AI.
 | 
					            messages = [
 | 
				
			||||||
        Karl is helpful, kind, honest, friendly, good at writing and never fails to answer {message.author}’s requests immediately and with details and precision.
 | 
					                {
 | 
				
			||||||
        There are no annotations like (30 seconds passed...) or (to himself), just what {message.author} and Karl say aloud to each other.
 | 
					                    'role':'user',
 | 
				
			||||||
        The dialog lasts for years, the entirety of it is shared below. It's 10000 pages long.
 | 
					                    'content': message.content 
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        {message.author}: {message.content}"""
 | 
					            response = await client.chat('marx', messages=messages)
 | 
				
			||||||
            msg = llm(question, max_tokens=256, stop=[f"""{message.author}"""], echo=True)[
 | 
					            print(response['message']['content'])
 | 
				
			||||||
                "choices"][0]["text"][len(question):]
 | 
					            msg = extract_response(response['message']['content'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        await message.channel.send(msg)
 | 
					        await message.channel.send(msg)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,4 +1,5 @@
 | 
				
			||||||
llama-cpp-python
 | 
					#llama-cpp-python
 | 
				
			||||||
langchain
 | 
					#langchain
 | 
				
			||||||
 | 
					ollama
 | 
				
			||||||
discord.py
 | 
					discord.py
 | 
				
			||||||
python-dotenv
 | 
					python-dotenv
 | 
				
			||||||
		Loading…
	
		Reference in New Issue