move to ollama instead of llama-cpp-python

This commit is contained in:
zongor 2025-02-08 12:57:42 -05:00
parent 31dc90187a
commit e8dd751bea
2 changed files with 27 additions and 13 deletions
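In effect, the bot stops loading GGML weights in-process via llama_cpp and instead sends chat requests to a locally running Ollama server. A minimal sketch of the new call shape, using the 'marx' model name that appears in the diff below (assumes the Ollama daemon is running on its default port and the model has already been pulled or created):

import asyncio
from ollama import AsyncClient

async def ask(prompt: str) -> str:
    # AsyncClient talks to http://localhost:11434 by default
    client = AsyncClient()
    response = await client.chat('marx', messages=[{'role': 'user', 'content': prompt}])
    return response['message']['content']

print(asyncio.run(ask('Hello, who are you?')))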

bot.py

@@ -2,10 +2,11 @@
 import sys
 import os
 import random
 import re
 import discord
+from ollama import AsyncClient
 from discord.ext import commands
-from llama_cpp import Llama
 from dotenv import load_dotenv
 
 description = """
@@ -14,7 +15,7 @@ There are a number of utility commands being showcased here.
 """
 
 load_dotenv()
-llm = Llama(model_path="./models/gpt4all-7B/gpt4all-lora-quantized.bin")
+client = AsyncClient()
 
 bot = commands.Bot(
     command_prefix=commands.when_mentioned_or("!"),
@@ -22,11 +23,21 @@ bot = commands.Bot(
     intents=discord.Intents.all(),
 )
 
 
 @bot.event
 async def on_ready():
     print(f"Logged in as {bot.user} (ID: {bot.user.id})")
 
+def extract_response(input_string):
+    # Find the position of the closing tag
+    end_index = input_string.find('</think>')
+    if end_index == -1:
+        raise ValueError("Closing tag </think> not found")
+
+    # Extract the response part of the string
+    response = input_string[end_index + len('</think>'):].strip()
+
+    return response
 
 @bot.event
 async def on_message(message: discord.Message):
@@ -37,14 +48,16 @@ async def on_message(message: discord.Message):
     if f"""<@{bot.user.id}>""" in message.content:
         async with message.channel.typing():
-            question = f"""Text transcript of a never ending dialog, where {message.author} interacts with Karl AI.
-Karl is helpful, kind, honest, friendly, good at writing and never fails to answer {message.author}s requests immediately and with details and precision.
-There are no annotations like (30 seconds passed...) or (to himself), just what {message.author} and Karl say aloud to each other.
-The dialog lasts for years, the entirety of it is shared below. It's 10000 pages long.
-
-{message.author}: {message.content}"""
-            msg = llm(question, max_tokens=256, stop=[f"""{message.author}"""], echo=True)[
-                "choices"][0]["text"][len(question):]
+            messages = [
+                {
+                    'role': 'user',
+                    'content': message.content
+                }
+            ]
+            response = await client.chat('marx', messages=messages)
+            print(response['message']['content'])
+            msg = extract_response(response['message']['content'])
             await message.channel.send(msg)
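The new extract_response helper exists because the model's replies apparently arrive with the chain-of-thought wrapped in <think>...</think> tags, as reasoning models (e.g. the DeepSeek-R1 family) emit when served through Ollama; only the text after the closing tag is posted to Discord. A quick illustration with a made-up reply:

# Assumes this runs next to the bot.py from the diff above.
from bot import extract_response

# Hypothetical model output, for illustration only.
raw = "<think>The user greeted me, so greet them back.</think>Hello there!"
print(extract_response(raw))  # -> Hello there!

# A reply with no closing </think> tag raises ValueError, so callers that
# cannot guarantee the tags may want to catch it and fall back to the raw text.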

requirements.txt

@@ -1,4 +1,5 @@
-llama-cpp-python
-langchain
+#llama-cpp-python
+#langchain
+ollama
 discord.py
-python-dotenv
+python-dotenv
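With llama-cpp-python and langchain commented out rather than removed, a fresh environment only needs the three active lines. A tiny pre-flight check along these lines (not part of the commit; the 'marx' model name is taken from bot.py) fails fast if the dependency is missing or the daemon is down:

# preflight.py - sanity check for the Ollama setup, separate from bot.py.
from ollama import Client

client = Client()  # default host http://localhost:11434
reply = client.chat('marx', messages=[{'role': 'user', 'content': 'ping'}])
print(reply['message']['content'])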