From e8dd751bea3b9659c4e6023e1505559024d40c32 Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 8 Feb 2025 12:57:42 -0500 Subject: [PATCH] move to ollama instead of llama-cpp-python --- bot.py | 33 +++++++++++++++++++++++---------- requirements.txt | 7 ++++--- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/bot.py b/bot.py index e455dc5..7c3386b 100755 --- a/bot.py +++ b/bot.py @@ -2,10 +2,11 @@ import sys import os import random +import re import discord +from ollama import AsyncClient from discord.ext import commands -from llama_cpp import Llama from dotenv import load_dotenv description = """ @@ -14,7 +15,7 @@ There are a number of utility commands being showcased here. """ load_dotenv() -llm = Llama(model_path="./models/gpt4all-7B/gpt4all-lora-quantized.bin") +client = AsyncClient() bot = commands.Bot( command_prefix=commands.when_mentioned_or("!"), @@ -22,11 +23,21 @@ bot = commands.Bot( intents=discord.Intents.all(), ) - @bot.event async def on_ready(): print(f"Logged in as {bot.user} (ID: {bot.user.id})") +def extract_response(input_string): + # Find the position of the closing tag + end_index = input_string.find('</think>') + + if end_index == -1: + raise ValueError("Closing tag not found") + + # Extract the response part of the string + response = input_string[end_index + len('</think>'):].strip() + + return response @bot.event async def on_message(message: discord.Message): @@ -37,14 +48,16 @@ async def on_message(message: discord.Message): if f"""<@{bot.user.id}>""" in message.content: async with message.channel.typing(): - question = f"""Text transcript of a never ending dialog, where {message.author} interacts with Karl AI. - Karl is helpful, kind, honest, friendly, good at writing and never fails to answer {message.author}’s requests immediately and with details and precision. - There are no annotations like (30 seconds passed...) or (to himself), just what {message.author} and Karl say aloud to each other. 

- The dialog lasts for years, the entirety of it is shared below. It's 10000 pages long. + messages = [ + { + 'role':'user', + 'content': message.content + } + ] - {message.author}: {message.content}""" - msg = llm(question, max_tokens=256, stop=[f"""{message.author}"""], echo=True)[ - "choices"][0]["text"][len(question):] + response = await client.chat('marx', messages=messages) + print(response['message']['content']) + msg = extract_response(response['message']['content']) await message.channel.send(msg) diff --git a/requirements.txt b/requirements.txt index 809c8f7..0337dad 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ -llama-cpp-python -langchain +#llama-cpp-python +#langchain +ollama discord.py -python-dotenv \ No newline at end of file +python-dotenv