From 61a946224778ea79c671b31fe96eb2d81a6ccd30 Mon Sep 17 00:00:00 2001 From: Jerryplusy Date: Sun, 30 Nov 2025 10:50:12 +0800 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat(apps/ai.js):=20enhance=20messa?= =?UTF-8?q?ge=20processing=20to=20support=20multimodal=20inputs=20and=20im?= =?UTF-8?q?prove=20user=20message=20extraction.=20=E2=9C=A8=20feat(config/?= =?UTF-8?q?ai.json):=20add=20configuration=20options=20for=20multimodal=20?= =?UTF-8?q?model=20support.=20=E2=9C=A8=20feat(lib/ai/aiCaller.js):=20impl?= =?UTF-8?q?ement=20multimodal=20AI=20call=20handling=20and=20formatting=20?= =?UTF-8?q?for=20diverse=20message=20types.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/ai.js | 80 ++++++++++++++++++++++----------- config/ai.json | 4 ++ lib/ai/aiCaller.js | 109 +++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 160 insertions(+), 33 deletions(-) diff --git a/apps/ai.js b/apps/ai.js index d0114f9..2f4e02a 100644 --- a/apps/ai.js +++ b/apps/ai.js @@ -91,11 +91,11 @@ async function index(e) { if (e.user_id === e.bot.uin) { return; } - const userMessage = await extractUserMessage(e.msg, nickname, e); - if (!userMessage || userMessage.length === 0) { + const messageData = await extractUserMessage(e.msg, nickname, e); + if (!messageData || !messageData.text || messageData.text.length === 0) { return e.reply(segment.image(await Meme.getMeme(aiConfig.character, 'default'))); } - const result = await processMessage(userMessage, e, aiConfig); + const result = await processMessage(messageData, e, aiConfig); if (result && result.length > 0) { await sendResponse(e, result); } @@ -113,6 +113,7 @@ async function extractUserMessage(msg, nickname, e) { let at = []; const aiConfig = await ConfigControl.get('ai'); const maxMessageLength = aiConfig?.maxMessageLength || 100; + const originalMessages = []; e.message.forEach((message) => { logger.info(message); if (message.type === 'text' && message.text !== '' && message.text !== '\n'){ @@ -124,19 +125,29 @@ async function extractUserMessage(msg, nickname, e) { text.push(displayText); } else if (message.type === 'at') { at.push(message.qq); + } else if (message.type === 'image') { + if (message.image) { + originalMessages.push({ + type: 'image_url', + image_url: { + url: message.image + } + }); + } } }); + let returnMessage = ''; if (text.length > 0) { text.forEach((message) => { if(message === '') { } else { - returnMessage += `[${e.sender?.nickname},id:${e.user_id},seq:${e.message_id}]说:${message}\n` + returnMessage += `[${e.sender?.nickname},id:${e.user_id},seq:${e.message_id}]说:${message}\n`; } }); } if(at.length == 1 && at[0] == e.bot.uin && text.length == 0){ - return []; + return { text: [], originalMessages: originalMessages }; } if (at.length > 0) { for (const at1 of at) { @@ -144,14 +155,15 @@ async function extractUserMessage(msg, nickname, e) { //returnMessage += `[${e.sender?.nickname},id:${e.user_id}]@(at)了你,你的id是${at}\n`; } else { const atNickname = await e.group.pickMember(at1).nickname || '一个人'; - returnMessage += `[${e.sender?.nickname},id:${e.user_id},seq:${e.message_id}]@(at)了${atNickname},id是${at1}\n`; + const tempMessage = `[${e.sender?.nickname},id:${e.user_id},seq:${e.message_id}]@(at)了${atNickname},id是${at1}\n` + returnMessage += tempMessage; + originalMessages.push({ + type: 'text', + content: tempMessage + }); } } } - const imgUrls = await YunzaiUtils.getImages(e, 1, true); - if (imgUrls) { - returnMessage += 
`[${e.sender?.nickname},id:${e.user_id},seq:${e.message_id}]发送了一张图片(你可能暂时无法查看)\n`; - } if(e.source || e.reply_id){ let reply; if(e.getReply) reply = await e.getReply(); @@ -163,18 +175,33 @@ async function extractUserMessage(msg, nickname, e) { const msgArr = Array.isArray(reply) ? reply : reply.message || []; msgArr.forEach((msg) => { if(msg.type === 'text'){ - returnMessage += `[${e.sender?.nickname}]引用了[被引用消息:${reply.user_id == e.bot.uin ? '你' : reply.sender?.nickname},id:${reply.user_id},seq:${reply.message_id}]发的一段文本:${msg.text}\n` + const tempMessage = `[${e.sender?.nickname}]引用了[被引用消息:${reply.user_id == e.bot.uin ? '你' : reply.sender?.nickname},id:${reply.user_id},seq:${reply.message_id}]发的一段文本:${msg.text}\n` + returnMessage += tempMessage; + originalMessages.push({ + type: 'text', + content: tempMessage + }); } if(msg.type === 'image'){ returnMessage += `[${e.sender?.nickname}]引用了[被引用消息:${reply.user_id == e.bot.uin ? '你' : reply.sender?.nickname},id:${reply.user_id},seq:${reply.message_id}]发的一张图片(你可能暂时无法查看)\n`; + originalMessages.push({ + type: 'image_url', + image_url: { + url: msg.image + } + }); } }) } } - return returnMessage; + const imgUrls = await YunzaiUtils.getImages(e, 1, true); + if (imgUrls) { + returnMessage += `[${e.sender?.nickname},id:${e.user_id},seq:${e.message_id}]发送了一张图片(你可能暂时无法查看)\n`; + } + return { text: returnMessage, originalMessages: originalMessages }; } logger.warn('[crystelf-ai] 字符串匹配失败'); - return []; + return { text: [], originalMessages: [] }; } /** @@ -202,11 +229,11 @@ async function processMessage(userMessage, e, aiConfig) { /** * 关键词模式 - * @param userMessage + * @param messageData * @param e * @returns {Promise<[{type: string, data: string}]>} */ -async function handleKeywordMode(userMessage, e) { +async function handleKeywordMode(messageData, e) { const matchResult = await KeywordMatcher.matchKeywords(e.msg, 'ai'); if (matchResult && matchResult.matched) { @@ -229,17 +256,17 @@ async function handleKeywordMode(userMessage, e) { ]; } -async function handleAiMode(userMessage, e, aiConfig) { - return await callAiForResponse(userMessage, e, aiConfig); +async function handleAiMode(messageData, e, aiConfig) { + return await callAiForResponse(messageData, e, aiConfig); } -async function handleMixMode(userMessage, e, aiConfig) { +async function handleMixMode(messageData, e, aiConfig) { const isTooLong = await KeywordMatcher.isMessageTooLong(e.msg); if (isTooLong) { //消息太长,使用AI回复 logger.info('[crystelf-ai] 消息过长,使用ai回复'); - return await callAiForResponse(userMessage, e, aiConfig); + return await callAiForResponse(messageData, e, aiConfig); } else { const matchResult = await KeywordMatcher.matchKeywords(e.msg, 'ai'); if (matchResult && matchResult.matched) { @@ -264,7 +291,7 @@ async function handleMixMode(userMessage, e, aiConfig) { }; const newChatHistory = [ ...chatHistory, - { role: 'user', content: userMessage }, + { role: 'user', content: messageData.text }, { role: 'assistant', content: JSON.stringify(resMessage) }, ]; SessionManager.updateChatHistory(e.group_id, newChatHistory); @@ -274,12 +301,12 @@ async function handleMixMode(userMessage, e, aiConfig) { } else { logger.info('[crystelf-ai] 关键词匹配失败,使用ai回复'); //关键词匹配失败,使用AI回复 - return await callAiForResponse(userMessage, e, aiConfig); + return await callAiForResponse(messageData, e, aiConfig); } } } -async function callAiForResponse(userMessage, e, aiConfig) { +async function callAiForResponse(messageData, e, aiConfig) { try { //创建session const session = SessionManager.createOrGetSession(e.group_id, 
e.user_id, e); @@ -299,7 +326,10 @@ async function callAiForResponse(userMessage, e, aiConfig) { //构建聊天历史 const historyLen = aiConfig.chatHistory; const chatHistory = session.chatHistory.slice(-historyLen | -10); - const aiResult = await AiCaller.callAi(userMessage, chatHistory, memories, e); + + // 根据多模态开关决定调用方式 + const aiResult = await AiCaller.callAi(messageData.text, chatHistory, memories, e, messageData.originalMessages); + if (!aiResult.success) { logger.error(`[crystelf-ai] AI调用失败: ${aiResult.error}`); SessionManager.deactivateSession(e.group_id, e.user_id); @@ -313,14 +343,14 @@ async function callAiForResponse(userMessage, e, aiConfig) { //处理响应 const processedResponse = await ResponseHandler.processResponse( aiResult.response, - userMessage, + messageData.text, e.group_id, e.user_id ); //更新session const newChatHistory = [ ...chatHistory, - { role: 'user', content: userMessage }, + { role: 'user', content: messageData.text }, { role: 'assistant', content: aiResult.response }, ]; SessionManager.updateChatHistory(e.group_id, newChatHistory); diff --git a/config/ai.json b/config/ai.json index 2fac1a3..8979809 100644 --- a/config/ai.json +++ b/config/ai.json @@ -8,6 +8,10 @@ "apiKey": "", "?modelType": "模型名称,请根据baseApi填写的服务商的对应的模型", "modelType": "deepseek-ai/DeepSeek-V3.2-Exp", + "?multimodalEnabled": "是否启用多模态模型模式,启用后将忽略文本模型", + "multimodalEnabled": false, + "?multimodalModel": "多模态模型名称", + "multimodalModel": "Qwen/Qwen2.5-VL-72B-Instruct", "?temperature": "聊天温度,可选0-2.0,温度越高创造性越高", "temperature": 1.2, "?concurrency": "最大同时聊天群数,一个群最多一个人聊天", diff --git a/lib/ai/aiCaller.js b/lib/ai/aiCaller.js index c156e44..2243598 100644 --- a/lib/ai/aiCaller.js +++ b/lib/ai/aiCaller.js @@ -36,14 +36,39 @@ class AiCaller { * @param chatHistory 聊天历史 * @param memories 记忆 * @param e + * @param originalMessages 原始消息数组 * @returns {Promise<{success: boolean, response: (*|string), rawResponse: (*|string)}|{success: boolean, error: string}|{success: boolean, error}>} */ - async callAi(prompt, chatHistory = [], memories = [], e) { + async callAi(prompt, chatHistory = [], memories = [], e, originalMessages = []) { if (!this.isInitialized || !this.config) { logger.error('[crystelf-ai] 未初始化或配置无效'); return { success: false, error: 'AI调用器未初始化' }; } + try { + if (this.config.multimodalEnabled) { + return await this.callMultimodalAi(originalMessages, chatHistory, memories, e); + } else { + return await this.callTextAi(prompt, chatHistory, memories, e); + } + } catch (error) { + logger.error(`[crystelf-ai] 调用失败: ${error.message}`); + SessionManager.deactivateSession(e.group_id, e.user_id); + return { + success: false, + error: error.message, + }; + } + } + /** + * 文本AI模型 + * @param prompt 用户输入 + * @param chatHistory 聊天历史 + * @param memories 记忆 + * @param e + * @returns {Promise<{success: boolean, response: (*|string), rawResponse: (*|string)}|{success: boolean, error: string}>} + */ + async callTextAi(prompt, chatHistory = [], memories = [], e) { try { const fullPrompt = this.buildPrompt(prompt); const apiCaller = this.openaiChat; @@ -52,7 +77,7 @@ class AiCaller { chatHistory: chatHistory, model: this.config.modelType, temperature: this.config.temperature, - customPrompt: await this.getSystemPrompt(e,memories), + customPrompt: await this.getSystemPrompt(e, memories), }); if (result.success) { @@ -68,15 +93,83 @@ class AiCaller { }; } } catch (error) { - logger.error(`[crystelf-ai] 调用失败: ${error.message}`); - SessionManager.deactivateSession(e.group_id, e.user_id); - return { - success: false, - error: error.message, - 
};
+      throw error;
     }
   }
 
+  /**
+   * 多模态AI调用
+   * @param originalMessages 原始消息数组
+   * @param chatHistory 聊天历史
+   * @param memories 记忆
+   * @param e
+   * @returns {Promise<{success: boolean, response: (*|string), rawResponse: (*|string)}|{success: boolean, error: string}>}
+   */
+  async callMultimodalAi(originalMessages, chatHistory = [], memories = [], e) {
+    try {
+      const messages = await this.formatMultimodalMessages(originalMessages, chatHistory, memories, e);
+      const apiCaller = this.openaiChat;
+      const result = await apiCaller.callAi({
+        messages: messages,
+        model: this.config.multimodalModel,
+        temperature: this.config.temperature,
+      });
+
+      if (result.success) {
+        return {
+          success: true,
+          response: result.aiResponse,
+          rawResponse: result.aiResponse,
+        };
+      } else {
+        return {
+          success: false,
+          error: '多模态AI调用失败',
+        };
+      }
+    } catch (error) {
+      throw error;
+    }
+  }
+
+  /**
+   * 将原始消息格式转换为多模态格式
+   * @param originalMessages 原始消息数组
+   * @param chatHistory 聊天历史
+   * @param memories 记忆
+   * @param e
+   * @returns {Promise<Array>} 多模态格式的消息数组
+   */
+  async formatMultimodalMessages(originalMessages, chatHistory = [], memories = [], e) {
+    const messages = [];
+    const systemPrompt = await this.getSystemPrompt(e, memories);
+    messages.push({
+      role: 'system',
+      content: [{ type: 'text', text: systemPrompt }]
+    });
+    for (const history of chatHistory) {
+      const role = history.role === 'user' ? 'user' : 'assistant';
+      messages.push({
+        role: role,
+        content: [{ type: 'text', text: history.content }]
+      });
+    }
+    for (const msg of originalMessages) {
+      if (msg.type === 'text' && msg.content) {
+        messages.push({
+          role: 'user',
+          content: [{ type: 'text', text: msg.content }]
+        });
+      } else if (msg.type === 'image_url' && msg.image_url) {
+        messages.push({
+          role: 'user',
+          content: [{ type: 'image_url', image_url: { url: msg.image_url.url } }]
+        });
+      }
+    }
+    return messages;
+  }
+
   /**
    * 构造完整的prompt
    * @param prompt