feat(ai): Enhance multimodal support by allowing smart selection of model based on message type.

📄 config: Add smart multimodal option for flexible AI model usage.
🛠️ fix(aiCaller): Update AI caller logic to utilize smart multimodal settings effectively.
This commit is contained in:
Jerry 2025-12-05 23:48:47 +08:00
parent 894dba0aa0
commit 1283db5d1e
3 changed files with 34 additions and 4 deletions

View File

@@ -352,10 +352,23 @@ async function callAiForResponse(messageData, e, aiConfig) {
e.user_id e.user_id
); );
//更新session //更新session
let userMessageContent, assistantMessageContent;
const usedMultimodal = aiConfig.multimodalEnabled &&
(!aiConfig.smartMultimodal || messageData.originalMessages?.some(msg => msg.type === 'image_url'));
if (usedMultimodal && messageData.originalMessages) {
userMessageContent = messageData.originalMessages.map(msg => {
if (msg.type === 'text') return msg.content;
if (msg.type === 'image_url') return `[图片消息]`;
}).filter(Boolean).join('');
} else {
userMessageContent = messageData.text;
}
assistantMessageContent = aiResult.response;
const newChatHistory = [ const newChatHistory = [
...chatHistory, ...chatHistory,
{ role: 'user', content: messageData.text }, { role: 'user', content: userMessageContent },
{ role: 'assistant', content: aiResult.response }, { role: 'assistant', content: assistantMessageContent },
]; ];
SessionManager.updateChatHistory(e.group_id, newChatHistory); SessionManager.updateChatHistory(e.group_id, newChatHistory);
SessionManager.deactivateSession(e.group_id, e.user_id); SessionManager.deactivateSession(e.group_id, e.user_id);

View File

@@ -10,6 +10,8 @@
"modelType": "deepseek-ai/DeepSeek-V3.2-Exp", "modelType": "deepseek-ai/DeepSeek-V3.2-Exp",
"?multimodalEnabled": "是否启用多模态模型模式,启用后将忽略文本模型", "?multimodalEnabled": "是否启用多模态模型模式,启用后将忽略文本模型",
"multimodalEnabled": false, "multimodalEnabled": false,
"?smartMultimodal": "智能多模态模式,开启时只有文字用文本模型,有图片才用多模态模型",
"smartMultimodal": false,
"?multimodalModel": "多模态模型名称", "?multimodalModel": "多模态模型名称",
"multimodalModel": "Qwen/Qwen2.5-VL-72B-Instruct", "multimodalModel": "Qwen/Qwen2.5-VL-72B-Instruct",
"?temperature": "聊天温度,可选0-2.0,温度越高创造性越高", "?temperature": "聊天温度,可选0-2.0,温度越高创造性越高",

View File

@@ -45,7 +45,16 @@ class AiCaller {
return { success: false, error: 'AI调用器未初始化' }; return { success: false, error: 'AI调用器未初始化' };
} }
try { try {
if (this.config.multimodalEnabled) { if (this.config.smartMultimodal && this.config.multimodalEnabled) {
const hasImage = originalMessages.some(msg => msg.type === 'image_url');
if (hasImage) {
logger.info('[crystelf-ai] 检测到图片,使用多模态模型');
return await this.callMultimodalAi(originalMessages, chatHistory, memories, e);
} else {
logger.info('[crystelf-ai] 纯文本消息,使用文本模型');
return await this.callTextAi(prompt, chatHistory, memories, e);
}
} else if (this.config.multimodalEnabled) {
return await this.callMultimodalAi(originalMessages, chatHistory, memories, e); return await this.callMultimodalAi(originalMessages, chatHistory, memories, e);
} else { } else {
return await this.callTextAi(prompt, chatHistory, memories, e); return await this.callTextAi(prompt, chatHistory, memories, e);
@@ -72,9 +81,15 @@ class AiCaller {
try { try {
const fullPrompt = this.buildPrompt(prompt); const fullPrompt = this.buildPrompt(prompt);
const apiCaller = this.openaiChat; const apiCaller = this.openaiChat;
const formattedChatHistory = chatHistory.map(msg => ({
role: msg.role,
content: msg.content
}));
const result = await apiCaller.callAi({ const result = await apiCaller.callAi({
prompt: fullPrompt, prompt: fullPrompt,
chatHistory: chatHistory, chatHistory: formattedChatHistory,
model: this.config.modelType, model: this.config.modelType,
temperature: this.config.temperature, temperature: this.config.temperature,
customPrompt: await this.getSystemPrompt(e, memories), customPrompt: await this.getSystemPrompt(e, memories),