feat(ai): Enhance multimodal support by allowing smart selection of model based on message type.

📄 config: Add smart multimodal option for flexible AI model usage.
🛠️ fix(aiCaller): Update AI caller logic to utilize smart multimodal settings effectively.
This commit is contained in:
Jerry 2025-12-05 23:48:47 +08:00
parent 894dba0aa0
commit 1283db5d1e
3 changed files with 34 additions and 4 deletions

View File

@@ -352,10 +352,23 @@ async function callAiForResponse(messageData, e, aiConfig) {
e.user_id e.user_id
); );
//更新session //更新session
let userMessageContent, assistantMessageContent;
const usedMultimodal = aiConfig.multimodalEnabled &&
(!aiConfig.smartMultimodal || messageData.originalMessages?.some(msg => msg.type === 'image_url'));
if (usedMultimodal && messageData.originalMessages) {
userMessageContent = messageData.originalMessages.map(msg => {
if (msg.type === 'text') return msg.content;
if (msg.type === 'image_url') return `[图片消息]`;
}).filter(Boolean).join('');
} else {
userMessageContent = messageData.text;
}
assistantMessageContent = aiResult.response;
const newChatHistory = [ const newChatHistory = [
...chatHistory, ...chatHistory,
{ role: 'user', content: messageData.text }, { role: 'user', content: userMessageContent },
{ role: 'assistant', content: aiResult.response }, { role: 'assistant', content: assistantMessageContent },
]; ];
SessionManager.updateChatHistory(e.group_id, newChatHistory); SessionManager.updateChatHistory(e.group_id, newChatHistory);
SessionManager.deactivateSession(e.group_id, e.user_id); SessionManager.deactivateSession(e.group_id, e.user_id);

View File

@@ -10,6 +10,8 @@
"modelType": "deepseek-ai/DeepSeek-V3.2-Exp", "modelType": "deepseek-ai/DeepSeek-V3.2-Exp",
"?multimodalEnabled": "是否启用多模态模型模式,启用后将忽略文本模型", "?multimodalEnabled": "是否启用多模态模型模式,启用后将忽略文本模型",
"multimodalEnabled": false, "multimodalEnabled": false,
"?smartMultimodal": "智能多模态模式,开启时只有文字用文本模型,有图片才用多模态模型",
"smartMultimodal": false,
"?multimodalModel": "多模态模型名称", "?multimodalModel": "多模态模型名称",
"multimodalModel": "Qwen/Qwen2.5-VL-72B-Instruct", "multimodalModel": "Qwen/Qwen2.5-VL-72B-Instruct",
"?temperature": "聊天温度,可选0-2.0,温度越高创造性越高", "?temperature": "聊天温度,可选0-2.0,温度越高创造性越高",

View File

@@ -45,7 +45,16 @@ class AiCaller {
return { success: false, error: 'AI调用器未初始化' }; return { success: false, error: 'AI调用器未初始化' };
} }
try { try {
if (this.config.multimodalEnabled) { if (this.config.smartMultimodal && this.config.multimodalEnabled) {
const hasImage = originalMessages.some(msg => msg.type === 'image_url');
if (hasImage) {
logger.info('[crystelf-ai] 检测到图片,使用多模态模型');
return await this.callMultimodalAi(originalMessages, chatHistory, memories, e);
} else {
logger.info('[crystelf-ai] 纯文本消息,使用文本模型');
return await this.callTextAi(prompt, chatHistory, memories, e);
}
} else if (this.config.multimodalEnabled) {
return await this.callMultimodalAi(originalMessages, chatHistory, memories, e); return await this.callMultimodalAi(originalMessages, chatHistory, memories, e);
} else { } else {
return await this.callTextAi(prompt, chatHistory, memories, e); return await this.callTextAi(prompt, chatHistory, memories, e);
@@ -72,9 +81,15 @@ class AiCaller {
try { try {
const fullPrompt = this.buildPrompt(prompt); const fullPrompt = this.buildPrompt(prompt);
const apiCaller = this.openaiChat; const apiCaller = this.openaiChat;
const formattedChatHistory = chatHistory.map(msg => ({
role: msg.role,
content: msg.content
}));
const result = await apiCaller.callAi({ const result = await apiCaller.callAi({
prompt: fullPrompt, prompt: fullPrompt,
chatHistory: chatHistory, chatHistory: formattedChatHistory,
model: this.config.modelType, model: this.config.modelType,
temperature: this.config.temperature, temperature: this.config.temperature,
customPrompt: await this.getSystemPrompt(e, memories), customPrompt: await this.getSystemPrompt(e, memories),