diff --git a/README.md b/README.md index 8a8e75c..975249c 100644 --- a/README.md +++ b/README.md @@ -15,17 +15,15 @@ apps -- 业务核心 config -- 配置文件 -img -- readme图片 +img -- README图片 model -- 核心文件[建议不动] resource -- 资源文件 -test -- 爬虫文件[python] - utils -- 工具类 -index -- 主入口 + ## 🧏 ‍使用实例 ![help](./img/example.png) @@ -59,7 +57,22 @@ sudo apt-get install ffmpeg 4. 【可选】小程序解析仅适配了[Yoimiya/Yunzai-Bot](https://gitee.com/yoimiya-kokomi/Yunzai-Bot) 如果解析有问题参考issue:[#I6MFF7](https://gitee.com/kyrzy0416/rconsole-plugin/issues/I6MFF7) -![小程序解析](./img/example6.png) +<img src="./img/example6.png" alt="小程序解析" /> + +5. 【可选】对哔哩哔哩解析进行总结:需要填写openaiApiKey和哔哩哔哩的SESSDATA + +<img src="./img/example7.png" alt="小程序解析" /> + +> 哔哩哔哩的SESSDATA? +> +> 进入网站 -- 打开F12开发者选项 -- 应用 -- 找到Cookie -- 找到SESSDATA -- 复制 -- 粘贴到tools.yaml 或 锅巴 + +> 如何获取openaiApiKey? +> +> 1. 要有一个OpenAI的账号(https://chat.openai.com/) +> 2. 获取api-key参考(https://chatgpt.cn.obiscr.com/blog/posts/2023/How-to-get-api-key/) + +注:以上条件缺一不可(**SESSDATA+OpenAiApiKey+魔法**),不然无法触发 ## 🧑‍🌾 进阶内容 【可选】相关配置(apps/tools.js): @@ -72,6 +85,18 @@ sudo apt-get install ffmpeg ## 📦 业务 ![help](./img/help.jpg) +## 📝 计划功能 +- [ ] YouTube解析(这个可能要🕊很久) +- [x] Instagram解析 + - [ ] 单张图片解析 + - [ ] 视频解析 +- [ ] 哔哩哔哩总结 +- [ ] 精简工具功能冗余代码 +- [ ] 自由修改插件优先级 +- [ ] ... 
+ + + ## 🔗 链接 - [云崽](https://gitee.com/yoimiya-kokomi/Yunzai-Bot) diff --git a/apps/tools.js b/apps/tools.js index 0661dfb..b77812a 100644 --- a/apps/tools.js +++ b/apps/tools.js @@ -10,11 +10,14 @@ import HttpProxyAgent from "https-proxy-agent"; import { mkdirsSync } from "../utils/file.js"; import { downloadBFile, getDownloadUrl, mergeFileToMp4, getDynamic } from "../utils/bilibili.js"; import { parseUrl, parseM3u8, downloadM3u8Videos, mergeAcFileToMp4 } from "../utils/acfun.js"; -import { transMap, douyinTypeMap, TEN_THOUSAND, XHS_CK } from "../utils/constant.js"; +import { transMap, douyinTypeMap, XHS_CK } from "../utils/constant.js"; import { getIdVideo, generateRandomStr } from "../utils/common.js"; import config from "../model/index.js"; import Translate from "../utils/trans-strategy.js"; import { getXB } from "../utils/x-bogus.js"; +import { getVideoInfo } from "../utils/biliInfo.js"; +import { getBiliGptInputText } from "../utils/biliSummary.js"; +import { ChatGPTClient } from "@waylaidwanderer/chatgpt-api"; export class tools extends plugin { constructor() { @@ -81,12 +84,22 @@ export class tools extends plugin { // 视频保存路径 this.defaultPath = this.toolsConfig.defaultPath; // 代理接口 - // TODO 填写服务器的内网ID和clash的端口 this.proxyAddr = this.toolsConfig.proxyAddr; this.proxyPort = this.toolsConfig.proxyPort; this.myProxy = `http://${this.proxyAddr}:${this.proxyPort}`; - // 加载twitter配置 - this.bearerToken = this.toolsConfig.bearerToken; + // 加载哔哩哔哩配置 + this.biliSessData = this.toolsConfig.biliSessData; + // 加载gpt配置 + this.openaiApiKey = this.toolsConfig.openaiApiKey; + // 加载gpt客户端 + this.chatGptClient = new ChatGPTClient(this.openaiApiKey, { + modelOptions: { + model: "gpt-3.5-turbo", + temperature: 0, + }, + proxy: this.myProxy, + debug: false, + }); } // 翻译插件 @@ -143,13 +156,16 @@ export class tools extends plugin { // const url = `https://www.iesdouyin.com/aweme/v1/web/aweme/detail/?aweme_id=${ douId 
}&aid=1128&version_name=23.5.0&device_platform=android&os_version=2333`; // 感谢 Evil0ctal(https://github.com/Evil0ctal)提供的header 和 B1gM8c(https://github.com/B1gM8c)的逆向算法X-Bogus const headers = { - 'accept-encoding': 'gzip, deflate, br', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36', - 'referer': 'https://www.douyin.com/', - 'cookie': "s_v_web_id=verify_leytkxgn_kvO5kOmO_SdMs_4t1o_B5ml_BUqtWM1mP6BF;" - } + "accept-encoding": "gzip, deflate, br", + "User-Agent": + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36", + referer: "https://www.douyin.com/", + cookie: "s_v_web_id=verify_leytkxgn_kvO5kOmO_SdMs_4t1o_B5ml_BUqtWM1mP6BF;", + }; const dyApi = "https://www.douyin.com/aweme/v1/web/aweme/detail/?"; - const params = `msToken=${generateRandomStr(107)}&device_platform=webapp&aid=6383&channel=channel_pc_web&aweme_id=${douId}&pc_client_type=1&version_code=190500&version_name=19.5.0&cookie_enabled=true&screen_width=1344&screen_height=756&browser_language=zh-CN&browser_platform=Win32&browser_name=Firefox&browser_version=110.0&browser_online=true&engine_name=Gecko&engine_version=109.0&os_name=Windows&os_version=10&cpu_core_num=16&device_memory=&platform=PC&webid=7158288523463362079`; + const params = `msToken=${generateRandomStr( + 107, + )}&device_platform=webapp&aid=6383&channel=channel_pc_web&aweme_id=${douId}&pc_client_type=1&version_code=190500&version_name=19.5.0&cookie_enabled=true&screen_width=1344&screen_height=756&browser_language=zh-CN&browser_platform=Win32&browser_name=Firefox&browser_version=110.0&browser_online=true&engine_name=Gecko&engine_version=109.0&os_name=Windows&os_version=10&cpu_core_num=16&device_memory=&platform=PC&webid=7158288523463362079`; // xg参数 const xbParam = getXB(params.replaceAll("&", "%26")); // const param = resp.data.result[0].paramsencode; @@ -197,7 +213,7 @@ export class tools extends 
plugin { // console.log(no_watermark_image_list) await this.reply(await Bot.makeForwardMsg(no_watermark_image_list)); } - }); + }); }); return true; } @@ -291,7 +307,6 @@ export class tools extends plugin { url = url.substring(0, url.indexOf("?")); } const dynamicId = /[^/]+(?!.*\/)/.exec(url)[0]; - // console.log(dynamicId) getDynamic(dynamicId).then(async resp => { if (resp.dynamicSrc.length > 0) { e.reply(`识别:哔哩哔哩动态, ${resp.dynamicDesc}`); @@ -304,9 +319,6 @@ export class tools extends plugin { }); }); await this.reply(await Bot.makeForwardMsg(dynamicSrcMsg)); - // resp.dynamicSrc.forEach(item => { - // e.reply(segment.image(item)); - // }); } else { e.reply(`识别:哔哩哔哩动态, 但是失败!`); } @@ -320,41 +332,8 @@ export class tools extends plugin { } // 视频信息获取例子:http://api.bilibili.com/x/web-interface/view?bvid=BV1hY411m7cB // 请求视频信息 - (function () { - const baseVideoInfo = "http://api.bilibili.com/x/web-interface/view"; - const videoId = /video\/[^\?\/ ]+/.exec(url)[0].split("/")[1]; - // 获取视频信息,然后发送 - fetch( - videoId.startsWith("BV") - ? `${baseVideoInfo}?bvid=${videoId}` - : `${baseVideoInfo}?aid=${videoId}`, - ).then(async resp => { - const respJson = await resp.json(); - const respData = respJson.data; - // 视频标题 - const title = "识别:哔哩哔哩," + respData.title + "\n"; - // 视频图片(暂时不加入,影响性能) - // const videoCover = respData.pic; - // 视频信息 - let { view, danmaku, reply, favorite, coin, share, like } = respData.stat; - // 数据处理 - const dataProcessing = data => { - return Number(data) >= TEN_THOUSAND - ? 
(data / TEN_THOUSAND).toFixed(1) + "万" - : data; - }; - // 组合内容 - const combineContent = `总播放量:${dataProcessing( - view, - )}, 弹幕数量:${dataProcessing(danmaku)}, 回复量:${dataProcessing( - reply, - )}, 收藏数:${dataProcessing(favorite)}, 投币:${dataProcessing( - coin, - )}, 分享:${dataProcessing(share)}, 点赞:${dataProcessing(like)}\n`; - const msgCombine = [title, combineContent /*, segment.image(videoCover)*/]; - await e.reply(msgCombine); - }); - })(); + const { title, combineContent, aid, cid } = await getVideoInfo(url); + e.reply([title, combineContent]); await getDownloadUrl(url) .then(data => { @@ -371,6 +350,14 @@ export class tools extends plugin { logger.error(err); e.reply("解析失败,请重试一下"); }); + + // 如果有ck 并且 有openai的key + if (this.biliSessData && this.openaiApiKey) { + const prompt = await getBiliGptInputText(title, aid, cid); + const response = await this.chatGptClient.sendMessage(prompt); + // 暂时不设计上下文 + e.reply(response.response); + } return true; } @@ -862,12 +849,17 @@ export class tools extends plugin { .catch(err => reject(err)); }); }; - await fetch(API).then(async resp => { + await fetch(API, { + headers: { + "User-Agent": + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", + }, + }).then(async resp => { const html = await resp.text(); const desc = html.match(/(?<=content=").*?(?=\")/g)?.[2]; const images = html.match(/
/g); if (!_.isNull(images)) { - e.reply(`识别:Insta,${desc || "暂无描述"}\n`) + e.reply(`识别:Insta,${desc || "暂无描述"}\n`); images.map((item, index) => { const imgUrl = /(?<=data-src=").*?(?=")/ .exec(item)[0] diff --git a/config/tools.yaml b/config/tools.yaml index 1c71faa..5681ceb 100644 --- a/config/tools.yaml +++ b/config/tools.yaml @@ -4,3 +4,6 @@ proxyPort: '7890' # 魔法端口 translateAppId: '' # 百度翻译APP ID translateSecret: '' # 百度翻译密匙 + +biliSessData: '' # 哔哩哔哩的SESSDATA +openaiApiKey: '' # OpenAI的API Key, sk... diff --git a/guoba.support.js b/guoba.support.js index 53045d4..643e695 100644 --- a/guoba.support.js +++ b/guoba.support.js @@ -75,6 +75,28 @@ export function supportGuoba() { placeholder: "请输入视频暂存位置", }, }, + { + field: "tools.biliSessData", + label: "哔哩哔哩SESSDATA", + bottomHelpMessage: + "如何获取具体参考我的文档说明:https://gitee.com/kyrzy0416/rconsole-plugin", + component: "Input", + required: false, + componentProps: { + placeholder: "请输入哔哩哔哩SESSDATA", + }, + }, + { + field: "tools.openaiApiKey", + label: "OpenAI的API-KEY", + bottomHelpMessage: + "如何获取参考文章:https://chatgpt.cn.obiscr.com/blog/posts/2023/How-to-get-api-key/", + component: "Input", + required: false, + componentProps: { + placeholder: "请输入OpenAI的API-KEY(sk.....)", + }, + }, ], getConfigData() { const toolsData = { diff --git a/img/example7.png b/img/example7.png new file mode 100644 index 0000000..99a8762 Binary files /dev/null and b/img/example7.png differ diff --git a/img/help.jpg b/img/help.jpg index 76af5ee..2ff1eb6 100644 Binary files a/img/help.jpg and b/img/help.jpg differ diff --git a/package.json b/package.json index 6758f3b..7e22527 100644 --- a/package.json +++ b/package.json @@ -4,6 +4,7 @@ "type": "module", "dependencies": { "axios": "^1.3.4", - "tunnel": "^0.0.6" + "tunnel": "^0.0.6", + "@waylaidwanderer/chatgpt-api": "^1.31.5" } } diff --git a/utils/biliInfo.js b/utils/biliInfo.js new file mode 100644 index 0000000..e87d363 --- /dev/null +++ b/utils/biliInfo.js @@ -0,0 +1,42 @@ +import fetch from 
"node-fetch"; // completes the `import fetch from` statement that begins on the previous line
import { TEN_THOUSAND } from "./constant.js";

/**
 * Fetch the metadata of a Bilibili video and format it for a chat reply.
 *
 * @param {string} url - Video page URL containing a `video/<id>` segment.
 * @returns {Promise<{title: string, combineContent: string, aid: *, cid: *}>}
 *   `title` and `combineContent` are ready-to-send message fragments; `aid`
 *   and `cid` are passed through from the API response.
 * @throws {Error} If no video id can be extracted from `url`, or if the API
 *   response carries no `data` object.
 */
async function getVideoInfo(url) {
  const baseVideoInfo = "https://api.bilibili.com/x/web-interface/view";
  const idMatch = /video\/([^?/ ]+)/.exec(String(url));
  if (idMatch === null) {
    throw new Error(`Cannot extract a Bilibili video id from: ${url}`);
  }
  const videoId = idMatch[1];
  // Ids starting with "BV" are sent as `bvid`, everything else as `aid`.
  // NOTE(review): the `av` prefix is stripped on the assumption that the API
  // expects a bare number for `aid` — confirm against the API documentation.
  const query = videoId.startsWith("BV")
    ? `bvid=${videoId}`
    : `aid=${videoId.replace(/^av/i, "")}`;

  const resp = await fetch(`${baseVideoInfo}?${query}`);
  const respJson = await resp.json();
  const respData = respJson.data;
  if (!respData) {
    throw new Error(
      `Bilibili view API returned no data for ${videoId} (code: ${respJson.code}, message: ${respJson.message})`,
    );
  }

  // Video title line
  const title = `识别:哔哩哔哩,${respData.title}\n`;
  // Cover image (respData.pic) is intentionally not sent, for performance reasons.
  const { view, danmaku, reply, favorite, coin, share, like } = respData.stat;
  // Abbreviate counters >= TEN_THOUSAND to one decimal place followed by "万".
  const dataProcessing = data =>
    Number(data) >= TEN_THOUSAND ? `${(data / TEN_THOUSAND).toFixed(1)}万` : data;
  // Statistics line
  const combineContent =
    `总播放量:${dataProcessing(view)}, ` +
    `弹幕数量:${dataProcessing(danmaku)}, ` +
    `回复量:${dataProcessing(reply)}, ` +
    `收藏数:${dataProcessing(favorite)}, ` +
    `投币:${dataProcessing(coin)}, ` +
    `分享:${dataProcessing(share)}, ` +
    `点赞:${dataProcessing(like)}\n`;

  return {
    title,
    combineContent,
    aid: respData.aid,
    // Guard the first page as well, so an empty `pages` array cannot throw.
    // NOTE(review): falling back to a top-level `cid` assumes the API exposes one — confirm.
    cid: respData.pages?.[0]?.cid ?? respData.cid,
  };
}

export { getVideoInfo };

// ---------------------------------------------------------------------------
// utils/biliSummary.js (new file)
// Builds the prompt text that is sent to the GPT client to summarise a video.
// NOTE(review): this module calls `fetch` without importing it, so it relies
// on a `fetch` binding being in scope (global in Node 18+).
// ---------------------------------------------------------------------------

// NOTE(review): hard-coded session cookie. It is kept only as the default so
// existing three-argument callers keep working; callers should pass their own
// configured SESSDATA to getBiliGptInputText and this value should be removed.
const DEFAULT_SESSDATA = "92ce67f8%2C1695139091%2C5151e%2A31";
// Default size budget, in UTF-8 bytes, for the transcript placed in the prompt.
const DEFAULT_BYTE_LIMIT = 6200;
// Number of consecutive subtitle entries merged into one transcript group.
const SUBTITLES_PER_GROUP = 7;

const utf8Encoder = new TextEncoder();

const headers = {
  Accept: "application/json",
  "Content-Type": "application/json",
  "User-Agent":
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
  Host: "api.bilibili.com",
  Cookie: `SESSDATA=${DEFAULT_SESSDATA}`,
};
const commonConfig = {
  method: "GET",
  cache: "no-cache",
  headers,
  referrerPolicy: "no-referrer",
};

/**
 * Build the GPT prompt for a video from its first subtitle track.
 *
 * @param {string} title - Video title.
 * @param {number|string} aid - Video aid.
 * @param {number|string} cid - Video cid.
 * @param {boolean} [shouldShowTimestamp=false] - Prefix every subtitle group
 *   with its start time and use the timestamp-aware prompt.
 * @param {string} [biliSessData] - SESSDATA cookie value for the request;
 *   defaults to the built-in value for backward compatibility.
 * @returns {Promise<string>} The prompt text.
 * @throws {Error} If the video exposes no subtitle track.
 */
export async function getBiliGptInputText(
  title,
  aid,
  cid,
  shouldShowTimestamp = false,
  biliSessData = DEFAULT_SESSDATA,
) {
  // Example: https://api.bilibili.com/x/player/v2?aid=438937138&cid=1066979272
  const requestConfig = {
    ...commonConfig,
    headers: { ...headers, Cookie: `SESSDATA=${biliSessData}` },
  };
  const resp = await fetch(
    `https://api.bilibili.com/x/player/v2?aid=${aid}&cid=${cid}`,
    requestConfig,
  );
  const playerJson = await resp.json();
  const subtitles = playerJson?.data?.subtitle?.subtitles ?? [];
  if (subtitles.length === 0) {
    throw new Error(`No subtitles available for aid=${aid}, cid=${cid}`);
  }

  // `subtitle_url` was previously prefixed with "http:", i.e. treated as
  // protocol-relative; resolving it against an https base upgrades that case
  // to https and leaves an already-absolute URL untouched.
  const subtitleUrl = new URL(subtitles[0].subtitle_url, "https://api.bilibili.com/").href;
  const res = await fetch(subtitleUrl);
  const subtitlesData = (await res.json()).body;

  const subtitleGroups = reduceBilibiliSubtitleTimestamp(subtitlesData, shouldShowTimestamp);
  const inputText = getSmallSizeTranscripts(subtitleGroups, subtitleGroups);
  const videoConfig = {
    showEmoji: false,
  };
  return shouldShowTimestamp
    ? getUserSubtitleWithTimestampPrompt(title, inputText, videoConfig)
    : getUserSubtitlePrompt(title, inputText, videoConfig);
}

// The grouping/size-limiting approach below comes from https://github.com/JimmyLv/BibiGPT

/**
 * Group Bilibili subtitle entries, reading `from` as the start time and
 * `content` as the text of each entry.
 */
function reduceBilibiliSubtitleTimestamp(subtitles = [], shouldShowTimestamp) {
  return reduceSubtitleTimestamp(
    subtitles,
    item => item.from,
    item => item.content,
    shouldShowTimestamp,
  );
}

/**
 * Merge every SUBTITLES_PER_GROUP consecutive subtitle entries into one group.
 *
 * @param {Array} subtitles - Subtitle entries in playback order.
 * @param {Function} getStart - Returns the start time of an entry.
 * @param {Function} getText - Returns the text of an entry.
 * @param {boolean} shouldShowTimestamp - Prefix each group with "<start> - ".
 * @returns {Array<{index: number, s: *, text: string}>} Groups in order;
 *   `s` is the start time of the group's first entry.
 */
function reduceSubtitleTimestamp(subtitles, getStart, getText, shouldShowTimestamp) {
  // NOTE(review): the previous version also computed a per-group size aimed at
  // a fixed total number of groups but never used it; the effective behaviour
  // (fixed groups of 7) is kept here and the dead computation removed.
  const groups = [];
  subtitles.forEach((subtitle, index) => {
    const groupIndex = Math.floor(index / SUBTITLES_PER_GROUP);
    if (!groups[groupIndex]) {
      // First entry of a group: initialise it with that entry's start time.
      const start = getStart(subtitle);
      groups[groupIndex] = {
        index: groupIndex,
        s: start,
        text: shouldShowTimestamp ? `${start} - ` : "",
      };
    }
    groups[groupIndex].text += `${getText(subtitle)} `;
  });
  return groups;
}

/**
 * Join transcript groups into one string of at most `byteLimit` UTF-8 bytes.
 *
 * While `newTextData` is too large, a random half of its groups is dropped and
 * the function recurses. Once it fits, groups from `oldTextData` that are not
 * already present are added back, in order, until the budget is used up.
 *
 * @param {Array<{index: number, text: string}>} newTextData - Current candidate groups.
 * @param {Array<{index: number, text: string}>} oldTextData - Full set of groups.
 * @param {number} [byteLimit=6200] - Size budget in UTF-8 bytes.
 * @returns {string} Group texts ordered by `index` and joined with a space.
 */
function getSmallSizeTranscripts(newTextData, oldTextData, byteLimit = DEFAULT_BYTE_LIMIT) {
  // Sort a copy so the caller's array is never reordered.
  const joinByIndex = items =>
    [...items]
      .sort((a, b) => a.index - b.index)
      .map(item => item.text)
      .join(" ");

  const text = joinByIndex(newTextData);
  const byteLength = getByteLength(text);

  if (byteLength > byteLimit) {
    if (newTextData.length <= 1) {
      // Halving removes nothing from a single group, which used to recurse
      // forever; cut the text to the limit instead.
      return limitTranscriptByteLength(text, byteLimit);
    }
    return getSmallSizeTranscripts(filterHalfRandomly(newTextData), oldTextData, byteLimit);
  }

  const resultData = [...newTextData];
  const presentTexts = new Set(newTextData.map(item => item.text));
  let resultText = text;
  let lastByteLength = byteLength;

  for (const group of oldTextData) {
    if (presentTexts.has(group.text)) {
      continue;
    }
    // One byte is reserved for the joining space.
    const remainingBytes = byteLimit - lastByteLength - 1;
    if (remainingBytes <= 0) {
      break;
    }
    if (getByteLength(group.text) > remainingBytes) {
      // Only part of this group fits: keep the part that fills the budget and stop.
      const chunkedText = limitTranscriptByteLength(group.text, remainingBytes);
      if (chunkedText.length > 0) {
        resultData.push({ text: chunkedText, index: group.index });
        resultText = joinByIndex(resultData);
      }
      break;
    }
    resultData.push(group);
    resultText = joinByIndex(resultData);
    lastByteLength = getByteLength(resultText);
  }

  return resultText;
}

/**
 * Return a copy of `arr` with floor(length / 2) randomly chosen elements
 * removed; the remaining elements keep their relative order.
 */
function filterHalfRandomly(arr) {
  const dropCount = Math.floor(arr.length / 2);
  const droppedIndices = new Set();
  // Draw random indices until enough distinct ones have been collected.
  while (droppedIndices.size < dropCount) {
    droppedIndices.add(Math.floor(Math.random() * arr.length));
  }
  return arr.filter((_, index) => !droppedIndices.has(index));
}

/**
 * Length of `text` in UTF-8 bytes. The value is converted with String()
 * first, so non-string input is measured by its string form.
 */
function getByteLength(text) {
  return utf8Encoder.encode(String(text)).length;
}

/**
 * Prompt asking for a bullet-point summary of the transcript.
 *
 * @param {string} title - Video title.
 * @param {string} transcript - Transcript text.
 * @param {object} videoConfig - Reads `sentenceNumber`, `showEmoji`,
 *   `outlineLevel` and `detailLevel`.
 * @returns {string} The prompt.
 */
function getUserSubtitlePrompt(title, transcript, videoConfig) {
  const videoTitle = title?.replace(/\n+/g, " ").trim();
  const videoTranscript = limitTranscriptByteLength(transcript).replace(/\n+/g, " ").trim();
  const language = "zh-CN";
  const sentenceCount = videoConfig.sentenceNumber || 7;
  const emojiTemplateText = videoConfig.showEmoji ? "[Emoji] " : "";
  const emojiDescriptionText = videoConfig.showEmoji
    ? "Choose an appropriate emoji for each bullet point. "
    : "";
  const shouldShowAsOutline = Number(videoConfig.outlineLevel) > 1;
  const wordsCount = videoConfig.detailLevel ? (Number(videoConfig.detailLevel) / 100) * 2 : 15;
  const outlineTemplateText = shouldShowAsOutline ? `\n - Child points` : "";
  const outlineDescriptionText = shouldShowAsOutline
    ? `Use the outline list, which can have a hierarchical structure of up to ${videoConfig.outlineLevel} levels. `
    : "";
  const prompt = `Your output should use the following template:\n## Summary\n## Highlights\n- ${emojiTemplateText}Bulletpoint${outlineTemplateText}\n\nYour task is to summarise the text I have given you in up to ${sentenceCount} concise bullet points, starting with a short highlight, each bullet point is at least ${wordsCount} words. ${outlineDescriptionText}${emojiDescriptionText}Use the text above: {{Title}} {{Transcript}}.\n\nReply in ${language} Language.`;

  return `Title: "${videoTitle}"\nTranscript: "${videoTranscript}"\n\nInstructions: ${prompt}`;
}

/**
 * Prompt asking for a timestamped bullet-point summary of the transcript.
 *
 * @param {string} title - Video title.
 * @param {string} transcript - Transcript text whose groups are prefixed with their start time.
 * @param {object} videoConfig - Reads `sentenceNumber`, `showEmoji` and `detailLevel`.
 * @returns {string} The prompt.
 */
export function getUserSubtitleWithTimestampPrompt(title, transcript, videoConfig) {
  const videoTitle = title?.replace(/\n+/g, " ").trim();
  const videoTranscript = limitTranscriptByteLength(transcript).replace(/\n+/g, " ").trim();
  const language = "zh-CN";
  const sentenceCount = videoConfig.sentenceNumber || 7;
  const emojiTemplateText = videoConfig.showEmoji ? "[Emoji] " : "";
  const wordsCount = videoConfig.detailLevel ? (Number(videoConfig.detailLevel) / 100) * 2 : 15;
  const promptWithTimestamp = `Act as the author and provide exactly ${sentenceCount} bullet points for the text transcript given in the format [seconds] - [text] \nMake sure that:\n - Please start by summarizing the whole video in one short sentence\n - Then, please summarize with each bullet_point is at least ${wordsCount} words\n - each bullet_point start with "- " or a number or a bullet point symbol\n - each bullet_point should has the start timestamp, use this template: - seconds - ${emojiTemplateText}[bullet_point]\n - there may be typos in the subtitles, please correct them\n - Reply all in ${language} Language.`;
  // The transcript is already size-limited above. Truncating again after
  // JSON.stringify could cut off the closing quote, so it is serialised as is.
  const videoTranscripts = JSON.stringify(videoTranscript);
  return `Title: ${videoTitle}\nTranscript: ${videoTranscripts}\n\nInstructions: ${promptWithTimestamp}`;
}

/**
 * Truncate `str` so that it occupies at most `byteLimit` UTF-8 bytes.
 *
 * The cut is made on a code-point boundary, so a surrogate pair is never
 * split and the result never exceeds the limit.
 *
 * @param {string} str - Text to limit.
 * @param {number} [byteLimit=6200] - Maximum size in UTF-8 bytes.
 * @returns {string} `str` itself when it already fits, otherwise its longest fitting prefix.
 */
function limitTranscriptByteLength(str, byteLimit = DEFAULT_BYTE_LIMIT) {
  if (getByteLength(str) <= byteLimit) {
    return str;
  }
  let usedBytes = 0;
  let endIndex = 0;
  // Iterating a string yields whole code points (1 or 2 UTF-16 units each).
  for (const char of str) {
    const codePoint = char.codePointAt(0);
    let charBytes = 4;
    if (codePoint < 0x80) {
      charBytes = 1;
    } else if (codePoint < 0x800) {
      charBytes = 2;
    } else if (codePoint < 0x10000) {
      charBytes = 3;
    }
    if (usedBytes + charBytes > byteLimit) {
      break;
    }
    usedBytes += charBytes;
    endIndex += char.length;
  }
  return str.substring(0, endIndex);
}