feat: 1.1.0-rc1 重大更新

1. 【重要】更新可选选项--哔哩哔哩解析可以总结视频(beta)
2. 优化哔哩哔哩解析
3. Instagram增加健壮性
This commit is contained in:
zhiyu1998 2023-03-24 02:44:42 +08:00
parent dba801113e
commit 1000d21e2f
9 changed files with 341 additions and 58 deletions

View File

@ -15,17 +15,15 @@ apps -- 业务核心
config -- 配置文件 config -- 配置文件
img -- readme图片 img -- README图片
model -- 核心文件[建议不动] model -- 核心文件[建议不动]
resource -- 资源文件 resource -- 资源文件
test -- 爬虫文件[python]
utils -- 工具类 utils -- 工具类
index -- 主入口
## 🧏 ‍使用实例 ## 🧏 ‍使用实例
![help](./img/example.png) ![help](./img/example.png)
@ -59,7 +57,22 @@ sudo apt-get install ffmpeg
4. 【可选】小程序解析仅适配了[Yoimiya/Yunzai-Bot](https://gitee.com/yoimiya-kokomi/Yunzai-Bot) 4. 【可选】小程序解析仅适配了[Yoimiya/Yunzai-Bot](https://gitee.com/yoimiya-kokomi/Yunzai-Bot)
如果解析有问题参考issue[#I6MFF7](https://gitee.com/kyrzy0416/rconsole-plugin/issues/I6MFF7) 如果解析有问题参考issue[#I6MFF7](https://gitee.com/kyrzy0416/rconsole-plugin/issues/I6MFF7)
![小程序解析](./img/example6.png) <img src="./img/example6.png" alt="小程序解析" style="zoom:50%;" />
5. 【可选】对哔哩哔哩解析进行总结需要填写openaiApiKey和哔哩哔哩的SESSDATA
<img src="./img/example7.png" alt="哔哩哔哩总结" style="zoom:50%;" />
> 哔哩哔哩的SESSDATA
>
> 进入网站 -- 打开F12开发者选项 -- 应用 -- 找到Cookie -- 找到SESSDATA -- 复制 -- 粘贴到tools.yaml or 锅巴
> 如何获取openaiApiKey
>
> 1. 要有一个openai的账号https://chat.openai.com/
> 2. 获取api-key参考https://chatgpt.cn.obiscr.com/blog/posts/2023/How-to-get-api-key/
注:以上条件缺一不可(**SESSDATA+OpenAiApiKey+魔法**),不然无法触发
## 🧑‍🌾 进阶内容 ## 🧑‍🌾 进阶内容
【可选】相关配置(apps/tools.js) 【可选】相关配置(apps/tools.js)
@ -72,6 +85,18 @@ sudo apt-get install ffmpeg
## 📦 业务 ## 📦 业务
![help](./img/help.jpg) ![help](./img/help.jpg)
## 📝 计划功能
- [ ] YouTube解析这个可能要🕊一久
- [x] Instagram解析
- [ ] 单张图片解析
- [ ] 视频解析
- [ ] 哔哩哔哩总结
- [ ] 精简工具功能冗余代码
- [ ] 自由修改插件优先级
- [ ] ...
## 🔗 链接 ## 🔗 链接
- [云崽](https://gitee.com/yoimiya-kokomi/Yunzai-Bot) - [云崽](https://gitee.com/yoimiya-kokomi/Yunzai-Bot)

View File

@ -10,11 +10,14 @@ import HttpProxyAgent from "https-proxy-agent";
import { mkdirsSync } from "../utils/file.js"; import { mkdirsSync } from "../utils/file.js";
import { downloadBFile, getDownloadUrl, mergeFileToMp4, getDynamic } from "../utils/bilibili.js"; import { downloadBFile, getDownloadUrl, mergeFileToMp4, getDynamic } from "../utils/bilibili.js";
import { parseUrl, parseM3u8, downloadM3u8Videos, mergeAcFileToMp4 } from "../utils/acfun.js"; import { parseUrl, parseM3u8, downloadM3u8Videos, mergeAcFileToMp4 } from "../utils/acfun.js";
import { transMap, douyinTypeMap, TEN_THOUSAND, XHS_CK } from "../utils/constant.js"; import { transMap, douyinTypeMap, XHS_CK } from "../utils/constant.js";
import { getIdVideo, generateRandomStr } from "../utils/common.js"; import { getIdVideo, generateRandomStr } from "../utils/common.js";
import config from "../model/index.js"; import config from "../model/index.js";
import Translate from "../utils/trans-strategy.js"; import Translate from "../utils/trans-strategy.js";
import { getXB } from "../utils/x-bogus.js"; import { getXB } from "../utils/x-bogus.js";
import { getVideoInfo } from "../utils/biliInfo.js";
import { getBiliGptInputText } from "../utils/biliSummary.js";
import { ChatGPTClient } from "@waylaidwanderer/chatgpt-api";
export class tools extends plugin { export class tools extends plugin {
constructor() { constructor() {
@ -81,12 +84,22 @@ export class tools extends plugin {
// 视频保存路径 // 视频保存路径
this.defaultPath = this.toolsConfig.defaultPath; this.defaultPath = this.toolsConfig.defaultPath;
// 代理接口 // 代理接口
// TODO 填写服务器的内网ID和clash的端口
this.proxyAddr = this.toolsConfig.proxyAddr; this.proxyAddr = this.toolsConfig.proxyAddr;
this.proxyPort = this.toolsConfig.proxyPort; this.proxyPort = this.toolsConfig.proxyPort;
this.myProxy = `http://${this.proxyAddr}:${this.proxyPort}`; this.myProxy = `http://${this.proxyAddr}:${this.proxyPort}`;
// 加载twitter配置 // 加载哔哩哔哩配置
this.bearerToken = this.toolsConfig.bearerToken; this.biliSessData = this.toolsConfig.biliSessData;
// 加载gpt配置
this.openaiApiKey = this.toolsConfig.openaiApiKey;
// 加载gpt客户端
this.chatGptClient = new ChatGPTClient(this.openaiApiKey, {
modelOptions: {
model: "gpt-3.5-turbo",
temperature: 0,
},
proxy: this.myProxy,
debug: false,
});
} }
// 翻译插件 // 翻译插件
@ -143,13 +156,16 @@ export class tools extends plugin {
// const url = `https://www.iesdouyin.com/aweme/v1/web/aweme/detail/?aweme_id=${ douId }&aid=1128&version_name=23.5.0&device_platform=android&os_version=2333`; // const url = `https://www.iesdouyin.com/aweme/v1/web/aweme/detail/?aweme_id=${ douId }&aid=1128&version_name=23.5.0&device_platform=android&os_version=2333`;
// 感谢 Evil0ctalhttps://github.com/Evil0ctal提供的header 和 B1gM8chttps://github.com/B1gM8c的逆向算法X-Bogus // 感谢 Evil0ctalhttps://github.com/Evil0ctal提供的header 和 B1gM8chttps://github.com/B1gM8c的逆向算法X-Bogus
const headers = { const headers = {
'accept-encoding': 'gzip, deflate, br', "accept-encoding": "gzip, deflate, br",
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36', "User-Agent":
'referer': 'https://www.douyin.com/', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
'cookie': "s_v_web_id=verify_leytkxgn_kvO5kOmO_SdMs_4t1o_B5ml_BUqtWM1mP6BF;" referer: "https://www.douyin.com/",
} cookie: "s_v_web_id=verify_leytkxgn_kvO5kOmO_SdMs_4t1o_B5ml_BUqtWM1mP6BF;",
};
const dyApi = "https://www.douyin.com/aweme/v1/web/aweme/detail/?"; const dyApi = "https://www.douyin.com/aweme/v1/web/aweme/detail/?";
const params = `msToken=${generateRandomStr(107)}&device_platform=webapp&aid=6383&channel=channel_pc_web&aweme_id=${douId}&pc_client_type=1&version_code=190500&version_name=19.5.0&cookie_enabled=true&screen_width=1344&screen_height=756&browser_language=zh-CN&browser_platform=Win32&browser_name=Firefox&browser_version=110.0&browser_online=true&engine_name=Gecko&engine_version=109.0&os_name=Windows&os_version=10&cpu_core_num=16&device_memory=&platform=PC&webid=7158288523463362079`; const params = `msToken=${generateRandomStr(
107,
)}&device_platform=webapp&aid=6383&channel=channel_pc_web&aweme_id=${douId}&pc_client_type=1&version_code=190500&version_name=19.5.0&cookie_enabled=true&screen_width=1344&screen_height=756&browser_language=zh-CN&browser_platform=Win32&browser_name=Firefox&browser_version=110.0&browser_online=true&engine_name=Gecko&engine_version=109.0&os_name=Windows&os_version=10&cpu_core_num=16&device_memory=&platform=PC&webid=7158288523463362079`;
// xg参数 // xg参数
const xbParam = getXB(params.replaceAll("&", "%26")); const xbParam = getXB(params.replaceAll("&", "%26"));
// const param = resp.data.result[0].paramsencode; // const param = resp.data.result[0].paramsencode;
@ -197,7 +213,7 @@ export class tools extends plugin {
// console.log(no_watermark_image_list) // console.log(no_watermark_image_list)
await this.reply(await Bot.makeForwardMsg(no_watermark_image_list)); await this.reply(await Bot.makeForwardMsg(no_watermark_image_list));
} }
}); });
}); });
return true; return true;
} }
@ -291,7 +307,6 @@ export class tools extends plugin {
url = url.substring(0, url.indexOf("?")); url = url.substring(0, url.indexOf("?"));
} }
const dynamicId = /[^/]+(?!.*\/)/.exec(url)[0]; const dynamicId = /[^/]+(?!.*\/)/.exec(url)[0];
// console.log(dynamicId)
getDynamic(dynamicId).then(async resp => { getDynamic(dynamicId).then(async resp => {
if (resp.dynamicSrc.length > 0) { if (resp.dynamicSrc.length > 0) {
e.reply(`识别:哔哩哔哩动态, ${resp.dynamicDesc}`); e.reply(`识别:哔哩哔哩动态, ${resp.dynamicDesc}`);
@ -304,9 +319,6 @@ export class tools extends plugin {
}); });
}); });
await this.reply(await Bot.makeForwardMsg(dynamicSrcMsg)); await this.reply(await Bot.makeForwardMsg(dynamicSrcMsg));
// resp.dynamicSrc.forEach(item => {
// e.reply(segment.image(item));
// });
} else { } else {
e.reply(`识别:哔哩哔哩动态, 但是失败!`); e.reply(`识别:哔哩哔哩动态, 但是失败!`);
} }
@ -320,41 +332,8 @@ export class tools extends plugin {
} }
// 视频信息获取例子http://api.bilibili.com/x/web-interface/view?bvid=BV1hY411m7cB // 视频信息获取例子http://api.bilibili.com/x/web-interface/view?bvid=BV1hY411m7cB
// 请求视频信息 // 请求视频信息
(function () { const { title, combineContent, aid, cid } = await getVideoInfo(url);
const baseVideoInfo = "http://api.bilibili.com/x/web-interface/view"; e.reply([title, combineContent]);
const videoId = /video\/[^\?\/ ]+/.exec(url)[0].split("/")[1];
// 获取视频信息,然后发送
fetch(
videoId.startsWith("BV")
? `${baseVideoInfo}?bvid=${videoId}`
: `${baseVideoInfo}?aid=${videoId}`,
).then(async resp => {
const respJson = await resp.json();
const respData = respJson.data;
// 视频标题
const title = "识别:哔哩哔哩," + respData.title + "\n";
// 视频图片(暂时不加入,影响性能)
// const videoCover = respData.pic;
// 视频信息
let { view, danmaku, reply, favorite, coin, share, like } = respData.stat;
// 数据处理
const dataProcessing = data => {
return Number(data) >= TEN_THOUSAND
? (data / TEN_THOUSAND).toFixed(1) + "万"
: data;
};
// 组合内容
const combineContent = `总播放量:${dataProcessing(
view,
)}, 弹幕数量${dataProcessing(danmaku)}, 回复量${dataProcessing(
reply,
)}, 收藏数${dataProcessing(favorite)}, 投币${dataProcessing(
coin,
)}, 分享${dataProcessing(share)}, 点赞${dataProcessing(like)}\n`;
const msgCombine = [title, combineContent /*, segment.image(videoCover)*/];
await e.reply(msgCombine);
});
})();
await getDownloadUrl(url) await getDownloadUrl(url)
.then(data => { .then(data => {
@ -371,6 +350,14 @@ export class tools extends plugin {
logger.error(err); logger.error(err);
e.reply("解析失败,请重试一下"); e.reply("解析失败,请重试一下");
}); });
// 如果有ck 并且 有openai的key
if (this.biliSessData && this.openaiApiKey) {
const prompt = await getBiliGptInputText(title, aid, cid);
const response = await this.chatGptClient.sendMessage(prompt);
// 暂时不设计上下文
e.reply(response.response);
}
return true; return true;
} }
@ -862,12 +849,17 @@ export class tools extends plugin {
.catch(err => reject(err)); .catch(err => reject(err));
}); });
}; };
await fetch(API).then(async resp => { await fetch(API, {
headers: {
"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
},
}).then(async resp => {
const html = await resp.text(); const html = await resp.text();
const desc = html.match(/(?<=content=").*?(?=\")/g)?.[2]; const desc = html.match(/(?<=content=").*?(?=\")/g)?.[2];
const images = html.match(/<div class=\"swiper-slide.*?\">/g); const images = html.match(/<div class=\"swiper-slide.*?\">/g);
if (!_.isNull(images)) { if (!_.isNull(images)) {
e.reply(`识别Insta${desc || "暂无描述"}\n`) e.reply(`识别Insta${desc || "暂无描述"}\n`);
images.map((item, index) => { images.map((item, index) => {
const imgUrl = /(?<=data-src=").*?(?=")/ const imgUrl = /(?<=data-src=").*?(?=")/
.exec(item)[0] .exec(item)[0]

View File

@ -4,3 +4,6 @@ proxyPort: '7890' # 魔法端口
translateAppId: '' # 百度翻译APP ID translateAppId: '' # 百度翻译APP ID
translateSecret: '' # 百度翻译密匙 translateSecret: '' # 百度翻译密匙
biliSessData: '' # 哔哩哔哩的SESSDATA
openaiApiKey: '' # OpenAI的API Key, sk...

View File

@ -75,6 +75,28 @@ export function supportGuoba() {
placeholder: "请输入视频暂存位置", placeholder: "请输入视频暂存位置",
}, },
}, },
{
field: "tools.biliSessData",
label: "哔哩哔哩SESSDATA",
bottomHelpMessage:
"如何获取具体参考我的文档说明https://gitee.com/kyrzy0416/rconsole-plugin",
component: "Input",
required: false,
componentProps: {
placeholder: "请输入哔哩哔哩SESSDATA",
},
},
{
field: "tools.openaiApiKey",
label: "OpenAI的API-KEY",
bottomHelpMessage:
"如何获取参考文章https://chatgpt.cn.obiscr.com/blog/posts/2023/How-to-get-api-key/",
component: "Input",
required: false,
componentProps: {
placeholder: "请输入OpenAI的API-KEYsk.....",
},
},
], ],
getConfigData() { getConfigData() {
const toolsData = { const toolsData = {

BIN
img/example7.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 468 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 251 KiB

After

Width:  |  Height:  |  Size: 251 KiB

View File

@ -4,6 +4,7 @@
"type": "module", "type": "module",
"dependencies": { "dependencies": {
"axios": "^1.3.4", "axios": "^1.3.4",
"tunnel": "^0.0.6" "tunnel": "^0.0.6",
"@waylaidwanderer/chatgpt-api": "^1.31.5"
} }
} }

42
utils/biliInfo.js Normal file
View File

@ -0,0 +1,42 @@
import fetch from "node-fetch";
import { TEN_THOUSAND } from "./constant.js";
/**
 * Fetch a Bilibili video's metadata from the web-interface "view" API and
 * build the text snippets the plugin replies with.
 *
 * @param {string} url A Bilibili video URL containing `video/<id>` (a `BV…` or `av…` id)
 * @returns {Promise<{title: string, combineContent: string, aid: *, cid: *}>}
 *   `title` and `combineContent` are ready-to-send strings; `aid` is taken from the
 *   response and `cid` from its first page (undefined when the response has no pages)
 * @throws {Error} When no video id can be extracted from `url` or the API response has no `data`
 */
async function getVideoInfo(url) {
    const baseVideoInfo = "http://api.bilibili.com/x/web-interface/view";
    const videoId = /video\/([^\?\/ ]+)/.exec(url)?.[1];
    if (!videoId) {
        throw new Error(`Cannot extract a Bilibili video id from url: ${url}`);
    }
    // BV ids go to `bvid`; anything else is treated as an av id, whose "av" prefix
    // must be dropped because the `aid` query parameter is the bare number.
    const query = videoId.startsWith("BV")
        ? `bvid=${videoId}`
        : `aid=${videoId.replace(/^av/i, "")}`;

    const resp = await fetch(`${baseVideoInfo}?${query}`);
    const respJson = await resp.json();
    const respData = respJson.data;
    if (!respData) {
        throw new Error(
            `Bilibili view API returned no data for ${videoId} (code=${respJson.code}, message=${respJson.message})`,
        );
    }

    // Video title line
    const title = `识别:哔哩哔哩,${respData.title}\n`;
    // Cover image (respData.pic) is intentionally not included for performance reasons.
    const { view, danmaku, reply, favorite, coin, share, like } = respData.stat;
    // Counts of TEN_THOUSAND or more are shown as a one-decimal multiple followed by "万"
    const formatCount = count =>
        Number(count) >= TEN_THOUSAND ? (count / TEN_THOUSAND).toFixed(1) + "万" : count;
    // Statistics line
    const combineContent = `总播放量:${formatCount(view)}, 弹幕数量:${formatCount(
        danmaku,
    )}, 回复量${formatCount(reply)}, 收藏数${formatCount(favorite)}, 投币${formatCount(
        coin,
    )}, 分享${formatCount(share)}, 点赞${formatCount(like)}\n`;

    return {
        title,
        combineContent,
        aid: respData.aid,
        cid: respData.pages?.[0]?.cid,
    };
}
export { getVideoInfo };

198
utils/biliSummary.js Normal file
View File

@ -0,0 +1,198 @@
// Default request headers sent to api.bilibili.com through commonConfig below.
// NOTE(review): the Cookie entry embeds a literal SESSDATA value in source and is
// sent with every request that uses commonConfig. It looks like a real session
// credential — confirm whether it should be committed, and consider taking it
// from the plugin configuration instead.
const headers = {
Accept: "application/json",
"Content-Type": "application/json",
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
Host: "api.bilibili.com",
Cookie: `SESSDATA=92ce67f8%2C1695139091%2C5151e%2A31`,
};
// Shared fetch options for the player API request: an uncached GET carrying the
// headers above and no referrer.
const commonConfig = {
method: "GET",
cache: "no-cache",
headers,
referrerPolicy: "no-referrer",
};
/**
 * Build the ChatGPT prompt that asks for a summary of a Bilibili video, based on
 * the first subtitle track reported by the player API.
 *
 * @param {string} title Video title
 * @param {number|string} aid Video aid, passed to the player API
 * @param {number|string} cid Page cid, passed to the player API
 * @param {boolean} [shouldShowTimestamp=false] Whether each subtitle group is prefixed with its start time
 * @param {string} [sessData] Optional SESSDATA cookie value; when omitted the
 *   module-level default headers are used unchanged
 * @returns {Promise<string>} The prompt text
 * @throws {Error} When the player API response contains no subtitle track
 */
export async function getBiliGptInputText(title, aid, cid, shouldShowTimestamp = false, sessData) {
    // Only override the Cookie header when the caller supplies its own SESSDATA.
    const requestConfig = sessData
        ? { ...commonConfig, headers: { ...headers, Cookie: `SESSDATA=${sessData}` } }
        : commonConfig;

    // e.g. https://api.bilibili.com/x/player/v2?aid=438937138&cid=1066979272
    const playerResp = await fetch(
        `https://api.bilibili.com/x/player/v2?aid=${aid}&cid=${cid}`,
        requestConfig,
    );
    const playerJson = await playerResp.json();
    const subtitleUrl = playerJson?.data?.subtitle?.subtitles?.[0]?.subtitle_url;
    if (!subtitleUrl) {
        // Videos without subtitles (or an API error payload) used to crash with a TypeError here.
        throw new Error(`No subtitles available for aid=${aid}, cid=${cid}`);
    }

    // The scheme is prepended as before (presumably subtitle_url is protocol-relative —
    // verify against the API); a URL that already carries a scheme is used as-is.
    const absoluteUrl = /^https?:\/\//i.test(subtitleUrl) ? subtitleUrl : `http:${subtitleUrl}`;
    const subtitleResp = await fetch(absoluteUrl);
    const subtitlesData = (await subtitleResp.json()).body ?? [];

    const subtitleTimestamp = reduceBilibiliSubtitleTimestamp(subtitlesData, shouldShowTimestamp);
    const inputText = getSmallSizeTranscripts(subtitleTimestamp, subtitleTimestamp);
    const videoConfig = {
        showEmoji: false,
    };
    return shouldShowTimestamp
        ? getUserSubtitleWithTimestampPrompt(title, inputText, videoConfig)
        : getUserSubtitlePrompt(title, inputText, videoConfig);
}
// 以下拼接算法来自https://github.com/JimmyLv/BibiGPT
/**
 * Group Bilibili subtitle entries by delegating to reduceSubtitleTimestamp,
 * reading each entry's start from `from` and its text from `content`.
 *
 * @param {Array<object>} [subtitles=[]] Subtitle entries
 * @param {boolean} [shouldShowTimestamp] Forwarded unchanged to reduceSubtitleTimestamp
 * @returns {Array<object>} Whatever reduceSubtitleTimestamp returns
 */
function reduceBilibiliSubtitleTimestamp(subtitles = [], shouldShowTimestamp) {
    const startOf = entry => entry.from;
    const textOf = entry => entry.content;
    return reduceSubtitleTimestamp(subtitles, startOf, textOf, shouldShowTimestamp);
}
/**
 * Merge consecutive subtitles into groups of seven.
 *
 * Each group is `{ index, s, text }`: `index` is the group number, `s` is the start
 * value of the group's first subtitle, and `text` is the group's subtitle texts each
 * followed by a space (prefixed with "<start> - " when `shouldShowTimestamp` is truthy).
 *
 * @param {Array<object>} subtitles Subtitle entries, in playback order
 * @param {function(object): *} getStart Reads the start value of an entry
 * @param {function(object): string} getText Reads the text of an entry
 * @param {boolean} [shouldShowTimestamp] Whether to prefix each group's text with its start value
 * @returns {Array<{index: number, s: *, text: string}>} The groups, ordered by index
 */
function reduceSubtitleTimestamp(subtitles, getStart, getText, shouldShowTimestamp) {
    // Fixed group size. The earlier "split into N groups" computation was never
    // used for the grouping, so it has been removed.
    const SUBTITLES_PER_GROUP = 7;

    return subtitles.reduce((groups, subtitle, position) => {
        const groupIndex = Math.floor(position / SUBTITLES_PER_GROUP);
        // First subtitle of a group: create the group record
        if (!groups[groupIndex]) {
            const start = getStart(subtitle);
            groups[groupIndex] = {
                index: groupIndex,
                s: start,
                text: shouldShowTimestamp ? start + " - " : "",
            };
        }
        // Append this subtitle's text to its group
        groups[groupIndex].text = groups[groupIndex].text + getText(subtitle) + " ";
        return groups;
    }, []);
}
/**
 * Join subtitle groups into one transcript string that fits a UTF-8 byte budget.
 *
 * While the joined text of `newTextData` is over `byteLimit`, a random half of its
 * groups is dropped and the function recurses. Once it fits, groups from
 * `oldTextData` that are not already present are added back in order until the
 * budget is used up; the group that would overflow is cut to the part that still
 * fits. Neither input array is modified.
 *
 * @param {Array<{index: number, text: string}>} newTextData Groups currently selected
 * @param {Array<{index: number, text: string}>} oldTextData The full set of groups to refill from
 * @param {number} [byteLimit=6200] Maximum UTF-8 byte length of the result. Cuts are
 *   made proportionally by character count, so the bound is exact for single-byte
 *   text and approximate for text mixing character widths
 * @returns {string} The groups' texts joined by a space, ordered by `index`
 */
function getSmallSizeTranscripts(newTextData, oldTextData, byteLimit = 6200) {
    const text = joinTranscript(newTextData);
    const byteLength = getByteLength(text);

    if (byteLength > byteLimit) {
        if (newTextData.length <= 1) {
            // Halving one group drops nothing, so recursing would never terminate:
            // cut the text proportionally instead.
            return text.substring(0, Math.floor(text.length * (byteLimit / byteLength)));
        }
        return getSmallSizeTranscripts(filterHalfRandomly(newTextData), oldTextData, byteLimit);
    }

    const resultData = [...newTextData];
    let resultText = text;
    let lastByteLength = byteLength;

    for (const obj of oldTextData) {
        if (itemInIt(newTextData, obj.text)) {
            continue;
        }
        // One byte is reserved for the joining space.
        const remaining = byteLimit - lastByteLength - 1;
        if (remaining <= 0) {
            break;
        }
        const nextTextByteLength = getByteLength(obj.text);
        if (nextTextByteLength <= remaining) {
            resultData.push(obj);
            resultText = joinTranscript(resultData);
            lastByteLength = getByteLength(resultText);
            continue;
        }
        // Keep only the fraction of this group that still fits, then stop: the budget is spent.
        const keepRate = remaining / nextTextByteLength;
        const chunkedText = obj.text.substring(0, Math.floor(obj.text.length * keepRate));
        if (chunkedText.length > 0) {
            resultData.push({ text: chunkedText, index: obj.index });
            resultText = joinTranscript(resultData);
        }
        break;
    }
    return resultText;
}

/** Join the groups' texts with a space, ordered by `index`, without mutating the input. */
function joinTranscript(textData) {
    return [...textData]
        .sort((a, b) => a.index - b.index)
        .map(t => t.text)
        .join(" ");
}

/**
 * Return a copy of `arr` with floor(length / 2) randomly chosen elements removed;
 * the remaining elements keep their original order.
 *
 * @param {Array} arr Source array (not modified)
 * @returns {Array} The surviving elements
 */
function filterHalfRandomly(arr) {
    const dropCount = Math.floor(arr.length / 2);
    const droppedIndices = new Set();
    // Draw random indices until enough distinct ones are collected
    while (droppedIndices.size < dropCount) {
        droppedIndices.add(Math.floor(Math.random() * arr.length));
    }
    return arr.filter((_, i) => !droppedIndices.has(i));
}

/**
 * UTF-8 byte length of `text`.
 *
 * @param {string} text
 * @returns {number}
 */
function getByteLength(text) {
    return Buffer.byteLength(String(text), "utf8");
}

/**
 * Whether any group in `textData` has exactly this `text`.
 *
 * @param {Array<{text: string}>} textData
 * @param {string} text
 * @returns {boolean}
 */
function itemInIt(textData, text) {
    return textData.some(t => t.text === text);
}
/**
 * Build the summary prompt (without timestamps) from a title and transcript.
 *
 * Newline runs in both title and transcript are collapsed to one space and the
 * result is trimmed; the transcript is first passed through limitTranscriptByteLength.
 *
 * @param {string} [title] Video title (may be null/undefined)
 * @param {string} transcript Transcript text
 * @param {object} videoConfig Options read here: `sentenceNumber`, `showEmoji`,
 *   `outlineLevel`, `detailLevel`
 * @returns {string} Prompt of the form `Title: "…"\nTranscript: "…"\n\nInstructions: …`
 */
function getUserSubtitlePrompt(title, transcript, videoConfig) {
    const flatten = value => value.replace(/\n+/g, " ").trim();

    const videoTitle = title == null ? undefined : flatten(title);
    const videoTranscript = flatten(limitTranscriptByteLength(transcript));

    const language = "zh-CN";
    const sentenceCount = videoConfig.sentenceNumber || 7;

    let emojiTemplateText = "";
    let emojiDescriptionText = "";
    if (videoConfig.showEmoji) {
        emojiTemplateText = "[Emoji] ";
        emojiDescriptionText = "Choose an appropriate emoji for each bullet point. ";
    }

    let wordsCount = 15;
    if (videoConfig.detailLevel) {
        wordsCount = (Number(videoConfig.detailLevel) / 100) * 2;
    }

    // Outline mode applies only when outlineLevel is numerically greater than 1
    let outlineTemplateText = "";
    let outlineDescriptionText = "";
    if (Number(videoConfig.outlineLevel) > 1) {
        outlineTemplateText = `\n - Child points`;
        outlineDescriptionText = `Use the outline list, which can have a hierarchical structure of up to ${videoConfig.outlineLevel} levels. `;
    }

    const prompt = `Your output should use the following template:\n## Summary\n## Highlights\n- ${emojiTemplateText}Bulletpoint${outlineTemplateText}\n\nYour task is to summarise the text I have given you in up to ${sentenceCount} concise bullet points, starting with a short highlight, each bullet point is at least ${wordsCount} words. ${outlineDescriptionText}${emojiDescriptionText}Use the text above: {{Title}} {{Transcript}}.\n\nReply in ${language} Language.`;

    return `Title: "${videoTitle}"\nTranscript: "${videoTranscript}"\n\nInstructions: ${prompt}`;
}
/**
 * Build the summary prompt for a transcript whose groups carry start timestamps.
 *
 * The transcript is limited by limitTranscriptByteLength, has newline runs collapsed
 * to one space and is trimmed; it is then JSON-stringified and limited once more
 * before being placed in the prompt.
 *
 * @param {string} [title] Video title (may be null/undefined)
 * @param {string} transcript Transcript text
 * @param {object} videoConfig Options read here: `sentenceNumber`, `showEmoji`, `detailLevel`
 * @returns {string} Prompt of the form `Title: …\nTranscript: …\n\nInstructions: …`
 */
export function getUserSubtitleWithTimestampPrompt(title, transcript, videoConfig) {
    const flatten = value => value.replace(/\n+/g, " ").trim();

    const videoTitle = title == null ? undefined : flatten(title);
    const videoTranscript = flatten(limitTranscriptByteLength(transcript));

    const language = "zh-CN";
    const sentenceCount = videoConfig.sentenceNumber || 7;

    let emojiTemplateText = "";
    if (videoConfig.showEmoji) {
        emojiTemplateText = "[Emoji] ";
    }

    let wordsCount = 15;
    if (videoConfig.detailLevel) {
        wordsCount = (Number(videoConfig.detailLevel) / 100) * 2;
    }

    const promptWithTimestamp = `Act as the author and provide exactly ${sentenceCount} bullet points for the text transcript given in the format [seconds] - [text] \nMake sure that:\n - Please start by summarizing the whole video in one short sentence\n - Then, please summarize with each bullet_point is at least ${wordsCount} words\n - each bullet_point start with \"- \" or a number or a bullet point symbol\n - each bullet_point should has the start timestamp, use this template: - seconds - ${emojiTemplateText}[bullet_point]\n - there may be typos in the subtitles, please correct them\n - Reply all in ${language} Language.`;

    const serialized = JSON.stringify(videoTranscript);
    const videoTranscripts = limitTranscriptByteLength(serialized);

    return `Title: ${videoTitle}\nTranscript: ${videoTranscripts}\n\nInstructions: ${promptWithTimestamp}`;
}
/**
 * Shorten `str` when its UTF-8 byte length exceeds `byteLimit`.
 *
 * The cut is proportional: the string keeps floor(length * byteLimit / byteLength)
 * leading UTF-16 code units. Strings within the limit are returned unchanged.
 *
 * @param {string} str Text to limit
 * @param {number} [byteLimit=6200] Byte budget
 * @returns {string} `str` or a leading substring of it
 */
function limitTranscriptByteLength(str, byteLimit = 6200) {
    // Each character of the unescaped percent-encoding stands for one UTF-8 byte
    const byteLength = unescape(encodeURIComponent(str)).length;
    if (!(byteLength > byteLimit)) {
        return str;
    }
    const keepRatio = byteLimit / byteLength;
    const keptLength = Math.floor(str.length * keepRatio);
    return str.substring(0, keptLength);
}