From af921202ba266474da78ed17b2638c5c06645722 Mon Sep 17 00:00:00 2001 From: DjangoPeng Date: Wed, 21 Aug 2024 08:20:26 +0000 Subject: [PATCH] optimize prompts, upgrade LLM to GPT-4o-mini and add Chineses comments to other modules --- prompts/report_prompt.txt | 22 ++++++++++++++ src/command_handler.py | 18 ++++++++++-- src/command_tool.py | 52 ++++++++++++++++----------------- src/config.py | 12 ++++++-- src/daemon_process.py | 54 +++++++++++++++++----------------- src/github_client.py | 6 ---- src/gradio_server.py | 37 +++++++++++++----------- src/llm.py | 24 +++++++++------ src/main.py | 61 --------------------------------------- src/report_generator.py | 7 ----- src/scheduler.py | 26 ++++++++++------- 11 files changed, 151 insertions(+), 168 deletions(-) create mode 100644 prompts/report_prompt.txt delete mode 100644 src/main.py diff --git a/prompts/report_prompt.txt b/prompts/report_prompt.txt new file mode 100644 index 00000000..095122ba --- /dev/null +++ b/prompts/report_prompt.txt @@ -0,0 +1,22 @@ +你接下来收到的都是开源项目的最新进展。 + +你根据进展,总结成一个中文的报告,以 项目名称和日期 开头,包含:新增功能、主要改进,修复问题等章节。 + +参考示例如下: + +# LangChain 项目进展 + +## 时间周期:2024-08-13至2024-08-18 + +## 新增功能 +- langchain-box: 添加langchain box包和DocumentLoader +- 添加嵌入集成测试 + +## 主要改进 +- 将@root_validator用法升级以与pydantic 2保持一致 +- 将根验证器升级为与pydantic 2兼容 + +## 修复问题 +- 修复Azure的json模式问题 +- 修复Databricks Vector Search演示笔记本问题 +- 修复Microsoft Azure Cosmos集成测试中的连接字符串问题 \ No newline at end of file diff --git a/src/command_handler.py b/src/command_handler.py index 995a9c59..5b6c0b18 100644 --- a/src/command_handler.py +++ b/src/command_handler.py @@ -2,49 +2,61 @@ import argparse +import argparse # 导入argparse库,用于处理命令行参数解析 + class CommandHandler: def __init__(self, github_client, subscription_manager, report_generator): + # 初始化CommandHandler,接收GitHub客户端、订阅管理器和报告生成器 self.github_client = github_client self.subscription_manager = subscription_manager self.report_generator = report_generator - self.parser = self.create_parser() + self.parser = self.create_parser() # 创建命令行解析器 def create_parser(self): + # 创建并配置命令行解析器 parser = argparse.ArgumentParser( description='GitHub Sentinel Command Line Interface', formatter_class=argparse.RawTextHelpFormatter ) subparsers = parser.add_subparsers(title='Commands', dest='command') + # 添加订阅命令 parser_add = subparsers.add_parser('add', help='Add a subscription') parser_add.add_argument('repo', type=str, help='The repository to subscribe to (e.g., owner/repo)') parser_add.set_defaults(func=self.add_subscription) + # 删除订阅命令 parser_remove = subparsers.add_parser('remove', help='Remove a subscription') parser_remove.add_argument('repo', type=str, help='The repository to unsubscribe from (e.g., owner/repo)') parser_remove.set_defaults(func=self.remove_subscription) + # 列出所有订阅命令 parser_list = subparsers.add_parser('list', help='List all subscriptions') parser_list.set_defaults(func=self.list_subscriptions) + # 导出每日进展命令 parser_export = subparsers.add_parser('export', help='Export daily progress') parser_export.add_argument('repo', type=str, help='The repository to export progress from (e.g., owner/repo)') parser_export.set_defaults(func=self.export_daily_progress) + # 导出特定日期范围进展命令 parser_export_range = subparsers.add_parser('export-range', help='Export progress over a range of dates') parser_export_range.add_argument('repo', type=str, help='The repository to export progress from (e.g., owner/repo)') parser_export_range.add_argument('days', type=int, help='The number of days to export progress for') parser_export_range.set_defaults(func=self.export_progress_by_date_range) + # 生成日报命令 parser_generate = subparsers.add_parser('generate', help='Generate daily report from markdown file') parser_generate.add_argument('file', type=str, help='The markdown file to generate report from') parser_generate.set_defaults(func=self.generate_daily_report) + # 帮助命令 parser_help = subparsers.add_parser('help', help='Show help message') parser_help.set_defaults(func=self.print_help) - return parser + return parser # 返回配置好的解析器 + # 下面是各种命令对应的方法实现,每个方法都使用了相应的管理器来执行实际操作,并输出结果信息 def add_subscription(self, args): self.subscription_manager.add_subscription(args.repo) print(f"Added subscription for repository: {args.repo}") @@ -72,4 +84,4 @@ def generate_daily_report(self, args): print(f"Generated daily report from file: {args.file}") def print_help(self, args=None): - self.parser.print_help() + self.parser.print_help() # 输出帮助信息 diff --git a/src/command_tool.py b/src/command_tool.py index 3a41a0f0..6a487743 100644 --- a/src/command_tool.py +++ b/src/command_tool.py @@ -1,40 +1,40 @@ -import shlex +import shlex # 导入shlex库,用于正确解析命令行输入 -from config import Config -from github_client import GitHubClient -from notifier import Notifier -from report_generator import ReportGenerator -from llm import LLM -from subscription_manager import SubscriptionManager -from command_handler import CommandHandler -from logger import LOG +from config import Config # 从config模块导入Config类,用于配置管理 +from github_client import GitHubClient # 从github_client模块导入GitHubClient类,用于GitHub API操作 +from notifier import Notifier # 从notifier模块导入Notifier类,用于通知功能 +from report_generator import ReportGenerator # 从report_generator模块导入ReportGenerator类,用于报告生成 +from llm import LLM # 从llm模块导入LLM类,可能用于语言模型相关操作 +from subscription_manager import SubscriptionManager # 从subscription_manager模块导入SubscriptionManager类,管理订阅 +from command_handler import CommandHandler # 从command_handler模块导入CommandHandler类,处理命令行命令 +from logger import LOG # 从logger模块导入LOG对象,用于日志记录 def main(): - config = Config() - github_client = GitHubClient(config.github_token) - notifier = Notifier(config.notification_settings) - llm = LLM() - report_generator = ReportGenerator(llm) - subscription_manager = SubscriptionManager(config.subscriptions_file) - command_handler = CommandHandler(github_client, subscription_manager, report_generator) + config = Config() # 创建配置实例 + github_client = GitHubClient(config.github_token) # 创建GitHub客户端实例 + notifier = Notifier(config.notification_settings) # 创建通知器实例 + llm = LLM() # 创建语言模型实例 + report_generator = ReportGenerator(llm) # 创建报告生成器实例 + subscription_manager = SubscriptionManager(config.subscriptions_file) # 创建订阅管理器实例 + command_handler = CommandHandler(github_client, subscription_manager, report_generator) # 创建命令处理器实例 - parser = command_handler.parser - command_handler.print_help() + parser = command_handler.parser # 获取命令解析器 + command_handler.print_help() # 打印帮助信息 while True: try: - user_input = input("GitHub Sentinel> ") - if user_input in ['exit', 'quit']: + user_input = input("GitHub Sentinel> ") # 等待用户输入 + if user_input in ['exit', 'quit']: # 如果输入为退出命令,则结束循环 break try: - args = parser.parse_args(shlex.split(user_input)) - if args.command is None: + args = parser.parse_args(shlex.split(user_input)) # 解析用户输入的命令 + if args.command is None: # 如果没有命令被解析,则继续循环 continue - args.func(args) - except SystemExit as e: + args.func(args) # 执行对应的命令函数 + except SystemExit as e: # 捕获由于错误命令引发的异常 LOG.error("Invalid command. Type 'help' to see the list of available commands.") except Exception as e: - LOG.error(f"Unexpected error: {e}") + LOG.error(f"Unexpected error: {e}") # 记录其他未预期的错误 if __name__ == '__main__': - main() + main() # 如果直接运行该文件,则执行main函数 diff --git a/src/config.py b/src/config.py index f93bb9a1..28c74aec 100644 --- a/src/config.py +++ b/src/config.py @@ -1,13 +1,21 @@ import json +import os class Config: def __init__(self): self.load_config() def load_config(self): + # 从环境变量获取GitHub Token + self.github_token = os.getenv('GITHUB_TOKEN') + with open('config.json', 'r') as f: config = json.load(f) - self.github_token = config.get('github_token') + + # 如果环境变量中没有GitHub Token,则从配置文件中读取 + if not self.github_token: + self.github_token = config.get('github_token') + self.notification_settings = config.get('notification_settings') self.subscriptions_file = config.get('subscriptions_file') - self.update_interval = config.get('update_interval', 24 * 60 * 60) # Default to 24 hours \ No newline at end of file + self.update_interval = config.get('update_interval', 24 * 60 * 60) # 默认24小时 diff --git a/src/daemon_process.py b/src/daemon_process.py index d084a925..2a1c791c 100644 --- a/src/daemon_process.py +++ b/src/daemon_process.py @@ -1,51 +1,53 @@ -import daemon -import threading -import time +import daemon # 导入daemon库,用于创建守护进程 +import threading # 导入threading库,用于多线程处理 +import time # 导入time库,用于控制时间间隔 - -from config import Config -from github_client import GitHubClient -from notifier import Notifier -from report_generator import ReportGenerator -from llm import LLM -from subscription_manager import SubscriptionManager -from scheduler import Scheduler -from logger import LOG +from config import Config # 导入配置管理类 +from github_client import GitHubClient # 导入GitHub客户端类,处理GitHub API请求 +from notifier import Notifier # 导入通知器类,用于发送通知 +from report_generator import ReportGenerator # 导入报告生成器类 +from llm import LLM # 导入语言模型类,可能用于生成报告内容 +from subscription_manager import SubscriptionManager # 导入订阅管理器类,管理GitHub仓库订阅 +from scheduler import Scheduler # 导入调度器类,用于定时执行任务 +from logger import LOG # 导入日志记录器 def run_scheduler(scheduler): + # 启动调度器的函数,用于在线程中运行 scheduler.start() def main(): - config = Config() - github_client = GitHubClient(config.github_token) - notifier = Notifier(config.notification_settings) - llm = LLM() - report_generator = ReportGenerator(llm) - subscription_manager = SubscriptionManager(config.subscriptions_file) + config = Config() # 创建配置实例 + github_client = GitHubClient(config.github_token) # 创建GitHub客户端实例 + notifier = Notifier(config.notification_settings) # 创建通知器实例 + llm = LLM() # 创建语言模型实例 + report_generator = ReportGenerator(llm) # 创建报告生成器实例 + subscription_manager = SubscriptionManager(config.subscriptions_file) # 创建订阅管理器实例 + # 创建调度器实例,配置其参数 scheduler = Scheduler( github_client=github_client, notifier=notifier, report_generator=report_generator, subscription_manager=subscription_manager, - interval=config.update_interval + interval=config.update_interval # 设置更新间隔 ) + # 创建并启动调度器运行的线程 scheduler_thread = threading.Thread(target=run_scheduler, args=(scheduler,)) - scheduler_thread.daemon = True - scheduler_thread.start() + scheduler_thread.daemon = True # 设置线程为守护线程 + scheduler_thread.start() # 启动线程 - LOG.info("Scheduler thread started.") + LOG.info("Scheduler thread started.") # 记录调度器线程已启动 - # Use python-daemon to properly daemonize the process + # 使用python-daemon库,以守护进程方式运行程序 with daemon.DaemonContext(): try: while True: - time.sleep(config.update_interval) + time.sleep(config.update_interval) # 按配置的更新间隔休眠 except KeyboardInterrupt: - LOG.info("Daemon process stopped.") + LOG.info("Daemon process stopped.") # 在接收到中断信号时记录日志 if __name__ == '__main__': main() -# nohup python3 src/daemon_process.py > logs/daemon_process.log 2>&1 & +# 启动方式:nohup python3 src/daemon_process.py > logs/daemon_process.log 2>&1 & diff --git a/src/github_client.py b/src/github_client.py index 1112122a..cf401154 100644 --- a/src/github_client.py +++ b/src/github_client.py @@ -66,9 +66,6 @@ def export_daily_progress(self, repo): file.write("\n## Issues Closed Today\n") for issue in updates['issues']: # 写入今天关闭的问题 file.write(f"- {issue['title']} #{issue['number']}\n") - file.write("\n## Pull Requests Merged Today\n") - for pr in updates['pull_requests']: # 写入今天合并的拉取请求 - file.write(f"- {pr['title']} #{pr['number']}\n") LOG.info(f"Exported daily progress to {file_path}") # 记录日志 return file_path @@ -91,9 +88,6 @@ def export_progress_by_date_range(self, repo, days): file.write(f"\n## Issues Closed in the Last {days} Days\n") for issue in updates['issues']: # 写入在指定日期内关闭的问题 file.write(f"- {issue['title']} #{issue['number']}\n") - file.write(f"\n## Pull Requests Merged in the Last {days} Days\n") - for pr in updates['pull_requests']: # 写入在指定日期内合并的拉取请求 - file.write(f"- {pr['title']} #{pr['number']}\n") LOG.info(f"Exported time-range progress to {file_path}") # 记录日志 return file_path \ No newline at end of file diff --git a/src/gradio_server.py b/src/gradio_server.py index ca8f5812..e260a2d5 100644 --- a/src/gradio_server.py +++ b/src/gradio_server.py @@ -1,38 +1,41 @@ -import gradio as gr +import gradio as gr # 导入gradio库用于创建GUI -from config import Config -from github_client import GitHubClient -from report_generator import ReportGenerator -from llm import LLM -from subscription_manager import SubscriptionManager -from logger import LOG +from config import Config # 导入配置管理模块 +from github_client import GitHubClient # 导入用于GitHub API操作的客户端 +from report_generator import ReportGenerator # 导入报告生成器模块 +from llm import LLM # 导入可能用于处理语言模型的LLM类 +from subscription_manager import SubscriptionManager # 导入订阅管理器 +from logger import LOG # 导入日志记录器 +# 创建各个组件的实例 config = Config() github_client = GitHubClient(config.github_token) llm = LLM() report_generator = ReportGenerator(llm) subscription_manager = SubscriptionManager(config.subscriptions_file) - def export_progress_by_date_range(repo, days): - raw_file_path = github_client.export_progress_by_date_range(repo, days) - report, report_file_path = report_generator.generate_report_by_date_range(raw_file_path, days) + # 定义一个函数,用于导出和生成指定时间范围内项目的进展报告 + raw_file_path = github_client.export_progress_by_date_range(repo, days) # 导出原始数据文件路径 + report, report_file_path = report_generator.generate_report_by_date_range(raw_file_path, days) # 生成并获取报告内容及文件路径 - return report, report_file_path + return report, report_file_path # 返回报告内容和报告文件路径 +# 创建Gradio界面 demo = gr.Interface( - fn=export_progress_by_date_range, - title="GitHubSentinel", + fn=export_progress_by_date_range, # 指定界面调用的函数 + title="GitHubSentinel", # 设置界面标题 inputs=[ gr.Dropdown( subscription_manager.list_subscriptions(), label="订阅列表", info="已订阅GitHub项目" - ), + ), # 下拉菜单选择订阅的GitHub项目 gr.Slider(value=2, minimum=1, maximum=7, step=1, label="报告周期", info="生成项目过去一段时间进展,单位:天"), - + # 滑动条选择报告的时间范围 ], - outputs=[gr.Markdown(), gr.File(label="下载报告")], + outputs=[gr.Markdown(), gr.File(label="下载报告")], # 输出格式:Markdown文本和文件下载 ) if __name__ == "__main__": - demo.launch(share=True, server_name="0.0.0.0") + demo.launch(share=True, server_name="0.0.0.0") # 启动界面并设置为公共可访问 + # 可选带有用户认证的启动方式 # demo.launch(share=True, server_name="0.0.0.0", auth=("django", "1234")) \ No newline at end of file diff --git a/src/llm.py b/src/llm.py index 81904f88..85f572da 100644 --- a/src/llm.py +++ b/src/llm.py @@ -1,4 +1,5 @@ import os +import json from openai import OpenAI # 导入OpenAI库用于访问GPT模型 from logger import LOG # 导入日志模块 @@ -6,18 +7,25 @@ class LLM: def __init__(self): # 创建一个OpenAI客户端实例 self.client = OpenAI() + # 从TXT文件加载提示信息 + with open("prompts/report_prompt.txt", "r", encoding='utf-8') as file: + self.system_prompt = file.read() # 配置日志文件,当文件大小达到1MB时自动轮转,日志级别为DEBUG - LOG.add("daily_progress/llm_logs.log", rotation="1 MB", level="DEBUG") + LOG.add("logs/llm_logs.log", rotation="1 MB", level="DEBUG") def generate_daily_report(self, markdown_content, dry_run=False): - # 构建一个用于生成报告的提示文本,要求生成的报告包含新增功能、主要改进和问题修复 - prompt = f"以下是项目的最新进展,根据功能合并同类项,形成一份简报,至少包含:1)新增功能;2)主要改进;3)修复问题;:\n\n{markdown_content}" - + # 使用从TXT文件加载的提示信息 + messages = [ + {"role": "system", "content": self.system_prompt}, + {"role": "user", "content": markdown_content}, + ] + if dry_run: # 如果启用了dry_run模式,将不会调用模型,而是将提示信息保存到文件中 LOG.info("Dry run mode enabled. Saving prompt to file.") with open("daily_progress/prompt.txt", "w+") as f: - f.write(prompt) + # 格式化JSON字符串的保存 + json.dump(messages, f, indent=4, ensure_ascii=False) LOG.debug("Prompt saved to daily_progress/prompt.txt") return "DRY RUN" @@ -27,10 +35,8 @@ def generate_daily_report(self, markdown_content, dry_run=False): try: # 调用OpenAI GPT模型生成报告 response = self.client.chat.completions.create( - model="gpt-3.5-turbo", # 指定使用的模型版本 - messages=[ - {"role": "user", "content": prompt} # 提交用户角色的消息 - ] + model="gpt-4o-mini", # 指定使用的模型版本 + messages=messages ) LOG.debug("GPT response: {}", response) # 返回模型生成的内容 diff --git a/src/main.py b/src/main.py deleted file mode 100644 index 45055406..00000000 --- a/src/main.py +++ /dev/null @@ -1,61 +0,0 @@ -# src/main.py - -import threading -import shlex - -from argparse import ArgumentError - -from config import Config -from scheduler import Scheduler -from github_client import GitHubClient -from notifier import Notifier -from report_generator import ReportGenerator -from llm import LLM -from subscription_manager import SubscriptionManager -from command_handler import CommandHandler -from logger import LOG - -def run_scheduler(scheduler): - scheduler.start() - -def main(): - config = Config() - github_client = GitHubClient(config.github_token) - notifier = Notifier(config.notification_settings) - llm = LLM() - report_generator = ReportGenerator(llm) - subscription_manager = SubscriptionManager(config.subscriptions_file) - command_handler = CommandHandler(github_client, subscription_manager, report_generator) - - scheduler = Scheduler( - github_client=github_client, - notifier=notifier, - report_generator=report_generator, - subscription_manager=subscription_manager, - interval=config.update_interval - ) - - scheduler_thread = threading.Thread(target=run_scheduler, args=(scheduler,)) - scheduler_thread.daemon = True - # scheduler_thread.start() - - parser = command_handler.parser - command_handler.print_help() - - while True: - try: - user_input = input("GitHub Sentinel> ") - if user_input in ['exit', 'quit']: - break - try: - args = parser.parse_args(shlex.split(user_input)) - if args.command is None: - continue - args.func(args) - except SystemExit as e: - LOG.error("Invalid command. Type 'help' to see the list of available commands.") - except Exception as e: - LOG.error(f"Unexpected error: {e}") - -if __name__ == '__main__': - main() diff --git a/src/report_generator.py b/src/report_generator.py index 37aa7bdc..a1a7a5ed 100644 --- a/src/report_generator.py +++ b/src/report_generator.py @@ -20,9 +20,6 @@ def export_daily_progress(self, repo, updates): file.write("\n## Issues\n") for issue in updates['issues']: file.write(f"- {issue['title']} #{issue['number']}\n") - file.write("\n## Pull Requests\n") - for pr in updates['pull_requests']: - file.write(f"- {pr['title']} #{pr['number']}\n") return file_path def export_progress_by_date_range(self, repo, updates, days): @@ -41,9 +38,6 @@ def export_progress_by_date_range(self, repo, updates, days): file.write("\n## Issues Closed in the Last {days} Days\n") for issue in updates['issues']: file.write(f"- {issue['title']} #{issue['number']}\n") - file.write("\n## Pull Requests Merged in the Last {days} Days\n") - for pr in updates['pull_requests']: - file.write(f"- {pr['title']} #{pr['number']}\n") LOG.info(f"Exported time-range progress to {file_path}") # 记录导出日志 return file_path @@ -59,7 +53,6 @@ def generate_daily_report(self, markdown_file_path): with open(report_file_path, 'w+') as report_file: report_file.write(report) # 写入生成的报告 - LOG.info(f"Generated report saved to {report_file_path}") # 记录生成报告日志 return report, report_file_path diff --git a/src/scheduler.py b/src/scheduler.py index 0a724895..08b92437 100644 --- a/src/scheduler.py +++ b/src/scheduler.py @@ -1,23 +1,27 @@ # src/scheduler.py -import time +import time # 导入time库,用于控制操作间隔 class Scheduler: def __init__(self, github_client, notifier, report_generator, subscription_manager, interval=86400): - self.github_client = github_client - self.notifier = notifier - self.report_generator = report_generator - self.subscription_manager = subscription_manager - self.interval = interval + # 初始化调度器,设置必要的属性 + self.github_client = github_client # GitHub客户端实例,用于与GitHub交互 + self.notifier = notifier # 通知器实例,用于发送通知 + self.report_generator = report_generator # 报告生成器实例,用于生成报告 + self.subscription_manager = subscription_manager # 订阅管理器实例,用于管理订阅 + self.interval = interval # 调度间隔,默认为一天(86400秒) def start(self): + # 启动调度器 self.run() def run(self): + # 运行调度器 while True: - subscriptions = self.subscription_manager.list_subscriptions() + subscriptions = self.subscription_manager.list_subscriptions() # 获取当前所有订阅 for repo in subscriptions: - updates = self.github_client.export_daily_progress(repo) - markdown_file_path = self.report_generator.export_daily_progress(repo, updates) - self.report_generator.generate_daily_report(markdown_file_path) - time.sleep(self.interval) + # 遍历每个订阅的仓库,执行以下操作 + updates = self.github_client.export_daily_progress(repo) # 从GitHub客户端获取每日进展数据 + markdown_file_path = self.report_generator.export_daily_progress(repo, updates) # 将进展数据导出为Markdown文件 + self.report_generator.generate_daily_report(markdown_file_path) # 从Markdown文件生成日报 + time.sleep(self.interval) # 完成一轮操作后,休眠设定的间隔时间