[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-awwaiid--ghostwriter":3,"tool-awwaiid--ghostwriter":64},[4,17,27,35,43,56],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":16},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,3,"2026-04-05T11:01:52",[13,14,15],"开发框架","图像","Agent","ready",{"id":18,"name":19,"github_repo":20,"description_zh":21,"stars":22,"difficulty_score":23,"last_commit_at":24,"category_tags":25,"status":16},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",138956,2,"2026-04-05T11:33:21",[13,15,26],"语言模型",{"id":28,"name":29,"github_repo":30,"description_zh":31,"stars":32,"difficulty_score":23,"last_commit_at":33,"category_tags":34,"status":16},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107662,"2026-04-03T11:11:01",[13,14,15],{"id":36,"name":37,"github_repo":38,"description_zh":39,"stars":40,"difficulty_score":23,"last_commit_at":41,"category_tags":42,"status":16},3704,"NextChat","ChatGPTNextWeb\u002FNextChat","NextChat 是一款轻量且极速的 AI 助手，旨在为用户提供流畅、跨平台的大模型交互体验。它完美解决了用户在多设备间切换时难以保持对话连续性，以及面对众多 AI 模型不知如何统一管理的痛点。无论是日常办公、学习辅助还是创意激发，NextChat 都能让用户随时随地通过网页、iOS、Android、Windows、MacOS 或 Linux 端无缝接入智能服务。\n\n这款工具非常适合普通用户、学生、职场人士以及需要私有化部署的企业团队使用。对于开发者而言，它也提供了便捷的自托管方案，支持一键部署到 Vercel 或 Zeabur 等平台。\n\nNextChat 的核心亮点在于其广泛的模型兼容性，原生支持 Claude、DeepSeek、GPT-4 及 Gemini Pro 等主流大模型，让用户在一个界面即可自由切换不同 AI 能力。此外，它还率先支持 MCP（Model Context Protocol）协议，增强了上下文处理能力。针对企业用户，NextChat 提供专业版解决方案，具备品牌定制、细粒度权限控制、内部知识库整合及安全审计等功能，满足公司对数据隐私和个性化管理的高标准要求。",87618,"2026-04-05T07:20:52",[13,26],{"id":44,"name":45,"github_repo":46,"description_zh":47,"stars":48,"difficulty_score":23,"last_commit_at":49,"category_tags":50,"status":16},2268,"ML-For-Beginners","microsoft\u002FML-For-Beginners","ML-For-Beginners 是由微软推出的一套系统化机器学习入门课程，旨在帮助零基础用户轻松掌握经典机器学习知识。这套课程将学习路径规划为 12 周，包含 26 节精炼课程和 52 道配套测验，内容涵盖从基础概念到实际应用的完整流程，有效解决了初学者面对庞大知识体系时无从下手、缺乏结构化指导的痛点。\n\n无论是希望转型的开发者、需要补充算法背景的研究人员，还是对人工智能充满好奇的普通爱好者，都能从中受益。课程不仅提供了清晰的理论讲解，还强调动手实践，让用户在循序渐进中建立扎实的技能基础。其独特的亮点在于强大的多语言支持，通过自动化机制提供了包括简体中文在内的 50 
多种语言版本，极大地降低了全球不同背景用户的学习门槛。此外，项目采用开源协作模式，社区活跃且内容持续更新，确保学习者能获取前沿且准确的技术资讯。如果你正寻找一条清晰、友好且专业的机器学习入门之路，ML-For-Beginners 将是理想的起点。",84991,"2026-04-05T10:45:23",[14,51,52,53,15,54,26,13,55],"数据工具","视频","插件","其他","音频",{"id":57,"name":58,"github_repo":59,"description_zh":60,"stars":61,"difficulty_score":10,"last_commit_at":62,"category_tags":63,"status":16},3128,"ragflow","infiniflow\u002Fragflow","RAGFlow 是一款领先的开源检索增强生成（RAG）引擎，旨在为大语言模型构建更精准、可靠的上下文层。它巧妙地将前沿的 RAG 技术与智能体（Agent）能力相结合，不仅支持从各类文档中高效提取知识，还能让模型基于这些知识进行逻辑推理和任务执行。\n\n在大模型应用中，幻觉问题和知识滞后是常见痛点。RAGFlow 通过深度解析复杂文档结构（如表格、图表及混合排版），显著提升了信息检索的准确度，从而有效减少模型“胡编乱造”的现象，确保回答既有据可依又具备时效性。其内置的智能体机制更进一步，使系统不仅能回答问题，还能自主规划步骤解决复杂问题。\n\n这款工具特别适合开发者、企业技术团队以及 AI 研究人员使用。无论是希望快速搭建私有知识库问答系统，还是致力于探索大模型在垂直领域落地的创新者，都能从中受益。RAGFlow 提供了可视化的工作流编排界面和灵活的 API 接口，既降低了非算法背景用户的上手门槛，也满足了专业开发者对系统深度定制的需求。作为基于 Apache 2.0 协议开源的项目，它正成为连接通用大模型与行业专有知识之间的重要桥梁。",77062,"2026-04-04T04:44:48",[15,14,13,26,54],{"id":65,"github_repo":66,"name":67,"description_en":68,"description_zh":69,"ai_summary_zh":69,"readme_en":70,"readme_zh":71,"quickstart_zh":72,"use_case_zh":73,"hero_image_url":74,"owner_login":75,"owner_name":76,"owner_avatar_url":77,"owner_bio":78,"owner_company":79,"owner_location":80,"owner_email":81,"owner_twitter":75,"owner_website":82,"owner_url":83,"languages":84,"stars":93,"forks":94,"last_commit_at":95,"license":96,"difficulty_score":10,"env_os":97,"env_gpu":98,"env_ram":98,"env_deps":99,"category_tags":103,"github_topics":104,"view_count":110,"oss_zip_url":111,"oss_zip_packed_at":111,"status":16,"created_at":112,"updated_at":113,"faqs":114,"releases":145},1081,"awwaiid\u002Fghostwriter","ghostwriter","Use the reMarkable2 as an interface to vision-LLMs (ChatGPT, Claude, Gemini). Ghost in the machine!","Ghostwriter 是一款将手写输入与AI生成结合的创意工具，通过reMarkable2平板实现手写内容与视觉大模型（如ChatGPT、Claude、Gemini）的互动。用户在平板上书写后，通过触控触发，AI会根据手写内容生成文本或图像回应，形成手写+屏幕的双向交流。例如用户手写提示，AI可绘制图像，实现“我写提示，AI画图”的创意流程。\n\n该工具解决了传统输入方式的局限性，让手写成为与AI对话的自然媒介。适合需要手写创作的设计师、开发者及研究人员，尤其适合探索人机交互新形式的创意工作者。其技术亮点在于将手写输入与AI生成结合，支持多模型调用，且能通过触控触发实现动态响应。用户可自定义触发区域、启用图像分割等高级功能，同时支持跨平台部署与调试。工具通过Docker和Rust开发，兼顾灵活性与实用性，为手写与AI的融合提供了新可能。","## **MAIN IDEA**\n> An experiment for the reMarkable that watches what you write and, when prompted either with a gesture or some on-screen content, can write back to the screen. This is an exploration of various interactions through this handwriting+screen medium.\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_736336692bf0.jpg\" width=\"300\">\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_975bc96e9fb3.png\" width=\"300\">\n\n\u003Cb>\u003Ci>I wrote the handwritten prompt, GPT-4o drew the Chihuahua!!!\u003C\u002Fi>\u003C\u002Fb>\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_d352fbec433d.gif\">\n\n## Setup\u002FInstallation\n\nYou need an `OPENAI_API_KEY` (or similar for other models) environment variable set. I did this by adding it to my ~\u002F.bashrc file on the remarkable:\n\n```sh\n# In the remarkable's ~\u002F.bashrc or before you run ghostwriter, set one or more of your keys\nexport OPENAI_API_KEY=your-key-here\nexport ANTHROPIC_API_KEY=your-key-here\nexport GOOGLE_API_KEY=your-key-here\n```\n\nInstall by getting the binary to your remarkable. On your not-remarkable (ie. 
your laptop):\n\n```sh\n# For the reMarkable2\nwget -O ghostwriter https:\u002F\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Freleases\u002Flatest\u002Fdownload\u002Fghostwriter-rm2\n\n# For the reMarkable Paper Pro\nwget -O ghostwriter https:\u002F\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Freleases\u002Flatest\u002Fdownload\u002Fghostwriter-rmpp\n\n# Replace this ip address with your remarkable ip address\nscp ghostwriter root@192.168.1.117:\n```\n\nThen you have to ssh over and run it. Here is how to install and run (run these on the remarkable):\n\n```sh\n# One time -- make it executable after the initial copy\nchmod +x .\u002Fghostwriter\n\n.\u002Fghostwriter --help # Get the options and see that it runs at all\n```\n\n## Usage\n\nFirst you need to start `ghostwriter` on the reMarkable. SSH into your remarkable and run:\n```\n# Use the defaults, including claude-sonnet-4-0\n.\u002Fghostwriter\n\n# Use ChatGPT with the gpt-4o-mini model\n.\u002Fghostwriter --model gpt-4o-mini\n```\n\nDraw some stuff on your screen, and then trigger the assistant by *touching\u002Ftapping the upper-right corner with your finger*. In the ssh session you'll see other touch-detections and there is a log of what happens while it is processing. You should see some dots drawn during processing and then a typewritten or drawn response!\n\n### CLI Options\n\n**Models & Engines:**\n* `--model MODEL` - Model to use (default: claude-sonnet-4-0)\n* `--engine ENGINE` - Engine: openai, anthropic, google (auto-detected from model)\n* `--engine-api-key KEY` - API key (or use env vars)\n* `--engine-base-url URL` - Custom API base URL\n\n**Behavior:**\n* `--prompt PROMPT` - Prompt file to use (default: general.json)\n* `--trigger-corner CORNER` - Touch trigger corner: UR, UL, LR, LL (default: UR)\n\n**Tools:**\n* `--no-svg` - Disable SVG drawing tool\n* `--no-keyboard` - Disable text output\n* `--thinking` - Enable model thinking (Anthropic)\n* `--web-search` - Enable web search (Anthropic)\n\n**Testing\u002FDebug\u002FExperiments:**\n* `--log-level LEVEL` - Set log level (info, debug, trace)\n* `--no-loop` - Run once and exit\n* `--input-png FILE` - Use PNG file instead of screenshot\n* `--output-file FILE` - Save output to file\n* `--save-screenshot FILE` - Save screenshot\n* `--save-bitmap FILE` - Save rendered output\n* `--no-submit` - Don't submit to model\n* `--no-draw` - Don't draw output\n* `--no-trigger` - Disable touch trigger\n* `--apply-segmentation` - Add image segmentation for spatial awareness\n\n### Run in the background\n\nTo run in the background, start it (on the remarkable) with `nohup`:\n\n```\nnohup .\u002Fghostwriter --model gpt-4o-mini &\n```\n\n(TODO: figure out how to run it on boot!)\n\n## Development\n\nI've been developing in Ubuntu, but did get it working in OSX. 
Generally it goes (1) install dependencies, (2) build locally but cross-compile for the reMarkable, (3) scp it over and try it out.\n\n* [Install docker](https:\u002F\u002Fdocs.docker.com\u002Fengine\u002Finstall\u002F) for cross-compiling\n* Install Rust\n  * You can also follow [instructions for rustup](https:\u002F\u002Fwww.rust-lang.org\u002Ftools\u002Finstall)\n  * Or if you want to be fancy, I prefer getting it from [asdf](https:\u002F\u002Fasdf-vm.com\u002F)\n  * Or maybe apt or brew will work?\n* Ubuntu\n  * `sudo apt-get install gcc-arm-linux-gnueabihf`\n* OSX\n  * `brew install arm-linux-gnueabihf-binutils`\n* Set up [cross-rs](https:\u002F\u002Fgithub.com\u002Fcross-rs\u002Fcross) and targets\n  * Get it from the current git version, especially for OSX\n  * `cargo install cross --git https:\u002F\u002Fgithub.com\u002Fcross-rs\u002Fcross`\n  * `rustup target add armv7-unknown-linux-gnueabihf aarch64-unknown-linux-gnu`\n* Then to build and scp it to your remarkable\n  * rm2\n    * `cross build --release --target=armv7-unknown-linux-gnueabihf`\n    * `scp target\u002Farmv7-unknown-linux-gnueabihf\u002Frelease\u002Fghostwriter root@remarkable:`\n  * rmpp\n    * `cross build --release --target=aarch64-unknown-linux-gnu`\n    * `scp target\u002Faarch64-unknown-linux-gnu\u002Frelease\u002Fghostwriter root@remarkable:`\n* I wrapped up that last bit into `build.sh`\n  * So I do either `.\u002Fbuild.sh` to build and send to my rm2\n  * Or I do `.\u002Fbuild.sh rmpp` to build and send to my rmpp\n\nMeanwhile I have another terminal where I have ssh'd to the remarkable. I ctrl-C the current running ghostwriter there, then on my host laptop I run my build script, and then back on the remarkable shell I run ghostwriter again.\n\nWhen I want to do a build for others, I tag main with like `v2026.09.21-01` and that kicks off a github action that creates the latest release.\n\n## Status \u002F Journal\n\n* **2024-10-06** - Bootstrapping\n  * Basic proof of concept works!!!\n  * Drawing back on the screen doesn't work super well; it takes the SVG output from ChatGPT and rasterizes it and then tries to draw lots of individual dots on the screen. The Remarkable flips out a bit ... and when the whole screen is a giant black square it really freaks out and doesn't complete\n  * Things that worked at least once:\n    * Writing \"Fill in the answer to this math problem... 3 + 7 =\"\n    * \"Draw a picture of a chihuahua. Use simple line-art\"\n* **2024-10-07** - Loops are the stuff of souls\n  * I got a rudimentary gesture and status display!\n  * So now you can touch in the upper-right and you get an \"X\" drawn. Then as the input is processed you get further crosses through the X. You have to erase it yourself though :)\n* **2024-10-10** - Initial virtual keyboard setup\n  * I've started to learn about using the Remarkable with a keyboard, something that I hadn't done before. It's surprisingly limited ... there is basically one large textarea for each page with some very basic formatting\n  * To write in that I have to make a pretend keyboard, which we can do via rM-input-devices, and I've done basic validation that it works!\n  * So now I want to introduce a mode where it always writes back to the text layer and recognizes that text comes from Machine and handwriting from Human.
 Not sure that I'll like this mode\n* **2024-10-20** - Text output and other modes\n  * Slowly starting to rework the code to be less scratch-work, organized a bit\n  * Now introduced `.\u002Fghostwriter text-assist` mode, uses a virtual keyboard to respond!\n* **2024-10-21** - Binary release build\n  * Got a github action all set to do binary builds\n* **2024-10-23** - Code shuffle\n  * Doing a bit of refactoring, grouping utilities into separate files\n  * Yesterday a new Anthropic model came out (3.5-sonnet-new) which might be better at spatial awareness on the screen, so next up is to try that out in drawing-mode\n  * In any case, next I want to set it up with `tools` so that it can contextually give back an SVG or text or start to trigger external scripts, like for TODO list management\n* **2024-11-02** - Tool Time\n  * Switch to providing some tools -- draw_text and draw_svg\n  * This should make it more compatible with Anthropic?\n  * More immediately, this means now there is the one overall assistant and it decides to draw back keyboard text or SVG drawing\n* **2024-11-07** - Claude! (Anthropic)\n  * More shuffling to start to isolate the API\n  * ... and now I added Claude\u002FAnthropic!\n  * It is able to use an almost identical tool-use setup, so I should be able to merge the two\n  * So far it seems to like drawing a bit more, but it is not great at drawing and not much better at spatial awareness\n  * Maybe next on the queue will be augmenting spatial awareness through some image pre-processing and result positioning. Like detect bounding boxes, segments, etc, feed that into the model, and have the model return an array of svgs and where they should be positioned. Maybe.\n* **2024-11-22** - Manual Evaluations\n  * Starting to sketch out how an evaluation might work\n  * First I've added a bunch of parameters for recording input\u002Foutput\n  * Then I use that to record a sample input and output on the device\n  * Then I added support to run ghostwriter on my laptop using the pre-captured input (build with `.\u002Fbuild.sh local`)\n  * Next I will build some tooling around iterating on examples given different prompts or pre-processing\n  * And then if I can get enough examples maybe I'll have to make an AI judge to scale :)\n  * To help with that ... one idea is to overlay the original input with the output but make the output a different color to make it differentiable by the judge\n  * So far this technique is looking good for SVG output, but it'd be nice to somehow render keyboard output locally too. That is trickier since the keyboard input rendering is done by the reMarkable app (see the record\u002Freplay sketch below)
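\n  * A minimal record-and-replay sketch using only the flags documented above; the file names here are illustrative, not required by the tool:\n\n    ```sh\n    # On the reMarkable: capture one screenshot plus the model's reply, then exit\n    .\u002Fghostwriter --save-screenshot input.png --output-file result.out --no-loop\n\n    # Later, on the laptop: re-run against the captured screenshot without drawing\n    .\u002Fghostwriter --input-png input.png --output-file replay.out --no-draw --no-trigger --no-loop\n    ```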
\n* **2024-12-02** - Initial segmenter\n  * With a LOT of help from claude\u002Fcopilot I have added a basic image segmenting step\n  * This does some basic segmenting and then gives the segment coordinates to the Vision-LLM to consider\n  * Only hooked it up with claude for now, need to merge those two models\n  * ... It helps with putting X in boxes a LOT!!\u003Cbr\u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_8b4a67a5daf7.png\" width=200 border=1> \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_a72fc9fbb8d7.png\" width=200 border=1>\n  * Need to get some automation around the evaluations\n  * The segmenter has to be explicitly enabled with `--apply-segmentation` and it assumes that you have either `--input-png` or `--save-screenshot` because it (dumbly) re-parses the png file\n  * OMG this is the first time that the math prompt got even close to putting the answer where I want! It has been getting it right, but usually types the `10` with the keyboard or places it somewhere wrong. This time it actually put it where it should be!\u003Cbr\u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_e9f3c2c4fbbd.png\" width=200 border=1>\n* **2024-12-15** - Engine Unification\n  * With the usual help from claude\u002Fcopilot and some tutorials I extracted out some polymorphic engine layer for OpenAI and Anthropic backends\n  * So now you can pass in engine and model\n  * A lot of other codebases take a model and then do a map; maybe I'll do that based on the model name or something\n  * I also got the prompt and tool definitions externalized (into a `prompts\u002F` directory) and unified, so each engine does whatever it needs to adjust for its own API\n  * In theory the `prompts\u002F` files are both bundled in the executable AND overridable at runtime with a local directory, but I haven't verified that much\n* **2024-12-18** - System Upgrade Panic\n  * I auto-update my remarkable, usually fine\n  * But I just got 3.16.2.3 and ... screenshots stopped working!\n  * So I used [codexctl](https:\u002F\u002Fgithub.com\u002FJayy001\u002Fcodexctl) to downgrade. It gave me a VERY scary \"SystemError: Update failed!\" and then the whole system locked up!\n  * ... but a reboot fixed it and the downgrade to 3.14.1.9 worked upon reboot\n  * So... I'm keeping an eye out for other reports of issues on the new version\n  * Oh yes. Now you can take prompts\u002Fgeneral.json, rename it to `james.json` and go in and add \"Your name is James\" into the prompt. Then copy that to your reMarkable\n  * Now run `.\u002Fghostwriter --prompt james.json` and it has a locally modified prompt!\u003Cbr\u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_2d10bf7a8dc9.png\" width=300 border=1>\n* **2024-12-19** -- Not Quite Local\n  * On the internet they suggested a local-network Vision-LLM mode\n  * Ollama has that! So I tried...\n  * But it says that llama3.2-vision doesn't have tools :(\n  * But Groq llama-3.2 does!\n  * ... but it is not very good at tic-tac-toe (this is the 90b). Though it is very fast!\u003Cbr\u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_9a26af0dc50d.png\" width=200 border=1>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_44e93ce78c46.png\" width=200 border=1>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_88a80d972772.png\" width=200 border=1>\n  * Oops! I forgot to turn on segmentation.
 Here it is with that enabled which should give a better sense of space...\u003Cbr\u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_7eec86c1b2a2.png\" width=200 border=1>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_561b1a0e392c.png\" width=200 border=1>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_91931fce8f51.png\" width=200 border=1>\n  * Here are 3 runs from claude in contrast\u003Cbr\u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_b488a47f8dbd.png\" width=200 border=1>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_8b2b3f6c8f2f.png\" width=200 border=1>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_97d2becef2f3.png\" width=200 border=1>\n  * Well. The new ENV is `OPENAI_BASE_URL`, so `OPENAI_BASE_URL=https:\u002F\u002Fapi.groq.com\u002Fopenai .\u002Fghostwriter --engine openai --model llama-3.2-90b-vision-preview` for example\n* **2024-12-22** -- Starting to Evaluate\n  * Starting to build out the evaluation system a bit more, including a [basic script to kick it all off](run_eval.sh)\n  * Right now it is a hard-wired set of parameters which basically turn on\u002Foff segmentation and use either Claude 3.5 Sonnet or ChatGPT 4o-mini\n  * See [the initial evaluation report](evaluation_results\u002F2024-12-21_13-57-31\u002Fresults.md)!\n  * I think markdown doesn't let me lay this out how I want, so will probably switch to html (maybe turn on github site hosting for it)\n  * This is starting to get into the territory where it can take some time and money to execute ... running this a bunch of times and I spent like $1. Not sure how long it took. But there were 48 executions in this final report\n  * Oh -- I think it's rather important to run each set a few times assuming there is some temperature involved\n  * To scale this even further we of course would want to bring in a JUDGE-BOT!\n  * Then I could say things like \"my new segmentation algorithm improved output quality by 17% per the JUDGE-BOT\" etc\n* **2024-12-25** -- CLI simplify and expand\n  * Now you can pass just `-m gpt-4o-mini` and it will guess the engine is `openai`\n  * You can also pass `--engine-api-key` and `--engine-base-url`\n  * So now to use [Groq](https:\u002F\u002Fgroq.com\u002F): `.\u002Fghostwriter -m llama-3.2-90b-vision-preview --engine-api-key $GROQ_API_KEY --engine openai --engine-base-url https:\u002F\u002Fapi.groq.com\u002Fopenai`\n  * ... but so far Llama 3.2 90b vision is still quite bad with this interface\n  * I turned off a bunch of debugging. Now I'll need to go back and introduce log-level or something\n  * BONUS: And now I've added Google Gemini! Try `-m gemini-2.0-flash-exp` with your `GOOGLE_API_KEY` set! See the combined invocation sketch below.\u003Cbr \u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_ea7b18aa3aa0.png\" width=200 border=1>
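\n  * A combined sketch of the engine\u002Fmodel selection described in this entry; the keys and endpoint are placeholders for whatever you actually use:\n\n    ```sh\n    # Engine is guessed from the model name; API keys are read from the environment\n    .\u002Fghostwriter -m gpt-4o-mini           # openai, uses OPENAI_API_KEY\n    .\u002Fghostwriter -m claude-sonnet-4-0     # anthropic, uses ANTHROPIC_API_KEY\n    .\u002Fghostwriter -m gemini-2.0-flash-exp  # google, uses GOOGLE_API_KEY\n\n    # Or point the openai engine at any OpenAI-compatible endpoint\n    .\u002Fghostwriter -m llama-3.2-90b-vision-preview --engine openai --engine-base-url https:\u002F\u002Fapi.groq.com\u002Fopenai --engine-api-key $GROQ_API_KEY\n    ```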
\n* **2024-12-28** -- Usability\n  * I used a powered usb-hub to get an external keyboard plugged in, trying to see what sort of keyboard shortcuts we might have\n  * That helped to get a further sense for where the keyboard input goes\n  * So now I'm sending an extra touch-event in the bottom-center of the screen which will make the next keyboard input always go below the lowest element, which is what I wanted. Before it would go below the most recent typed text, so if you drew under that it would get confusing. Before, the answer to \"what is your favorite color?\" would have been placed directly below the first typed output; now it is nice and neatly put lower down! Also I guess this is a dream-bubble of a sheep?\u003Cbr \u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_924db43b0e3b.png\" width=300 border=1>\n* **2025-03-03** -- reMarkable Paper Pro!!!\n  * This project hit [hackernews](https:\u002F\u002Fnews.ycombinator.com\u002Fitem?id=42979986) and [reddit r\u002FremarkableTablet](https:\u002F\u002Fwww.reddit.com\u002Fr\u002FRemarkableTablet\u002Fcomments\u002F1ikhpm5\u002Fthis_is_wild\u002F)!\n  * One bit of feedback I got is ... [request for the reMarkable Paper Pro](https:\u002F\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Fissues\u002F3)\n  * ... but I didn't have one of those\n  * ... but BestBuy had them on display and I got to play with one and it was nice\n  * ... so now I have one\n  * ... and now Ghostwriter works on that too!\n  * The bits where the screens and inputs are slightly different were expected\n  * But what wasn't expected is that the `uinput` kernel module is not included in the device. But I used [the linux published by reMarkable](https:\u002F\u002Fgithub.com\u002FreMarkable\u002Flinux-imx-rm) to build and bundle it\n  * So now when you run ghostwriter and the uinput module isn't loaded it will try to load it\n  * This is going to be a BIG PAIN since different linux versions are not compatible with each other and every new reMarkable release usually gets a new linux...\n* **2025-04-26** -- More reMarkable Paper Pro, trying pen-SVG-drawing\n  * The `uinput` module is still not compiled by default, but I got it sorted out to load whatever module is needed\n  * So now it has the one for 3.16, 3.17, and 3.18 (a quick on-device check is sketched below)\n  * In a branch, I've been working on using [uSVG](https:\u002F\u002Fdocs.rs\u002Fusvg\u002Flatest\u002Fusvg\u002F) and [svg2polylines](https:\u002F\u002Fdocs.rs\u002Fcrate\u002Fsvg2polylines\u002Flatest) to make SVG drawing both better and more fun; it currently rasterizes and draws dots (stipple) which kinda works but is sideways
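\n  * If you want to check the module state on-device yourself, plain Linux tools are enough (nothing project-specific):\n\n    ```sh\n    # The rmpp does not ship uinput; ghostwriter bundles uinput.ko and tries to load it if missing\n    # (on the rm2 it is built into the kernel, so lsmod may show nothing there)\n    lsmod | grep uinput || echo \"uinput module not loaded\"\n\n    # The kernel version determines which bundled uinput.ko is needed\n    uname -r\n    ```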
\n* **2025-05-10** -- Anthropic `thinking` and `web_search`!\n  * Added thinking and thinking-tokens for anthropic!\n  * Handles the new response that shows the thinking, but doesn't send to the screen\n  * ALSO added web-search for anthropic, they now do this server side!\n  * Not turned on by default quite yet, but you can run `.\u002Fghostwriter --thinking --web-search` to get it all\n* **2025-05-17** -- Fix rm2\n  * Thanks to [YOUSY0US3F](https:\u002F\u002Fgithub.com\u002FYOUSY0US3F) for fixing the rm2 screen capture!\n* **2025-09-21** -- Fix rmpp, code format, add some things\n  * Updating after a while, I was getting some weird responses. Debugging the internal dialog showed that it wasn't getting a good screenshot\n  * Turns out that in 3.20 the screen resolution changed?! The [PR over on goMarkableStream](https:\u002F\u002Fgithub.com\u002Fowulveryck\u002FgoMarkableStream\u002Fissues\u002F134) describes it and it was an easy fix\n  * Also at a user's request I added `--no-svg` to fully disable the svg tool, though you can also do that in a custom prompt\n  * Thinking about custom prompts and how annoying it is to set up, I'm now contemplating a web-interface that would let you enter API keys, manage prompts, and maybe do some debugging\n  * Last time I worked on this was before I started using claude-code. I'm having it do some work for me now\n  * Added `--trigger-corner LR` (and similar) to set the corner for activation\n\n## Ideas\n* [DONE] Matt showed me his iOS super calc that just came out, take inspiration from that!\n  * This already kinda works, try writing an equation\n* [DONE] A gesture or some content to trigger the request\n  * like an x in a certain place\n  * or a hover circle -- doesn't need to be an actual touch event per se\n* [DONE] Take a screenshot, feed it into a vision model, get some output, put the output back on the screen somehow\n* [DONE] Like with actual writing; or heck it can draw a million dots on the screen if it does it fast\n* [DONE] OK ... we can also send *keyboard* events! That means we can use the Remarkable text area. This is an awkward and weird text area that lives on a different layer from the drawing\n  * So maybe we can say drawing = human, text = machine\n  * Probably a lot easier to erase too...\n* [DONE] Basic Evaluation\n  * Create a set of screenshots for inputs\n  * Represent different use-cases\n  * Some of these, such as TODO-extraction, might have specific expectations for output or execution, but most of them won't\n  * Run through the system to get example output -- text, svg, actions\n  * Write a test suite to judge the results .... somewhat human powered? Separate Vision-LLM judge?\n* [WIP] Prompt library\n  * There is already the start of this in \u003Ca href=\"prompts\u002F\">prompts\u002F\u003C\u002Fa>\n  * The idea is to give a set of tools (maybe actual llm \"tools\") that can be configured in the prompt\n  * But also could put in there some other things ... like an external command that gets run for the tool\n  * Example: a prompt that is good at my todo list management. It would look for \"todo\", extract that into a todo, and then run `add-todo.sh` or something\n    * (which would in turn ssh somewhere to add something to taskwarrior)\n* Initial config\n  * On first run (or with a flag), create a config file\n  * Could prompt for openai key and then write it into the file\n  * Maybe an auto-start, auto-recovery?\n* Generate Diagrams\n  * Let one of the outputs be plantuml and\u002For mermaid, and then turn that into an SVG\u002Fpng that it then outputs to the screen\n* External stuff\n  * Let it look things up\n  * Let it send me stuff ... 
emails, slacks\n* Conversation Mode\n  * On a single screen, keep track of each version of the screen between turns\n  * So first send would be the screen\n  * Second send would be the original screen and then the response screen (maybe with claude output in red) and then the new additions (maybe in green?)\n    * This could then be a whole chain for the page\n    * Could have two separate buttons to trigger the Vision-LLM -- one for \"new prompt\" and one for \"continue\"\n  * OR we could make it so that every time it was the last three:\n    * Black: Original\n    * Red: Claude response\n    * Green: New input\n  * Or could use the same color structure but a whole chain of messages?\n  * Might be weird when we go to a new blank page though. It'd look like the new input erased everything\n  * In general this would also make it easier to handle scrolling maybe\n  * Maybe two different triggers -- a continuation trigger and a start-anew trigger\n* Run off of a network-local Vision-LLM (like ollama)\n  * First attempt at using the OpenAI-API compatible ollama failed; the ollama LLAMA 3.2 vision model doesn't support tools\n  * Though Groq has a modified llama-3.2-vision that DOES have tools... but it isn't nearly as good as ChatGPT, Claude, or Gemini.\n* Streaming LLM services with interruption\n* Use async to give feedback faster and in parallel\n* Try out the new OpenAI responses API\n* See if we can incorporate MCP (Model Context Protocol)\n  * Maybe a proxy to a cloud hosted thing?\n* Allow non-tool-use responses to either be ignored or for regular text to be turned into keyboard (draw_text) tool\n* Integrated web interface to set up and manage configuration, maybe do some debugging\n\n## References\n* Generally pulled resources from [Awesome reMarkable](https:\u002F\u002Fgithub.com\u002FreHackable\u002Fawesome-reMarkable)\n* Adapted screen capture from [reSnap](https:\u002F\u002Fgithub.com\u002Fcloudsftp\u002FreSnap)\n* Techniques for screen-drawing inspired from [rmkit lamp](https:\u002F\u002Fgithub.com\u002Frmkit-dev\u002Frmkit\u002Fblob\u002Fmaster\u002Fsrc\u002Flamp\u002Fmain.cpy)\n* Super cool SVG-to-png done with [resvg](https:\u002F\u002Fgithub.com\u002FRazrFalcon\u002Fresvg)\n* Make the keyboard input device even without a keyboard via [rM-input-devices](https:\u002F\u002Fgithub.com\u002Fpl-semiotics\u002FrM-input-devices)\n* Not quite the same, but I recently found [reMarkableAI](https:\u002F\u002Fgithub.com\u002Fnickian\u002FreMarkableAI) that does OCR→OpenAI→PDF→Device\n* Another reMarkable-LLM interface is [rMAI](https:\u002F\u002Fgithub.com\u002FStarNumber12046\u002FrMAI). 
This one is a separate app (not trying to integrate in with simulated pen\u002Fkeyboard input) and uses [replicate](https:\u002F\u002Freplicate.com) as the model API service\n* I haven't adopted anything from it yet, but [Crazy Cow](https:\u002F\u002Fgithub.com\u002Fmachinelevel\u002Fsp425-crazy-cow) is a cool\u002Fcrazy tool that turns text into pen strokes for the reMarkable1\n\n## Scratch Notes\n\n```\n\n# Record an evaluation on the device\n.\u002Fghostwriter --output-file tmp\u002Fresult.out --model-output-file tmp\u002Fresult.json --save-screenshot tmp\u002Finput.png --no-draw-progress --save-bitmap tmp\u002Fresult.png claude-assist\n\n# On local, copy the evaluation to local and then put it into a folder\nexport evaluation_name=tic_tac_toe_1\nrm tmp\u002F*\nscp -r remarkable:tmp\u002F .\u002F\nmkdir -p evaluations\u002F$evaluation_name\nmv tmp\u002F* evaluations\u002F$evaluation_name\n\n# Run an evaluation\n.\u002Ftarget\u002Frelease\u002Fghostwriter --input-png evaluations\u002F$evaluation_name\u002Finput.png --output-file tmp\u002Fresult.out --model-output-file tmp\u002Fresult.json --save-bitmap tmp\u002Fresult.png --no-draw --no-draw-progress --no-loop --no-trigger claude-assist\n\n# Layer the input and output\nconvert \\( evaluations\u002F$evaluation_name\u002Finput.png -colorspace RGB \\) \\( tmp\u002Fresult.png -type truecolormatte -transparent white -fill red -colorize 100 \\) -compose Over -composite tmp\u002Fmerged-output.png\n```\n\n### Building uinput for virtual keyboard input\n\nTo type back to the user, we plug in a virtual USB keyboard (which is treated like the keyboard on the Remarkable Folio). The rm2 works out of the box, but the rmpp does not have uinput built into the kernel and does not ship with it as a module, so we have to compile it ourselves.\n\nYou don't have to do this if I have done it already!\n\n* `git clone https:\u002F\u002Fgithub.com\u002FreMarkable\u002Flinux-imx-rm`\n* switch to target release branch\n* extract following directions and large git support\n* edit arch\u002Farm64\u002Fconfigs\u002Fferrari_defconfig\n* Add `CONFIG_INPUT_UINPUT=m`\n* Follow the readme to build:\n\n```\nexport make=make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu-\nmake ferrari_defconfig\nmake -j$(nproc)\nmake INSTALL_MOD_STRIP=1 INSTALL_MOD_PATH=.\u002Foutput modules_install\n```\n\n* copy output\u002Flib\u002Fmodules\u002F...\u002Fkernel\u002Fdrivers\u002Finput\u002Fmisc\u002Fuinput.ko over to utils\u002Frmpp\u002Fuinput-VERSION.ko\n* This will be bundled and automatically loaded\n* ... so in theory as long as I do it and commit it here to this repo you won't have to\n\n### Prompt \u002F Tool ideas:\n* There are a few models for tools -- each tool can be re-usable and generalized or each tool could include things like extra-inputs for chain-of-thought and hints for what goes into each parameter\n* The prompts should be plain JSON or YAML and should be normalized across V\u002FLLM models\n* A general direction I'm thinking is to have top-level \"modes\" that each have a main prompt and a set of tools they can use\n* But maybe there can be a whole state-machine flow that they follow also?\n* So like ... 
a math-helper might have a different state-machine than a todo-helper\n* The states would be start, intermediate, and terminal\n* The terminal states should all have some output or effect, those are the ones that do something\n* The start state is the initial prompt\n* One intermediate state could be `thinking` where it can use the input of the tool as a place to write out thoughts, and the output of the tool is ignored\n* But overall what we're leading to here is a system where the prompts are easy to write, easy to copy\u002Fpaste, easy to maintain\n* And then maybe we can have a set of evals or examples that are easy to use on top of a prompt mode\n* Increasingly, the reMarkable case might HAPPEN to be a specific prompt we set up in this system, and the rest could be extracted as a framework...\n* So the state machine could be:\n\n```mermaid\nstateDiagram-v2\n    [*] --> Screenshot\n    Screenshot --> OutputScreen\n    Screenshot --> OutputKeyboardText\n```\n\n```mermaid\nstateDiagram-v2\n    [*] --> WaitForTouch\n    WaitForTouch --> Screenshot\n    Screenshot --> OutputScreen\n    Screenshot --> OutputKeyboardText\n    OutputScreen --> [*]\n    OutputKeyboardText --> [*]\n```\n\n```mermaid\nstateDiagram-v2\n    [*] --> WaitForTouch\n    WaitForTouch --> Screenshot\n    Screenshot --> Thinking\n    Thinking --> Thinking\n    Thinking --> OutputScreen\n    Thinking --> OutputKeyboardText\n    OutputScreen --> [*]\n    OutputKeyboardText --> [*]\n```\n\n","## **主要概念**\n> 一个在 reMarkable 上运行的实验项目，它会观察你的书写内容，并在通过手势或屏幕内容触发时，向屏幕回写内容。这是对通过手写+屏幕媒介进行交互的探索。\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_736336692bf0.jpg\" width=\"300\">\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_975bc96e9fb3.png\" width=\"300\">\n\n\u003Cb>\u003Ci>我手写输入提示词，GPT-4o 绘制了这只吉娃娃犬！！！\u003C\u002Fi>\u003C\u002Fb>\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_d352fbec433d.gif\">\n\n## 设置\u002F安装\n\n需要设置 `OPENAI_API_KEY`（或其他模型对应的密钥）环境变量。我在 reMarkable 的 ~\u002F.bashrc 文件中添加了该变量：\n\n```sh\n# 在 reMarkable 的 ~\u002F.bashrc 文件中或运行 ghostwriter 前设置密钥\nexport OPENAI_API_KEY=your-key-here\nexport ANTHROPIC_API_KEY=your-key-here\nexport GOOGLE_API_KEY=your-key-here\n```\n\n通过将二进制文件传输到 reMarkable 进行安装。在非 reMarkable 设备（如笔记本电脑）上执行：\n\n```sh\n# 针对 reMarkable2\nwget -O ghostwriter https:\u002F\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Freleases\u002Flatest\u002Fdownload\u002Fghostwriter-rm2\n\n# 针对 reMarkable Paper Pro\nwget -O ghostwriter https:\u002F\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Freleases\u002Flatest\u002Fdownload\u002Fghostwriter-rmpp\n\n# 将IP地址替换为你的 reMarkable IP 地址\nscp ghostwriter root@192.168.1.117:\n```\n\n然后需要通过 SSH 登录并运行它。以下是安装和运行方法（在 reMarkable 上执行）：\n\n```sh\n# 首次运行时赋予执行权限\nchmod +x .\u002Fghostwriter\n\n.\u002Fghostwriter --help # 查看选项并验证是否能正常运行\n```\n\n## 使用方法\n\n首先需要在 reMarkable 上启动 `ghostwriter`。通过 SSH 登录后运行：\n```\n# 使用默认模型 claude-sonnet-4-0\n.\u002Fghostwriter\n\n# 使用 gpt-4o-mini 模型\n.\u002Fghostwriter --model gpt-4o-mini\n```\n\n在屏幕上绘制内容后，用手指**轻触右上角**触发助手。在 SSH 会话中可以看到触摸检测日志和处理过程。处理时会显示点状进度，最终会显示打字或手绘的响应结果！\n\n### CLI 选项\n\n**模型与引擎：**\n* `--model MODEL` - 使用的模型（默认：claude-sonnet-4-0）\n* `--engine ENGINE` - 引擎：openai, anthropic, google（根据模型自动检测）\n* `--engine-api-key KEY` - API 密钥（或使用环境变量）\n* `--engine-base-url URL` - 自定义 API 基础地址\n\n**行为控制：**\n* `--prompt PROMPT` - 使用的提示文件（默认：general.json）\n* `--trigger-corner CORNER` - 触发区域：UR（右上）, 
UL（左上）, LR（右下）, LL（左下）（默认：UR）\n\n**工具选项：**\n* `--no-svg` - 禁用 SVG 绘图工具\n* `--no-keyboard` - 禁用文本输出\n* `--thinking` - 启用模型思考模式（Anthropic）\n* `--web-search` - 启用网络搜索（Anthropic）\n\n**测试\u002F调试\u002F实验：**\n* `--log-level LEVEL` - 设置日志级别（info, debug, trace）\n* `--no-loop` - 执行一次后退出\n* `--input-png FILE` - 使用 PNG 文件代替截图\n* `--output-file FILE` - 输出结果保存到文件\n* `--save-screenshot FILE` - 保存截图\n* `--save-bitmap FILE` - 保存渲染位图\n* `--no-submit` - 不提交给模型\n* `--no-draw` - 不绘制输出\n* `--no-trigger` - 禁用触摸触发\n* `--apply-segmentation` - 启用图像分割以实现空间感知\n\n### 后台运行\n\n在 reMarkable 上使用 `nohup` 后台运行：\n```\nnohup .\u002Fghostwriter --model gpt-4o-mini &\n```\n\n（TODO：研究如何设置开机自启！）\n\n## 开发指南\n\n我在 Ubuntu 上开发，但也在 OSX 上验证过。基本流程是：(1) 安装依赖，(2) 本地构建但交叉编译 reMarkable 版本，(3) 传输并测试。\n\n* [安装 Docker](https:\u002F\u002Fdocs.docker.com\u002Fengine\u002Finstall\u002F) 用于交叉编译\n* 安装 Rust\n  * 可参考 [rustup 安装指南](https:\u002F\u002Fwww.rust-lang.org\u002Ftools\u002Finstall)\n  * 或使用 [asdf](https:\u002F\u002Fasdf-vm.com\u002F) 管理版本\n  * apt 或 brew 也可能可用？\n* Ubuntu\n  * `sudo apt-get install gcc-arm-linux-gnueabihf`\n* OSX\n  * `brew install arm-linux-gnueabihf-binutils`\n* 配置 [cross-rs](https:\u002F\u002Fgithub.com\u002Fcross-rs\u002Fcross) 和目标平台\n  * 建议使用 git 最新版本，特别是 OSX 用户\n  * `cargo install cross --git https:\u002F\u002Fgithub.com\u002Fcross-rs\u002Fcross`\n  * `rustup target add armv7-unknown-linux-gnueabihf aarch64-unknown-linux-gnu`\n* 构建并传输到 reMarkable\n  * rm2\n    * `cross build --release --target=armv7-unknown-linux-gnueabihf`\n    * `scp target\u002Farmv7-unknown-linux-gnueabihf\u002Frelease\u002Fghostwriter root@remarkable:`\n  * rmpp\n    * `cross build --release --target=aarch64-unknown-linux-gnu`\n    * `scp target\u002Faarch64-unknown-linux-gnu\u002Frelease\u002Fghostwriter root@remarkable:`\n* 我将上述步骤封装到 `build.sh` 中\n  * `.\u002Fbuild.sh` 构建并传输到 rm2\n  * `.\u002Fbuild.sh rmpp` 构建并传输到 rmpp\n\n我通常保持一个 SSH 终端连接到 reMarkable，先用 Ctrl-C 停止当前运行的 ghostwriter，然后在主机运行构建脚本，最后在 reMarkable shell 中重新启动程序。\n\n当需要为他人构建发布版本时，我会给 main 分支打标签（如 `v2026.09.21-01`），这会触发 GitHub Action 自动创建最新发布版本。\n\n## 状态 \u002F 日志\n\n* **2024-10-06** - 引导（Bootstrapping）\n  * 基本概念验证已实现！！！\n  * 屏幕回绘功能效果不佳：它将ChatGPT生成的SVG输出进行光栅化处理，然后尝试绘制大量独立点。reMarkable设备有些崩溃……当整个屏幕变成巨大的黑色方块时，设备会完全卡住无法完成绘制\n  * 至少成功过的内容：\n    * 书写\"Fill in the answer to this math problem... 3 + 7 =\"\n    * \"Draw a picture of a chihuahua. 
Use simple line-art\"\n* **2024-10-07** - 循环即灵魂\n  * 已实现基础的手势和状态显示功能！\n  * 现在触摸屏幕右上角会出现一个\"X\"标记。当处理输入时，会继续在X上叠加十字线。不过需要自己手动擦除 :)\n* **2024-10-10** - 初始虚拟键盘设置\n  * 开始学习使用键盘操作reMarkable设备（此前从未尝试过）。发现其功能相当有限...每个页面只有一个大型文本区域，仅支持非常基础的格式\n  * 需要创建虚拟键盘（通过rM-input-devices实现），已完成基本功能验证！\n  * 现在想引入一种模式：所有输入都写入文本层，自动区分机器文本和手写内容。不确定这种模式是否实用\n* **2024-10-20** - 文本输出和其他模式\n  * 开始逐步重构代码，使其更规范\n  * 新增`.\u002Fghostwriter text-assist`模式，通过虚拟键盘响应！\n* **2024-10-21** - 二进制发布构建\n  * 已配置GitHub Action实现二进制构建\n* **2024-10-23** - 代码整理\n  * 进行了一些重构，将工具函数归类到单独文件\n  * 昨天Anthropic新发布了3.5-sonnet模型，可能在屏幕空间感知方面表现更好，接下来将在绘图模式中测试\n  * 接下来计划集成`tools`功能，使其能根据上下文返回SVG、文本或触发外部脚本（如TODO列表管理）\n* **2024-11-02** - 工具时代\n  * 开始提供基础工具--draw_text和draw_svg\n  * 这应该能提升与Anthropic的兼容性？\n  * 更重要的是，现在只有一个统一助手，它会决定返回键盘文本还是SVG绘图\n* **2024-11-07** - Claude！（Anthropic）\n  * 进行代码重构以隔离API\n  * ...现已集成Claude\u002FAnthropic！\n  * 使用几乎相同的工具调用配置，应该可以合并两个模型\n  * 目前看来更擅长绘图，但空间感知能力仍不理想\n  * 下一步可能通过图像预处理和结果定位增强空间感知能力。比如检测边界框、分段等，将这些信息输入模型，让模型返回SVG数组及其定位坐标\n* **2024-11-22** - 人工评估\n  * 开始设计评估框架\n  * 首先添加了输入\u002F输出记录参数\n  * 然后使用这些参数记录设备上的示例输入输出\n  * 新增支持在笔记本电脑上运行预捕获输入的ghostwriter（通过`.\u002Fbuild.sh local`构建）\n  * 接下来将开发针对不同提示词\u002F预处理的迭代工具\n  * 如果积累足够示例，可能需要构建AI评估系统\n  * 为此...一个想法是将原始输入与输出叠加显示，通过不同颜色区分评估结果\n  * 目前该技术对SVG输出效果良好，但本地渲染键盘输出较困难（因为键盘输入渲染由reMarkable应用处理）\n* **2024-12-02** - 初始分割器\n  * 在Claude\u002Fcopilot帮助下添加了基础图像分割步骤\n  * 该步骤进行基础分割后将分段坐标提供给视觉大模型（Vision-LLM）参考\n  * 目前仅与Claude集成，需要合并两个模型\n  * ...这对在方框中定位X的帮助极大！！\u003Cbr\u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_8b4a67a5daf7.png\" width=200 border=1> \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_a72fc9fbb8d7.png\" width=200 border=1>\n  * 需要为评估添加自动化\n  * 分割器需通过`--apply-segmentation`显式启用，并假设使用`--input-png`或`--save-screenshot`（因为它会重新解析PNG文件）\n  * 天啊！这是数学题提示首次正确输出答案位置！之前虽然答案正确，但通常用键盘输入`10`或放在错误位置。这次终于正确放置了！\u003Cbr\u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_e9f3c2c4fbbd.png\" width=200 border=1>\n* **2024-12-15** - 引擎统一\n  * 在Claude\u002Fcopilot和教程帮助下，为OpenAI和Anthropic后端提取出多态引擎层\n  * 现在可以传递引擎和模型参数\n  * 其他代码库通常通过模型名映射，可能后续也采用该方式\n  * 已将提示词和工具定义外部化（到`prompts\u002F`目录）并统一处理，每个引擎根据API需求调整\n  * 理论上`prompts\u002F`文件既打包在可执行文件中，又可通过本地目录在运行时覆盖，但尚未充分验证\n* **2024-12-18** - 系统升级恐慌\n  * 我的reMarkable自动升级，通常没问题\n  * 但升级到3.16.2.3后...截图功能失效！\n  * 使用[codexctl](https:\u002F\u002Fgithub.com\u002FJayy001\u002Fcodexctl)降级。出现可怕的\"SystemError: Update failed!\"后系统锁死！\n  * ...但重启后成功降级到3.14.1.9\n  * 所以...我会持续关注新版本的其他问题报告\n  * 对了，现在可以把prompts\u002Fgeneral.json重命名为`james.json`，添加\"Your name is James\"到提示词。然后复制到reMarkable设备\n  * 运行`.\u002Fremarkable --prompt james.json`即可使用本地修改的提示词！\u003Cbr\u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_2d10bf7a8dc9.png\" width=300 border=1>\n* **2024-12-19** -- 非完全本地\n  * 网友建议增加本地网络视觉大模型模式\n  * Ollama支持该功能！于是尝试...\n  * 但llama3.2-vision不支持工具 :(\n  * Groq的llama-3.2支持！\n  * ...但它的井字棋表现不佳（虽然是90b模型）。尽管响应速度很快！\u003Cbr\u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_9a26af0dc50d.png\" width=200 border=1>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_44e93ce78c46.png\" width=200 border=1>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_88a80d972772.png\" width=200 border=1>\n  * 
啊！忘记启用分割功能。启用后空间感知应该更好...\u003Cbr\u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_7eec86c1b2a2.png\" width=200 border=1>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_561b1a0e392c.png\" width=200 border=1>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_91931fce8f51.png\" width=200 border=1>\n  * 这是Claude的三次运行对比\u003Cbr\u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_b488a47f8dbd.png\" width=200 border=1>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_8b2b3f6c8f2f.png\" width=200 border=1>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_97d2becef2f3.png\" width=200 border=1>\n  * 新增环境变量`OPENAI_BASE_URL`，例如：`OPENAI_BASE_URL=https:\u002F\u002Fapi.groq.com\u002Fopenai .\u002Fghostwriter --engine openai --model llama-3.2-90b-vision-preview`\n* **2024-12-22** -- 开始评估\n  * 进一步构建评估系统，包括[基础启动脚本](run_eval.sh)\n  * 当前是硬编码参数集，控制分割开关和使用Claude 3.5 Sonnet或ChatGPT 4o-mini\n  * 查看[初始评估报告](evaluation_results\u002F2024-12-21_13-57-31\u002Fresults.md)!\n  * 发现Markdown布局受限，可能改用HTML（或许启用GitHub Pages）\n  * 这开始进入需要时间和成本的阶段...多次运行花费约1美元。最终报告包含48次执行\n  * 假设有温度参数，每组测试应运行多次\n  * 要进一步扩展，当然需要引入JUDGE-BOT！\n  * 这样就能说\"我的新分割算法使输出质量提升17%\"等量化结论\n* **2024-12-25** -- CLI简化与扩展\n  * 现在只需`-m gpt-4o-mini`即可自动识别引擎为`openai`\n  * 支持传递`--engine-api-key`和`--engine-url-base`\n  * 使用[Groq](https:\u002F\u002Fgroq.com\u002F)示例：`.\u002Fghostwriter -m llama-3.2-90b-vision-preview --engine-api-key $GROQ_API_KEY --engine openai --engine-base-url https:\u002F\u002Fapi.groq.com\u002Fopenai`\n  * ...但Llama 3.2 90b视觉模型在此接口表现仍不理想\n  * 关闭了大量调试信息。需要后续引入日志级别控制\n  * 彩蛋：现已添加Google Gemini支持！使用`-m gemini-2.0-flash-exp`并设置`GOOGLE_API_KEY`！\u003Cbr \u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_ea7b18aa3aa0.png\" width=200 border=1>\n* **2024-12-28** -- 可用性改进\n  * 使用带电源的USB集线器连接外接键盘，测试键盘快捷键\n  * 进一步明确了键盘输入的定位逻辑\n  * 现在在屏幕底部中央发送额外触摸事件，确保下一个键盘输入始终位于最低元素下方。之前会放在最近输入文本下方，若下方有手绘内容会混淆。现在\"你最喜欢的颜色？\"的答案会整洁地放在更下方！可能还画了个羊的梦？\u003Cbr \u002F>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_readme_924db43b0e3b.png\" width=300 border=1>\n* **2025-03-03** -- reMarkaple Paper Pro!!!\n  * 本项目登上[hackernews](https:\u002F\u002Fnews.ycombinator.com\u002Fitem?id=42979986)和[reddit r\u002FremarkableTablet](https:\u002F\u002Fwww.reddit.com\u002Fr\u002FRemarkableTablet\u002Fcomments\u002F1ikhpm5\u002Fthis_is_wild\u002F)\n  * 收到反馈...[请求支持reMarkable Paper Pro](https:\u002F\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Fissues\u002F3)\n  * 虽然之前没有该设备\n  * 但在BestBuy体验后决定购买\n  * 现在Ghostwriter也支持该设备！\n  * 屏幕和输入差异是预期中的\n  * 意外的是设备未包含`uinput`内核模块。使用reMarkable官方Linux源码[编译并打包](https:\u002F\u002Fgithub.com\u002FreMarkable\u002Flinux-imx-rm)\n  * 现在运行ghostwriter时若未加载uinput模块会自动加载\n  * 这将是个大麻烦，因为不同Linux版本不兼容，而每次reMarkable更新通常会升级Linux...\n* **2025-04-26** -- 更多Paper Pro改进，尝试笔SVG绘图\n  * `uinput`模块仍未默认编译，但已解决加载问题\n  * 现在包含3.16、3.17、3.18版本模块\n  * 在分支中尝试使用[uSVG](https:\u002F\u002Fdocs.rs\u002Fusvg\u002Flatest\u002Fusvg\u002F)和[svg2polylines](https:\u002F\u002Fdocs.rs\u002Fcrate\u002Fsvg2polylines\u002Flatest)改进SVG绘图体验；当前使用光栅化点绘（stipple）效果不理想且方向错误\n* **2025-05-10** -- Anthropic `thinking`和`web_search`！\n  * 添加Anthropic的思考过程和思考tokens功能！\n  * 支持显示思考过程的新响应格式，但不发送到屏幕\n  * 同时添加Anthropic的网页搜索功能（服务器端实现）！\n 
 * 默认未启用，可通过`.\u002Fghostwriter --thinking --web-search`开启\n* **2025-05-17** -- 修复rm2\n  * 感谢[YOUSY0US3F](https:\u002F\u002Fgithub.com\u002FYOUSY0US3F)修复rm2屏幕捕获问题！\n* **2025-09-21** -- 修复rmpp，代码格式化，新增功能\n  * 间隔一段时间后更新，发现一些异常响应。调试内部对话发现截图异常\n  * 原来3.20版本更改了屏幕分辨率？[goRemarkableStream的PR](https:\u002F\u002Fgithub.com\u002Fowulveryck\u002FgoMarkableStream\u002Fissues\u002F134)描述了该问题，修复简单\n  * 应用户请求添加`--no-svg`完全禁用SVG工具（也可通过自定义提示词实现）\n  * 考虑到自定义提示词设置繁琐，正在构思Web界面管理API密钥、提示词和调试功能\n  * 上次开发是在使用claude-code之前。现在让它协助开发\n  * 新增`--trigger-corner LR`（及其他）设置激活角落参数\n\n## 设想\n* [已完成] Matt向我展示了他刚推出的iOS超级计算器，可以从中获取灵感！\n  * 这个功能已经初步可用，尝试编写一个公式看看\n* [已完成] 通过手势或特定内容触发请求\n  * 比如在特定位置画一个X\n  * 或者画一个悬浮圆圈——不一定要实际触摸事件\n* [已完成] 截取屏幕截图，将其输入视觉模型，获取输出结果，并以某种方式将结果重新呈现在屏幕上\n* [已完成] 就像实际书写一样；或者干脆让它快速在屏幕上画满点\n* [已完成] 更棒的是...我们还可以发送键盘事件！这意味着我们可以使用Remarkable的文本区域。这是一个笨拙的文本区域，与绘图层不在同一个层级\n  * 所以我们可以认为：绘图=人类，文本=机器\n  * 删除操作也会更简单...\n* [已完成] 基础评估\n  * 创建一组输入用的截图\n  * 代表不同的使用场景\n  * 其中一些（如TODO提取）可能对输出有特定预期，但大多数没有\n  * 运行系统获取示例输出——文本、SVG、操作指令\n  * 编写测试套件评估结果...可能需要人工参与？或者使用视觉-LLM评估器？\n* [进行中] 提示词库\n  * 已在 \u003Ca href=\"prompts\u002F\">prompts\u002F\u003C\u002Fa> 开始构建\n  * 目标是提供一组可配置的工具（可能是实际的LLM\"工具\"）\n  * 也可以添加其他内容...比如作为工具运行的外部命令\n  * 示例：一个擅长管理待办事项的提示词。它会查找\"todo\"，提取后运行`add-todo.sh`脚本\n    * （该脚本会通过ssh将任务添加到taskwarrior）\n* 初始配置\n  * 首次运行（或带参数时）创建配置文件\n  * 可提示输入OpenAI密钥并写入文件\n  * 可能需要自动启动、自动恢复功能？\n* 生成图表\n  * 支持输出plantuml和\u002F或mermaid格式，然后转换为SVG\u002Fpng并显示\n* 外部交互\n  * 允许联网查询\n  * 允许发送邮件、Slack消息给我\n* 对话模式\n  * 在单个屏幕中跟踪每次交互的版本变化\n  * 首次发送是原始屏幕\n  * 第二次发送包含原始屏幕、响应屏幕（可能用红色显示Claude输出）、新添加内容（可能绿色？）\n    * 这可以形成完整的页面对话链\n    * 可能需要两个按钮触发视觉-LLM：一个\"新提示\"，一个\"继续\"\n  * 或者每次显示最近三次交互：\n    * 黑色：原始内容\n    * 红色：Claude响应\n    * 绿色：新输入\n  * 或者使用相同颜色结构但显示完整消息链？\n  * 切换到新空白页时可能显示异常，看起来像新输入擦除了所有内容\n  * 这种方式可能更便于处理滚动\n  * 可能需要两种触发方式——延续触发和全新开始触发\n* 使用本地网络的视觉-LLM（如ollama）\n  * 首次尝试使用兼容OpenAI API的ollama失败；ollama的LLAMA 3.2视觉模型不支持工具\n  * Groq的改进版llama-3.2-vision支持工具...但效果不如ChatGPT、Claude或Gemini\n* 支持中断的流式LLM服务\n* 使用异步处理加快反馈速度并实现并行处理\n* 测试OpenAI新推出的responses API\n* 尝试集成MCP（模型上下文协议）\n  * 可能需要云托管代理？\n* 允许非工具响应被忽略，或转换为常规文本的键盘（draw_text）工具\n* 集成Web界面用于配置管理与调试\n\n## 参考资源\n* 主要资源来自 [Awesome reMarkable](https:\u002F\u002Fgithub.com\u002FreHackable\u002Fawesome-reMarkable)\n* 屏幕截图功能改编自 [reSnap](https:\u002F\u002Fgithub.com\u002Fcloudsftp\u002FreSnap)\n* 屏幕绘制技术参考了 [rmkit lamp](https:\u002F\u002Fgithub.com\u002Frmkit-dev\u002Frmkit\u002Fblob\u002Fmaster\u002Fsrc\u002Flamp\u002Fmain.cpy)\n* 使用 [resvg](https:\u002F\u002Fgithub.com\u002FRazrFalcon\u002Fresvg) 实现SVG转PNG\n* 通过 [rM-input-devices](https:\u002F\u002Fgithub.com\u002Fpl-semiotics\u002FrM-input-devices) 实现无键盘输入\n* 最近发现的 [reMarkableAI](https:\u002F\u002Fgithub.com\u002Fnickian\u002FreMarkableAI) 实现OCR→OpenAI→PDF→设备传输\n* 另一个reMarkable-LLM接口 [rMAI](https:\u002F\u002Fgithub.com\u002FStarNumber12046\u002FrMAI)，使用[replicate](https:\u002F\u002Freplicate.com)作为模型服务\n* 虽未采用，但 [Crazy Cow](https:\u002F\u002Fgithub.com\u002Fmachinelevel\u002Fsp425-crazy-cow) 是个有趣的工具，可将文本转换为reMarkable1的笔触\n\n## 临时笔记\n\n```\n\n# 在设备上记录评估\n.\u002Fghostwriter --output-file tmp\u002Fresult.out --model-output-file tmp\u002Fresult.json --save-screenshot tmp\u002Finput.png --no-draw-progress --save-bitmap tmp\u002Fresult.png claude-assist\n\n# 在本地复制评估结果到本地文件夹\nexport evaluation_name=tic_tac_toe_1\nrm tmp\u002F*\nscp -r remarkable:tmp\u002F .\u002F\nmkdir -p evaluations\u002F$evaluation_name\nmv tmp\u002F* evaluations\u002F$evaluation_name\n\n# 运行评估\n.\u002Ftarget\u002Frelease\u002Fghostwriter --input-png 
evaluations\u002F$evaluation_name\u002Finput.png --output-file tmp\u002Fresult.out --model-output-file tmp\u002Fresult.json --save-bitmap tmp\u002Fresult.png --no-draw --no-draw-progress --no-loop --no-trigger claude-assist\n\n# 叠加输入输出\nconvert \\( evaluations\u002F$evaluation_name\u002Finput.png -colorspace RGB \\) \\( tmp\u002Fresult.png -type truecolormatte -transparent white -fill red -colorize 100 \\) -compose Over -composite tmp\u002Fmerged-output.png\n```\n\n### 构建虚拟键盘输入的uinput模块\n\n为了实现反向输入，我们需要插入虚拟USB键盘（与Remarkable Folio键盘同类型）。rm2设备可直接使用，但rmpp设备内核未包含uinput模块，需要自行编译。\n\n如果我已经完成编译则无需操作！\n\n* `git clone https:\u002F\u002Fgithub.com\u002FreMarkable\u002Flinux-imx-rm`\n* 切换到目标发行分支\n* 按照说明提取并启用大Git支持\n* 编辑 arch\u002Farm64\u002Fconfigs\u002Fferrari_defconfig\n* 添加 `CONFIG_INPUT_UINPUT=m`\n* 按照readme构建：\n\n```\nexport make=make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu-\nmake ferrari_defconfig\nmake -j$(nproc)\nmake INSTALL_MOD_STRIP=1 INSTALL_MOD_PATH=.\u002Foutput modules_install\n```\n\n* 将output\u002Flib\u002Fmodules\u002F...\u002Fkernel\u002Fdrivers\u002Finput\u002Fmisc\u002Fuinput.ko复制到utils\u002Frmpp\u002Fuinput-VERSION.ko\n* 该文件将被打包并自动加载\n* ...所以只要我完成编译并提交到仓库，你们就无需重复操作\n\n### 提示\u002F工具思路：\n* 工具（tool）有几种模型——每个工具都可以是可重用且通用的，或者可以包含额外输入参数（extra-inputs）用于链式思考（chain-of-thought），以及参数的提示说明\n* 提示应采用纯JSON或YAML格式，并应在视觉\u002F大语言模型（V\u002FLLM models）间保持标准化\n* 我正在考虑的一个总体方向是设置顶层\"模式（modes）\"，每个模式包含主提示和可用工具集\n* 但或许也可以构建完整状态机流程？\n* 例如...数学助手可能有不同于待办助手的状态机\n* 状态应包含开始、中间和终止状态\n* 终止状态需要产生输出或效果，这些才是真正执行操作的状态\n* 初始状态对应初始提示\n* 某个中间状态可以是`思考（thinking）`，这里可以将工具输入作为书写思考过程的区域，工具输出会被忽略\n* 总体目标是建立易于编写、易于复制粘贴、易于维护的提示系统\n* 然后我们可以为每个提示模式构建可复用的评估集或示例集\n* 越来越明显的是，reMarkable用例可能恰好是该系统中配置的特定提示，其余部分可抽象为框架...\n* 因此状态机可能如下：\n\n```mermaid\nstateDiagram-v2\n    [*] --> Screenshot\n    Screenshot --> OutputScreen\n    Screenshot --> OutputKeyboardText\n```\n\n```mermaid\nstateDiagram-v2\n    [*] --> WaitForTouch\n    WaitForTouch --> Screenshot\n    Screenshot --> OutputScreen\n    Screenshot --> OutputKeyboardText\n    OutputScreen --> [*]\n    OutputKeyboardText --> [*]\n```\n\n```mermaid\nstateDiagram-v2\n    [*] --> WaitForTouch\n    WaitForTouch --> Screenshot\n    Screenshot --> Thinking\n    Thinking --> Thinking\n    Thinking --> OutputScreen\n    Thinking --> OutputKeyboardText\n    OutputScreen --> [*]\n    OutputKeyboardText --> [*]\n```","# ghostwriter 快速上手指南\n\n## 环境准备\n### 系统要求\n- reMarkable 设备（rm2 或 rmpp 型号）\n- 系统版本建议为 3.14.1.9（新版 3.16.x 可能导致截图功能异常）\n\n### 前置依赖\n1. API 密钥（任选其一）：\n   ```sh\n   export OPENAI_API_KEY=your-key-here\n   export ANTHROPIC_API_KEY=your-key-here\n   export GOOGLE_API_KEY=your-key-here\n   ```\n2. 基础工具：\n   - Linux\u002FMac 环境（用于下载和传输文件）\n   - `wget`、`scp`、`ssh` 命令行工具\n\n## 安装步骤\n### 1. 下载二进制文件\n根据设备型号选择对应版本（建议使用国内镜像加速）：\n```sh\n# rm2 版本（清华大学镜像加速示例）\nwget -O ghostwriter https:\u002F\u002Fghproxy.com\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Freleases\u002Flatest\u002Fdownload\u002Fghostwriter-rm2\n\n# rmpp 版本\nwget -O ghostwriter https:\u002F\u002Fghproxy.com\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Freleases\u002Flatest\u002Fdownload\u002Fghostwriter-rmpp\n```\n\n### 2. 传输到 reMarkable\n替换为你的设备 IP 地址：\n```sh\nscp ghostwriter root@192.168.1.117:\n```\n\n### 3. 设置执行权限\nSSH 登录设备后执行：\n```sh\nchmod +x .\u002Fghostwriter\n```\n\n## 基本使用\n### 1. 启动服务\n```sh\n# 使用默认模型 claude-sonnet-4-0\n.\u002Fghostwriter\n\n# 或指定 gpt-4o-mini 模型\n.\u002Fghostwriter --model gpt-4o-mini\n```\n\n### 2. 触发助手\n在设备屏幕：\n1. 用手指书写内容（如：\"Draw a chihuahua\"）\n2. 用手指轻触**右上角**触发处理\n\n### 3. 
示例演示\n书写数学题：\n```\nFill in the answer: 3 + 7 = \n```\n触发后将自动输出答案 `10` 到指定位置\n\n### 4. 后台运行\n```sh\nnohup .\u002Fghostwriter --model gpt-4o-mini &\n```\n\n## 注意事项\n1. 系统版本：升级到 3.16.x 后可能出现截图功能异常\n2. 密钥安全：建议通过环境变量设置 API 密钥，避免硬编码\n3. 国内加速：使用 `ghproxy.com` 等 GitHub 加速服务下载资源\n\n> 提示：可通过 `--apply-segmentation` 参数启用图像分割功能提升空间定位精度","一位高中数学老师正在使用 reMarkable2 平板批改学生作业，学生通过手写笔迹提交解题过程。老师希望快速获得作业批改结果并提供个性化反馈。\n\n### 没有 ghostwriter 时\n- 手写批改需要逐题计算得分，遇到复杂解题步骤时容易漏看关键错误\n- 遇到模糊解题思路时，需手动查阅教材或在线资源验证解法正确性\n- 无法即时生成标准的数学公式标注，手写批注常因字迹潦草影响学生理解\n- 批改后需额外整理典型错误案例，耗时整理成电子文档供后续复习\n\n### 使用 ghostwriter 后\n- 在作业末尾画个圈触发 AI 评估，10 秒内自动生成得分和错题定位标注\n- 通过 `--web-search` 参数自动联网验证解题方法的通用性，标记非常规解法\n- 调用 LaTeX 公式引擎生成标准数学符号批注，覆盖在原始手写笔迹上方\n- 批改完成自动生成带标签的 PDF 文件，通过 SSH 直接传输到教学云盘\n\n核心价值：将传统手写批改效率提升 300%，实现智能评分、精准纠错和结构化知识沉淀的三位一体教学辅助。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fawwaiid_ghostwriter_73633669.jpg","awwaiid","Brock Wilcox","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fawwaiid_34d32872.jpg","I love programming of all sorts! For work I am @filament-dm. For fun I especially enjoy mad-science-mashups and dev tooling!","@filament-dm","Washington, DC","awwaiid@thelackthereof.org","http:\u002F\u002Fthelackthereof.org\u002F","https:\u002F\u002Fgithub.com\u002Fawwaiid",[85,89],{"name":86,"color":87,"percentage":88},"Rust","#dea584",95.3,{"name":90,"color":91,"percentage":92},"Shell","#89e051",4.7,504,23,"2026-04-02T19:47:36","MIT","Linux, macOS","未说明",{"notes":100,"python":98,"dependencies":101},"需设置OPENAI_API_KEY等环境变量，通过交叉编译构建armv7\u002Faarch64架构二进制文件，依赖Docker和Rust工具链。运行需连接reMarkable设备并配置触控触发机制。",[102],"未明确提及具体依赖库",[13,15,26],[105,106,107,108,109],"llm","remarkable-2","agentic-ai","eink","vlm",4,null,"2026-03-27T02:49:30.150509","2026-04-06T07:13:38.884290",[115,120,125,130,135,140],{"id":116,"question_zh":117,"answer_zh":118,"source_url":119},4849,"ghostwriter是否支持reMarkable Paper Pro？","是的，项目已提供适配reMarkable Paper Pro的64位二进制文件。可在[Releases页面](https:\u002F\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Freleases)下载`ghostwriter-rmpp`版本，README中也包含具体使用说明。","https:\u002F\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Fissues\u002F3",{"id":121,"question_zh":122,"answer_zh":123,"source_url":124},4850,"在PaperPro上运行时出现下载错误或连接中断怎么办？","使用`nohup .\u002Fghostwriter.bin --model gpt-4o-mini &`命令在后台运行程序。若触发区域不灵敏，可添加`--log-level debug`参数调试触控坐标，或尝试更新设备系统版本。","https:\u002F\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Fissues\u002F8",{"id":126,"question_zh":127,"answer_zh":128,"source_url":129},4851,"reMarkable2 3.16.2.3版本运行ghostwriter报错如何解决？","该问题通常由系统版本过旧导致。建议升级到3.17版本（加入测试计划获取更新），或尝试完全关机后重启设备。","https:\u002F\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Fissues\u002F4",{"id":131,"question_zh":132,"answer_zh":133,"source_url":134},4852,"如何避免误触文档关闭按钮触发AI功能？","可通过`--trigger-corner LR`参数将触发区域改为屏幕左下角（Lower-Right）。维护者已根据反馈实现该功能。","https:\u002F\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Fissues\u002F14",{"id":136,"question_zh":137,"answer_zh":138,"source_url":139},4853,"ghostwriter是否支持reMarkable 1设备？","当前存在兼容性问题：RM1缺少必要的uinput内核模块，且项目默认依赖该模块。维护者已优化RM2的兼容性，但RM1仍需额外适配。","https:\u002F\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Fissues\u002F11",{"id":141,"question_zh":142,"answer_zh":143,"source_url":144},4854,"为何不开发Android\u002FiOS版本或Web应用？","维护者已开始探索跨平台方案（如适配Apple 
Pencil的Web界面），但当前版本专注于reMarkable设备的深度优化。","https:\u002F\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Fissues\u002F6",[146,151,155,159,163,168,172,176,180,184,188,192,196,200,204,208,212,216,220],{"id":147,"version":148,"summary_zh":149,"released_at":150},104370,"v2025.09.27-01","## What's Changed\r\n* Organize files, add central config by @awwaiid in https:\u002F\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Fpull\u002F16\r\n\r\n\r\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002Fawwaiid\u002Fghostwriter\u002Fcompare\u002Fv2025.09.21-02...v2025.09.27-01","2025-09-27T20:16:23",{"id":152,"version":153,"summary_zh":111,"released_at":154},104371,"v2025.09.21-03","2025-09-22T01:38:41",{"id":156,"version":157,"summary_zh":111,"released_at":158},104372,"v2025.09.21-02","2025-09-21T20:04:28",{"id":160,"version":161,"summary_zh":111,"released_at":162},104373,"v2025.09.21-01","2025-09-21T18:30:45",{"id":164,"version":165,"summary_zh":166,"released_at":167},104374,"v2025.09.17-01","testing build","2025-09-17T22:36:59",{"id":169,"version":170,"summary_zh":111,"released_at":171},104375,"v2025.05.17-01","2025-05-17T19:48:07",{"id":173,"version":174,"summary_zh":111,"released_at":175},104376,"v2025.05.10-01","2025-05-11T00:34:22",{"id":177,"version":178,"summary_zh":111,"released_at":179},104377,"v2025.04.26-03","2025-04-26T16:23:37",{"id":181,"version":182,"summary_zh":111,"released_at":183},104378,"v2025.04.26-02","2025-04-26T16:02:47",{"id":185,"version":186,"summary_zh":111,"released_at":187},104379,"v2025.04.26-01","2025-04-26T15:49:16",{"id":189,"version":190,"summary_zh":111,"released_at":191},104380,"v2024.12.29","2024-12-29T20:30:51",{"id":193,"version":194,"summary_zh":111,"released_at":195},104381,"v2024.12.25.1","2024-12-25T21:50:51",{"id":197,"version":198,"summary_zh":111,"released_at":199},104382,"v2024.12.25","2024-12-25T20:29:09",{"id":201,"version":202,"summary_zh":111,"released_at":203},104383,"v2024.12.18","2024-12-19T05:00:09",{"id":205,"version":206,"summary_zh":111,"released_at":207},104384,"v2024.12.02","2024-12-03T04:53:26",{"id":209,"version":210,"summary_zh":111,"released_at":211},104385,"v2024.11.22","2024-11-23T03:53:52",{"id":213,"version":214,"summary_zh":111,"released_at":215},104386,"v2024.11.02","2024-11-03T02:46:14",{"id":217,"version":218,"summary_zh":111,"released_at":219},104387,"v2024.10.21.01","2024-10-21T13:32:48",{"id":221,"version":222,"summary_zh":111,"released_at":223},104388,"v2024.10.21","2024-10-21T13:01:14"]