Skip to content

Commit

Permalink
feat: support vision (#249)
Browse files Browse the repository at this point in the history
* feat: support vision

* clippy

* implement vision

* resolve data url to local file

* add model openai:gpt-4-vision-preview

* use newline to concatenate embedded text files

* set max_tokens for gpt-4-vision-preview
  • Loading branch information
sigoden authored Nov 27, 2023
1 parent 5bfe95d commit 35c7550
Show file tree
Hide file tree
Showing 19 changed files with 494 additions and 108 deletions.
100 changes: 96 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ reqwest-eventsource = "0.5.0"
simplelog = "0.12.1"
log = "0.4.20"
shell-words = "1.1.0"
mime_guess = "2.0.4"
sha2 = "0.10.8"

[dependencies.reqwest]
version = "0.11.14"
Expand Down
18 changes: 17 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Download it from [GitHub Releases](https://github.com/sigoden/aichat/releases),
- Support chat and command modes
- Use [Roles](#roles)
- Powerful [Chat REPL](#chat-repl)
- Support vision
- Context-aware conversation/session
- Syntax highlighting markdown and 200 other languages
- Stream output with hand-typing effect
Expand Down Expand Up @@ -147,9 +148,9 @@ The Chat REPL supports:
.session Start a context-aware chat session
.info session Show session info
.exit session End the current session
.file Attach files to the message and then submit it
.set Modify the configuration parameters
.copy Copy the last reply to the clipboard
.read Read files into the message and submit
.exit Exit the REPL

Type ::: to begin multi-line editing, type ::: to end it.
Expand Down Expand Up @@ -255,6 +256,17 @@ The prompt on the right side is about the current usage of tokens and the propor
compared to the maximum number of tokens allowed by the model.
### `.file` - attach files to the message
```
Usage: .file <file>... [-- text...]

.file message.txt
.file config.yaml -- convert to toml
.file a.jpg b.jpg -- What’s in these images?
.file https://ibb.co/a.png https://ibb.co/b.png -- what is the difference?
```
### `.set` - modify the configuration temporarily
```
Expand All @@ -277,6 +289,7 @@ Options:
-m, --model <MODEL> Choose a LLM model
-r, --role <ROLE> Choose a role
-s, --session [<SESSION>] Create or reuse a session
-f, --file <FILE>... Attach files to the message to be sent
-H, --no-highlight Disable syntax highlighting
-S, --no-stream No stream output
-w, --wrap <WRAP> Specify the text-wrapping mode (no*, auto, <max-width>)
Expand Down Expand Up @@ -306,6 +319,9 @@ cat config.json | aichat convert to yaml # Read stdin
cat config.json | aichat -r convert:yaml # Read stdin with a role
cat config.json | aichat -s i18n # Read stdin with a session
aichat --file a.png b.png -- diff images # Attach files
aichat --file screenshot.png -r ocr # Attach files with a role
aichat --list-models # List all available models
aichat --list-roles # List all available roles
aichat --list-sessions                    # List all available sessions
Expand Down
3 changes: 3 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ pub struct Cli {
/// Create or reuse a session
#[clap(short = 's', long)]
pub session: Option<Option<String>>,
/// Attach files to the message to be sent.
#[clap(short = 'f', long, num_args = 1.., value_name = "FILE")]
pub file: Option<Vec<String>>,
/// Disable syntax highlighting
#[clap(short = 'H', long)]
pub no_highlight: bool,
Expand Down
19 changes: 10 additions & 9 deletions src/client/common.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::{openai::OpenAIConfig, ClientConfig, Message};

use crate::{
config::GlobalConfig,
config::{GlobalConfig, Input},
render::ReplyHandler,
utils::{
init_tokio_runtime, prompt_input_integer, prompt_input_string, tokenize, AbortSignal,
Expand Down Expand Up @@ -50,7 +50,7 @@ macro_rules! register_client {
}

impl $client {
pub const NAME: &str = $name;
pub const NAME: &'static str = $name;

pub fn init(global_config: &$crate::config::GlobalConfig) -> Option<Box<dyn Client>> {
let model = global_config.read().model.clone();
Expand Down Expand Up @@ -186,22 +186,22 @@ pub trait Client {
Ok(client)
}

fn send_message(&self, content: &str) -> Result<String> {
fn send_message(&self, input: Input) -> Result<String> {
init_tokio_runtime()?.block_on(async {
let global_config = self.config().0;
if global_config.read().dry_run {
let content = global_config.read().echo_messages(content);
let content = global_config.read().echo_messages(&input);
return Ok(content);
}
let client = self.build_client()?;
let data = global_config.read().prepare_send_data(content, false)?;
let data = global_config.read().prepare_send_data(&input, false)?;
self.send_message_inner(&client, data)
.await
.with_context(|| "Failed to get answer")
})
}

fn send_message_streaming(&self, content: &str, handler: &mut ReplyHandler) -> Result<()> {
fn send_message_streaming(&self, input: &Input, handler: &mut ReplyHandler) -> Result<()> {
async fn watch_abort(abort: AbortSignal) {
loop {
if abort.aborted() {
Expand All @@ -211,12 +211,13 @@ pub trait Client {
}
}
let abort = handler.get_abort();
init_tokio_runtime()?.block_on(async {
let input = input.clone();
init_tokio_runtime()?.block_on(async move {
tokio::select! {
ret = async {
let global_config = self.config().0;
if global_config.read().dry_run {
let content = global_config.read().echo_messages(content);
let content = global_config.read().echo_messages(&input);
let tokens = tokenize(&content);
for token in tokens {
tokio::time::sleep(Duration::from_millis(10)).await;
Expand All @@ -225,7 +226,7 @@ pub trait Client {
return Ok(());
}
let client = self.build_client()?;
let data = global_config.read().prepare_send_data(content, true)?;
let data = global_config.read().prepare_send_data(&input, true)?;
self.send_message_streaming_inner(&client, handler, data).await
} => {
handler.done()?;
Expand Down
8 changes: 5 additions & 3 deletions src/client/ernie.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::{ErnieClient, Client, ExtraConfig, PromptType, SendData, Model};
use super::{ErnieClient, Client, ExtraConfig, PromptType, SendData, Model, MessageContent};

use crate::{
config::GlobalConfig,
Expand Down Expand Up @@ -198,8 +198,10 @@ fn build_body(data: SendData, _model: String) -> Value {

if messages[0].role.is_system() {
let system_message = messages.remove(0);
if let Some(message) = messages.get_mut(0) {
message.content = format!("{}\n\n{}", system_message.content, message.content)
if let (Some(message), MessageContent::Text(system_text)) = (messages.get_mut(0), system_message.content) {
if let MessageContent::Text(text) = message.content.clone() {
message.content = MessageContent::Text(format!("{}\n\n{}", system_text, text))
}
}
}

Expand Down
Loading

0 comments on commit 35c7550

Please sign in to comment.