diff --git a/crates/llama-cpp-bindings/llama.cpp b/crates/llama-cpp-bindings/llama.cpp index 06fc4020de0b..31e7f3c20e1b 160000 --- a/crates/llama-cpp-bindings/llama.cpp +++ b/crates/llama-cpp-bindings/llama.cpp @@ -1 +1 @@ -Subproject commit 06fc4020de0b92ee13407fdabca7870f53c75de5 +Subproject commit 31e7f3c20e1bacb522021e46788e24c045c108f6 diff --git a/crates/llama-cpp-bindings/src/engine.cc b/crates/llama-cpp-bindings/src/engine.cc index 7981bcf82273..02b995e4e7e1 100644 --- a/crates/llama-cpp-bindings/src/engine.cc +++ b/crates/llama-cpp-bindings/src/engine.cc @@ -47,7 +47,7 @@ class TextInferenceEngineImpl : public TextInferenceEngine { uint32_t start(const rust::Str prompt, size_t max_input_length) const override { auto* ctx = ctx_.get(); llama_reset_timings(ctx); - std::vector tokens_list = tokenize(ctx, std::string(prompt), max_input_length, /* add_bos = */ true); + std::vector tokens_list = tokenize(ctx, std::string(prompt), max_input_length, /* add_bos = */ false); for (size_t i = 0; i < tokens_list.size(); i += N_BATCH) { const size_t size = std::min(N_BATCH, tokens_list.size() - i);