feat: update llama.cpp to fetch latest starcoder support #452

Merged Sep 16, 2023 (2 commits)
2 changes: 1 addition & 1 deletion crates/llama-cpp-bindings/llama.cpp
Submodule llama.cpp updated 85 files
+5 −0 .clang-tidy
+22 −0 .devops/cloud-v-pipeline
+1 −1 .devops/full-cuda.Dockerfile
+1 −1 .devops/main-cuda.Dockerfile
+3 −0 .editorconfig
+99 −38 .github/workflows/build.yml
+11 −4 .github/workflows/docker.yml
+1 −1 .github/workflows/gguf-publish.yml
+16 −14 .gitignore
+171 −42 CMakeLists.txt
+132 −65 Makefile
+27 −5 Package.swift
+24 −45 README.md
+114 −103 common/common.cpp
+11 −2 common/common.h
+9 −9 common/console.cpp
+16 −15 common/grammar-parser.cpp
+8 −8 common/log.h
+304 −0 convert-baichuan-hf-to-gguf.py
+19 −5 convert-falcon-hf-to-gguf.py
+16 −4 convert-gptneox-hf-to-gguf.py
+133 −35 convert-llama-ggml-to-gguf.py
+248 −0 convert-starcoder-hf-to-gguf.py
+8 −25 convert.py
+77 −71 examples/baby-llama/baby-llama.cpp
+0 −3 examples/beam-search/CMakeLists.txt
+4 −8 examples/beam-search/beam-search.cpp
+2 −1 examples/benchmark/CMakeLists.txt
+2 −2 examples/benchmark/benchmark-matmult.cpp
+24 −22 examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+3 −7 examples/embd-input/embd-input-lib.cpp
+0 −1 examples/embd-input/embd-input.h
+8 −8 examples/embedding/embedding.cpp
+25 −25 examples/gguf/gguf.cpp
+34 −34 examples/gptneox-wip/falcon-main.cpp
+32 −32 examples/gptneox-wip/gptneox-main.cpp
+26 −29 examples/llama-bench/llama-bench.cpp
+51 −0 examples/main-cmake-pkg/.gitignore
+36 −0 examples/main-cmake-pkg/CMakeLists.txt
+37 −0 examples/main-cmake-pkg/README.md
+2 −3 examples/main/README.md
+23 −41 examples/main/main.cpp
+25 −23 examples/perplexity/perplexity.cpp
+1 −0 examples/quantize-stats/CMakeLists.txt
+20 −36 examples/quantize-stats/quantize-stats.cpp
+1 −0 examples/quantize/CMakeLists.txt
+7 −9 examples/quantize/quantize.cpp
+3 −4 examples/save-load-state/save-load-state.cpp
+2,005 −1,939 examples/server/index.html.hpp
+37 −4 examples/server/public/index.html
+49 −47 examples/server/server.cpp
+0 −3 examples/simple/CMakeLists.txt
+0 −6 examples/simple/simple.cpp
+93 −16 examples/speculative/speculative.cpp
+10 −36 examples/train-text-from-scratch/train-text-from-scratch.cpp
+7 −1 flake.nix
+103 −33 ggml-alloc.c
+1,077 −678 ggml-cuda.cu
+105 −24 ggml-metal.m
+423 −205 ggml-metal.metal
+6 −6 ggml-opencl.cpp
+100 −50 ggml.c
+42 −32 ggml.h
+6,286 −5,485 ggml_metal_file.c
+55 −17 gguf-py/gguf/gguf.py
+1 −1 gguf-py/pyproject.toml
+34 −0 grammars/json_arr.gbnf
+12 −2 k_quants.c
+1,192 −341 llama.cpp
+13 −7 llama.h
+3 −2 pocs/vdot/vdot.cpp
+4 −0 prompts/chat-with-baichuan.txt
+1 −1 run_with_preset.py
+69 −0 scripts/LlamaConfig.cmake.in
+39 −3 scripts/build-info.cmake
+2 −0 scripts/build-info.h.in
+25 −13 scripts/build-info.sh
+2 −3 tests/CMakeLists.txt
+9 −12 tests/test-opt.cpp
+14 −12 tests/test-quantize-fns.cpp
+5 −5 tests/test-quantize-perf.cpp
+14 −24 tests/test-sampling.cpp
+7 −0 tests/test-tokenizer-0-llama.cpp
+127 −0 tests/test-tokenizer-1-llama.cpp
+0 −108 tests/test-tokenizer-1.cpp
2 changes: 1 addition & 1 deletion crates/llama-cpp-bindings/src/engine.cc
@@ -47,7 +47,7 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
   uint32_t start(const rust::Str prompt, size_t max_input_length) const override {
     auto* ctx = ctx_.get();
     llama_reset_timings(ctx);
-    std::vector<llama_token> tokens_list = tokenize(ctx, std::string(prompt), max_input_length, /* add_bos = */ true);
+    std::vector<llama_token> tokens_list = tokenize(ctx, std::string(prompt), max_input_length, /* add_bos = */ false);

     for (size_t i = 0; i < tokens_list.size(); i += N_BATCH) {
       const size_t size = std::min(N_BATCH, tokens_list.size() - i);
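Besides the submodule bump, the substantive change in this PR is flipping `add_bos` from true to false: StarCoder-style code-completion prompts are not expected to begin with a BOS token, so the engine stops prepending one at tokenization time. For context, here is a minimal sketch of what a helper with the `tokenize(ctx, text, max_input_length, add_bos)` shape could look like against the llama.cpp C API of this period (where `llama_tokenize` returns the token count, or a negative value whose magnitude is the required buffer size). The helper is project-local to this repository, and the truncation policy shown is an assumption for illustration, not the actual implementation:

#include <algorithm>
#include <string>
#include <vector>
#include "llama.h"

// Hypothetical sketch of the project-local helper invoked in the hunk above.
// Tokenizes `text` into at most `max_input_length` tokens, optionally
// prepending BOS (now disabled for StarCoder-style prompts).
static std::vector<llama_token> tokenize(llama_context* ctx,
                                         const std::string& text,
                                         size_t max_input_length,
                                         bool add_bos) {
  // Start with a buffer that is usually large enough: one token per byte,
  // plus one slot for the optional BOS token.
  std::vector<llama_token> tokens(text.size() + (add_bos ? 1 : 0));
  int n = llama_tokenize(ctx, text.c_str(), tokens.data(),
                         static_cast<int>(tokens.size()), add_bos);
  if (n < 0) {
    // Buffer was too small; the negative return carries the required size.
    tokens.resize(static_cast<size_t>(-n));
    n = llama_tokenize(ctx, text.c_str(), tokens.data(),
                       static_cast<int>(tokens.size()), add_bos);
  }
  // Truncate to the caller's cap (illustrative policy: keep the prefix).
  tokens.resize(std::min(static_cast<size_t>(n), max_input_length));
  return tokens;
}

With `add_bos = false`, the prompt tokens are fed to the batched decode loop above exactly as the model's tokenizer produces them, which is presumably what the newly supported StarCoder checkpoints expect.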