-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathmain.cpp
More file actions
48 lines (38 loc) · 1.9 KB
/
Copy pathmain.cpp
File metadata and controls
48 lines (38 loc) · 1.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#include "LlamaLib.h"

#include <iostream>
#include <memory>
#include <string>
#include <vector>
// Response text already written to stdout by streaming_callback.
static std::string previous_text = "";

/// Streaming callback that prints only the part of the response not printed yet.
///
/// Every call receives the entire response generated so far, so the text that
/// was written by the previous call is skipped. When the incoming text does not
/// extend the previously seen text (for example because a new generation has
/// started), the whole text is printed instead of indexing past its end.
///
/// @param c NUL-terminated response text generated so far; a null pointer is ignored.
static void streaming_callback(const char *c)
{
    // Building a std::string from a null pointer is undefined behaviour.
    if (c == nullptr) return;

    std::string current_text(c);

    // streaming gets the entire generated response up to now, print only the new text.
    // compare() clips the range to current_text's size, so a shorter text simply
    // compares unequal instead of making substr() throw std::out_of_range.
    const bool extends_previous =
        current_text.compare(0, previous_text.length(), previous_text) == 0;
    const std::string::size_type already_printed =
        extends_previous ? previous_text.length() : 0;

    std::cout << current_text.substr(already_printed) << std::flush;
    previous_text = current_text;
}
int main(int argc, char **argv)
{
std::string PROMPT = "The capital of";
// create LLM
LLMService* llm_service = LLMServiceBuilder().model("model.gguf").numGPULayers(10).build();
// alternatively using the LLMService constructor:
// LLMService* llm_service = new LLMService("model.gguf", 1, -1, 10);
llm_service->start();
// Optional: limit the amount of tokens that we can predict so that it doesn't produce text forever (some models do)
llm_service->set_completion_params({{"n_predict", 20}});
std::cout << "----------------------- tokenize -----------------------" << std::endl;
std::vector<int> tokens = llm_service->tokenize(PROMPT);
std::cout << "tokens: ";
for (int token : tokens) std::cout << token << " ";
std::cout << std::endl;
std::cout << std::endl << "----------------------- detokenize -----------------------" << std::endl;
std::string detokenize_response = llm_service->detokenize(tokens);
std::cout << "prompt: " << detokenize_response << std::endl;
std::cout << std::endl << "----------------------- completion (streaming) -----------------------" << std::endl;
std::cout << "response: ";
llm_service->completion(PROMPT, streaming_callback);
std::cout << std::endl;
std::cout << std::endl << "----------------------- completion (no streaming) -----------------------" << std::endl;
std::string completion_response = llm_service->completion(PROMPT);
std::cout << "response: " << completion_response << std::endl << std::endl;
delete llm_service;
return 0;
}