
Context sensitive help #3556


Closed
pudepiedj wants to merge 37 commits into ggerganov:master from pudepiedj:context-sensitive-help

Commits (37)
5a5a71d
Starting gpt4all
pudepiedj Oct 4, 2023
1bb192f
Add cmap_example.cpp
pudepiedj Oct 5, 2023
aea055d
Update cmap_example
pudepiedj Oct 5, 2023
eb939e0
Update cmap-example
pudepiedj Oct 5, 2023
e3c8f7b
Update cmap-example
pudepiedj Oct 5, 2023
73fa2de
Update cmap-example
pudepiedj Oct 5, 2023
9dad8b8
Update cmap-example
pudepiedj Oct 5, 2023
7804fe0
Update cmap-example
pudepiedj Oct 5, 2023
317d195
Update cmap-examples
pudepiedj Oct 5, 2023
380a10f
Update cmap-example
pudepiedj Oct 5, 2023
275d56e
Update cmap-example
pudepiedj Oct 5, 2023
297b7b6
Automation
pudepiedj Oct 5, 2023
739d6d3
Automatic helper dev
pudepiedj Oct 6, 2023
7a4dcff
Update contextual help dev
pudepiedj Oct 6, 2023
0d70518
Update contextual help
pudepiedj Oct 6, 2023
9c5d6f0
Update helper dev
pudepiedj Oct 7, 2023
982c908
Update contextual help
pudepiedj Oct 8, 2023
32bdf0e
Final reconciliation
pudepiedj Oct 9, 2023
2e17fcf
Comment in common.cpp
pudepiedj Oct 9, 2023
3e4de67
Update find_implemented_args.py
pudepiedj Oct 9, 2023
990e8cb
New comment
pudepiedj Oct 9, 2023
f6e92a8
Merge branch 'master' into context-sensitive-help
pudepiedj Oct 9, 2023
51446bf
Naming convention
pudepiedj Oct 9, 2023
3f07ed9
Added prompt-file to hep
pudepiedj Oct 9, 2023
49244be
Merge branch 'ggerganov:master' into context-sensitive-help
pudepiedj Oct 9, 2023
094d6d6
Add help list
pudepiedj Oct 9, 2023
7636c34
Merge branch 'context-sensitive-help' of https://github.com/pudepiedj…
pudepiedj Oct 9, 2023
9abc925
Remove trailing ws
pudepiedj Oct 9, 2023
6189a9e
One more trailing ws
pudepiedj Oct 9, 2023
53dbefa
Added final newline (2)
pudepiedj Oct 9, 2023
ba32402
creadcommonh
pudepiedj Oct 9, 2023
47675b9
experiment in C
pudepiedj Oct 9, 2023
bddd099
experimental
pudepiedj Oct 9, 2023
301f13c
Compile new cmap-example
pudepiedj Oct 9, 2023
759973b
printf format
pudepiedj Oct 9, 2023
37050ba
Updated cmap-example
pudepiedj Oct 10, 2023
37ae96c
Correct bug
pudepiedj Oct 10, 2023
3 changes: 3 additions & 0 deletions Makefile
@@ -569,6 +569,9 @@ perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.
embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o common.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

cmap-example: examples/cmap-example/cmap-example.cpp build-info.h ggml.o llama.o common.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

19 changes: 17 additions & 2 deletions common/common.cpp
@@ -626,6 +626,9 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
return true;
}

// Some items were missing from this list of help messages, so the wording needs checking (they were all inserted at the end, so they also need repositioning):
// --embedding, --beams, --ppl-stride, --ppl-output-type, --memory-f32, --no-mmap, --mlock, --use-color, --nprobs, --alias, --infill, --prompt-file
// some corresponding changes to the sequence of fprintf() calls may be needed
void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf("usage: %s [options]\n", argv[0]);
printf("\n");
@@ -672,7 +675,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat);
printf(" --mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta);
printf(" --mirostat-ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau);
printf(" -l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS\n");
printf(" -l T, --logit-bias T T = TOKEN_ID(plus/minus)BIAS\n");
printf(" modifies the likelihood of token appearing in the completion,\n");
printf(" i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n");
printf(" or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n");
@@ -687,7 +690,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" --rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: loaded from model)\n");
printf(" --rope-freq-scale N RoPE frequency linear scaling factor (default: loaded from model)\n");
printf(" --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n");
printf(" --no-penalize-nl do not penalize newline token\n");
printf(" --no-penalize-nl do not penalize newline token (default is DO penalise nl token)\n");
printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
printf(" not recommended: doubles context memory required and no measurable increase in quality\n");
printf(" --temp N temperature (default: %.1f)\n", (double)params.temp);
@@ -734,6 +737,18 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" draft model for speculative decoding (default: %s)\n", params.model.c_str());
printf(" -ld LOGDIR, --logdir LOGDIR\n");
printf(" path under which to save YAML logs (no logging if unset)\n");
printf(" --ppl-stride stride for ppl calcs. 0 (default): the pre-existing approach will be used.\n");
printf(" --ppl-output-type 0 (default): ppl output as usual, 1: ppl output num_tokens, one per line\n");
printf(" --embedding 0 (default): get only sentence embedding\n");
printf(" --beams N 0 (default): if non-zero use beam search of given width N.\n");
printf(" --memory-f32 0 (default): if true (= 1) disable f16 memory.\n");
printf(" --no-mmap 0 (default): if true use mmap for faster loads.\n");
printf(" --mlock 0 (default): if true keep model in memory.\n");
printf(" --use-color 0 (default): use color to distinguish generations from inputs\n");
printf(" --nprobs N if > 0 output the probabilities of the top N tokens\n");
printf(" --alias model alias (default: 'unknown')\n");
printf(" --infill 0 (defaut) use infill mode\n");
printf(" --prompt-file name of external prompt file\n");
printf("\n");
}

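As an aside on the reworded -l/--logit-bias entry above: the TOKEN_ID(+/-)BIAS syntax packs a token id and a signed bias into one argument. A minimal sketch of parsing it follows (illustrative only; the real parsing lives in gpt_params_parse, which is not part of this diff, and the parse_logit_bias helper below is hypothetical, not code from the PR):

#include <cstdlib>
#include <string>

// Parse "15043+1" or "15043-1" into a token id and a bias value;
// '+' raises the token's likelihood, '-' lowers it.
bool parse_logit_bias(const std::string & arg, int & token_id, float & bias) {
    size_t sep = arg.find_first_of("+-");
    if (sep == std::string::npos || sep == 0) {
        return false; // no sign found, or no token id before it
    }
    token_id = std::atoi(arg.substr(0, sep).c_str());
    bias     = std::strtof(arg.substr(sep).c_str(), nullptr); // keeps the sign
    return true;
}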
25 changes: 13 additions & 12 deletions common/common.h
@@ -35,21 +35,21 @@ int32_t get_num_physical_cores();

struct gpt_params {
uint32_t seed = -1; // RNG seed
int32_t n_threads = get_num_physical_cores();
int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads)
int32_t n_threads = get_num_physical_cores(); // user-defined or num of internal physical cores
int32_t n_threads_batch = -1; // num threads for batch proc (-1 = use n_threads)
int32_t n_predict = -1; // new tokens to predict
int32_t n_ctx = 512; // context size
int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
int32_t n_batch = 512; // batch size for prompt proc (>=32 to use BLAS)
int32_t n_keep = 0; // number of tokens to keep from initial prompt
int32_t n_draft = 16; // number of tokens to draft during speculative decoding
int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
int32_t n_parallel = 1; // number of parallel sequences to decode
int32_t n_sequences = 1; // number of sequences to decode
int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
int32_t n_gpu_layers_draft = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
int32_t n_gpu_layers = -1; // num layers stored in VRAM (-1 for default)
int32_t n_gpu_layers_draft = -1; // num layers stored in VRAM for draft model (-1 for default)
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
int32_t n_probs = 0; // if > 0, output probabilities of top n_probs tokens.
int32_t n_beams = 0; // if non-zero then use beam search of given width.
float rope_freq_base = 0.0f; // RoPE base frequency
float rope_freq_scale = 0.0f; // RoPE frequency scaling factor
@@ -61,7 +61,7 @@ struct gpt_params {
float typical_p = 1.00f; // 1.0 = disabled
float temp = 0.80f; // 1.0 = disabled
float repeat_penalty = 1.10f; // 1.0 = disabled
int32_t repeat_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
int32_t repeat_last_n = 64; // last n tokens to penalize (0 = disable, -1 = ctx size)
float frequency_penalty = 0.00f; // 0.0 = disabled
float presence_penalty = 0.00f; // 0.0 = disabled
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
@@ -75,10 +75,11 @@ struct gpt_params {
std::string cfg_negative_prompt; // string to help guidance
float cfg_scale = 1.f; // How strong is guidance

std::string help = ""; // universal help parameter
std::string model = "models/7B/ggml-model-f16.gguf"; // model path
std::string model_draft = ""; // draft model for speculative decoding
std::string model_alias = "unknown"; // model alias
std::string prompt = "";
std::string prompt = ""; // user-provided single prompt
std::string prompt_file = ""; // store the external prompt file name
std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
std::string input_prefix = ""; // string to prefix user inputs with
@@ -90,11 +91,11 @@ struct gpt_params {
std::vector<std::tuple<std::string, float>> lora_adapter; // lora adapter path with user defined scale
std::string lora_base = ""; // base model path for the lora adapter

int ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
int ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line
int ppl_stride = 0; // stride for ppl calcs. 0: the pre-existing approach will be used.
int ppl_output_type = 0; // 0: ppl output as usual, 1: ppl output = num_tokens, ppl, one per line
// (which is more convenient to use for plotting)
//
bool hellaswag = false; // compute HellaSwag score over random tasks from datafile supplied in prompt
bool hellaswag = false; // compute HellaSwag score from datafile given in prompt
size_t hellaswag_tasks = 400; // number of tasks to use when computing the HellaSwag score

bool mul_mat_q = true; // if true, use mul_mat_q kernels instead of cuBLAS
@@ -109,7 +110,7 @@ struct gpt_params {
bool escape = false; // escape "\n", "\r", "\t", "\'", "\"", and "\\"
bool interactive_first = false; // wait for user input immediately
bool multiline_input = false; // reverse the usage of `\`
bool simple_io = false; // improves compatibility with subprocesses and limited consoles
bool simple_io = false; // improves compat'y with subprocs and ltd consoles
bool cont_batching = false; // insert new sequences for decoding on-the-fly

bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
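The new universal help field above is the hook for the context-sensitive behaviour. A minimal sketch of the idea, assuming a hypothetical lookup table and helper (neither is code from this PR; the PR's actual wiring is in gpt_params_parse and the example program below):

#include <cstdio>
#include <map>
#include <string>

// Hypothetical table: option flag -> one-line description, as harvested
// from the usage strings in common.cpp.
static const std::map<std::string, std::string> k_option_help = {
    { "--temp",     "temperature (default: 0.8)" },
    { "--mirostat", "use Mirostat sampling (0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)" },
};

// Print help for a single topic (e.g. params.help) instead of the full usage dump.
void print_contextual_help(const std::string & topic) {
    auto it = k_option_help.find(topic);
    if (it != k_option_help.end()) {
        printf("%s  %s\n", it->first.c_str(), it->second.c_str());
    } else {
        printf("unknown option '%s'; use --help for the full list\n", topic.c_str());
    }
}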
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
@@ -30,6 +30,7 @@ else()
add_subdirectory(embd-input)
add_subdirectory(llama-bench)
add_subdirectory(beam-search)
add_subdirectory(cmap-example)
if (LLAMA_METAL)
add_subdirectory(metal)
endif()
5 changes: 5 additions & 0 deletions examples/cmap-example/CMakeLists.txt
@@ -0,0 +1,5 @@
set(TARGET cmap-example)
add_executable(${TARGET} cmap-example.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
124 changes: 124 additions & 0 deletions examples/cmap-example/cmap-example.cpp
@@ -0,0 +1,124 @@
// example of a C/C++ equivalent data structure to the python dict in readcommonh.py

#include <map>
#include <list>
#include <string>
#include <bitset>
#include <vector>
#include <cstdio>
#include <cmath>
#include <fstream>
#include <sstream>
#include <regex>
// there may be good reasons not to sort the parameters, but here we use std::map
// (already included above), which keeps its keys in sorted order
#include <numeric>

std::vector<std::string> split_string(const std::string& str, const std::string& delimiter) {
std::vector<std::string> tokens;
std::size_t start = 0, end = 0;
bool inside_tags = false; // flag to track if we are inside "<>"

while ((end = str.find(delimiter, start)) != std::string::npos) {
std::string token = str.substr(start, end - start);

if (!inside_tags && !token.empty()) { // exclude empty substrings and anything inside "<>"
tokens.push_back(token);
}
// deal with cases where the split character occurs inside <>
// Update inside_tags flag based on "<>"
size_t open_tag_pos = str.find("<", start);
size_t close_tag_pos = str.find(">", start);
if (open_tag_pos != std::string::npos && close_tag_pos != std::string::npos && open_tag_pos < end) {
inside_tags = true;
} else if (close_tag_pos != std::string::npos && close_tag_pos < end) {
inside_tags = false;
}
start = end + delimiter.length();
}
tokens.push_back(str.substr(start));
return tokens;
}
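// Quick illustration of split_string (hypothetical input, not from a real run):
//   split_string("int32_t n_ctx = 512; // context size", " ")
//   -> {"int32_t", "n_ctx", "=", "512;", "//", "context", "size"}
// The !token.empty() test above is what collapses the runs of spaces used
// to align the trailing comments in common.h.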

void print_parameters(const std::map<std::string, std::vector<std::string>>& parameters) {
for (const auto& pair : parameters) {
const std::string& key = pair.first;
const std::vector<std::string>& value = pair.second; // usually has multiple elements
printf("key: %25s: values: ", key.c_str());
for (const std::string& element : value) {
printf("%s ", element.c_str());
}
printf("\n");
}
}
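// Illustrative output shape for one entry (assuming the n_batch line from
// common.h above; not captured from a real run):
//   key:                   n_batch: values: int32_t n_batch = 512; // batch size for prompt proc (>=32 to use BLAS)
// The %25s format right-aligns each key in a 25-character field.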

std::map<std::string, std::vector<std::string>> extract_parameters() {
std::ifstream file("common/common.h");
std::string line;
std::vector<std::string> lines;
while (std::getline(file, line)) {
lines.push_back(line);
}

std::map<std::string, std::vector<std::string>> parameters;
// logit_bias and lora_adapter fail to parse cleanly because their template types contain spaces;
// the spurious entries they create are removed below

// are we inside gpt_params?
// this for loop finds all the params inside struct gpt-params
bool inside = false;
for (const std::string& line : lines) {
std::vector<std::string> nws_elements = split_string(line, " ");
printf("nwe = ");
for (const std::string& element : nws_elements) {
printf("%s ", element.c_str());
}
printf("\n");

if (nws_elements.size() > 1 && nws_elements[0] == "struct" && nws_elements[1] == "gpt_params") {
inside = true;
}

if (nws_elements.size() > 2 && inside) {
// cannot use nws_elements[0] (the type) as the key because types are not unique, so later entries would overwrite earlier ones;
// the full token list (including the name) is kept as the value so individual entries can be adjusted later
parameters[nws_elements[1]] = nws_elements; // the assignment copies the vector

// Remove spurious entry caused by eccentric status of logit_bias
if (parameters.count("float>") && parameters["float>"][2] == "logit_bias;") {
parameters.erase("float>");
}
// Remove spurious entry caused by eccentric status of lora_adapter
if (parameters.count("float>>") && parameters["float>>"][2] == "lora_adapter;") {
parameters.erase("float>>");
}
}

// Terminate the harvest; TODO: not robust; need better terminator; this just a crude hack for now
if (nws_elements.size() > 2 && nws_elements[2] == "infill") {
inside = false;
break;
}
}
// now display them (unnecessary operationally; here for development)
print_parameters(parameters);

// return the results (will eventually become a void function)
return parameters;
}

int main() {

// replicate the parameter harvest done by readcommonh.py;
// extract_parameters() already prints the collected map (a development aid),
// so there is no need to print it a second time here
std::map<std::string, std::vector<std::string>> parameters = extract_parameters();
(void) parameters; // silence unused-variable warnings until the map is used

return 0;
}
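With the Makefile rule and the CMake target added above, the example builds from the repository root as `make cmap-example` (or through the usual CMake build) and runs as `./cmap-example`. Note that the path "common/common.h" is hard-coded in extract_parameters(), so the binary needs to be run from a directory where that relative path resolves.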