Skip to content

Commit

Permalink
server: avoid loading the system prompt twice
Browse files Browse the repository at this point in the history
Signed-off-by: Jared Van Bortel <[email protected]>
  • Loading branch information
cebtenzzre committed Aug 29, 2024
1 parent 9bc60f4 commit 1efbe4e
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 11 deletions.
21 changes: 14 additions & 7 deletions gpt4all-chat/src/chatllm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,11 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
// and what the type and name of that model is. I've tried to comment extensively in this method
// to provide an overview of what we're doing here.

// We're already loaded with this model
if (isModelLoaded() && this->modelInfo() == modelInfo)
return true;
if (isModelLoaded() && this->modelInfo() == modelInfo) {
// already acquired -> keep it and reset
resetContext();
return true; // already loaded
}

// reset status
emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
Expand Down Expand Up @@ -659,20 +661,25 @@ void ChatLLM::setModelInfo(const ModelInfo &modelInfo)
emit modelInfoChanged(modelInfo);
}

void ChatLLM::acquireModel() {
void ChatLLM::acquireModel()
{
m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
emit loadedModelInfoChanged();
}

void ChatLLM::resetModel() {
void ChatLLM::resetModel()
{
m_llModelInfo = {};
emit loadedModelInfoChanged();
}

/// Handles a request to switch this instance to `modelInfo`.
///
/// A request for the model that is already loaded is ignored. Otherwise
/// m_shouldBeLoaded is set and loadModel() is called; its return value is not
/// inspected here.
void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
{
    // Ignore attempts to switch to the same model twice. The load must happen
    // only inside this check: an unconditional loadModel() ahead of it would
    // run on every request and make the check pointless.
    if (!isModelLoaded() || this->modelInfo() != modelInfo) {
        m_shouldBeLoaded = true;
        loadModel(modelInfo);
    }
}

bool ChatLLM::handlePrompt(int32_t token)
Expand Down
8 changes: 4 additions & 4 deletions gpt4all-chat/src/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,14 +361,14 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
if (modelInfo.filename().isEmpty()) {
std::cerr << "ERROR: couldn't load default model " << modelRequested.toStdString() << std::endl;
return QHttpServerResponse(QHttpServerResponder::StatusCode::BadRequest);
} else if (!loadModel(modelInfo)) {
}

// NB: this resets the context, regardless of whether this model is already loaded
if (!loadModel(modelInfo)) {
std::cerr << "ERROR: couldn't load model " << modelInfo.name().toStdString() << std::endl;
return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
}

// don't remember any context
resetContext();

const QString promptTemplate = modelInfo.promptTemplate();
const float top_k = modelInfo.topK();
const int n_batch = modelInfo.promptBatchSize();
Expand Down

0 comments on commit 1efbe4e

Please sign in to comment.