server: avoid loading the system prompt twice

Signed-off-by: Jared Van Bortel <[email protected]>
nomic-ai · Aug 29, 2024 · 1efbe4e · 1efbe4e
1 parent 9bc60f4
commit 1efbe4e
Show file tree

Hide file tree

Showing 2 changed files with 18 additions and 11 deletions.
diff --git a/gpt4all-chat/src/chatllm.cpp b/gpt4all-chat/src/chatllm.cpp
@@ -249,9 +249,11 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
     // and what the type and name of that model is. I've tried to comment extensively in this method
     // to provide an overview of what we're doing here.
 
-    // We're already loaded with this model
-    if (isModelLoaded() && this->modelInfo() == modelInfo)
-        return true;
+    if (isModelLoaded() && this->modelInfo() == modelInfo) {
+        // this model is already loaded -> keep it and just reset the context
+        resetContext();
+        return true; // already loaded
+    }
 
     // reset status
     emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
@@ -659,20 +661,25 @@ void ChatLLM::setModelInfo(const ModelInfo &modelInfo)
     emit modelInfoChanged(modelInfo);
 }
 
-void ChatLLM::acquireModel() {
+void ChatLLM::acquireModel()
+{
     m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
     emit loadedModelInfoChanged();
 }
 
-void ChatLLM::resetModel() {
+void ChatLLM::resetModel()
+{
     m_llModelInfo = {};
     emit loadedModelInfoChanged();
 }
 
 void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
 {
-    m_shouldBeLoaded = true;
-    loadModel(modelInfo);
+    // ignore requests for the model that is already loaded -- loadModel() would only reset its context
+    if (!isModelLoaded() || this->modelInfo() != modelInfo) {
+        m_shouldBeLoaded = true;
+        loadModel(modelInfo);
+    }
 }
 
 bool ChatLLM::handlePrompt(int32_t token)

diff --git a/gpt4all-chat/src/server.cpp b/gpt4all-chat/src/server.cpp
@@ -361,14 +361,14 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
     if (modelInfo.filename().isEmpty()) {
         std::cerr << "ERROR: couldn't load default model " << modelRequested.toStdString() << std::endl;
         return QHttpServerResponse(QHttpServerResponder::StatusCode::BadRequest);
-    } else if (!loadModel(modelInfo)) {
+    }
+
+    // NB: this resets the context, regardless of whether this model is already loaded
+    if (!loadModel(modelInfo)) {
         std::cerr << "ERROR: couldn't load model " << modelInfo.name().toStdString() << std::endl;
         return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
     }
 
-    // don't remember any context
-    resetContext();
-
     const QString promptTemplate    = modelInfo.promptTemplate();
     const float top_k               = modelInfo.topK();
     const int n_batch               = modelInfo.promptBatchSize();