[Prometheus] track requested model (#5774)
* enforce prometheus as enterprise feature

* show correct error on prometheus metric when not an enterprise user

* docs: prometheus metrics enforced

* track requested model on prometheus

* docs prom metrics

* fix prom tracking failures
ishaan-jaff committed Sep 18, 2024
1 parent 5aad3e6 commit a109853
Showing 4 changed files with 22 additions and 6 deletions.
9 changes: 9 additions & 0 deletions docs/my-website/docs/proxy/prometheus.md
@@ -74,6 +74,15 @@ Use this for tracking per [user, key, team, etc.](virtual_keys)

Use this for LLM API Error monitoring and tracking remaining rate limits and token limits

#### Labels Tracked for LLM API Metrics
```
litellm_model_name: The name of the LLM model used by LiteLLM
requested_model: The model name sent in the request
model_id: The model_id of the deployment. Autogenerated by LiteLLM; each deployment has a unique model_id
api_base: The API base of the deployment
api_provider: The LLM API provider for the deployment. Example: azure, openai, vertex_ai
```
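As an illustrative aside (not part of this commit's diff), here is a minimal `prometheus_client` sketch of a counter carrying these labels; all label values below are hypothetical:

```python
from prometheus_client import CollectorRegistry, Counter, generate_latest

registry = CollectorRegistry()

# Counter with the deployment-level labels described above
deployment_success = Counter(
    "litellm_deployment_success_responses",
    "Successful LLM API calls per deployment (illustrative copy)",
    labelnames=["requested_model", "litellm_model_name", "model_id", "api_base", "api_provider"],
    registry=registry,
)

# Hypothetical label values, for illustration only
deployment_success.labels(
    requested_model="gpt-4o",                     # model name the client asked for
    litellm_model_name="azure/my-gpt-4o",         # underlying deployment model
    model_id="model-abc123",                      # autogenerated deployment id (made up)
    api_base="https://example-resource.openai.azure.com",
    api_provider="azure",
).inc()

# Prints the series in Prometheus exposition format, roughly:
# litellm_deployment_success_responses_total{api_base="...",...,requested_model="gpt-4o"} 1.0
print(generate_latest(registry).decode())
```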

| Metric Name | Description |
|----------------------|--------------------------------------|
| `litellm_deployment_success_responses` | Total number of successful LLM API calls for deployment |
13 changes: 9 additions & 4 deletions litellm/integrations/prometheus.py
@@ -193,17 +193,17 @@ def __init__(
self.litellm_deployment_success_responses = Counter(
name="litellm_deployment_success_responses",
documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
labelnames=_logged_llm_labels,
labelnames=["requested_model"] + _logged_llm_labels,
)
self.litellm_deployment_failure_responses = Counter(
name="litellm_deployment_failure_responses",
documentation="LLM Deployment Analytics - Total number of failed LLM API calls for a specific LLM deployment. exception_status is the status of the exception from the llm api",
labelnames=_logged_llm_labels + ["exception_status"],
labelnames=["requested_model", "exception_status"] + _logged_llm_labels,
)
self.litellm_deployment_total_requests = Counter(
name="litellm_deployment_total_requests",
documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
labelnames=_logged_llm_labels,
labelnames=["requested_model"] + _logged_llm_labels,
)

# Deployment Latency tracking
@@ -440,6 +440,7 @@ def set_llm_deployment_failure_metrics(self, request_kwargs: dict):
_metadata = _litellm_params.get("metadata", {})
litellm_model_name = request_kwargs.get("model", None)
api_base = _metadata.get("api_base", None)
model_group = _metadata.get("model_group", None)
if api_base is None:
api_base = _litellm_params.get("api_base", None)
llm_provider = _litellm_params.get("custom_llm_provider", None)
@@ -465,13 +466,15 @@ def set_llm_deployment_failure_metrics(self, request_kwargs: dict):
api_base=api_base,
api_provider=llm_provider,
exception_status=exception_status_code,
requested_model=model_group,
).inc()

self.litellm_deployment_total_requests.labels(
litellm_model_name=litellm_model_name,
model_id=model_id,
api_base=api_base,
api_provider=llm_provider,
requested_model=model_group,
).inc()

pass
@@ -534,7 +537,7 @@ def set_llm_deployment_success_metrics(

"""
log these labels
["litellm_model_name", "model_id", "api_base", "api_provider"]
["litellm_model_name", "requested_model", "model_id", "api_base", "api_provider"]
"""
self.set_deployment_healthy(
litellm_model_name=litellm_model_name,
@@ -548,13 +551,15 @@ def set_llm_deployment_success_metrics(
model_id=model_id,
api_base=api_base,
api_provider=llm_provider,
requested_model=model_group,
).inc()

self.litellm_deployment_total_requests.labels(
litellm_model_name=litellm_model_name,
model_id=model_id,
api_base=api_base,
api_provider=llm_provider,
requested_model=model_group,
).inc()

# Track deployment Latency
2 changes: 1 addition & 1 deletion litellm/proxy/_types.py
@@ -1844,7 +1844,7 @@ class CommonProxyErrors(str, enum.Enum):
db_not_connected_error = "DB not connected"
no_llm_router = "No models configured on proxy"
not_allowed_access = "Admin-only endpoint. Not allowed to access this."
not_premium_user = "You must be a LiteLLM Enterprise user to use this feature. If you have a license please set `LITELLM_LICENSE` in your env. If you want to obtain a license meet with us here: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat"
not_premium_user = "You must be a LiteLLM Enterprise user to use this feature. If you have a license please set `LITELLM_LICENSE` in your env. If you want to obtain a license meet with us here: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat. \nPricing: https://www.litellm.ai/#pricing"


class SpendCalculateRequest(LiteLLMBase):
4 changes: 3 additions & 1 deletion litellm/proxy/proxy_config.yaml
@@ -24,7 +24,9 @@ model_list:
general_settings:
master_key: sk-1234



litellm_settings:
success_callback: ["gcs_bucket"]

