diff --git a/README.md b/README.md index c39ec362..2e8333c0 100644 --- a/README.md +++ b/README.md @@ -637,13 +637,17 @@ curl -sX POST 'https://langchain.wolf.jina.ai/api/run' \ ## Frequently Asked Questions -- [My client that connects to the App gets timed-out, what should I do?](#my-client-that-connects-to-the-app-gets-timed-out-what-should-I-do) +- [My client that connects to the JCloud hosted App gets timed-out, what should I do?](#my-client-that-connects-to-the-jcloud-hosted-app-gets-timed-out-what-should-i-do) - [JCloud deployment failed at pushing image to Jina Hubble, what should I do?](#jcloud-deployment-failed-at-pushing-image-to-jina-hubble-what-should-i-di) - [Debug babyagi playground request/response for external integration](#debug-babyagi-playground-requestresponse-for-external-integration) -### My client that connects to the App gets timed-out, what should I do? +### My client that connects to the JCloud hosted App gets timed-out, what should I do? -If you make long HTTP requests, you may experience timeouts due to limitations in the OSS we used in `langchain-serve`. While we are working to permanently address this issue, we recommend using HTTP/1.1 in your client as a temporary workaround. +If you make long HTTP/WebSocket requests, the default timeout value (2 minutes) might not be suitable for your use case. You can provide a custom timeout value during JCloud deployment by using the `--timeout` argument. + +Additionally, for HTTP, you may also experience timeouts due to limitations in the OSS we used in `langchain-serve`. While we are working to permanently address this issue, we recommend using HTTP/1.1 in your client as a temporary workaround. + +For WebSocket, please note that the connection will be closed if idle for more than 5 minutes. ### JCloud deployment failed at pushing image to Jina Hubble, what should I do? 
diff --git a/lcserve/flow.py b/lcserve/flow.py index 436fa816..1bc86409 100644 --- a/lcserve/flow.py +++ b/lcserve/flow.py @@ -292,6 +292,7 @@ class Defaults: autoscale_max: int = 10 autoscale_rps: int = 10 autoscale_stable_window: int = DEFAULT_TIMEOUT + autoscale_revision_timeout: int = DEFAULT_TIMEOUT def __post_init__(self): # read from config yaml @@ -366,6 +367,7 @@ class AutoscaleConfig: max: int = Defaults.autoscale_max rps: int = Defaults.autoscale_rps stable_window: int = Defaults.autoscale_stable_window + revision_timeout: int = Defaults.autoscale_revision_timeout def to_dict(self) -> Dict: return { @@ -375,6 +377,7 @@ def to_dict(self) -> Dict: 'metric': 'rps', 'target': self.rps, 'stable_window': self.stable_window, + 'revision_timeout': self.revision_timeout, } } @@ -402,11 +405,7 @@ def get_gateway_jcloud_args( is_websocket: bool = False, timeout: int = DEFAULT_TIMEOUT, ) -> Dict: - _autoscale = AutoscaleConfig(stable_window=timeout) - - # TODO: remove this when websocket + autoscale is supported in JCloud - _timeout = 600 if is_websocket else timeout - _autoscale_args = {} if is_websocket else _autoscale.to_dict() + _autoscale = AutoscaleConfig(stable_window=timeout, revision_timeout=timeout) return { 'jcloud': { @@ -416,8 +415,8 @@ def get_gateway_jcloud_args( 'capacity': 'spot', }, 'healthcheck': False if is_websocket else True, - 'timeout': _timeout, - **_autoscale_args, + 'timeout': timeout, + **_autoscale.to_dict(), } }