diff --git a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/__init__.py b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_assets/icon_l_en.svg b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_assets/icon_l_en.svg
new file mode 100644
index 00000000..b85a4726
--- /dev/null
+++ b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_assets/icon_l_en.svg
@@ -0,0 +1,1188 @@
+
+
+
diff --git a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_assets/icon_s_en.svg b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_assets/icon_s_en.svg
new file mode 100644
index 00000000..b85a4726
--- /dev/null
+++ b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_assets/icon_s_en.svg
@@ -0,0 +1,1188 @@
+
+
+
diff --git a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_common.py b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_common.py
new file mode 100644
index 00000000..6961bb63
--- /dev/null
+++ b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_common.py
@@ -0,0 +1,59 @@
+from typing import Dict, List, Type
+
+import openai
+from httpx import Timeout
+
+from model_providers.core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+
+
+class _CommonDeepseek:
+ def _to_credential_kwargs(self, credentials: dict) -> dict:
+        """
+        Transform credentials to kwargs for the model client instance.
+
+        :param credentials: model credentials containing the API key and optional base URL
+        :return: keyword arguments for the OpenAI-compatible client
+        """
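+        # DeepSeek exposes an OpenAI-compatible HTTP API, so the credentials map directly to OpenAI client kwargs.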
+ credentials_kwargs = {
+ "api_key": credentials["api_key"],
+ "timeout": Timeout(315.0, read=300.0, write=10.0, connect=5.0),
+ "max_retries": 1,
+ }
+
+ if "base_url" in credentials and credentials["base_url"]:
+ credentials_kwargs["base_url"] = credentials["base_url"]
+
+ return credentials_kwargs
+
+ @property
+ def _invoke_error_mapping(self) -> Dict[Type[InvokeError], List[Type[Exception]]]:
+ """
+ Map model invoke error to unified error
+ The key is the error type thrown to the caller
+ The value is the error type thrown by the model,
+ which needs to be converted into a unified error type for the caller.
+
+ :return: Invoke error mapping
+ """
+ return {
+ InvokeConnectionError: [openai.APIConnectionError, openai.APITimeoutError],
+ InvokeServerUnavailableError: [openai.InternalServerError],
+ InvokeRateLimitError: [openai.RateLimitError],
+ InvokeAuthorizationError: [
+ openai.AuthenticationError,
+ openai.PermissionDeniedError,
+ ],
+ InvokeBadRequestError: [
+ openai.BadRequestError,
+ openai.NotFoundError,
+ openai.UnprocessableEntityError,
+ openai.APIError,
+ ],
+ }
diff --git a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/deepseek.py b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/deepseek.py
new file mode 100644
index 00000000..7643e7f4
--- /dev/null
+++ b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/deepseek.py
@@ -0,0 +1,18 @@
+import logging
+
+from model_providers.core.model_runtime.model_providers.__base.model_provider import (
+ ModelProvider,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class DeepseekProvider(ModelProvider):
+ def validate_provider_credentials(self, credentials: dict) -> None:
+ """
+ Validate provider credentials
+        If validation fails, raise an exception.
+
+ :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
+ """
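+        # No provider-level validation is performed here; DeepSeek is configured through
+        # customizable models, whose credentials are validated in DeepseekLargeLanguageModel.validate_credentials.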
+ pass
diff --git a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/deepseek.yaml b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/deepseek.yaml
new file mode 100644
index 00000000..4d77678d
--- /dev/null
+++ b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/deepseek.yaml
@@ -0,0 +1,44 @@
+provider: deepseek
+label:
+  en_US: DeepSeek
+icon_large:
+ en_US: icon_l_en.svg
+icon_small:
+ en_US: icon_s_en.svg
+background: "#F9FAFB"
+help:
+ title:
+    en_US: How to integrate with DeepSeek
+    zh_Hans: 如何集成 DeepSeek
+ url:
+ en_US: "deepseek"
+supported_model_types:
+ - llm
+configurate_methods:
+ - customizable-model
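+# DeepSeek models are registered as user-configured (customizable) models; each model entry
+# supplies its own API key and base URL via the credential schema below.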
+model_credential_schema:
+ model:
+ label:
+ en_US: Model Name
+ zh_Hans: 模型名称
+ placeholder:
+ en_US: Enter your model name
+ zh_Hans: 输入模型名称
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+ - variable: base_url
+ label:
+ zh_Hans: API Base
+ en_US: API Base
+ type: text-input
+ required: false
+ placeholder:
+ zh_Hans: 在此输入您的 API Base
+ en_US: Enter your API Base
diff --git a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/llm/__init__.py b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/llm/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/llm/llm.py b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/llm/llm.py
new file mode 100644
index 00000000..de99ed6c
--- /dev/null
+++ b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/llm/llm.py
@@ -0,0 +1,1245 @@
+import logging
+from collections.abc import Generator
+from typing import List, Optional, Union, cast
+from decimal import Decimal
+
+import tiktoken
+from openai import OpenAI, Stream
+from openai.types import Completion
+from openai.types.chat import (
+ ChatCompletion,
+ ChatCompletionChunk,
+ ChatCompletionMessageToolCall,
+)
+from openai.types.chat.chat_completion_chunk import (
+ ChoiceDeltaFunctionCall,
+ ChoiceDeltaToolCall,
+)
+from openai.types.chat.chat_completion_message import FunctionCall
+
+from model_providers.core.model_runtime.callbacks.base_callback import Callback
+from model_providers.core.model_runtime.entities.llm_entities import (
+ LLMMode,
+ LLMResult,
+ LLMResultChunk,
+ LLMResultChunkDelta,
+)
+from model_providers.core.model_runtime.entities.message_entities import (
+ AssistantPromptMessage,
+ ImagePromptMessageContent,
+ PromptMessage,
+ PromptMessageContentType,
+ PromptMessageTool,
+ SystemPromptMessage,
+ TextPromptMessageContent,
+ ToolPromptMessage,
+ UserPromptMessage,
+)
+from model_providers.core.model_runtime.entities.model_entities import (
+    AIModelEntity,
+    DefaultParameterName,
+    FetchFrom,
+    I18nObject,
+    ModelFeature,
+    ModelPropertyKey,
+    ModelType,
+    ParameterRule,
+    ParameterType,
+    PriceConfig,
+)
+from model_providers.core.model_runtime.errors.validate import (
+ CredentialsValidateFailedError,
+)
+from model_providers.core.model_runtime.model_providers.__base.large_language_model import (
+ LargeLanguageModel,
+)
+from model_providers.core.model_runtime.model_providers.deepseek._common import (
+    _CommonDeepseek,
+)
+
+logger = logging.getLogger(__name__)
+
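+# Prompt template used by the code-block-mode wrapper to steer models into returning a fenced JSON/XML block.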
+OPENAI_BLOCK_MODE_PROMPT = """You should always follow the instructions and output a valid {{block}} object.
+The structure of the {{block}} object can be found in the instructions; use {"answer": "$your_answer"} as the default structure
+if you are not sure about the structure.
+
+
+{{instructions}}
+
+"""
+
+
+class DeepseekLargeLanguageModel(_CommonDeepseek, LargeLanguageModel):
+ """
+    Model class for DeepSeek large language models.
+ """
+
+ def _invoke(
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: List[PromptMessage],
+ model_parameters: dict,
+ tools: Optional[List[PromptMessageTool]] = None,
+ stop: Optional[List[str]] = None,
+ stream: bool = True,
+ user: Optional[str] = None,
+ ) -> Union[LLMResult, Generator]:
+ """
+ Invoke large language model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param prompt_messages: prompt messages
+ :param model_parameters: model parameters
+ :param tools: tools for tool calling
+ :param stop: stop words
+ :param stream: is stream response
+ :param user: unique user id
+ :return: full response or stream response chunk generator result
+ """
+ # handle fine tune remote models
+ base_model = model
+ if model.startswith("ft:"):
+ base_model = model.split(":")[1]
+
+ # get model mode
+ model_mode = self.get_model_mode(base_model, credentials)
+
+ if model_mode == LLMMode.CHAT:
+ # chat model
+ return self._chat_generate(
+ model=model,
+ credentials=credentials,
+ prompt_messages=prompt_messages,
+ model_parameters=model_parameters,
+ tools=tools,
+ stop=stop,
+ stream=stream,
+ user=user,
+ )
+ else:
+ # text completion model
+ return self._generate(
+ model=model,
+ credentials=credentials,
+ prompt_messages=prompt_messages,
+ model_parameters=model_parameters,
+ stop=stop,
+ stream=stream,
+ user=user,
+ )
+
+ def _code_block_mode_wrapper(
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: List[PromptMessage],
+ model_parameters: dict,
+ tools: Optional[List[PromptMessageTool]] = None,
+ stop: Optional[List[str]] = None,
+ stream: bool = True,
+ user: Optional[str] = None,
+        callbacks: Optional[List[Callback]] = None,
+ ) -> Union[LLMResult, Generator]:
+ """
+ Code block mode wrapper for invoking large language model
+ """
+ # handle fine tune remote models
+ base_model = model
+ if model.startswith("ft:"):
+ base_model = model.split(":")[1]
+
+ # get model mode
+ model_mode = self.get_model_mode(base_model, credentials)
+
+ # transform response format
+        if model_parameters.get("response_format") in ("JSON", "XML"):
+ stop = stop or []
+ if model_mode == LLMMode.CHAT:
+ # chat model
+ self._transform_chat_json_prompts(
+ model=base_model,
+ credentials=credentials,
+ prompt_messages=prompt_messages,
+ model_parameters=model_parameters,
+ tools=tools,
+ stop=stop,
+ stream=stream,
+ user=user,
+ response_format=model_parameters["response_format"],
+ )
+ else:
+ self._transform_completion_json_prompts(
+ model=base_model,
+ credentials=credentials,
+ prompt_messages=prompt_messages,
+ model_parameters=model_parameters,
+ tools=tools,
+ stop=stop,
+ stream=stream,
+ user=user,
+ response_format=model_parameters["response_format"],
+ )
+ model_parameters.pop("response_format")
+
+ return self._invoke(
+ model=model,
+ credentials=credentials,
+ prompt_messages=prompt_messages,
+ model_parameters=model_parameters,
+ tools=tools,
+ stop=stop,
+ stream=stream,
+ user=user,
+ )
+
+ def _transform_chat_json_prompts(
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: List[PromptMessage],
+ model_parameters: dict,
+ tools: Union[List[PromptMessageTool], None] = None,
+ stop: Union[List[str], None] = None,
+ stream: bool = True,
+ user: Union[str, None] = None,
+ response_format: str = "JSON",
+ ) -> None:
+ """
+ Transform json prompts
+ """
+ if "```\n" not in stop:
+ stop.append("```\n")
+ if "\n```" not in stop:
+ stop.append("\n```")
+
+ # check if there is a system message
+ if len(prompt_messages) > 0 and isinstance(
+ prompt_messages[0], SystemPromptMessage
+ ):
+ # override the system message
+ prompt_messages[0] = SystemPromptMessage(
+ content=OPENAI_BLOCK_MODE_PROMPT.replace(
+ "{{instructions}}", prompt_messages[0].content
+ ).replace("{{block}}", response_format)
+ )
+ prompt_messages.append(
+ AssistantPromptMessage(content=f"\n```{response_format}\n")
+ )
+ else:
+ # insert the system message
+ prompt_messages.insert(
+ 0,
+ SystemPromptMessage(
+ content=OPENAI_BLOCK_MODE_PROMPT.replace(
+ "{{instructions}}",
+ f"Please output a valid {response_format} object.",
+ ).replace("{{block}}", response_format)
+ ),
+ )
+ prompt_messages.append(
+ AssistantPromptMessage(content=f"\n```{response_format}")
+ )
+
+ def _transform_completion_json_prompts(
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: List[PromptMessage],
+ model_parameters: dict,
+ tools: Union[List[PromptMessageTool], None] = None,
+ stop: Union[List[str], None] = None,
+ stream: bool = True,
+ user: Union[str, None] = None,
+ response_format: str = "JSON",
+ ) -> None:
+ """
+ Transform json prompts
+ """
+ if "```\n" not in stop:
+ stop.append("```\n")
+ if "\n```" not in stop:
+ stop.append("\n```")
+
+ # override the last user message
+ user_message = None
+ for i in range(len(prompt_messages) - 1, -1, -1):
+ if isinstance(prompt_messages[i], UserPromptMessage):
+ user_message = prompt_messages[i]
+ break
+
+ if user_message:
+ if prompt_messages[i].content[-11:] == "Assistant: ":
+ # now we are in the chat app, remove the last assistant message
+ prompt_messages[i].content = prompt_messages[i].content[:-11]
+ prompt_messages[i] = UserPromptMessage(
+ content=OPENAI_BLOCK_MODE_PROMPT.replace(
+ "{{instructions}}", user_message.content
+ ).replace("{{block}}", response_format)
+ )
+ prompt_messages[i].content += f"Assistant:\n```{response_format}\n"
+ else:
+ prompt_messages[i] = UserPromptMessage(
+ content=OPENAI_BLOCK_MODE_PROMPT.replace(
+ "{{instructions}}", user_message.content
+ ).replace("{{block}}", response_format)
+ )
+ prompt_messages[i].content += f"\n```{response_format}\n"
+
+ def get_num_tokens(
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: List[PromptMessage],
+ tools: Optional[List[PromptMessageTool]] = None,
+ ) -> int:
+ """
+ Get number of tokens for given prompt messages
+
+ :param model: model name
+ :param credentials: model credentials
+ :param prompt_messages: prompt messages
+ :param tools: tools for tool calling
+ :return:
+ """
+ # handle fine tune remote models
+ if model.startswith("ft:"):
+ base_model = model.split(":")[1]
+ else:
+ base_model = model
+
+        # get model mode
+        model_mode = self.get_model_mode(base_model, credentials)
+
+ if model_mode == LLMMode.CHAT:
+ # chat model
+ return self._num_tokens_from_messages(base_model, prompt_messages, tools)
+ else:
+ # text completion model, do not support tool calling
+ return self._num_tokens_from_string(base_model, prompt_messages[0].content)
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ # transform credentials to kwargs for model instance
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+ client = OpenAI(**credentials_kwargs)
+
+            # handle fine tune remote models
+            base_model = model
+            # fine-tuned model names look like ft:gpt-3.5-turbo-0613:personal::xxxxx
+            if model.startswith("ft:"):
+                base_model = model.split(":")[1]
+
+                # check if the fine-tuned model exists remotely
+                remote_models = self.remote_models(credentials)
+                remote_model_map = {model.model: model for model in remote_models}
+                if model not in remote_model_map:
+                    raise CredentialsValidateFailedError(
+                        f"Fine-tuned model {model} not found"
+                    )
+
+ # get model mode
+ model_mode = self.get_model_mode(base_model, credentials)
+
+ if model_mode == LLMMode.CHAT:
+ # chat model
+ client.chat.completions.create(
+ messages=[{"role": "user", "content": "ping"}],
+ model=model,
+ temperature=0,
+ max_tokens=20,
+ stream=False,
+ )
+ else:
+ # text completion model
+ client.completions.create(
+ prompt="ping",
+ model=model,
+ temperature=0,
+ max_tokens=20,
+ stream=False,
+ )
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+
+ def remote_models(self, credentials: dict) -> List[AIModelEntity]:
+ """
+ Return remote models if credentials are provided.
+
+ :param credentials: provider credentials
+ :return:
+ """
+ # get predefined models
+ predefined_models = self.predefined_models()
+ predefined_models_map = {model.model: model for model in predefined_models}
+
+ # transform credentials to kwargs for model instance
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+ client = OpenAI(**credentials_kwargs)
+
+ # get all remote models
+ remote_models = client.models.list()
+
+ fine_tune_models = [
+ model for model in remote_models if model.id.startswith("ft:")
+ ]
+
+ ai_model_entities = []
+ for model in fine_tune_models:
+ base_model = model.id.split(":")[1]
+
+ base_model_schema = None
+ for (
+ predefined_model_name,
+ predefined_model,
+ ) in predefined_models_map.items():
+ if predefined_model_name in base_model:
+ base_model_schema = predefined_model
+
+ if not base_model_schema:
+ continue
+
+ ai_model_entity = AIModelEntity(
+ model=model.id,
+ label=I18nObject(zh_Hans=model.id, en_US=model.id),
+ model_type=ModelType.LLM,
+ features=base_model_schema.features,
+ fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+ model_properties=base_model_schema.model_properties,
+ parameter_rules=base_model_schema.parameter_rules,
+ pricing=PriceConfig(
+ input=0.003, output=0.006, unit=0.001, currency="USD"
+ ),
+ )
+
+ ai_model_entities.append(ai_model_entity)
+
+ return ai_model_entities
+
+ def _generate(
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: List[PromptMessage],
+ model_parameters: dict,
+ stop: Optional[List[str]] = None,
+ stream: bool = True,
+ user: Optional[str] = None,
+ ) -> Union[LLMResult, Generator]:
+ """
+ Invoke llm completion model
+
+ :param model: model name
+ :param credentials: credentials
+ :param prompt_messages: prompt messages
+ :param model_parameters: model parameters
+ :param stop: stop words
+ :param stream: is stream response
+ :param user: unique user id
+ :return: full response or stream response chunk generator result
+ """
+ # transform credentials to kwargs for model instance
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+
+ # init model client
+ client = OpenAI(**credentials_kwargs)
+
+ extra_model_kwargs = {}
+
+ if stop:
+ extra_model_kwargs["stop"] = stop
+
+ if user:
+ extra_model_kwargs["user"] = user
+
+ # text completion model
+ response = client.completions.create(
+ prompt=prompt_messages[0].content,
+ model=model,
+ stream=stream,
+ **model_parameters,
+ **extra_model_kwargs,
+ )
+
+ if stream:
+ return self._handle_generate_stream_response(
+ model, credentials, response, prompt_messages
+ )
+
+ return self._handle_generate_response(
+ model, credentials, response, prompt_messages
+ )
+
+ def _handle_generate_response(
+ self,
+ model: str,
+ credentials: dict,
+ response: Completion,
+ prompt_messages: List[PromptMessage],
+ ) -> LLMResult:
+ """
+ Handle llm completion response
+
+ :param model: model name
+ :param credentials: model credentials
+ :param response: response
+ :param prompt_messages: prompt messages
+ :return: llm result
+ """
+ assistant_text = response.choices[0].text
+
+ # transform assistant message to prompt message
+ assistant_prompt_message = AssistantPromptMessage(content=assistant_text)
+
+ # calculate num tokens
+ if response.usage:
+ # transform usage
+ prompt_tokens = response.usage.prompt_tokens
+ completion_tokens = response.usage.completion_tokens
+ else:
+ # calculate num tokens
+ prompt_tokens = self._num_tokens_from_string(
+ model, prompt_messages[0].content
+ )
+ completion_tokens = self._num_tokens_from_string(model, assistant_text)
+
+ # transform usage
+ usage = self._calc_response_usage(
+ model, credentials, prompt_tokens, completion_tokens
+ )
+
+ # transform response
+ result = LLMResult(
+ model=response.model,
+ prompt_messages=prompt_messages,
+ message=assistant_prompt_message,
+ usage=usage,
+ system_fingerprint=response.system_fingerprint,
+ )
+
+ return result
+
+ def _handle_generate_stream_response(
+ self,
+ model: str,
+ credentials: dict,
+ response: Stream[Completion],
+ prompt_messages: List[PromptMessage],
+ ) -> Generator:
+ """
+ Handle llm completion stream response
+
+ :param model: model name
+ :param credentials: model credentials
+ :param response: response
+ :param prompt_messages: prompt messages
+ :return: llm response chunk generator result
+ """
+ full_text = ""
+ for chunk in response:
+ if len(chunk.choices) == 0:
+ continue
+
+ delta = chunk.choices[0]
+
+ if delta.finish_reason is None and (delta.text is None or delta.text == ""):
+ continue
+
+ # transform assistant message to prompt message
+ text = delta.text if delta.text else ""
+ assistant_prompt_message = AssistantPromptMessage(content=text)
+
+ full_text += text
+
+ if delta.finish_reason is not None:
+ # calculate num tokens
+ if chunk.usage:
+ # transform usage
+ prompt_tokens = chunk.usage.prompt_tokens
+ completion_tokens = chunk.usage.completion_tokens
+ else:
+ # calculate num tokens
+ prompt_tokens = self._num_tokens_from_string(
+ model, prompt_messages[0].content
+ )
+ completion_tokens = self._num_tokens_from_string(model, full_text)
+
+ # transform usage
+ usage = self._calc_response_usage(
+ model, credentials, prompt_tokens, completion_tokens
+ )
+
+ yield LLMResultChunk(
+ model=chunk.model,
+ prompt_messages=prompt_messages,
+ system_fingerprint=chunk.system_fingerprint,
+ delta=LLMResultChunkDelta(
+ index=delta.index,
+ message=assistant_prompt_message,
+ finish_reason=delta.finish_reason,
+ usage=usage,
+ ),
+ )
+ else:
+ yield LLMResultChunk(
+ model=chunk.model,
+ prompt_messages=prompt_messages,
+ system_fingerprint=chunk.system_fingerprint,
+ delta=LLMResultChunkDelta(
+ index=delta.index,
+ message=assistant_prompt_message,
+ ),
+ )
+
+ def _chat_generate(
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: List[PromptMessage],
+ model_parameters: dict,
+ tools: Optional[List[PromptMessageTool]] = None,
+ stop: Optional[List[str]] = None,
+ stream: bool = True,
+ user: Optional[str] = None,
+ ) -> Union[LLMResult, Generator]:
+ """
+ Invoke llm chat model
+
+ :param model: model name
+ :param credentials: credentials
+ :param prompt_messages: prompt messages
+ :param model_parameters: model parameters
+ :param tools: tools for tool calling
+ :param stop: stop words
+ :param stream: is stream response
+ :param user: unique user id
+ :return: full response or stream response chunk generator result
+ """
+ # transform credentials to kwargs for model instance
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+
+ # init model client
+ client = OpenAI(**credentials_kwargs)
+
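+        # Normalize the response_format parameter into the OpenAI-style {"type": ...} object expected by the API.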
+ response_format = model_parameters.get("response_format")
+ if response_format:
+ if response_format == "json_object":
+ response_format = {"type": "json_object"}
+ else:
+ response_format = {"type": "text"}
+
+ model_parameters["response_format"] = response_format
+
+ extra_model_kwargs = {}
+
+ if tools:
+ # extra_model_kwargs['tools'] = [helper.dump_model(PromptMessageFunction(function=tool)) for tool in tools]
+ extra_model_kwargs["functions"] = [
+ {
+ "name": tool.name,
+ "description": tool.description,
+ "parameters": tool.parameters,
+ }
+ for tool in tools
+ ]
+
+ if stop:
+ extra_model_kwargs["stop"] = stop
+
+ if user:
+ extra_model_kwargs["user"] = user
+
+ # chat model
+ response = client.chat.completions.create(
+ messages=[self._convert_prompt_message_to_dict(m) for m in prompt_messages],
+ model=model,
+ stream=stream,
+ **model_parameters,
+ **extra_model_kwargs,
+ )
+
+ if stream:
+ return self._handle_chat_generate_stream_response(
+ model, credentials, response, prompt_messages, tools
+ )
+
+ return self._handle_chat_generate_response(
+ model, credentials, response, prompt_messages, tools
+ )
+
+ def _handle_chat_generate_response(
+ self,
+ model: str,
+ credentials: dict,
+ response: ChatCompletion,
+ prompt_messages: List[PromptMessage],
+ tools: Optional[List[PromptMessageTool]] = None,
+ ) -> LLMResult:
+ """
+ Handle llm chat response
+
+ :param model: model name
+ :param credentials: credentials
+ :param response: response
+ :param prompt_messages: prompt messages
+ :param tools: tools for tool calling
+ :return: llm response
+ """
+ assistant_message = response.choices[0].message
+ # assistant_message_tool_calls = assistant_message.tool_calls
+ assistant_message_function_call = assistant_message.function_call
+
+ # extract tool calls from response
+ # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
+ function_call = self._extract_response_function_call(
+ assistant_message_function_call
+ )
+ tool_calls = [function_call] if function_call else []
+
+ # transform assistant message to prompt message
+ assistant_prompt_message = AssistantPromptMessage(
+ content=assistant_message.content, tool_calls=tool_calls
+ )
+
+ # calculate num tokens
+ if response.usage:
+ # transform usage
+ prompt_tokens = response.usage.prompt_tokens
+ completion_tokens = response.usage.completion_tokens
+ else:
+ # calculate num tokens
+ prompt_tokens = self._num_tokens_from_messages(
+ model, prompt_messages, tools
+ )
+ completion_tokens = self._num_tokens_from_messages(
+ model, [assistant_prompt_message]
+ )
+
+ # transform usage
+ usage = self._calc_response_usage(
+ model, credentials, prompt_tokens, completion_tokens
+ )
+
+ # transform response
+ response = LLMResult(
+ model=response.model,
+ prompt_messages=prompt_messages,
+ message=assistant_prompt_message,
+ usage=usage,
+ system_fingerprint=response.system_fingerprint,
+ )
+
+ return response
+
+ def _handle_chat_generate_stream_response(
+ self,
+ model: str,
+ credentials: dict,
+ response: Stream[ChatCompletionChunk],
+ prompt_messages: List[PromptMessage],
+ tools: Optional[List[PromptMessageTool]] = None,
+ ) -> Generator:
+ """
+ Handle llm chat stream response
+
+ :param model: model name
+ :param response: response
+ :param prompt_messages: prompt messages
+ :param tools: tools for tool calling
+ :return: llm response chunk generator
+ """
+ full_assistant_content = ""
+        delta_assistant_message_function_call_storage: Optional[ChoiceDeltaFunctionCall] = None
+ for chunk in response:
+ if len(chunk.choices) == 0:
+ continue
+
+ delta = chunk.choices[0]
+ has_finish_reason = delta.finish_reason is not None
+
+ if (
+ not has_finish_reason
+ and (delta.delta.content is None or delta.delta.content == "")
+ and delta.delta.function_call is None
+ ):
+ continue
+
+ # assistant_message_tool_calls = delta.delta.tool_calls
+ assistant_message_function_call = delta.delta.function_call
+
+ # extract tool calls from response
+ if delta_assistant_message_function_call_storage is not None:
+ # handle process of stream function call
+ if assistant_message_function_call:
+                    # the function call has not ended yet; keep accumulating its arguments
+ delta_assistant_message_function_call_storage.arguments += (
+ assistant_message_function_call.arguments
+ )
+ continue
+ else:
+ # message has ended
+ assistant_message_function_call = (
+ delta_assistant_message_function_call_storage
+ )
+ delta_assistant_message_function_call_storage = None
+ else:
+ if assistant_message_function_call:
+ # start of stream function call
+ delta_assistant_message_function_call_storage = (
+ assistant_message_function_call
+ )
+ if not has_finish_reason:
+ continue
+
+ # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
+ function_call = self._extract_response_function_call(
+ assistant_message_function_call
+ )
+ tool_calls = [function_call] if function_call else []
+
+ # transform assistant message to prompt message
+ assistant_prompt_message = AssistantPromptMessage(
+ content=delta.delta.content if delta.delta.content else "",
+ tool_calls=tool_calls,
+ )
+
+ full_assistant_content += delta.delta.content if delta.delta.content else ""
+
+ if has_finish_reason:
+ # calculate num tokens
+ prompt_tokens = self._num_tokens_from_messages(
+ model, prompt_messages, tools
+ )
+
+ full_assistant_prompt_message = AssistantPromptMessage(
+ content=full_assistant_content, tool_calls=tool_calls
+ )
+ completion_tokens = self._num_tokens_from_messages(
+ model, [full_assistant_prompt_message]
+ )
+
+ # transform usage
+ usage = self._calc_response_usage(
+ model, credentials, prompt_tokens, completion_tokens
+ )
+
+ yield LLMResultChunk(
+ model=chunk.model,
+ prompt_messages=prompt_messages,
+ system_fingerprint=chunk.system_fingerprint,
+ delta=LLMResultChunkDelta(
+ index=delta.index,
+ message=assistant_prompt_message,
+ finish_reason=delta.finish_reason,
+ usage=usage,
+ ),
+ )
+ else:
+ yield LLMResultChunk(
+ model=chunk.model,
+ prompt_messages=prompt_messages,
+ system_fingerprint=chunk.system_fingerprint,
+ delta=LLMResultChunkDelta(
+ index=delta.index,
+ message=assistant_prompt_message,
+ ),
+ )
+
+ def _extract_response_tool_calls(
+ self,
+ response_tool_calls: List[
+ Union[ChatCompletionMessageToolCall, ChoiceDeltaToolCall]
+ ],
+ ) -> List[AssistantPromptMessage.ToolCall]:
+ """
+ Extract tool calls from response
+
+ :param response_tool_calls: response tool calls
+ :return: list of tool calls
+ """
+ tool_calls = []
+ if response_tool_calls:
+ for response_tool_call in response_tool_calls:
+ function = AssistantPromptMessage.ToolCall.ToolCallFunction(
+ name=response_tool_call.function.name,
+ arguments=response_tool_call.function.arguments,
+ )
+
+ tool_call = AssistantPromptMessage.ToolCall(
+ id=response_tool_call.id,
+ type=response_tool_call.type,
+ function=function,
+ )
+ tool_calls.append(tool_call)
+
+ return tool_calls
+
+ def _extract_response_function_call(
+ self, response_function_call: Union[FunctionCall, ChoiceDeltaFunctionCall]
+ ) -> AssistantPromptMessage.ToolCall:
+ """
+ Extract function call from response
+
+ :param response_function_call: response function call
+ :return: tool call
+ """
+ tool_call = None
+ if response_function_call:
+ function = AssistantPromptMessage.ToolCall.ToolCallFunction(
+ name=response_function_call.name,
+ arguments=response_function_call.arguments,
+ )
+
+ tool_call = AssistantPromptMessage.ToolCall(
+ id=response_function_call.name, type="function", function=function
+ )
+
+ return tool_call
+
+ def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict:
+ """
+ Convert PromptMessage to dict for OpenAI API
+ """
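+        # Note: tool calls and tool results are serialized with the legacy function-calling format
+        # ("function_call" and the "function" role) rather than the newer "tool_calls"/"tool" schema;
+        # see the commented-out alternatives below.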
+ if isinstance(message, UserPromptMessage):
+ message = cast(UserPromptMessage, message)
+ if isinstance(message.content, str):
+ message_dict = {"role": "user", "content": message.content}
+ else:
+ sub_messages = []
+ for message_content in message.content:
+ if message_content.type == PromptMessageContentType.TEXT:
+ message_content = cast(
+ TextPromptMessageContent, message_content
+ )
+ sub_message_dict = {
+ "type": "text",
+ "text": message_content.data,
+ }
+ sub_messages.append(sub_message_dict)
+ elif message_content.type == PromptMessageContentType.IMAGE:
+ message_content = cast(
+ ImagePromptMessageContent, message_content
+ )
+ sub_message_dict = {
+ "type": "image_url",
+ "image_url": {
+ "url": message_content.data,
+ "detail": message_content.detail.value,
+ },
+ }
+ sub_messages.append(sub_message_dict)
+
+ message_dict = {"role": "user", "content": sub_messages}
+ elif isinstance(message, AssistantPromptMessage):
+ message = cast(AssistantPromptMessage, message)
+ message_dict = {"role": "assistant", "content": message.content}
+ if message.tool_calls:
+ # message_dict["tool_calls"] = [tool_call.dict() for tool_call in
+ # message.tool_calls]
+ function_call = message.tool_calls[0]
+ message_dict["function_call"] = {
+ "name": function_call.function.name,
+ "arguments": function_call.function.arguments,
+ }
+ elif isinstance(message, SystemPromptMessage):
+ message = cast(SystemPromptMessage, message)
+ message_dict = {"role": "system", "content": message.content}
+ elif isinstance(message, ToolPromptMessage):
+ message = cast(ToolPromptMessage, message)
+ # message_dict = {
+ # "role": "tool",
+ # "content": message.content,
+ # "tool_call_id": message.tool_call_id
+ # }
+ message_dict = {
+ "role": "function",
+ "content": message.content,
+ "name": message.tool_call_id,
+ }
+ else:
+ raise ValueError(f"Got unknown type {message}")
+
+ if message.name:
+ message_dict["name"] = message.name
+
+ return message_dict
+
+ def _num_tokens_from_string(
+ self, model: str, text: str, tools: Optional[List[PromptMessageTool]] = None
+ ) -> int:
+ """
+ Calculate num tokens for text completion model with tiktoken package.
+
+ :param model: model name
+ :param text: prompt text
+ :param tools: tools for tool calling
+ :return: number of tokens
+ """
+ try:
+ encoding = tiktoken.encoding_for_model(model)
+ except KeyError:
+ encoding = tiktoken.get_encoding("cl100k_base")
+
+ num_tokens = len(encoding.encode(text))
+
+ if tools:
+ num_tokens += self._num_tokens_for_tools(encoding, tools)
+
+ return num_tokens
+
+ def _num_tokens_from_messages(
+ self,
+ model: str,
+ messages: List[PromptMessage],
+ tools: Optional[List[PromptMessageTool]] = None,
+ ) -> int:
+        """Approximate the number of tokens for chat messages with the tiktoken package,
+        using the GPT-3.5/GPT-4 message formatting rules as a heuristic.
+
+        Official documentation: https://github.com/openai/openai-cookbook/blob/
+        main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
+ if model.startswith("ft:"):
+ model = model.split(":")[1]
+
+ try:
+ encoding = tiktoken.encoding_for_model(model)
+ except KeyError:
+ logger.warning("Warning: model not found. Using cl100k_base encoding.")
+ model = "cl100k_base"
+ encoding = tiktoken.get_encoding(model)
+
+ if model.startswith("gpt-3.5-turbo-0301"):
+ # every message follows {role/name}\n{content}\n
+ tokens_per_message = 4
+ # if there's a name, the role is omitted
+ tokens_per_name = -1
+ elif model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4"):
+ tokens_per_message = 3
+ tokens_per_name = 1
+        else:
+            # Unrecognized model names (e.g. DeepSeek models) fall back to the gpt-3.5/gpt-4
+            # per-message overhead as an approximation instead of raising an error.
+            tokens_per_message = 3
+            tokens_per_name = 1
+ num_tokens = 0
+ messages_dict = [self._convert_prompt_message_to_dict(m) for m in messages]
+ for message in messages_dict:
+ num_tokens += tokens_per_message
+ for key, value in message.items():
+ # Cast str(value) in case the message value is not a string
+ # This occurs with function messages
+                # TODO: Token counting for image content is not implemented; it would require
+                # downloading each image to determine its resolution, which adds request latency.
+ if isinstance(value, list):
+ text = ""
+ for item in value:
+ if isinstance(item, dict) and item["type"] == "text":
+ text += item["text"]
+
+ value = text
+
+ if key == "tool_calls":
+ for tool_call in value:
+ for t_key, t_value in tool_call.items():
+ num_tokens += len(encoding.encode(t_key))
+ if t_key == "function":
+ for f_key, f_value in t_value.items():
+ num_tokens += len(encoding.encode(f_key))
+ num_tokens += len(encoding.encode(f_value))
+ else:
+ num_tokens += len(encoding.encode(t_key))
+ num_tokens += len(encoding.encode(t_value))
+ else:
+ num_tokens += len(encoding.encode(str(value)))
+
+ if key == "name":
+ num_tokens += tokens_per_name
+
+ # every reply is primed with assistant
+ num_tokens += 3
+
+ if tools:
+ num_tokens += self._num_tokens_for_tools(encoding, tools)
+
+ return num_tokens
+
+ def _num_tokens_for_tools(
+ self, encoding: tiktoken.Encoding, tools: List[PromptMessageTool]
+ ) -> int:
+ """
+ Calculate num tokens for tool calling with tiktoken package.
+
+ :param encoding: encoding
+ :param tools: tools for tool calling
+ :return: number of tokens
+ """
+ num_tokens = 0
+ for tool in tools:
+ num_tokens += len(encoding.encode("type"))
+ num_tokens += len(encoding.encode("function"))
+
+ # calculate num tokens for function object
+ num_tokens += len(encoding.encode("name"))
+ num_tokens += len(encoding.encode(tool.name))
+ num_tokens += len(encoding.encode("description"))
+ num_tokens += len(encoding.encode(tool.description))
+ parameters = tool.parameters
+ num_tokens += len(encoding.encode("parameters"))
+ if "title" in parameters:
+ num_tokens += len(encoding.encode("title"))
+ num_tokens += len(encoding.encode(parameters.get("title")))
+ num_tokens += len(encoding.encode("type"))
+ num_tokens += len(encoding.encode(parameters.get("type")))
+ if "properties" in parameters:
+ num_tokens += len(encoding.encode("properties"))
+ for key, value in parameters.get("properties").items():
+ num_tokens += len(encoding.encode(key))
+ for field_key, field_value in value.items():
+ num_tokens += len(encoding.encode(field_key))
+ if field_key == "enum":
+ for enum_field in field_value:
+ num_tokens += 3
+ num_tokens += len(encoding.encode(enum_field))
+ else:
+ num_tokens += len(encoding.encode(field_key))
+ num_tokens += len(encoding.encode(str(field_value)))
+ if "required" in parameters:
+ num_tokens += len(encoding.encode("required"))
+ for required_field in parameters["required"]:
+ num_tokens += 3
+ num_tokens += len(encoding.encode(required_field))
+
+ return num_tokens
+
+ def get_customizable_model_schema(
+ self, model: str, credentials: dict
+ ) -> AIModelEntity:
+ """
+ Get customizable model schema.
+
+ :param model: model name
+ :param credentials: credentials
+
+ :return: model schema
+ """
+ extras = {}
+
+
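+        # The schema is assembled entirely from the user-supplied credentials
+        # (mode, context_size, pricing), since DeepSeek models are exposed as customizable models.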
+ entity = AIModelEntity(
+ model=model,
+ label=I18nObject(zh_Hans=model, en_US=model),
+ model_type=ModelType.LLM,
+ fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+ model_properties={
+ ModelPropertyKey.MODE: credentials.get("mode"),
+ ModelPropertyKey.CONTEXT_SIZE: int(
+ credentials.get("context_size", 4096)
+ ),
+ },
+ parameter_rules=[
+ ParameterRule(
+ name=DefaultParameterName.TEMPERATURE.value,
+ use_template=DefaultParameterName.TEMPERATURE.value,
+ label=I18nObject(en_US="Temperature"),
+ type=ParameterType.FLOAT,
+ help=I18nObject(
+ en_US="The temperature of the model. "
+ "Increasing the temperature will make the model answer "
+ "more creatively. (Default: 0.8)"
+ ),
+ default=0.8,
+ min=0,
+ max=2,
+ ),
+ ParameterRule(
+ name=DefaultParameterName.TOP_P.value,
+ use_template=DefaultParameterName.TOP_P.value,
+ label=I18nObject(en_US="Top P"),
+ type=ParameterType.FLOAT,
+ help=I18nObject(
+ en_US="Works together with top-k. A higher value (e.g., 0.95) will lead to "
+ "more diverse text, while a lower value (e.g., 0.5) will generate more "
+ "focused and conservative text. (Default: 0.9)"
+ ),
+ default=0.9,
+ min=0,
+ max=1,
+ ),
+ ParameterRule(
+ name="frequency_penalty",
+ label=I18nObject(en_US="frequency_penalty"),
+ type=ParameterType.FLOAT,
+ help=I18nObject(
+                        en_US="A number between -2.0 and 2.0. If positive, new tokens are "
+                        "penalized according to their existing frequency in the text so far, "
+                        "reducing the model's likelihood of repeating the same content.",
+ zh_Hans="介于 -2.0 和 2.0 之间的数字。如果该值为正,"
+ "那么新 token 会根据其在已有文本中的出现频率受到相应的惩罚,"
+ "降低模型重复相同内容的可能性"
+ ),
+ default=0,
+ min=-2,
+ max=2,
+ ),
+ ParameterRule(
+ name="presence_penalty",
+ label=I18nObject(en_US="presence_penalty"),
+ type=ParameterType.FLOAT,
+ help=I18nObject(
+                        en_US="A number between -2.0 and 2.0. If positive, new tokens are "
+                        "penalized according to whether they already appear in the text so far, "
+                        "increasing the model's likelihood of talking about new topics.",
+                        zh_Hans="介于 -2.0 和 2.0 之间的数字。如果该值为正,那么新 token 会根据其是否已在已有文本中出现受到相应的惩罚,从而增加模型谈论新主题的可能性。"
+                    ),
+                    default=0,
+ min=-2,
+ max=2,
+ ),
+ ParameterRule(
+ name="max_tokens",
+ use_template="max_tokens",
+ label=I18nObject(en_US="max_tokens"),
+ type=ParameterType.INT,
+ help=I18nObject(
+                        en_US="The maximum number of tokens to generate for the completion. "
+                        "The combined length of input and output tokens is limited by the model's context length.",
+                        zh_Hans="限制一次请求中模型生成 completion 的最大 token 数。"
+                        "输入 token 和输出 token 的总长度受模型的上下文长度的限制。"
+                    ),
+                    default=128,
+                    min=1,
+ max=int(credentials.get("max_tokens", 4096)),
+ ),
+ ParameterRule(
+ name="logprobs",
+ label=I18nObject(en_US="logprobs"),
+ type=ParameterType.BOOLEAN,
+ help=I18nObject(
+ en_US="Whether to return the log probabilities of the tokens. ",
+ zh_Hans="是否返回所输出 token 的对数概率。如果为 true,则在 message 的 content 中返回每个输出 token 的对数概率。"
+ ),
+ ),
+ ParameterRule(
+ name="top_logprobs",
+ label=I18nObject(en_US="top_logprobs"),
+ type=ParameterType.INT,
+ help=I18nObject(
+                        en_US="An integer between 0 and 20 specifying how many of the most "
+                        "likely tokens to return at each output position, together with their "
+                        "log probabilities. logprobs must be set to true when this parameter is used.",
+ zh_Hans="一个介于 0 到 20 之间的整数 N,指定每个输出位置返回输出概率 top N 的 token,"
+ "且返回这些 token 的对数概率。指定此参数时,logprobs 必须为 true。"
+ ),
+ default=0,
+ min=0,
+ max=20,
+ ),
+ ],
+ pricing=PriceConfig(
+ input=Decimal(credentials.get("input_price", 0)),
+ output=Decimal(credentials.get("output_price", 0)),
+ unit=Decimal(credentials.get("unit", 0)),
+ currency=credentials.get("currency", "USD"),
+ ),
+ **extras,
+ )
+
+ return entity
diff --git a/model-providers/model_providers/core/model_runtime/model_providers/ollama/_common.py b/model-providers/model_providers/core/model_runtime/model_providers/ollama/_common.py
index c910ac97..ff3ba911 100644
--- a/model-providers/model_providers/core/model_runtime/model_providers/ollama/_common.py
+++ b/model-providers/model_providers/core/model_runtime/model_providers/ollama/_common.py
@@ -27,9 +27,8 @@ class _CommonOllama:
"max_retries": 1,
}
- if "openai_api_base" in credentials and credentials["openai_api_base"]:
- credentials["openai_api_base"] = credentials["openai_api_base"].rstrip("/")
- credentials_kwargs["base_url"] = credentials["openai_api_base"] + "/v1"
+ if "base_url" in credentials and credentials["base_url"]:
+ credentials_kwargs["base_url"] = credentials["base_url"]
return credentials_kwargs
diff --git a/model-providers/model_providers/core/model_runtime/model_providers/ollama/llm/llm.py b/model-providers/model_providers/core/model_runtime/model_providers/ollama/llm/llm.py
index 0848f410..1792fa80 100644
--- a/model-providers/model_providers/core/model_runtime/model_providers/ollama/llm/llm.py
+++ b/model-providers/model_providers/core/model_runtime/model_providers/ollama/llm/llm.py
@@ -1,6 +1,7 @@
import logging
from collections.abc import Generator
from typing import List, Optional, Union, cast
+from decimal import Decimal
import tiktoken
from openai import OpenAI, Stream
@@ -39,7 +40,7 @@ from model_providers.core.model_runtime.entities.model_entities import (
FetchFrom,
I18nObject,
ModelType,
- PriceConfig,
+    PriceConfig,
+    DefaultParameterName,
+    ModelFeature,
+    ModelPropertyKey,
+    ParameterRule,
+    ParameterType,
)
from model_providers.core.model_runtime.errors.validate import (
CredentialsValidateFailedError,
@@ -1116,47 +1117,223 @@ class OllamaLargeLanguageModel(_CommonOllama, LargeLanguageModel):
return num_tokens
def get_customizable_model_schema(
- self, model: str, credentials: dict
+ self, model: str, credentials: dict
) -> AIModelEntity:
"""
- OpenAI supports fine-tuning of their models. This method returns the schema of the base model
- but renamed to the fine-tuned model name.
+ Get customizable model schema.
:param model: model name
:param credentials: credentials
:return: model schema
"""
- if not model.startswith("ft:"):
- base_model = model
- else:
- # get base_model
- base_model = model.split(":")[1]
+ extras = {}
- # get model schema
- models = self.predefined_models()
- model_map = {model.model: model for model in models}
- if base_model not in model_map:
- raise ValueError(f"Base model {base_model} not found")
-
- base_model_schema = model_map[base_model]
-
- base_model_schema_features = base_model_schema.features or []
- base_model_schema_model_properties = base_model_schema.model_properties or {}
- base_model_schema_parameters_rules = base_model_schema.parameter_rules or []
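+        # Optional capabilities are toggled via credentials; vision support becomes a model
+        # feature and is merged into the entity below via **extras.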
+ if "vision_support" in credentials and credentials["vision_support"] == "true":
+ extras["features"] = [ModelFeature.VISION]
entity = AIModelEntity(
model=model,
label=I18nObject(zh_Hans=model, en_US=model),
model_type=ModelType.LLM,
- features=[feature for feature in base_model_schema_features],
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={
- key: property
- for key, property in base_model_schema_model_properties.items()
+ ModelPropertyKey.MODE: credentials.get("mode"),
+ ModelPropertyKey.CONTEXT_SIZE: int(
+ credentials.get("context_size", 4096)
+ ),
},
- parameter_rules=[rule for rule in base_model_schema_parameters_rules],
- pricing=base_model_schema.pricing,
+ parameter_rules=[
+ ParameterRule(
+ name=DefaultParameterName.TEMPERATURE.value,
+ use_template=DefaultParameterName.TEMPERATURE.value,
+ label=I18nObject(en_US="Temperature"),
+ type=ParameterType.FLOAT,
+ help=I18nObject(
+ en_US="The temperature of the model. "
+ "Increasing the temperature will make the model answer "
+ "more creatively. (Default: 0.8)"
+ ),
+ default=0.8,
+ min=0,
+ max=2,
+ ),
+ ParameterRule(
+ name=DefaultParameterName.TOP_P.value,
+ use_template=DefaultParameterName.TOP_P.value,
+ label=I18nObject(en_US="Top P"),
+ type=ParameterType.FLOAT,
+ help=I18nObject(
+ en_US="Works together with top-k. A higher value (e.g., 0.95) will lead to "
+ "more diverse text, while a lower value (e.g., 0.5) will generate more "
+ "focused and conservative text. (Default: 0.9)"
+ ),
+ default=0.9,
+ min=0,
+ max=1,
+ ),
+ ParameterRule(
+ name="top_k",
+ label=I18nObject(en_US="Top K"),
+ type=ParameterType.INT,
+ help=I18nObject(
+ en_US="Reduces the probability of generating nonsense. "
+ "A higher value (e.g. 100) will give more diverse answers, "
+ "while a lower value (e.g. 10) will be more conservative. (Default: 40)"
+ ),
+ default=40,
+ min=1,
+ max=100,
+ ),
+ ParameterRule(
+ name="repeat_penalty",
+ label=I18nObject(en_US="Repeat Penalty"),
+ type=ParameterType.FLOAT,
+ help=I18nObject(
+ en_US="Sets how strongly to penalize repetitions. "
+ "A higher value (e.g., 1.5) will penalize repetitions more strongly, "
+ "while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)"
+ ),
+ default=1.1,
+ min=-2,
+ max=2,
+ ),
+ ParameterRule(
+ name="num_predict",
+ use_template="max_tokens",
+ label=I18nObject(en_US="Num Predict"),
+ type=ParameterType.INT,
+ help=I18nObject(
+ en_US="Maximum number of tokens to predict when generating text. "
+ "(Default: 128, -1 = infinite generation, -2 = fill context)"
+ ),
+ default=128,
+ min=-2,
+ max=int(credentials.get("max_tokens", 4096)),
+ ),
+ ParameterRule(
+ name="mirostat",
+ label=I18nObject(en_US="Mirostat sampling"),
+ type=ParameterType.INT,
+ help=I18nObject(
+ en_US="Enable Mirostat sampling for controlling perplexity. "
+ "(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"
+ ),
+ default=0,
+ min=0,
+ max=2,
+ ),
+ ParameterRule(
+ name="mirostat_eta",
+ label=I18nObject(en_US="Mirostat Eta"),
+ type=ParameterType.FLOAT,
+ help=I18nObject(
+ en_US="Influences how quickly the algorithm responds to feedback from "
+ "the generated text. A lower learning rate will result in slower adjustments, "
+ "while a higher learning rate will make the algorithm more responsive. "
+ "(Default: 0.1)"
+ ),
+ default=0.1,
+ precision=1,
+ ),
+ ParameterRule(
+ name="mirostat_tau",
+ label=I18nObject(en_US="Mirostat Tau"),
+ type=ParameterType.FLOAT,
+ help=I18nObject(
+ en_US="Controls the balance between coherence and diversity of the output. "
+ "A lower value will result in more focused and coherent text. (Default: 5.0)"
+ ),
+ default=5.0,
+ precision=1,
+ ),
+ ParameterRule(
+ name="num_ctx",
+ label=I18nObject(en_US="Size of context window"),
+ type=ParameterType.INT,
+ help=I18nObject(
+ en_US="Sets the size of the context window used to generate the next token. "
+ "(Default: 2048)"
+ ),
+ default=2048,
+ min=1,
+ ),
+ ParameterRule(
+ name="num_gpu",
+ label=I18nObject(en_US="Num GPU"),
+ type=ParameterType.INT,
+ help=I18nObject(
+ en_US="The number of layers to send to the GPU(s). "
+ "On macOS it defaults to 1 to enable metal support, 0 to disable."
+ ),
+ default=1,
+ min=0,
+ max=1,
+ ),
+ ParameterRule(
+ name="num_thread",
+ label=I18nObject(en_US="Num Thread"),
+ type=ParameterType.INT,
+ help=I18nObject(
+ en_US="Sets the number of threads to use during computation. "
+ "By default, Ollama will detect this for optimal performance. "
+ "It is recommended to set this value to the number of physical CPU cores "
+ "your system has (as opposed to the logical number of cores)."
+ ),
+ min=1,
+ ),
+ ParameterRule(
+ name="repeat_last_n",
+ label=I18nObject(en_US="Repeat last N"),
+ type=ParameterType.INT,
+ help=I18nObject(
+ en_US="Sets how far back for the model to look back to prevent repetition. "
+ "(Default: 64, 0 = disabled, -1 = num_ctx)"
+ ),
+ default=64,
+ min=-1,
+ ),
+ ParameterRule(
+ name="tfs_z",
+ label=I18nObject(en_US="TFS Z"),
+ type=ParameterType.FLOAT,
+ help=I18nObject(
+ en_US="Tail free sampling is used to reduce the impact of less probable tokens "
+ "from the output. A higher value (e.g., 2.0) will reduce the impact more, "
+ "while a value of 1.0 disables this setting. (default: 1)"
+ ),
+ default=1,
+ precision=1,
+ ),
+ ParameterRule(
+ name="seed",
+ label=I18nObject(en_US="Seed"),
+ type=ParameterType.INT,
+ help=I18nObject(
+ en_US="Sets the random number seed to use for generation. Setting this to "
+ "a specific number will make the model generate the same text for "
+ "the same prompt. (Default: 0)"
+ ),
+ default=0,
+ ),
+ ParameterRule(
+ name="format",
+ label=I18nObject(en_US="Format"),
+ type=ParameterType.STRING,
+ help=I18nObject(
+                        en_US="The format to return a response in."
+ " Currently the only accepted value is json."
+ ),
+ options=["json"],
+ ),
+ ],
+ pricing=PriceConfig(
+ input=Decimal(credentials.get("input_price", 0)),
+ output=Decimal(credentials.get("output_price", 0)),
+ unit=Decimal(credentials.get("unit", 0)),
+ currency=credentials.get("currency", "USD"),
+ ),
+ **extras,
)
return entity
diff --git a/model-providers/model_providers/core/model_runtime/model_providers/ollama/ollama.yaml b/model-providers/model_providers/core/model_runtime/model_providers/ollama/ollama.yaml
index d69c4ca9..67edb22d 100644
--- a/model-providers/model_providers/core/model_runtime/model_providers/ollama/ollama.yaml
+++ b/model-providers/model_providers/core/model_runtime/model_providers/ollama/ollama.yaml
@@ -27,7 +27,7 @@ model_credential_schema:
zh_Hans: 输入模型名称
credential_form_schemas:
- - variable: openai_api_base
+ - variable: base_url
label:
zh_Hans: API Base
en_US: API Base
@@ -35,4 +35,4 @@ model_credential_schema:
required: false
placeholder:
zh_Hans: 在此输入您的 API Base
- en_US: Enter your API Base
\ No newline at end of file
+ en_US: Enter your API Base
diff --git a/model-providers/tests/deepseek_providers_test/model_providers.yaml b/model-providers/tests/deepseek_providers_test/model_providers.yaml
new file mode 100644
index 00000000..c157cd40
--- /dev/null
+++ b/model-providers/tests/deepseek_providers_test/model_providers.yaml
@@ -0,0 +1,11 @@
+
+deepseek:
+ model_credential:
+ - model: 'deepseek-chat'
+ model_type: 'llm'
+ model_credentials:
+ base_url: 'https://api.deepseek.com'
+      api_key: 'sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
+
+
+
diff --git a/model-providers/tests/deepseek_providers_test/test_deepseek_service.py b/model-providers/tests/deepseek_providers_test/test_deepseek_service.py
new file mode 100644
index 00000000..a529ecc7
--- /dev/null
+++ b/model-providers/tests/deepseek_providers_test/test_deepseek_service.py
@@ -0,0 +1,21 @@
+import logging
+
+import pytest
+from langchain.chains import LLMChain
+from langchain_core.prompts import PromptTemplate
+from langchain_openai import ChatOpenAI
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.mark.requires("openai")
+def test_llm(init_server: str):
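+    # Route the OpenAI-compatible LangChain client through the model-providers gateway's /deepseek/v1 endpoint.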
+    llm = ChatOpenAI(
+        model_name="deepseek-chat",
+        openai_api_key="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
+        openai_api_base=f"{init_server}/deepseek/v1",
+    )
+ template = """Question: {question}
+
+ Answer: Let's think step by step."""
+
+ prompt = PromptTemplate.from_template(template)
+
+ llm_chain = LLMChain(prompt=prompt, llm=llm)
+ responses = llm_chain.run("你好")
+ logger.info("\033[1;32m" + f"llm_chain: {responses}" + "\033[0m")
diff --git a/model-providers/tests/ollama_providers_test/model_providers.yaml b/model-providers/tests/ollama_providers_test/model_providers.yaml
index 9ef23a0d..a1d2278e 100644
--- a/model-providers/tests/ollama_providers_test/model_providers.yaml
+++ b/model-providers/tests/ollama_providers_test/model_providers.yaml
@@ -4,7 +4,7 @@ ollama:
- model: 'llama3'
model_type: 'llm'
model_credentials:
- openai_api_base: 'http://172.21.80.1:11434'
+ base_url: 'http://172.21.80.1:11434/v1'
diff --git a/model-providers/tests/unit_tests/deepseek/model_providers.yaml b/model-providers/tests/unit_tests/deepseek/model_providers.yaml
new file mode 100644
index 00000000..c157cd40
--- /dev/null
+++ b/model-providers/tests/unit_tests/deepseek/model_providers.yaml
@@ -0,0 +1,11 @@
+
+deepseek:
+ model_credential:
+ - model: 'deepseek-chat'
+ model_type: 'llm'
+ model_credentials:
+ base_url: 'https://api.deepseek.com'
+      api_key: 'sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
+
+
+
diff --git a/model-providers/tests/unit_tests/deepseek/test_provider_manager_models.py b/model-providers/tests/unit_tests/deepseek/test_provider_manager_models.py
new file mode 100644
index 00000000..109fd37b
--- /dev/null
+++ b/model-providers/tests/unit_tests/deepseek/test_provider_manager_models.py
@@ -0,0 +1,39 @@
+import logging
+import logging.config
+
+from omegaconf import OmegaConf
+
+from model_providers import _to_custom_provide_configuration
+from model_providers.core.model_runtime.entities.model_entities import ModelType
+from model_providers.core.provider_manager import ProviderManager
+
+logger = logging.getLogger(__name__)
+
+
+def test_deepseek_provider_manager_models(logging_conf: dict, providers_file: str) -> None:
+ logging.config.dictConfig(logging_conf) # type: ignore
+    # load the provider configuration file
+ cfg = OmegaConf.load(
+ providers_file
+ )
+    # convert the configuration into provider and model credential records
+ (
+ provider_name_to_provider_records_dict,
+ provider_name_to_provider_model_records_dict,
+ ) = _to_custom_provide_configuration(cfg)
+    # create the provider manager
+ provider_manager = ProviderManager(
+ provider_name_to_provider_records_dict=provider_name_to_provider_records_dict,
+ provider_name_to_provider_model_records_dict=provider_name_to_provider_model_records_dict,
+ )
+
+ provider_model_bundle_llm = provider_manager.get_provider_model_bundle(
+ provider="deepseek", model_type=ModelType.LLM
+ )
+ predefined_models = (
+ provider_model_bundle_llm.model_type_instance.predefined_models()
+ )
+
+ logger.info(f"predefined_models: {predefined_models}")
diff --git a/model-providers/tests/unit_tests/ollama/model_providers.yaml b/model-providers/tests/unit_tests/ollama/model_providers.yaml
index 9ef23a0d..a1d2278e 100644
--- a/model-providers/tests/unit_tests/ollama/model_providers.yaml
+++ b/model-providers/tests/unit_tests/ollama/model_providers.yaml
@@ -4,7 +4,7 @@ ollama:
- model: 'llama3'
model_type: 'llm'
model_credentials:
- openai_api_base: 'http://172.21.80.1:11434'
+ base_url: 'http://172.21.80.1:11434/v1'