From 5bd8a4ed8e13e8ede6ac98cb83cec3d24ed66feb Mon Sep 17 00:00:00 2001 From: glide-the <2533736852@qq.com> Date: Thu, 9 May 2024 18:17:43 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81deepseek=E5=AE=A2=E6=88=B7?= =?UTF-8?q?=E7=AB=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../model_providers/deepseek/__init__.py | 0 .../deepseek/_assets/icon_l_en.svg | 1188 ++++++++++++++++ .../deepseek/_assets/icon_s_en.svg | 1188 ++++++++++++++++ .../model_providers/deepseek/_common.py | 59 + .../model_providers/deepseek/deepseek.py | 18 + .../model_providers/deepseek/deepseek.yaml | 44 + .../model_providers/deepseek/llm/__init__.py | 0 .../model_providers/deepseek/llm/llm.py | 1245 +++++++++++++++++ .../model_providers/ollama/_common.py | 5 +- .../model_providers/ollama/llm/llm.py | 227 ++- .../model_providers/ollama/ollama.yaml | 4 +- .../model_providers.yaml | 11 + .../test_deepseek_service.py | 21 + .../model_providers.yaml | 2 +- .../unit_tests/deepseek/model_providers.yaml | 11 + .../deepseek/test_provider_manager_models.py | 39 + .../unit_tests/ollama/model_providers.yaml | 2 +- 17 files changed, 4032 insertions(+), 32 deletions(-) create mode 100644 model-providers/model_providers/core/model_runtime/model_providers/deepseek/__init__.py create mode 100644 model-providers/model_providers/core/model_runtime/model_providers/deepseek/_assets/icon_l_en.svg create mode 100644 model-providers/model_providers/core/model_runtime/model_providers/deepseek/_assets/icon_s_en.svg create mode 100644 model-providers/model_providers/core/model_runtime/model_providers/deepseek/_common.py create mode 100644 model-providers/model_providers/core/model_runtime/model_providers/deepseek/deepseek.py create mode 100644 model-providers/model_providers/core/model_runtime/model_providers/deepseek/deepseek.yaml create mode 100644 model-providers/model_providers/core/model_runtime/model_providers/deepseek/llm/__init__.py create mode 100644 model-providers/model_providers/core/model_runtime/model_providers/deepseek/llm/llm.py create mode 100644 model-providers/tests/deepseek_providers_test/model_providers.yaml create mode 100644 model-providers/tests/deepseek_providers_test/test_deepseek_service.py create mode 100644 model-providers/tests/unit_tests/deepseek/model_providers.yaml create mode 100644 model-providers/tests/unit_tests/deepseek/test_provider_manager_models.py diff --git a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/__init__.py b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_assets/icon_l_en.svg b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_assets/icon_l_en.svg new file mode 100644 index 00000000..b85a4726 --- /dev/null +++ b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_assets/icon_l_en.svg @@ -0,0 +1,1188 @@ + + + + diff --git a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_assets/icon_s_en.svg b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_assets/icon_s_en.svg new file mode 100644 index 00000000..b85a4726 --- /dev/null +++ b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_assets/icon_s_en.svg @@ -0,0 +1,1188 @@ + + + + diff --git 
a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_common.py b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_common.py new file mode 100644 index 00000000..6961bb63 --- /dev/null +++ b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/_common.py @@ -0,0 +1,59 @@ +from typing import Dict, List, Type + +import openai +from httpx import Timeout + +from model_providers.core.model_runtime.errors.invoke import ( + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + InvokeError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) + + +class _CommonDeepseek: + def _to_credential_kwargs(self, credentials: dict) -> dict: + """ + Transform credentials to kwargs for model instance + + :param credentials: + :return: + """ + credentials_kwargs = { + "api_key": credentials["api_key"], + "timeout": Timeout(315.0, read=300.0, write=10.0, connect=5.0), + "max_retries": 1, + } + + if "base_url" in credentials and credentials["base_url"]: + credentials_kwargs["base_url"] = credentials["base_url"] + + return credentials_kwargs + + @property + def _invoke_error_mapping(self) -> Dict[Type[InvokeError], List[Type[Exception]]]: + """ + Map model invoke error to unified error + The key is the error type thrown to the caller + The value is the error type thrown by the model, + which needs to be converted into a unified error type for the caller. + + :return: Invoke error mapping + """ + return { + InvokeConnectionError: [openai.APIConnectionError, openai.APITimeoutError], + InvokeServerUnavailableError: [openai.InternalServerError], + InvokeRateLimitError: [openai.RateLimitError], + InvokeAuthorizationError: [ + openai.AuthenticationError, + openai.PermissionDeniedError, + ], + InvokeBadRequestError: [ + openai.BadRequestError, + openai.NotFoundError, + openai.UnprocessableEntityError, + openai.APIError, + ], + } diff --git a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/deepseek.py b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/deepseek.py new file mode 100644 index 00000000..7643e7f4 --- /dev/null +++ b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/deepseek.py @@ -0,0 +1,18 @@ +import logging + +from model_providers.core.model_runtime.model_providers.__base.model_provider import ( + ModelProvider, +) + +logger = logging.getLogger(__name__) + + +class DeepseekProvider(ModelProvider): + def validate_provider_credentials(self, credentials: dict) -> None: + """ + Validate provider credentials + if validate failed, raise exception + + :param credentials: provider credentials, credentials form defined in `provider_credential_schema`. 
+ """ + pass diff --git a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/deepseek.yaml b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/deepseek.yaml new file mode 100644 index 00000000..4d77678d --- /dev/null +++ b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/deepseek.yaml @@ -0,0 +1,44 @@ +provider: deepseek +label: + en_US: Deepseek +icon_large: + en_US: icon_l_en.svg +icon_small: + en_US: icon_s_en.svg +background: "#F9FAFB" +help: + title: + en_US: How to integrate with Deepseek + zh_Hans: 如何集成 Deepseek + url: + en_US: "deepseek" +supported_model_types: + - llm +configurate_methods: + - customizable-model +model_credential_schema: + model: + label: + en_US: Model Name + zh_Hans: 模型名称 + placeholder: + en_US: Enter your model name + zh_Hans: 输入模型名称 + credential_form_schemas: + - variable: api_key + label: + en_US: API Key + type: secret-input + required: true + placeholder: + zh_Hans: 在此输入您的 API Key + en_US: Enter your API Key + - variable: base_url + label: + zh_Hans: API Base + en_US: API Base + type: text-input + required: false + placeholder: + zh_Hans: 在此输入您的 API Base + en_US: Enter your API Base diff --git a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/llm/__init__.py b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/llm/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/model-providers/model_providers/core/model_runtime/model_providers/deepseek/llm/llm.py b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/llm/llm.py new file mode 100644 index 00000000..de99ed6c --- /dev/null +++ b/model-providers/model_providers/core/model_runtime/model_providers/deepseek/llm/llm.py @@ -0,0 +1,1245 @@ +import logging +from collections.abc import Generator +from typing import List, Optional, Union, cast +from decimal import Decimal + +import tiktoken +from openai import OpenAI, Stream +from openai.types import Completion +from openai.types.chat import ( + ChatCompletion, + ChatCompletionChunk, + ChatCompletionMessageToolCall, +) +from openai.types.chat.chat_completion_chunk import ( + ChoiceDeltaFunctionCall, + ChoiceDeltaToolCall, +) +from openai.types.chat.chat_completion_message import FunctionCall + +from model_providers.core.model_runtime.callbacks.base_callback import Callback +from model_providers.core.model_runtime.entities.llm_entities import ( + LLMMode, + LLMResult, + LLMResultChunk, + LLMResultChunkDelta, +) +from model_providers.core.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + ImagePromptMessageContent, + PromptMessage, + PromptMessageContentType, + PromptMessageTool, + SystemPromptMessage, + TextPromptMessageContent, + ToolPromptMessage, + UserPromptMessage, +) +from model_providers.core.model_runtime.entities.model_entities import ( + AIModelEntity, + FetchFrom, + I18nObject, + ModelType, + PriceConfig, ParameterRule, ParameterType, ModelFeature, ModelPropertyKey, DefaultParameterName, +) +from model_providers.core.model_runtime.errors.validate import ( + CredentialsValidateFailedError, +) +from model_providers.core.model_runtime.model_providers.__base.large_language_model import ( + LargeLanguageModel, +) +from model_providers.core.model_runtime.model_providers.deepseek._common import _CommonDeepseek + +logger = logging.getLogger(__name__) + +OPENAI_BLOCK_MODE_PROMPT = """You should always follow the instructions and output a valid {{block}} object. 
+The structure of the {{block}} object you can found in the instructions, use {"answer": "$your_answer"} as the default structure +if you are not sure about the structure. + + +{{instructions}} + +""" + + +class DeepseekLargeLanguageModel(_CommonDeepseek, LargeLanguageModel): + """ + Model class for OpenAI large language model. + """ + + def _invoke( + self, + model: str, + credentials: dict, + prompt_messages: List[PromptMessage], + model_parameters: dict, + tools: Optional[List[PromptMessageTool]] = None, + stop: Optional[List[str]] = None, + stream: bool = True, + user: Optional[str] = None, + ) -> Union[LLMResult, Generator]: + """ + Invoke large language model + + :param model: model name + :param credentials: model credentials + :param prompt_messages: prompt messages + :param model_parameters: model parameters + :param tools: tools for tool calling + :param stop: stop words + :param stream: is stream response + :param user: unique user id + :return: full response or stream response chunk generator result + """ + # handle fine tune remote models + base_model = model + if model.startswith("ft:"): + base_model = model.split(":")[1] + + # get model mode + model_mode = self.get_model_mode(base_model, credentials) + + if model_mode == LLMMode.CHAT: + # chat model + return self._chat_generate( + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user, + ) + else: + # text completion model + return self._generate( + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + stop=stop, + stream=stream, + user=user, + ) + + def _code_block_mode_wrapper( + self, + model: str, + credentials: dict, + prompt_messages: List[PromptMessage], + model_parameters: dict, + tools: Optional[List[PromptMessageTool]] = None, + stop: Optional[List[str]] = None, + stream: bool = True, + user: Optional[str] = None, + callbacks: List[Callback] = None, + ) -> Union[LLMResult, Generator]: + """ + Code block mode wrapper for invoking large language model + """ + # handle fine tune remote models + base_model = model + if model.startswith("ft:"): + base_model = model.split(":")[1] + + # get model mode + model_mode = self.get_model_mode(base_model, credentials) + + # transform response format + if "response_format" in model_parameters and model_parameters[ + "response_format" + ] in ["JSON", "XML"]: + stop = stop or [] + if model_mode == LLMMode.CHAT: + # chat model + self._transform_chat_json_prompts( + model=base_model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user, + response_format=model_parameters["response_format"], + ) + else: + self._transform_completion_json_prompts( + model=base_model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user, + response_format=model_parameters["response_format"], + ) + model_parameters.pop("response_format") + + return self._invoke( + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user, + ) + + def _transform_chat_json_prompts( + self, + model: str, + credentials: dict, + prompt_messages: List[PromptMessage], + model_parameters: dict, + tools: Union[List[PromptMessageTool], None] = None, + 
stop: Union[List[str], None] = None, + stream: bool = True, + user: Union[str, None] = None, + response_format: str = "JSON", + ) -> None: + """ + Transform json prompts + """ + if "```\n" not in stop: + stop.append("```\n") + if "\n```" not in stop: + stop.append("\n```") + + # check if there is a system message + if len(prompt_messages) > 0 and isinstance( + prompt_messages[0], SystemPromptMessage + ): + # override the system message + prompt_messages[0] = SystemPromptMessage( + content=OPENAI_BLOCK_MODE_PROMPT.replace( + "{{instructions}}", prompt_messages[0].content + ).replace("{{block}}", response_format) + ) + prompt_messages.append( + AssistantPromptMessage(content=f"\n```{response_format}\n") + ) + else: + # insert the system message + prompt_messages.insert( + 0, + SystemPromptMessage( + content=OPENAI_BLOCK_MODE_PROMPT.replace( + "{{instructions}}", + f"Please output a valid {response_format} object.", + ).replace("{{block}}", response_format) + ), + ) + prompt_messages.append( + AssistantPromptMessage(content=f"\n```{response_format}") + ) + + def _transform_completion_json_prompts( + self, + model: str, + credentials: dict, + prompt_messages: List[PromptMessage], + model_parameters: dict, + tools: Union[List[PromptMessageTool], None] = None, + stop: Union[List[str], None] = None, + stream: bool = True, + user: Union[str, None] = None, + response_format: str = "JSON", + ) -> None: + """ + Transform json prompts + """ + if "```\n" not in stop: + stop.append("```\n") + if "\n```" not in stop: + stop.append("\n```") + + # override the last user message + user_message = None + for i in range(len(prompt_messages) - 1, -1, -1): + if isinstance(prompt_messages[i], UserPromptMessage): + user_message = prompt_messages[i] + break + + if user_message: + if prompt_messages[i].content[-11:] == "Assistant: ": + # now we are in the chat app, remove the last assistant message + prompt_messages[i].content = prompt_messages[i].content[:-11] + prompt_messages[i] = UserPromptMessage( + content=OPENAI_BLOCK_MODE_PROMPT.replace( + "{{instructions}}", user_message.content + ).replace("{{block}}", response_format) + ) + prompt_messages[i].content += f"Assistant:\n```{response_format}\n" + else: + prompt_messages[i] = UserPromptMessage( + content=OPENAI_BLOCK_MODE_PROMPT.replace( + "{{instructions}}", user_message.content + ).replace("{{block}}", response_format) + ) + prompt_messages[i].content += f"\n```{response_format}\n" + + def get_num_tokens( + self, + model: str, + credentials: dict, + prompt_messages: List[PromptMessage], + tools: Optional[List[PromptMessageTool]] = None, + ) -> int: + """ + Get number of tokens for given prompt messages + + :param model: model name + :param credentials: model credentials + :param prompt_messages: prompt messages + :param tools: tools for tool calling + :return: + """ + # handle fine tune remote models + if model.startswith("ft:"): + base_model = model.split(":")[1] + else: + base_model = model + + # get model mode + model_mode = self.get_model_mode(model) + + if model_mode == LLMMode.CHAT: + # chat model + return self._num_tokens_from_messages(base_model, prompt_messages, tools) + else: + # text completion model, do not support tool calling + return self._num_tokens_from_string(base_model, prompt_messages[0].content) + + def validate_credentials(self, model: str, credentials: dict) -> None: + """ + Validate model credentials + + :param model: model name + :param credentials: model credentials + :return: + """ + try: + # transform credentials to kwargs for 
model instance + credentials_kwargs = self._to_credential_kwargs(credentials) + client = OpenAI(**credentials_kwargs) + + # handle fine tune remote models + base_model = model + # fine-tuned model name likes ft:gpt-3.5-turbo-0613:personal::xxxxx + if model.startswith("ft:"): + base_model = model.split(":")[1] + + # check if model exists + remote_models = self.remote_models(credentials) + remote_model_map = {model.model: model for model in remote_models} + if model not in remote_model_map: + raise CredentialsValidateFailedError( + f"Fine-tuned model {model} not found" + ) + + # get model mode + model_mode = self.get_model_mode(base_model, credentials) + + if model_mode == LLMMode.CHAT: + # chat model + client.chat.completions.create( + messages=[{"role": "user", "content": "ping"}], + model=model, + temperature=0, + max_tokens=20, + stream=False, + ) + else: + # text completion model + client.completions.create( + prompt="ping", + model=model, + temperature=0, + max_tokens=20, + stream=False, + ) + except Exception as ex: + raise CredentialsValidateFailedError(str(ex)) + + def remote_models(self, credentials: dict) -> List[AIModelEntity]: + """ + Return remote models if credentials are provided. + + :param credentials: provider credentials + :return: + """ + # get predefined models + predefined_models = self.predefined_models() + predefined_models_map = {model.model: model for model in predefined_models} + + # transform credentials to kwargs for model instance + credentials_kwargs = self._to_credential_kwargs(credentials) + client = OpenAI(**credentials_kwargs) + + # get all remote models + remote_models = client.models.list() + + fine_tune_models = [ + model for model in remote_models if model.id.startswith("ft:") + ] + + ai_model_entities = [] + for model in fine_tune_models: + base_model = model.id.split(":")[1] + + base_model_schema = None + for ( + predefined_model_name, + predefined_model, + ) in predefined_models_map.items(): + if predefined_model_name in base_model: + base_model_schema = predefined_model + + if not base_model_schema: + continue + + ai_model_entity = AIModelEntity( + model=model.id, + label=I18nObject(zh_Hans=model.id, en_US=model.id), + model_type=ModelType.LLM, + features=base_model_schema.features, + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_properties=base_model_schema.model_properties, + parameter_rules=base_model_schema.parameter_rules, + pricing=PriceConfig( + input=0.003, output=0.006, unit=0.001, currency="USD" + ), + ) + + ai_model_entities.append(ai_model_entity) + + return ai_model_entities + + def _generate( + self, + model: str, + credentials: dict, + prompt_messages: List[PromptMessage], + model_parameters: dict, + stop: Optional[List[str]] = None, + stream: bool = True, + user: Optional[str] = None, + ) -> Union[LLMResult, Generator]: + """ + Invoke llm completion model + + :param model: model name + :param credentials: credentials + :param prompt_messages: prompt messages + :param model_parameters: model parameters + :param stop: stop words + :param stream: is stream response + :param user: unique user id + :return: full response or stream response chunk generator result + """ + # transform credentials to kwargs for model instance + credentials_kwargs = self._to_credential_kwargs(credentials) + + # init model client + client = OpenAI(**credentials_kwargs) + + extra_model_kwargs = {} + + if stop: + extra_model_kwargs["stop"] = stop + + if user: + extra_model_kwargs["user"] = user + + # text completion model + response = 
client.completions.create( + prompt=prompt_messages[0].content, + model=model, + stream=stream, + **model_parameters, + **extra_model_kwargs, + ) + + if stream: + return self._handle_generate_stream_response( + model, credentials, response, prompt_messages + ) + + return self._handle_generate_response( + model, credentials, response, prompt_messages + ) + + def _handle_generate_response( + self, + model: str, + credentials: dict, + response: Completion, + prompt_messages: List[PromptMessage], + ) -> LLMResult: + """ + Handle llm completion response + + :param model: model name + :param credentials: model credentials + :param response: response + :param prompt_messages: prompt messages + :return: llm result + """ + assistant_text = response.choices[0].text + + # transform assistant message to prompt message + assistant_prompt_message = AssistantPromptMessage(content=assistant_text) + + # calculate num tokens + if response.usage: + # transform usage + prompt_tokens = response.usage.prompt_tokens + completion_tokens = response.usage.completion_tokens + else: + # calculate num tokens + prompt_tokens = self._num_tokens_from_string( + model, prompt_messages[0].content + ) + completion_tokens = self._num_tokens_from_string(model, assistant_text) + + # transform usage + usage = self._calc_response_usage( + model, credentials, prompt_tokens, completion_tokens + ) + + # transform response + result = LLMResult( + model=response.model, + prompt_messages=prompt_messages, + message=assistant_prompt_message, + usage=usage, + system_fingerprint=response.system_fingerprint, + ) + + return result + + def _handle_generate_stream_response( + self, + model: str, + credentials: dict, + response: Stream[Completion], + prompt_messages: List[PromptMessage], + ) -> Generator: + """ + Handle llm completion stream response + + :param model: model name + :param credentials: model credentials + :param response: response + :param prompt_messages: prompt messages + :return: llm response chunk generator result + """ + full_text = "" + for chunk in response: + if len(chunk.choices) == 0: + continue + + delta = chunk.choices[0] + + if delta.finish_reason is None and (delta.text is None or delta.text == ""): + continue + + # transform assistant message to prompt message + text = delta.text if delta.text else "" + assistant_prompt_message = AssistantPromptMessage(content=text) + + full_text += text + + if delta.finish_reason is not None: + # calculate num tokens + if chunk.usage: + # transform usage + prompt_tokens = chunk.usage.prompt_tokens + completion_tokens = chunk.usage.completion_tokens + else: + # calculate num tokens + prompt_tokens = self._num_tokens_from_string( + model, prompt_messages[0].content + ) + completion_tokens = self._num_tokens_from_string(model, full_text) + + # transform usage + usage = self._calc_response_usage( + model, credentials, prompt_tokens, completion_tokens + ) + + yield LLMResultChunk( + model=chunk.model, + prompt_messages=prompt_messages, + system_fingerprint=chunk.system_fingerprint, + delta=LLMResultChunkDelta( + index=delta.index, + message=assistant_prompt_message, + finish_reason=delta.finish_reason, + usage=usage, + ), + ) + else: + yield LLMResultChunk( + model=chunk.model, + prompt_messages=prompt_messages, + system_fingerprint=chunk.system_fingerprint, + delta=LLMResultChunkDelta( + index=delta.index, + message=assistant_prompt_message, + ), + ) + + def _chat_generate( + self, + model: str, + credentials: dict, + prompt_messages: List[PromptMessage], + model_parameters: dict, + 
tools: Optional[List[PromptMessageTool]] = None, + stop: Optional[List[str]] = None, + stream: bool = True, + user: Optional[str] = None, + ) -> Union[LLMResult, Generator]: + """ + Invoke llm chat model + + :param model: model name + :param credentials: credentials + :param prompt_messages: prompt messages + :param model_parameters: model parameters + :param tools: tools for tool calling + :param stop: stop words + :param stream: is stream response + :param user: unique user id + :return: full response or stream response chunk generator result + """ + # transform credentials to kwargs for model instance + credentials_kwargs = self._to_credential_kwargs(credentials) + + # init model client + client = OpenAI(**credentials_kwargs) + + response_format = model_parameters.get("response_format") + if response_format: + if response_format == "json_object": + response_format = {"type": "json_object"} + else: + response_format = {"type": "text"} + + model_parameters["response_format"] = response_format + + extra_model_kwargs = {} + + if tools: + # extra_model_kwargs['tools'] = [helper.dump_model(PromptMessageFunction(function=tool)) for tool in tools] + extra_model_kwargs["functions"] = [ + { + "name": tool.name, + "description": tool.description, + "parameters": tool.parameters, + } + for tool in tools + ] + + if stop: + extra_model_kwargs["stop"] = stop + + if user: + extra_model_kwargs["user"] = user + + # chat model + response = client.chat.completions.create( + messages=[self._convert_prompt_message_to_dict(m) for m in prompt_messages], + model=model, + stream=stream, + **model_parameters, + **extra_model_kwargs, + ) + + if stream: + return self._handle_chat_generate_stream_response( + model, credentials, response, prompt_messages, tools + ) + + return self._handle_chat_generate_response( + model, credentials, response, prompt_messages, tools + ) + + def _handle_chat_generate_response( + self, + model: str, + credentials: dict, + response: ChatCompletion, + prompt_messages: List[PromptMessage], + tools: Optional[List[PromptMessageTool]] = None, + ) -> LLMResult: + """ + Handle llm chat response + + :param model: model name + :param credentials: credentials + :param response: response + :param prompt_messages: prompt messages + :param tools: tools for tool calling + :return: llm response + """ + assistant_message = response.choices[0].message + # assistant_message_tool_calls = assistant_message.tool_calls + assistant_message_function_call = assistant_message.function_call + + # extract tool calls from response + # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls) + function_call = self._extract_response_function_call( + assistant_message_function_call + ) + tool_calls = [function_call] if function_call else [] + + # transform assistant message to prompt message + assistant_prompt_message = AssistantPromptMessage( + content=assistant_message.content, tool_calls=tool_calls + ) + + # calculate num tokens + if response.usage: + # transform usage + prompt_tokens = response.usage.prompt_tokens + completion_tokens = response.usage.completion_tokens + else: + # calculate num tokens + prompt_tokens = self._num_tokens_from_messages( + model, prompt_messages, tools + ) + completion_tokens = self._num_tokens_from_messages( + model, [assistant_prompt_message] + ) + + # transform usage + usage = self._calc_response_usage( + model, credentials, prompt_tokens, completion_tokens + ) + + # transform response + response = LLMResult( + model=response.model, + 
prompt_messages=prompt_messages, + message=assistant_prompt_message, + usage=usage, + system_fingerprint=response.system_fingerprint, + ) + + return response + + def _handle_chat_generate_stream_response( + self, + model: str, + credentials: dict, + response: Stream[ChatCompletionChunk], + prompt_messages: List[PromptMessage], + tools: Optional[List[PromptMessageTool]] = None, + ) -> Generator: + """ + Handle llm chat stream response + + :param model: model name + :param response: response + :param prompt_messages: prompt messages + :param tools: tools for tool calling + :return: llm response chunk generator + """ + full_assistant_content = "" + delta_assistant_message_function_call_storage: ChoiceDeltaFunctionCall = None + for chunk in response: + if len(chunk.choices) == 0: + continue + + delta = chunk.choices[0] + has_finish_reason = delta.finish_reason is not None + + if ( + not has_finish_reason + and (delta.delta.content is None or delta.delta.content == "") + and delta.delta.function_call is None + ): + continue + + # assistant_message_tool_calls = delta.delta.tool_calls + assistant_message_function_call = delta.delta.function_call + + # extract tool calls from response + if delta_assistant_message_function_call_storage is not None: + # handle process of stream function call + if assistant_message_function_call: + # message has not ended ever + delta_assistant_message_function_call_storage.arguments += ( + assistant_message_function_call.arguments + ) + continue + else: + # message has ended + assistant_message_function_call = ( + delta_assistant_message_function_call_storage + ) + delta_assistant_message_function_call_storage = None + else: + if assistant_message_function_call: + # start of stream function call + delta_assistant_message_function_call_storage = ( + assistant_message_function_call + ) + if not has_finish_reason: + continue + + # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls) + function_call = self._extract_response_function_call( + assistant_message_function_call + ) + tool_calls = [function_call] if function_call else [] + + # transform assistant message to prompt message + assistant_prompt_message = AssistantPromptMessage( + content=delta.delta.content if delta.delta.content else "", + tool_calls=tool_calls, + ) + + full_assistant_content += delta.delta.content if delta.delta.content else "" + + if has_finish_reason: + # calculate num tokens + prompt_tokens = self._num_tokens_from_messages( + model, prompt_messages, tools + ) + + full_assistant_prompt_message = AssistantPromptMessage( + content=full_assistant_content, tool_calls=tool_calls + ) + completion_tokens = self._num_tokens_from_messages( + model, [full_assistant_prompt_message] + ) + + # transform usage + usage = self._calc_response_usage( + model, credentials, prompt_tokens, completion_tokens + ) + + yield LLMResultChunk( + model=chunk.model, + prompt_messages=prompt_messages, + system_fingerprint=chunk.system_fingerprint, + delta=LLMResultChunkDelta( + index=delta.index, + message=assistant_prompt_message, + finish_reason=delta.finish_reason, + usage=usage, + ), + ) + else: + yield LLMResultChunk( + model=chunk.model, + prompt_messages=prompt_messages, + system_fingerprint=chunk.system_fingerprint, + delta=LLMResultChunkDelta( + index=delta.index, + message=assistant_prompt_message, + ), + ) + + def _extract_response_tool_calls( + self, + response_tool_calls: List[ + Union[ChatCompletionMessageToolCall, ChoiceDeltaToolCall] + ], + ) -> List[AssistantPromptMessage.ToolCall]: 
+ """ + Extract tool calls from response + + :param response_tool_calls: response tool calls + :return: list of tool calls + """ + tool_calls = [] + if response_tool_calls: + for response_tool_call in response_tool_calls: + function = AssistantPromptMessage.ToolCall.ToolCallFunction( + name=response_tool_call.function.name, + arguments=response_tool_call.function.arguments, + ) + + tool_call = AssistantPromptMessage.ToolCall( + id=response_tool_call.id, + type=response_tool_call.type, + function=function, + ) + tool_calls.append(tool_call) + + return tool_calls + + def _extract_response_function_call( + self, response_function_call: Union[FunctionCall, ChoiceDeltaFunctionCall] + ) -> AssistantPromptMessage.ToolCall: + """ + Extract function call from response + + :param response_function_call: response function call + :return: tool call + """ + tool_call = None + if response_function_call: + function = AssistantPromptMessage.ToolCall.ToolCallFunction( + name=response_function_call.name, + arguments=response_function_call.arguments, + ) + + tool_call = AssistantPromptMessage.ToolCall( + id=response_function_call.name, type="function", function=function + ) + + return tool_call + + def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict: + """ + Convert PromptMessage to dict for OpenAI API + """ + if isinstance(message, UserPromptMessage): + message = cast(UserPromptMessage, message) + if isinstance(message.content, str): + message_dict = {"role": "user", "content": message.content} + else: + sub_messages = [] + for message_content in message.content: + if message_content.type == PromptMessageContentType.TEXT: + message_content = cast( + TextPromptMessageContent, message_content + ) + sub_message_dict = { + "type": "text", + "text": message_content.data, + } + sub_messages.append(sub_message_dict) + elif message_content.type == PromptMessageContentType.IMAGE: + message_content = cast( + ImagePromptMessageContent, message_content + ) + sub_message_dict = { + "type": "image_url", + "image_url": { + "url": message_content.data, + "detail": message_content.detail.value, + }, + } + sub_messages.append(sub_message_dict) + + message_dict = {"role": "user", "content": sub_messages} + elif isinstance(message, AssistantPromptMessage): + message = cast(AssistantPromptMessage, message) + message_dict = {"role": "assistant", "content": message.content} + if message.tool_calls: + # message_dict["tool_calls"] = [tool_call.dict() for tool_call in + # message.tool_calls] + function_call = message.tool_calls[0] + message_dict["function_call"] = { + "name": function_call.function.name, + "arguments": function_call.function.arguments, + } + elif isinstance(message, SystemPromptMessage): + message = cast(SystemPromptMessage, message) + message_dict = {"role": "system", "content": message.content} + elif isinstance(message, ToolPromptMessage): + message = cast(ToolPromptMessage, message) + # message_dict = { + # "role": "tool", + # "content": message.content, + # "tool_call_id": message.tool_call_id + # } + message_dict = { + "role": "function", + "content": message.content, + "name": message.tool_call_id, + } + else: + raise ValueError(f"Got unknown type {message}") + + if message.name: + message_dict["name"] = message.name + + return message_dict + + def _num_tokens_from_string( + self, model: str, text: str, tools: Optional[List[PromptMessageTool]] = None + ) -> int: + """ + Calculate num tokens for text completion model with tiktoken package. 
+ + :param model: model name + :param text: prompt text + :param tools: tools for tool calling + :return: number of tokens + """ + try: + encoding = tiktoken.encoding_for_model(model) + except KeyError: + encoding = tiktoken.get_encoding("cl100k_base") + + num_tokens = len(encoding.encode(text)) + + if tools: + num_tokens += self._num_tokens_for_tools(encoding, tools) + + return num_tokens + + def _num_tokens_from_messages( + self, + model: str, + messages: List[PromptMessage], + tools: Optional[List[PromptMessageTool]] = None, + ) -> int: + """Calculate num tokens for gpt-3.5-turbo and gpt-4 with tiktoken package. + + Official documentation: https://github.com/openai/openai-cookbook/blob/ + main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb""" + if model.startswith("ft:"): + model = model.split(":")[1] + + try: + encoding = tiktoken.encoding_for_model(model) + except KeyError: + logger.warning("Warning: model not found. Using cl100k_base encoding.") + model = "cl100k_base" + encoding = tiktoken.get_encoding(model) + + if model.startswith("gpt-3.5-turbo-0301"): + # every message follows {role/name}\n{content}\n + tokens_per_message = 4 + # if there's a name, the role is omitted + tokens_per_name = -1 + elif model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4"): + tokens_per_message = 3 + tokens_per_name = 1 + else: + raise NotImplementedError( + f"get_num_tokens_from_messages() is not presently implemented " + f"for model {model}." + "See https://github.com/openai/openai-python/blob/main/chatml.md for " + "information on how messages are converted to tokens." + ) + num_tokens = 0 + messages_dict = [self._convert_prompt_message_to_dict(m) for m in messages] + for message in messages_dict: + num_tokens += tokens_per_message + for key, value in message.items(): + # Cast str(value) in case the message value is not a string + # This occurs with function messages + # TODO: The current token calculation method for the image type is not implemented, + # which need to download the image and then get the resolution for calculation, + # and will increase the request delay + if isinstance(value, list): + text = "" + for item in value: + if isinstance(item, dict) and item["type"] == "text": + text += item["text"] + + value = text + + if key == "tool_calls": + for tool_call in value: + for t_key, t_value in tool_call.items(): + num_tokens += len(encoding.encode(t_key)) + if t_key == "function": + for f_key, f_value in t_value.items(): + num_tokens += len(encoding.encode(f_key)) + num_tokens += len(encoding.encode(f_value)) + else: + num_tokens += len(encoding.encode(t_key)) + num_tokens += len(encoding.encode(t_value)) + else: + num_tokens += len(encoding.encode(str(value))) + + if key == "name": + num_tokens += tokens_per_name + + # every reply is primed with assistant + num_tokens += 3 + + if tools: + num_tokens += self._num_tokens_for_tools(encoding, tools) + + return num_tokens + + def _num_tokens_for_tools( + self, encoding: tiktoken.Encoding, tools: List[PromptMessageTool] + ) -> int: + """ + Calculate num tokens for tool calling with tiktoken package. 
+ + :param encoding: encoding + :param tools: tools for tool calling + :return: number of tokens + """ + num_tokens = 0 + for tool in tools: + num_tokens += len(encoding.encode("type")) + num_tokens += len(encoding.encode("function")) + + # calculate num tokens for function object + num_tokens += len(encoding.encode("name")) + num_tokens += len(encoding.encode(tool.name)) + num_tokens += len(encoding.encode("description")) + num_tokens += len(encoding.encode(tool.description)) + parameters = tool.parameters + num_tokens += len(encoding.encode("parameters")) + if "title" in parameters: + num_tokens += len(encoding.encode("title")) + num_tokens += len(encoding.encode(parameters.get("title"))) + num_tokens += len(encoding.encode("type")) + num_tokens += len(encoding.encode(parameters.get("type"))) + if "properties" in parameters: + num_tokens += len(encoding.encode("properties")) + for key, value in parameters.get("properties").items(): + num_tokens += len(encoding.encode(key)) + for field_key, field_value in value.items(): + num_tokens += len(encoding.encode(field_key)) + if field_key == "enum": + for enum_field in field_value: + num_tokens += 3 + num_tokens += len(encoding.encode(enum_field)) + else: + num_tokens += len(encoding.encode(field_key)) + num_tokens += len(encoding.encode(str(field_value))) + if "required" in parameters: + num_tokens += len(encoding.encode("required")) + for required_field in parameters["required"]: + num_tokens += 3 + num_tokens += len(encoding.encode(required_field)) + + return num_tokens + + def get_customizable_model_schema( + self, model: str, credentials: dict + ) -> AIModelEntity: + """ + Get customizable model schema. + + :param model: model name + :param credentials: credentials + + :return: model schema + """ + extras = {} + + + entity = AIModelEntity( + model=model, + label=I18nObject(zh_Hans=model, en_US=model), + model_type=ModelType.LLM, + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_properties={ + ModelPropertyKey.MODE: credentials.get("mode"), + ModelPropertyKey.CONTEXT_SIZE: int( + credentials.get("context_size", 4096) + ), + }, + parameter_rules=[ + ParameterRule( + name=DefaultParameterName.TEMPERATURE.value, + use_template=DefaultParameterName.TEMPERATURE.value, + label=I18nObject(en_US="Temperature"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="The temperature of the model. " + "Increasing the temperature will make the model answer " + "more creatively. (Default: 0.8)" + ), + default=0.8, + min=0, + max=2, + ), + ParameterRule( + name=DefaultParameterName.TOP_P.value, + use_template=DefaultParameterName.TOP_P.value, + label=I18nObject(en_US="Top P"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="Works together with top-k. A higher value (e.g., 0.95) will lead to " + "more diverse text, while a lower value (e.g., 0.5) will generate more " + "focused and conservative text. (Default: 0.9)" + ), + default=0.9, + min=0, + max=1, + ), + ParameterRule( + name="frequency_penalty", + label=I18nObject(en_US="frequency_penalty"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="A number between -2.0 and 2.0. If positive, ", + zh_Hans="介于 -2.0 和 2.0 之间的数字。如果该值为正," + "那么新 token 会根据其在已有文本中的出现频率受到相应的惩罚," + "降低模型重复相同内容的可能性" + ), + default=0, + min=-2, + max=2, + ), + ParameterRule( + name="presence_penalty", + label=I18nObject(en_US="presence_penalty"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="Sets how strongly to presence_penalty. 
", + zh_Hans="介于 -2.0 和 2.0 之间的数字。如果该值为正,那么新 token 会根据其是否已在已有文本中出现受到相应的惩罚,从而增加模型谈论新主题的可能性。" + ), + default=1.1, + min=-2, + max=2, + ), + ParameterRule( + name="max_tokens", + use_template="max_tokens", + label=I18nObject(en_US="max_tokens"), + type=ParameterType.INT, + help=I18nObject( + en_US="Maximum number of tokens to predict when generating text. ", + zh_Hans="限制一次请求中模型生成 completion 的最大 token 数。" + "输入 token 和输出 token 的总长度受模型的上下文长度的限制。" + ), + default=128, + min=-2, + max=int(credentials.get("max_tokens", 4096)), + ), + ParameterRule( + name="logprobs", + label=I18nObject(en_US="logprobs"), + type=ParameterType.BOOLEAN, + help=I18nObject( + en_US="Whether to return the log probabilities of the tokens. ", + zh_Hans="是否返回所输出 token 的对数概率。如果为 true,则在 message 的 content 中返回每个输出 token 的对数概率。" + ), + ), + ParameterRule( + name="top_logprobs", + label=I18nObject(en_US="top_logprobs"), + type=ParameterType.INT, + help=I18nObject( + en_US="the format to return a response in.", + zh_Hans="一个介于 0 到 20 之间的整数 N,指定每个输出位置返回输出概率 top N 的 token," + "且返回这些 token 的对数概率。指定此参数时,logprobs 必须为 true。" + ), + default=0, + min=0, + max=20, + ), + ], + pricing=PriceConfig( + input=Decimal(credentials.get("input_price", 0)), + output=Decimal(credentials.get("output_price", 0)), + unit=Decimal(credentials.get("unit", 0)), + currency=credentials.get("currency", "USD"), + ), + **extras, + ) + + return entity diff --git a/model-providers/model_providers/core/model_runtime/model_providers/ollama/_common.py b/model-providers/model_providers/core/model_runtime/model_providers/ollama/_common.py index c910ac97..ff3ba911 100644 --- a/model-providers/model_providers/core/model_runtime/model_providers/ollama/_common.py +++ b/model-providers/model_providers/core/model_runtime/model_providers/ollama/_common.py @@ -27,9 +27,8 @@ class _CommonOllama: "max_retries": 1, } - if "openai_api_base" in credentials and credentials["openai_api_base"]: - credentials["openai_api_base"] = credentials["openai_api_base"].rstrip("/") - credentials_kwargs["base_url"] = credentials["openai_api_base"] + "/v1" + if "base_url" in credentials and credentials["base_url"]: + credentials_kwargs["base_url"] = credentials["base_url"] return credentials_kwargs diff --git a/model-providers/model_providers/core/model_runtime/model_providers/ollama/llm/llm.py b/model-providers/model_providers/core/model_runtime/model_providers/ollama/llm/llm.py index 0848f410..1792fa80 100644 --- a/model-providers/model_providers/core/model_runtime/model_providers/ollama/llm/llm.py +++ b/model-providers/model_providers/core/model_runtime/model_providers/ollama/llm/llm.py @@ -1,6 +1,7 @@ import logging from collections.abc import Generator from typing import List, Optional, Union, cast +from decimal import Decimal import tiktoken from openai import OpenAI, Stream @@ -39,7 +40,7 @@ from model_providers.core.model_runtime.entities.model_entities import ( FetchFrom, I18nObject, ModelType, - PriceConfig, + PriceConfig, ModelFeature, ModelPropertyKey, DefaultParameterName, ParameterRule, ParameterType, ) from model_providers.core.model_runtime.errors.validate import ( CredentialsValidateFailedError, @@ -1116,47 +1117,223 @@ class OllamaLargeLanguageModel(_CommonOllama, LargeLanguageModel): return num_tokens def get_customizable_model_schema( - self, model: str, credentials: dict + self, model: str, credentials: dict ) -> AIModelEntity: """ - OpenAI supports fine-tuning of their models. This method returns the schema of the base model - but renamed to the fine-tuned model name. 
+ Get customizable model schema. :param model: model name :param credentials: credentials :return: model schema """ - if not model.startswith("ft:"): - base_model = model - else: - # get base_model - base_model = model.split(":")[1] + extras = {} - # get model schema - models = self.predefined_models() - model_map = {model.model: model for model in models} - if base_model not in model_map: - raise ValueError(f"Base model {base_model} not found") - - base_model_schema = model_map[base_model] - - base_model_schema_features = base_model_schema.features or [] - base_model_schema_model_properties = base_model_schema.model_properties or {} - base_model_schema_parameters_rules = base_model_schema.parameter_rules or [] + if "vision_support" in credentials and credentials["vision_support"] == "true": + extras["features"] = [ModelFeature.VISION] entity = AIModelEntity( model=model, label=I18nObject(zh_Hans=model, en_US=model), model_type=ModelType.LLM, - features=[feature for feature in base_model_schema_features], fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, model_properties={ - key: property - for key, property in base_model_schema_model_properties.items() + ModelPropertyKey.MODE: credentials.get("mode"), + ModelPropertyKey.CONTEXT_SIZE: int( + credentials.get("context_size", 4096) + ), }, - parameter_rules=[rule for rule in base_model_schema_parameters_rules], - pricing=base_model_schema.pricing, + parameter_rules=[ + ParameterRule( + name=DefaultParameterName.TEMPERATURE.value, + use_template=DefaultParameterName.TEMPERATURE.value, + label=I18nObject(en_US="Temperature"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="The temperature of the model. " + "Increasing the temperature will make the model answer " + "more creatively. (Default: 0.8)" + ), + default=0.8, + min=0, + max=2, + ), + ParameterRule( + name=DefaultParameterName.TOP_P.value, + use_template=DefaultParameterName.TOP_P.value, + label=I18nObject(en_US="Top P"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="Works together with top-k. A higher value (e.g., 0.95) will lead to " + "more diverse text, while a lower value (e.g., 0.5) will generate more " + "focused and conservative text. (Default: 0.9)" + ), + default=0.9, + min=0, + max=1, + ), + ParameterRule( + name="top_k", + label=I18nObject(en_US="Top K"), + type=ParameterType.INT, + help=I18nObject( + en_US="Reduces the probability of generating nonsense. " + "A higher value (e.g. 100) will give more diverse answers, " + "while a lower value (e.g. 10) will be more conservative. (Default: 40)" + ), + default=40, + min=1, + max=100, + ), + ParameterRule( + name="repeat_penalty", + label=I18nObject(en_US="Repeat Penalty"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="Sets how strongly to penalize repetitions. " + "A higher value (e.g., 1.5) will penalize repetitions more strongly, " + "while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)" + ), + default=1.1, + min=-2, + max=2, + ), + ParameterRule( + name="num_predict", + use_template="max_tokens", + label=I18nObject(en_US="Num Predict"), + type=ParameterType.INT, + help=I18nObject( + en_US="Maximum number of tokens to predict when generating text. " + "(Default: 128, -1 = infinite generation, -2 = fill context)" + ), + default=128, + min=-2, + max=int(credentials.get("max_tokens", 4096)), + ), + ParameterRule( + name="mirostat", + label=I18nObject(en_US="Mirostat sampling"), + type=ParameterType.INT, + help=I18nObject( + en_US="Enable Mirostat sampling for controlling perplexity. 
" + "(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)" + ), + default=0, + min=0, + max=2, + ), + ParameterRule( + name="mirostat_eta", + label=I18nObject(en_US="Mirostat Eta"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="Influences how quickly the algorithm responds to feedback from " + "the generated text. A lower learning rate will result in slower adjustments, " + "while a higher learning rate will make the algorithm more responsive. " + "(Default: 0.1)" + ), + default=0.1, + precision=1, + ), + ParameterRule( + name="mirostat_tau", + label=I18nObject(en_US="Mirostat Tau"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="Controls the balance between coherence and diversity of the output. " + "A lower value will result in more focused and coherent text. (Default: 5.0)" + ), + default=5.0, + precision=1, + ), + ParameterRule( + name="num_ctx", + label=I18nObject(en_US="Size of context window"), + type=ParameterType.INT, + help=I18nObject( + en_US="Sets the size of the context window used to generate the next token. " + "(Default: 2048)" + ), + default=2048, + min=1, + ), + ParameterRule( + name="num_gpu", + label=I18nObject(en_US="Num GPU"), + type=ParameterType.INT, + help=I18nObject( + en_US="The number of layers to send to the GPU(s). " + "On macOS it defaults to 1 to enable metal support, 0 to disable." + ), + default=1, + min=0, + max=1, + ), + ParameterRule( + name="num_thread", + label=I18nObject(en_US="Num Thread"), + type=ParameterType.INT, + help=I18nObject( + en_US="Sets the number of threads to use during computation. " + "By default, Ollama will detect this for optimal performance. " + "It is recommended to set this value to the number of physical CPU cores " + "your system has (as opposed to the logical number of cores)." + ), + min=1, + ), + ParameterRule( + name="repeat_last_n", + label=I18nObject(en_US="Repeat last N"), + type=ParameterType.INT, + help=I18nObject( + en_US="Sets how far back for the model to look back to prevent repetition. " + "(Default: 64, 0 = disabled, -1 = num_ctx)" + ), + default=64, + min=-1, + ), + ParameterRule( + name="tfs_z", + label=I18nObject(en_US="TFS Z"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="Tail free sampling is used to reduce the impact of less probable tokens " + "from the output. A higher value (e.g., 2.0) will reduce the impact more, " + "while a value of 1.0 disables this setting. (default: 1)" + ), + default=1, + precision=1, + ), + ParameterRule( + name="seed", + label=I18nObject(en_US="Seed"), + type=ParameterType.INT, + help=I18nObject( + en_US="Sets the random number seed to use for generation. Setting this to " + "a specific number will make the model generate the same text for " + "the same prompt. (Default: 0)" + ), + default=0, + ), + ParameterRule( + name="format", + label=I18nObject(en_US="Format"), + type=ParameterType.STRING, + help=I18nObject( + en_US="the format to return a response in." + " Currently the only accepted value is json." 
+ ), + options=["json"], + ), + ], + pricing=PriceConfig( + input=Decimal(credentials.get("input_price", 0)), + output=Decimal(credentials.get("output_price", 0)), + unit=Decimal(credentials.get("unit", 0)), + currency=credentials.get("currency", "USD"), + ), + **extras, ) return entity diff --git a/model-providers/model_providers/core/model_runtime/model_providers/ollama/ollama.yaml b/model-providers/model_providers/core/model_runtime/model_providers/ollama/ollama.yaml index d69c4ca9..67edb22d 100644 --- a/model-providers/model_providers/core/model_runtime/model_providers/ollama/ollama.yaml +++ b/model-providers/model_providers/core/model_runtime/model_providers/ollama/ollama.yaml @@ -27,7 +27,7 @@ model_credential_schema: zh_Hans: 输入模型名称 credential_form_schemas: - - variable: openai_api_base + - variable: base_url label: zh_Hans: API Base en_US: API Base @@ -35,4 +35,4 @@ model_credential_schema: required: false placeholder: zh_Hans: 在此输入您的 API Base - en_US: Enter your API Base \ No newline at end of file + en_US: Enter your API Base diff --git a/model-providers/tests/deepseek_providers_test/model_providers.yaml b/model-providers/tests/deepseek_providers_test/model_providers.yaml new file mode 100644 index 00000000..c157cd40 --- /dev/null +++ b/model-providers/tests/deepseek_providers_test/model_providers.yaml @@ -0,0 +1,11 @@ + +deepseek: + model_credential: + - model: 'deepseek-chat' + model_type: 'llm' + model_credentials: + base_url: 'https://api.deepseek.com' + api_key: 'sk-dcb625fcbc1e497d80b7b9493b51d758' + + + diff --git a/model-providers/tests/deepseek_providers_test/test_deepseek_service.py b/model-providers/tests/deepseek_providers_test/test_deepseek_service.py new file mode 100644 index 00000000..a529ecc7 --- /dev/null +++ b/model-providers/tests/deepseek_providers_test/test_deepseek_service.py @@ -0,0 +1,21 @@ +from langchain.chains import LLMChain +from langchain_core.prompts import PromptTemplate +from langchain_openai import ChatOpenAI, OpenAIEmbeddings +import pytest +import logging + +logger = logging.getLogger(__name__) + + +@pytest.mark.requires("openai") +def test_llm(init_server: str): + llm = ChatOpenAI(model_name="deepseek-chat", openai_api_key="sk-dcb625fcbc1e497d80b7b9493b51d758", openai_api_base=f"{init_server}/deepseek/v1") + template = """Question: {question} + + Answer: Let's think step by step.""" + + prompt = PromptTemplate.from_template(template) + + llm_chain = LLMChain(prompt=prompt, llm=llm) + responses = llm_chain.run("你好") + logger.info("\033[1;32m" + f"llm_chain: {responses}" + "\033[0m") diff --git a/model-providers/tests/ollama_providers_test/model_providers.yaml b/model-providers/tests/ollama_providers_test/model_providers.yaml index 9ef23a0d..a1d2278e 100644 --- a/model-providers/tests/ollama_providers_test/model_providers.yaml +++ b/model-providers/tests/ollama_providers_test/model_providers.yaml @@ -4,7 +4,7 @@ ollama: - model: 'llama3' model_type: 'llm' model_credentials: - openai_api_base: 'http://172.21.80.1:11434' + base_url: 'http://172.21.80.1:11434/v1' diff --git a/model-providers/tests/unit_tests/deepseek/model_providers.yaml b/model-providers/tests/unit_tests/deepseek/model_providers.yaml new file mode 100644 index 00000000..c157cd40 --- /dev/null +++ b/model-providers/tests/unit_tests/deepseek/model_providers.yaml @@ -0,0 +1,11 @@ + +deepseek: + model_credential: + - model: 'deepseek-chat' + model_type: 'llm' + model_credentials: + base_url: 'https://api.deepseek.com' + api_key: 'sk-dcb625fcbc1e497d80b7b9493b51d758' + + + diff --git 
a/model-providers/tests/unit_tests/deepseek/test_provider_manager_models.py b/model-providers/tests/unit_tests/deepseek/test_provider_manager_models.py new file mode 100644 index 00000000..109fd37b --- /dev/null +++ b/model-providers/tests/unit_tests/deepseek/test_provider_manager_models.py @@ -0,0 +1,39 @@ +import asyncio +import logging + +import pytest +from omegaconf import OmegaConf + +from model_providers import BootstrapWebBuilder, _to_custom_provide_configuration +from model_providers.core.model_manager import ModelManager +from model_providers.core.model_runtime.entities.model_entities import ModelType +from model_providers.core.provider_manager import ProviderManager + +logger = logging.getLogger(__name__) + + +def test_deepseek_provider_manager_models(logging_conf: dict, providers_file: str) -> None: + logging.config.dictConfig(logging_conf) # type: ignore + # Load the configuration file + cfg = OmegaConf.load( + providers_file + ) + # Convert the configuration + ( + provider_name_to_provider_records_dict, + provider_name_to_provider_model_records_dict, + ) = _to_custom_provide_configuration(cfg) + # Create the provider manager + provider_manager = ProviderManager( + provider_name_to_provider_records_dict=provider_name_to_provider_records_dict, + provider_name_to_provider_model_records_dict=provider_name_to_provider_model_records_dict, + ) + + provider_model_bundle_llm = provider_manager.get_provider_model_bundle( + provider="deepseek", model_type=ModelType.LLM + ) + predefined_models = ( + provider_model_bundle_llm.model_type_instance.predefined_models() + ) + + logger.info(f"predefined_models: {predefined_models}") diff --git a/model-providers/tests/unit_tests/ollama/model_providers.yaml b/model-providers/tests/unit_tests/ollama/model_providers.yaml index 9ef23a0d..a1d2278e 100644 --- a/model-providers/tests/unit_tests/ollama/model_providers.yaml +++ b/model-providers/tests/unit_tests/ollama/model_providers.yaml @@ -4,7 +4,7 @@ ollama: - model: 'llama3' model_type: 'llm' model_credentials: - openai_api_base: 'http://172.21.80.1:11434' + base_url: 'http://172.21.80.1:11434/v1'
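
Usage note (illustrative only, not part of the patch): with this change applied, the DeepSeek provider is served through the same OpenAI-compatible surface as the other providers, so it can be exercised with the plain openai client as well as the LangChain wrapper used in tests/deepseek_providers_test/test_deepseek_service.py. The request shape below mirrors what validate_credentials sends ("ping", temperature=0, max_tokens=20); the base_url host/port and the API key are placeholder assumptions, not values defined in this patch.

from openai import OpenAI

# Minimal sketch, assuming a locally bootstrapped model-providers server with the
# DeepSeek route mounted at /deepseek/v1 (as in the integration test); replace the
# placeholders with real values before running.
client = OpenAI(
    api_key="YOUR_DEEPSEEK_API_KEY",                # placeholder credential
    base_url="http://127.0.0.1:20000/deepseek/v1",  # placeholder host/port
)

response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[{"role": "user", "content": "ping"}],
    temperature=0,
    max_tokens=20,
)
print(response.choices[0].message.content)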