From af9a838e240bb0f7385bc33fb18ce246427ca2f7 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 15 Feb 2024 23:04:59 +0000
Subject: [PATCH] chore: go live (#5)

---
 .github/workflows/ci.yml                      |   4 +-
 README.md                                     |   4 +-
 bin/check-release-environment                 |   4 +-
 bin/check-test-server                         |   0
 bin/test                                      |   0
 examples/chat_completion.py                   |  14 +-
 examples/chat_completion_async.py             |  16 +-
 examples/chat_completion_async_streaming.py   |  15 +-
 examples/chat_completion_stop.py              |  16 +-
 examples/chat_completion_streaming.py         |  14 +-
 src/groq/_streaming.py                        |   4 -
 src/groq/resources/chat/completions.py        | 214 +-----------------
 src/groq/types/chat/chat_completion.py        |  16 +-
 .../types/chat/completion_create_params.py    |  16 +-
 tests/api_resources/chat/test_completions.py  | 124 ++++++++--
 15 files changed, 158 insertions(+), 303 deletions(-)
 mode change 100644 => 100755 bin/check-test-server
 mode change 100644 => 100755 bin/test

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ebfa5ce..aea1868 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,10 +2,10 @@ name: CI
 on:
   push:
     branches:
-      - main
+      - stainless
   pull_request:
     branches:
-      - main
+      - stainless
 
 jobs:
   lint:
diff --git a/README.md b/README.md
index 0c14ce0..5c0abdd 100644
--- a/README.md
+++ b/README.md
@@ -261,9 +261,9 @@ completion = response.parse()  # get the object that `chat.completions.create()`
 print(completion.id)
 ```
 
-These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) object.
+These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) object.
 
-The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content.
+The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content.
 
 #### `.with_streaming_response`
diff --git a/bin/check-release-environment b/bin/check-release-environment
index 29306d8..e35a371 100644
--- a/bin/check-release-environment
+++ b/bin/check-release-environment
@@ -6,9 +6,9 @@ if [ -z "${PYPI_TOKEN}" ]; then
   errors+=("The GROQ_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.")
 fi
 
-len=${#errors[@]}
+lenErrors=${#errors[@]}
 
-if [[ len -gt 0 ]]; then
+if [[ lenErrors -gt 0 ]]; then
   echo -e "Found the following errors in the release environment:\n"
 
   for error in "${errors[@]}"; do
diff --git a/bin/check-test-server b/bin/check-test-server
old mode 100644
new mode 100755
diff --git a/bin/test b/bin/test
old mode 100644
new mode 100755
diff --git a/examples/chat_completion.py b/examples/chat_completion.py
index 18ea48e..06e7664 100644
--- a/examples/chat_completion.py
+++ b/examples/chat_completion.py
@@ -10,43 +10,33 @@
         # Set an optional system message. This sets the behavior of the
         # assistant and can be used to provide specific instructions for
         # how it should behave throughout the conversation.
-        {
-            "role": "system",
-            "content": "you are a helpful assistant."
-        },
+        {"role": "system", "content": "you are a helpful assistant."},
         # Set a user message for the assistant to respond to.
         {
             "role": "user",
             "content": "Explain the importance of low latency LLMs",
-        }
+        },
     ],
-
     # The language model which will generate the completion.
     model="mixtral-8x7b-32768",
-
     #
     # Optional parameters
     #
-
     # Controls randomness: lowering results in less random completions.
     # As the temperature approaches zero, the model will become deterministic
     # and repetitive.
     temperature=0.5,
-
     # The maximum number of tokens to generate. Requests can use up to
     # 2048 tokens shared between prompt and completion.
     max_tokens=1024,
-
     # Controls diversity via nucleus sampling: 0.5 means half of all
     # likelihood-weighted options are considered.
     top_p=1,
-
     # A stop sequence is a predefined or user-specified text string that
     # signals an AI to stop generating content, ensuring its responses
     # remain focused and concise. Examples include punctuation marks and
     # markers like "[end]".
     stop=None,
-
     # If set, partial message deltas will be sent.
     stream=False,
 )
diff --git a/examples/chat_completion_async.py b/examples/chat_completion_async.py
index 58b1974..b1f0fde 100644
--- a/examples/chat_completion_async.py
+++ b/examples/chat_completion_async.py
@@ -2,6 +2,7 @@
 
 from groq import AsyncGroq
 
+
 async def main():
     client = AsyncGroq()
 
@@ -13,43 +14,33 @@ async def main():
             # Set an optional system message. This sets the behavior of the
             # assistant and can be used to provide specific instructions for
             # how it should behave throughout the conversation.
-            {
-                "role": "system",
-                "content": "you are a helpful assistant."
-            },
+            {"role": "system", "content": "you are a helpful assistant."},
             # Set a user message for the assistant to respond to.
             {
                 "role": "user",
                 "content": "Explain the importance of low latency LLMs",
-            }
+            },
         ],
-
         # The language model which will generate the completion.
         model="mixtral-8x7b-32768",
-
         #
         # Optional parameters
         #
-
         # Controls randomness: lowering results in less random completions.
         # As the temperature approaches zero, the model will become
         # deterministic and repetitive.
         temperature=0.5,
-
         # The maximum number of tokens to generate. Requests can use up to
         # 2048 tokens shared between prompt and completion.
         max_tokens=1024,
-
         # Controls diversity via nucleus sampling: 0.5 means half of all
         # likelihood-weighted options are considered.
         top_p=1,
-
         # A stop sequence is a predefined or user-specified text string that
         # signals an AI to stop generating content, ensuring its responses
         # remain focused and concise. Examples include punctuation marks and
         # markers like "[end]".
         stop=None,
-
         # If set, partial message deltas will be sent.
         stream=False,
     )
 
@@ -57,4 +48,5 @@ async def main():
     # Print the completion returned by the LLM.
     print(chat_completion.choices[0].message.content)
 
+
 asyncio.run(main())
diff --git a/examples/chat_completion_async_streaming.py b/examples/chat_completion_async_streaming.py
index 1e71f73..594d61a 100644
--- a/examples/chat_completion_async_streaming.py
+++ b/examples/chat_completion_async_streaming.py
@@ -2,6 +2,7 @@
 
 from groq import AsyncGroq
 
+
 async def main():
     client = AsyncGroq()
 
@@ -13,39 +14,30 @@ async def main():
             # Set an optional system message. This sets the behavior of the
             # assistant and can be used to provide specific instructions for
             # how it should behave throughout the conversation.
-            {
-                "role": "system",
-                "content": "you are a helpful assistant."
-            },
+            {"role": "system", "content": "you are a helpful assistant."},
             # Set a user message for the assistant to respond to.
             {
                 "role": "user",
                 "content": "Explain the importance of low latency LLMs",
-            }
+            },
         ],
-
         # The language model which will generate the completion.
         model="mixtral-8x7b-32768",
-
         #
         # Optional parameters
         #
-
         # Controls randomness: lowering results in less random completions.
         # As the temperature approaches zero, the model will become
         # deterministic and repetitive.
         temperature=0.5,
-
         # The maximum number of tokens to generate. Requests can use up to
         # 2048 tokens shared between prompt and completion.
         max_tokens=1024,
-
         # A stop sequence is a predefined or user-specified text string that
         # signals an AI to stop generating content, ensuring its responses
         # remain focused and concise. Examples include punctuation marks and
         # markers like "[end]".
         stop=None,
-
         # Controls diversity via nucleus sampling: 0.5 means half of all
         # likelihood-weighted options are considered.
         stream=True,
@@ -55,4 +47,5 @@ async def main():
     async for chunk in stream:
         print(chunk.choices[0].delta.content, end="")
 
+
 asyncio.run(main())
diff --git a/examples/chat_completion_stop.py b/examples/chat_completion_stop.py
index 1c749bc..d68a90d 100644
--- a/examples/chat_completion_stop.py
+++ b/examples/chat_completion_stop.py
@@ -10,37 +10,28 @@
         # Set an optional system message. This sets the behavior of the
         # assistant and can be used to provide specific instructions for
         # how it should behave throughout the conversation.
-        {
-            "role": "system",
-            "content": "you are a helpful assistant."
-        },
+        {"role": "system", "content": "you are a helpful assistant."},
         # Set a user message for the assistant to respond to.
         {
             "role": "user",
-            "content": "Count to 10. Your response must begin with \"1, \". example: 1, 2, 3, ...",
-        }
+            "content": 'Count to 10. Your response must begin with "1, ". example: 1, 2, 3, ...',
+        },
     ],
-
     # The language model which will generate the completion.
     model="mixtral-8x7b-32768",
-
     #
     # Optional parameters
     #
-
     # Controls randomness: lowering results in less random completions.
     # As the temperature approaches zero, the model will become deterministic
     # and repetitive.
     temperature=0.5,
-
     # The maximum number of tokens to generate. Requests can use up to
     # 2048 tokens shared between prompt and completion.
     max_tokens=1024,
-
     # Controls diversity via nucleus sampling: 0.5 means half of all
     # likelihood-weighted options are considered.
     top_p=1,
-
     # A stop sequence is a predefined or user-specified text string that
     # signals an AI to stop generating content, ensuring its responses
     # remain focused and concise. Examples include punctuation marks and
@@ -49,7 +40,6 @@
     # If multiple stop values are needed, an array of string may be passed,
     #   stop=[", 6", ", six", ", Six"]
     stop=", 6",
-
     # If set, partial message deltas will be sent.
     stream=False,
 )
diff --git a/examples/chat_completion_streaming.py b/examples/chat_completion_streaming.py
index dd3edbc..5bae3a7 100644
--- a/examples/chat_completion_streaming.py
+++ b/examples/chat_completion_streaming.py
@@ -10,43 +10,33 @@
         # Set an optional system message. This sets the behavior of the
         # assistant and can be used to provide specific instructions for
         # how it should behave throughout the conversation.
-        {
-            "role": "system",
-            "content": "you are a helpful assistant."
-        },
+        {"role": "system", "content": "you are a helpful assistant."},
         # Set a user message for the assistant to respond to.
         {
             "role": "user",
             "content": "Explain the importance of low latency LLMs",
-        }
+        },
     ],
-
     # The language model which will generate the completion.
     model="mixtral-8x7b-32768",
-
     #
     # Optional parameters
     #
-
     # Controls randomness: lowering results in less random completions.
     # As the temperature approaches zero, the model will become deterministic
     # and repetitive.
     temperature=0.5,
-
     # The maximum number of tokens to generate. Requests can use up to
     # 2048 tokens shared between prompt and completion.
     max_tokens=1024,
-
     # Controls diversity via nucleus sampling: 0.5 means half of all
     # likelihood-weighted options are considered.
     top_p=1,
-
     # A stop sequence is a predefined or user-specified text string that
     # signals an AI to stop generating content, ensuring its responses
     # remain focused and concise. Examples include punctuation marks and
     # markers like "[end]".
     stop=None,
-
     # If set, partial message deltas will be sent.
     stream=True,
 )
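Note on the example changes above: all five scripts now share the same call shape, with the message dicts collapsed onto single lines and the blank separator lines dropped. A minimal non-streaming sketch distilled from examples/chat_completion.py (assuming an API key is available to the client, for example via the GROQ_API_KEY environment variable) looks like this:

    from groq import Groq

    client = Groq()  # assumes GROQ_API_KEY is set in the environment

    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "you are a helpful assistant."},
            {"role": "user", "content": "Explain the importance of low latency LLMs"},
        ],
        model="mixtral-8x7b-32768",
        temperature=0.5,
        max_tokens=1024,
        stream=False,
    )
    print(chat_completion.choices[0].message.content)

The streaming scripts differ only in passing stream=True and then iterating the returned chunks, printing chunk.choices[0].delta.content as each one arrives.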
diff --git a/src/groq/_streaming.py b/src/groq/_streaming.py
index 2769874..ac0ea8a 100644
--- a/src/groq/_streaming.py
+++ b/src/groq/_streaming.py
@@ -53,8 +53,6 @@ def __stream__(self) -> Iterator[_T]:
         iterator = self._iter_events()
 
         for sse in iterator:
-            if sse.data.startswith("[DONE]"):
-                break
             yield process_data(data=sse.json(), cast_to=cast_to, response=response)
 
         # Ensure the entire stream is consumed
@@ -108,8 +106,6 @@ async def __aiter__(self) -> AsyncIterator[_T]:
 
     async def _iter_events(self) -> AsyncIterator[ServerSentEvent]:
         async for sse in self._decoder.aiter(self.response.aiter_lines()):
-            if sse.data.startswith("[DONE]"):
-                break
             yield sse
 
     async def __stream__(self) -> AsyncIterator[_T]:
diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py
index 2661803..f1c5a02 100644
--- a/src/groq/resources/chat/completions.py
+++ b/src/groq/resources/chat/completions.py
@@ -2,11 +2,10 @@
 
 from __future__ import annotations
 
-from typing import Dict, List, Literal, Union, Iterable, Optional, overload
+from typing import Dict, List, Union, Iterable, Optional
 
 import httpx
 
-from ...lib.chat_completion_chunk import ChatCompletionChunk
 from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from ..._utils import maybe_transform
 from ..._compat import cached_property
@@ -17,7 +16,6 @@
     async_to_raw_response_wrapper,
     async_to_streamed_response_wrapper,
 )
-from ..._streaming import AsyncStream, Stream
 from ...types.chat import ChatCompletion, completion_create_params
 from ..._base_client import (
     make_request_options,
 )
@@ -35,22 +33,21 @@ def with_raw_response(self) -> CompletionsWithRawResponse:
     def with_streaming_response(self) -> CompletionsWithStreamingResponse:
         return CompletionsWithStreamingResponse(self)
 
-    @overload
     def create(
         self,
         *,
+        messages: Iterable[completion_create_params.Message],
+        model: str,
         frequency_penalty: float | NotGiven = NOT_GIVEN,
         logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
         logprobs: bool | NotGiven = NOT_GIVEN,
         max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
         n: int | NotGiven = NOT_GIVEN,
         presence_penalty: float | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: int | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream: bool | NotGiven = NOT_GIVEN,
         temperature: float | NotGiven = NOT_GIVEN,
         tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
         top_logprobs: int | NotGiven = NOT_GIVEN,
         top_p: float | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
@@ -64,98 +61,6 @@ def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion:
-        ...
-
-    @overload
-    def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Literal[True],
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Stream[ChatCompletionChunk]:
-        ...
-
-    @overload
-    def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: bool,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
-        ...
-
-    def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
         """
         Creates a completion for a chat prompt
 
@@ -175,12 +80,12 @@ def create(
             "/openai/v1/chat/completions",
             body=maybe_transform(
                 {
+                    "messages": messages,
+                    "model": model,
                     "frequency_penalty": frequency_penalty,
                     "logit_bias": logit_bias,
                     "logprobs": logprobs,
                     "max_tokens": max_tokens,
-                    "messages": messages,
-                    "model": model,
                     "n": n,
                     "presence_penalty": presence_penalty,
                     "response_format": response_format,
@@ -200,8 +105,6 @@ def create(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
             cast_to=ChatCompletion,
-            stream=stream or False,
-            stream_cls=Stream[ChatCompletionChunk],
         )
 
 
@@ -214,22 +117,21 @@ def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
     def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
         return AsyncCompletionsWithStreamingResponse(self)
 
-    @overload
     async def create(
         self,
         *,
+        messages: Iterable[completion_create_params.Message],
+        model: str,
         frequency_penalty: float | NotGiven = NOT_GIVEN,
         logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
         logprobs: bool | NotGiven = NOT_GIVEN,
         max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
         n: int | NotGiven = NOT_GIVEN,
         presence_penalty: float | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: int | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream: bool | NotGiven = NOT_GIVEN,
         temperature: float | NotGiven = NOT_GIVEN,
         tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
         top_logprobs: int | NotGiven = NOT_GIVEN,
         top_p: float | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
@@ -243,98 +145,6 @@ async def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion:
-        ...
-
-    @overload
-    async def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Literal[True],
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncStream[ChatCompletionChunk]:
-        ...
-
-    @overload
-    async def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: bool,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
-        ...
-
-    async def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
         """
         Creates a completion for a chat prompt
 
@@ -354,12 +164,12 @@ async def create(
             "/openai/v1/chat/completions",
             body=maybe_transform(
                 {
+                    "messages": messages,
+                    "model": model,
                     "frequency_penalty": frequency_penalty,
                     "logit_bias": logit_bias,
                     "logprobs": logprobs,
                     "max_tokens": max_tokens,
-                    "messages": messages,
-                    "model": model,
                     "n": n,
                     "presence_penalty": presence_penalty,
                     "response_format": response_format,
@@ -379,8 +189,6 @@ async def create(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
             cast_to=ChatCompletion,
-            stream=stream or False,
-            stream_cls=AsyncStream[ChatCompletionChunk],
         )
 
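Taken together, the completions.py changes above drop the streaming overloads and the Stream/AsyncStream return types: create() now takes messages and model as required keyword arguments, treats stream as a plain bool that is simply forwarded in the request body, and always returns a ChatCompletion. A rough sketch of the resulting async call surface, mirroring examples/chat_completion_async.py (only a subset of the optional parameters is shown):

    import asyncio

    from groq import AsyncGroq


    async def main() -> None:
        client = AsyncGroq()
        # `messages` and `model` must now be supplied; omitting either one
        # raises a TypeError before any request is made.
        chat_completion = await client.chat.completions.create(
            messages=[{"role": "user", "content": "Explain the importance of low latency LLMs"}],
            model="mixtral-8x7b-32768",
            temperature=0.5,
            max_tokens=1024,
        )
        print(chat_completion.choices[0].message.content)


    asyncio.run(main())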
diff --git a/src/groq/types/chat/chat_completion.py b/src/groq/types/chat/chat_completion.py
index 77365e4..1ff1f5d 100644
--- a/src/groq/types/chat/chat_completion.py
+++ b/src/groq/types/chat/chat_completion.py
@@ -54,21 +54,21 @@ class ChoiceMessageToolCall(BaseModel):
 
 
 class ChoiceMessage(BaseModel):
-    content: Optional[str] = None
+    content: str
 
-    role: Optional[str] = None
+    role: str
 
     tool_calls: Optional[List[ChoiceMessageToolCall]] = None
 
 
 class Choice(BaseModel):
-    finish_reason: Optional[str] = None
+    finish_reason: str
 
-    index: Optional[int] = None
+    index: int
 
-    logprobs: Optional[ChoiceLogprobs] = None
+    logprobs: ChoiceLogprobs
 
-    message: Optional[ChoiceMessage] = None
+    message: ChoiceMessage
 
 
 class Usage(BaseModel):
@@ -86,9 +86,9 @@ class Usage(BaseModel):
 
 
 class ChatCompletion(BaseModel):
-    id: Optional[str] = None
+    choices: List[Choice]
 
-    choices: Optional[List[Choice]] = None
+    id: Optional[str] = None
 
     created: Optional[int] = None
diff --git a/src/groq/types/chat/completion_create_params.py b/src/groq/types/chat/completion_create_params.py
index 1aea783..0f9712b 100644
--- a/src/groq/types/chat/completion_create_params.py
+++ b/src/groq/types/chat/completion_create_params.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from typing import Dict, List, Union, Iterable, Optional
-from typing_extensions import Annotated, TypedDict
+from typing_extensions import Required, Annotated, TypedDict
 
 from ..._utils import PropertyInfo
 
@@ -22,6 +22,10 @@
 
 
 class CompletionCreateParams(TypedDict, total=False):
+    messages: Required[Iterable[Message]]
+
+    model: Required[str]
+
     frequency_penalty: float
 
     logit_bias: Dict[str, int]
@@ -30,10 +34,6 @@ class CompletionCreateParams(TypedDict, total=False):
 
     max_tokens: int
 
-    messages: Iterable[Message]
-
-    model: str
-
     n: int
 
     presence_penalty: float
@@ -78,11 +78,11 @@ class MessageToolCall(TypedDict, total=False):
 
 
 class Message(TypedDict, total=False):
-    content: str
+    content: Required[str]
 
-    name: str
+    role: Required[str]
 
-    role: str
+    name: str
 
     tool_call_id: str
     """ToolMessage Fields"""
diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py
index 1eabfe7..1fdfc34 100644
--- a/tests/api_resources/chat/test_completions.py
+++ b/tests/api_resources/chat/test_completions.py
@@ -19,16 +19,28 @@ class TestCompletions:
 
     @parametrize
     def test_method_create(self, client: Groq) -> None:
-        completion = client.chat.completions.create()
+        completion = client.chat.completions.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+            ],
+            model="string",
+        )
         assert_matches_type(ChatCompletion, completion, path=["response"])
 
     @parametrize
     def test_method_create_with_all_params(self, client: Groq) -> None:
         completion = client.chat.completions.create(
-            frequency_penalty=0,
-            logit_bias={"foo": 0},
-            logprobs=True,
-            max_tokens=0,
             messages=[
                 {
                     "content": "string",
@@ -128,6 +140,10 @@ def test_method_create_with_all_params(self, client: Groq) -> None:
                 },
             ],
             model="string",
+            frequency_penalty=0,
+            logit_bias={"foo": 0},
+            logprobs=True,
+            max_tokens=0,
             n=0,
             presence_penalty=0,
             response_format={"type": "string"},
@@ -176,7 +192,23 @@ def test_method_create_with_all_params(self, client: Groq) -> None:
 
     @parametrize
     def test_raw_response_create(self, client: Groq) -> None:
-        response = client.chat.completions.with_raw_response.create()
+        response = client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+            ],
+            model="string",
+        )
 
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -185,7 +217,23 @@ def test_raw_response_create(self, client: Groq) -> None:
 
     @parametrize
     def test_streaming_response_create(self, client: Groq) -> None:
-        with client.chat.completions.with_streaming_response.create() as response:
+        with client.chat.completions.with_streaming_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+            ],
+            model="string",
+        ) as response:
             assert not response.is_closed
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -200,16 +248,28 @@ class TestAsyncCompletions:
 
     @parametrize
     async def test_method_create(self, async_client: AsyncGroq) -> None:
-        completion = await async_client.chat.completions.create()
+        completion = await async_client.chat.completions.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+            ],
+            model="string",
+        )
         assert_matches_type(ChatCompletion, completion, path=["response"])
 
     @parametrize
     async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> None:
         completion = await async_client.chat.completions.create(
-            frequency_penalty=0,
-            logit_bias={"foo": 0},
-            logprobs=True,
-            max_tokens=0,
             messages=[
                 {
                     "content": "string",
@@ -309,6 +369,10 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N
                 },
             ],
             model="string",
+            frequency_penalty=0,
+            logit_bias={"foo": 0},
+            logprobs=True,
+            max_tokens=0,
             n=0,
             presence_penalty=0,
             response_format={"type": "string"},
@@ -357,7 +421,23 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N
 
     @parametrize
     async def test_raw_response_create(self, async_client: AsyncGroq) -> None:
-        response = await async_client.chat.completions.with_raw_response.create()
+        response = await async_client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+            ],
+            model="string",
+        )
 
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -366,7 +446,23 @@ async def test_raw_response_create(self, async_client: AsyncGroq) -> None:
 
     @parametrize
     async def test_streaming_response_create(self, async_client: AsyncGroq) -> None:
-        async with async_client.chat.completions.with_streaming_response.create() as response:
+        async with async_client.chat.completions.with_streaming_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+            ],
+            model="string",
+        ) as response:
             assert not response.is_closed
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
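The type changes round this out: in completion_create_params.py the messages and model keys (and content/role on each Message) become Required, and in chat_completion.py the choices list and the fields on each choice are no longer Optional, so downstream code can read choices[0].message.content without a None check, as the example scripts do. A small sketch of building a typed request payload, assuming the module layout shown in the diff (groq.types.chat.completion_create_params):

    from groq.types.chat.completion_create_params import CompletionCreateParams, Message

    # `messages` and `model` are Required keys; every other key stays optional.
    params: CompletionCreateParams = {
        "messages": [Message(content="Explain the importance of low latency LLMs", role="user")],
        "model": "mixtral-8x7b-32768",
        "max_tokens": 1024,
    }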