From af9a838e240bb0f7385bc33fb18ce246427ca2f7 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 15 Feb 2024 23:04:59 +0000
Subject: [PATCH] chore: go live (#5)

---
 .github/workflows/ci.yml                      |   4 +-
 README.md                                     |   4 +-
 bin/check-release-environment                 |   4 +-
 bin/check-test-server                         |   0
 bin/test                                      |   0
 examples/chat_completion.py                   |  14 +-
 examples/chat_completion_async.py             |  16 +-
 examples/chat_completion_async_streaming.py   |  15 +-
 examples/chat_completion_stop.py              |  16 +-
 examples/chat_completion_streaming.py         |  14 +-
 src/groq/_streaming.py                        |   4 -
 src/groq/resources/chat/completions.py        | 214 +-----------------
 src/groq/types/chat/chat_completion.py        |  16 +-
 .../types/chat/completion_create_params.py    |  16 +-
 tests/api_resources/chat/test_completions.py  | 124 ++++++++--
 15 files changed, 158 insertions(+), 303 deletions(-)
 mode change 100644 => 100755 bin/check-test-server
 mode change 100644 => 100755 bin/test

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ebfa5ce..aea1868 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,10 +2,10 @@ name: CI
 on:
   push:
     branches:
-      - main
+      - stainless
   pull_request:
     branches:
-      - main
+      - stainless
 
 jobs:
   lint:
diff --git a/README.md b/README.md
index 0c14ce0..5c0abdd 100644
--- a/README.md
+++ b/README.md
@@ -261,9 +261,9 @@ completion = response.parse()  # get the object that `chat.completions.create()`
 print(completion.id)
 ```
 
-These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) object.
+These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) object.
 
-The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content.
+The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content.
 
 #### `.with_streaming_response`
diff --git a/bin/check-release-environment b/bin/check-release-environment
index 29306d8..e35a371 100644
--- a/bin/check-release-environment
+++ b/bin/check-release-environment
@@ -6,9 +6,9 @@ if [ -z "${PYPI_TOKEN}" ]; then
   errors+=("The GROQ_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.")
 fi
 
-len=${#errors[@]}
+lenErrors=${#errors[@]}
 
-if [[ len -gt 0 ]]; then
+if [[ lenErrors -gt 0 ]]; then
   echo -e "Found the following errors in the release environment:\n"
 
   for error in "${errors[@]}"; do
diff --git a/bin/check-test-server b/bin/check-test-server
old mode 100644
new mode 100755
diff --git a/bin/test b/bin/test
old mode 100644
new mode 100755
diff --git a/examples/chat_completion.py b/examples/chat_completion.py
index 18ea48e..06e7664 100644
--- a/examples/chat_completion.py
+++ b/examples/chat_completion.py
@@ -10,43 +10,33 @@
         # Set an optional system message. This sets the behavior of the
         # assistant and can be used to provide specific instructions for
         # how it should behave throughout the conversation.
-        {
-            "role": "system",
-            "content": "you are a helpful assistant."
-        },
+        {"role": "system", "content": "you are a helpful assistant."},
         # Set a user message for the assistant to respond to.
         {
             "role": "user",
             "content": "Explain the importance of low latency LLMs",
-        }
+        },
     ],
-
     # The language model which will generate the completion.
     model="mixtral-8x7b-32768",
-
     #
     # Optional parameters
     #
-
     # Controls randomness: lowering results in less random completions.
     # As the temperature approaches zero, the model will become deterministic
     # and repetitive.
     temperature=0.5,
-
     # The maximum number of tokens to generate. Requests can use up to
     # 2048 tokens shared between prompt and completion.
     max_tokens=1024,
-
     # Controls diversity via nucleus sampling: 0.5 means half of all
     # likelihood-weighted options are considered.
     top_p=1,
-
     # A stop sequence is a predefined or user-specified text string that
     # signals an AI to stop generating content, ensuring its responses
     # remain focused and concise. Examples include punctuation marks and
     # markers like "[end]".
     stop=None,
-
     # If set, partial message deltas will be sent.
     stream=False,
 )
diff --git a/examples/chat_completion_async.py b/examples/chat_completion_async.py
index 58b1974..b1f0fde 100644
--- a/examples/chat_completion_async.py
+++ b/examples/chat_completion_async.py
@@ -2,6 +2,7 @@
 
 from groq import AsyncGroq
 
+
 async def main():
     client = AsyncGroq()
 
@@ -13,43 +14,33 @@ async def main():
             # Set an optional system message. This sets the behavior of the
             # assistant and can be used to provide specific instructions for
             # how it should behave throughout the conversation.
-            {
-                "role": "system",
-                "content": "you are a helpful assistant."
-            },
+            {"role": "system", "content": "you are a helpful assistant."},
             # Set a user message for the assistant to respond to.
             {
                 "role": "user",
                 "content": "Explain the importance of low latency LLMs",
-            }
+            },
         ],
-
         # The language model which will generate the completion.
         model="mixtral-8x7b-32768",
-
         #
         # Optional parameters
         #
-
         # Controls randomness: lowering results in less random completions.
         # As the temperature approaches zero, the model will become
         # deterministic and repetitive.
         temperature=0.5,
-
         # The maximum number of tokens to generate. Requests can use up to
         # 2048 tokens shared between prompt and completion.
         max_tokens=1024,
-
         # Controls diversity via nucleus sampling: 0.5 means half of all
         # likelihood-weighted options are considered.
         top_p=1,
-
         # A stop sequence is a predefined or user-specified text string that
         # signals an AI to stop generating content, ensuring its responses
         # remain focused and concise. Examples include punctuation marks and
         # markers like "[end]".
         stop=None,
-
         # If set, partial message deltas will be sent.
         stream=False,
     )
 
@@ -57,4 +48,5 @@ async def main():
     # Print the completion returned by the LLM.
     print(chat_completion.choices[0].message.content)
 
+
 asyncio.run(main())
diff --git a/examples/chat_completion_async_streaming.py b/examples/chat_completion_async_streaming.py
index 1e71f73..594d61a 100644
--- a/examples/chat_completion_async_streaming.py
+++ b/examples/chat_completion_async_streaming.py
@@ -2,6 +2,7 @@
 
 from groq import AsyncGroq
 
+
 async def main():
     client = AsyncGroq()
 
@@ -13,39 +14,30 @@ async def main():
             # Set an optional system message. This sets the behavior of the
             # assistant and can be used to provide specific instructions for
             # how it should behave throughout the conversation.
-            {
-                "role": "system",
-                "content": "you are a helpful assistant."
-            },
+            {"role": "system", "content": "you are a helpful assistant."},
             # Set a user message for the assistant to respond to.
             {
                 "role": "user",
                 "content": "Explain the importance of low latency LLMs",
-            }
+            },
         ],
-
         # The language model which will generate the completion.
         model="mixtral-8x7b-32768",
-
         #
         # Optional parameters
         #
-
         # Controls randomness: lowering results in less random completions.
         # As the temperature approaches zero, the model will become
         # deterministic and repetitive.
         temperature=0.5,
-
         # The maximum number of tokens to generate. Requests can use up to
         # 2048 tokens shared between prompt and completion.
         max_tokens=1024,
-
         # A stop sequence is a predefined or user-specified text string that
         # signals an AI to stop generating content, ensuring its responses
         # remain focused and concise. Examples include punctuation marks and
         # markers like "[end]".
         stop=None,
-
         # Controls diversity via nucleus sampling: 0.5 means half of all
         # likelihood-weighted options are considered.
         stream=True,
@@ -55,4 +47,5 @@ async def main():
     async for chunk in stream:
         print(chunk.choices[0].delta.content, end="")
 
+
 asyncio.run(main())
diff --git a/examples/chat_completion_stop.py b/examples/chat_completion_stop.py
index 1c749bc..d68a90d 100644
--- a/examples/chat_completion_stop.py
+++ b/examples/chat_completion_stop.py
@@ -10,37 +10,28 @@
         # Set an optional system message. This sets the behavior of the
         # assistant and can be used to provide specific instructions for
         # how it should behave throughout the conversation.
-        {
-            "role": "system",
-            "content": "you are a helpful assistant."
-        },
+        {"role": "system", "content": "you are a helpful assistant."},
         # Set a user message for the assistant to respond to.
         {
             "role": "user",
-            "content": "Count to 10. Your response must begin with \"1, \". example: 1, 2, 3, ...",
-        }
+            "content": 'Count to 10. Your response must begin with "1, ". example: 1, 2, 3, ...',
+        },
     ],
-
     # The language model which will generate the completion.
     model="mixtral-8x7b-32768",
-
     #
     # Optional parameters
     #
-
     # Controls randomness: lowering results in less random completions.
     # As the temperature approaches zero, the model will become deterministic
     # and repetitive.
     temperature=0.5,
-
     # The maximum number of tokens to generate. Requests can use up to
     # 2048 tokens shared between prompt and completion.
     max_tokens=1024,
-
     # Controls diversity via nucleus sampling: 0.5 means half of all
     # likelihood-weighted options are considered.
     top_p=1,
-
     # A stop sequence is a predefined or user-specified text string that
     # signals an AI to stop generating content, ensuring its responses
     # remain focused and concise. Examples include punctuation marks and
@@ -49,7 +40,6 @@
     # If multiple stop values are needed, an array of string may be passed,
     #   stop=[", 6", ", six", ", Six"]
     stop=", 6",
-
     # If set, partial message deltas will be sent.
     stream=False,
 )
diff --git a/examples/chat_completion_streaming.py b/examples/chat_completion_streaming.py
index dd3edbc..5bae3a7 100644
--- a/examples/chat_completion_streaming.py
+++ b/examples/chat_completion_streaming.py
@@ -10,43 +10,33 @@
         # Set an optional system message. This sets the behavior of the
         # assistant and can be used to provide specific instructions for
         # how it should behave throughout the conversation.
-        {
-            "role": "system",
-            "content": "you are a helpful assistant."
-        },
+        {"role": "system", "content": "you are a helpful assistant."},
         # Set a user message for the assistant to respond to.
         {
             "role": "user",
             "content": "Explain the importance of low latency LLMs",
-        }
+        },
     ],
-
     # The language model which will generate the completion.
     model="mixtral-8x7b-32768",
-
     #
     # Optional parameters
     #
-
     # Controls randomness: lowering results in less random completions.
     # As the temperature approaches zero, the model will become deterministic
     # and repetitive.
     temperature=0.5,
-
     # The maximum number of tokens to generate. Requests can use up to
     # 2048 tokens shared between prompt and completion.
     max_tokens=1024,
-
     # Controls diversity via nucleus sampling: 0.5 means half of all
     # likelihood-weighted options are considered.
     top_p=1,
-
     # A stop sequence is a predefined or user-specified text string that
     # signals an AI to stop generating content, ensuring its responses
     # remain focused and concise. Examples include punctuation marks and
     # markers like "[end]".
     stop=None,
-
     # If set, partial message deltas will be sent.
     stream=True,
 )
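Note on the example changes above: all five scripts now share the same call shape, with the message dicts collapsed onto single lines and the blank separator lines dropped. A minimal non-streaming sketch distilled from examples/chat_completion.py (assuming an API key is available to the client, for example via the GROQ_API_KEY environment variable) looks like this:

    from groq import Groq

    client = Groq()  # assumes GROQ_API_KEY is set in the environment

    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "you are a helpful assistant."},
            {"role": "user", "content": "Explain the importance of low latency LLMs"},
        ],
        model="mixtral-8x7b-32768",
        temperature=0.5,
        max_tokens=1024,
        stream=False,
    )
    print(chat_completion.choices[0].message.content)

The streaming scripts differ only in passing stream=True and then iterating the returned chunks, printing chunk.choices[0].delta.content as each one arrives.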
diff --git a/src/groq/_streaming.py b/src/groq/_streaming.py
index 2769874..ac0ea8a 100644
--- a/src/groq/_streaming.py
+++ b/src/groq/_streaming.py
@@ -53,8 +53,6 @@ def __stream__(self) -> Iterator[_T]:
         iterator = self._iter_events()
 
         for sse in iterator:
-            if sse.data.startswith("[DONE]"):
-                break
             yield process_data(data=sse.json(), cast_to=cast_to, response=response)
 
         # Ensure the entire stream is consumed
@@ -108,8 +106,6 @@ async def __aiter__(self) -> AsyncIterator[_T]:
 
     async def _iter_events(self) -> AsyncIterator[ServerSentEvent]:
         async for sse in self._decoder.aiter(self.response.aiter_lines()):
-            if sse.data.startswith("[DONE]"):
-                break
             yield sse
 
     async def __stream__(self) -> AsyncIterator[_T]:
diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py
index 2661803..f1c5a02 100644
--- a/src/groq/resources/chat/completions.py
+++ b/src/groq/resources/chat/completions.py
@@ -2,11 +2,10 @@
 
 from __future__ import annotations
 
-from typing import Dict, List, Literal, Union, Iterable, Optional, overload
+from typing import Dict, List, Union, Iterable, Optional
 
 import httpx
 
-from ...lib.chat_completion_chunk import ChatCompletionChunk
 from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from ..._utils import maybe_transform
 from ..._compat import cached_property
@@ -17,7 +16,6 @@
     async_to_raw_response_wrapper,
     async_to_streamed_response_wrapper,
 )
-from ..._streaming import AsyncStream, Stream
 from ...types.chat import ChatCompletion, completion_create_params
 from ..._base_client import (
     make_request_options,
 )
@@ -35,22 +33,21 @@ def with_raw_response(self) -> CompletionsWithRawResponse:
     def with_streaming_response(self) -> CompletionsWithStreamingResponse:
         return CompletionsWithStreamingResponse(self)
 
-    @overload
     def create(
         self,
         *,
+        messages: Iterable[completion_create_params.Message],
+        model: str,
         frequency_penalty: float | NotGiven = NOT_GIVEN,
         logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
         logprobs: bool | NotGiven = NOT_GIVEN,
         max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
         n: int | NotGiven = NOT_GIVEN,
         presence_penalty: float | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: int | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream: bool | NotGiven = NOT_GIVEN,
         temperature: float | NotGiven = NOT_GIVEN,
         tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
         top_logprobs: int | NotGiven = NOT_GIVEN,
         top_p: float | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
@@ -64,98 +61,6 @@ def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion:
-        ...
-
-    @overload
-    def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Literal[True],
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Stream[ChatCompletionChunk]:
-        ...
-
-    @overload
-    def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: bool,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
-        ...
-
-    def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
         """
         Creates a completion for a chat prompt
 
@@ -175,12 +80,12 @@ def create(
             "/openai/v1/chat/completions",
             body=maybe_transform(
                 {
+                    "messages": messages,
+                    "model": model,
                     "frequency_penalty": frequency_penalty,
                     "logit_bias": logit_bias,
                     "logprobs": logprobs,
                     "max_tokens": max_tokens,
-                    "messages": messages,
-                    "model": model,
                     "n": n,
                     "presence_penalty": presence_penalty,
                     "response_format": response_format,
@@ -200,8 +105,6 @@ def create(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
             cast_to=ChatCompletion,
-            stream=stream or False,
-            stream_cls=Stream[ChatCompletionChunk],
         )
 
 
@@ -214,22 +117,21 @@ def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
     def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
         return AsyncCompletionsWithStreamingResponse(self)
 
-    @overload
     async def create(
         self,
         *,
+        messages: Iterable[completion_create_params.Message],
+        model: str,
         frequency_penalty: float | NotGiven = NOT_GIVEN,
         logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
         logprobs: bool | NotGiven = NOT_GIVEN,
         max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
         n: int | NotGiven = NOT_GIVEN,
         presence_penalty: float | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: int | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream: bool | NotGiven = NOT_GIVEN,
         temperature: float | NotGiven = NOT_GIVEN,
         tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
         top_logprobs: int | NotGiven = NOT_GIVEN,
         top_p: float | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
@@ -243,98 +145,6 @@ async def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion:
-        ...
-
-    @overload
-    async def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Literal[True],
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncStream[ChatCompletionChunk]:
-        ...
-
-    @overload
-    async def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: bool,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
-        ...
-
-    async def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
         """
         Creates a completion for a chat prompt
 
@@ -354,12 +164,12 @@ async def create(
             "/openai/v1/chat/completions",
             body=maybe_transform(
                 {
+                    "messages": messages,
+                    "model": model,
                     "frequency_penalty": frequency_penalty,
                     "logit_bias": logit_bias,
                     "logprobs": logprobs,
                     "max_tokens": max_tokens,
-                    "messages": messages,
-                    "model": model,
                     "n": n,
                     "presence_penalty": presence_penalty,
                     "response_format": response_format,
@@ -379,8 +189,6 @@ async def create(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
             cast_to=ChatCompletion,
-            stream=stream or False,
-            stream_cls=AsyncStream[ChatCompletionChunk],
         )
 
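Taken together, the completions.py changes above drop the streaming overloads and the Stream/AsyncStream return types: create() now takes messages and model as required keyword arguments, treats stream as a plain bool that is simply forwarded in the request body, and always returns a ChatCompletion. A rough sketch of the resulting async call surface, mirroring examples/chat_completion_async.py (only a subset of the optional parameters is shown):

    import asyncio

    from groq import AsyncGroq


    async def main() -> None:
        client = AsyncGroq()
        # `messages` and `model` must now be supplied; omitting either one
        # raises a TypeError before any request is made.
        chat_completion = await client.chat.completions.create(
            messages=[{"role": "user", "content": "Explain the importance of low latency LLMs"}],
            model="mixtral-8x7b-32768",
            temperature=0.5,
            max_tokens=1024,
        )
        print(chat_completion.choices[0].message.content)


    asyncio.run(main())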
diff --git a/src/groq/types/chat/chat_completion.py b/src/groq/types/chat/chat_completion.py
index 77365e4..1ff1f5d 100644
--- a/src/groq/types/chat/chat_completion.py
+++ b/src/groq/types/chat/chat_completion.py
@@ -54,21 +54,21 @@ class ChoiceMessageToolCall(BaseModel):
 
 
 class ChoiceMessage(BaseModel):
-    content: Optional[str] = None
+    content: str
 
-    role: Optional[str] = None
+    role: str
 
     tool_calls: Optional[List[ChoiceMessageToolCall]] = None
 
 
 class Choice(BaseModel):
-    finish_reason: Optional[str] = None
+    finish_reason: str
 
-    index: Optional[int] = None
+    index: int
 
-    logprobs: Optional[ChoiceLogprobs] = None
+    logprobs: ChoiceLogprobs
 
-    message: Optional[ChoiceMessage] = None
+    message: ChoiceMessage
 
 
 class Usage(BaseModel):
@@ -86,9 +86,9 @@ class Usage(BaseModel):
 
 
 class ChatCompletion(BaseModel):
-    id: Optional[str] = None
+    choices: List[Choice]
 
-    choices: Optional[List[Choice]] = None
+    id: Optional[str] = None
 
     created: Optional[int] = None
diff --git a/src/groq/types/chat/completion_create_params.py b/src/groq/types/chat/completion_create_params.py
index 1aea783..0f9712b 100644
--- a/src/groq/types/chat/completion_create_params.py
+++ b/src/groq/types/chat/completion_create_params.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from typing import Dict, List, Union, Iterable, Optional
-from typing_extensions import Annotated, TypedDict
+from typing_extensions import Required, Annotated, TypedDict
 
 from ..._utils import PropertyInfo
 
@@ -22,6 +22,10 @@
 
 
 class CompletionCreateParams(TypedDict, total=False):
+    messages: Required[Iterable[Message]]
+
+    model: Required[str]
+
     frequency_penalty: float
 
     logit_bias: Dict[str, int]
@@ -30,10 +34,6 @@ class CompletionCreateParams(TypedDict, total=False):
 
     max_tokens: int
 
-    messages: Iterable[Message]
-
-    model: str
-
     n: int
 
     presence_penalty: float
@@ -78,11 +78,11 @@ class MessageToolCall(TypedDict, total=False):
 
 
 class Message(TypedDict, total=False):
-    content: str
+    content: Required[str]
 
-    name: str
+    role: Required[str]
 
-    role: str
+    name: str
 
     tool_call_id: str
     """ToolMessage Fields"""
diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py
index 1eabfe7..1fdfc34 100644
--- a/tests/api_resources/chat/test_completions.py
+++ b/tests/api_resources/chat/test_completions.py
@@ -19,16 +19,28 @@ class TestCompletions:
 
     @parametrize
     def test_method_create(self, client: Groq) -> None:
-        completion = client.chat.completions.create()
+        completion = client.chat.completions.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+            ],
+            model="string",
+        )
         assert_matches_type(ChatCompletion, completion, path=["response"])
 
     @parametrize
     def test_method_create_with_all_params(self, client: Groq) -> None:
         completion = client.chat.completions.create(
-            frequency_penalty=0,
-            logit_bias={"foo": 0},
-            logprobs=True,
-            max_tokens=0,
             messages=[
                 {
                     "content": "string",
@@ -128,6 +140,10 @@ def test_method_create_with_all_params(self, client: Groq) -> None:
                 },
             ],
             model="string",
+            frequency_penalty=0,
+            logit_bias={"foo": 0},
+            logprobs=True,
+            max_tokens=0,
             n=0,
             presence_penalty=0,
             response_format={"type": "string"},
@@ -176,7 +192,23 @@ def test_method_create_with_all_params(self, client: Groq) -> None:
 
     @parametrize
     def test_raw_response_create(self, client: Groq) -> None:
-        response = client.chat.completions.with_raw_response.create()
+        response = client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+            ],
+            model="string",
+        )
 
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -185,7 +217,23 @@ def test_raw_response_create(self, client: Groq) -> None:
 
     @parametrize
     def test_streaming_response_create(self, client: Groq) -> None:
-        with client.chat.completions.with_streaming_response.create() as response:
+        with client.chat.completions.with_streaming_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+            ],
+            model="string",
+        ) as response:
             assert not response.is_closed
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -200,16 +248,28 @@ class TestAsyncCompletions:
 
     @parametrize
     async def test_method_create(self, async_client: AsyncGroq) -> None:
-        completion = await async_client.chat.completions.create()
+        completion = await async_client.chat.completions.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+            ],
+            model="string",
+        )
         assert_matches_type(ChatCompletion, completion, path=["response"])
 
     @parametrize
     async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> None:
         completion = await async_client.chat.completions.create(
-            frequency_penalty=0,
-            logit_bias={"foo": 0},
-            logprobs=True,
-            max_tokens=0,
             messages=[
                 {
                     "content": "string",
@@ -309,6 +369,10 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N
                 },
             ],
             model="string",
+            frequency_penalty=0,
+            logit_bias={"foo": 0},
+            logprobs=True,
+            max_tokens=0,
             n=0,
             presence_penalty=0,
             response_format={"type": "string"},
@@ -357,7 +421,23 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N
 
     @parametrize
     async def test_raw_response_create(self, async_client: AsyncGroq) -> None:
-        response = await async_client.chat.completions.with_raw_response.create()
+        response = await async_client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+            ],
+            model="string",
+        )
 
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -366,7 +446,23 @@ async def test_raw_response_create(self, async_client: AsyncGroq) -> None:
 
     @parametrize
     async def test_streaming_response_create(self, async_client: AsyncGroq) -> None:
-        async with async_client.chat.completions.with_streaming_response.create() as response:
+        async with async_client.chat.completions.with_streaming_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+                {
+                    "content": "string",
+                    "role": "string",
+                },
+            ],
+            model="string",
+        ) as response:
             assert not response.is_closed
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
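The type changes round this out: in completion_create_params.py the messages and model keys (and content/role on each Message) become Required, and in chat_completion.py the choices list and the fields on each choice are no longer Optional, so downstream code can read choices[0].message.content without a None check, as the example scripts do. A small sketch of building a typed request payload, assuming the module layout shown in the diff (groq.types.chat.completion_create_params):

    from groq.types.chat.completion_create_params import CompletionCreateParams, Message

    # `messages` and `model` are Required keys; every other key stays optional.
    params: CompletionCreateParams = {
        "messages": [Message(content="Explain the importance of low latency LLMs", role="user")],
        "model": "mixtral-8x7b-32768",
        "max_tokens": 1024,
    }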