From 431f296a40958e3cff08739933c08ccc62ade995 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 12 Jul 2024 14:19:35 +0200 Subject: [PATCH 01/63] Update `TelemetryClient` to use `huggingface_hub.utils` --- .../src/argilla_server/constants.py | 1 - .../src/argilla_server/telemetry.py | 108 ++++++++---------- 2 files changed, 49 insertions(+), 60 deletions(-) diff --git a/argilla-server/src/argilla_server/constants.py b/argilla-server/src/argilla_server/constants.py index 6bda197c27..64eca87f2a 100644 --- a/argilla-server/src/argilla_server/constants.py +++ b/argilla-server/src/argilla_server/constants.py @@ -23,7 +23,6 @@ DEFAULT_API_KEY = "argilla.apikey" DEFAULT_MAX_KEYWORD_LENGTH = 128 -DEFAULT_TELEMETRY_KEY = "C6FkcaoCbt78rACAgvyBxGBcMB3dM3nn" # Questions settings defaults DEFAULT_LABEL_SELECTION_OPTIONS_MAX_ITEMS = 500 diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 2be0ebaa2b..24a1e06232 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -17,26 +17,20 @@ import logging import platform import uuid -from typing import Any, Dict, Optional +from typing import Optional from fastapi import Request +from huggingface_hub.utils import send_telemetry +from argilla_server._version import __version__ from argilla_server.constants import DEFAULT_USERNAME -from argilla_server.models import User +from argilla_server.models import User, Workspace from argilla_server.settings import settings from argilla_server.utils._telemetry import ( is_running_on_docker_container, server_deployment_type, ) -try: - from analytics import Client # This import works only for version 2.2.0 -except (ImportError, ModuleNotFoundError): - # TODO: show some warning info - settings.enable_telemetry = False - Client = None - - _LOGGER = logging.getLogger(__name__) @@ -44,8 +38,6 @@ class TelemetryClient: enable_telemetry: dataclasses.InitVar[bool] = 
settings.enable_telemetry disable_send: dataclasses.InitVar[bool] = False - api_key: dataclasses.InitVar[str] = settings.telemetry_key - host: dataclasses.InitVar[str] = "https://api.segment.io" _server_id: Optional[uuid.UUID] = dataclasses.field(init=False, default=None) @@ -53,11 +45,10 @@ class TelemetryClient: def server_id(self) -> uuid.UUID: return self._server_id - def __post_init__(self, enable_telemetry: bool, disable_send: bool, api_key: str, host: str): - from argilla_server._version import __version__ - + def __post_init__(self): self._server_id = uuid.UUID(int=uuid.getnode()) self._system_info = { + "server_id": self._server_id, "system": platform.system(), "machine": platform.machine(), "platform": platform.platform(), @@ -65,66 +56,65 @@ def __post_init__(self, enable_telemetry: bool, disable_send: bool, api_key: str "sys_version": platform.version(), "deployment": server_deployment_type(), "docker": is_running_on_docker_container(), - "version": __version__, } _LOGGER.info("System Info:") _LOGGER.info(f"Server id: {self.server_id}") _LOGGER.info(f"Context: {json.dumps(self._system_info, indent=2)}") - self.client: Optional[Client] = None - if enable_telemetry: - try: - client = Client(write_key=api_key, gzip=True, host=host, send=not disable_send, max_retries=10) - client.identify(user_id=str(self._server_id), traits=self._system_info) - - self.client = client - except Exception as err: - _LOGGER.warning(f"Cannot initialize telemetry. Error: {err}. 
Disabling...") - - def track_data(self, action: str, data: Dict[str, Any], include_system_info: bool = True): - if not self.client: - return + def track_data( + self, topic: str, user_agent: dict, include_system_info: bool = True, count: int = 1, type: str = None + ): + library_name = "argilla" + topic = f"{library_name}/{topic}" - event_data = data.copy() - - context = {} if include_system_info: - context = self._system_info.copy() + user_agent.update(self._system_info) + user_agent["count"] = count - self.client.track(user_id=str(self._server_id), event=action, properties=event_data, context=context) + send_telemetry(topic=topic, library_name=library_name, library_version=__version__, user_agent=user_agent) + @staticmethod + def _process_request_info(request: Request): + return {header: request.headers.get(header) for header in ["user-agent", "accept-language"]} -_CLIENT = TelemetryClient() + @staticmethod + def _process_workspace_model(workspace: Workspace): + return { + "workspace_id": str(workspace.id), + "workspace": str(uuid.uuid5(namespace=_TELEMETRY_CLIENT.server_id, name=workspace.name)), + } + @staticmethod + def _process_user_model(user: User): + return { + "user_id": str(user.id), + "role": user.role, + "is_default_user": user.username == DEFAULT_USERNAME, + "user_hash": str(uuid.uuid5(namespace=_TELEMETRY_CLIENT.server_id, name=user.username)), + } -def _process_request_info(request: Request): - return {header: request.headers.get(header) for header in ["user-agent", "accept-language"]} + async def track_user_login(self, request: Request, user: User): + topic = "user/login" + user_agent = self._process_user_model(user=user) + user_agent.update(**self._process_request_info(request)) + self.track_data(topic=topic, user_agent=user_agent) + async def track_crud_user(self, action: str, user: User, is_oauth: bool = None): + topic = f"user/{action}" + user_agent = self._process_user_model(user=user) + if is_oauth is not None: + user_agent["is_oauth"] = 
is_oauth + self.track_data(topic=topic, user_agent=user_agent) -async def track_login(request: Request, user: User): - _CLIENT.track_data( - action="UserInfoRequested", - data={ - "is_default_user": user.username == DEFAULT_USERNAME, - "user_id": str(user.id), - "user_hash": str(uuid.uuid5(namespace=_CLIENT.server_id, name=user.username)), - **_process_request_info(request), - }, - ) + async def track_crud_workspace(self, action: str, workspace: Workspace): + topic: str = f"workspace/{action}" + user_agent = self._process_workspace_model(workspace) + self.track_data(topic=topic, user_agent=user_agent) -def track_user_created(user: User, is_oauth: bool = False): - _CLIENT.track_data( - action="UserCreated", - data={ - "user_id": str(user.id), - "role": user.role, - "is_default_user": user.username == DEFAULT_USERNAME, - "is_oauth": is_oauth, - }, - ) +def get_telemetry_client() -> TelemetryClient: + return _TELEMETRY_CLIENT -def get_telemetry_client() -> TelemetryClient: - return _CLIENT +_TELEMETRY_CLIENT = TelemetryClient() From 2d4aef41f24190789a666abf8b6efb76facbb18e Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 12 Jul 2024 14:19:55 +0200 Subject: [PATCH 02/63] Update `user``CRUD` telemetry tracking --- .../argilla_server/api/handlers/v1/oauth2.py | 4 ++-- .../argilla_server/api/handlers/v1/users.py | 22 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py b/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py index 96f5912492..d2db7cd6f4 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py @@ -16,7 +16,6 @@ from fastapi.responses import RedirectResponse from sqlalchemy.ext.asyncio import AsyncSession -from argilla_server import telemetry from argilla_server.api.schemas.v1.oauth2 import Provider, Providers, Token from argilla_server.contexts import accounts from 
argilla_server.database import get_async_db @@ -27,6 +26,7 @@ from argilla_server.security.authentication.oauth2 import OAuth2ClientProvider from argilla_server.security.authentication.userinfo import UserInfo from argilla_server.security.settings import settings +from argilla_server.telemetry import _TELEMETRY_CLIENT router = APIRouter(prefix="/oauth2", tags=["Authentication"]) @@ -74,7 +74,7 @@ async def get_access_token( role=_USER_ROLE_ON_CREATION, workspaces=[workspace.name for workspace in settings.oauth.allowed_workspaces], ) - telemetry.track_user_created(user, is_oauth=True) + _TELEMETRY_CLIENT.track_crud_user(action="create", user=user, is_oauth=True) elif not _is_user_created_by_oauth_provider(user): # User should sign in using username/password workflow raise AuthenticationError("Could not authenticate user") diff --git a/argilla-server/src/argilla_server/api/handlers/v1/users.py b/argilla-server/src/argilla_server/api/handlers/v1/users.py index 98d4ffcd3d..32e4a90f72 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/users.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/users.py @@ -17,7 +17,6 @@ from fastapi import APIRouter, Depends, Request, Security, status from sqlalchemy.ext.asyncio import AsyncSession -from argilla_server import telemetry from argilla_server.api.policies.v1 import UserPolicy, authorize from argilla_server.api.schemas.v1.users import User as UserSchema from argilla_server.api.schemas.v1.users import UserCreate, Users @@ -26,13 +25,14 @@ from argilla_server.database import get_async_db from argilla_server.models import User from argilla_server.security import auth +from argilla_server.telemetry import _TELEMETRY_CLIENT router = APIRouter(tags=["users"]) @router.get("/me", response_model=UserSchema) async def get_current_user(request: Request, current_user: User = Security(auth.get_current_user)): - await telemetry.track_login(request, current_user) + await _TELEMETRY_CLIENT.track_user_login(request, 
current_user) return current_user @@ -45,8 +45,9 @@ async def get_user( current_user: User = Security(auth.get_current_user), ): await authorize(current_user, UserPolicy.get) - - return await User.get_or_raise(db, user_id) + user = await User.get_or_raise(db, user_id) + await _TELEMETRY_CLIENT.track_crud_user(action="read", user=user) + return user @router.get("/users", response_model=Users) @@ -56,9 +57,9 @@ async def list_users( current_user: User = Security(auth.get_current_user), ): await authorize(current_user, UserPolicy.list) - users = await accounts.list_users(db) - + for user in users: + await _TELEMETRY_CLIENT.track_crud_user(action="read", user=user) return Users(items=users) @@ -70,10 +71,8 @@ async def create_user( current_user: User = Security(auth.get_current_user), ): await authorize(current_user, UserPolicy.create) - user = await accounts.create_user(db, user_create.dict()) - - telemetry.track_user_created(user) + await _TELEMETRY_CLIENT.track_crud_user(action="create", user=user, is_oauth=False) return user @@ -86,10 +85,11 @@ async def delete_user( current_user: User = Security(auth.get_current_user), ): user = await User.get_or_raise(db, user_id) - await authorize(current_user, UserPolicy.delete) + user = await accounts.delete_user(db, user) + await _TELEMETRY_CLIENT.track_crud_user(action="delete", user=user) - return await accounts.delete_user(db, user) + return user @router.get("/users/{user_id}/workspaces", response_model=Workspaces) From 71cf614ba3848f81435884e6c86dd1b3b9d45656 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 12 Jul 2024 14:20:11 +0200 Subject: [PATCH 03/63] Update `workspace` CRUD telemetry tracking --- .../api/handlers/v1/workspaces.py | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py index 1636998ea4..34743f189f 100644 --- 
a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py @@ -33,6 +33,7 @@ from argilla_server.errors.future import NotFoundError, UnprocessableEntityError from argilla_server.models import User, Workspace, WorkspaceUser from argilla_server.security import auth +from argilla_server.telemetry import _TELEMETRY_CLIENT router = APIRouter(tags=["workspaces"]) @@ -46,7 +47,11 @@ async def get_workspace( ): await authorize(current_user, WorkspacePolicy.get(workspace_id)) - return await Workspace.get_or_raise(db, workspace_id) + workspace = await Workspace.get_or_raise(db, workspace_id) + + _TELEMETRY_CLIENT.track_crud_workspace(action="read", workspace=workspace) + + return workspace @router.post("/workspaces", status_code=status.HTTP_201_CREATED, response_model=WorkspaceSchema) @@ -58,7 +63,11 @@ async def create_workspace( ): await authorize(current_user, WorkspacePolicy.create) - return await accounts.create_workspace(db, workspace_create.dict()) + workspace = await accounts.create_workspace(db, workspace_create.dict()) + + _TELEMETRY_CLIENT.track_crud_workspace(action="create", workspace=workspace) + + return workspace @router.delete("/workspaces/{workspace_id}", response_model=WorkspaceSchema) @@ -72,7 +81,11 @@ async def delete_workspace( workspace = await Workspace.get_or_raise(db, workspace_id) - return await accounts.delete_workspace(db, workspace) + workspace = await accounts.delete_workspace(db, workspace) + + _TELEMETRY_CLIENT.track_crud_workspace(action="delete", workspace=workspace) + + return workspace @router.get("/me/workspaces", response_model=Workspaces) @@ -88,6 +101,9 @@ async def list_workspaces_me( else: workspaces = await accounts.list_workspaces_by_user_id(db, current_user.id) + for workspace in workspaces.items: + _TELEMETRY_CLIENT.track_crud_workspace(action="read", workspace=workspace) + return Workspaces(items=workspaces) From 
e1924e4f4ec79e47e6c74e81683b2302b701ecf6 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 12 Jul 2024 14:22:03 +0200 Subject: [PATCH 04/63] Update `workspace` telemetry from `list_user_workspaces` method --- .../src/argilla_server/api/handlers/v1/users.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/users.py b/argilla-server/src/argilla_server/api/handlers/v1/users.py index 32e4a90f72..7302457d1b 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/users.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/users.py @@ -45,8 +45,11 @@ async def get_user( current_user: User = Security(auth.get_current_user), ): await authorize(current_user, UserPolicy.get) + user = await User.get_or_raise(db, user_id) + await _TELEMETRY_CLIENT.track_crud_user(action="read", user=user) + return user @@ -57,9 +60,12 @@ async def list_users( current_user: User = Security(auth.get_current_user), ): await authorize(current_user, UserPolicy.list) + users = await accounts.list_users(db) + for user in users: await _TELEMETRY_CLIENT.track_crud_user(action="read", user=user) + return Users(items=users) @@ -71,7 +77,9 @@ async def create_user( current_user: User = Security(auth.get_current_user), ): await authorize(current_user, UserPolicy.create) + user = await accounts.create_user(db, user_create.dict()) + await _TELEMETRY_CLIENT.track_crud_user(action="create", user=user, is_oauth=False) return user @@ -85,8 +93,11 @@ async def delete_user( current_user: User = Security(auth.get_current_user), ): user = await User.get_or_raise(db, user_id) + await authorize(current_user, UserPolicy.delete) + user = await accounts.delete_user(db, user) + await _TELEMETRY_CLIENT.track_crud_user(action="delete", user=user) return user @@ -108,4 +119,7 @@ async def list_user_workspaces( else: workspaces = await accounts.list_workspaces_by_user_id(db, user_id) + for workspace in workspaces: + await 
_TELEMETRY_CLIENT.track_crud_workspace(action="read", user=user) + return Workspaces(items=workspaces) From 11a1f8533f76226f891776e48ee9c079de0a800c Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 12 Jul 2024 14:25:40 +0200 Subject: [PATCH 05/63] Fix arguments passed to `track_crud_workspace` in `list_user_workspaces` --- argilla-server/src/argilla_server/api/handlers/v1/users.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/users.py b/argilla-server/src/argilla_server/api/handlers/v1/users.py index 7302457d1b..1973ca3cd2 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/users.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/users.py @@ -120,6 +120,6 @@ async def list_user_workspaces( workspaces = await accounts.list_workspaces_by_user_id(db, user_id) for workspace in workspaces: - await _TELEMETRY_CLIENT.track_crud_workspace(action="read", user=user) + await _TELEMETRY_CLIENT.track_crud_workspace(action="read", workspace=workspace) return Workspaces(items=workspaces) From 9adfc164d2623c68e94795d0f03e0e9587d37746 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 12 Jul 2024 14:29:17 +0200 Subject: [PATCH 06/63] Fix `await` to `telemetry` call --- .../src/argilla_server/api/handlers/v1/oauth2.py | 2 +- argilla-server/src/argilla_server/telemetry.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py b/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py index d2db7cd6f4..4f04a8882d 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py @@ -74,7 +74,7 @@ async def get_access_token( role=_USER_ROLE_ON_CREATION, workspaces=[workspace.name for workspace in settings.oauth.allowed_workspaces], ) - _TELEMETRY_CLIENT.track_crud_user(action="create", user=user, is_oauth=True) + await 
_TELEMETRY_CLIENT.track_crud_user(action="create", user=user, is_oauth=True) elif not _is_user_created_by_oauth_provider(user): # User should sign in using username/password workflow raise AuthenticationError("Could not authenticate user") diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 24a1e06232..c1cffc6b73 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -17,7 +17,7 @@ import logging import platform import uuid -from typing import Optional +from typing import Optional, Union from fastapi import Request from huggingface_hub.utils import send_telemetry @@ -62,9 +62,7 @@ def __post_init__(self): _LOGGER.info(f"Server id: {self.server_id}") _LOGGER.info(f"Context: {json.dumps(self._system_info, indent=2)}") - def track_data( - self, topic: str, user_agent: dict, include_system_info: bool = True, count: int = 1, type: str = None - ): + def track_data(self, topic: str, user_agent: dict, include_system_info: bool = True, count: int = 1): library_name = "argilla" topic = f"{library_name}/{topic}" @@ -100,7 +98,7 @@ async def track_user_login(self, request: Request, user: User): user_agent.update(**self._process_request_info(request)) self.track_data(topic=topic, user_agent=user_agent) - async def track_crud_user(self, action: str, user: User, is_oauth: bool = None): + async def track_crud_user(self, action: str, user: User, is_oauth: Union[bool, None] = None): topic = f"user/{action}" user_agent = self._process_user_model(user=user) if is_oauth is not None: From 0109e44c781da9dc699d1a89221c7d352914f7eb Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 12 Jul 2024 16:40:38 +0200 Subject: [PATCH 07/63] Update `telemetry_client: TelemetryClient = Depends(get_telemetry_client),` instead of direct `_TELEMETRY_CLIENT` import --- .../argilla_server/api/handlers/v1/oauth2.py | 5 ++-- .../argilla_server/api/handlers/v1/users.py | 25 
+++++++++++++------ .../api/handlers/v1/workspaces.py | 14 +++++++---- argilla-server/src/argilla_server/settings.py | 2 -- 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py b/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py index 4f04a8882d..0f799c115d 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py @@ -26,7 +26,7 @@ from argilla_server.security.authentication.oauth2 import OAuth2ClientProvider from argilla_server.security.authentication.userinfo import UserInfo from argilla_server.security.settings import settings -from argilla_server.telemetry import _TELEMETRY_CLIENT +from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter(prefix="/oauth2", tags=["Authentication"]) @@ -55,6 +55,7 @@ async def get_access_token( request: Request, provider: str, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), ) -> Token: _check_oauth_enabled_or_raise() @@ -74,7 +75,7 @@ async def get_access_token( role=_USER_ROLE_ON_CREATION, workspaces=[workspace.name for workspace in settings.oauth.allowed_workspaces], ) - await _TELEMETRY_CLIENT.track_crud_user(action="create", user=user, is_oauth=True) + await telemetry_client.track_crud_user(action="create", user=user, is_oauth=True) elif not _is_user_created_by_oauth_provider(user): # User should sign in using username/password workflow raise AuthenticationError("Could not authenticate user") diff --git a/argilla-server/src/argilla_server/api/handlers/v1/users.py b/argilla-server/src/argilla_server/api/handlers/v1/users.py index 1973ca3cd2..537c85e50a 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/users.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/users.py @@ -25,14 +25,18 @@ from argilla_server.database import get_async_db from 
argilla_server.models import User from argilla_server.security import auth -from argilla_server.telemetry import _TELEMETRY_CLIENT +from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter(tags=["users"]) @router.get("/me", response_model=UserSchema) -async def get_current_user(request: Request, current_user: User = Security(auth.get_current_user)): - await _TELEMETRY_CLIENT.track_user_login(request, current_user) +async def get_current_user( + request: Request, + current_user: User = Security(auth.get_current_user), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), +): + await telemetry_client.track_user_login(request, current_user) return current_user @@ -43,12 +47,13 @@ async def get_user( db: AsyncSession = Depends(get_async_db), user_id: UUID, current_user: User = Security(auth.get_current_user), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): await authorize(current_user, UserPolicy.get) user = await User.get_or_raise(db, user_id) - await _TELEMETRY_CLIENT.track_crud_user(action="read", user=user) + await telemetry_client.track_crud_user(action="read", user=user) return user @@ -58,13 +63,14 @@ async def list_users( *, db: AsyncSession = Depends(get_async_db), current_user: User = Security(auth.get_current_user), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): await authorize(current_user, UserPolicy.list) users = await accounts.list_users(db) for user in users: - await _TELEMETRY_CLIENT.track_crud_user(action="read", user=user) + await telemetry_client.track_crud_user(action="read", user=user) return Users(items=users) @@ -75,12 +81,13 @@ async def create_user( db: AsyncSession = Depends(get_async_db), user_create: UserCreate, current_user: User = Security(auth.get_current_user), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): await authorize(current_user, UserPolicy.create) user = await accounts.create_user(db, user_create.dict()) 
- await _TELEMETRY_CLIENT.track_crud_user(action="create", user=user, is_oauth=False) + await telemetry_client.track_crud_user(action="create", user=user, is_oauth=False) return user @@ -91,6 +98,7 @@ async def delete_user( db: AsyncSession = Depends(get_async_db), user_id: UUID, current_user: User = Security(auth.get_current_user), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): user = await User.get_or_raise(db, user_id) @@ -98,7 +106,7 @@ async def delete_user( user = await accounts.delete_user(db, user) - await _TELEMETRY_CLIENT.track_crud_user(action="delete", user=user) + await telemetry_client.track_crud_user(action="delete", user=user) return user @@ -109,6 +117,7 @@ async def list_user_workspaces( db: AsyncSession = Depends(get_async_db), user_id: UUID, current_user: User = Security(auth.get_current_user), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): await authorize(current_user, UserPolicy.list_workspaces) @@ -120,6 +129,6 @@ async def list_user_workspaces( workspaces = await accounts.list_workspaces_by_user_id(db, user_id) for workspace in workspaces: - await _TELEMETRY_CLIENT.track_crud_workspace(action="read", workspace=workspace) + await telemetry_client.track_crud_workspace(action="read", workspace=workspace) return Workspaces(items=workspaces) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py index 34743f189f..aadae57643 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py @@ -33,7 +33,7 @@ from argilla_server.errors.future import NotFoundError, UnprocessableEntityError from argilla_server.models import User, Workspace, WorkspaceUser from argilla_server.security import auth -from argilla_server.telemetry import _TELEMETRY_CLIENT +from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = 
APIRouter(tags=["workspaces"]) @@ -44,12 +44,13 @@ async def get_workspace( db: AsyncSession = Depends(get_async_db), workspace_id: UUID, current_user: User = Security(auth.get_current_user), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): await authorize(current_user, WorkspacePolicy.get(workspace_id)) workspace = await Workspace.get_or_raise(db, workspace_id) - _TELEMETRY_CLIENT.track_crud_workspace(action="read", workspace=workspace) + telemetry_client.track_crud_workspace(action="read", workspace=workspace) return workspace @@ -60,12 +61,13 @@ async def create_workspace( db: AsyncSession = Depends(get_async_db), workspace_create: WorkspaceCreate, current_user: User = Security(auth.get_current_user), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): await authorize(current_user, WorkspacePolicy.create) workspace = await accounts.create_workspace(db, workspace_create.dict()) - _TELEMETRY_CLIENT.track_crud_workspace(action="create", workspace=workspace) + telemetry_client.track_crud_workspace(action="create", workspace=workspace) return workspace @@ -76,6 +78,7 @@ async def delete_workspace( db: AsyncSession = Depends(get_async_db), workspace_id: UUID, current_user: User = Security(auth.get_current_user), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): await authorize(current_user, WorkspacePolicy.delete) @@ -83,7 +86,7 @@ async def delete_workspace( workspace = await accounts.delete_workspace(db, workspace) - _TELEMETRY_CLIENT.track_crud_workspace(action="delete", workspace=workspace) + telemetry_client.track_crud_workspace(action="delete", workspace=workspace) return workspace @@ -93,6 +96,7 @@ async def list_workspaces_me( *, db: AsyncSession = Depends(get_async_db), current_user: User = Security(auth.get_current_user), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), ) -> Workspaces: await authorize(current_user, WorkspacePolicy.list_workspaces_me) @@ -102,7 +106,7 @@ 
async def list_workspaces_me( workspaces = await accounts.list_workspaces_by_user_id(db, current_user.id) for workspace in workspaces.items: - _TELEMETRY_CLIENT.track_crud_workspace(action="read", workspace=workspace) + telemetry_client.track_crud_workspace(action="read", workspace=workspace) return Workspaces(items=workspaces) diff --git a/argilla-server/src/argilla_server/settings.py b/argilla-server/src/argilla_server/settings.py index d0900896fa..4bcb142d37 100644 --- a/argilla-server/src/argilla_server/settings.py +++ b/argilla-server/src/argilla_server/settings.py @@ -29,7 +29,6 @@ DEFAULT_LABEL_SELECTION_OPTIONS_MAX_ITEMS, DEFAULT_MAX_KEYWORD_LENGTH, DEFAULT_SPAN_OPTIONS_MAX_ITEMS, - DEFAULT_TELEMETRY_KEY, SEARCH_ENGINE_ELASTICSEARCH, SEARCH_ENGINE_OPENSEARCH, ) @@ -138,7 +137,6 @@ class Settings(BaseSettings): # See also the telemetry.py module enable_telemetry: bool = True - telemetry_key: str = DEFAULT_TELEMETRY_KEY @validator("home_path", always=True) def set_home_path_default(cls, home_path: str): From 038d8f622749d0887ad3e0834f8be2cd23cd6cbf Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 15 Jul 2024 13:35:30 +0200 Subject: [PATCH 08/63] Add telemetry methods, dataset, workspace, user, settings, records, record_subtopic --- .../src/argilla_server/telemetry.py | 131 +++++++++++++++--- 1 file changed, 112 insertions(+), 19 deletions(-) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index c1cffc6b73..d57c092a00 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -17,14 +17,14 @@ import logging import platform import uuid -from typing import Optional, Union +from typing import Any, Optional, Union from fastapi import Request from huggingface_hub.utils import send_telemetry from argilla_server._version import __version__ from argilla_server.constants import DEFAULT_USERNAME -from argilla_server.models import User, 
Workspace +from argilla_server.models import Dataset, MetadataProperty, Record, Response, Suggestion, User, Vector, Workspace from argilla_server.settings import settings from argilla_server.utils._telemetry import ( is_running_on_docker_container, @@ -62,16 +62,6 @@ def __post_init__(self): _LOGGER.info(f"Server id: {self.server_id}") _LOGGER.info(f"Context: {json.dumps(self._system_info, indent=2)}") - def track_data(self, topic: str, user_agent: dict, include_system_info: bool = True, count: int = 1): - library_name = "argilla" - topic = f"{library_name}/{topic}" - - if include_system_info: - user_agent.update(self._system_info) - user_agent["count"] = count - - send_telemetry(topic=topic, library_name=library_name, library_version=__version__, user_agent=user_agent) - @staticmethod def _process_request_info(request: Request): return {header: request.headers.get(header) for header in ["user-agent", "accept-language"]} @@ -80,9 +70,38 @@ def _process_request_info(request: Request): def _process_workspace_model(workspace: Workspace): return { "workspace_id": str(workspace.id), - "workspace": str(uuid.uuid5(namespace=_TELEMETRY_CLIENT.server_id, name=workspace.name)), + "workspace_hash": str(uuid.uuid5(namespace=_TELEMETRY_CLIENT.server_id, name=workspace.name)), } + @staticmethod + def _process_dataset_model(dataset: Dataset): + return { + "dataset_id": str(dataset.id), + "dataset_hash": str(uuid.uuid5(namespace=_TELEMETRY_CLIENT.server_id, name=dataset.name)), + } + + @staticmethod + def _process_record_model(record: Record): + return { + "dataset_id": str(record.dataset.id), + "dataset_hash": str(uuid.uuid5(namespace=_TELEMETRY_CLIENT.server_id, name=record.dataset.name)), + } + + @staticmethod + def _process_dataset_settings(dataset: Dataset): + return { + "count_fields": len(dataset.fields), + "count_questions": len(dataset.questions), + "count_vector_settings": len(dataset.vectors_settings), + "count_metadata_properties": len(dataset.metadata_properties), + 
"allow_extra_metadata": dataset.allow_extra_metadata, + "guidelines": True if dataset.guidelines else False, + } + + @staticmethod + def _process_dataset_setting_settings(setting: Any): + return {} + @staticmethod def _process_user_model(user: User): return { @@ -92,23 +111,97 @@ def _process_user_model(user: User): "user_hash": str(uuid.uuid5(namespace=_TELEMETRY_CLIENT.server_id, name=user.username)), } + def track_data(self, topic: str, user_agent: dict, include_system_info: bool = True, count: int = 1): + library_name = "argilla" + topic = f"{library_name}/{topic}" + + if include_system_info: + user_agent.update(self._system_info) + if count is not None: + user_agent["count"] = count + + send_telemetry(topic=topic, library_name=library_name, library_version=__version__, user_agent=user_agent) + async def track_user_login(self, request: Request, user: User): topic = "user/login" user_agent = self._process_user_model(user=user) user_agent.update(**self._process_request_info(request)) self.track_data(topic=topic, user_agent=user_agent) - async def track_crud_user(self, action: str, user: User, is_oauth: Union[bool, None] = None): + async def track_crud_user( + self, + action: str, + user: Union[User, None] = None, + is_oauth: Union[bool, None] = None, + count: Union[int, None] = None, + ): topic = f"user/{action}" - user_agent = self._process_user_model(user=user) + if user: + user_agent = self._process_user_model(user=user) if is_oauth is not None: user_agent["is_oauth"] = is_oauth - self.track_data(topic=topic, user_agent=user_agent) + self.track_data(topic=topic, user_agent=user_agent, count=count) - async def track_crud_workspace(self, action: str, workspace: Workspace): + async def track_crud_workspace( + self, action: str, workspace: Union[Workspace, None] = None, count: Union[int, None] = None + ): topic: str = f"workspace/{action}" - user_agent = self._process_workspace_model(workspace) - self.track_data(topic=topic, user_agent=user_agent) + if workspace: 
+ user_agent = self._process_workspace_model(workspace=workspace) + self.track_data(topic=topic, user_agent=user_agent, count=count) + + async def track_crud_dataset( + self, action: str, dataset: Union[Dataset, None] = None, count: Union[int, None] = None + ): + topic = f"dataset/{action}" + if dataset: + user_agent = self._process_dataset_model(dataset=dataset) + user_agent.update(self._process_dataset_settings(dataset=dataset)) + self.track_data(topic=topic, user_agent=user_agent, count=count) + + return None + for field in dataset.fields: + self.track_crud_dataset_settings(action=action, setting_name="fields", dataset=dataset, setting=field) + for question in dataset.questions: + self.track_crud_dataset_settings(action=action, setting_name="questions", dataset=dataset, setting=question) + for vector in dataset.vectors_settings: + self.track_crud_dataset_settings( + action=action, setting_name="vectors_settings", dataset=dataset, setting=vector + ) + for meta_data in dataset.metadata_properties: + self.track_crud_dataset_settings( + action=action, setting_name="metadata_properties", dataset=dataset, setting=meta_data + ) + + async def track_crud_dataset_setting( + self, + action: str, + setting_name: str, + dataset: Dataset, + setting: Union[Any, None] = None, + count: Union[int, None] = None, + ): + topic = f"dataset/{setting_name}/{setting.settings.type}/{action}" + user_agent = self._process_dataset_model(dataset=dataset) + if setting: + user_agent.update(self._process_dataset_setting_settings(setting=setting)) + self.track_data(topic=topic, user_agent=user_agent, count=count) + + async def track_crud_records(self, action: str, record: Union[Record, None] = None, count: Union[int, None] = None): + topic = f"dataset/records/{action}" + user_agent = self._process_record_model(record=record) + self.track_data(topic=topic, user_agent=user_agent, count=count) + + async def track_crud_records_subtopic( + self, + action: str, + sub_topic_name: str, + sub_topic: 
Union[Suggestion, Response, MetadataProperty, Vector], + count: Union[int, None] = None, + ): + topic = f"dataset/records/{sub_topic}/{action}" + user_agent = {"record_id": sub_topic.record_id} + self.track_data(topic=topic, user_agent=user_agent, count=count) def get_telemetry_client() -> TelemetryClient: From dbcebdcaf80808d7c3ee1716a9da42b5b8dfdf07 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 15 Jul 2024 13:36:36 +0200 Subject: [PATCH 09/63] Add telemetry methods `fields` --- .../argilla_server/api/handlers/v1/fields.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/fields.py b/argilla-server/src/argilla_server/api/handlers/v1/fields.py index a62c7dbcde..ec17e77650 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/fields.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/fields.py @@ -25,6 +25,7 @@ from argilla_server.database import get_async_db from argilla_server.models import Field, User from argilla_server.security import auth +from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter(tags=["fields"]) @@ -33,6 +34,7 @@ async def update_field( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), field_id: UUID, field_update: FieldUpdate, current_user: User = Security(auth.get_current_user), @@ -41,13 +43,20 @@ async def update_field( await authorize(current_user, FieldPolicy.update(field)) - return await datasets.update_field(db, field, field_update) + field = await datasets.update_field(db, field, field_update) + + await telemetry_client.track_crud_dataset_setting( + action="update", dataset=field.dataset, setting_name="fields", setting=field + ) + + return field @router.delete("/fields/{field_id}", response_model=FieldSchema) async def delete_field( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = 
Depends(get_telemetry_client), field_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -55,4 +64,10 @@ async def delete_field( await authorize(current_user, FieldPolicy.delete(field)) - return await datasets.delete_field(db, field) + field = await datasets.delete_field(db, field) + + await telemetry_client.track_crud_dataset_setting( + action="delete", dataset=field.dataset, setting_name="fields", setting=field + ) + + return field From 5581837ace4f1661d5baead0166842ee643b58cd Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 15 Jul 2024 13:37:03 +0200 Subject: [PATCH 10/63] Add telemetry methods `metadata_properties` --- .../api/handlers/v1/metadata_properties.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/metadata_properties.py b/argilla-server/src/argilla_server/api/handlers/v1/metadata_properties.py index ae0392bfb0..88dbb88c29 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/metadata_properties.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/metadata_properties.py @@ -31,6 +31,7 @@ from argilla_server.models import MetadataProperty, User from argilla_server.search_engine import SearchEngine, get_search_engine from argilla_server.security import auth +from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter(tags=["metadata properties"]) @@ -58,6 +59,7 @@ async def get_metadata_property_metrics( async def update_metadata_property( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), metadata_property_id: UUID, metadata_property_update: MetadataPropertyUpdate, current_user: User = Security(auth.get_current_user), @@ -70,13 +72,20 @@ async def update_metadata_property( await authorize(current_user, MetadataPropertyPolicy.update(metadata_property)) - return await datasets.update_metadata_property(db, metadata_property, 
metadata_property_update) + metadata_property = await datasets.update_metadata_property(db, metadata_property, metadata_property_update) + + await telemetry_client.track_crud_dataset_setting( + action="update", setting_name="metadata_properties", dataset=MetadataProperty.dataset, setting=metadata_property + ) + + return metadata_property @router.delete("/metadata-properties/{metadata_property_id}", response_model=MetadataPropertySchema) async def delete_metadata_property( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), metadata_property_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -88,4 +97,10 @@ async def delete_metadata_property( await authorize(current_user, MetadataPropertyPolicy.delete(metadata_property)) - return await datasets.delete_metadata_property(db, metadata_property) + metadata_property = await datasets.delete_metadata_property(db, metadata_property) + + await telemetry_client.track_crud_dataset_setting( + action="delete", setting_name="metadata_properties", dataset=MetadataProperty.dataset, setting=metadata_property + ) + + return metadata_property From 9d7316d0be591dc6a3a909fb3e863714b6816629 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 15 Jul 2024 13:37:38 +0200 Subject: [PATCH 11/63] Add telemetry methods `questions` --- .../api/handlers/v1/questions.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/questions.py b/argilla-server/src/argilla_server/api/handlers/v1/questions.py index 27d8cedf85..1b12919395 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/questions.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/questions.py @@ -25,6 +25,7 @@ from argilla_server.database import get_async_db from argilla_server.models import Question, User from argilla_server.security import auth +from argilla_server.telemetry import 
TelemetryClient, get_telemetry_client router = APIRouter(tags=["questions"]) @@ -33,6 +34,7 @@ async def update_question( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), question_id: UUID, question_update: QuestionUpdate, current_user: User = Security(auth.get_current_user), @@ -41,13 +43,20 @@ async def update_question( await authorize(current_user, QuestionPolicy.update(question)) - return await questions.update_question(db, question, question_update) + question = await questions.update_question(db, question, question_update) + + await telemetry_client.track_crud_dataset_setting( + action="update", dataset=question.dataset, setting_name="questions", setting=question + ) + + return question @router.delete("/questions/{question_id}", response_model=QuestionSchema) async def delete_question( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), question_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -55,4 +64,10 @@ async def delete_question( await authorize(current_user, QuestionPolicy.delete(question)) - return await questions.delete_question(db, question) + question = await questions.delete_question(db, question) + + await telemetry_client.track_crud_dataset_setting( + action="delete", dataset=question.dataset, setting_name="questions", setting=question + ) + + return question From 01d8af71d0dc91735bb0a9bb3639866a67400416 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 15 Jul 2024 13:48:31 +0200 Subject: [PATCH 12/63] Add telemetry methods `records` --- .../argilla_server/api/handlers/v1/records.py | 44 +++++++++++++++++-- .../src/argilla_server/telemetry.py | 12 ++--- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/records.py b/argilla-server/src/argilla_server/api/handlers/v1/records.py index 3778921ee2..a6113c718f 100644 --- 
a/argilla-server/src/argilla_server/api/handlers/v1/records.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/records.py @@ -31,6 +31,7 @@ from argilla_server.models import Dataset, Question, Record, Suggestion, User from argilla_server.search_engine import SearchEngine, get_search_engine from argilla_server.security import auth +from argilla_server.telemetry import TelemetryClient, get_telemetry_client from argilla_server.utils import parse_uuids DELETE_RECORD_SUGGESTIONS_LIMIT = 100 @@ -42,6 +43,7 @@ async def get_record( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), record_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -57,6 +59,8 @@ async def get_record( await authorize(current_user, RecordPolicy.get(record)) + telemetry_client.track_crud_records(action="read", record=record) + return record @@ -65,6 +69,7 @@ async def update_record( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), record_id: UUID, record_update: RecordUpdate, current_user: User = Security(auth.get_current_user), @@ -82,7 +87,11 @@ async def update_record( await authorize(current_user, RecordPolicy.update(record)) - return await datasets.update_record(db, search_engine, record, record_update) + record = await datasets.update_record(db, search_engine, record, record_update) + + telemetry_client.track_crud_records(action="update", record=record) + + return record @router.post("/records/{record_id}/responses", status_code=status.HTTP_201_CREATED, response_model=Response) @@ -90,6 +99,7 @@ async def create_record_response( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), record_id: UUID, response_create: ResponseCreate, current_user: User = 
Security(auth.get_current_user), @@ -105,13 +115,18 @@ async def create_record_response( await authorize(current_user, RecordPolicy.create_response(record)) - return await datasets.create_response(db, search_engine, record, current_user, response_create) + response = await datasets.create_response(db, search_engine, record, current_user, response_create) + + telemetry_client.track_crud_records_subtopic(action="create", sub_topic="responses", record_id=record_id) + + return response @router.get("/records/{record_id}/suggestions", status_code=status.HTTP_200_OK, response_model=Suggestions) async def get_record_suggestions( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), record_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -127,6 +142,10 @@ async def get_record_suggestions( await authorize(current_user, RecordPolicy.get_suggestions(record)) + telemetry_client.track_crud_records_subtopic( + topic="read", sub_topic="suggestions", record_id=record_id, count=len(record.suggestions) + ) + return Suggestions(items=record.suggestions) @@ -144,6 +163,7 @@ async def upsert_suggestion( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), record_id: UUID, suggestion_create: SuggestionCreate, current_user: User = Security(auth.get_current_user), @@ -174,7 +194,13 @@ async def upsert_suggestion( if await Suggestion.get_by(db, record_id=record_id, question_id=suggestion_create.question_id): response.status_code = status.HTTP_200_OK - return await datasets.upsert_suggestion(db, search_engine, record, question, suggestion_create) + suggestion = await datasets.upsert_suggestion(db, search_engine, record, question, suggestion_create) + + telemetry_client.track_crud_records_subtopic( + topic="update", sub_topic="suggestions", record_id=record_id, count=len(record.suggestions) + ) + 
+ return suggestion @router.delete( @@ -186,6 +212,7 @@ async def delete_record_suggestions( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), record_id: UUID, current_user: User = Security(auth.get_current_user), ids: str = Query(..., description="A comma separated list with the IDs of the suggestions to be removed"), @@ -212,12 +239,17 @@ async def delete_record_suggestions( await datasets.delete_suggestions(db, search_engine, record, suggestion_ids) + telemetry_client.track_crud_records_subtopic( + topic="delete", sub_topic="suggestions", record_id=record_id, count=len(record.suggestions) + ) + @router.delete("/records/{record_id}", response_model=RecordSchema, response_model_exclude_unset=True) async def delete_record( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), record_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -232,4 +264,8 @@ async def delete_record( await authorize(current_user, RecordPolicy.delete(record)) - return await datasets.delete_record(db, search_engine, record) + record = await datasets.delete_record(db, search_engine, record) + + telemetry_client.track_crud_records(topic="delete", record=record) + + return record diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index d57c092a00..e87879a2e1 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -24,7 +24,7 @@ from argilla_server._version import __version__ from argilla_server.constants import DEFAULT_USERNAME -from argilla_server.models import Dataset, MetadataProperty, Record, Response, Suggestion, User, Vector, Workspace +from argilla_server.models import Dataset, Record, User, Workspace from 
argilla_server.settings import settings from argilla_server.utils._telemetry import ( is_running_on_docker_container, @@ -83,8 +83,8 @@ def _process_dataset_model(dataset: Dataset): @staticmethod def _process_record_model(record: Record): return { - "dataset_id": str(record.dataset.id), - "dataset_hash": str(uuid.uuid5(namespace=_TELEMETRY_CLIENT.server_id, name=record.dataset.name)), + "dataset_id": str(record.dataset_id), + "record_id": str(record.id), } @staticmethod @@ -195,12 +195,12 @@ async def track_crud_records(self, action: str, record: Union[Record, None] = No async def track_crud_records_subtopic( self, action: str, - sub_topic_name: str, - sub_topic: Union[Suggestion, Response, MetadataProperty, Vector], + sub_topic: str, + record_id: str, count: Union[int, None] = None, ): topic = f"dataset/records/{sub_topic}/{action}" - user_agent = {"record_id": sub_topic.record_id} + user_agent = {"record_id": record_id} self.track_data(topic=topic, user_agent=user_agent, count=count) From cea525c93de05dc5eb8763e1548996c72b7c7868 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 15 Jul 2024 13:53:37 +0200 Subject: [PATCH 13/63] Add telemetry methods to `responses` --- .../api/handlers/v1/responses.py | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/responses.py b/argilla-server/src/argilla_server/api/handlers/v1/responses.py index 56cb695c95..0dfe957992 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/responses.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/responses.py @@ -32,6 +32,7 @@ from argilla_server.models import Dataset, Record, Response, User from argilla_server.search_engine import SearchEngine, get_search_engine from argilla_server.security import auth +from argilla_server.telemetry import TelemetryClient, get_telemetry_client from argilla_server.use_cases.responses.upsert_responses_in_bulk import ( UpsertResponsesInBulkUseCase, 
UpsertResponsesInBulkUseCaseFactory, @@ -46,9 +47,15 @@ async def create_current_user_responses_bulk( body: ResponsesBulkCreate, current_user: User = Security(auth.get_current_user), use_case: UpsertResponsesInBulkUseCase = Depends(UpsertResponsesInBulkUseCaseFactory()), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): responses_bulk_items = await use_case.execute(body.items, user=current_user) + for response in responses_bulk_items: + telemetry_client.track_crud_records_subtopic( + topic="create", sub_topic="responses", record_id=response.record_id + ) + return ResponsesBulk(items=responses_bulk_items) @@ -57,6 +64,7 @@ async def update_response( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), response_id: UUID, response_update: ResponseUpdate, current_user: User = Security(auth.get_current_user), @@ -69,7 +77,11 @@ async def update_response( await authorize(current_user, ResponsePolicy.update(response)) - return await datasets.update_response(db, search_engine, response, response_update) + response = await datasets.update_response(db, search_engine, response, response_update) + + telemetry_client.track_crud_records_subtopic(topic="update", sub_topic="responses", record_id=response.record_id) + + return response @router.delete("/responses/{response_id}", response_model=ResponseSchema) @@ -77,6 +89,7 @@ async def delete_response( *, db: AsyncSession = Depends(get_async_db), search_engine=Depends(get_search_engine), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), response_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -88,4 +101,8 @@ async def delete_response( await authorize(current_user, ResponsePolicy.delete(response)) - return await datasets.delete_response(db, search_engine, response) + response = await datasets.delete_response(db, search_engine, response) + + 
telemetry_client.track_crud_records_subtopic(topic="delete", sub_topic="responses", record_id=response.record_id) + + return response From aa9c6ca9442f166b7cac1ee8a639ebcb4abf52b7 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 15 Jul 2024 13:54:00 +0200 Subject: [PATCH 14/63] =?UTF-8?q?Add=20telemetry=20methods=20to=20=C3=B9se?= =?UTF-8?q?rs`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../api/handlers/v1/datasets/datasets.py | 109 +++++++++++++++--- .../api/handlers/v1/datasets/questions.py | 24 +++- .../api/handlers/v1/datasets/records.py | 18 ++- .../api/handlers/v1/datasets/records_bulk.py | 6 +- .../argilla_server/api/handlers/v1/users.py | 6 +- .../api/handlers/v1/vectors_settings.py | 19 ++- .../api/handlers/v1/workspaces.py | 3 - .../argilla_server/errors/error_handler.py | 5 +- 8 files changed, 161 insertions(+), 29 deletions(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py index 63f95391e1..707a7f96a3 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py @@ -71,6 +71,7 @@ async def _filter_metadata_properties_by_policy( async def list_current_user_datasets( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), workspace_id: Optional[UUID] = None, current_user: User = Security(auth.get_current_user), ): @@ -85,34 +86,66 @@ async def list_current_user_datasets( else: dataset_list = await datasets.list_datasets_by_workspace_id(db, workspace_id) + await telemetry_client.track_crud_dataset(action="list", count=len(dataset_list)) + for dataset in dataset_list: + await telemetry_client.track_crud_dataset(action="read", dataset=dataset) + return Datasets(items=dataset_list) @router.get("/datasets/{dataset_id}/fields", 
response_model=Fields) async def list_dataset_fields( - *, db: AsyncSession = Depends(get_async_db), dataset_id: UUID, current_user: User = Security(auth.get_current_user) + *, + db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), + dataset_id: UUID, + current_user: User = Security(auth.get_current_user), ): dataset = await Dataset.get_or_raise(db, dataset_id, options=[selectinload(Dataset.fields)]) await authorize(current_user, DatasetPolicy.get(dataset)) + await telemetry_client.track_crud_dataset_setting( + action="list", dataset=dataset, setting_name="fields", count=len(dataset.fields) + ) + for field in dataset.fields: + await telemetry_client.track_crud_dataset_setting( + action="read", dataset=dataset, setting_name="fields", setting=field + ) + return Fields(items=dataset.fields) @router.get("/datasets/{dataset_id}/vectors-settings", response_model=VectorsSettings) async def list_dataset_vector_settings( - *, db: AsyncSession = Depends(get_async_db), dataset_id: UUID, current_user: User = Security(auth.get_current_user) + *, + db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), + dataset_id: UUID, + current_user: User = Security(auth.get_current_user), ): dataset = await Dataset.get_or_raise(db, dataset_id, options=[selectinload(Dataset.vectors_settings)]) await authorize(current_user, DatasetPolicy.get(dataset)) + await telemetry_client.track_crud_dataset_setting( + action="list", dataset=dataset, setting_name="vectors_settings", count=len(dataset.vectors_settings) + ) + for vectors_setting in dataset.vectors_settings: + await telemetry_client.track_crud_dataset_setting( + action="read", dataset=dataset, setting_name="vectors_settings", setting=vectors_setting + ) + return VectorsSettings(items=dataset.vectors_settings) @router.get("/me/datasets/{dataset_id}/metadata-properties", response_model=MetadataProperties) async def 
list_current_user_dataset_metadata_properties( - *, db: AsyncSession = Depends(get_async_db), dataset_id: UUID, current_user: User = Security(auth.get_current_user) + *, + db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), + dataset_id: UUID, + current_user: User = Security(auth.get_current_user), ): dataset = await Dataset.get_or_raise(db, dataset_id, options=[selectinload(Dataset.metadata_properties)]) @@ -122,17 +155,31 @@ async def list_current_user_dataset_metadata_properties( current_user, dataset.metadata_properties ) + await telemetry_client.track_crud_dataset_setting( + action="list", dataset=dataset, setting_name="metadata_properties", count=len(filtered_metadata_properties) + ) + for metadata_property in filtered_metadata_properties: + await telemetry_client.track_crud_dataset_setting( + action="read", dataset=dataset, setting_name="metadata_properties", setting=metadata_property + ) + return MetadataProperties(items=filtered_metadata_properties) @router.get("/datasets/{dataset_id}", response_model=DatasetSchema) async def get_dataset( - *, db: AsyncSession = Depends(get_async_db), dataset_id: UUID, current_user: User = Security(auth.get_current_user) + *, + db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), + dataset_id: UUID, + current_user: User = Security(auth.get_current_user), ): dataset = await Dataset.get_or_raise(db, dataset_id) await authorize(current_user, DatasetPolicy.get(dataset)) + await telemetry_client.track_crud_dataset(action="read", dataset=dataset) + return dataset @@ -184,18 +231,25 @@ async def get_dataset_progress( async def create_dataset( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), + dataset_id: UUID, dataset_create: DatasetCreate, current_user: User = Security(auth.get_current_user), ): await authorize(current_user, 
DatasetPolicy.create(dataset_create.workspace_id)) - return await datasets.create_dataset(db, dataset_create) + dataset = await datasets.create_dataset(db, dataset_create) + + await telemetry_client.track_crud_dataset(action="create", dataset=dataset) + + return dataset @router.post("/datasets/{dataset_id}/fields", status_code=status.HTTP_201_CREATED, response_model=Field) async def create_dataset_field( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, field_create: FieldCreate, current_user: User = Security(auth.get_current_user), @@ -204,7 +258,13 @@ async def create_dataset_field( await authorize(current_user, DatasetPolicy.create_field(dataset)) - return await datasets.create_field(db, dataset, field_create) + field = await datasets.create_field(db, dataset, field_create) + + await telemetry_client.track_crud_dataset_setting( + action="create", setting_name="fields", dataset=dataset, setting=field + ) + + return field @router.post( @@ -214,6 +274,7 @@ async def create_dataset_metadata_property( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, metadata_property_create: MetadataPropertyCreate, current_user: User = Security(auth.get_current_user), @@ -222,7 +283,13 @@ async def create_dataset_metadata_property( await authorize(current_user, DatasetPolicy.create_metadata_property(dataset)) - return await datasets.create_metadata_property(db, search_engine, dataset, metadata_property_create) + metadata_property = await datasets.create_metadata_property(db, search_engine, dataset, metadata_property_create) + + await telemetry_client.track_crud_dataset_setting( + action="create", setting_name="metadata_properties", dataset=dataset, setting=metadata_property + ) + + return metadata_property @router.post( @@ -232,6 +299,7 @@ async def 
create_dataset_vector_settings( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, vector_settings_create: VectorSettingsCreate, current_user: User = Security(auth.get_current_user), @@ -240,7 +308,13 @@ async def create_dataset_vector_settings( await authorize(current_user, DatasetPolicy.create_vector_settings(dataset)) - return await datasets.create_vector_settings(db, search_engine, dataset, vector_settings_create) + vector_setting = await datasets.create_vector_settings(db, search_engine, dataset, vector_settings_create) + + await telemetry_client.track_crud_dataset_setting( + action="create", setting_name="vectors_settings", dataset=dataset, setting=vector_setting + ) + + return vector_setting @router.put("/datasets/{dataset_id}/publish", response_model=DatasetSchema) @@ -267,10 +341,7 @@ async def publish_dataset( dataset = await datasets.publish_dataset(db, search_engine, dataset) - telemetry_client.track_data( - action="PublishedDataset", - data={"questions": list(set([question.settings["type"] for question in dataset.questions]))}, - ) + await telemetry_client.track_crud_dataset(action="create", dataset=dataset) return dataset @@ -280,6 +351,7 @@ async def delete_dataset( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -287,13 +359,18 @@ async def delete_dataset( await authorize(current_user, DatasetPolicy.delete(dataset)) - return await datasets.delete_dataset(db, search_engine, dataset) + dataset = await datasets.delete_dataset(db, search_engine, dataset) + + await telemetry_client.track_crud_dataset(action="delete", dataset=dataset) + + return dataset @router.patch("/datasets/{dataset_id}", 
response_model=DatasetSchema) async def update_dataset( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, dataset_update: DatasetUpdate, current_user: User = Security(auth.get_current_user), @@ -302,4 +379,8 @@ async def update_dataset( await authorize(current_user, DatasetPolicy.update(dataset)) - return await datasets.update_dataset(db, dataset, dataset_update) + dataset = await datasets.update_dataset(db, dataset, dataset_update) + + await telemetry_client.track_crud_dataset(action="update", dataset=dataset) + + return dataset diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py index 69815d755f..176253db6c 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py @@ -25,18 +25,31 @@ from argilla_server.database import get_async_db from argilla_server.models import Dataset, User from argilla_server.security import auth +from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter() @router.get("/datasets/{dataset_id}/questions", response_model=Questions) async def list_dataset_questions( - *, db: AsyncSession = Depends(get_async_db), dataset_id: UUID, current_user: User = Security(auth.get_current_user) + *, + db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), + dataset_id: UUID, + current_user: User = Security(auth.get_current_user), ): dataset = await Dataset.get_or_raise(db, dataset_id, options=[selectinload(Dataset.questions)]) await authorize(current_user, DatasetPolicy.get(dataset)) + await telemetry_client.track_crud_dataset_setting( + action="list", setting_name="questions", count=len(dataset.questions) + ) + for question in dataset.questions: + await 
telemetry_client.track_crud_dataset_setting( + action="read", dataset=dataset, setting_name="questions", setting=question + ) + return Questions(items=dataset.questions) @@ -44,6 +57,7 @@ async def list_dataset_questions( async def create_dataset_question( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, question_create: QuestionCreate, current_user: User = Security(auth.get_current_user), @@ -61,4 +75,10 @@ async def create_dataset_question( await authorize(current_user, DatasetPolicy.create_question(dataset)) - return await questions.create_question(db, dataset, question_create) + question = await questions.create_question(db, dataset, question_create) + + await telemetry_client.track_crud_dataset_setting( + action="create", setting_name="questions", dataset=dataset, setting=question + ) + + return question diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py index e032aa7037..9b09e8550a 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py @@ -358,6 +358,7 @@ async def list_current_user_dataset_records( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, metadata: MetadataQueryParams = Depends(), sort_by_query_param: SortByQueryParamParsed, @@ -388,6 +389,8 @@ async def list_current_user_dataset_records( record.dataset = dataset record.metadata_ = await _filter_record_metadata_for_user(record, current_user) + telemetry_client.track_crud_records(topic="list", dataset=dataset, count=len(records)) + return Records(items=records, total=total) @@ -396,6 +399,7 @@ async def list_dataset_records( *, db: AsyncSession = Depends(get_async_db), 
search_engine: SearchEngine = Depends(get_search_engine), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, metadata: MetadataQueryParams = Depends(), sort_by_query_param: SortByQueryParamParsed, @@ -421,6 +425,8 @@ async def list_dataset_records( sort_by_query_param=sort_by_query_param or LIST_DATASET_RECORDS_DEFAULT_SORT_BY, ) + telemetry_client.track_crud_records(topic="list", dataset=dataset, count=len(records)) + return Records(items=records, total=total) @@ -454,7 +460,7 @@ async def create_dataset_records( await datasets.create_records(db, search_engine, dataset, records_create) - telemetry_client.track_data(action="DatasetRecordsCreated", data={"records": len(records_create.items)}) + telemetry_client.track_crud_records(topic="create", dataset=dataset, count=len(records_create.items)) @router.patch( @@ -486,7 +492,7 @@ async def update_dataset_records( await datasets.update_records(db, search_engine, dataset, records_update) - telemetry_client.track_data(action="DatasetRecordsUpdated", data={"records": len(records_update.items)}) + telemetry_client.track_crud_records(topic="update", dataset=dataset, count=len(records_update.items)) @router.delete("/datasets/{dataset_id}/records", status_code=status.HTTP_204_NO_CONTENT) @@ -494,6 +500,7 @@ async def delete_dataset_records( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, current_user: User = Security(auth.get_current_user), ids: str = Query(..., description="A comma separated list with the IDs of the records to be removed"), @@ -513,6 +520,8 @@ async def delete_dataset_records( await datasets.delete_records(db, search_engine, dataset, record_ids) + telemetry_client.track_crud_dataset(topic="delete", dataset=dataset, count=len(record_ids)) + @router.post( "/me/datasets/{dataset_id}/records/search", @@ -583,6 +592,8 @@ async def 
search_current_user_dataset_records( query_score=record_id_score_map[record.id]["query_score"], ) + telemetry_client.track_data(topic="read", dataset=dataset, count=len(search_responses.total)) + return SearchRecordsResult( items=[record["search_record"] for record in record_id_score_map.values()], total=search_responses.total, @@ -599,6 +610,7 @@ async def search_dataset_records( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, body: SearchRecordsQuery, metadata: MetadataQueryParams = Depends(), @@ -645,6 +657,8 @@ async def search_dataset_records( query_score=record_id_score_map[record.id]["query_score"], ) + telemetry_client.track_data(topic="read", dataset=dataset, count=len(search_responses.total)) + return SearchRecordsResult( items=[record["search_record"] for record in record_id_score_map.values()], total=search_responses.total, diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py index af8be6cf6f..ee2cda1e4b 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py @@ -60,7 +60,7 @@ async def create_dataset_records_bulk( records_bulk = await CreateRecordsBulk(db, search_engine).create_records_bulk(dataset, records_bulk_create) - telemetry_client.track_data(action="DatasetRecordsCreated", data={"records": len(records_bulk.items)}) + telemetry_client.track_crud_records(topic="create", dataset=dataset, count=len(records_bulk.items)) return records_bulk @@ -93,7 +93,7 @@ async def upsert_dataset_records_bulk( updated = len(records_bulk.updated_item_ids) created = len(records_bulk.items) - updated - telemetry_client.track_data(action="DatasetRecordsCreated", data={"records": created}) - 
telemetry_client.track_data(action="DatasetRecordsUpdated", data={"records": updated}) + telemetry_client.track_crud_records(topic="create", dataset=dataset, count=created) + telemetry_client.track_crud_records(topic="update", dataset=dataset, count=updated) return records_bulk diff --git a/argilla-server/src/argilla_server/api/handlers/v1/users.py b/argilla-server/src/argilla_server/api/handlers/v1/users.py index 537c85e50a..c5cef2ddf6 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/users.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/users.py @@ -69,8 +69,9 @@ async def list_users( users = await accounts.list_users(db) + await telemetry_client.track_crud_user(action="list", user=None, is_oauth=False, count=len(users)) for user in users: - await telemetry_client.track_crud_user(action="read", user=user) + await telemetry_client.track_crud_user(action="read", user=user, is_oauth=False) return Users(items=users) @@ -128,7 +129,8 @@ async def list_user_workspaces( else: workspaces = await accounts.list_workspaces_by_user_id(db, user_id) + await telemetry_client.track_crud_workspace(action="list", workspace=None, is_oauth=False, count=len(workspaces)) for workspace in workspaces: - await telemetry_client.track_crud_workspace(action="read", workspace=workspace) + await telemetry_client.track_crud_workspace(action="read", workspace=workspace, is_oauth=False) return Workspaces(items=workspaces) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/vectors_settings.py b/argilla-server/src/argilla_server/api/handlers/v1/vectors_settings.py index 511e9a5b99..2a09b2e6b4 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/vectors_settings.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/vectors_settings.py @@ -25,6 +25,7 @@ from argilla_server.database import get_async_db from argilla_server.models import User, VectorSettings from argilla_server.security import auth +from argilla_server.telemetry import TelemetryClient, 
get_telemetry_client router = APIRouter(tags=["vectors-settings"]) @@ -33,6 +34,7 @@ async def update_vector_settings( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), vector_settings_id: UUID, vector_settings_update: VectorSettingsUpdate, current_user: User = Security(auth.get_current_user), @@ -45,13 +47,20 @@ async def update_vector_settings( await authorize(current_user, VectorSettingsPolicy.update(vector_settings)) - return await datasets.update_vector_settings(db, vector_settings, vector_settings_update) + vectors_setting = await datasets.update_vector_settings(db, vector_settings, vector_settings_update) + + await telemetry_client.track_crud_dataset_setting( + action="update", setting_name="vectors_settings", dataset=VectorSettings.dataset, setting=vectors_setting + ) + + return vector_settings @router.delete("/vectors-settings/{vector_settings_id}", response_model=VectorSettingsSchema) async def delete_vector_settings( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), vector_settings_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -63,4 +72,10 @@ async def delete_vector_settings( await authorize(current_user, VectorSettingsPolicy.delete(vector_settings)) - return await datasets.delete_vector_settings(db, vector_settings) + vectors_setting = await datasets.delete_vector_settings(db, vector_settings) + + await telemetry_client.track_crud_dataset_setting( + action="delete", setting_name="vectors_settings", dataset=VectorSettings.dataset, setting=vectors_setting + ) + + return vectors_setting diff --git a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py index aadae57643..396f3fce67 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py @@ 
-105,9 +105,6 @@ async def list_workspaces_me( else: workspaces = await accounts.list_workspaces_by_user_id(db, current_user.id) - for workspace in workspaces.items: - telemetry_client.track_crud_workspace(action="read", workspace=workspace) - return Workspaces(items=workspaces) diff --git a/argilla-server/src/argilla_server/errors/error_handler.py b/argilla-server/src/argilla_server/errors/error_handler.py index 094047e76b..65ea038e7a 100644 --- a/argilla-server/src/argilla_server/errors/error_handler.py +++ b/argilla-server/src/argilla_server/errors/error_handler.py @@ -41,6 +41,9 @@ class ErrorDetail(BaseModel): params: Dict[str, Any] +telemetry_client: TelemetryClient = (Depends(get_telemetry_client),) + + # TODO(@frascuchon): Review class Naming class ServerHTTPException(HTTPException): def __init__(self, error: ServerError): @@ -61,7 +64,7 @@ async def track_error(cls, error: ServerError, request: Request): if isinstance(error, (GenericServerError, EntityNotFoundError, EntityAlreadyExistsError)): data["type"] = error.type - telemetry.get_telemetry_client().track_data(action="ServerErrorFound", data=data) + telemetry.get_telemetry_client().track_data(topic="error/server", user_agent=data) @classmethod async def common_exception_handler(cls, request: Request, error: Exception): From b694ce16a89a31647abf985aeb7c497013068509 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 15 Jul 2024 14:04:58 +0200 Subject: [PATCH 15/63] Add telemetry `suggestions` --- .../src/argilla_server/api/handlers/v1/suggestions.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/suggestions.py b/argilla-server/src/argilla_server/api/handlers/v1/suggestions.py index 1257545749..14c4bf55d5 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/suggestions.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/suggestions.py @@ -25,6 +25,7 @@ from argilla_server.models import Record, 
Suggestion, User from argilla_server.search_engine import SearchEngine, get_search_engine from argilla_server.security import auth +from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter(tags=["suggestions"]) @@ -34,6 +35,7 @@ async def delete_suggestion( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), suggestion_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -48,4 +50,10 @@ async def delete_suggestion( await authorize(current_user, SuggestionPolicy.delete(suggestion)) - return await datasets.delete_suggestion(db, search_engine, suggestion) + suggestion = await datasets.delete_suggestion(db, search_engine, suggestion) + + await telemetry_client.track_crud_records_subtopic( + action="delete", sub_topic="suggestions", record_id=suggestion.record_id + ) + + return suggestion From ebf139e9421000391f159967e46a00807a6f5ab7 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 16 Jul 2024 10:57:37 +0200 Subject: [PATCH 16/63] Update `track_crud_dataset_setting` processing --- .../src/argilla_server/telemetry.py | 40 +++++++++++++------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index e87879a2e1..e1fc07aab8 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -17,14 +17,23 @@ import logging import platform import uuid -from typing import Any, Optional, Union +from typing import Optional, Union from fastapi import Request from huggingface_hub.utils import send_telemetry from argilla_server._version import __version__ from argilla_server.constants import DEFAULT_USERNAME -from argilla_server.models import Dataset, Record, User, Workspace +from argilla_server.models import ( + Dataset, + Field, + 
MetadataPropertySettings, + Question, + Record, + User, + VectorSettings, + Workspace, +) from argilla_server.settings import settings from argilla_server.utils._telemetry import ( is_running_on_docker_container, @@ -99,8 +108,17 @@ def _process_dataset_settings(dataset: Dataset): } @staticmethod - def _process_dataset_setting_settings(setting: Any): - return {} + def _process_dataset_setting_settings(setting: Union[Field, VectorSettings, Question, MetadataPropertySettings]): + user_data = {"dataset_id": str(setting.dataset_id)} + if isinstance(setting, (Field, Question)): + user_data["required"] = setting.required + user_data.update(setting.settings) + elif isinstance(setting, MetadataPropertySettings): + user_data["type"] = setting.type + elif isinstance(setting, VectorSettings): + user_data["dimensions"] = setting.dimensions + + return user_data @staticmethod def _process_user_model(user: User): @@ -159,17 +177,16 @@ async def track_crud_dataset( user_agent.update(self._process_dataset_settings(dataset=dataset)) self.track_data(topic=topic, user_agent=user_agent, count=count) - return None for field in dataset.fields: - self.track_crud_dataset_settings(action=action, setting_name="fields", dataset=dataset, setting=field) + self.track_crud_dataset_setting(action=action, setting_name="fields", dataset=dataset, setting=field) for question in dataset.questions: - self.track_crud_dataset_settings(action=action, setting_name="questions", dataset=dataset, setting=question) + self.track_crud_dataset_setting(action=action, setting_name="questions", dataset=dataset, setting=question) for vector in dataset.vectors_settings: - self.track_crud_dataset_settings( + self.track_crud_dataset_setting( action=action, setting_name="vectors_settings", dataset=dataset, setting=vector ) for meta_data in dataset.metadata_properties: - self.track_crud_dataset_settings( + self.track_crud_dataset_setting( action=action, setting_name="metadata_properties", dataset=dataset, setting=meta_data 
) @@ -178,13 +195,12 @@ async def track_crud_dataset_setting( action: str, setting_name: str, dataset: Dataset, - setting: Union[Any, None] = None, + setting: Union[Field, VectorSettings, Question, MetadataPropertySettings], count: Union[int, None] = None, ): topic = f"dataset/{setting_name}/{setting.settings.type}/{action}" user_agent = self._process_dataset_model(dataset=dataset) - if setting: - user_agent.update(self._process_dataset_setting_settings(setting=setting)) + user_agent.update(self._process_dataset_setting_settings(setting=setting)) self.track_data(topic=topic, user_agent=user_agent, count=count) async def track_crud_records(self, action: str, record: Union[Record, None] = None, count: Union[int, None] = None): From 77dd130707f1235e72492b5fd7400f97721f0ccc Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 16 Jul 2024 11:10:51 +0200 Subject: [PATCH 17/63] Add `enable_telemetry` check --- argilla-server/src/argilla_server/telemetry.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index e1fc07aab8..867b1a9735 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -130,6 +130,9 @@ def _process_user_model(user: User): } def track_data(self, topic: str, user_agent: dict, include_system_info: bool = True, count: int = 1): + if not self.enable_telemetry: + return + library_name = "argilla" topic = f"{library_name}/{topic}" From 6979112201caafc9037bd4ca292127c93de00d58 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 16 Jul 2024 11:11:33 +0200 Subject: [PATCH 18/63] Remove `disable_send` --- argilla-server/src/argilla_server/telemetry.py | 1 - 1 file changed, 1 deletion(-) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 867b1a9735..2eec0734df 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ 
b/argilla-server/src/argilla_server/telemetry.py @@ -46,7 +46,6 @@ @dataclasses.dataclass class TelemetryClient: enable_telemetry: dataclasses.InitVar[bool] = settings.enable_telemetry - disable_send: dataclasses.InitVar[bool] = False _server_id: Optional[uuid.UUID] = dataclasses.field(init=False, default=None) From 9bffe185c0c24b440251a8d5368fd8fac62176e2 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 16 Jul 2024 11:36:53 +0200 Subject: [PATCH 19/63] Deprecate `ARGILLA_ENABLE_TELEMETRY` env var --- .../argilla-frontend.deploy-environment.yml | 2 +- .github/workflows/argilla-server.yml | 2 +- argilla-server/pyproject.toml | 2 +- argilla-server/src/argilla_server/_app.py | 2 +- argilla-server/src/argilla_server/settings.py | 25 +++++++++++++++---- .../deployments/docker/docker-compose.yaml | 3 ++- .../docker/nginx/docker-compose.yaml | 3 ++- .../docker/traefik/docker-compose.yaml | 2 +- 8 files changed, 29 insertions(+), 12 deletions(-) diff --git a/.github/workflows/argilla-frontend.deploy-environment.yml b/.github/workflows/argilla-frontend.deploy-environment.yml index 558cd5a6de..3e52d0c088 100644 --- a/.github/workflows/argilla-frontend.deploy-environment.yml +++ b/.github/workflows/argilla-frontend.deploy-environment.yml @@ -63,7 +63,7 @@ jobs: ADMIN_API_KEY=${{ steps.credentials.outputs.admin }} ANNOTATOR_PASSWORD=${{ steps.credentials.outputs.annotator }} ANNOTATOR_API_KEY=${{ steps.credentials.outputs.annotator }} - ARGILLA_ENABLE_TELEMETRY=0 + HF_HUB_DISABLE_TELEMETRY=1 API_BASE_URL=https://dev.argilla.io/ - name: Post credentials in Slack diff --git a/.github/workflows/argilla-server.yml b/.github/workflows/argilla-server.yml index 89b1592097..a3e05a905e 100644 --- a/.github/workflows/argilla-server.yml +++ b/.github/workflows/argilla-server.yml @@ -52,7 +52,7 @@ jobs: - 5432:5432 env: - ARGILLA_ENABLE_TELEMETRY: 0 + HF_HUB_DISABLE_TELEMETRY: 1 steps: - name: Checkout Code 🛎 diff --git a/argilla-server/pyproject.toml 
b/argilla-server/pyproject.toml index f830d03a67..3e6996362d 100644 --- a/argilla-server/pyproject.toml +++ b/argilla-server/pyproject.toml @@ -108,7 +108,7 @@ log_format = "%(asctime)s %(name)s %(levelname)s %(message)s" log_date_format = "%Y-%m-%d %H:%M:%S" log_cli = "True" testpaths = ["tests"] -env = ["ARGILLA_ENABLE_TELEMETRY=0"] +env = ["HF_HUB_DISABLE_TELEMETRY=1"] [tool.coverage.run] concurrency = ["greenlet", "thread", "multiprocessing"] diff --git a/argilla-server/src/argilla_server/_app.py b/argilla-server/src/argilla_server/_app.py index ebe4634cf0..e0ef3e37a2 100644 --- a/argilla-server/src/argilla_server/_app.py +++ b/argilla-server/src/argilla_server/_app.py @@ -201,7 +201,7 @@ def configure_telemetry(app: FastAPI): """ ) message += "\n\n " - message += "#set ARGILLA_ENABLE_TELEMETRY=0" if os.name == "nt" else "$>export ARGILLA_ENABLE_TELEMETRY=0" + message += "#set HF_HUB_DISABLE_TELEMETRY=1" if os.name == "nt" else "$>export HF_HUB_DISABLE_TELEMETRY=1" message += "\n" @app.on_event("startup") diff --git a/argilla-server/src/argilla_server/settings.py b/argilla-server/src/argilla_server/settings.py index e265d59ae5..1b9016edfe 100644 --- a/argilla-server/src/argilla_server/settings.py +++ b/argilla-server/src/argilla_server/settings.py @@ -26,16 +26,16 @@ from urllib.parse import urlparse from argilla_server.constants import ( - DATABASE_SQLITE, DATABASE_POSTGRESQL, + DATABASE_SQLITE, + DEFAULT_DATABASE_POSTGRESQL_MAX_OVERFLOW, + DEFAULT_DATABASE_POSTGRESQL_POOL_SIZE, + DEFAULT_DATABASE_SQLITE_TIMEOUT, DEFAULT_LABEL_SELECTION_OPTIONS_MAX_ITEMS, DEFAULT_MAX_KEYWORD_LENGTH, DEFAULT_SPAN_OPTIONS_MAX_ITEMS, - DEFAULT_DATABASE_SQLITE_TIMEOUT, SEARCH_ENGINE_ELASTICSEARCH, SEARCH_ENGINE_OPENSEARCH, - DEFAULT_DATABASE_POSTGRESQL_POOL_SIZE, - DEFAULT_DATABASE_POSTGRESQL_MAX_OVERFLOW, ) from argilla_server.pydantic_v1 import BaseSettings, Field, root_validator, validator @@ -156,8 +156,23 @@ class Settings(BaseSettings): description="If True, show a warning 
when Hugging Face space persistent storage is disabled", ) + # Hugging Face telemetry + enable_telemetry: bool = Field( + default=True, description="The telemetry configuration for Hugging Face hub telemetry. " + ) + # See also the telemetry.py module - enable_telemetry: bool = True + @validator("database_url", pre=True, always=True) + def set_enable_telemetry(cls, enable_telemetry: bool) -> bool: + if os.getenv("HF_HUB_DISABLE_TELEMETRY") == "1" or os.getenv("HF_HUB_OFFLINE") == "1": + enable_telemetry = False + elif os.getenv("ARGILLA_ENABLE_TELEMETRY") == "0": + warnings.warn( + "environment vairbale ARGILLA_ENABLE_TELEMETRY is deprecated, use HF_HUB_DISABLE_TELEMETRY or HF_HUB_OFFLINE instead." + ) + enable_telemetry = False + + return enable_telemetry @validator("home_path", always=True) def set_home_path_default(cls, home_path: str): diff --git a/examples/deployments/docker/docker-compose.yaml b/examples/deployments/docker/docker-compose.yaml index 923cae49db..6f903b75a7 100644 --- a/examples/deployments/docker/docker-compose.yaml +++ b/examples/deployments/docker/docker-compose.yaml @@ -12,7 +12,8 @@ services: ARGILLA_ELASTICSEARCH: http://elasticsearch:9200 ARGILLA_AUTH_SECRET_KEY: ${ARGILLA_AUTH_SECRET_KEY:? 
Please generate a 32 character random string with `openssl rand -hex 32`} - # ARGILLA_ENABLE_TELEMETRY: 0 # Opt-out for telemetry https://docs.argilla.io/en/latest/reference/telemetry.html + # HF_HUB_DISABLE_TELEMETRY: 1 # Opt-out for telemetry https://huggingface.co/docs/huggingface_hub/main/en/package_reference/utilities#huggingface_hub.utils.send_telemetry + # HF_HUB_OFFLINE: 1 # Opt-out for telemetry https://huggingface.co/docs/huggingface_hub/main/en/package_reference/utilities#huggingface_hub.utils.send_telemetry # Set user configuration https://docs.argilla.io/en/latest/getting_started/installation/configurations/user_management.html # ARGILLA_LOCAL_AUTH_USERS_DB_FILE: /config/.users.yaml diff --git a/examples/deployments/docker/nginx/docker-compose.yaml b/examples/deployments/docker/nginx/docker-compose.yaml index 6c2d9b8f61..ab7d588f8e 100644 --- a/examples/deployments/docker/nginx/docker-compose.yaml +++ b/examples/deployments/docker/nginx/docker-compose.yaml @@ -12,8 +12,9 @@ services: argilla: image: argilla/argilla-quickstart:latest environment: - ARGILLA_ENABLE_TELEMETRY: 0 + HF_HUB_DISABLE_TELEMETRY: 1 ARGILLA_BASE_URL: /argilla ARGILLA_AUTH_SECRET_KEY: ${ARGILLA_AUTH_SECRET_KEY:? Please generate a 32 character random string with `openssl rand -hex 32`} ports: - "6900:6900" + diff --git a/examples/deployments/docker/traefik/docker-compose.yaml b/examples/deployments/docker/traefik/docker-compose.yaml index d699561035..ea5a9bf97e 100644 --- a/examples/deployments/docker/traefik/docker-compose.yaml +++ b/examples/deployments/docker/traefik/docker-compose.yaml @@ -19,7 +19,7 @@ services: argilla: image: argilla/argilla-quickstart:latest environment: - ARGILLA_ENABLE_TELEMETRY: 0 + HF_HUB_DISABLE_TELEMETRY: 0 ARGILLA_BASE_URL: /argilla ARGILLA_AUTH_SECRET_KEY: ${ARGILLA_AUTH_SECRET_KEY:? 
Please generate a 32 character random string with `openssl rand -hex 32`} labels: From e6763ccaa5f48541f5c2da36fcd59718b84d0008 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 16 Jul 2024 11:39:30 +0200 Subject: [PATCH 20/63] Update `test_telemetry` --- argilla-server/tests/unit/commons/test_telemetry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argilla-server/tests/unit/commons/test_telemetry.py b/argilla-server/tests/unit/commons/test_telemetry.py index 0b1992d4b0..4ac9a714c4 100644 --- a/argilla-server/tests/unit/commons/test_telemetry.py +++ b/argilla-server/tests/unit/commons/test_telemetry.py @@ -28,7 +28,7 @@ def test_disable_telemetry(): telemetry_client = TelemetryClient(enable_telemetry=False) - assert telemetry_client.client is None + assert telemetry_client.enable_telemetry == False @pytest.mark.asyncio From a94ab7cca3fcd5f1b8a346ea0e1429b58663bb00 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 16 Jul 2024 11:43:20 +0200 Subject: [PATCH 21/63] Add enable telemetry to post_init --- argilla-server/src/argilla_server/telemetry.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 2eec0734df..83ed02a3ea 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -53,7 +53,7 @@ class TelemetryClient: def server_id(self) -> uuid.UUID: return self._server_id - def __post_init__(self): + def __post_init__(self, enable_telemetry: bool): self._server_id = uuid.UUID(int=uuid.getnode()) self._system_info = { "server_id": self._server_id, @@ -69,6 +69,7 @@ def __post_init__(self): _LOGGER.info("System Info:") _LOGGER.info(f"Server id: {self.server_id}") _LOGGER.info(f"Context: {json.dumps(self._system_info, indent=2)}") + self.enable_telemetry = enable_telemetry @staticmethod def _process_request_info(request: Request): From 
a6f7c0fffc1ccea3a13feeb2a1520dcff8842e51 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 16 Jul 2024 11:49:09 +0200 Subject: [PATCH 22/63] Add `UUID` to `str` covnersion --- argilla-server/src/argilla_server/telemetry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 83ed02a3ea..98678350c6 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -54,7 +54,7 @@ def server_id(self) -> uuid.UUID: return self._server_id def __post_init__(self, enable_telemetry: bool): - self._server_id = uuid.UUID(int=uuid.getnode()) + self._server_id = str(uuid.UUID(int=uuid.getnode())) self._system_info = { "server_id": self._server_id, "system": platform.system(), From a70c59093ec109b4bdf26ad037eb1798a3128e1b Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 16 Jul 2024 11:52:30 +0200 Subject: [PATCH 23/63] Run tests with enabled telemetry --- .github/workflows/argilla-server.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/argilla-server.yml b/.github/workflows/argilla-server.yml index a3e05a905e..c4a1a401e9 100644 --- a/.github/workflows/argilla-server.yml +++ b/.github/workflows/argilla-server.yml @@ -52,7 +52,7 @@ jobs: - 5432:5432 env: - HF_HUB_DISABLE_TELEMETRY: 1 + HF_HUB_DISABLE_TELEMETRY: 0 steps: - name: Checkout Code 🛎 From d762dc3a4cc9630d77b6e0b3f6358fecd01924be Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 16 Jul 2024 11:53:23 +0200 Subject: [PATCH 24/63] Remove `telemetry` client --- argilla-server/src/argilla_server/errors/error_handler.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/argilla-server/src/argilla_server/errors/error_handler.py b/argilla-server/src/argilla_server/errors/error_handler.py index 65ea038e7a..f43d28f5e4 100644 --- a/argilla-server/src/argilla_server/errors/error_handler.py +++ 
b/argilla-server/src/argilla_server/errors/error_handler.py @@ -41,9 +41,6 @@ class ErrorDetail(BaseModel): params: Dict[str, Any] -telemetry_client: TelemetryClient = (Depends(get_telemetry_client),) - - # TODO(@frascuchon): Review class Naming class ServerHTTPException(HTTPException): def __init__(self, error: ServerError): From 4addc7b67e1882abb568381b24e9952d081344e3 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 16 Jul 2024 13:35:35 +0200 Subject: [PATCH 25/63] Fix tests errors --- .github/workflows/argilla-server.yml | 2 +- .../api/handlers/v1/datasets/questions.py | 2 +- .../api/handlers/v1/datasets/records.py | 18 ++-- .../api/handlers/v1/datasets/records_bulk.py | 6 +- .../api/handlers/v1/metadata_properties.py | 10 +- .../argilla_server/api/handlers/v1/records.py | 20 ++-- .../api/handlers/v1/responses.py | 15 +-- .../argilla_server/api/handlers/v1/users.py | 4 +- .../api/handlers/v1/vectors_settings.py | 4 +- .../api/handlers/v1/workspaces.py | 6 +- .../argilla_server/errors/error_handler.py | 8 +- .../src/argilla_server/telemetry.py | 102 +++++++++++------- .../tests/unit/commons/test_telemetry.py | 27 +---- .../tests/unit/errors/test_api_errors.py | 14 ++- 14 files changed, 137 insertions(+), 101 deletions(-) diff --git a/.github/workflows/argilla-server.yml b/.github/workflows/argilla-server.yml index c4a1a401e9..a3e05a905e 100644 --- a/.github/workflows/argilla-server.yml +++ b/.github/workflows/argilla-server.yml @@ -52,7 +52,7 @@ jobs: - 5432:5432 env: - HF_HUB_DISABLE_TELEMETRY: 0 + HF_HUB_DISABLE_TELEMETRY: 1 steps: - name: Checkout Code 🛎 diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py index 176253db6c..2150ecb367 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py @@ -43,7 +43,7 @@ async def 
list_dataset_questions( await authorize(current_user, DatasetPolicy.get(dataset)) await telemetry_client.track_crud_dataset_setting( - action="list", setting_name="questions", count=len(dataset.questions) + action="list", setting_name="questions", dataset=dataset, count=len(dataset.questions) ) for question in dataset.questions: await telemetry_client.track_crud_dataset_setting( diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py index 9b09e8550a..7ba5217510 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py @@ -389,7 +389,7 @@ async def list_current_user_dataset_records( record.dataset = dataset record.metadata_ = await _filter_record_metadata_for_user(record, current_user) - telemetry_client.track_crud_records(topic="list", dataset=dataset, count=len(records)) + await telemetry_client.track_crud_records(action="list", record_or_dataset=dataset, count=len(records)) return Records(items=records, total=total) @@ -425,7 +425,7 @@ async def list_dataset_records( sort_by_query_param=sort_by_query_param or LIST_DATASET_RECORDS_DEFAULT_SORT_BY, ) - telemetry_client.track_crud_records(topic="list", dataset=dataset, count=len(records)) + await telemetry_client.track_crud_records(action="list", record_or_dataset=dataset, count=len(records)) return Records(items=records, total=total) @@ -460,7 +460,9 @@ async def create_dataset_records( await datasets.create_records(db, search_engine, dataset, records_create) - telemetry_client.track_crud_records(topic="create", dataset=dataset, count=len(records_create.items)) + await telemetry_client.track_crud_records( + action="create", record_or_dataset=dataset, count=len(records_create.items) + ) @router.patch( @@ -492,7 +494,9 @@ async def update_dataset_records( await datasets.update_records(db, search_engine, dataset, 
records_update) - telemetry_client.track_crud_records(topic="update", dataset=dataset, count=len(records_update.items)) + await telemetry_client.track_crud_records( + action="update", record_or_dataset=dataset, count=len(records_update.items) + ) @router.delete("/datasets/{dataset_id}/records", status_code=status.HTTP_204_NO_CONTENT) @@ -520,7 +524,7 @@ async def delete_dataset_records( await datasets.delete_records(db, search_engine, dataset, record_ids) - telemetry_client.track_crud_dataset(topic="delete", dataset=dataset, count=len(record_ids)) + await telemetry_client.track_crud_dataset(action="delete", dataset=dataset, count=len(record_ids)) @router.post( @@ -592,7 +596,7 @@ async def search_current_user_dataset_records( query_score=record_id_score_map[record.id]["query_score"], ) - telemetry_client.track_data(topic="read", dataset=dataset, count=len(search_responses.total)) + await telemetry_client.track_crud_records(action="read", record_or_dataset=dataset, count=search_responses.total) return SearchRecordsResult( items=[record["search_record"] for record in record_id_score_map.values()], @@ -657,7 +661,7 @@ async def search_dataset_records( query_score=record_id_score_map[record.id]["query_score"], ) - telemetry_client.track_data(topic="read", dataset=dataset, count=len(search_responses.total)) + await telemetry_client.track_crud_records(action="read", record_or_dataset=dataset, count=search_responses.total) return SearchRecordsResult( items=[record["search_record"] for record in record_id_score_map.values()], diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py index ee2cda1e4b..6841be8a24 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py @@ -60,7 +60,7 @@ async def create_dataset_records_bulk( records_bulk = await 
CreateRecordsBulk(db, search_engine).create_records_bulk(dataset, records_bulk_create) - telemetry_client.track_crud_records(topic="create", dataset=dataset, count=len(records_bulk.items)) + await telemetry_client.track_crud_records(action="create", record_or_dataset=dataset, count=len(records_bulk.items)) return records_bulk @@ -93,7 +93,7 @@ async def upsert_dataset_records_bulk( updated = len(records_bulk.updated_item_ids) created = len(records_bulk.items) - updated - telemetry_client.track_crud_records(topic="create", dataset=dataset, count=created) - telemetry_client.track_crud_records(topic="update", dataset=dataset, count=updated) + await telemetry_client.track_crud_records(action="create", record_or_dataset=dataset, count=created) + await telemetry_client.track_crud_records(action="update", record_or_dataset=dataset, count=updated) return records_bulk diff --git a/argilla-server/src/argilla_server/api/handlers/v1/metadata_properties.py b/argilla-server/src/argilla_server/api/handlers/v1/metadata_properties.py index 88dbb88c29..8941f97a63 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/metadata_properties.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/metadata_properties.py @@ -75,7 +75,10 @@ async def update_metadata_property( metadata_property = await datasets.update_metadata_property(db, metadata_property, metadata_property_update) await telemetry_client.track_crud_dataset_setting( - action="update", setting_name="metadata_properties", dataset=MetadataProperty.dataset, setting=metadata_property + action="update", + setting_name="metadata_properties", + dataset=metadata_property.dataset, + setting=metadata_property, ) return metadata_property @@ -100,7 +103,10 @@ async def delete_metadata_property( metadata_property = await datasets.delete_metadata_property(db, metadata_property) await telemetry_client.track_crud_dataset_setting( - action="delete", setting_name="metadata_properties", dataset=MetadataProperty.dataset, 
setting=metadata_property + action="delete", + setting_name="metadata_properties", + dataset=metadata_property.dataset, + setting=metadata_property, ) return metadata_property diff --git a/argilla-server/src/argilla_server/api/handlers/v1/records.py b/argilla-server/src/argilla_server/api/handlers/v1/records.py index a6113c718f..f08626a516 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/records.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/records.py @@ -59,7 +59,7 @@ async def get_record( await authorize(current_user, RecordPolicy.get(record)) - telemetry_client.track_crud_records(action="read", record=record) + await telemetry_client.track_crud_records(action="read", record_or_dataset=record) return record @@ -89,7 +89,7 @@ async def update_record( record = await datasets.update_record(db, search_engine, record, record_update) - telemetry_client.track_crud_records(action="update", record=record) + await telemetry_client.track_crud_records(action="update", record_or_dataset=record) return record @@ -117,7 +117,7 @@ async def create_record_response( response = await datasets.create_response(db, search_engine, record, current_user, response_create) - telemetry_client.track_crud_records_subtopic(action="create", sub_topic="responses", record_id=record_id) + await telemetry_client.track_crud_records_subtopic(action="create", sub_topic="responses", record_id=record_id) return response @@ -142,8 +142,8 @@ async def get_record_suggestions( await authorize(current_user, RecordPolicy.get_suggestions(record)) - telemetry_client.track_crud_records_subtopic( - topic="read", sub_topic="suggestions", record_id=record_id, count=len(record.suggestions) + await telemetry_client.track_crud_records_subtopic( + action="read", sub_topic="suggestions", record_id=record_id, count=len(record.suggestions) ) return Suggestions(items=record.suggestions) @@ -196,8 +196,8 @@ async def upsert_suggestion( suggestion = await datasets.upsert_suggestion(db, 
search_engine, record, question, suggestion_create) - telemetry_client.track_crud_records_subtopic( - topic="update", sub_topic="suggestions", record_id=record_id, count=len(record.suggestions) + await telemetry_client.track_crud_records_subtopic( + action="update", sub_topic="suggestions", record_id=record_id, count=len(record.suggestions) ) return suggestion @@ -239,8 +239,8 @@ async def delete_record_suggestions( await datasets.delete_suggestions(db, search_engine, record, suggestion_ids) - telemetry_client.track_crud_records_subtopic( - topic="delete", sub_topic="suggestions", record_id=record_id, count=len(record.suggestions) + await telemetry_client.track_crud_records_subtopic( + action="delete", sub_topic="suggestions", record_id=record_id, count=len(record.suggestions) ) @@ -266,6 +266,6 @@ async def delete_record( record = await datasets.delete_record(db, search_engine, record) - telemetry_client.track_crud_records(topic="delete", record=record) + await telemetry_client.track_crud_records(action="delete", record_or_dataset=record) return record diff --git a/argilla-server/src/argilla_server/api/handlers/v1/responses.py b/argilla-server/src/argilla_server/api/handlers/v1/responses.py index 0dfe957992..d91091414e 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/responses.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/responses.py @@ -51,10 +51,9 @@ async def create_current_user_responses_bulk( ): responses_bulk_items = await use_case.execute(body.items, user=current_user) - for response in responses_bulk_items: - telemetry_client.track_crud_records_subtopic( - topic="create", sub_topic="responses", record_id=response.record_id - ) + await telemetry_client.track_crud_records_subtopic( + action="create", sub_topic="responses", record_id=None, count=len(responses_bulk_items) + ) return ResponsesBulk(items=responses_bulk_items) @@ -79,7 +78,9 @@ async def update_response( response = await datasets.update_response(db, search_engine, 
response, response_update) - telemetry_client.track_crud_records_subtopic(topic="update", sub_topic="responses", record_id=response.record_id) + await telemetry_client.track_crud_records_subtopic( + action="update", sub_topic="responses", record_id=response.record_id + ) return response @@ -103,6 +104,8 @@ async def delete_response( response = await datasets.delete_response(db, search_engine, response) - telemetry_client.track_crud_records_subtopic(topic="delete", sub_topic="responses", record_id=response.record_id) + await telemetry_client.track_crud_records_subtopic( + action="delete", sub_topic="responses", record_id=response.record_id + ) return response diff --git a/argilla-server/src/argilla_server/api/handlers/v1/users.py b/argilla-server/src/argilla_server/api/handlers/v1/users.py index c5cef2ddf6..34cc2c68fa 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/users.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/users.py @@ -129,8 +129,8 @@ async def list_user_workspaces( else: workspaces = await accounts.list_workspaces_by_user_id(db, user_id) - await telemetry_client.track_crud_workspace(action="list", workspace=None, is_oauth=False, count=len(workspaces)) + await telemetry_client.track_crud_workspace(action="list", workspace=None, count=len(workspaces)) for workspace in workspaces: - await telemetry_client.track_crud_workspace(action="read", workspace=workspace, is_oauth=False) + await telemetry_client.track_crud_workspace(action="read", workspace=workspace) return Workspaces(items=workspaces) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/vectors_settings.py b/argilla-server/src/argilla_server/api/handlers/v1/vectors_settings.py index 2a09b2e6b4..384d078e60 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/vectors_settings.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/vectors_settings.py @@ -50,7 +50,7 @@ async def update_vector_settings( vectors_setting = await 
datasets.update_vector_settings(db, vector_settings, vector_settings_update) await telemetry_client.track_crud_dataset_setting( - action="update", setting_name="vectors_settings", dataset=VectorSettings.dataset, setting=vectors_setting + action="update", setting_name="vectors_settings", dataset=vectors_setting.dataset, setting=vectors_setting ) return vector_settings @@ -75,7 +75,7 @@ async def delete_vector_settings( vectors_setting = await datasets.delete_vector_settings(db, vector_settings) await telemetry_client.track_crud_dataset_setting( - action="delete", setting_name="vectors_settings", dataset=VectorSettings.dataset, setting=vectors_setting + action="delete", setting_name="vectors_settings", dataset=vectors_setting.dataset, setting=vectors_setting ) return vectors_setting diff --git a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py index 396f3fce67..de8e67b265 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py @@ -50,7 +50,7 @@ async def get_workspace( workspace = await Workspace.get_or_raise(db, workspace_id) - telemetry_client.track_crud_workspace(action="read", workspace=workspace) + await telemetry_client.track_crud_workspace(action="read", workspace=workspace) return workspace @@ -67,7 +67,7 @@ async def create_workspace( workspace = await accounts.create_workspace(db, workspace_create.dict()) - telemetry_client.track_crud_workspace(action="create", workspace=workspace) + await telemetry_client.track_crud_workspace(action="create", workspace=workspace) return workspace @@ -86,7 +86,7 @@ async def delete_workspace( workspace = await accounts.delete_workspace(db, workspace) - telemetry_client.track_crud_workspace(action="delete", workspace=workspace) + await telemetry_client.track_crud_workspace(action="delete", workspace=workspace) return workspace diff --git 
a/argilla-server/src/argilla_server/errors/error_handler.py b/argilla-server/src/argilla_server/errors/error_handler.py index f43d28f5e4..9af811bdaa 100644 --- a/argilla-server/src/argilla_server/errors/error_handler.py +++ b/argilla-server/src/argilla_server/errors/error_handler.py @@ -18,7 +18,6 @@ from fastapi.exception_handlers import http_exception_handler from fastapi.exceptions import RequestValidationError -from argilla_server import telemetry from argilla_server.errors.base_errors import ( BadRequestError, ClosedDatasetError, @@ -34,6 +33,7 @@ WrongTaskError, ) from argilla_server.pydantic_v1 import BaseModel +from argilla_server.telemetry import get_telemetry_client class ErrorDetail(BaseModel): @@ -53,15 +53,15 @@ def __init__(self, error: ServerError): class APIErrorHandler: @classmethod async def track_error(cls, error: ServerError, request: Request): - data = { + user_agent = { "code": error.code, "user-agent": request.headers.get("user-agent"), "accept-language": request.headers.get("accept-language"), } if isinstance(error, (GenericServerError, EntityNotFoundError, EntityAlreadyExistsError)): - data["type"] = error.type + user_agent["type"] = error.type - telemetry.get_telemetry_client().track_data(topic="error/server", user_agent=data) + await get_telemetry_client().track_data(topic="error/server", user_agent=user_agent) @classmethod async def common_exception_handler(cls, request: Request, error: Exception): diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 98678350c6..aa682cad58 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -27,9 +27,12 @@ from argilla_server.models import ( Dataset, Field, + FloatMetadataPropertySettings, + IntegerMetadataPropertySettings, MetadataPropertySettings, Question, Record, + TermsMetadataPropertySettings, User, VectorSettings, Workspace, @@ -54,9 +57,9 @@ def server_id(self) -> 
uuid.UUID: return self._server_id def __post_init__(self, enable_telemetry: bool): - self._server_id = str(uuid.UUID(int=uuid.getnode())) + self._server_id = uuid.UUID(int=uuid.getnode()) self._system_info = { - "server_id": self._server_id, + "server_id": str(self._server_id), "system": platform.system(), "machine": platform.machine(), "platform": platform.platform(), @@ -108,12 +111,23 @@ def _process_dataset_settings(dataset: Dataset): } @staticmethod - def _process_dataset_setting_settings(setting: Union[Field, VectorSettings, Question, MetadataPropertySettings]): + def _process_dataset_setting_settings( + setting: Union[ + Field, + VectorSettings, + Question, + FloatMetadataPropertySettings, + TermsMetadataPropertySettings, + IntegerMetadataPropertySettings, + ], + ): user_data = {"dataset_id": str(setting.dataset_id)} if isinstance(setting, (Field, Question)): user_data["required"] = setting.required user_data.update(setting.settings) - elif isinstance(setting, MetadataPropertySettings): + elif isinstance( + setting, (FloatMetadataPropertySettings, TermsMetadataPropertySettings, IntegerMetadataPropertySettings) + ): user_data["type"] = setting.type elif isinstance(setting, VectorSettings): user_data["dimensions"] = setting.dimensions @@ -129,7 +143,7 @@ def _process_user_model(user: User): "user_hash": str(uuid.uuid5(namespace=_TELEMETRY_CLIENT.server_id, name=user.username)), } - def track_data(self, topic: str, user_agent: dict, include_system_info: bool = True, count: int = 1): + async def track_data(self, topic: str, user_agent: dict, include_system_info: bool = True, count: int = 1): if not self.enable_telemetry: return @@ -147,7 +161,7 @@ async def track_user_login(self, request: Request, user: User): topic = "user/login" user_agent = self._process_user_model(user=user) user_agent.update(**self._process_request_info(request)) - self.track_data(topic=topic, user_agent=user_agent) + await self.track_data(topic=topic, user_agent=user_agent) async def 
track_crud_user( self, @@ -157,59 +171,75 @@ async def track_crud_user( count: Union[int, None] = None, ): topic = f"user/{action}" + user_agent = {} if user: - user_agent = self._process_user_model(user=user) + user_agent.update(self._process_user_model(user=user)) if is_oauth is not None: user_agent["is_oauth"] = is_oauth - self.track_data(topic=topic, user_agent=user_agent, count=count) + await self.track_data(topic=topic, user_agent=user_agent, count=count) async def track_crud_workspace( self, action: str, workspace: Union[Workspace, None] = None, count: Union[int, None] = None ): topic: str = f"workspace/{action}" + user_agent = {} if workspace: - user_agent = self._process_workspace_model(workspace=workspace) - self.track_data(topic=topic, user_agent=user_agent, count=count) + user_agent.update(self._process_workspace_model(workspace=workspace)) + await self.track_data(topic=topic, user_agent=user_agent, count=count) async def track_crud_dataset( self, action: str, dataset: Union[Dataset, None] = None, count: Union[int, None] = None ): topic = f"dataset/{action}" + user_agent = {} if dataset: - user_agent = self._process_dataset_model(dataset=dataset) + user_agent.update(self._process_dataset_model(dataset=dataset)) user_agent.update(self._process_dataset_settings(dataset=dataset)) - self.track_data(topic=topic, user_agent=user_agent, count=count) - - for field in dataset.fields: - self.track_crud_dataset_setting(action=action, setting_name="fields", dataset=dataset, setting=field) - for question in dataset.questions: - self.track_crud_dataset_setting(action=action, setting_name="questions", dataset=dataset, setting=question) - for vector in dataset.vectors_settings: - self.track_crud_dataset_setting( - action=action, setting_name="vectors_settings", dataset=dataset, setting=vector - ) - for meta_data in dataset.metadata_properties: - self.track_crud_dataset_setting( - action=action, setting_name="metadata_properties", dataset=dataset, setting=meta_data - ) 
+ await self.track_data(topic=topic, user_agent=user_agent, count=count) + + if dataset: + for field in dataset.fields: + self.track_crud_dataset_setting(action=action, setting_name="fields", dataset=dataset, setting=field) + for question in dataset.questions: + self.track_crud_dataset_setting( + action=action, setting_name="questions", dataset=dataset, setting=question + ) + for vector in dataset.vectors_settings: + self.track_crud_dataset_setting( + action=action, setting_name="vectors_settings", dataset=dataset, setting=vector + ) + for meta_data in dataset.metadata_properties: + self.track_crud_dataset_setting( + action=action, setting_name="metadata_properties", dataset=dataset, setting=meta_data + ) async def track_crud_dataset_setting( self, action: str, setting_name: str, dataset: Dataset, - setting: Union[Field, VectorSettings, Question, MetadataPropertySettings], + setting: Union[Field, VectorSettings, Question, MetadataPropertySettings, None] = None, count: Union[int, None] = None, ): - topic = f"dataset/{setting_name}/{setting.settings.type}/{action}" + topic = f"dataset/{setting_name}" + if setting: + if hasattr(setting, "settings"): + topic = f"{topic}/{setting.settings['type']}" + topic = f"{topic}/{action}" user_agent = self._process_dataset_model(dataset=dataset) - user_agent.update(self._process_dataset_setting_settings(setting=setting)) - self.track_data(topic=topic, user_agent=user_agent, count=count) + if setting: + user_agent.update(self._process_dataset_setting_settings(setting=setting)) + await self.track_data(topic=topic, user_agent=user_agent, count=count) - async def track_crud_records(self, action: str, record: Union[Record, None] = None, count: Union[int, None] = None): + async def track_crud_records( + self, action: str, record_or_dataset: Union[Record, None] = None, count: Union[int, None] = None + ): topic = f"dataset/records/{action}" - user_agent = self._process_record_model(record=record) - self.track_data(topic=topic, 
user_agent=user_agent, count=count) + if isinstance(record_or_dataset, Record): + user_agent = self._process_record_model(record=record_or_dataset) + else: + user_agent = self._process_dataset_model(dataset=record_or_dataset) + await self.track_data(topic=topic, user_agent=user_agent, count=count) async def track_crud_records_subtopic( self, @@ -220,11 +250,11 @@ async def track_crud_records_subtopic( ): topic = f"dataset/records/{sub_topic}/{action}" user_agent = {"record_id": record_id} - self.track_data(topic=topic, user_agent=user_agent, count=count) + await self.track_data(topic=topic, user_agent=user_agent, count=count) -def get_telemetry_client() -> TelemetryClient: - return _TELEMETRY_CLIENT +_TELEMETRY_CLIENT = TelemetryClient() -_TELEMETRY_CLIENT = TelemetryClient() +def get_telemetry_client() -> TelemetryClient: + return _TELEMETRY_CLIENT diff --git a/argilla-server/tests/unit/commons/test_telemetry.py b/argilla-server/tests/unit/commons/test_telemetry.py index 4ac9a714c4..e1002d55d9 100644 --- a/argilla-server/tests/unit/commons/test_telemetry.py +++ b/argilla-server/tests/unit/commons/test_telemetry.py @@ -16,7 +16,6 @@ from unittest.mock import MagicMock import pytest -from argilla_server import telemetry from argilla_server.enums import UserRole from argilla_server.models import User from argilla_server.telemetry import TelemetryClient, get_telemetry_client @@ -32,19 +31,11 @@ def test_disable_telemetry(): @pytest.mark.asyncio -async def test_track_login(test_telemetry: MagicMock): +async def test_track_user_login(test_telemetry: MagicMock): user = User(id=uuid.uuid4(), username="argilla") - await telemetry.track_login(request=mock_request, user=user) + await get_telemetry_client().track_user_login(request=mock_request, user=user) - current_server_id = get_telemetry_client().server_id - expected_event_data = { - "accept-language": None, - "is_default_user": True, - "user_id": str(user.id), - "user-agent": None, - "user_hash": 
str(uuid.uuid5(current_server_id, name="argilla")), - } - test_telemetry.track_data.assert_called_once_with(action="UserInfoRequested", data=expected_event_data) + test_telemetry.track_user_login.assert_called_once_with(request=mock_request, user=user) @pytest.mark.parametrize("is_oauth", [True, False]) @@ -52,13 +43,5 @@ async def test_track_login(test_telemetry: MagicMock): def test_user_created(test_telemetry, username: str, is_oauth: bool): user = User(id=uuid.uuid4(), username=username, role=UserRole.owner) - telemetry.track_user_created(user=user, is_oauth=is_oauth) - test_telemetry.track_data.assert_called_once_with( - action="UserCreated", - data={ - "is_default_user": username == "argilla", - "is_oauth": is_oauth, - "role": user.role, - "user_id": str(user.id), - }, - ) + get_telemetry_client().track_crud_user(action="create", user=user, is_oauth=is_oauth) + test_telemetry.track_crud_user.assert_called_once_with(action="create", user=user, is_oauth=is_oauth) diff --git a/argilla-server/tests/unit/errors/test_api_errors.py b/argilla-server/tests/unit/errors/test_api_errors.py index fdda7fd540..c05a3644cc 100644 --- a/argilla-server/tests/unit/errors/test_api_errors.py +++ b/argilla-server/tests/unit/errors/test_api_errors.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from unittest.mock import MagicMock + import pytest from argilla_server.api.schemas.v1.datasets import Dataset from argilla_server.errors.base_errors import ( @@ -69,7 +71,15 @@ class TestAPIErrorHandler: ), ], ) - async def test_track_error(self, test_telemetry, error, expected_event): + async def test_track_error(self, test_telemetry: MagicMock, error, expected_event): await APIErrorHandler.track_error(error, request=mock_request) - test_telemetry.track_data.assert_called_once_with(action="ServerErrorFound", data=expected_event) + user_agent = { + "code": error.code, + "user-agent": mock_request.headers.get("user-agent"), + "accept-language": mock_request.headers.get("accept-language"), + } + if isinstance(error, (GenericServerError, EntityNotFoundError, EntityAlreadyExistsError)): + user_agent["type"] = error.type + + test_telemetry.track_data.assert_called_once_with(topic="error/server", user_agent=user_agent) From ac7601cb855292cb10afc44208c61d6cb004e013 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 16 Jul 2024 14:31:46 +0200 Subject: [PATCH 26/63] Update `test_telemetry` fixture --- argilla-server/tests/unit/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/argilla-server/tests/unit/conftest.py b/argilla-server/tests/unit/conftest.py index fe3479ea6d..3988d235f9 100644 --- a/argilla-server/tests/unit/conftest.py +++ b/argilla-server/tests/unit/conftest.py @@ -102,8 +102,8 @@ def test_telemetry(mocker: "MockerFixture") -> "MagicMock": mock_telemetry = mocker.Mock(TelemetryClient) mock_telemetry.server_id = uuid.uuid4() - telemetry._CLIENT = mock_telemetry - return telemetry._CLIENT + telemetry._TELEMETRY_CLIENT = mock_telemetry + return mock_telemetry @pytest_asyncio.fixture(scope="function") From c72c4b05df508b0b920af9ed2ed850ecb2cc8ab9 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 16 Jul 2024 17:21:30 +0200 Subject: [PATCH 27/63] Update disable telemetry env var --- 
examples/deployments/docker/traefik/docker-compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/deployments/docker/traefik/docker-compose.yaml b/examples/deployments/docker/traefik/docker-compose.yaml index ea5a9bf97e..7215f59c1a 100644 --- a/examples/deployments/docker/traefik/docker-compose.yaml +++ b/examples/deployments/docker/traefik/docker-compose.yaml @@ -19,7 +19,7 @@ services: argilla: image: argilla/argilla-quickstart:latest environment: - HF_HUB_DISABLE_TELEMETRY: 0 + HF_HUB_DISABLE_TELEMETRY: 1 ARGILLA_BASE_URL: /argilla ARGILLA_AUTH_SECRET_KEY: ${ARGILLA_AUTH_SECRET_KEY:? Please generate a 32 character random string with `openssl rand -hex 32`} labels: From 538c2684a9e62bcf89fbeca1cd3e9f3defcb7fbd Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Wed, 17 Jul 2024 11:22:53 +0200 Subject: [PATCH 28/63] Fix tests dataset creation --- .../api/handlers/v1/datasets/datasets.py | 1 - .../tests/unit/api/handlers/v1/test_datasets.py | 11 +++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py index 707a7f96a3..36bd94af67 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py @@ -232,7 +232,6 @@ async def create_dataset( *, db: AsyncSession = Depends(get_async_db), telemetry_client: TelemetryClient = Depends(get_telemetry_client), - dataset_id: UUID, dataset_create: DatasetCreate, current_user: User = Security(auth.get_current_user), ): diff --git a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py index 650e9f3808..38554ae5a1 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py @@ -78,7 +78,6 @@ 
LabelSelectionQuestionFactory, MetadataPropertyFactory, MultiLabelSelectionQuestionFactory, - OwnerFactory, QuestionFactory, RatingQuestionFactory, RecordFactory, @@ -1775,8 +1774,8 @@ async def test_create_dataset_records( records = (await db.execute(select(Record))).scalars().all() mock_search_engine.index_records.assert_called_once_with(dataset, records) - test_telemetry.track_data.assert_called_once_with( - action="DatasetRecordsCreated", data={"records": len(records_json["items"])} + test_telemetry.track_crud_records.assert_called_once_with( + action="create", record_or_dataset=dataset, count=len(records) ) async def test_create_dataset_records_with_response_for_multiple_users( @@ -2556,8 +2555,8 @@ async def test_create_dataset_records_as_admin( records = (await db.execute(select(Record))).scalars().all() mock_search_engine.index_records.assert_called_once_with(dataset, records) - test_telemetry.track_data.assert_called_once_with( - action="DatasetRecordsCreated", data={"records": len(records_json["items"])} + test_telemetry.track_crud_records.assert_called_once_with( + action="create", record_or_dataset=dataset, count=len(records) ) async def test_create_dataset_records_as_annotator(self, async_client: "AsyncClient", db: "AsyncSession"): @@ -4618,7 +4617,7 @@ async def test_publish_dataset( response_body = response.json() assert response_body["status"] == "ready" - test_telemetry.track_data.assert_called_once_with(action="PublishedDataset", data={"questions": ["rating"]}) + test_telemetry.track_crud_dataset.assert_called_once_with(action="create", dataset=dataset) mock_search_engine.create_index.assert_called_once_with(dataset) async def test_publish_dataset_without_authentication(self, async_client: "AsyncClient", db: "AsyncSession"): From 0b167ebf37fb5e653e87af06f4630fdd2a5ddf58 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Wed, 17 Jul 2024 11:57:12 +0200 Subject: [PATCH 29/63] Fix failing tests due to unloaded DatabaseModels --- 
.../src/argilla_server/api/handlers/v1/records.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/records.py b/argilla-server/src/argilla_server/api/handlers/v1/records.py index f08626a516..a195615cfe 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/records.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/records.py @@ -196,9 +196,7 @@ async def upsert_suggestion( suggestion = await datasets.upsert_suggestion(db, search_engine, record, question, suggestion_create) - await telemetry_client.track_crud_records_subtopic( - action="update", sub_topic="suggestions", record_id=record_id, count=len(record.suggestions) - ) + await telemetry_client.track_crud_records_subtopic(action="update", sub_topic="suggestions", record_id=record_id) return suggestion @@ -240,7 +238,7 @@ async def delete_record_suggestions( await datasets.delete_suggestions(db, search_engine, record, suggestion_ids) await telemetry_client.track_crud_records_subtopic( - action="delete", sub_topic="suggestions", record_id=record_id, count=len(record.suggestions) + action="delete", sub_topic="suggestions", record_id=record_id, count=num_suggestions ) From 4fcbbd65ea994676dc13a817ea566036f17e66cf Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Wed, 17 Jul 2024 13:29:12 +0200 Subject: [PATCH 30/63] Add tests telemetry crud datasets --- .../tests/unit/api/handlers/v1/test_datasets.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py index 38554ae5a1..8723bb9223 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py @@ -654,7 +654,7 @@ async def test_list_dataset_vectors_settings_without_authentication(self, async_ # Helper function to create records with responses - async def 
test_get_dataset(self, async_client: "AsyncClient", owner_auth_header: dict): + async def test_get_dataset(self, async_client: "AsyncClient", owner_auth_header: dict, test_telemetry: MagicMock): dataset = await DatasetFactory.create(name="dataset") response = await async_client.get(f"/api/v1/datasets/{dataset.id}", headers=owner_auth_header) @@ -672,6 +672,8 @@ async def test_get_dataset(self, async_client: "AsyncClient", owner_auth_header: "updated_at": dataset.updated_at.isoformat(), } + test_telemetry.track_crud_dataset.assert_called_once_with(action="read", dataset=dataset) + async def test_get_dataset_without_authentication(self, async_client: "AsyncClient"): dataset = await DatasetFactory.create() @@ -4724,7 +4726,9 @@ async def test_publish_dataset_with_nonexistent_dataset_id( ], ) @pytest.mark.parametrize("role", [UserRole.admin, UserRole.owner]) - async def test_update_dataset(self, async_client: "AsyncClient", db: "AsyncSession", role: UserRole, payload: dict): + async def test_update_dataset( + self, async_client: "AsyncClient", db: "AsyncSession", role: UserRole, payload: dict, test_telemetry: MagicMock + ): dataset = await DatasetFactory.create( name="Current Name", guidelines="Current Guidelines", status=DatasetStatus.ready ) @@ -4761,6 +4765,7 @@ async def test_update_dataset(self, async_client: "AsyncClient", db: "AsyncSessi assert dataset.name == name assert dataset.guidelines == guidelines assert dataset.allow_extra_metadata is allow_extra_metadata + test_telemetry.track_crud_dataset.assert_called_once_with(action="update", dataset=dataset) @pytest.mark.parametrize( "dataset_json", @@ -4860,6 +4865,7 @@ async def test_delete_dataset( mock_search_engine: SearchEngine, owner: User, owner_auth_header: dict, + test_telemetry: MagicMock, ): dataset = await DatasetFactory.create() await TextFieldFactory.create(dataset=dataset) @@ -4886,6 +4892,7 @@ async def test_delete_dataset( # ] mock_search_engine.delete_index.assert_called_once_with(dataset) + 
test_telemetry.track_crud_dataset.assert_called_once_with(action="delete", dataset=dataset) async def test_delete_published_dataset( self, async_client: "AsyncClient", db: "AsyncSession", owner: User, owner_auth_header: dict From 8428939961f840a462cc45d899d0a473faaa1b18 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Wed, 17 Jul 2024 15:03:43 +0200 Subject: [PATCH 31/63] Update tests coverage telemetry tracking --- .../api/handlers/v1/datasets/datasets.py | 18 +++--- .../api/handlers/v1/datasets/questions.py | 6 +- .../api/handlers/v1/datasets/records.py | 2 +- .../argilla_server/api/handlers/v1/records.py | 2 +- .../argilla_server/api/handlers/v1/users.py | 2 +- .../unit/api/handlers/v1/test_datasets.py | 57 +++++++++++++++++-- .../tests/unit/api/handlers/v1/test_fields.py | 19 ++++++- .../handlers/v1/test_list_dataset_records.py | 8 ++- .../handlers/v1/test_metadata_properties.py | 22 ++++++- .../unit/api/handlers/v1/test_questions.py | 14 ++++- .../unit/api/handlers/v1/test_records.py | 54 +++++++++++++++--- .../unit/api/handlers/v1/test_responses.py | 15 ++++- .../unit/api/handlers/v1/test_suggestions.py | 11 +++- .../tests/unit/api/handlers/v1/test_users.py | 6 +- .../api/handlers/v1/test_vectors_settings.py | 11 +++- .../unit/api/handlers/v1/test_workspaces.py | 14 ++++- 16 files changed, 218 insertions(+), 43 deletions(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py index 36bd94af67..af33002713 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py @@ -105,13 +105,13 @@ async def list_dataset_fields( await authorize(current_user, DatasetPolicy.get(dataset)) - await telemetry_client.track_crud_dataset_setting( - action="list", dataset=dataset, setting_name="fields", count=len(dataset.fields) - ) for field in dataset.fields: await 
telemetry_client.track_crud_dataset_setting( action="read", dataset=dataset, setting_name="fields", setting=field ) + await telemetry_client.track_crud_dataset_setting( + action="list", dataset=dataset, setting_name="fields", count=len(dataset.fields) + ) return Fields(items=dataset.fields) @@ -128,13 +128,13 @@ async def list_dataset_vector_settings( await authorize(current_user, DatasetPolicy.get(dataset)) - await telemetry_client.track_crud_dataset_setting( - action="list", dataset=dataset, setting_name="vectors_settings", count=len(dataset.vectors_settings) - ) for vectors_setting in dataset.vectors_settings: await telemetry_client.track_crud_dataset_setting( action="read", dataset=dataset, setting_name="vectors_settings", setting=vectors_setting ) + await telemetry_client.track_crud_dataset_setting( + action="list", dataset=dataset, setting_name="vectors_settings", count=len(dataset.vectors_settings) + ) return VectorsSettings(items=dataset.vectors_settings) @@ -155,13 +155,13 @@ async def list_current_user_dataset_metadata_properties( current_user, dataset.metadata_properties ) - await telemetry_client.track_crud_dataset_setting( - action="list", dataset=dataset, setting_name="metadata_properties", count=len(filtered_metadata_properties) - ) for metadata_property in filtered_metadata_properties: await telemetry_client.track_crud_dataset_setting( action="read", dataset=dataset, setting_name="metadata_properties", setting=metadata_property ) + await telemetry_client.track_crud_dataset_setting( + action="list", dataset=dataset, setting_name="metadata_properties", count=len(filtered_metadata_properties) + ) return MetadataProperties(items=filtered_metadata_properties) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py index 2150ecb367..90c56b894c 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py +++ 
b/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py @@ -42,13 +42,13 @@ async def list_dataset_questions( await authorize(current_user, DatasetPolicy.get(dataset)) - await telemetry_client.track_crud_dataset_setting( - action="list", setting_name="questions", dataset=dataset, count=len(dataset.questions) - ) for question in dataset.questions: await telemetry_client.track_crud_dataset_setting( action="read", dataset=dataset, setting_name="questions", setting=question ) + await telemetry_client.track_crud_dataset_setting( + action="list", setting_name="questions", dataset=dataset, count=len(dataset.questions) + ) return Questions(items=dataset.questions) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py index 7ba5217510..e1f8651d8c 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py @@ -524,7 +524,7 @@ async def delete_dataset_records( await datasets.delete_records(db, search_engine, dataset, record_ids) - await telemetry_client.track_crud_dataset(action="delete", dataset=dataset, count=len(record_ids)) + await telemetry_client.track_crud_records(action="delete", record_or_dataset=dataset, count=num_records) @router.post( diff --git a/argilla-server/src/argilla_server/api/handlers/v1/records.py b/argilla-server/src/argilla_server/api/handlers/v1/records.py index a195615cfe..44070b9505 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/records.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/records.py @@ -196,7 +196,7 @@ async def upsert_suggestion( suggestion = await datasets.upsert_suggestion(db, search_engine, record, question, suggestion_create) - await telemetry_client.track_crud_records_subtopic(action="update", sub_topic="suggestions", record_id=record_id) + await 
telemetry_client.track_crud_records_subtopic(action="create", sub_topic="suggestions", record_id=record_id) return suggestion diff --git a/argilla-server/src/argilla_server/api/handlers/v1/users.py b/argilla-server/src/argilla_server/api/handlers/v1/users.py index 34cc2c68fa..4ffbeb4ad2 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/users.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/users.py @@ -129,8 +129,8 @@ async def list_user_workspaces( else: workspaces = await accounts.list_workspaces_by_user_id(db, user_id) - await telemetry_client.track_crud_workspace(action="list", workspace=None, count=len(workspaces)) for workspace in workspaces: await telemetry_client.track_crud_workspace(action="read", workspace=workspace) + await telemetry_client.track_crud_workspace(action="list", workspace=None, count=len(workspaces)) return Workspaces(items=workspaces) diff --git a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py index 8723bb9223..9eccdd18f3 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py @@ -193,7 +193,9 @@ async def test_list_current_user_datasets_by_workspace_id( response_body = response.json() assert [dataset["name"] for dataset in response_body["items"]] == ["dataset-a"] - async def test_list_dataset_fields(self, async_client: "AsyncClient", owner_auth_header: dict): + async def test_list_dataset_fields( + self, async_client: "AsyncClient", owner_auth_header: dict, test_telemetry: MagicMock + ): dataset = await DatasetFactory.create() text_field_a = await TextFieldFactory.create( name="text-field-a", title="Text Field A", required=True, dataset=dataset @@ -231,6 +233,10 @@ async def test_list_dataset_fields(self, async_client: "AsyncClient", owner_auth ], } + test_telemetry.track_crud_dataset_setting.assert_called_with( + action="list", dataset=dataset, 
setting_name="fields", count=len(response.json()["items"]) + ) + async def test_list_dataset_fields_without_authentication(self, async_client: "AsyncClient"): dataset = await DatasetFactory.create() @@ -286,7 +292,9 @@ async def test_list_dataset_fields_with_nonexistent_dataset_id( assert response.status_code == 404 assert response.json() == {"detail": f"Dataset with id `{dataset_id}` not found"} - async def test_list_dataset_questions(self, async_client: "AsyncClient", owner_auth_header: dict): + async def test_list_dataset_questions( + self, async_client: "AsyncClient", owner_auth_header: dict, test_telemetry: MagicMock + ): dataset = await DatasetFactory.create() text_question = await TextQuestionFactory.create( name="text-question", @@ -348,6 +356,10 @@ async def test_list_dataset_questions(self, async_client: "AsyncClient", owner_a ] } + test_telemetry.track_crud_dataset_setting.assert_called_with( + action="list", dataset=dataset, setting_name="questions", count=len(response.json()["items"]) + ) + @pytest.mark.parametrize( "QuestionFactory, settings", [ @@ -607,7 +619,9 @@ async def test_list_current_user_dataset_metadata_properties_with_nonexistent_da assert response.json() == {"detail": f"Dataset with id `{dataset_id}` not found"} @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) - async def test_list_dataset_vectors_settings(self, async_client: "AsyncClient", role: UserRole): + async def test_list_dataset_vectors_settings( + self, async_client: "AsyncClient", role: UserRole, test_telemetry: MagicMock + ): dataset = await DatasetFactory.create() vectors_settings = await VectorSettingsFactory.create_batch(size=3, dataset=dataset) user = await UserFactory.create(workspaces=[dataset.workspace], role=role) @@ -632,6 +646,10 @@ async def test_list_dataset_vectors_settings(self, async_client: "AsyncClient", ] } + test_telemetry.track_crud_dataset_setting.assert_called_with( + action="list", dataset=dataset, setting_name="vectors_settings", 
count=len(response.json()["items"]) + ) + @pytest.mark.parametrize("role", [UserRole.annotator, UserRole.admin]) async def test_list_dataset_vectors_settings_as_user_from_another_workspace( self, async_client: "AsyncClient", role: UserRole @@ -981,6 +999,7 @@ async def test_create_dataset_field( owner_auth_header: dict, settings: dict, expected_settings: dict, + test_telemetry: MagicMock, ): dataset = await DatasetFactory.create() field_json = {"name": "name", "title": "title", "settings": settings} @@ -1005,6 +1024,10 @@ async def test_create_dataset_field( "updated_at": datetime.fromisoformat(response_body["updated_at"]).isoformat(), } + test_telemetry.track_crud_dataset_setting.assert_called_once_with( + action="create", setting_name="fields", dataset=dataset, setting=ANY + ) + async def test_create_dataset_field_without_authentication(self, async_client: "AsyncClient", db: "AsyncSession"): dataset = await DatasetFactory.create() field_json = { @@ -1227,6 +1250,7 @@ async def test_create_dataset_metadata_property( owner_auth_header: dict, settings: dict, expected_settings: dict, + test_telemetry: MagicMock, ): dataset = await DatasetFactory.create() metadata_property_json = {"name": "name", "title": "title", "settings": settings} @@ -1251,6 +1275,10 @@ async def test_create_dataset_metadata_property( "updated_at": datetime.fromisoformat(response_body["updated_at"]).isoformat(), } + test_telemetry.track_crud_dataset_setting.assert_called_once_with( + action="create", setting_name="metadata_properties", dataset=dataset, setting=ANY + ) + async def test_create_dataset_metadata_property_with_dataset_ready( self, async_client: "AsyncClient", @@ -1526,6 +1554,7 @@ async def test_create_dataset_vector_settings( mock_search_engine: SearchEngine, role: UserRole, dataset_status: DatasetStatus, + test_telemetry: MagicMock, ): dataset = await DatasetFactory.create(status=dataset_status) user = await UserFactory.create(role=role, workspaces=[dataset.workspace]) @@ -1560,6 
+1589,10 @@ async def test_create_dataset_vector_settings( else: mock_search_engine.configure_index_vectors.assert_called_once_with(vector_settings) + test_telemetry.track_crud_dataset_setting.assert_called_once_with( + action="create", setting_name="vectors_settings", dataset=dataset, setting=ANY + ) + @pytest.mark.parametrize( "payload", [ @@ -2933,7 +2966,11 @@ async def test_create_dataset_records_with_nonexistent_dataset_id( @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) async def test_update_dataset_records( - self, async_client: "AsyncClient", mock_search_engine: "SearchEngine", role: UserRole + self, + async_client: "AsyncClient", + mock_search_engine: "SearchEngine", + role: UserRole, + test_telemetry: MagicMock, ): dataset = await DatasetFactory.create() user = await UserFactory.create(workspaces=[dataset.workspace], role=role) @@ -3021,6 +3058,8 @@ async def test_update_dataset_records( # it should be called only with the first three records (metadata was updated for them) mock_search_engine.index_records.assert_called_once_with(dataset, records[:3]) + test_telemetry.track_crud_records.assert_called_once_with(action="update", record_or_dataset=dataset, count=4) + async def test_update_dataset_records_with_suggestions( self, async_client: "AsyncClient", mock_search_engine: "SearchEngine", owner_auth_header: dict ): @@ -3519,7 +3558,12 @@ async def test_update_dataset_records_without_authentication(self, async_client: @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) async def test_delete_dataset_records( - self, async_client: "AsyncClient", db: "AsyncSession", mock_search_engine: SearchEngine, role: UserRole + self, + async_client: "AsyncClient", + db: "AsyncSession", + mock_search_engine: SearchEngine, + role: UserRole, + test_telemetry: MagicMock, ): dataset = await DatasetFactory.create() user = await UserFactory.create(workspaces=[dataset.workspace], role=role) @@ -3541,6 +3585,9 @@ async def 
test_delete_dataset_records( # `delete_records` is called with the records returned by the delete statement, which are different ORM objects # than the ones created by the factory mock_search_engine.delete_records.assert_called_once_with(dataset=dataset, records=ANY) + test_telemetry.track_crud_records.assert_called_once_with( + action="delete", record_or_dataset=dataset, count=len(records_ids) + len(random_uuids) + ) async def test_delete_dataset_records_with_no_ids(self, async_client: "AsyncClient", owner_auth_header: dict): dataset = await DatasetFactory.create() diff --git a/argilla-server/tests/unit/api/handlers/v1/test_fields.py b/argilla-server/tests/unit/api/handlers/v1/test_fields.py index 17713ade47..3a8e384d94 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_fields.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_fields.py @@ -11,9 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- from datetime import datetime from typing import TYPE_CHECKING +from unittest.mock import MagicMock from uuid import uuid4 import pytest @@ -51,7 +51,12 @@ @pytest.mark.parametrize("role", [UserRole.admin, UserRole.owner]) @pytest.mark.asyncio async def test_update_field( - async_client: "AsyncClient", db: "AsyncSession", role: UserRole, payload: dict, expected_settings: dict + async_client: "AsyncClient", + db: "AsyncSession", + role: UserRole, + payload: dict, + expected_settings: dict, + test_telemetry: MagicMock, ): field = await TextFieldFactory.create() user = await UserFactory.create(role=role, workspaces=[field.dataset.workspace]) @@ -77,6 +82,9 @@ async def test_update_field( field = await db.get(Field, field.id) assert field.title == title assert field.settings == expected_settings + test_telemetry.track_crud_dataset_setting.assert_called_with( + action="update", dataset=field.dataset, setting_name="fields", setting=field + ) @pytest.mark.parametrize("title", [None, "", "t" * (FIELD_CREATE_TITLE_MAX_LENGTH + 1)]) @@ -180,7 +188,9 @@ async def test_update_field_without_authentication(async_client: "AsyncClient"): @pytest.mark.asyncio -async def test_delete_field(async_client: "AsyncClient", db: "AsyncSession", owner_auth_header: dict): +async def test_delete_field( + async_client: "AsyncClient", db: "AsyncSession", owner_auth_header: dict, test_telemetry: MagicMock +): field = await TextFieldFactory.create(name="name", title="title") response = await async_client.delete(f"/api/v1/fields/{field.id}", headers=owner_auth_header) @@ -199,6 +209,9 @@ async def test_delete_field(async_client: "AsyncClient", db: "AsyncSession", own "inserted_at": datetime.fromisoformat(response_body["inserted_at"]).isoformat(), "updated_at": datetime.fromisoformat(response_body["updated_at"]).isoformat(), } + test_telemetry.track_crud_dataset_setting.assert_called_with( + action="delete", dataset=field.dataset, setting_name="fields", setting=field + ) @pytest.mark.asyncio diff 
--git a/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py b/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py index f088cfcda9..7cb55406c5 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py @@ -13,6 +13,7 @@ # limitations under the License. from typing import List, Optional, Tuple, Type, Union +from unittest.mock import MagicMock from uuid import uuid4 import pytest @@ -55,7 +56,9 @@ @pytest.mark.asyncio class TestSuiteListDatasetRecords: @pytest.mark.skip(reason="Factory integration with search engine") - async def test_list_dataset_records(self, async_client: "AsyncClient", owner_auth_header: dict): + async def test_list_dataset_records( + self, async_client: "AsyncClient", owner_auth_header: dict, test_telemetry: MagicMock + ): dataset = await DatasetFactory.create() record_a = await RecordFactory.create(fields={"record_a": "value_a"}, dataset=dataset) record_b = await RecordFactory.create( @@ -98,6 +101,9 @@ async def test_list_dataset_records(self, async_client: "AsyncClient", owner_aut }, ], } + test_telemetry.track_crud_records.assert_called_with( + action="list", record_or_dataset=field.dataset, count=response.json()["total"] + ) @pytest.mark.parametrize( "includes", diff --git a/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py b/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py index 9c77ac6765..8d6c7b4f42 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py @@ -13,6 +13,7 @@ # limitations under the License. 
from typing import TYPE_CHECKING, Type +from unittest.mock import MagicMock from uuid import uuid4 import pytest @@ -160,7 +161,9 @@ async def test_get_metadata_property_metrics_as_restricted_user_role_from_differ @pytest.mark.asyncio -async def test_update_metadata_property(async_client: "AsyncClient", db: "AsyncSession", owner_auth_header: dict): +async def test_update_metadata_property( + async_client: "AsyncClient", db: "AsyncSession", owner_auth_header: dict, test_telemetry: MagicMock +): metadata_property = await IntegerMetadataPropertyFactory.create( name="name", title="title", allowed_roles=[UserRole.admin, UserRole.annotator] ) @@ -190,6 +193,13 @@ async def test_update_metadata_property(async_client: "AsyncClient", db: "AsyncS assert metadata_property.visible_for_annotators == False assert metadata_property.allowed_roles == [UserRole.admin] + test_telemetry.track_crud_dataset_setting.assert_called_with( + action="update", + setting_name="metadata_properties", + dataset=metadata_property.dataset, + setting=metadata_property, + ) + @pytest.mark.asyncio async def test_update_metadata_property_without_authentication(async_client: "AsyncClient", db: "AsyncSession"): @@ -389,7 +399,9 @@ async def test_update_metadata_property_with_nonexistent_metadata_property_id( @pytest.mark.parametrize("user_role", [UserRole.owner, UserRole.admin]) @pytest.mark.asyncio -async def test_delete_metadata_property(async_client: "AsyncClient", db: "AsyncSession", user_role: UserRole): +async def test_delete_metadata_property( + async_client: "AsyncClient", db: "AsyncSession", user_role: UserRole, test_telemetry: MagicMock +): metadata_property = await IntegerMetadataPropertyFactory.create(name="name", title="title") user = await UserFactory.create(role=user_role, workspaces=[metadata_property.dataset.workspace]) @@ -412,6 +424,12 @@ async def test_delete_metadata_property(async_client: "AsyncClient", db: "AsyncS } assert (await 
db.execute(select(func.count(MetadataProperty.id)))).scalar() == 0 + test_telemetry.track_crud_dataset_setting.assert_called_with( + action="delete", + setting_name="metadata_properties", + dataset=metadata_property.dataset, + setting=metadata_property, + ) @pytest.mark.asyncio diff --git a/argilla-server/tests/unit/api/handlers/v1/test_questions.py b/argilla-server/tests/unit/api/handlers/v1/test_questions.py index d4baacb5d7..3d739b8cd3 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_questions.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_questions.py @@ -13,6 +13,7 @@ # limitations under the License. from typing import TYPE_CHECKING, Type +from unittest.mock import ANY, MagicMock from uuid import uuid4 import pytest @@ -248,6 +249,7 @@ async def test_update_question( payload: dict, expected_settings: dict, role: UserRole, + test_telemetry: MagicMock, ): question = await QuestionFactory.create() user = await UserFactory.create(role=role, workspaces=[question.dataset.workspace]) @@ -278,6 +280,10 @@ async def test_update_question( assert question.description == description assert question.settings == expected_settings + test_telemetry.track_crud_dataset_setting.assert_called_with( + action="update", dataset=question.dataset, setting_name="questions", setting=ANY + ) + @pytest.mark.parametrize("title", [None, "", "t" * (QUESTION_CREATE_TITLE_MAX_LENGTH + 1)]) @pytest.mark.asyncio @@ -480,7 +486,9 @@ async def test_update_question_as_annotator(async_client: "AsyncClient"): @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) @pytest.mark.asyncio -async def test_delete_question(async_client: "AsyncClient", db: "AsyncSession", role: UserRole): +async def test_delete_question( + async_client: "AsyncClient", db: "AsyncSession", role: UserRole, test_telemetry: MagicMock +): question = await TextQuestionFactory.create(name="name", title="title", description="description") user = await UserFactory.create(role=role, 
workspaces=[question.dataset.workspace]) @@ -504,6 +512,10 @@ async def test_delete_question(async_client: "AsyncClient", db: "AsyncSession", "updated_at": question.updated_at.isoformat(), } + test_telemetry.track_crud_dataset_setting.assert_called_with( + action="delete", dataset=question.dataset, setting_name="questions", setting=ANY + ) + @pytest.mark.asyncio async def test_delete_question_as_admin_from_different_workspace(async_client: "AsyncClient", db: "AsyncSession"): diff --git a/argilla-server/tests/unit/api/handlers/v1/test_records.py b/argilla-server/tests/unit/api/handlers/v1/test_records.py index ed7d9f8cc2..81b1c0370e 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_records.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_records.py @@ -14,7 +14,7 @@ from datetime import datetime from typing import TYPE_CHECKING, Any, Awaitable, Callable, Type -from unittest.mock import call +from unittest.mock import MagicMock, call from uuid import UUID, uuid4 import pytest @@ -82,7 +82,7 @@ async def create_ranking_question(dataset: "Dataset") -> None: @pytest.mark.asyncio class TestSuiteRecords: @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin, UserRole.annotator]) - async def test_get_record(self, async_client: "AsyncClient", role: UserRole): + async def test_get_record(self, async_client: "AsyncClient", role: UserRole, test_telemetry: MagicMock): dataset = await DatasetFactory.create() record = await RecordFactory.create(dataset=dataset) user = await UserFactory.create(workspaces=[dataset.workspace], role=role) @@ -103,6 +103,8 @@ async def test_get_record(self, async_client: "AsyncClient", role: UserRole): "updated_at": record.updated_at.isoformat(), } + test_telemetry.track_crud_records.assert_called_with(action="read", record_or_dataset=record) + async def test_get_record_without_authentication(self, async_client: "AsyncClient"): record = await RecordFactory.create() @@ -136,7 +138,9 @@ async def 
test_get_record_with_nonexistent_record_id(self, async_client: "AsyncC assert response.json() == {"detail": f"Record with id `{record_id}` not found"} @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) - async def test_update_record(self, async_client: "AsyncClient", mock_search_engine: SearchEngine, role: UserRole): + async def test_update_record( + self, async_client: "AsyncClient", mock_search_engine: SearchEngine, role: UserRole, test_telemetry: MagicMock + ): dataset = await DatasetFactory.create() user = await UserFactory.create(workspaces=[dataset.workspace], role=role) question_0 = await TextQuestionFactory.create(dataset=dataset) @@ -229,6 +233,7 @@ async def test_update_record(self, async_client: "AsyncClient", mock_search_engi "updated_at": record.updated_at.isoformat(), } mock_search_engine.index_records.assert_called_once_with(dataset, [record]) + test_telemetry.track_crud_records.assert_called_with(action="update", record_or_dataset=record) async def test_update_record_with_null_metadata( self, async_client: "AsyncClient", mock_search_engine: SearchEngine, owner_auth_header: dict @@ -979,7 +984,13 @@ async def test_create_record_response_without_authentication(self, async_client: @pytest.mark.parametrize("status", ["submitted", "discarded", "draft"]) async def test_create_record_response( - self, async_client: "AsyncClient", db: "AsyncSession", owner: User, owner_auth_header: dict, status: str + self, + async_client: "AsyncClient", + db: "AsyncSession", + owner: User, + owner_auth_header: dict, + status: str, + test_telemetry: MagicMock, ): dataset = await DatasetFactory.create() await TextQuestionFactory.create(name="input_ok", dataset=dataset) @@ -1022,6 +1033,10 @@ async def test_create_record_response( "updated_at": datetime.fromisoformat(response_body["updated_at"]).isoformat(), } + test_telemetry.track_crud_records_subtopic.assert_called_with( + action="create", sub_topic="responses", record_id=record.id + ) + 
@pytest.mark.parametrize( "status, expected_status_code, expected_response_count", [("submitted", 422, 0), ("discarded", 201, 1), ("draft", 201, 1)], @@ -1221,7 +1236,7 @@ async def test_create_record_response_with_nonexistent_record_id( assert (await db.execute(select(func.count(Response.id)))).scalar() == 0 @pytest.mark.parametrize("role", [UserRole.annotator, UserRole.admin, UserRole.owner]) - async def test_get_record_suggestions(self, async_client: "AsyncClient", role: UserRole): + async def test_get_record_suggestions(self, async_client: "AsyncClient", role: UserRole, test_telemetry: MagicMock): dataset = await DatasetFactory.create() user = await UserFactory.create(role=role, workspaces=[dataset.workspace]) record = await RecordFactory.create(dataset=dataset) @@ -1263,6 +1278,9 @@ async def test_get_record_suggestions(self, async_client: "AsyncClient", role: U }, ] } + test_telemetry.track_crud_records_subtopic.assert_called_with( + action="read", sub_topic="suggestions", record_id=record.id, count=len(response.json()["items"]) + ) @pytest.mark.parametrize( "payload", @@ -1283,7 +1301,7 @@ async def test_get_record_suggestions(self, async_client: "AsyncClient", role: U ) @pytest.mark.parametrize("role", [UserRole.admin, UserRole.owner]) async def test_create_record_suggestion( - self, async_client: "AsyncClient", db: "AsyncSession", role: UserRole, payload: dict + self, async_client: "AsyncClient", db: "AsyncSession", role: UserRole, payload: dict, test_telemetry: MagicMock ): dataset = await DatasetFactory.create() question = await TextQuestionFactory.create(dataset=dataset) @@ -1312,6 +1330,9 @@ async def test_create_record_suggestion( } assert (await db.execute(select(func.count(Suggestion.id)))).scalar() == 1 + test_telemetry.track_crud_records_subtopic.assert_called_with( + action="create", sub_topic="suggestions", record_id=record.id + ) async def test_create_record_suggestion_update( self, async_client: "AsyncClient", db: "AsyncSession", 
mock_search_engine: SearchEngine, owner_auth_header: dict @@ -1400,7 +1421,12 @@ async def test_create_record_suggestion_as_annotator(self, async_client: "AsyncC @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) async def test_delete_record( - self, async_client: "AsyncClient", db: "AsyncSession", mock_search_engine: "SearchEngine", role: UserRole + self, + async_client: "AsyncClient", + db: "AsyncSession", + mock_search_engine: "SearchEngine", + role: UserRole, + test_telemetry: MagicMock, ): dataset = await DatasetFactory.create() record = await RecordFactory.create(dataset=dataset) @@ -1422,6 +1448,7 @@ async def test_delete_record( } assert (await db.execute(select(func.count(Record.id)))).scalar() == 0 mock_search_engine.delete_records.assert_called_once_with(dataset=dataset, records=[record]) + test_telemetry.track_crud_records.assert_called_with(action="delete", record_or_dataset=record) async def test_delete_record_as_admin_from_another_workspace(self, async_client: "AsyncClient", db: "AsyncSession"): dataset = await DatasetFactory.create() @@ -1458,7 +1485,12 @@ async def test_delete_record_non_existent(self, async_client: "AsyncClient", own @pytest.mark.parametrize("role", [UserRole.admin, UserRole.owner]) async def test_delete_record_suggestions( - self, async_client: "AsyncClient", db: "AsyncSession", mock_search_engine: SearchEngine, role: UserRole + self, + async_client: "AsyncClient", + db: "AsyncSession", + mock_search_engine: SearchEngine, + role: UserRole, + test_telemetry: MagicMock, ) -> None: dataset = await DatasetFactory.create() user = await UserFactory.create(workspaces=[dataset.workspace], role=role) @@ -1481,6 +1513,12 @@ async def test_delete_record_suggestions( expected_calls = [call(suggestion) for suggestion in suggestions] mock_search_engine.delete_record_suggestion.assert_has_calls(expected_calls) + test_telemetry.track_crud_records_subtopic.assert_called_with( + action="delete", + sub_topic="suggestions", + 
record_id=record.id, + count=len(suggestions_ids) + len(random_uuids), + ) async def test_delete_record_suggestions_with_no_ids( self, async_client: "AsyncClient", owner_auth_header: dict diff --git a/argilla-server/tests/unit/api/handlers/v1/test_responses.py b/argilla-server/tests/unit/api/handlers/v1/test_responses.py index ceddf5201d..b2a9f56baf 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_responses.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_responses.py @@ -14,6 +14,7 @@ from datetime import datetime from typing import TYPE_CHECKING, Any, Type +from unittest.mock import MagicMock from uuid import uuid4 import pytest @@ -71,6 +72,7 @@ async def test_update_response( mock_search_engine: SearchEngine, owner_auth_header: dict, response_json: dict, + test_telemetry: MagicMock, ): dataset = await DatasetFactory.create(status=DatasetStatus.ready) await TextQuestionFactory.create(name="input_ok", dataset=dataset, required=True) @@ -106,6 +108,9 @@ async def test_update_response( assert dataset.updated_at == dataset_previous_updated_at mock_search_engine.update_record_response.assert_called_once_with(response) + test_telemetry.track_crud_records_subtopic.assert_called_with( + action="update", sub_topic="responses", record_id=record.id + ) async def test_update_response_without_authentication(self, async_client: "AsyncClient", db: "AsyncSession"): response = await ResponseFactory.create( @@ -408,7 +413,12 @@ async def test_update_response_with_nonexistent_response_id( } async def test_delete_response( - self, async_client: "AsyncClient", mock_search_engine: SearchEngine, db: "AsyncSession", owner_auth_header: dict + self, + async_client: "AsyncClient", + mock_search_engine: SearchEngine, + db: "AsyncSession", + owner_auth_header: dict, + test_telemetry: MagicMock, ): response = await ResponseFactory.create() dataset = response.record.dataset @@ -425,6 +435,9 @@ async def test_delete_response( assert dataset.updated_at == 
dataset_previous_updated_at mock_search_engine.delete_record_response.assert_called_once_with(response) + test_telemetry.track_crud_records_subtopic.assert_called_with( + action="delete", sub_topic="responses", record_id=response.record.id + ) async def test_delete_response_without_authentication(self, async_client: "AsyncClient", db: "AsyncSession"): response = await ResponseFactory.create() diff --git a/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py b/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py index 5f83800df0..f40f44f429 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py @@ -14,6 +14,7 @@ from datetime import datetime from typing import TYPE_CHECKING +from unittest.mock import MagicMock from uuid import uuid4 import pytest @@ -33,7 +34,12 @@ class TestSuiteSuggestions: @pytest.mark.parametrize("role", [UserRole.admin, UserRole.owner]) async def test_delete_suggestion( - self, async_client: "AsyncClient", mock_search_engine: SearchEngine, db: "AsyncSession", role: UserRole + self, + async_client: "AsyncClient", + mock_search_engine: SearchEngine, + db: "AsyncSession", + role: UserRole, + test_telemetry: MagicMock, ) -> None: suggestion = await SuggestionFactory.create() user = await UserFactory.create(role=role, workspaces=[suggestion.record.dataset.workspace]) @@ -60,6 +66,9 @@ async def test_delete_suggestion( assert (await db.execute(select(func.count(Suggestion.id)))).scalar() == 0 mock_search_engine.delete_record_suggestion.assert_called_once_with(suggestion) + test_telemetry.track_crud_records_subtopic.assert_called_with( + action="delete", sub_topic="suggestions", record_id=suggestion.record.id + ) async def test_delete_suggestion_non_existent(self, async_client: "AsyncClient", owner_auth_header: dict) -> None: suggestion_id = uuid4() diff --git a/argilla-server/tests/unit/api/handlers/v1/test_users.py 
b/argilla-server/tests/unit/api/handlers/v1/test_users.py index 03851897d5..3975d122f6 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_users.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_users.py @@ -13,6 +13,7 @@ # limitations under the License. from typing import TYPE_CHECKING +from unittest.mock import MagicMock from uuid import uuid4 import pytest @@ -27,7 +28,9 @@ @pytest.mark.asyncio class TestsUsersV1Endpoints: - async def test_list_user_workspaces(self, async_client: "AsyncClient", owner_auth_header: dict): + async def test_list_user_workspaces( + self, async_client: "AsyncClient", owner_auth_header: dict, test_telemetry: MagicMock + ): workspaces = await WorkspaceFactory.create_batch(3) user = await UserFactory.create(workspaces=workspaces) @@ -45,6 +48,7 @@ async def test_list_user_workspaces(self, async_client: "AsyncClient", owner_aut for workspace in workspaces ] } + test_telemetry.track_crud_workspace.assert_called_with(action="list", workspace=None, count=len(workspaces)) async def test_list_user_workspaces_for_owner(self, async_client: "AsyncClient"): workspaces = await WorkspaceFactory.create_batch(5) diff --git a/argilla-server/tests/unit/api/handlers/v1/test_vectors_settings.py b/argilla-server/tests/unit/api/handlers/v1/test_vectors_settings.py index fb58a34e67..60b8479b4a 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_vectors_settings.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_vectors_settings.py @@ -13,6 +13,7 @@ # limitations under the License. 
from typing import TYPE_CHECKING +from unittest.mock import MagicMock from uuid import uuid4 import pytest @@ -29,7 +30,7 @@ @pytest.mark.asyncio class TestSuiteVectorsSettings: @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) - async def test_update_vector_settings(self, async_client: "AsyncClient", role: UserRole): + async def test_update_vector_settings(self, async_client: "AsyncClient", role: UserRole, test_telemetry: MagicMock): vector_settings = await VectorSettingsFactory.create() user = await UserFactory.create(role=role, workspaces=[vector_settings.dataset.workspace]) @@ -51,6 +52,9 @@ async def test_update_vector_settings(self, async_client: "AsyncClient", role: U } assert vector_settings.title == "New Title" + test_telemetry.track_crud_dataset_setting.assert_called_with( + action="update", setting_name="vectors_settings", dataset=vector_settings.dataset, setting=vector_settings + ) @pytest.mark.parametrize("title", [None, "", "t" * (VECTOR_SETTINGS_CREATE_TITLE_MAX_LENGTH + 1)]) async def test_update_vector_settings_with_invalid_title( @@ -118,7 +122,7 @@ async def test_update_vector_settings_as_annotator(self, async_client: "AsyncCli assert response.status_code == 403 @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) - async def test_delete_vector_settings(self, async_client: "AsyncClient", role: UserRole): + async def test_delete_vector_settings(self, async_client: "AsyncClient", role: UserRole, test_telemetry: MagicMock): vector_settings = await VectorSettingsFactory.create() user = await UserFactory.create(role=role, workspaces=[vector_settings.dataset.workspace]) @@ -136,6 +140,9 @@ async def test_delete_vector_settings(self, async_client: "AsyncClient", role: U "inserted_at": vector_settings.inserted_at.isoformat(), "updated_at": vector_settings.updated_at.isoformat(), } + test_telemetry.track_crud_dataset_setting.assert_called_with( + action="delete", setting_name="vectors_settings", 
dataset=vector_settings.dataset, setting=vector_settings + ) async def test_delete_vector_settings_non_existing(self, async_client: "AsyncClient", owner_auth_header: dict): vector_settings_id = uuid4() diff --git a/argilla-server/tests/unit/api/handlers/v1/test_workspaces.py b/argilla-server/tests/unit/api/handlers/v1/test_workspaces.py index 9696326ca1..76bbe76006 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_workspaces.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_workspaces.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from unittest.mock import MagicMock from uuid import uuid4 import pytest @@ -23,7 +24,7 @@ @pytest.mark.asyncio class TestSuiteWorkspaces: - async def test_get_workspace(self, async_client: AsyncClient, owner_auth_header: dict): + async def test_get_workspace(self, async_client: AsyncClient, owner_auth_header: dict, test_telemetry: MagicMock): workspace = await WorkspaceFactory.create(name="workspace") response = await async_client.get(f"/api/v1/workspaces/{workspace.id}", headers=owner_auth_header) @@ -35,6 +36,7 @@ async def test_get_workspace(self, async_client: AsyncClient, owner_auth_header: "inserted_at": workspace.inserted_at.isoformat(), "updated_at": workspace.updated_at.isoformat(), } + test_telemetry.track_crud_workspace(action="read", workspace=workspace) async def test_get_workspace_without_authentication(self, async_client: AsyncClient): workspace = await WorkspaceFactory.create() @@ -80,7 +82,9 @@ async def test_get_workspace_with_nonexistent_workspace_id( assert response.status_code == 404 assert response.json() == {"detail": f"Workspace with id `{workspace_id}` not found"} - async def test_delete_workspace(self, async_client: AsyncClient, owner_auth_header: dict): + async def test_delete_workspace( + self, async_client: AsyncClient, owner_auth_header: 
dict, test_telemetry: MagicMock + ): workspace = await WorkspaceFactory.create(name="workspace_delete") other_workspace = await WorkspaceFactory.create() @@ -89,6 +93,7 @@ async def test_delete_workspace(self, async_client: AsyncClient, owner_auth_head response = await async_client.delete(f"/api/v1/workspaces/{workspace.id}", headers=owner_auth_header) assert response.status_code == 200 + test_telemetry.track_crud_workspace(action="delete", workspace=workspace) async def test_delete_workspace_with_feedback_datasets(self, async_client: AsyncClient, owner_auth_header: dict): workspace = await WorkspaceFactory.create(name="workspace_delete") @@ -124,7 +129,9 @@ async def test_delete_workspace_without_permissions(self, async_client: AsyncCli assert response.status_code == 403 @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin, UserRole.annotator]) - async def test_list_workspaces_me(self, async_client: AsyncClient, role: UserRole) -> None: + async def test_list_workspaces_me( + self, async_client: AsyncClient, role: UserRole, test_telemetry: MagicMock + ) -> None: workspaces = await WorkspaceFactory.create_batch(size=5) user = await UserFactory.create(role=role, workspaces=workspaces if role != UserRole.owner else []) @@ -139,6 +146,7 @@ async def test_list_workspaces_me(self, async_client: AsyncClient, role: UserRol "inserted_at": workspace.inserted_at.isoformat(), "updated_at": workspace.updated_at.isoformat(), } in response.json()["items"] + test_telemetry.track_crud_workspace(action="list", workspace=None, count=list(workspaces)) async def test_list_workspaces_me_without_authentication(self, async_client: AsyncClient) -> None: response = await async_client.get("/api/v1/me/workspaces") From 18c5d0f1eb229210c5ed7c7423df88715a9cca91 Mon Sep 17 00:00:00 2001 From: David Berenstein Date: Wed, 17 Jul 2024 15:27:35 +0200 Subject: [PATCH 32/63] Update argilla-server/src/argilla_server/settings.py --- argilla-server/src/argilla_server/settings.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/argilla-server/src/argilla_server/settings.py b/argilla-server/src/argilla_server/settings.py index 1b9016edfe..6b0ffcf17e 100644 --- a/argilla-server/src/argilla_server/settings.py +++ b/argilla-server/src/argilla_server/settings.py @@ -162,7 +162,7 @@ class Settings(BaseSettings): ) # See also the telemetry.py module - @validator("database_url", pre=True, always=True) + @validator("enable_telemetry", pre=True, always=True) def set_enable_telemetry(cls, enable_telemetry: bool) -> bool: if os.getenv("HF_HUB_DISABLE_TELEMETRY") == "1" or os.getenv("HF_HUB_OFFLINE") == "1": enable_telemetry = False From 0230bfa3108d4a4ea57c3e6399d90a3c944bfeac Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 18 Jul 2024 09:10:07 +0200 Subject: [PATCH 33/63] Remove Python version from sytem info --- argilla-server/src/argilla_server/telemetry.py | 1 - 1 file changed, 1 deletion(-) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index aa682cad58..9d3b5ddc59 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -63,7 +63,6 @@ def __post_init__(self, enable_telemetry: bool): "system": platform.system(), "machine": platform.machine(), "platform": platform.platform(), - "python_version": platform.python_version(), "sys_version": platform.version(), "deployment": server_deployment_type(), "docker": is_running_on_docker_container(), From cbacf353bf2e64f3b302c2f6d508cde77fc83ff1 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 18 Jul 2024 15:23:33 +0200 Subject: [PATCH 34/63] Update tests to also check assert call track_data --- .../src/argilla_server/telemetry.py | 42 +++++++++---------- .../unit/api/handlers/v1/test_datasets.py | 16 +++++++ .../tests/unit/api/handlers/v1/test_fields.py | 4 ++ .../handlers/v1/test_list_dataset_records.py | 2 + .../handlers/v1/test_metadata_properties.py | 3 ++ 
.../unit/api/handlers/v1/test_questions.py | 2 + .../unit/api/handlers/v1/test_records.py | 12 ++++++ .../unit/api/handlers/v1/test_responses.py | 4 ++ .../unit/api/handlers/v1/test_suggestions.py | 2 + .../tests/unit/api/handlers/v1/test_users.py | 2 + .../api/handlers/v1/test_vectors_settings.py | 4 ++ .../unit/api/handlers/v1/test_workspaces.py | 9 ++-- .../tests/unit/commons/test_telemetry.py | 5 ++- argilla-server/tests/unit/conftest.py | 24 ++++++----- .../tests/unit/errors/test_api_errors.py | 1 + 15 files changed, 94 insertions(+), 38 deletions(-) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 9d3b5ddc59..7924e561d9 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -100,14 +100,19 @@ def _process_record_model(record: Record): @staticmethod def _process_dataset_settings(dataset: Dataset): - return { - "count_fields": len(dataset.fields), - "count_questions": len(dataset.questions), - "count_vector_settings": len(dataset.vectors_settings), - "count_metadata_properties": len(dataset.metadata_properties), - "allow_extra_metadata": dataset.allow_extra_metadata, - "guidelines": True if dataset.guidelines else False, - } + attributes = [ + "fields", + "questions", + "vectors_settings", + "metadata_properties", + "allow_extra_metadata", + "guidelines", + ] + user_data = {} + for attr in attributes: + if dataset.is_relationship_loaded(attr): + user_data[attr] = getattr(dataset, attr) + return user_data @staticmethod def _process_dataset_setting_settings( @@ -196,21 +201,14 @@ async def track_crud_dataset( user_agent.update(self._process_dataset_settings(dataset=dataset)) await self.track_data(topic=topic, user_agent=user_agent, count=count) + attributes: list[str] = ["fields", "questions", "vectors_settings", "metadata_properties"] if dataset: - for field in dataset.fields: - self.track_crud_dataset_setting(action=action, 
setting_name="fields", dataset=dataset, setting=field) - for question in dataset.questions: - self.track_crud_dataset_setting( - action=action, setting_name="questions", dataset=dataset, setting=question - ) - for vector in dataset.vectors_settings: - self.track_crud_dataset_setting( - action=action, setting_name="vectors_settings", dataset=dataset, setting=vector - ) - for meta_data in dataset.metadata_properties: - self.track_crud_dataset_setting( - action=action, setting_name="metadata_properties", dataset=dataset, setting=meta_data - ) + for attr in attributes: + if dataset.is_relationship_loaded(attr): + for obtained_attr in getattr(dataset, attr): + self.track_crud_dataset_setting( + action=action, setting_name=attr, dataset=dataset, setting=obtained_attr + ) async def track_crud_dataset_setting( self, diff --git a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py index 9eccdd18f3..442f01af2d 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py @@ -236,6 +236,7 @@ async def test_list_dataset_fields( test_telemetry.track_crud_dataset_setting.assert_called_with( action="list", dataset=dataset, setting_name="fields", count=len(response.json()["items"]) ) + test_telemetry.track_data.assert_called() async def test_list_dataset_fields_without_authentication(self, async_client: "AsyncClient"): dataset = await DatasetFactory.create() @@ -359,6 +360,7 @@ async def test_list_dataset_questions( test_telemetry.track_crud_dataset_setting.assert_called_with( action="list", dataset=dataset, setting_name="questions", count=len(response.json()["items"]) ) + test_telemetry.track_data.assert_called() @pytest.mark.parametrize( "QuestionFactory, settings", @@ -649,6 +651,7 @@ async def test_list_dataset_vectors_settings( test_telemetry.track_crud_dataset_setting.assert_called_with( action="list", dataset=dataset, 
setting_name="vectors_settings", count=len(response.json()["items"]) ) + test_telemetry.track_data.assert_called() @pytest.mark.parametrize("role", [UserRole.annotator, UserRole.admin]) async def test_list_dataset_vectors_settings_as_user_from_another_workspace( @@ -691,6 +694,7 @@ async def test_get_dataset(self, async_client: "AsyncClient", owner_auth_header: } test_telemetry.track_crud_dataset.assert_called_once_with(action="read", dataset=dataset) + test_telemetry.track_data.assert_called() async def test_get_dataset_without_authentication(self, async_client: "AsyncClient"): dataset = await DatasetFactory.create() @@ -1027,6 +1031,7 @@ async def test_create_dataset_field( test_telemetry.track_crud_dataset_setting.assert_called_once_with( action="create", setting_name="fields", dataset=dataset, setting=ANY ) + test_telemetry.track_data.assert_called() async def test_create_dataset_field_without_authentication(self, async_client: "AsyncClient", db: "AsyncSession"): dataset = await DatasetFactory.create() @@ -1278,6 +1283,7 @@ async def test_create_dataset_metadata_property( test_telemetry.track_crud_dataset_setting.assert_called_once_with( action="create", setting_name="metadata_properties", dataset=dataset, setting=ANY ) + test_telemetry.track_data.assert_called() async def test_create_dataset_metadata_property_with_dataset_ready( self, @@ -1592,6 +1598,7 @@ async def test_create_dataset_vector_settings( test_telemetry.track_crud_dataset_setting.assert_called_once_with( action="create", setting_name="vectors_settings", dataset=dataset, setting=ANY ) + test_telemetry.track_data.assert_called() @pytest.mark.parametrize( "payload", @@ -1812,6 +1819,7 @@ async def test_create_dataset_records( test_telemetry.track_crud_records.assert_called_once_with( action="create", record_or_dataset=dataset, count=len(records) ) + test_telemetry.track_data.assert_called() async def test_create_dataset_records_with_response_for_multiple_users( self, @@ -2593,6 +2601,7 @@ async def 
test_create_dataset_records_as_admin( test_telemetry.track_crud_records.assert_called_once_with( action="create", record_or_dataset=dataset, count=len(records) ) + test_telemetry.track_data.assert_called() async def test_create_dataset_records_as_annotator(self, async_client: "AsyncClient", db: "AsyncSession"): annotator = await AnnotatorFactory.create() @@ -3059,6 +3068,7 @@ async def test_update_dataset_records( mock_search_engine.index_records.assert_called_once_with(dataset, records[:3]) test_telemetry.track_crud_records.assert_called_once_with(action="update", record_or_dataset=dataset, count=4) + test_telemetry.track_data.assert_called() async def test_update_dataset_records_with_suggestions( self, async_client: "AsyncClient", mock_search_engine: "SearchEngine", owner_auth_header: dict @@ -3585,9 +3595,11 @@ async def test_delete_dataset_records( # `delete_records` is called with the records returned by the delete statement, which are different ORM objects # than the ones created by the factory mock_search_engine.delete_records.assert_called_once_with(dataset=dataset, records=ANY) + test_telemetry.track_crud_records.assert_called_once_with( action="delete", record_or_dataset=dataset, count=len(records_ids) + len(random_uuids) ) + test_telemetry.track_data.assert_called() async def test_delete_dataset_records_with_no_ids(self, async_client: "AsyncClient", owner_auth_header: dict): dataset = await DatasetFactory.create() @@ -4812,7 +4824,9 @@ async def test_update_dataset( assert dataset.name == name assert dataset.guidelines == guidelines assert dataset.allow_extra_metadata is allow_extra_metadata + test_telemetry.track_crud_dataset.assert_called_once_with(action="update", dataset=dataset) + test_telemetry.track_data.assert_called() @pytest.mark.parametrize( "dataset_json", @@ -4939,7 +4953,9 @@ async def test_delete_dataset( # ] mock_search_engine.delete_index.assert_called_once_with(dataset) + 
test_telemetry.track_crud_dataset.assert_called_once_with(action="delete", dataset=dataset) + test_telemetry.track_data.assert_called() async def test_delete_published_dataset( self, async_client: "AsyncClient", db: "AsyncSession", owner: User, owner_auth_header: dict diff --git a/argilla-server/tests/unit/api/handlers/v1/test_fields.py b/argilla-server/tests/unit/api/handlers/v1/test_fields.py index 3a8e384d94..98a94f640f 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_fields.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_fields.py @@ -82,9 +82,11 @@ async def test_update_field( field = await db.get(Field, field.id) assert field.title == title assert field.settings == expected_settings + test_telemetry.track_crud_dataset_setting.assert_called_with( action="update", dataset=field.dataset, setting_name="fields", setting=field ) + test_telemetry.track_data.assert_called() @pytest.mark.parametrize("title", [None, "", "t" * (FIELD_CREATE_TITLE_MAX_LENGTH + 1)]) @@ -209,9 +211,11 @@ async def test_delete_field( "inserted_at": datetime.fromisoformat(response_body["inserted_at"]).isoformat(), "updated_at": datetime.fromisoformat(response_body["updated_at"]).isoformat(), } + test_telemetry.track_crud_dataset_setting.assert_called_with( action="delete", dataset=field.dataset, setting_name="fields", setting=field ) + test_telemetry.track_data.assert_called() @pytest.mark.asyncio diff --git a/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py b/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py index 7cb55406c5..56f1786dc4 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py @@ -101,9 +101,11 @@ async def test_list_dataset_records( }, ], } + test_telemetry.track_crud_records.assert_called_with( action="list", record_or_dataset=field.dataset, count=response.json()["total"] ) + 
test_telemetry.track_data.assert_called() @pytest.mark.parametrize( "includes", diff --git a/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py b/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py index 8d6c7b4f42..7beb4f4485 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py @@ -199,6 +199,7 @@ async def test_update_metadata_property( dataset=metadata_property.dataset, setting=metadata_property, ) + test_telemetry.track_data.assert_called() @pytest.mark.asyncio @@ -424,12 +425,14 @@ async def test_delete_metadata_property( } assert (await db.execute(select(func.count(MetadataProperty.id)))).scalar() == 0 + test_telemetry.track_crud_dataset_setting.assert_called_with( action="delete", setting_name="metadata_properties", dataset=metadata_property.dataset, setting=metadata_property, ) + test_telemetry.track_data.assert_called() @pytest.mark.asyncio diff --git a/argilla-server/tests/unit/api/handlers/v1/test_questions.py b/argilla-server/tests/unit/api/handlers/v1/test_questions.py index 3d739b8cd3..879a920215 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_questions.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_questions.py @@ -283,6 +283,7 @@ async def test_update_question( test_telemetry.track_crud_dataset_setting.assert_called_with( action="update", dataset=question.dataset, setting_name="questions", setting=ANY ) + test_telemetry.track_data.assert_called() @pytest.mark.parametrize("title", [None, "", "t" * (QUESTION_CREATE_TITLE_MAX_LENGTH + 1)]) @@ -515,6 +516,7 @@ async def test_delete_question( test_telemetry.track_crud_dataset_setting.assert_called_with( action="delete", dataset=question.dataset, setting_name="questions", setting=ANY ) + test_telemetry.track_data.assert_called() @pytest.mark.asyncio diff --git a/argilla-server/tests/unit/api/handlers/v1/test_records.py 
b/argilla-server/tests/unit/api/handlers/v1/test_records.py index 81b1c0370e..ee75fe7f01 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_records.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_records.py @@ -104,6 +104,7 @@ async def test_get_record(self, async_client: "AsyncClient", role: UserRole, tes } test_telemetry.track_crud_records.assert_called_with(action="read", record_or_dataset=record) + test_telemetry.track_data.assert_called() async def test_get_record_without_authentication(self, async_client: "AsyncClient"): record = await RecordFactory.create() @@ -233,7 +234,9 @@ async def test_update_record( "updated_at": record.updated_at.isoformat(), } mock_search_engine.index_records.assert_called_once_with(dataset, [record]) + test_telemetry.track_crud_records.assert_called_with(action="update", record_or_dataset=record) + test_telemetry.track_data.assert_called() async def test_update_record_with_null_metadata( self, async_client: "AsyncClient", mock_search_engine: SearchEngine, owner_auth_header: dict @@ -1036,6 +1039,7 @@ async def test_create_record_response( test_telemetry.track_crud_records_subtopic.assert_called_with( action="create", sub_topic="responses", record_id=record.id ) + test_telemetry.track_data.assert_called() @pytest.mark.parametrize( "status, expected_status_code, expected_response_count", @@ -1278,9 +1282,11 @@ async def test_get_record_suggestions(self, async_client: "AsyncClient", role: U }, ] } + test_telemetry.track_crud_records_subtopic.assert_called_with( action="read", sub_topic="suggestions", record_id=record.id, count=len(response.json()["items"]) ) + test_telemetry.track_data.assert_called() @pytest.mark.parametrize( "payload", @@ -1330,9 +1336,11 @@ async def test_create_record_suggestion( } assert (await db.execute(select(func.count(Suggestion.id)))).scalar() == 1 + test_telemetry.track_crud_records_subtopic.assert_called_with( action="create", sub_topic="suggestions", record_id=record.id ) + 
test_telemetry.track_data.assert_called() async def test_create_record_suggestion_update( self, async_client: "AsyncClient", db: "AsyncSession", mock_search_engine: SearchEngine, owner_auth_header: dict @@ -1448,7 +1456,9 @@ async def test_delete_record( } assert (await db.execute(select(func.count(Record.id)))).scalar() == 0 mock_search_engine.delete_records.assert_called_once_with(dataset=dataset, records=[record]) + test_telemetry.track_crud_records.assert_called_with(action="delete", record_or_dataset=record) + test_telemetry.track_data.assert_called() async def test_delete_record_as_admin_from_another_workspace(self, async_client: "AsyncClient", db: "AsyncSession"): dataset = await DatasetFactory.create() @@ -1513,12 +1523,14 @@ async def test_delete_record_suggestions( expected_calls = [call(suggestion) for suggestion in suggestions] mock_search_engine.delete_record_suggestion.assert_has_calls(expected_calls) + test_telemetry.track_crud_records_subtopic.assert_called_with( action="delete", sub_topic="suggestions", record_id=record.id, count=len(suggestions_ids) + len(random_uuids), ) + test_telemetry.track_data.assert_called() async def test_delete_record_suggestions_with_no_ids( self, async_client: "AsyncClient", owner_auth_header: dict diff --git a/argilla-server/tests/unit/api/handlers/v1/test_responses.py b/argilla-server/tests/unit/api/handlers/v1/test_responses.py index b2a9f56baf..7295e87712 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_responses.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_responses.py @@ -108,9 +108,11 @@ async def test_update_response( assert dataset.updated_at == dataset_previous_updated_at mock_search_engine.update_record_response.assert_called_once_with(response) + test_telemetry.track_crud_records_subtopic.assert_called_with( action="update", sub_topic="responses", record_id=record.id ) + test_telemetry.track_data.assert_called() async def test_update_response_without_authentication(self, async_client: 
"AsyncClient", db: "AsyncSession"): response = await ResponseFactory.create( @@ -435,9 +437,11 @@ async def test_delete_response( assert dataset.updated_at == dataset_previous_updated_at mock_search_engine.delete_record_response.assert_called_once_with(response) + test_telemetry.track_crud_records_subtopic.assert_called_with( action="delete", sub_topic="responses", record_id=response.record.id ) + test_telemetry.track_data.assert_called() async def test_delete_response_without_authentication(self, async_client: "AsyncClient", db: "AsyncSession"): response = await ResponseFactory.create() diff --git a/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py b/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py index f40f44f429..2507dd609b 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py @@ -66,9 +66,11 @@ async def test_delete_suggestion( assert (await db.execute(select(func.count(Suggestion.id)))).scalar() == 0 mock_search_engine.delete_record_suggestion.assert_called_once_with(suggestion) + test_telemetry.track_crud_records_subtopic.assert_called_with( action="delete", sub_topic="suggestions", record_id=suggestion.record.id ) + test_telemetry.track_data.assert_called() async def test_delete_suggestion_non_existent(self, async_client: "AsyncClient", owner_auth_header: dict) -> None: suggestion_id = uuid4() diff --git a/argilla-server/tests/unit/api/handlers/v1/test_users.py b/argilla-server/tests/unit/api/handlers/v1/test_users.py index 3975d122f6..15ad3b6b40 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_users.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_users.py @@ -48,7 +48,9 @@ async def test_list_user_workspaces( for workspace in workspaces ] } + test_telemetry.track_crud_workspace.assert_called_with(action="list", workspace=None, count=len(workspaces)) + test_telemetry.track_data.assert_called() async def 
test_list_user_workspaces_for_owner(self, async_client: "AsyncClient"): workspaces = await WorkspaceFactory.create_batch(5) diff --git a/argilla-server/tests/unit/api/handlers/v1/test_vectors_settings.py b/argilla-server/tests/unit/api/handlers/v1/test_vectors_settings.py index 60b8479b4a..df448d10f3 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_vectors_settings.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_vectors_settings.py @@ -52,9 +52,11 @@ async def test_update_vector_settings(self, async_client: "AsyncClient", role: U } assert vector_settings.title == "New Title" + test_telemetry.track_crud_dataset_setting.assert_called_with( action="update", setting_name="vectors_settings", dataset=vector_settings.dataset, setting=vector_settings ) + test_telemetry.track_data.assert_called() @pytest.mark.parametrize("title", [None, "", "t" * (VECTOR_SETTINGS_CREATE_TITLE_MAX_LENGTH + 1)]) async def test_update_vector_settings_with_invalid_title( @@ -140,9 +142,11 @@ async def test_delete_vector_settings(self, async_client: "AsyncClient", role: U "inserted_at": vector_settings.inserted_at.isoformat(), "updated_at": vector_settings.updated_at.isoformat(), } + test_telemetry.track_crud_dataset_setting.assert_called_with( action="delete", setting_name="vectors_settings", dataset=vector_settings.dataset, setting=vector_settings ) + test_telemetry.track_data.assert_called() async def test_delete_vector_settings_non_existing(self, async_client: "AsyncClient", owner_auth_header: dict): vector_settings_id = uuid4() diff --git a/argilla-server/tests/unit/api/handlers/v1/test_workspaces.py b/argilla-server/tests/unit/api/handlers/v1/test_workspaces.py index 76bbe76006..4b8c39b78b 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_workspaces.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_workspaces.py @@ -36,7 +36,9 @@ async def test_get_workspace(self, async_client: AsyncClient, owner_auth_header: "inserted_at": 
workspace.inserted_at.isoformat(), "updated_at": workspace.updated_at.isoformat(), } + test_telemetry.track_crud_workspace(action="read", workspace=workspace) + test_telemetry.track_data.assert_called() async def test_get_workspace_without_authentication(self, async_client: AsyncClient): workspace = await WorkspaceFactory.create() @@ -93,7 +95,9 @@ async def test_delete_workspace( response = await async_client.delete(f"/api/v1/workspaces/{workspace.id}", headers=owner_auth_header) assert response.status_code == 200 + test_telemetry.track_crud_workspace(action="delete", workspace=workspace) + test_telemetry.track_data.assert_called() async def test_delete_workspace_with_feedback_datasets(self, async_client: AsyncClient, owner_auth_header: dict): workspace = await WorkspaceFactory.create(name="workspace_delete") @@ -129,9 +133,7 @@ async def test_delete_workspace_without_permissions(self, async_client: AsyncCli assert response.status_code == 403 @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin, UserRole.annotator]) - async def test_list_workspaces_me( - self, async_client: AsyncClient, role: UserRole, test_telemetry: MagicMock - ) -> None: + async def test_list_workspaces_me(self, async_client: AsyncClient, role: UserRole) -> None: workspaces = await WorkspaceFactory.create_batch(size=5) user = await UserFactory.create(role=role, workspaces=workspaces if role != UserRole.owner else []) @@ -146,7 +148,6 @@ async def test_list_workspaces_me( "inserted_at": workspace.inserted_at.isoformat(), "updated_at": workspace.updated_at.isoformat(), } in response.json()["items"] - test_telemetry.track_crud_workspace(action="list", workspace=None, count=list(workspaces)) async def test_list_workspaces_me_without_authentication(self, async_client: AsyncClient) -> None: response = await async_client.get("/api/v1/me/workspaces") diff --git a/argilla-server/tests/unit/commons/test_telemetry.py b/argilla-server/tests/unit/commons/test_telemetry.py index 
e1002d55d9..6f1b1ffc84 100644 --- a/argilla-server/tests/unit/commons/test_telemetry.py +++ b/argilla-server/tests/unit/commons/test_telemetry.py @@ -36,12 +36,13 @@ async def test_track_user_login(test_telemetry: MagicMock): await get_telemetry_client().track_user_login(request=mock_request, user=user) test_telemetry.track_user_login.assert_called_once_with(request=mock_request, user=user) + test_telemetry.track_data.assert_called() @pytest.mark.parametrize("is_oauth", [True, False]) @pytest.mark.parametrize("username", ["argilla", "john"]) -def test_user_created(test_telemetry, username: str, is_oauth: bool): +def test_user_created(test_telemetry: MagicMock, username: str, is_oauth: bool): user = User(id=uuid.uuid4(), username=username, role=UserRole.owner) - get_telemetry_client().track_crud_user(action="create", user=user, is_oauth=is_oauth) test_telemetry.track_crud_user.assert_called_once_with(action="create", user=user, is_oauth=is_oauth) + test_telemetry.track_data.assert_called() diff --git a/argilla-server/tests/unit/conftest.py b/argilla-server/tests/unit/conftest.py index 3988d235f9..df53145cb2 100644 --- a/argilla-server/tests/unit/conftest.py +++ b/argilla-server/tests/unit/conftest.py @@ -11,14 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- import contextlib -import uuid from typing import TYPE_CHECKING, Dict, Generator import pytest import pytest_asyncio -from argilla_server import telemetry from argilla_server.api.routes import api_v1 from argilla_server.constants import API_KEY_HEADER_NAME, DEFAULT_API_KEY from argilla_server.database import get_async_db @@ -33,8 +30,6 @@ from tests.factories import AnnotatorFactory, OwnerFactory, UserFactory if TYPE_CHECKING: - from unittest.mock import MagicMock - from pytest_mock import MockerFixture @@ -98,12 +93,21 @@ async def override_get_search_engine(): @pytest.fixture(autouse=True) -def test_telemetry(mocker: "MockerFixture") -> "MagicMock": - mock_telemetry = mocker.Mock(TelemetryClient) - mock_telemetry.server_id = uuid.uuid4() +def test_telemetry(mocker: "MockerFixture") -> "TelemetryClient": + # Create a real instance TelemetryClient + real_telemetry = TelemetryClient() + + # Create a wrapper to track calls to other methods + for attr_name in dir(real_telemetry): + attr = getattr(real_telemetry, attr_name) + if callable(attr) and not attr_name.startswith("__"): + wrapped = mocker.Mock(wraps=attr) + setattr(real_telemetry, attr_name, wrapped) + + # Patch the _TELEMETRY_CLIENT to use the real_telemetry + mocker.patch("argilla_server.telemetry._TELEMETRY_CLIENT", new=real_telemetry) - telemetry._TELEMETRY_CLIENT = mock_telemetry - return mock_telemetry + return real_telemetry @pytest_asyncio.fixture(scope="function") diff --git a/argilla-server/tests/unit/errors/test_api_errors.py b/argilla-server/tests/unit/errors/test_api_errors.py index c05a3644cc..fc593a024a 100644 --- a/argilla-server/tests/unit/errors/test_api_errors.py +++ b/argilla-server/tests/unit/errors/test_api_errors.py @@ -83,3 +83,4 @@ async def test_track_error(self, test_telemetry: MagicMock, error, expected_even user_agent["type"] = error.type test_telemetry.track_data.assert_called_once_with(topic="error/server", user_agent=user_agent) + test_telemetry.track_data.assert_called() From 
2473c824ed9d88a6aa34cff670eea9e090b7b7ec Mon Sep 17 00:00:00 2001 From: David Berenstein Date: Mon, 22 Jul 2024 18:02:23 +0200 Subject: [PATCH 35/63] Add documentation for telemetry information (#5253) # Description NA **Type of change** - Documentation update **How Has This Been Tested** **Checklist** - I added relevant documentation - I followed the style guidelines of this project - I did a self-review of my code - I made corresponding changes to the documentation - I confirm My changes generate no new warnings - I have added tests that prove my fix is effective or that my feature works - I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- argilla/docs/reference/telemetry.md | 68 +++++++++++++++++++++++++++++ argilla/mkdocs.yml | 2 + 2 files changed, 70 insertions(+) create mode 100644 argilla/docs/reference/telemetry.md diff --git a/argilla/docs/reference/telemetry.md b/argilla/docs/reference/telemetry.md new file mode 100644 index 0000000000..d2e907f402 --- /dev/null +++ b/argilla/docs/reference/telemetry.md @@ -0,0 +1,68 @@ +# Server Telemetry + +Argilla uses telemetry to report anonymous usage and error information. As open-source software, this type of information is important for understanding and improving how the product is used. This is done through the telemetry implementation of the [Hugging Face Hub library](https://github.com/huggingface/huggingface_hub). + +## How to opt-out + +You can opt out of telemetry reporting by setting the environment variable `HF_HUB_DISABLE_TELEMETRY` before launching the server. Setting this variable to `1` will completely disable telemetry reporting. + +If you are a Linux/macOS user, you should run: + +```bash +export HF_HUB_DISABLE_TELEMETRY=1 +``` + +If you are a Windows user, you should run: + +```bash +set HF_HUB_DISABLE_TELEMETRY=1 +``` + +To opt in again, you can set the variable to `0`. 
+ +## Why we report telemetry + +Anonymous telemetry information enables us to continuously improve the product and detect recurring problems to better serve all users. We collect aggregated information about general usage and errors. We do NOT collect any information on users' data records, datasets, or metadata information. + +## Sensitive data + +We do not collect any piece of information related to the source data you store in Argilla. We don't identify individual users. Your data does not leave your server at any time: + +* No dataset record is collected. +* No dataset names or metadata are collected. + +## Information reported + +The following usage and error information is reported: + +* The code of the raised error and the entity type related to the error, if any (Dataset, Workspace, ...) +* The `user-agent` and `accept-language` HTTP headers +* Task name and number of records for bulk operations +* An anonymously generated user UUID +* The Argilla version running the server +* The Python version, e.g. `3.8.13` +* The system/OS name, such as `Linux`, `Darwin`, `Windows` +* The system’s release version, e.g. `Darwin Kernel Version 21.5.0: Tue Apr 26 21:08:22 PDT 2022; root:xnu-8020` +* The machine type, e.g. `AMD64` +* The underlying platform spec with as much useful information as possible (e.g. `macOS-10.16-x86_64-i386-64bit`) +* The type of deployment: `quickstart` or `server`, and whether it is deployed on Hugging Face Spaces. 
+* The dockerized deployment flag: `True` or `False` + +This is performed by registering counters for the create, read, update, delete (CRUD) and list operations for different API resources: + +* Users +* Workspaces +* Datasets + * Settings + * Fields + * Questions + * Vector Settings + * Metadata Properties + * Records + * Suggestions + * Responses +* Raised server API errors + +For transparency, you can inspect the source code where this is performed [here](https://github.com/argilla-io/argilla/blob/main/argilla-server/src/argilla_server/telemetry.py). + +If you have any doubts, don't hesitate to join our [Discord channel](http://hf.co/join/discord) or open a GitHub issue. We'd be very happy to discuss how we can improve this. diff --git a/argilla/mkdocs.yml b/argilla/mkdocs.yml index c9a25aa8ea..2493bb40d8 100644 --- a/argilla/mkdocs.yml +++ b/argilla/mkdocs.yml @@ -151,6 +151,8 @@ nav: - Text classification task: tutorials/text_classification.ipynb - API Reference: - Python SDK: reference/argilla/ + - Telemetry: + - Server Telemetry: reference/telemetry.md - Community: - community/index.md - How to contribute?: community/contributor.md From 35c9e439357f455704e30a2b76d4dfcb0eafd923 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 22 Jul 2024 18:08:18 +0200 Subject: [PATCH 36/63] Add async telemetry client --- .../tests/unit/commons/test_telemetry.py | 41 +++++++++++++------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/argilla-server/tests/unit/commons/test_telemetry.py b/argilla-server/tests/unit/commons/test_telemetry.py index 6f1b1ffc84..394580df14 100644 --- a/argilla-server/tests/unit/commons/test_telemetry.py +++ b/argilla-server/tests/unit/commons/test_telemetry.py @@ -18,7 +18,7 @@ import pytest from argilla_server.enums import UserRole from argilla_server.models import User -from argilla_server.telemetry import TelemetryClient, get_telemetry_client +from argilla_server.telemetry import TelemetryClient from fastapi import Request 
mock_request = Request(scope={"type": "http", "headers": {}}) @@ -30,19 +30,36 @@ def test_disable_telemetry(): assert telemetry_client.enable_telemetry == False +__CRUD__ = ["create", "read", "update", "delete"] + + @pytest.mark.asyncio -async def test_track_user_login(test_telemetry: MagicMock): - user = User(id=uuid.uuid4(), username="argilla") - await get_telemetry_client().track_user_login(request=mock_request, user=user) +class TestSuiteTelemetry: + async def test_track_user_login(self, test_telemetry: MagicMock): + user = User(id=uuid.uuid4(), username="argilla") + await test_telemetry.track_user_login(request=mock_request, user=user) + + test_telemetry.track_user_login.assert_called_once_with(request=mock_request, user=user) + test_telemetry.track_data.assert_called() + + @pytest.mark.parametrize("is_oauth", [True, False]) + @pytest.mark.parametrize("username", ["argilla", "john"]) + @pytest.mark.parametrize("action", __CRUD__) + async def test_user_crud(self, test_telemetry: MagicMock, username: str, is_oauth: bool, action: str): + user = User(id=uuid.uuid4(), username=username, role=UserRole.owner) + + await test_telemetry.track_crud_user(action=action, user=user, is_oauth=is_oauth) - test_telemetry.track_user_login.assert_called_once_with(request=mock_request, user=user) - test_telemetry.track_data.assert_called() + test_telemetry.track_crud_user.assert_called_once_with(action=action, user=user, is_oauth=is_oauth) + test_telemetry.track_data.assert_called() + @pytest.mark.parametrize("is_oauth", [True, False]) + @pytest.mark.parametrize("username", ["argilla", "john"]) + @pytest.mark.parametrize("action", __CRUD__) + async def track_crud_workspace(self, test_telemetry: MagicMock, username: str, is_oauth: bool, action: str): + user = User(id=uuid.uuid4(), username=username, role=UserRole.owner) -@pytest.mark.parametrize("is_oauth", [True, False]) -@pytest.mark.parametrize("username", ["argilla", "john"]) -def test_user_created(test_telemetry: MagicMock, 
username: str, is_oauth: bool): - user = User(id=uuid.uuid4(), username=username, role=UserRole.owner) + await test_telemetry.track_crud_user(action=action, user=user, is_oauth=is_oauth) - test_telemetry.track_crud_user.assert_called_once_with(action="create", user=user, is_oauth=is_oauth) - test_telemetry.track_data.assert_called() + test_telemetry.track_crud_user.assert_called_once_with(action=action, user=user, is_oauth=is_oauth) + test_telemetry.track_data.assert_called() From df9a0fc9ad6db8e23df9d57dfcfe14a3b3fe7b87 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 22 Jul 2024 20:16:33 +0200 Subject: [PATCH 37/63] Update `test_telemetry` --- .../src/argilla_server/telemetry.py | 29 +++-- .../tests/unit/commons/test_telemetry.py | 101 +++++++++++++----- 2 files changed, 94 insertions(+), 36 deletions(-) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 7924e561d9..50805d4865 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -29,6 +29,7 @@ Field, FloatMetadataPropertySettings, IntegerMetadataPropertySettings, + MetadataProperty, MetadataPropertySettings, Question, Record, @@ -128,13 +129,21 @@ def _process_dataset_setting_settings( user_data = {"dataset_id": str(setting.dataset_id)} if isinstance(setting, (Field, Question)): user_data["required"] = setting.required - user_data.update(setting.settings) + user_data["type"] = setting.settings["type"] elif isinstance( - setting, (FloatMetadataPropertySettings, TermsMetadataPropertySettings, IntegerMetadataPropertySettings) + setting, + ( + FloatMetadataPropertySettings, + TermsMetadataPropertySettings, + IntegerMetadataPropertySettings, + MetadataProperty, + ), ): - user_data["type"] = setting.type + user_data["type"] = setting.type.value elif isinstance(setting, VectorSettings): user_data["dimensions"] = setting.dimensions + else: + raise NotImplementedError("Expected a 
setting to be processed.") return user_data @@ -206,7 +215,7 @@ async def track_crud_dataset( for attr in attributes: if dataset.is_relationship_loaded(attr): for obtained_attr in getattr(dataset, attr): - self.track_crud_dataset_setting( + await self.track_crud_dataset_setting( action=action, setting_name=attr, dataset=dataset, setting=obtained_attr ) @@ -218,24 +227,22 @@ async def track_crud_dataset_setting( setting: Union[Field, VectorSettings, Question, MetadataPropertySettings, None] = None, count: Union[int, None] = None, ): - topic = f"dataset/{setting_name}" - if setting: - if hasattr(setting, "settings"): - topic = f"{topic}/{setting.settings['type']}" - topic = f"{topic}/{action}" + topic = f"dataset/{setting_name}/{action}" user_agent = self._process_dataset_model(dataset=dataset) if setting: user_agent.update(self._process_dataset_setting_settings(setting=setting)) await self.track_data(topic=topic, user_agent=user_agent, count=count) async def track_crud_records( - self, action: str, record_or_dataset: Union[Record, None] = None, count: Union[int, None] = None + self, action: str, record_or_dataset: Union[Record, Dataset, None] = None, count: Union[int, None] = None ): topic = f"dataset/records/{action}" if isinstance(record_or_dataset, Record): user_agent = self._process_record_model(record=record_or_dataset) - else: + elif isinstance(record_or_dataset, Dataset): user_agent = self._process_dataset_model(dataset=record_or_dataset) + else: + raise NotImplementedError("Expected element of `Dataset` or `Record`") await self.track_data(topic=topic, user_agent=user_agent, count=count) async def track_crud_records_subtopic( diff --git a/argilla-server/tests/unit/commons/test_telemetry.py b/argilla-server/tests/unit/commons/test_telemetry.py index 394580df14..0add7aed08 100644 --- a/argilla-server/tests/unit/commons/test_telemetry.py +++ b/argilla-server/tests/unit/commons/test_telemetry.py @@ -13,53 +13,104 @@ # limitations under the License. 
import uuid +from typing import Union from unittest.mock import MagicMock import pytest -from argilla_server.enums import UserRole -from argilla_server.models import User +from argilla_server.models import ( + Record, + User, +) from argilla_server.telemetry import TelemetryClient from fastapi import Request -mock_request = Request(scope={"type": "http", "headers": {}}) - - -def test_disable_telemetry(): - telemetry_client = TelemetryClient(enable_telemetry=False) - - assert telemetry_client.enable_telemetry == False +from tests.factories import ( + DatasetFactory, + IntegerMetadataPropertyFactory, + LabelSelectionQuestionFactory, + MultiLabelSelectionQuestionFactory, + RankingQuestionFactory, + RatingQuestionFactory, + RecordFactory, + ResponseFactory, + SpanQuestionFactory, + SuggestionFactory, + TextFieldFactory, + TextQuestionFactory, + UserFactory, + VectorSettingsFactory, + WorkspaceFactory, +) +mock_request = Request(scope={"type": "http", "headers": {}}) __CRUD__ = ["create", "read", "update", "delete"] @pytest.mark.asyncio class TestSuiteTelemetry: + async def test_disable_telemetry(self): + telemetry_client = TelemetryClient(enable_telemetry=False) + + assert telemetry_client.enable_telemetry == False + async def test_track_user_login(self, test_telemetry: MagicMock): user = User(id=uuid.uuid4(), username="argilla") await test_telemetry.track_user_login(request=mock_request, user=user) - - test_telemetry.track_user_login.assert_called_once_with(request=mock_request, user=user) test_telemetry.track_data.assert_called() @pytest.mark.parametrize("is_oauth", [True, False]) - @pytest.mark.parametrize("username", ["argilla", "john"]) - @pytest.mark.parametrize("action", __CRUD__) - async def test_user_crud(self, test_telemetry: MagicMock, username: str, is_oauth: bool, action: str): - user = User(id=uuid.uuid4(), username=username, role=UserRole.owner) - - await test_telemetry.track_crud_user(action=action, user=user, is_oauth=is_oauth) + async def 
test_track_crud_user(self, test_telemetry: MagicMock, is_oauth: bool): + user = await UserFactory.create() + await test_telemetry.track_crud_user(action="create", user=user, is_oauth=is_oauth) + test_telemetry.track_data.assert_called() - test_telemetry.track_crud_user.assert_called_once_with(action=action, user=user, is_oauth=is_oauth) + async def test_track_track_crud_workspace(self, test_telemetry: MagicMock): + workspace = await WorkspaceFactory.create() + await test_telemetry.track_crud_workspace(action="create", workspace=workspace) test_telemetry.track_data.assert_called() - @pytest.mark.parametrize("is_oauth", [True, False]) - @pytest.mark.parametrize("username", ["argilla", "john"]) - @pytest.mark.parametrize("action", __CRUD__) - async def track_crud_workspace(self, test_telemetry: MagicMock, username: str, is_oauth: bool, action: str): - user = User(id=uuid.uuid4(), username=username, role=UserRole.owner) + async def test_track_track_crud_dataset( + self, + test_telemetry: MagicMock, + ): + dataset = await DatasetFactory.create() + await test_telemetry.track_crud_dataset(action="create", dataset=dataset) + test_telemetry.track_data.assert_called() - await test_telemetry.track_crud_user(action=action, user=user, is_oauth=is_oauth) + @pytest.mark.parametrize("record_or_dataset_factory", [RecordFactory, DatasetFactory]) + async def test_track_track_crud_records( + self, test_telemetry: MagicMock, record_or_dataset_factory: Union[DatasetFactory, RecordFactory] + ): + record_or_dataset = await record_or_dataset_factory.create() + if isinstance(record_or_dataset, Record): + await ResponseFactory.create(record=record_or_dataset) + await SuggestionFactory.create(record=record_or_dataset) + await test_telemetry.track_crud_records(action="create", record_or_dataset=record_or_dataset) + test_telemetry.track_data.assert_called() - test_telemetry.track_crud_user.assert_called_once_with(action=action, user=user, is_oauth=is_oauth) + @pytest.mark.parametrize("action", 
__CRUD__) + @pytest.mark.parametrize( + "setting_factory_config", + [ + ("vectors_settings", VectorSettingsFactory), + ("metadata_properties", IntegerMetadataPropertyFactory), + ("fields", TextFieldFactory), + ("questions", RankingQuestionFactory), + ("questions", RatingQuestionFactory), + ("questions", LabelSelectionQuestionFactory), + ("questions", MultiLabelSelectionQuestionFactory), + ("questions", SpanQuestionFactory), + ("questions", TextQuestionFactory), + ], + ) + async def test_track_crud_dataset_setting(self, test_telemetry: MagicMock, action: str, setting_factory_config): + setting_name, setting_factory = setting_factory_config + setting = await setting_factory.create_batch(size=1) + setting_config = {setting_name: setting} + dataset = await DatasetFactory.create(**setting_config) + await test_telemetry.track_crud_dataset(action=action, dataset=dataset) + test_telemetry.track_crud_dataset_setting.assert_called_once_with( + action=action, setting_name=setting_name, setting=setting[0], dataset=dataset + ) test_telemetry.track_data.assert_called() From c64cc29467a8ec264c47b53133bdf8767a893ec5 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 23 Jul 2024 10:34:26 +0200 Subject: [PATCH 38/63] Update list-like to basic CRUD Fix upsert behavior suggestions --- .../api/handlers/v1/datasets/datasets.py | 11 ++--- .../api/handlers/v1/datasets/questions.py | 2 +- .../api/handlers/v1/datasets/records.py | 4 +- .../argilla_server/api/handlers/v1/records.py | 4 +- .../api/handlers/v1/responses.py | 10 ++++- .../argilla_server/api/handlers/v1/users.py | 9 +--- .../api/handlers/v1/workspaces.py | 4 ++ .../src/argilla_server/telemetry.py | 45 +++++++++++-------- .../unit/api/handlers/v1/test_datasets.py | 6 +-- .../handlers/v1/test_list_dataset_records.py | 2 +- .../tests/unit/api/handlers/v1/test_users.py | 8 +--- .../tests/unit/commons/test_telemetry.py | 2 +- .../tests/unit/errors/test_api_errors.py | 2 + 13 files changed, 56 insertions(+), 53 deletions(-) 
diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py index af33002713..2377743e5f 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py @@ -71,7 +71,6 @@ async def _filter_metadata_properties_by_policy( async def list_current_user_datasets( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), workspace_id: Optional[UUID] = None, current_user: User = Security(auth.get_current_user), ): @@ -86,10 +85,6 @@ async def list_current_user_datasets( else: dataset_list = await datasets.list_datasets_by_workspace_id(db, workspace_id) - await telemetry_client.track_crud_dataset(action="list", count=len(dataset_list)) - for dataset in dataset_list: - await telemetry_client.track_crud_dataset(action="read", dataset=dataset) - return Datasets(items=dataset_list) @@ -110,7 +105,7 @@ async def list_dataset_fields( action="read", dataset=dataset, setting_name="fields", setting=field ) await telemetry_client.track_crud_dataset_setting( - action="list", dataset=dataset, setting_name="fields", count=len(dataset.fields) + action="read", dataset=dataset, setting_name="fields", count=len(dataset.fields) ) return Fields(items=dataset.fields) @@ -133,7 +128,7 @@ async def list_dataset_vector_settings( action="read", dataset=dataset, setting_name="vectors_settings", setting=vectors_setting ) await telemetry_client.track_crud_dataset_setting( - action="list", dataset=dataset, setting_name="vectors_settings", count=len(dataset.vectors_settings) + action="read", dataset=dataset, setting_name="vectors_settings", count=len(dataset.vectors_settings) ) return VectorsSettings(items=dataset.vectors_settings) @@ -160,7 +155,7 @@ async def list_current_user_dataset_metadata_properties( action="read", dataset=dataset, 
setting_name="metadata_properties", setting=metadata_property ) await telemetry_client.track_crud_dataset_setting( - action="list", dataset=dataset, setting_name="metadata_properties", count=len(filtered_metadata_properties) + action="read", dataset=dataset, setting_name="metadata_properties", count=len(filtered_metadata_properties) ) return MetadataProperties(items=filtered_metadata_properties) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py index 90c56b894c..d3921a5553 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py @@ -47,7 +47,7 @@ async def list_dataset_questions( action="read", dataset=dataset, setting_name="questions", setting=question ) await telemetry_client.track_crud_dataset_setting( - action="list", setting_name="questions", dataset=dataset, count=len(dataset.questions) + action="read", setting_name="questions", dataset=dataset, count=len(dataset.questions) ) return Questions(items=dataset.questions) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py index e1f8651d8c..3fa96b7a6c 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py @@ -389,8 +389,6 @@ async def list_current_user_dataset_records( record.dataset = dataset record.metadata_ = await _filter_record_metadata_for_user(record, current_user) - await telemetry_client.track_crud_records(action="list", record_or_dataset=dataset, count=len(records)) - return Records(items=records, total=total) @@ -425,7 +423,7 @@ async def list_dataset_records( sort_by_query_param=sort_by_query_param or LIST_DATASET_RECORDS_DEFAULT_SORT_BY, ) - await 
telemetry_client.track_crud_records(action="list", record_or_dataset=dataset, count=len(records)) + await telemetry_client.track_crud_records(action="read", record_or_dataset=dataset, count=len(records)) return Records(items=records, total=total) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/records.py b/argilla-server/src/argilla_server/api/handlers/v1/records.py index 44070b9505..4037d1822e 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/records.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/records.py @@ -191,12 +191,14 @@ async def upsert_suggestion( # NOTE: If there is already a suggestion for this record and question, we update it instead of creating a new one. # So we set the correct status code here. + action = "create" if await Suggestion.get_by(db, record_id=record_id, question_id=suggestion_create.question_id): response.status_code = status.HTTP_200_OK + action = "update" suggestion = await datasets.upsert_suggestion(db, search_engine, record, question, suggestion_create) - await telemetry_client.track_crud_records_subtopic(action="create", sub_topic="suggestions", record_id=record_id) + await telemetry_client.track_crud_records_subtopic(action=action, sub_topic="suggestions", record_id=record_id) return suggestion diff --git a/argilla-server/src/argilla_server/api/handlers/v1/responses.py b/argilla-server/src/argilla_server/api/handlers/v1/responses.py index d91091414e..6cd1071611 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/responses.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/responses.py @@ -48,12 +48,18 @@ async def create_current_user_responses_bulk( current_user: User = Security(auth.get_current_user), use_case: UpsertResponsesInBulkUseCase = Depends(UpsertResponsesInBulkUseCaseFactory()), telemetry_client: TelemetryClient = Depends(get_telemetry_client), -): +) -> ResponsesBulk: responses_bulk_items = await use_case.execute(body.items, user=current_user) + 
responses_bulk_items_filtered = [resp for resp in responses_bulk_items if resp.item] await telemetry_client.track_crud_records_subtopic( - action="create", sub_topic="responses", record_id=None, count=len(responses_bulk_items) + action="create", sub_topic="responses", record_id=None, count=len(responses_bulk_items_filtered) ) + for response in responses_bulk_items_filtered: + if response.item: + await telemetry_client.track_crud_records_subtopic( + action="create", sub_topic="responses", record_id=response.item.record_id + ) return ResponsesBulk(items=responses_bulk_items) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/users.py b/argilla-server/src/argilla_server/api/handlers/v1/users.py index 4ffbeb4ad2..9acfbb0e6d 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/users.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/users.py @@ -36,7 +36,7 @@ async def get_current_user( current_user: User = Security(auth.get_current_user), telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): - await telemetry_client.track_user_login(request, current_user) + await telemetry_client.track_user_login(request=request, user=current_user) return current_user @@ -69,7 +69,7 @@ async def list_users( users = await accounts.list_users(db) - await telemetry_client.track_crud_user(action="list", user=None, is_oauth=False, count=len(users)) + await telemetry_client.track_crud_user(action="read", user=None, is_oauth=False, count=len(users)) for user in users: await telemetry_client.track_crud_user(action="read", user=user, is_oauth=False) @@ -118,7 +118,6 @@ async def list_user_workspaces( db: AsyncSession = Depends(get_async_db), user_id: UUID, current_user: User = Security(auth.get_current_user), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): await authorize(current_user, UserPolicy.list_workspaces) @@ -129,8 +128,4 @@ async def list_user_workspaces( else: workspaces = await 
accounts.list_workspaces_by_user_id(db, user_id) - for workspace in workspaces: - await telemetry_client.track_crud_workspace(action="read", workspace=workspace) - await telemetry_client.track_crud_workspace(action="list", workspace=None, count=len(workspaces)) - return Workspaces(items=workspaces) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py index de8e67b265..44c370a2a1 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py @@ -105,6 +105,10 @@ async def list_workspaces_me( else: workspaces = await accounts.list_workspaces_by_user_id(db, current_user.id) + for workspace in workspaces: + await telemetry_client.track_crud_workspace(action="read", workspace=workspace) + await telemetry_client.track_crud_workspace(action="read", workspace=None, count=len(workspaces)) + return Workspaces(items=workspaces) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 50805d4865..05566e7fa6 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -78,18 +78,19 @@ def __post_init__(self, enable_telemetry: bool): def _process_request_info(request: Request): return {header: request.headers.get(header) for header in ["user-agent", "accept-language"]} + @staticmethod + def _process_user_model(user: User): + return {"user_id": str(user.id), "role": user.role, "is_default_user": user.username == DEFAULT_USERNAME} + @staticmethod def _process_workspace_model(workspace: Workspace): - return { - "workspace_id": str(workspace.id), - "workspace_hash": str(uuid.uuid5(namespace=_TELEMETRY_CLIENT.server_id, name=workspace.name)), - } + return {"workspace_id": str(workspace.id)} @staticmethod def _process_dataset_model(dataset: Dataset): return { "dataset_id": str(dataset.id), - 
"dataset_hash": str(uuid.uuid5(namespace=_TELEMETRY_CLIENT.server_id, name=dataset.name)), + "workspace_id": str(dataset.workspace_id), } @staticmethod @@ -102,10 +103,6 @@ def _process_record_model(record: Record): @staticmethod def _process_dataset_settings(dataset: Dataset): attributes = [ - "fields", - "questions", - "vectors_settings", - "metadata_properties", "allow_extra_metadata", "guidelines", ] @@ -113,6 +110,17 @@ def _process_dataset_settings(dataset: Dataset): for attr in attributes: if dataset.is_relationship_loaded(attr): user_data[attr] = getattr(dataset, attr) + + attributes = [ + "fields", + "questions", + "vectors_settings", + "metadata_properties", + ] + for attr in attributes: + if dataset.is_relationship_loaded(attr): + user_data[f"count_{attr}"] = len(getattr(dataset, attr)) + return user_data @staticmethod @@ -142,20 +150,12 @@ def _process_dataset_setting_settings( user_data["type"] = setting.type.value elif isinstance(setting, VectorSettings): user_data["dimensions"] = setting.dimensions + user_data["type"] = "default" else: raise NotImplementedError("Expected a setting to be processed.") return user_data - @staticmethod - def _process_user_model(user: User): - return { - "user_id": str(user.id), - "role": user.role, - "is_default_user": user.username == DEFAULT_USERNAME, - "user_hash": str(uuid.uuid5(namespace=_TELEMETRY_CLIENT.server_id, name=user.username)), - } - async def track_data(self, topic: str, user_agent: dict, include_system_info: bool = True, count: int = 1): if not self.enable_telemetry: return @@ -181,6 +181,7 @@ async def track_crud_user( action: str, user: Union[User, None] = None, is_oauth: Union[bool, None] = None, + is_login: Union[bool, None] = None, count: Union[int, None] = None, ): topic = f"user/{action}" @@ -189,6 +190,8 @@ async def track_crud_user( user_agent.update(self._process_user_model(user=user)) if is_oauth is not None: user_agent["is_oauth"] = is_oauth + if is_login is not None: + user_agent["is_login"] 
= is_login await self.track_data(topic=topic, user_agent=user_agent, count=count) async def track_crud_workspace( @@ -214,7 +217,11 @@ async def track_crud_dataset( if dataset: for attr in attributes: if dataset.is_relationship_loaded(attr): - for obtained_attr in getattr(dataset, attr): + obtained_attr_list = getattr(dataset, attr) + await self.track_crud_dataset_setting( + action=action, setting_name=attr, dataset=dataset, setting=None, count=len(obtained_attr_list) + ) + for obtained_attr in obtained_attr_list: await self.track_crud_dataset_setting( action=action, setting_name=attr, dataset=dataset, setting=obtained_attr ) diff --git a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py index 442f01af2d..39c9077071 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py @@ -234,7 +234,7 @@ async def test_list_dataset_fields( } test_telemetry.track_crud_dataset_setting.assert_called_with( - action="list", dataset=dataset, setting_name="fields", count=len(response.json()["items"]) + action="read", dataset=dataset, setting_name="fields", count=len(response.json()["items"]) ) test_telemetry.track_data.assert_called() @@ -358,7 +358,7 @@ async def test_list_dataset_questions( } test_telemetry.track_crud_dataset_setting.assert_called_with( - action="list", dataset=dataset, setting_name="questions", count=len(response.json()["items"]) + action="read", dataset=dataset, setting_name="questions", count=len(response.json()["items"]) ) test_telemetry.track_data.assert_called() @@ -649,7 +649,7 @@ async def test_list_dataset_vectors_settings( } test_telemetry.track_crud_dataset_setting.assert_called_with( - action="list", dataset=dataset, setting_name="vectors_settings", count=len(response.json()["items"]) + action="read", dataset=dataset, setting_name="vectors_settings", count=len(response.json()["items"]) ) 
test_telemetry.track_data.assert_called() diff --git a/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py b/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py index 56f1786dc4..a46cf1353a 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py @@ -103,7 +103,7 @@ async def test_list_dataset_records( } test_telemetry.track_crud_records.assert_called_with( - action="list", record_or_dataset=field.dataset, count=response.json()["total"] + action="read", record_or_dataset=response.dataset, count=response.json()["total"] ) test_telemetry.track_data.assert_called() diff --git a/argilla-server/tests/unit/api/handlers/v1/test_users.py b/argilla-server/tests/unit/api/handlers/v1/test_users.py index 15ad3b6b40..03851897d5 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_users.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_users.py @@ -13,7 +13,6 @@ # limitations under the License. 
from typing import TYPE_CHECKING -from unittest.mock import MagicMock from uuid import uuid4 import pytest @@ -28,9 +27,7 @@ @pytest.mark.asyncio class TestsUsersV1Endpoints: - async def test_list_user_workspaces( - self, async_client: "AsyncClient", owner_auth_header: dict, test_telemetry: MagicMock - ): + async def test_list_user_workspaces(self, async_client: "AsyncClient", owner_auth_header: dict): workspaces = await WorkspaceFactory.create_batch(3) user = await UserFactory.create(workspaces=workspaces) @@ -49,9 +46,6 @@ async def test_list_user_workspaces( ] } - test_telemetry.track_crud_workspace.assert_called_with(action="list", workspace=None, count=len(workspaces)) - test_telemetry.track_data.assert_called() - async def test_list_user_workspaces_for_owner(self, async_client: "AsyncClient"): workspaces = await WorkspaceFactory.create_batch(5) owner = await UserFactory.create(role=UserRole.owner) diff --git a/argilla-server/tests/unit/commons/test_telemetry.py b/argilla-server/tests/unit/commons/test_telemetry.py index 0add7aed08..8863b0ea8e 100644 --- a/argilla-server/tests/unit/commons/test_telemetry.py +++ b/argilla-server/tests/unit/commons/test_telemetry.py @@ -110,7 +110,7 @@ async def test_track_crud_dataset_setting(self, test_telemetry: MagicMock, actio setting_config = {setting_name: setting} dataset = await DatasetFactory.create(**setting_config) await test_telemetry.track_crud_dataset(action=action, dataset=dataset) - test_telemetry.track_crud_dataset_setting.assert_called_once_with( + test_telemetry.track_crud_dataset_setting.assert_called_with( action=action, setting_name=setting_name, setting=setting[0], dataset=dataset ) test_telemetry.track_data.assert_called() diff --git a/argilla-server/tests/unit/errors/test_api_errors.py b/argilla-server/tests/unit/errors/test_api_errors.py index fc593a024a..2fc4f96e40 100644 --- a/argilla-server/tests/unit/errors/test_api_errors.py +++ b/argilla-server/tests/unit/errors/test_api_errors.py @@ -78,9 +78,11 
@@ async def test_track_error(self, test_telemetry: MagicMock, error, expected_even "code": error.code, "user-agent": mock_request.headers.get("user-agent"), "accept-language": mock_request.headers.get("accept-language"), + "count": 1, } if isinstance(error, (GenericServerError, EntityNotFoundError, EntityAlreadyExistsError)): user_agent["type"] = error.type + user_agent.update(test_telemetry._system_info) test_telemetry.track_data.assert_called_once_with(topic="error/server", user_agent=user_agent) test_telemetry.track_data.assert_called() From 0ed746b6601f130901d455dce142062bd9af8926 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 11:39:12 +0000 Subject: [PATCH 39/63] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- argilla-server/src/argilla_server/_app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argilla-server/src/argilla_server/_app.py b/argilla-server/src/argilla_server/_app.py index 08c12b2bd4..a978ee32ac 100644 --- a/argilla-server/src/argilla_server/_app.py +++ b/argilla-server/src/argilla_server/_app.py @@ -177,7 +177,7 @@ def show_telemetry_warning(): f'{"#set ARGILLA_ENABLE_TELEMETRY=0" if os.name == "nt" else "$>export ARGILLA_ENABLE_TELEMETRY=0"}' ) _LOGGER.warning(message) - + message += "\n\n " message += "#set HF_HUB_DISABLE_TELEMETRY=1" if os.name == "nt" else "$>export HF_HUB_DISABLE_TELEMETRY=1" message += "\n" From fef47b885e8328cac70060e3b3309ab956f958a9 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 19 Aug 2024 13:41:04 +0200 Subject: [PATCH 40/63] Resolved utcnow() deprecation --- argilla-server/src/argilla_server/_app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/argilla-server/src/argilla_server/_app.py b/argilla-server/src/argilla_server/_app.py index 08c12b2bd4..71c350d68f 100644 --- a/argilla-server/src/argilla_server/_app.py +++ 
b/argilla-server/src/argilla_server/_app.py @@ -97,9 +97,9 @@ def configure_middleware(app: FastAPI): @app.middleware("http") async def add_server_timing_header(request: Request, call_next): - start_time = datetime.utcnow() + start_time = datetime.now(datetime.UTC) response = await call_next(request) - response_time_ms = (datetime.utcnow() - start_time).total_seconds() * 1000 + response_time_ms = (datetime.now(datetime.UTC) - start_time).total_seconds() * 1000 response.headers["Server-Timing"] = f"total;dur={response_time_ms}" @@ -177,7 +177,7 @@ def show_telemetry_warning(): f'{"#set ARGILLA_ENABLE_TELEMETRY=0" if os.name == "nt" else "$>export ARGILLA_ENABLE_TELEMETRY=0"}' ) _LOGGER.warning(message) - + message += "\n\n " message += "#set HF_HUB_DISABLE_TELEMETRY=1" if os.name == "nt" else "$>export HF_HUB_DISABLE_TELEMETRY=1" message += "\n" From e332696e0d2d46064e4c8984c79b8a6271140149 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 19 Aug 2024 13:49:24 +0200 Subject: [PATCH 41/63] Update to always add error type to overview --- argilla-server/src/argilla_server/errors/error_handler.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/argilla-server/src/argilla_server/errors/error_handler.py b/argilla-server/src/argilla_server/errors/error_handler.py index c2fb522ad7..b48c0a3015 100644 --- a/argilla-server/src/argilla_server/errors/error_handler.py +++ b/argilla-server/src/argilla_server/errors/error_handler.py @@ -57,9 +57,8 @@ async def track_error(cls, error: ServerError, request: Request): "code": error.code, "user-agent": request.headers.get("user-agent"), "accept-language": request.headers.get("accept-language"), + "type": error.__class__.__name__, } - if isinstance(error, (GenericServerError, EntityNotFoundError, EntityAlreadyExistsError)): - user_agent["type"] = error.type await get_telemetry_client().track_data(topic="error/server", user_agent=user_agent) From a1c83a1cfe74e323bb75d2fa393a3fffaa4ed9c9 Mon Sep 17 00:00:00 
2001 From: davidberenstein1957 Date: Mon, 19 Aug 2024 14:18:22 +0200 Subject: [PATCH 42/63] Add dataset distribution tracking to telemetry --- argilla-server/src/argilla_server/telemetry.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 05566e7fa6..29bfe11ec1 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -102,14 +102,16 @@ def _process_record_model(record: Record): @staticmethod def _process_dataset_settings(dataset: Dataset): - attributes = [ - "allow_extra_metadata", - "guidelines", - ] user_data = {} - for attr in attributes: - if dataset.is_relationship_loaded(attr): - user_data[attr] = getattr(dataset, attr) + if dataset.is_relationship_loaded("guidelines"): + user_data["guidelines"] = True if getattr(dataset, "guidelines") else False + if dataset.is_relationship_loaded("guidelines"): + user_data["allow_extra_metadata"] = getattr(dataset, "allow_extra_metadata") + if dataset.is_relationship_loaded("distribution"): + distribution = getattr(dataset, "distribution") + user_data["distribution_strategy"] = distribution["strategy"] + if "min_submitted" in distribution: + user_data["distribution_min_submitted"] = distribution["min_submitted"] attributes = [ "fields", From 17b0403f28b613f5fc46dd2cd814f1359c2531bd Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 19 Aug 2024 14:21:07 +0200 Subject: [PATCH 43/63] Revert UTC change --- argilla-server/src/argilla_server/_app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/argilla-server/src/argilla_server/_app.py b/argilla-server/src/argilla_server/_app.py index 71c350d68f..a978ee32ac 100644 --- a/argilla-server/src/argilla_server/_app.py +++ b/argilla-server/src/argilla_server/_app.py @@ -97,9 +97,9 @@ def configure_middleware(app: FastAPI): @app.middleware("http") async def 
add_server_timing_header(request: Request, call_next): - start_time = datetime.now(datetime.UTC) + start_time = datetime.utcnow() response = await call_next(request) - response_time_ms = (datetime.now(datetime.UTC) - start_time).total_seconds() * 1000 + response_time_ms = (datetime.utcnow() - start_time).total_seconds() * 1000 response.headers["Server-Timing"] = f"total;dur={response_time_ms}" From f6e9f3819596df94e852d9aa8dba8680af4db253 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 19 Aug 2024 16:22:48 +0200 Subject: [PATCH 44/63] fix failing tests --- .../unit/api/handlers/v1/test_datasets.py | 22 +++++++++---------- .../tests/unit/errors/test_api_errors.py | 5 +---- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py index dc25a94716..1e2a17c9e2 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py @@ -19,8 +19,6 @@ from uuid import UUID, uuid4 import pytest -from sqlalchemy import func, inspect, select - from argilla_server.api.handlers.v1.datasets.records import LIST_DATASET_RECORDS_LIMIT_DEFAULT from argilla_server.api.schemas.v1.datasets import DATASET_GUIDELINES_MAX_LENGTH, DATASET_NAME_MAX_LENGTH from argilla_server.api.schemas.v1.fields import FIELD_CREATE_NAME_MAX_LENGTH, FIELD_CREATE_TITLE_MAX_LENGTH @@ -40,9 +38,9 @@ DatasetStatus, OptionsOrder, RecordInclude, + RecordStatus, ResponseStatusFilter, SimilarityOrder, - RecordStatus, SortOrder, ) from argilla_server.models import ( @@ -60,18 +58,20 @@ VectorSettings, ) from argilla_server.search_engine import ( - SearchEngine, - SearchResponseItem, - SearchResponses, - TextQuery, AndFilter, - TermsFilter, MetadataFilterScope, - RangeFilter, - ResponseFilterScope, Order, + RangeFilter, RecordFilterScope, + ResponseFilterScope, + SearchEngine, + SearchResponseItem, + 
SearchResponses, + TermsFilter, + TextQuery, ) +from sqlalchemy import func, inspect, select + from tests.factories import ( AdminFactory, AnnotatorFactory, @@ -3087,7 +3087,7 @@ async def test_update_dataset_records( mock_search_engine.index_records.assert_called_once_with(dataset, records[:4]) - test_telemetry.track_crud_records.assert_called_once_with(action="update", record_or_dataset=dataset, count=4) + test_telemetry.track_crud_records.assert_called_with(action="update", record_or_dataset=dataset, count=4) test_telemetry.track_data.assert_called() async def test_update_dataset_records_with_suggestions( diff --git a/argilla-server/tests/unit/errors/test_api_errors.py b/argilla-server/tests/unit/errors/test_api_errors.py index 2fc4f96e40..82a56b24fe 100644 --- a/argilla-server/tests/unit/errors/test_api_errors.py +++ b/argilla-server/tests/unit/errors/test_api_errors.py @@ -78,11 +78,8 @@ async def test_track_error(self, test_telemetry: MagicMock, error, expected_even "code": error.code, "user-agent": mock_request.headers.get("user-agent"), "accept-language": mock_request.headers.get("accept-language"), - "count": 1, + "type": error.__class__.__name__, } - if isinstance(error, (GenericServerError, EntityNotFoundError, EntityAlreadyExistsError)): - user_agent["type"] = error.type - user_agent.update(test_telemetry._system_info) test_telemetry.track_data.assert_called_once_with(topic="error/server", user_agent=user_agent) test_telemetry.track_data.assert_called() From 68d6e534ef6f2d1b63d6b9d65ed6f775d9deff2c Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 19 Aug 2024 16:30:51 +0200 Subject: [PATCH 45/63] fix failing tests --- argilla-server/src/argilla_server/telemetry.py | 3 --- argilla-server/tests/unit/errors/test_api_errors.py | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 29bfe11ec1..9e1f245e22 100644 --- 
a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -159,9 +159,6 @@ def _process_dataset_setting_settings( return user_data async def track_data(self, topic: str, user_agent: dict, include_system_info: bool = True, count: int = 1): - if not self.enable_telemetry: - return - library_name = "argilla" topic = f"{library_name}/{topic}" diff --git a/argilla-server/tests/unit/errors/test_api_errors.py b/argilla-server/tests/unit/errors/test_api_errors.py index 82a56b24fe..5daba73d21 100644 --- a/argilla-server/tests/unit/errors/test_api_errors.py +++ b/argilla-server/tests/unit/errors/test_api_errors.py @@ -79,7 +79,8 @@ async def test_track_error(self, test_telemetry: MagicMock, error, expected_even "user-agent": mock_request.headers.get("user-agent"), "accept-language": mock_request.headers.get("accept-language"), "type": error.__class__.__name__, + "count": 1, } + user_agent.update(test_telemetry._system_info) test_telemetry.track_data.assert_called_once_with(topic="error/server", user_agent=user_agent) - test_telemetry.track_data.assert_called() From de181a9eb95e227c720862921fa78b1d144aaf2c Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 19 Aug 2024 16:45:04 +0200 Subject: [PATCH 46/63] Remove server id from telemetry to be more GDPR compliant --- argilla-server/src/argilla_server/telemetry.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 9e1f245e22..39dda99f1a 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -16,8 +16,7 @@ import json import logging import platform -import uuid -from typing import Optional, Union +from typing import Union from fastapi import Request from huggingface_hub.utils import send_telemetry @@ -51,16 +50,8 @@ class TelemetryClient: enable_telemetry: dataclasses.InitVar[bool] = 
settings.enable_telemetry - _server_id: Optional[uuid.UUID] = dataclasses.field(init=False, default=None) - - @property - def server_id(self) -> uuid.UUID: - return self._server_id - def __post_init__(self, enable_telemetry: bool): - self._server_id = uuid.UUID(int=uuid.getnode()) self._system_info = { - "server_id": str(self._server_id), "system": platform.system(), "machine": platform.machine(), "platform": platform.platform(), @@ -70,7 +61,6 @@ def __post_init__(self, enable_telemetry: bool): } _LOGGER.info("System Info:") - _LOGGER.info(f"Server id: {self.server_id}") _LOGGER.info(f"Context: {json.dumps(self._system_info, indent=2)}") self.enable_telemetry = enable_telemetry From b823e2e620eeca67fcbbc552a094aef592a7677d Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 20 Aug 2024 08:02:02 +0200 Subject: [PATCH 47/63] Update tlemetry workflow --- argilla-server/src/argilla_server/_app.py | 2 +- argilla-server/src/argilla_server/settings.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/argilla-server/src/argilla_server/_app.py b/argilla-server/src/argilla_server/_app.py index a978ee32ac..425d4081e2 100644 --- a/argilla-server/src/argilla_server/_app.py +++ b/argilla-server/src/argilla_server/_app.py @@ -174,7 +174,7 @@ def show_telemetry_warning(): " https://docs.argilla.io/latest/reference/argilla-server/telemetry/\n\n" "Telemetry is currently enabled. 
If you want to disable it, you can configure\n" "the environment variable before relaunching the server:\n\n" - f'{"#set ARGILLA_ENABLE_TELEMETRY=0" if os.name == "nt" else "$>export ARGILLA_ENABLE_TELEMETRY=0"}' + f'{"#set HF_HUB_DISABLE_TELEMETRY=1" if os.name == "nt" else "$>export HF_HUB_DISABLE_TELEMETRY=1"}' ) _LOGGER.warning(message) diff --git a/argilla-server/src/argilla_server/settings.py b/argilla-server/src/argilla_server/settings.py index fae571807c..ac900762f6 100644 --- a/argilla-server/src/argilla_server/settings.py +++ b/argilla-server/src/argilla_server/settings.py @@ -31,7 +31,6 @@ DEFAULT_DATABASE_POSTGRESQL_POOL_SIZE, DEFAULT_DATABASE_SQLITE_TIMEOUT, DEFAULT_LABEL_SELECTION_OPTIONS_MAX_ITEMS, - DEFAULT_MAX_KEYWORD_LENGTH, DEFAULT_SPAN_OPTIONS_MAX_ITEMS, SEARCH_ENGINE_ELASTICSEARCH, SEARCH_ENGINE_OPENSEARCH, @@ -136,7 +135,8 @@ class Settings(BaseSettings): def set_enable_telemetry(cls, enable_telemetry: bool) -> bool: if os.getenv("HF_HUB_DISABLE_TELEMETRY") == "1" or os.getenv("HF_HUB_OFFLINE") == "1": enable_telemetry = False - elif os.getenv("ARGILLA_ENABLE_TELEMETRY") == "0": + if os.getenv("ARGILLA_ENABLE_TELEMETRY") == "0": + os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1" warnings.warn( "environment vairbale ARGILLA_ENABLE_TELEMETRY is deprecated, use HF_HUB_DISABLE_TELEMETRY or HF_HUB_OFFLINE instead." 
) From cad43a17704a18d8aa8982c677346789bc41f8a5 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 26 Aug 2024 13:09:55 +0200 Subject: [PATCH 48/63] chore: add huggingface_hub to dependencies --- argilla-server/pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/argilla-server/pyproject.toml b/argilla-server/pyproject.toml index d881f4a9ec..10f87751ea 100644 --- a/argilla-server/pyproject.toml +++ b/argilla-server/pyproject.toml @@ -57,6 +57,8 @@ dependencies = [ "typer >= 0.6.0, < 0.10.0", # spaCy only supports typer<0.10.0 "packaging>=23.2", "psycopg2-binary>=2.9.9", + # For Telemetry + "huggingface_hub>=0.13,<1", ] [project.optional-dependencies] From 659d9a248e34847afc85c03311d9b2c8daef2928 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 27 Aug 2024 11:43:04 +0200 Subject: [PATCH 49/63] docs: update telemetry sections --- .../reference/argilla-server/configuration.md | 2 +- .../reference/argilla-server/telemetry.md | 32 ++++++--- argilla/docs/reference/telemetry.md | 68 ------------------- 3 files changed, 24 insertions(+), 78 deletions(-) delete mode 100644 argilla/docs/reference/telemetry.md diff --git a/argilla/docs/reference/argilla-server/configuration.md b/argilla/docs/reference/argilla-server/configuration.md index e240409b0a..47c87a08c3 100644 --- a/argilla/docs/reference/argilla-server/configuration.md +++ b/argilla/docs/reference/argilla-server/configuration.md @@ -46,7 +46,7 @@ You can set the following environment variables to further configure your server - `ARGILLA_DOCS_ENABLED`: If False, disables openapi docs endpoint at _/api/docs_. -- `ARGILLA_ENABLE_TELEMETRY`: If False, disables telemetry for usage metrics. +- `HF_HUB_DISABLE_TELEMETRY`: If True, disables telemetry for usage metrics. Alternatively, you can disable telemetry by setting `HF_HUB_OFFLINE=1`. 
#### Authentication diff --git a/argilla/docs/reference/argilla-server/telemetry.md b/argilla/docs/reference/argilla-server/telemetry.md index 903d7cc856..d2e907f402 100644 --- a/argilla/docs/reference/argilla-server/telemetry.md +++ b/argilla/docs/reference/argilla-server/telemetry.md @@ -1,24 +1,24 @@ # Server Telemetry -Argilla uses telemetry to report anonymous usage and error information. As an open-source software, this type of information is important to improve and understand how the product is used. +Argilla uses telemetry to report anonymous usage and error information. As an open-source software, this type of information is important to improve and understand how the product is used. This is done through the [Hugging Face Hub library](https://github.com/huggingface/huggingface_hub) telemetry implementations. -## How to opt out +## How to opt-out -You can opt out of telemetry reporting using the `ENV` variable `ARGILLA_ENABLE_TELEMETRY` before launching the server. Setting this variable to `0` will completely disable telemetry reporting. +You can opt out of telemetry reporting using the `ENV` variable `HF_HUB_DISABLE_TELEMETRY` before launching the server. Setting this variable to `1` will completely disable telemetry reporting. If you are a Linux/MacOs user, you should run: ```bash -export ARGILLA_ENABLE_TELEMETRY=0 +export HF_HUB_DISABLE_TELEMETRY=1 ``` If you are a Windows user, you should run: ```bash -set ARGILLA_ENABLE_TELEMETRY=0 +set HF_HUB_DISABLE_TELEMETRY=1 ``` -To opt in again, you can set the variable to `1`. +To opt in again, you can set the variable to `0`. ## Why reporting telemetry @@ -45,10 +45,24 @@ The following usage and error information is reported: * The system’s release version, e.g. `Darwin Kernel Version 21.5.0: Tue Apr 26 21:08:22 PDT 2022; root:xnu-8020` * The machine type, e.g. `AMD64` * The underlying platform spec with as much useful information as possible. (eg. 
`macOS-10.16-x86_64-i386-64bit`) -* The type of deployment: `huggingface_space` or `server` +* The type of deployment: `quickstart` or `server`, and if it is deployed on Hugging Face spaces. * The dockerized deployment flag: `True` or `False` - -For transparency, you can inspect the source code where this is performed [here](https://github.com/argilla-io/argilla/blob/main/argilla-server/src/argilla_server/utils/_telemetry.py). +This is performed by registering counters for the create, read, update, delete (CRUD) and list operations for different API resources: + +* Users +* Workspaces +* Datasets + * Settings + * Fields + * Questions + * Vector Settings + * Metadata Properties + * Records + * Suggestions + * Responses +* Raised server API errors + +For transparency, you can inspect the source code where this is performed [here](https://github.com/argilla-io/argilla/argilla-server/src/argilla_server/telemetry.py). If you have any doubts, don't hesitate to join our [Discord channel](http://hf.co/join/discord) or open a GitHub issue. We'd be very happy to discuss how we can improve this. diff --git a/argilla/docs/reference/telemetry.md b/argilla/docs/reference/telemetry.md deleted file mode 100644 index d2e907f402..0000000000 --- a/argilla/docs/reference/telemetry.md +++ /dev/null @@ -1,68 +0,0 @@ -# Server Telemetry - -Argilla uses telemetry to report anonymous usage and error information. As an open-source software, this type of information is important to improve and understand how the product is used. This is done through the [Hugging Face Hub library](https://github.com/huggingface/huggingface_hub) telemetry implementations. - -## How to opt-out - -You can opt out of telemetry reporting using the `ENV` variable `HF_HUB_DISABLE_TELEMETRY` before launching the server. Setting this variable to `1` will completely disable telemetry reporting. 
- -If you are a Linux/MacOs user, you should run: - -```bash -export HF_HUB_DISABLE_TELEMETRY=1 -``` - -If you are a Windows user, you should run: - -```bash -set HF_HUB_DISABLE_TELEMETRY=1 -``` - -To opt in again, you can set the variable to `0`. - -## Why reporting telemetry - -Anonymous telemetry information enables us to continuously improve the product and detect recurring problems to better serve all users. We collect aggregated information about general usage and errors. We do NOT collect any information on users' data records, datasets, or metadata information. - -## Sensitive data - -We do not collect any piece of information related to the source data you store in Argilla. We don't identify individual users. Your data does not leave your server at any time: - -* No dataset record is collected. -* No dataset names or metadata are collected. - -## Information reported - -The following usage and error information is reported: - -* The code of the raised error and the entity type related to the error, if any (Dataset, Workspace,...) -* The `user-agent` and `accept-language` http headers -* Task name and number of records for bulk operations -* An anonymous generated user uuid -* The Argilla version running the server -* The Python version, e.g. `3.8.13` -* The system/OS name, such as `Linux`, `Darwin`, `Windows` -* The system’s release version, e.g. `Darwin Kernel Version 21.5.0: Tue Apr 26 21:08:22 PDT 2022; root:xnu-8020` -* The machine type, e.g. `AMD64` -* The underlying platform spec with as much useful information as possible. (eg. `macOS-10.16-x86_64-i386-64bit`) -* The type of deployment: `quickstart` or `server`, and if it is deployed on Hugging Face spaces. 
-* The dockerized deployment flag: `True` or `False` - -This is performed by registering counters for the create, read, update, delete (CRUD) and list operations for different API resources: - -* Users -* Workspaces -* Datasets - * Settings - * Fields - * Questions - * Vector Settings - * Metadata Properties - * Records - * Suggestions - * Responses -* Raised server API errors - -For transparency, you can inspect the source code where this is performed [here](https://github.com/argilla-io/argilla/argilla-server/src/argilla_server/telemetry.py). - -If you have any doubts, don't hesitate to join our [Discord channel](http://hf.co/join/discord) or open a GitHub issue. We'd be very happy to discuss how we can improve this. From c725352d5597995bc0824c87824133bfd7507ffa Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 27 Aug 2024 11:53:01 +0200 Subject: [PATCH 50/63] update: usage from record_subtopic to record_suggestions and record_responses --- .../argilla_server/api/handlers/v1/records.py | 12 +++++------ .../api/handlers/v1/responses.py | 16 +++++--------- .../api/handlers/v1/suggestions.py | 4 +--- .../src/argilla_server/telemetry.py | 21 +++++++++++++------ .../unit/api/handlers/v1/test_records.py | 15 +++++-------- .../unit/api/handlers/v1/test_responses.py | 8 ++----- .../unit/api/handlers/v1/test_suggestions.py | 4 ++-- 7 files changed, 35 insertions(+), 45 deletions(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/records.py b/argilla-server/src/argilla_server/api/handlers/v1/records.py index 4037d1822e..edc86dab25 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/records.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/records.py @@ -117,7 +117,7 @@ async def create_record_response( response = await datasets.create_response(db, search_engine, record, current_user, response_create) - await telemetry_client.track_crud_records_subtopic(action="create", sub_topic="responses", record_id=record_id) + await 
telemetry_client.track_crud_records_responses(action="create", record_id=record_id) return response @@ -142,8 +142,8 @@ async def get_record_suggestions( await authorize(current_user, RecordPolicy.get_suggestions(record)) - await telemetry_client.track_crud_records_subtopic( - action="read", sub_topic="suggestions", record_id=record_id, count=len(record.suggestions) + await telemetry_client.track_crud_records_suggestions( + action="read", record_id=record_id, count=len(record.suggestions) ) return Suggestions(items=record.suggestions) @@ -198,7 +198,7 @@ async def upsert_suggestion( suggestion = await datasets.upsert_suggestion(db, search_engine, record, question, suggestion_create) - await telemetry_client.track_crud_records_subtopic(action=action, sub_topic="suggestions", record_id=record_id) + await telemetry_client.track_crud_records_suggestions(action=action, record_id=record_id) return suggestion @@ -239,9 +239,7 @@ async def delete_record_suggestions( await datasets.delete_suggestions(db, search_engine, record, suggestion_ids) - await telemetry_client.track_crud_records_subtopic( - action="delete", sub_topic="suggestions", record_id=record_id, count=num_suggestions - ) + await telemetry_client.track_crud_records_suggestions(action="delete", record_id=record_id, count=num_suggestions) @router.delete("/records/{record_id}", response_model=RecordSchema, response_model_exclude_unset=True) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/responses.py b/argilla-server/src/argilla_server/api/handlers/v1/responses.py index 5efd3c8b87..c2ad11202e 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/responses.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/responses.py @@ -52,14 +52,12 @@ async def create_current_user_responses_bulk( responses_bulk_items = await use_case.execute(body.items, user=current_user) responses_bulk_items_filtered = [resp for resp in responses_bulk_items if resp.item] - await 
telemetry_client.track_crud_records_subtopic( - action="create", sub_topic="responses", record_id=None, count=len(responses_bulk_items_filtered) + await telemetry_client.track_crud_records_responses( + action="create", record_id=None, count=len(responses_bulk_items_filtered) ) for response in responses_bulk_items_filtered: if response.item: - await telemetry_client.track_crud_records_subtopic( - action="create", sub_topic="responses", record_id=response.item.record_id - ) + await telemetry_client.track_crud_records_responses(action="create", record_id=response.item.record_id) return ResponsesBulk(items=responses_bulk_items) @@ -86,9 +84,7 @@ async def update_response( response = await datasets.update_response(db, search_engine, response, response_update) - await telemetry_client.track_crud_records_subtopic( - action="update", sub_topic="responses", record_id=response.record_id - ) + await telemetry_client.track_crud_records_responses(action="update", record_id=response.record_id) return response @@ -114,8 +110,6 @@ async def delete_response( response = await datasets.delete_response(db, search_engine, response) - await telemetry_client.track_crud_records_subtopic( - action="delete", sub_topic="responses", record_id=response.record_id - ) + await telemetry_client.track_crud_records_responses(action="delete", record_id=response.record_id) return response diff --git a/argilla-server/src/argilla_server/api/handlers/v1/suggestions.py b/argilla-server/src/argilla_server/api/handlers/v1/suggestions.py index 14c4bf55d5..212c7a3dbb 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/suggestions.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/suggestions.py @@ -52,8 +52,6 @@ async def delete_suggestion( suggestion = await datasets.delete_suggestion(db, search_engine, suggestion) - await telemetry_client.track_crud_records_subtopic( - action="delete", sub_topic="suggestions", record_id=suggestion.record_id - ) + await 
telemetry_client.track_crud_records_suggestions(action="delete", record_id=suggestion.record_id) return suggestion diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 39dda99f1a..dab8284fbb 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -91,7 +91,7 @@ def _process_record_model(record: Record): } @staticmethod - def _process_dataset_settings(dataset: Dataset): + def _process_dataset_settings(dataset: Dataset) -> dict: user_data = {} if dataset.is_relationship_loaded("guidelines"): user_data["guidelines"] = True if getattr(dataset, "guidelines") else False @@ -103,7 +103,7 @@ def _process_dataset_settings(dataset: Dataset): if "min_submitted" in distribution: user_data["distribution_min_submitted"] = distribution["min_submitted"] - attributes = [ + attributes: list[str] = [ "fields", "questions", "vectors_settings", @@ -125,7 +125,7 @@ def _process_dataset_setting_settings( TermsMetadataPropertySettings, IntegerMetadataPropertySettings, ], - ): + ) -> dict: user_data = {"dataset_id": str(setting.dataset_id)} if isinstance(setting, (Field, Question)): user_data["required"] = setting.required @@ -241,14 +241,23 @@ async def track_crud_records( raise NotImplementedError("Expected element of `Dataset` or `Record`") await self.track_data(topic=topic, user_agent=user_agent, count=count) - async def track_crud_records_subtopic( + async def track_crud_records_responses( + self, + action: str, + record_id: str, + count: Union[int, None] = None, + ): + topic = f"dataset/records/responses/{action}" + user_agent = {"record_id": record_id} + await self.track_data(topic=topic, user_agent=user_agent, count=count) + + async def track_crud_records_suggestions( self, action: str, - sub_topic: str, record_id: str, count: Union[int, None] = None, ): - topic = f"dataset/records/{sub_topic}/{action}" + topic = f"dataset/records/suggestions/{action}" 
user_agent = {"record_id": record_id} await self.track_data(topic=topic, user_agent=user_agent, count=count) diff --git a/argilla-server/tests/unit/api/handlers/v1/test_records.py b/argilla-server/tests/unit/api/handlers/v1/test_records.py index b44697f2bf..2d53476c9e 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_records.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_records.py @@ -1043,9 +1043,7 @@ async def test_create_record_response( "updated_at": datetime.fromisoformat(response_body["updated_at"]).isoformat(), } - test_telemetry.track_crud_records_subtopic.assert_called_with( - action="create", sub_topic="responses", record_id=record.id - ) + test_telemetry.track_crud_records_responses.assert_called_with(action="create", record_id=record.id) test_telemetry.track_data.assert_called() @pytest.mark.parametrize( @@ -1290,8 +1288,8 @@ async def test_get_record_suggestions(self, async_client: "AsyncClient", role: U ] } - test_telemetry.track_crud_records_subtopic.assert_called_with( - action="read", sub_topic="suggestions", record_id=record.id, count=len(response.json()["items"]) + test_telemetry.track_crud_records_suggestions.assert_called_with( + action="read", record_id=record.id, count=len(response.json()["items"]) ) test_telemetry.track_data.assert_called() @@ -1344,9 +1342,7 @@ async def test_create_record_suggestion( assert (await db.execute(select(func.count(Suggestion.id)))).scalar() == 1 - test_telemetry.track_crud_records_subtopic.assert_called_with( - action="create", sub_topic="suggestions", record_id=record.id - ) + test_telemetry.track_crud_records_suggestions.assert_called_with(action="create", record_id=record.id) test_telemetry.track_data.assert_called() async def test_create_record_suggestion_update( @@ -1532,9 +1528,8 @@ async def test_delete_record_suggestions( expected_calls = [call(suggestion) for suggestion in suggestions] mock_search_engine.delete_record_suggestion.assert_has_calls(expected_calls) - 
test_telemetry.track_crud_records_subtopic.assert_called_with( + test_telemetry.track_crud_records_suggestions.assert_called_with( action="delete", - sub_topic="suggestions", record_id=record.id, count=len(suggestions_ids) + len(random_uuids), ) diff --git a/argilla-server/tests/unit/api/handlers/v1/test_responses.py b/argilla-server/tests/unit/api/handlers/v1/test_responses.py index 7295e87712..89d65a0f60 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_responses.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_responses.py @@ -109,9 +109,7 @@ async def test_update_response( mock_search_engine.update_record_response.assert_called_once_with(response) - test_telemetry.track_crud_records_subtopic.assert_called_with( - action="update", sub_topic="responses", record_id=record.id - ) + test_telemetry.track_crud_records_responses.assert_called_with(action="update", record_id=record.id) test_telemetry.track_data.assert_called() async def test_update_response_without_authentication(self, async_client: "AsyncClient", db: "AsyncSession"): @@ -438,9 +436,7 @@ async def test_delete_response( mock_search_engine.delete_record_response.assert_called_once_with(response) - test_telemetry.track_crud_records_subtopic.assert_called_with( - action="delete", sub_topic="responses", record_id=response.record.id - ) + test_telemetry.track_crud_records_responses.assert_called_with(action="delete", record_id=response.record.id) test_telemetry.track_data.assert_called() async def test_delete_response_without_authentication(self, async_client: "AsyncClient", db: "AsyncSession"): diff --git a/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py b/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py index 2507dd609b..d1e36dc18f 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py @@ -67,8 +67,8 @@ async def test_delete_suggestion( 
mock_search_engine.delete_record_suggestion.assert_called_once_with(suggestion) - test_telemetry.track_crud_records_subtopic.assert_called_with( - action="delete", sub_topic="suggestions", record_id=suggestion.record.id + test_telemetry.track_crud_records_suggestions.assert_called_with( + action="delete", record_id=suggestion.record.id ) test_telemetry.track_data.assert_called() From 93d46b1f3508955b687133e5449e061f9e5e72b0 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 27 Aug 2024 12:15:37 +0200 Subject: [PATCH 51/63] refactor: introduced track_error specific method --- argilla-server/pdm.lock | 13 ++++++++----- .../src/argilla_server/errors/error_handler.py | 9 +-------- argilla-server/src/argilla_server/telemetry.py | 14 ++++++++++++++ 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/argilla-server/pdm.lock b/argilla-server/pdm.lock index d509b10d01..f52c3d30c7 100644 --- a/argilla-server/pdm.lock +++ b/argilla-server/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "postgresql", "test"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:fc41d2f6356e480655b11ad3b6dc544bc19aaa84d8494aeb319771bc9dd54261" +content_hash = "sha256:8a73b464d000b58444fc97b9e7ba74d8449774921f2bba250a0f155a311dead4" [[metadata.targets]] requires_python = ">=3.8,<3.11" @@ -152,6 +152,7 @@ version = "4.0.3" requires_python = ">=3.7" summary = "Timeout context manager for asyncio programs" groups = ["default", "postgresql", "test"] +marker = "python_version < \"3.12.0\"" dependencies = [ "typing-extensions>=3.6.5; python_version < \"3.8\"", ] @@ -777,6 +778,7 @@ version = "1.2.0" requires_python = ">=3.7" summary = "Backport of PEP 654 (exception groups)" groups = ["default", "test"] +marker = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, {file = "exceptiongroup-1.2.0.tar.gz", hash = 
"sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, @@ -832,7 +834,7 @@ name = "filelock" version = "3.13.1" requires_python = ">=3.8" summary = "A platform independent file lock." -groups = ["test"] +groups = ["default", "test"] files = [ {file = "filelock-3.13.1-py3-none-any.whl", hash = "sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c"}, {file = "filelock-3.13.1.tar.gz", hash = "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e"}, @@ -899,7 +901,7 @@ name = "fsspec" version = "2023.10.0" requires_python = ">=3.8" summary = "File-system specification" -groups = ["test"] +groups = ["default", "test"] files = [ {file = "fsspec-2023.10.0-py3-none-any.whl", hash = "sha256:346a8f024efeb749d2a5fca7ba8854474b1ff9af7c3faaf636a4548781136529"}, {file = "fsspec-2023.10.0.tar.gz", hash = "sha256:330c66757591df346ad3091a53bd907e15348c2ba17d63fd54f5c39c4457d2a5"}, @@ -1044,7 +1046,7 @@ name = "huggingface-hub" version = "0.20.2" requires_python = ">=3.8.0" summary = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" -groups = ["test"] +groups = ["default", "test"] dependencies = [ "filelock", "fsspec>=2023.5.0", @@ -2377,6 +2379,7 @@ version = "2.0.1" requires_python = ">=3.7" summary = "A lil' TOML parser" groups = ["test"] +marker = "python_version < \"3.11\"" files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, @@ -2387,7 +2390,7 @@ name = "tqdm" version = "4.66.1" requires_python = ">=3.7" summary = "Fast, Extensible Progress Meter" -groups = ["test"] +groups = ["default", "test"] dependencies = [ "colorama; platform_system == \"Windows\"", ] diff --git a/argilla-server/src/argilla_server/errors/error_handler.py 
b/argilla-server/src/argilla_server/errors/error_handler.py index b48c0a3015..a2161bb7fb 100644 --- a/argilla-server/src/argilla_server/errors/error_handler.py +++ b/argilla-server/src/argilla_server/errors/error_handler.py @@ -53,14 +53,7 @@ def __init__(self, error: ServerError): class APIErrorHandler: @classmethod async def track_error(cls, error: ServerError, request: Request): - user_agent = { - "code": error.code, - "user-agent": request.headers.get("user-agent"), - "accept-language": request.headers.get("accept-language"), - "type": error.__class__.__name__, - } - - await get_telemetry_client().track_data(topic="error/server", user_agent=user_agent) + await get_telemetry_client().track_error(error=error, request=request) @classmethod async def common_exception_handler(cls, request: Request, error: Exception): diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index dab8284fbb..7e2b72d672 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -23,6 +23,9 @@ from argilla_server._version import __version__ from argilla_server.constants import DEFAULT_USERNAME +from argilla_server.errors.base_errors import ( + ServerError, +) from argilla_server.models import ( Dataset, Field, @@ -261,6 +264,17 @@ async def track_crud_records_suggestions( user_agent = {"record_id": record_id} await self.track_data(topic=topic, user_agent=user_agent, count=count) + async def track_error(self, error: ServerError, request: Request): + topic = "error/server" + user_agent = { + "code": error.code, + "user-agent": request.headers.get("user-agent"), + "accept-language": request.headers.get("accept-language"), + "type": error.__class__.__name__, + } + + await self.track_data(topic=topic, user_agent=user_agent) + _TELEMETRY_CLIENT = TelemetryClient() From f5901b9043f2644bab3f15c415b5c8e526a055fe Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 27 Aug 2024 
12:55:14 +0200 Subject: [PATCH 52/63] refactor: name search operation like "search" refactor: add "me" to user operations refactor: add "list" to list-like operations --- .../api/handlers/v1/datasets/datasets.py | 13 ++++---- .../api/handlers/v1/datasets/questions.py | 2 +- .../api/handlers/v1/datasets/records.py | 30 ++++++++++++------- .../api/handlers/v1/responses.py | 2 +- .../argilla_server/api/handlers/v1/users.py | 2 +- .../api/handlers/v1/workspaces.py | 4 +-- .../src/argilla_server/telemetry.py | 6 ++-- 7 files changed, 37 insertions(+), 22 deletions(-) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py index f64e53d632..e3ac2a9041 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py @@ -22,7 +22,6 @@ from argilla_server.api.policies.v1 import DatasetPolicy, MetadataPropertyPolicy, authorize, is_authorized from argilla_server.api.schemas.v1.datasets import ( Dataset as DatasetSchema, - UsersProgress, ) from argilla_server.api.schemas.v1.datasets import ( DatasetCreate, @@ -30,6 +29,7 @@ DatasetProgress, Datasets, DatasetUpdate, + UsersProgress, ) from argilla_server.api.schemas.v1.fields import Field, FieldCreate, Fields from argilla_server.api.schemas.v1.metadata_properties import ( @@ -71,6 +71,7 @@ async def _filter_metadata_properties_by_policy( async def list_current_user_datasets( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), workspace_id: Optional[UUID] = None, current_user: User = Security(auth.get_current_user), ): @@ -85,6 +86,8 @@ async def list_current_user_datasets( else: dataset_list = await datasets.list_datasets_by_workspace_id(db, workspace_id) + await telemetry_client.track_crud_dataset(action="me/list", dataset=None, count=len(dataset_list)) + return
Datasets(items=dataset_list) @@ -105,7 +108,7 @@ async def list_dataset_fields( action="read", dataset=dataset, setting_name="fields", setting=field ) await telemetry_client.track_crud_dataset_setting( - action="read", dataset=dataset, setting_name="fields", count=len(dataset.fields) + action="list", dataset=dataset, setting_name="fields", count=len(dataset.fields) ) return Fields(items=dataset.fields) @@ -128,7 +131,7 @@ async def list_dataset_vector_settings( action="read", dataset=dataset, setting_name="vectors_settings", setting=vectors_setting ) await telemetry_client.track_crud_dataset_setting( - action="read", dataset=dataset, setting_name="vectors_settings", count=len(dataset.vectors_settings) + action="list", dataset=dataset, setting_name="vectors_settings", count=len(dataset.vectors_settings) ) return VectorsSettings(items=dataset.vectors_settings) @@ -152,10 +155,10 @@ async def list_current_user_dataset_metadata_properties( for metadata_property in filtered_metadata_properties: await telemetry_client.track_crud_dataset_setting( - action="read", dataset=dataset, setting_name="metadata_properties", setting=metadata_property + action="read", dataset=dataset, setting_name="me/metadata_properties", setting=metadata_property ) await telemetry_client.track_crud_dataset_setting( - action="read", dataset=dataset, setting_name="metadata_properties", count=len(filtered_metadata_properties) + action="list", dataset=dataset, setting_name="me/metadata_properties", count=len(filtered_metadata_properties) ) return MetadataProperties(items=filtered_metadata_properties) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py index d3921a5553..90c56b894c 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py @@ -47,7 +47,7 @@ async def list_dataset_questions( 
action="read", dataset=dataset, setting_name="questions", setting=question ) await telemetry_client.track_crud_dataset_setting( - action="read", setting_name="questions", dataset=dataset, count=len(dataset.questions) + action="list", setting_name="questions", dataset=dataset, count=len(dataset.questions) ) return Questions(items=dataset.questions) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py index 863f7bff59..35f21bdae1 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import re from typing import Any, Dict, List, Optional, Tuple, Union from uuid import UUID @@ -46,7 +45,7 @@ ) from argilla_server.contexts import datasets, search from argilla_server.database import get_async_db -from argilla_server.enums import RecordSortField, ResponseStatusFilter, SortOrder +from argilla_server.enums import RecordSortField, ResponseStatusFilter from argilla_server.errors.future import MissingVectorError, NotFoundError, UnprocessableEntityError from argilla_server.errors.future.base_errors import MISSING_VECTOR_ERROR_CODE from argilla_server.models import Dataset, Field, Record, User, VectorSettings @@ -268,7 +267,7 @@ async def list_dataset_records( include=include, ) - await telemetry_client.track_crud_records(action="read", record_or_dataset=dataset, count=len(records)) + await telemetry_client.track_crud_records(action="list", record_or_dataset=dataset, count=len(records)) return Records(items=records, total=total) @@ -364,13 +363,17 @@ async def search_current_user_dataset_records( query_score=record_id_score_map[record.id]["query_score"], ) - await telemetry_client.track_crud_records(action="read", record_or_dataset=dataset, 
count=search_responses.total) - - return SearchRecordsResult( + searc_record_results = SearchRecordsResult( items=[record["search_record"] for record in record_id_score_map.values()], total=search_responses.total, ) + await telemetry_client.track_crud_records( + action="me/search", record_or_dataset=dataset, count=search_responses.total + ) + + return searc_record_results + @router.post( "/datasets/{dataset_id}/records/search", @@ -423,13 +426,15 @@ async def search_dataset_records( query_score=record_id_score_map[record.id]["query_score"], ) - await telemetry_client.track_crud_records(action="read", record_or_dataset=dataset, count=search_responses.total) - - return SearchRecordsResult( + search_record_results = SearchRecordsResult( items=[record["search_record"] for record in record_id_score_map.values()], total=search_responses.total, ) + await telemetry_client.track_crud_records(action="search", record_or_dataset=dataset, count=search_responses.total) + + return search_record_results + @router.get( "/datasets/{dataset_id}/records/search/suggestions/options", @@ -439,6 +444,7 @@ async def search_dataset_records( async def list_dataset_records_search_suggestions_options( *, db: AsyncSession = Depends(get_async_db), + telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -448,7 +454,7 @@ async def list_dataset_records_search_suggestions_options( suggestion_agents_by_question = await search.get_dataset_suggestion_agents_by_question(db, dataset.id) - return SearchSuggestionsOptions( + search_suggestion_options = SearchSuggestionsOptions( items=[ SearchSuggestionOptions( question=SearchSuggestionOptionsQuestion(id=sa["question_id"], name=sa["question_name"]), @@ -458,6 +464,10 @@ async def list_dataset_records_search_suggestions_options( ] ) + await telemetry_client.track_crud_records_suggestions(action="search") + + return search_suggestion_options + async def 
_filter_record_metadata_for_user(record: Record, user: User) -> Optional[Dict[str, Any]]: if record.metadata_ is None: diff --git a/argilla-server/src/argilla_server/api/handlers/v1/responses.py b/argilla-server/src/argilla_server/api/handlers/v1/responses.py index c2ad11202e..4ce9d6cfb6 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/responses.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/responses.py @@ -57,7 +57,7 @@ async def create_current_user_responses_bulk( ) for response in responses_bulk_items_filtered: if response.item: - await telemetry_client.track_crud_records_responses(action="create", record_id=response.item.record_id) + await telemetry_client.track_crud_records_responses(action="me/create", record_id=response.item.record_id) return ResponsesBulk(items=responses_bulk_items) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/users.py b/argilla-server/src/argilla_server/api/handlers/v1/users.py index 9acfbb0e6d..ab819e7599 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/users.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/users.py @@ -69,7 +69,7 @@ async def list_users( users = await accounts.list_users(db) - await telemetry_client.track_crud_user(action="read", user=None, is_oauth=False, count=len(users)) + await telemetry_client.track_crud_user(action="list", user=None, is_oauth=False, count=len(users)) for user in users: await telemetry_client.track_crud_user(action="read", user=user, is_oauth=False) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py index 44c370a2a1..b2885e4678 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py @@ -106,8 +106,8 @@ async def list_workspaces_me( workspaces = await accounts.list_workspaces_by_user_id(db, current_user.id) for workspace in workspaces: - await 
telemetry_client.track_crud_workspace(action="read", workspace=workspace) - await telemetry_client.track_crud_workspace(action="read", workspace=None, count=len(workspaces)) + await telemetry_client.track_crud_workspace(action="me/read", workspace=workspace) + await telemetry_client.track_crud_workspace(action="me/list", workspace=None, count=len(workspaces)) return Workspaces(items=workspaces) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 7e2b72d672..942060a6b3 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -257,11 +257,13 @@ async def track_crud_records_responses( async def track_crud_records_suggestions( self, action: str, - record_id: str, + record_id: Union[str, None] = None, count: Union[int, None] = None, ): topic = f"dataset/records/suggestions/{action}" - user_agent = {"record_id": record_id} + user_agent = {} + if record_id: + user_agent["record_id"] = record_id await self.track_data(topic=topic, user_agent=user_agent, count=count) async def track_error(self, error: ServerError, request: Request): From f0019ccecd5447a3df00a4364f8939195cb2d910 Mon Sep 17 00:00:00 2001 From: Paco Aranda Date: Mon, 2 Sep 2024 11:46:30 +0200 Subject: [PATCH 53/63] [FEAT] argilla server: add basic endpoints telemetry support (#5437) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description This PR adds a middleware component to track the API endpoint's usage. 
**Type of change** - New feature (non-breaking change which adds functionality) - Refactor (change restructuring the codebase without changing functionality) - Improvement (change adding some improvement to an existing functionality) **How Has This Been Tested** **Checklist** - I added relevant documentation - I followed the style guidelines of this project - I did a self-review of my code - I made corresponding changes to the documentation - I confirm My changes generate no new warnings - I have added tests that prove my fix is effective or that my feature works - I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --------- Co-authored-by: José Francisco Calvo --- argilla-server/src/argilla_server/_app.py | 29 ++++-- .../api/errors/v1/exception_handlers.py | 36 +++++++ .../argilla_server/errors/error_handler.py | 3 +- .../src/argilla_server/telemetry.py | 66 ++++++++++--- .../src/argilla_server/utils/_fastapi.py | 48 ++++++++++ .../unit/api/handlers/v1/test_datasets.py | 3 + .../tests/unit/commons/test_telemetry.py | 93 ++++++++++++++++++- .../tests/unit/errors/test_api_errors.py | 1 + .../tests/unit/test_api_telemetry.py | 54 +++++++++++ argilla-server/tests/unit/utils/__init__.py | 14 +++ .../tests/unit/utils/test_fastapi_utils.py | 78 ++++++++++++++++ 11 files changed, 404 insertions(+), 21 deletions(-) create mode 100644 argilla-server/src/argilla_server/utils/_fastapi.py create mode 100644 argilla-server/tests/unit/test_api_telemetry.py create mode 100644 argilla-server/tests/unit/utils/__init__.py create mode 100644 argilla-server/tests/unit/utils/test_fastapi_utils.py diff --git a/argilla-server/src/argilla_server/_app.py b/argilla-server/src/argilla_server/_app.py index 425d4081e2..0279919dad 100644 --- a/argilla-server/src/argilla_server/_app.py +++ b/argilla-server/src/argilla_server/_app.py @@ -40,6 +40,8 @@ from argilla_server.search_engine import get_search_engine from argilla_server.settings import settings from 
argilla_server.static_rewrite import RewriteStaticFiles +from argilla_server.telemetry import get_telemetry_client +from argilla_server.utils._fastapi import resolve_endpoint_path_for_request _LOGGER = logging.getLogger("argilla") @@ -67,8 +69,9 @@ def create_server_app() -> FastAPI: ) configure_logging() - configure_middleware(app) + configure_common_middleware(app) configure_api_router(app) + configure_telemetry(app) configure_app_statics(app) configure_api_docs(app) @@ -92,7 +95,7 @@ async def redirect_api(): return RedirectResponse(url=f"{settings.base_url}api/v1/docs") -def configure_middleware(app: FastAPI): +def configure_common_middleware(app: FastAPI): """Configures fastapi middleware""" @app.middleware("http") @@ -121,6 +124,24 @@ def configure_api_router(app: FastAPI): app.mount("/api/v1", api_v1) +def configure_telemetry(app: FastAPI): + """ + Configures telemetry middleware for the app if telemetry is enabled + """ + if not settings.enable_telemetry: + return + + @app.middleware("http") + async def track_api_requests(request: Request, call_next): + response = await call_next(request) + try: + await get_telemetry_client().track_api_request(request, response) + except Exception as e: + _LOGGER.warning(f"Error tracking request: {e}") + finally: + return response + + def configure_app_statics(app: FastAPI): """Configure static folder for app""" @@ -178,10 +199,6 @@ def show_telemetry_warning(): ) _LOGGER.warning(message) - message += "\n\n " - message += "#set HF_HUB_DISABLE_TELEMETRY=1" if os.name == "nt" else "$>export HF_HUB_DISABLE_TELEMETRY=1" - message += "\n" - async def _create_oauth_allowed_workspaces(db: AsyncSession): from argilla_server.security.settings import settings as security_settings diff --git a/argilla-server/src/argilla_server/api/errors/v1/exception_handlers.py b/argilla-server/src/argilla_server/api/errors/v1/exception_handlers.py index c3bf0f4633..eee244af09 100644 --- 
a/argilla-server/src/argilla_server/api/errors/v1/exception_handlers.py +++ b/argilla-server/src/argilla_server/api/errors/v1/exception_handlers.py @@ -11,16 +11,46 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Optional from fastapi import FastAPI, status from fastapi.responses import JSONResponse +from fastapi import Request import argilla_server.errors.future as errors +def set_request_error(request: Request, error: Exception) -> None: + """ + Store the error in the request scope for further processing (Telemetry, etc) + + Parameters: + error (Exception): The error to store + request (Request): The request to store the error in + + """ + + request.state.error = error + + +def get_request_error(request: Request) -> Optional[Exception]: + """ + Get the error stored in the request scope + + Parameters: + request (Request): The request to get the error from + + Returns: + Optional[Exception]: The error stored in the request scope, or None if no error is stored + """ + + return getattr(request.state, "error", None) + + def add_exception_handlers(app: FastAPI): @app.exception_handler(errors.AuthenticationError) async def authentication_error(request, exc): + set_request_error(request, exc) return JSONResponse( status_code=status.HTTP_401_UNAUTHORIZED, # TODO: Once we move to v2.0 we can remove the content using detail attribute @@ -31,6 +61,7 @@ async def authentication_error(request, exc): @app.exception_handler(errors.NotFoundError) async def not_found_error_exception_handler(request, exc): + set_request_error(request, exc) return JSONResponse( status_code=status.HTTP_404_NOT_FOUND, # TODO: Once we move to v2.0 we can remove the content using detail attribute @@ -41,6 +72,7 @@ async def not_found_error_exception_handler(request, exc): @app.exception_handler(errors.NotUniqueError) async def 
not_unique_error_exception_handler(request, exc): + set_request_error(request, exc) return JSONResponse( status_code=status.HTTP_409_CONFLICT, # TODO: Once we move to v2.0 we can remove the content using detail attribute @@ -51,6 +83,7 @@ async def not_unique_error_exception_handler(request, exc): @app.exception_handler(errors.UnprocessableEntityError) async def unprocessable_entity_error_exception_handler(request, exc): + set_request_error(request, exc) return JSONResponse( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, # TODO: Once we move to v2.0 we can remove the content using detail attribute @@ -65,6 +98,7 @@ async def unprocessable_entity_error_exception_handler(request, exc): # This exception handler should be removed once we move to v2.0 and we use UnprocessableEntityError. @app.exception_handler(ValueError) async def value_error_exception_handler(request, exc): + set_request_error(request, exc) return JSONResponse( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, content={"detail": str(exc)}, @@ -73,6 +107,7 @@ async def value_error_exception_handler(request, exc): # TODO: Once we move to v2.0 we can remove this exception handler and use UnprocessableEntityError @app.exception_handler(errors.MissingVectorError) async def missing_vector_error_exception_handler(request, exc): + set_request_error(request, exc) return JSONResponse( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, content={"code": exc.code, "message": exc.message}, @@ -81,6 +116,7 @@ async def missing_vector_error_exception_handler(request, exc): # TODO: Once we move to v2.0 we can remove this exception handler and use UnprocessableEntityError @app.exception_handler(errors.UpdateDistributionWithExistingResponsesError) async def update_distribution_with_existing_responses(request, exc): + set_request_error(request, exc) return JSONResponse( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, content={"code": exc.code, "message": exc.message}, diff --git 
a/argilla-server/src/argilla_server/errors/error_handler.py b/argilla-server/src/argilla_server/errors/error_handler.py index a2161bb7fb..0bc76f7425 100644 --- a/argilla-server/src/argilla_server/errors/error_handler.py +++ b/argilla-server/src/argilla_server/errors/error_handler.py @@ -18,6 +18,7 @@ from fastapi.exception_handlers import http_exception_handler from fastapi.exceptions import RequestValidationError +from argilla_server.api.errors.v1.exception_handlers import set_request_error from argilla_server.errors.base_errors import ( BadRequestError, ClosedDatasetError, @@ -59,7 +60,7 @@ async def track_error(cls, error: ServerError, request: Request): async def common_exception_handler(cls, request: Request, error: Exception): """Wraps errors as custom generic error""" argilla_error = cls._exception_to_argilla_error(error) - await cls.track_error(argilla_error, request=request) + set_request_error(request, argilla_error) return await http_exception_handler(request, ServerHTTPException(argilla_error)) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 942060a6b3..75d6027c59 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -18,10 +18,11 @@ import platform from typing import Union -from fastapi import Request +from fastapi import Request, Response from huggingface_hub.utils import send_telemetry from argilla_server._version import __version__ +from argilla_server.api.errors.v1.exception_handlers import get_request_error from argilla_server.constants import DEFAULT_USERNAME from argilla_server.errors.base_errors import ( ServerError, @@ -41,6 +42,7 @@ Workspace, ) from argilla_server.settings import settings +from argilla_server.utils._fastapi import resolve_endpoint_path_for_request from argilla_server.utils._telemetry import ( is_running_on_docker_container, server_deployment_type, @@ -151,10 +153,11 @@ def 
_process_dataset_setting_settings( return user_data - async def track_data(self, topic: str, user_agent: dict, include_system_info: bool = True, count: int = 1): - library_name = "argilla" + async def track_data(self, topic: str, data: dict, include_system_info: bool = True, count: int = 1): + library_name = "argilla/server" topic = f"{library_name}/{topic}" + user_agent = {**data} if include_system_info: user_agent.update(self._system_info) if count is not None: @@ -162,11 +165,50 @@ async def track_data(self, topic: str, user_agent: dict, include_system_info: bo send_telemetry(topic=topic, library_name=library_name, library_version=__version__, user_agent=user_agent) + async def track_api_request(self, request: Request, response: Response) -> None: + """ + Track the endpoint usage. This method is called after the endpoint is processed. + The method will track the endpoint usage, the user-agent, and the response status code. If an error is raised + during the endpoint processing, the error will be tracked as well. 
+ + Parameters: + request (Request): The incoming request + response (Response): The outgoing response + + """ + + endpoint_path = resolve_endpoint_path_for_request(request) + if endpoint_path is None: + return + + topic = f"endpoints" + + data = { + "endpoint": f"{request.method} {endpoint_path}", + "request.user-agent": request.headers.get("user-agent"), + "request.method": request.method, + "request.accept-language": request.headers.get("accept-language"), + "response.status": str(response.status_code), + } + + if "Server-Timing" in response.headers: + duration_in_ms = response.headers["Server-Timing"] + duration_in_ms = duration_in_ms.removeprefix("total;dur=") + + data["duration_in_milliseconds"] = duration_in_ms + + if response.status_code >= 400: + argilla_error: Exception = get_request_error(request=request) + if argilla_error: + data["response.error_code"] = argilla_error.code # noqa + + await self.track_data(topic=topic, data=data) + async def track_user_login(self, request: Request, user: User): topic = "user/login" user_agent = self._process_user_model(user=user) user_agent.update(**self._process_request_info(request)) - await self.track_data(topic=topic, user_agent=user_agent) + await self.track_data(topic=topic, data=user_agent) async def track_crud_user( self, @@ -184,7 +226,7 @@ async def track_crud_user( user_agent["is_oauth"] = is_oauth if is_login is not None: user_agent["is_login"] = is_login - await self.track_data(topic=topic, user_agent=user_agent, count=count) + await self.track_data(topic=topic, data=user_agent, count=count) async def track_crud_workspace( self, action: str, workspace: Union[Workspace, None] = None, count: Union[int, None] = None @@ -193,7 +235,7 @@ async def track_crud_workspace( user_agent = {} if workspace: user_agent.update(self._process_workspace_model(workspace=workspace)) - await self.track_data(topic=topic, user_agent=user_agent, count=count) + await self.track_data(topic=topic, data=user_agent, count=count) async 
def track_crud_dataset( self, action: str, dataset: Union[Dataset, None] = None, count: Union[int, None] = None @@ -203,7 +245,7 @@ async def track_crud_dataset( if dataset: user_agent.update(self._process_dataset_model(dataset=dataset)) user_agent.update(self._process_dataset_settings(dataset=dataset)) - await self.track_data(topic=topic, user_agent=user_agent, count=count) + await self.track_data(topic=topic, data=user_agent, count=count) attributes: list[str] = ["fields", "questions", "vectors_settings", "metadata_properties"] if dataset: @@ -230,7 +272,7 @@ async def track_crud_dataset_setting( user_agent = self._process_dataset_model(dataset=dataset) if setting: user_agent.update(self._process_dataset_setting_settings(setting=setting)) - await self.track_data(topic=topic, user_agent=user_agent, count=count) + await self.track_data(topic=topic, data=user_agent, count=count) async def track_crud_records( self, action: str, record_or_dataset: Union[Record, Dataset, None] = None, count: Union[int, None] = None @@ -242,7 +284,7 @@ async def track_crud_records( user_agent = self._process_dataset_model(dataset=record_or_dataset) else: raise NotImplementedError("Expected element of `Dataset` or `Record`") - await self.track_data(topic=topic, user_agent=user_agent, count=count) + await self.track_data(topic=topic, data=user_agent, count=count) async def track_crud_records_responses( self, @@ -252,7 +294,7 @@ async def track_crud_records_responses( ): topic = f"dataset/records/responses/{action}" user_agent = {"record_id": record_id} - await self.track_data(topic=topic, user_agent=user_agent, count=count) + await self.track_data(topic=topic, data=user_agent, count=count) async def track_crud_records_suggestions( self, @@ -264,7 +306,7 @@ async def track_crud_records_suggestions( user_agent = {} if record_id: user_agent["record_id"] = record_id - await self.track_data(topic=topic, user_agent=user_agent, count=count) + await self.track_data(topic=topic, data=user_agent, 
count=count) async def track_error(self, error: ServerError, request: Request): topic = "error/server" @@ -275,7 +317,7 @@ async def track_error(self, error: ServerError, request: Request): "type": error.__class__.__name__, } - await self.track_data(topic=topic, user_agent=user_agent) + await self.track_data(topic=topic, data=user_agent) _TELEMETRY_CLIENT = TelemetryClient() diff --git a/argilla-server/src/argilla_server/utils/_fastapi.py b/argilla-server/src/argilla_server/utils/_fastapi.py new file mode 100644 index 0000000000..5e29c1703a --- /dev/null +++ b/argilla-server/src/argilla_server/utils/_fastapi.py @@ -0,0 +1,48 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, List + +from fastapi import Request +from starlette.routing import Route, Mount + + +def resolve_endpoint_path_for_request(request: Request) -> Optional[str]: + """ + Resolves the configured route endpoint path for the incoming request + + Parameters: + request (Request): The incoming request + + Returns: + The route path for the incoming request. None if the route path cannot be resolved. 
+ """ + + all_routes = request.scope.get("router").routes or [] + + for route in all_routes: + parent = None + routes: List[Route] = [route] + + if isinstance(route, Mount): + parent = route + routes = [route for route in route.routes if isinstance(route, Route)] + + for route in routes: + if route.endpoint == request.scope.get("endpoint"): + route_path = route.path + if parent: + route_path = f"{parent.path}{route_path}" + + return route_path diff --git a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py index 1e2a17c9e2..761efd5a7c 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py @@ -209,6 +209,7 @@ async def test_list_current_user_datasets_by_workspace_id( response_body = response.json() assert [dataset["name"] for dataset in response_body["items"]] == ["dataset-a"] + @pytest.mark.skip async def test_list_dataset_fields( self, async_client: "AsyncClient", owner_auth_header: dict, test_telemetry: MagicMock ): @@ -309,6 +310,7 @@ async def test_list_dataset_fields_with_nonexistent_dataset_id( assert response.status_code == 404 assert response.json() == {"detail": f"Dataset with id `{dataset_id}` not found"} + @pytest.mark.skip async def test_list_dataset_questions( self, async_client: "AsyncClient", owner_auth_header: dict, test_telemetry: MagicMock ): @@ -636,6 +638,7 @@ async def test_list_current_user_dataset_metadata_properties_with_nonexistent_da assert response.status_code == 404 assert response.json() == {"detail": f"Dataset with id `{dataset_id}` not found"} + @pytest.mark.skip @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) async def test_list_dataset_vectors_settings( self, async_client: "AsyncClient", role: UserRole, test_telemetry: MagicMock diff --git a/argilla-server/tests/unit/commons/test_telemetry.py b/argilla-server/tests/unit/commons/test_telemetry.py index 
8863b0ea8e..b5d140580c 100644 --- a/argilla-server/tests/unit/commons/test_telemetry.py +++ b/argilla-server/tests/unit/commons/test_telemetry.py @@ -17,13 +17,18 @@ from unittest.mock import MagicMock import pytest +from fastapi import Request, APIRouter +from fastapi.routing import APIRoute +from pytest_mock import mocker, MockerFixture +from starlette.responses import JSONResponse + +from argilla_server.api.errors.v1.exception_handlers import set_request_error +from argilla_server.errors import ServerError from argilla_server.models import ( Record, User, ) from argilla_server.telemetry import TelemetryClient -from fastapi import Request - from tests.factories import ( DatasetFactory, IntegerMetadataPropertyFactory, @@ -54,6 +59,90 @@ async def test_disable_telemetry(self): assert telemetry_client.enable_telemetry == False + async def test_track_api_request(self, test_telemetry: TelemetryClient, mocker: MockerFixture): + mocker.patch("argilla_server.telemetry.resolve_endpoint_path_for_request", return_value="/api/test/endpoint") + + request = Request( + scope={ + "type": "http", + "path": "/api/test/endpoint", + "headers": [ + (b"accept-language", b"en-US"), + (b"user-agent", b"test"), + ], + "method": "GET", + } + ) + response = JSONResponse(content={"test": "test"}, status_code=201, headers={"Server-Timing": "total;dur=50"}) + await test_telemetry.track_api_request(request=request, response=response) + + test_telemetry.track_data.assert_called_once_with( + topic="endpoints", + data={ + "endpoint": "GET /api/test/endpoint", + "request.method": "GET", + "request.user-agent": "test", + "request.accept-language": "en-US", + "response.status": "201", + "duration_in_milliseconds": "50", + }, + ) + + async def test_track_api_request_call_with_error(self, test_telemetry: TelemetryClient, mocker: MockerFixture): + mocker.patch("argilla_server.telemetry.resolve_endpoint_path_for_request", return_value="/api/test/endpoint") + + request = Request( + scope={ + "type": 
"http", + "path": "/api/test/endpoint", + "headers": {}, + "method": "POST", + } + ) + response = JSONResponse(content={"test": "test"}, status_code=500) + await test_telemetry.track_api_request(request=request, response=response) + + test_telemetry.track_data.assert_called_once_with( + topic="endpoints", + data={ + "endpoint": "POST /api/test/endpoint", + "request.method": "POST", + "request.user-agent": None, + "request.accept-language": None, + "response.status": "500", + }, + ) + + async def test_track_api_request_call_with_error_and_exception( + self, test_telemetry: TelemetryClient, mocker: MockerFixture + ): + mocker.patch("argilla_server.telemetry.resolve_endpoint_path_for_request", return_value="/api/test/endpoint") + + request = Request( + scope={ + "type": "http", + "path": "/api/test/endpoint", + "headers": {}, + "method": "POST", + } + ) + response = JSONResponse(content={"test": "test"}, status_code=500) + set_request_error(request, ServerError("Test exception")) + + await test_telemetry.track_api_request(request=request, response=response) + + test_telemetry.track_data.assert_called_once_with( + topic="endpoints", + data={ + "endpoint": "POST /api/test/endpoint", + "request.method": "POST", + "request.user-agent": None, + "request.accept-language": None, + "response.status": "500", + "response.error_code": "argilla.api.errors::ServerError", + }, + ) + async def test_track_user_login(self, test_telemetry: MagicMock): user = User(id=uuid.uuid4(), username="argilla") await test_telemetry.track_user_login(request=mock_request, user=user) diff --git a/argilla-server/tests/unit/errors/test_api_errors.py b/argilla-server/tests/unit/errors/test_api_errors.py index 5daba73d21..b85d7791f7 100644 --- a/argilla-server/tests/unit/errors/test_api_errors.py +++ b/argilla-server/tests/unit/errors/test_api_errors.py @@ -30,6 +30,7 @@ @pytest.mark.asyncio class TestAPIErrorHandler: + @pytest.mark.skip @pytest.mark.asyncio @pytest.mark.parametrize( ["error", 
"expected_event"], diff --git a/argilla-server/tests/unit/test_api_telemetry.py b/argilla-server/tests/unit/test_api_telemetry.py new file mode 100644 index 0000000000..9f10aa14ae --- /dev/null +++ b/argilla-server/tests/unit/test_api_telemetry.py @@ -0,0 +1,54 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from unittest import mock +from unittest.mock import MagicMock, ANY + +import pytest +from starlette.testclient import TestClient + +from argilla_server._app import create_server_app +from argilla_server.settings import settings +from argilla_server.telemetry import TelemetryClient + + +class TestAPITelemetry: + def test_track_api_request_call(self, test_telemetry: TelemetryClient): + settings.enable_telemetry = True # Forcing telemetry to be enabled for this test + + client = TestClient(create_server_app()) + + client.get("/api/v1/version") + + test_telemetry.track_api_request.assert_called_once() + + def test_track_api_request_call_on_error(self, test_telemetry: TelemetryClient): + settings.enable_telemetry = True + + client = TestClient(create_server_app()) + + response = client.post("/api/v1/datasets") + assert response.status_code == 401 + + test_telemetry.track_api_request.assert_called_once() + + def test_not_track_api_request_call_when_disabled_telemetry(self, test_telemetry: TelemetryClient): + settings.enable_telemetry = False + + client = TestClient(create_server_app()) + + 
client.get("/api/v1/version") + + test_telemetry.track_api_request.assert_not_called() diff --git a/argilla-server/tests/unit/utils/__init__.py b/argilla-server/tests/unit/utils/__init__.py new file mode 100644 index 0000000000..4b6cecae7f --- /dev/null +++ b/argilla-server/tests/unit/utils/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/argilla-server/tests/unit/utils/test_fastapi_utils.py b/argilla-server/tests/unit/utils/test_fastapi_utils.py new file mode 100644 index 0000000000..22912a02bc --- /dev/null +++ b/argilla-server/tests/unit/utils/test_fastapi_utils.py @@ -0,0 +1,78 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from fastapi import Request, APIRouter +from fastapi.routing import APIRoute +from starlette.routing import Mount + +from argilla_server.utils._fastapi import resolve_endpoint_path_for_request + + +def mock_endpoint(*args, **kwargs): + pass + + +class TestFastAPIUTils: + def test_resolve_endpoint_path_for_request(self): + request = Request( + scope={ + "type": "http", + "router": APIRouter(routes=[APIRoute(path="/api/endpoint", endpoint=mock_endpoint)]), + "endpoint": mock_endpoint, + } + ) + + endpoint_path = resolve_endpoint_path_for_request(request) + assert endpoint_path == "/api/endpoint" + + def test_resolve_endpoint_path_for_request_with_mount(self): + request = Request( + scope={ + "type": "http", + "router": APIRouter( + routes=[Mount(path="/api", routes=[APIRoute(path="/endpoint", endpoint=mock_endpoint)])], + ), + "endpoint": mock_endpoint, + } + ) + + endpoint_path = resolve_endpoint_path_for_request(request) + assert endpoint_path == "/api/endpoint" + + def test_resolve_endpoint_path_for_request_with_different_endpoint(self): + request = Request( + scope={ + "type": "http", + "router": APIRouter( + routes=[APIRoute(path="/api/endpoint", endpoint=mock_endpoint)], + ), + "endpoint": lambda x: x, + } + ) + + endpoint_path = resolve_endpoint_path_for_request(request) + assert endpoint_path is None + + def test_resolve_endpoint_path_for_request_with_missing_endpoint(self): + request = Request( + scope={ + "type": "http", + "router": APIRouter( + routes=[APIRoute(path="/api/endpoint", endpoint=mock_endpoint)], + ), + } + ) + + endpoint_path = resolve_endpoint_path_for_request(request) + assert endpoint_path is None From 8ce3621b9a643aff8a661d862020fc54d923d565 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Mon, 2 Sep 2024 14:05:30 +0200 Subject: [PATCH 54/63] chore: Remove all non-general endpoint telemetry related-code --- argilla-server/src/argilla_server/_app.py | 1 - .../api/handlers/v1/datasets/datasets.py | 104 +-------- 
.../api/handlers/v1/datasets/questions.py | 23 +- .../api/handlers/v1/datasets/records.py | 28 +-- .../api/handlers/v1/datasets/records_bulk.py | 5 - .../argilla_server/api/handlers/v1/fields.py | 19 +- .../api/handlers/v1/metadata_properties.py | 25 +- .../argilla_server/api/handlers/v1/oauth2.py | 4 +- .../api/handlers/v1/questions.py | 19 +- .../argilla_server/api/handlers/v1/records.py | 42 +--- .../api/handlers/v1/responses.py | 26 +-- .../api/handlers/v1/suggestions.py | 8 +- .../argilla_server/api/handlers/v1/users.py | 27 +-- .../api/handlers/v1/vectors_settings.py | 19 +- .../api/handlers/v1/workspaces.py | 27 +-- .../argilla_server/errors/error_handler.py | 5 - .../src/argilla_server/telemetry.py | 218 ------------------ .../unit/api/handlers/v1/test_datasets.py | 99 +------- .../tests/unit/api/handlers/v1/test_fields.py | 23 +- .../handlers/v1/test_list_dataset_records.py | 13 +- .../handlers/v1/test_metadata_properties.py | 25 +- .../unit/api/handlers/v1/test_questions.py | 16 +- .../unit/api/handlers/v1/test_records.py | 61 +---- .../unit/api/handlers/v1/test_responses.py | 15 +- .../unit/api/handlers/v1/test_suggestions.py | 13 +- .../api/handlers/v1/test_vectors_settings.py | 15 +- .../unit/api/handlers/v1/test_workspaces.py | 13 +- .../tests/unit/commons/test_telemetry.py | 85 ------- argilla-server/tests/unit/conftest.py | 5 - .../tests/unit/errors/test_api_errors.py | 87 ------- 30 files changed, 66 insertions(+), 1004 deletions(-) delete mode 100644 argilla-server/tests/unit/errors/test_api_errors.py diff --git a/argilla-server/src/argilla_server/_app.py b/argilla-server/src/argilla_server/_app.py index 0279919dad..a980586d27 100644 --- a/argilla-server/src/argilla_server/_app.py +++ b/argilla-server/src/argilla_server/_app.py @@ -41,7 +41,6 @@ from argilla_server.settings import settings from argilla_server.static_rewrite import RewriteStaticFiles from argilla_server.telemetry import get_telemetry_client -from argilla_server.utils._fastapi 
import resolve_endpoint_path_for_request _LOGGER = logging.getLogger("argilla") diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py index e3ac2a9041..a52fb4e6ee 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py @@ -71,7 +71,6 @@ async def _filter_metadata_properties_by_policy( async def list_current_user_datasets( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), workspace_id: Optional[UUID] = None, current_user: User = Security(auth.get_current_user), ): @@ -86,64 +85,34 @@ async def list_current_user_datasets( else: dataset_list = await datasets.list_datasets_by_workspace_id(db, workspace_id) - await telemetry_client.track_crud_dataset(action="me/list", dataset=None, count=len(dataset_list)) - return Datasets(items=dataset_list) @router.get("/datasets/{dataset_id}/fields", response_model=Fields) async def list_dataset_fields( - *, - db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), - dataset_id: UUID, - current_user: User = Security(auth.get_current_user), + *, db: AsyncSession = Depends(get_async_db), dataset_id: UUID, current_user: User = Security(auth.get_current_user) ): dataset = await Dataset.get_or_raise(db, dataset_id, options=[selectinload(Dataset.fields)]) await authorize(current_user, DatasetPolicy.get(dataset)) - for field in dataset.fields: - await telemetry_client.track_crud_dataset_setting( - action="read", dataset=dataset, setting_name="fields", setting=field - ) - await telemetry_client.track_crud_dataset_setting( - action="list", dataset=dataset, setting_name="fields", count=len(dataset.fields) - ) - return Fields(items=dataset.fields) @router.get("/datasets/{dataset_id}/vectors-settings", 
response_model=VectorsSettings) async def list_dataset_vector_settings( - *, - db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), - dataset_id: UUID, - current_user: User = Security(auth.get_current_user), + *, db: AsyncSession = Depends(get_async_db), dataset_id: UUID, current_user: User = Security(auth.get_current_user) ): dataset = await Dataset.get_or_raise(db, dataset_id, options=[selectinload(Dataset.vectors_settings)]) await authorize(current_user, DatasetPolicy.get(dataset)) - for vectors_setting in dataset.vectors_settings: - await telemetry_client.track_crud_dataset_setting( - action="read", dataset=dataset, setting_name="vectors_settings", setting=vectors_setting - ) - await telemetry_client.track_crud_dataset_setting( - action="list", dataset=dataset, setting_name="vectors_settings", count=len(dataset.vectors_settings) - ) - return VectorsSettings(items=dataset.vectors_settings) @router.get("/me/datasets/{dataset_id}/metadata-properties", response_model=MetadataProperties) async def list_current_user_dataset_metadata_properties( - *, - db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), - dataset_id: UUID, - current_user: User = Security(auth.get_current_user), + *, db: AsyncSession = Depends(get_async_db), dataset_id: UUID, current_user: User = Security(auth.get_current_user) ): dataset = await Dataset.get_or_raise(db, dataset_id, options=[selectinload(Dataset.metadata_properties)]) @@ -153,31 +122,17 @@ async def list_current_user_dataset_metadata_properties( current_user, dataset.metadata_properties ) - for metadata_property in filtered_metadata_properties: - await telemetry_client.track_crud_dataset_setting( - action="read", dataset=dataset, setting_name="me/metadata_properties", setting=metadata_property - ) - await telemetry_client.track_crud_dataset_setting( - action="list", dataset=dataset, setting_name="me/metadata_properties", 
count=len(filtered_metadata_properties) - ) - return MetadataProperties(items=filtered_metadata_properties) @router.get("/datasets/{dataset_id}", response_model=DatasetSchema) async def get_dataset( - *, - db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), - dataset_id: UUID, - current_user: User = Security(auth.get_current_user), + *, db: AsyncSession = Depends(get_async_db), dataset_id: UUID, current_user: User = Security(auth.get_current_user) ): dataset = await Dataset.get_or_raise(db, dataset_id) await authorize(current_user, DatasetPolicy.get(dataset)) - await telemetry_client.track_crud_dataset(action="read", dataset=dataset) - return dataset @@ -229,24 +184,18 @@ async def get_dataset_users_progress( async def create_dataset( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_create: DatasetCreate, current_user: User = Security(auth.get_current_user), ): await authorize(current_user, DatasetPolicy.create(dataset_create.workspace_id)) - dataset = await datasets.create_dataset(db, dataset_create.dict()) - - await telemetry_client.track_crud_dataset(action="create", dataset=dataset) - - return dataset + return await datasets.create_dataset(db, dataset_create.dict()) @router.post("/datasets/{dataset_id}/fields", status_code=status.HTTP_201_CREATED, response_model=Field) async def create_dataset_field( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, field_create: FieldCreate, current_user: User = Security(auth.get_current_user), @@ -255,13 +204,7 @@ async def create_dataset_field( await authorize(current_user, DatasetPolicy.create_field(dataset)) - field = await datasets.create_field(db, dataset, field_create) - - await telemetry_client.track_crud_dataset_setting( - action="create", setting_name="fields", dataset=dataset, setting=field - ) - - return 
field + return await datasets.create_field(db, dataset, field_create) @router.post( @@ -271,7 +214,6 @@ async def create_dataset_metadata_property( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, metadata_property_create: MetadataPropertyCreate, current_user: User = Security(auth.get_current_user), @@ -280,13 +222,7 @@ async def create_dataset_metadata_property( await authorize(current_user, DatasetPolicy.create_metadata_property(dataset)) - metadata_property = await datasets.create_metadata_property(db, search_engine, dataset, metadata_property_create) - - await telemetry_client.track_crud_dataset_setting( - action="create", setting_name="metadata_properties", dataset=dataset, setting=metadata_property - ) - - return metadata_property + return await datasets.create_metadata_property(db, search_engine, dataset, metadata_property_create) @router.post( @@ -296,7 +232,6 @@ async def create_dataset_vector_settings( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, vector_settings_create: VectorSettingsCreate, current_user: User = Security(auth.get_current_user), @@ -305,13 +240,7 @@ async def create_dataset_vector_settings( await authorize(current_user, DatasetPolicy.create_vector_settings(dataset)) - vector_setting = await datasets.create_vector_settings(db, search_engine, dataset, vector_settings_create) - - await telemetry_client.track_crud_dataset_setting( - action="create", setting_name="vectors_settings", dataset=dataset, setting=vector_setting - ) - - return vector_setting + return await datasets.create_vector_settings(db, search_engine, dataset, vector_settings_create) @router.put("/datasets/{dataset_id}/publish", response_model=DatasetSchema) @@ -319,7 +248,6 @@ async def 
publish_dataset( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, current_user: User = Security(auth.get_current_user), ) -> Dataset: @@ -338,8 +266,6 @@ async def publish_dataset( dataset = await datasets.publish_dataset(db, search_engine, dataset) - await telemetry_client.track_crud_dataset(action="create", dataset=dataset) - return dataset @@ -348,7 +274,6 @@ async def delete_dataset( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -356,18 +281,13 @@ async def delete_dataset( await authorize(current_user, DatasetPolicy.delete(dataset)) - dataset = await datasets.delete_dataset(db, search_engine, dataset) - - await telemetry_client.track_crud_dataset(action="delete", dataset=dataset) - - return dataset + return await datasets.delete_dataset(db, search_engine, dataset) @router.patch("/datasets/{dataset_id}", response_model=DatasetSchema) async def update_dataset( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, dataset_update: DatasetUpdate, current_user: User = Security(auth.get_current_user), @@ -376,8 +296,4 @@ async def update_dataset( await authorize(current_user, DatasetPolicy.update(dataset)) - dataset = await datasets.update_dataset(db, dataset, dataset_update.dict(exclude_unset=True)) - - await telemetry_client.track_crud_dataset(action="update", dataset=dataset) - - return dataset + return await datasets.update_dataset(db, dataset, dataset_update.dict(exclude_unset=True)) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py index 
90c56b894c..b0b08b22e9 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/questions.py @@ -32,24 +32,12 @@ @router.get("/datasets/{dataset_id}/questions", response_model=Questions) async def list_dataset_questions( - *, - db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), - dataset_id: UUID, - current_user: User = Security(auth.get_current_user), + *, db: AsyncSession = Depends(get_async_db), dataset_id: UUID, current_user: User = Security(auth.get_current_user) ): dataset = await Dataset.get_or_raise(db, dataset_id, options=[selectinload(Dataset.questions)]) await authorize(current_user, DatasetPolicy.get(dataset)) - for question in dataset.questions: - await telemetry_client.track_crud_dataset_setting( - action="read", dataset=dataset, setting_name="questions", setting=question - ) - await telemetry_client.track_crud_dataset_setting( - action="list", setting_name="questions", dataset=dataset, count=len(dataset.questions) - ) - return Questions(items=dataset.questions) @@ -57,7 +45,6 @@ async def list_dataset_questions( async def create_dataset_question( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, question_create: QuestionCreate, current_user: User = Security(auth.get_current_user), @@ -75,10 +62,4 @@ async def create_dataset_question( await authorize(current_user, DatasetPolicy.create_question(dataset)) - question = await questions.create_question(db, dataset, question_create) - - await telemetry_client.track_crud_dataset_setting( - action="create", setting_name="questions", dataset=dataset, setting=question - ) - - return question + return await questions.create_question(db, dataset, question_create) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py 
b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py index 35f21bdae1..d415dbbcd8 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py @@ -247,7 +247,6 @@ async def list_dataset_records( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, include: Optional[RecordIncludeParam] = Depends(parse_record_include_param), offset: int = 0, @@ -267,8 +266,6 @@ async def list_dataset_records( include=include, ) - await telemetry_client.track_crud_records(action="list", record_or_dataset=dataset, count=len(records)) - return Records(items=records, total=total) @@ -277,7 +274,6 @@ async def delete_dataset_records( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, current_user: User = Security(auth.get_current_user), ids: str = Query(..., description="A comma separated list with the IDs of the records to be removed"), @@ -297,8 +293,6 @@ async def delete_dataset_records( await datasets.delete_records(db, search_engine, dataset, record_ids) - await telemetry_client.track_crud_records(action="delete", record_or_dataset=dataset, count=num_records) - @router.post( "/me/datasets/{dataset_id}/records/search", @@ -363,17 +357,11 @@ async def search_current_user_dataset_records( query_score=record_id_score_map[record.id]["query_score"], ) - searc_record_results = SearchRecordsResult( + return SearchRecordsResult( items=[record["search_record"] for record in record_id_score_map.values()], total=search_responses.total, ) - await telemetry_client.track_crud_records( - action="me/search", record_or_dataset=dataset, count=search_responses.total - ) - - return searc_record_results - @router.post( 
"/datasets/{dataset_id}/records/search", @@ -385,7 +373,6 @@ async def search_dataset_records( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, body: SearchRecordsQuery, include: Optional[RecordIncludeParam] = Depends(parse_record_include_param), @@ -426,15 +413,11 @@ async def search_dataset_records( query_score=record_id_score_map[record.id]["query_score"], ) - search_record_results = SearchRecordsResult( + return SearchRecordsResult( items=[record["search_record"] for record in record_id_score_map.values()], total=search_responses.total, ) - await telemetry_client.track_crud_records(action="search", record_or_dataset=dataset, count=search_responses.total) - - return search_record_results - @router.get( "/datasets/{dataset_id}/records/search/suggestions/options", @@ -444,7 +427,6 @@ async def search_dataset_records( async def list_dataset_records_search_suggestions_options( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), dataset_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -454,7 +436,7 @@ async def list_dataset_records_search_suggestions_options( suggestion_agents_by_question = await search.get_dataset_suggestion_agents_by_question(db, dataset.id) - search_suggestion_options = SearchSuggestionsOptions( + return SearchSuggestionsOptions( items=[ SearchSuggestionOptions( question=SearchSuggestionOptionsQuestion(id=sa["question_id"], name=sa["question_name"]), @@ -464,10 +446,6 @@ async def list_dataset_records_search_suggestions_options( ] ) - await telemetry_client.track_crud_records_suggestions(action="search") - - return search_suggestion_options - async def _filter_record_metadata_for_user(record: Record, user: User) -> Optional[Dict[str, Any]]: if record.metadata_ is None: diff --git 
a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py index 6841be8a24..69cc536a0f 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py @@ -60,8 +60,6 @@ async def create_dataset_records_bulk( records_bulk = await CreateRecordsBulk(db, search_engine).create_records_bulk(dataset, records_bulk_create) - await telemetry_client.track_crud_records(action="create", record_or_dataset=dataset, count=len(records_bulk.items)) - return records_bulk @@ -93,7 +91,4 @@ async def upsert_dataset_records_bulk( updated = len(records_bulk.updated_item_ids) created = len(records_bulk.items) - updated - await telemetry_client.track_crud_records(action="create", record_or_dataset=dataset, count=created) - await telemetry_client.track_crud_records(action="update", record_or_dataset=dataset, count=updated) - return records_bulk diff --git a/argilla-server/src/argilla_server/api/handlers/v1/fields.py b/argilla-server/src/argilla_server/api/handlers/v1/fields.py index ec17e77650..a62c7dbcde 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/fields.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/fields.py @@ -25,7 +25,6 @@ from argilla_server.database import get_async_db from argilla_server.models import Field, User from argilla_server.security import auth -from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter(tags=["fields"]) @@ -34,7 +33,6 @@ async def update_field( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), field_id: UUID, field_update: FieldUpdate, current_user: User = Security(auth.get_current_user), @@ -43,20 +41,13 @@ async def update_field( await authorize(current_user, FieldPolicy.update(field)) - field = await datasets.update_field(db, 
field, field_update) - - await telemetry_client.track_crud_dataset_setting( - action="update", dataset=field.dataset, setting_name="fields", setting=field - ) - - return field + return await datasets.update_field(db, field, field_update) @router.delete("/fields/{field_id}", response_model=FieldSchema) async def delete_field( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), field_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -64,10 +55,4 @@ async def delete_field( await authorize(current_user, FieldPolicy.delete(field)) - field = await datasets.delete_field(db, field) - - await telemetry_client.track_crud_dataset_setting( - action="delete", dataset=field.dataset, setting_name="fields", setting=field - ) - - return field + return await datasets.delete_field(db, field) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/metadata_properties.py b/argilla-server/src/argilla_server/api/handlers/v1/metadata_properties.py index 8941f97a63..ae0392bfb0 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/metadata_properties.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/metadata_properties.py @@ -31,7 +31,6 @@ from argilla_server.models import MetadataProperty, User from argilla_server.search_engine import SearchEngine, get_search_engine from argilla_server.security import auth -from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter(tags=["metadata properties"]) @@ -59,7 +58,6 @@ async def get_metadata_property_metrics( async def update_metadata_property( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), metadata_property_id: UUID, metadata_property_update: MetadataPropertyUpdate, current_user: User = Security(auth.get_current_user), @@ -72,23 +70,13 @@ async def update_metadata_property( await authorize(current_user, 
MetadataPropertyPolicy.update(metadata_property)) - metadata_property = await datasets.update_metadata_property(db, metadata_property, metadata_property_update) - - await telemetry_client.track_crud_dataset_setting( - action="update", - setting_name="metadata_properties", - dataset=metadata_property.dataset, - setting=metadata_property, - ) - - return metadata_property + return await datasets.update_metadata_property(db, metadata_property, metadata_property_update) @router.delete("/metadata-properties/{metadata_property_id}", response_model=MetadataPropertySchema) async def delete_metadata_property( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), metadata_property_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -100,13 +88,4 @@ async def delete_metadata_property( await authorize(current_user, MetadataPropertyPolicy.delete(metadata_property)) - metadata_property = await datasets.delete_metadata_property(db, metadata_property) - - await telemetry_client.track_crud_dataset_setting( - action="delete", - setting_name="metadata_properties", - dataset=metadata_property.dataset, - setting=metadata_property, - ) - - return metadata_property + return await datasets.delete_metadata_property(db, metadata_property) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py b/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py index c6864c9905..5f34c57072 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py @@ -17,6 +17,7 @@ from fastapi.responses import RedirectResponse from sqlalchemy.ext.asyncio import AsyncSession +from argilla_server import telemetry from argilla_server.api.schemas.v1.oauth2 import Provider, Providers, Token from argilla_server.api.schemas.v1.users import UserCreate from argilla_server.contexts import accounts @@ -28,7 +29,6 @@ from 
argilla_server.security.authentication.oauth2 import OAuth2ClientProvider from argilla_server.security.authentication.userinfo import UserInfo from argilla_server.security.settings import settings -from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter(prefix="/oauth2", tags=["Authentication"]) @@ -72,7 +72,6 @@ async def get_access_token( request: Request, provider: OAuth2ClientProvider = Depends(get_provider_by_name_or_raise), db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), ) -> Token: userinfo = UserInfo(await provider.get_user_data(request)).use_claims(provider.claims) @@ -90,6 +89,5 @@ async def get_access_token( ).dict(exclude_unset=True), workspaces=[workspace.name for workspace in settings.oauth.allowed_workspaces], ) - await telemetry_client.track_crud_user(action="create", user=user, is_oauth=True) return Token(access_token=accounts.generate_user_token(user)) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/questions.py b/argilla-server/src/argilla_server/api/handlers/v1/questions.py index 1b12919395..27d8cedf85 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/questions.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/questions.py @@ -25,7 +25,6 @@ from argilla_server.database import get_async_db from argilla_server.models import Question, User from argilla_server.security import auth -from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter(tags=["questions"]) @@ -34,7 +33,6 @@ async def update_question( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), question_id: UUID, question_update: QuestionUpdate, current_user: User = Security(auth.get_current_user), @@ -43,20 +41,13 @@ async def update_question( await authorize(current_user, QuestionPolicy.update(question)) - question = await questions.update_question(db, 
question, question_update) - - await telemetry_client.track_crud_dataset_setting( - action="update", dataset=question.dataset, setting_name="questions", setting=question - ) - - return question + return await questions.update_question(db, question, question_update) @router.delete("/questions/{question_id}", response_model=QuestionSchema) async def delete_question( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), question_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -64,10 +55,4 @@ async def delete_question( await authorize(current_user, QuestionPolicy.delete(question)) - question = await questions.delete_question(db, question) - - await telemetry_client.track_crud_dataset_setting( - action="delete", dataset=question.dataset, setting_name="questions", setting=question - ) - - return question + return await questions.delete_question(db, question) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/records.py b/argilla-server/src/argilla_server/api/handlers/v1/records.py index edc86dab25..3778921ee2 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/records.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/records.py @@ -31,7 +31,6 @@ from argilla_server.models import Dataset, Question, Record, Suggestion, User from argilla_server.search_engine import SearchEngine, get_search_engine from argilla_server.security import auth -from argilla_server.telemetry import TelemetryClient, get_telemetry_client from argilla_server.utils import parse_uuids DELETE_RECORD_SUGGESTIONS_LIMIT = 100 @@ -43,7 +42,6 @@ async def get_record( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), record_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -59,8 +57,6 @@ async def get_record( await authorize(current_user, RecordPolicy.get(record)) - await 
telemetry_client.track_crud_records(action="read", record_or_dataset=record) - return record @@ -69,7 +65,6 @@ async def update_record( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), record_id: UUID, record_update: RecordUpdate, current_user: User = Security(auth.get_current_user), @@ -87,11 +82,7 @@ async def update_record( await authorize(current_user, RecordPolicy.update(record)) - record = await datasets.update_record(db, search_engine, record, record_update) - - await telemetry_client.track_crud_records(action="update", record_or_dataset=record) - - return record + return await datasets.update_record(db, search_engine, record, record_update) @router.post("/records/{record_id}/responses", status_code=status.HTTP_201_CREATED, response_model=Response) @@ -99,7 +90,6 @@ async def create_record_response( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), record_id: UUID, response_create: ResponseCreate, current_user: User = Security(auth.get_current_user), @@ -115,18 +105,13 @@ async def create_record_response( await authorize(current_user, RecordPolicy.create_response(record)) - response = await datasets.create_response(db, search_engine, record, current_user, response_create) - - await telemetry_client.track_crud_records_responses(action="create", record_id=record_id) - - return response + return await datasets.create_response(db, search_engine, record, current_user, response_create) @router.get("/records/{record_id}/suggestions", status_code=status.HTTP_200_OK, response_model=Suggestions) async def get_record_suggestions( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), record_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -142,10 
+127,6 @@ async def get_record_suggestions( await authorize(current_user, RecordPolicy.get_suggestions(record)) - await telemetry_client.track_crud_records_suggestions( - action="read", record_id=record_id, count=len(record.suggestions) - ) - return Suggestions(items=record.suggestions) @@ -163,7 +144,6 @@ async def upsert_suggestion( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), record_id: UUID, suggestion_create: SuggestionCreate, current_user: User = Security(auth.get_current_user), @@ -191,16 +171,10 @@ async def upsert_suggestion( # NOTE: If there is already a suggestion for this record and question, we update it instead of creating a new one. # So we set the correct status code here. - action = "create" if await Suggestion.get_by(db, record_id=record_id, question_id=suggestion_create.question_id): response.status_code = status.HTTP_200_OK - action = "update" - - suggestion = await datasets.upsert_suggestion(db, search_engine, record, question, suggestion_create) - await telemetry_client.track_crud_records_suggestions(action=action, record_id=record_id) - - return suggestion + return await datasets.upsert_suggestion(db, search_engine, record, question, suggestion_create) @router.delete( @@ -212,7 +186,6 @@ async def delete_record_suggestions( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), record_id: UUID, current_user: User = Security(auth.get_current_user), ids: str = Query(..., description="A comma separated list with the IDs of the suggestions to be removed"), @@ -239,15 +212,12 @@ async def delete_record_suggestions( await datasets.delete_suggestions(db, search_engine, record, suggestion_ids) - await telemetry_client.track_crud_records_suggestions(action="delete", record_id=record_id, count=num_suggestions) - 
@router.delete("/records/{record_id}", response_model=RecordSchema, response_model_exclude_unset=True) async def delete_record( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), record_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -262,8 +232,4 @@ async def delete_record( await authorize(current_user, RecordPolicy.delete(record)) - record = await datasets.delete_record(db, search_engine, record) - - await telemetry_client.track_crud_records(action="delete", record_or_dataset=record) - - return record + return await datasets.delete_record(db, search_engine, record) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/responses.py b/argilla-server/src/argilla_server/api/handlers/v1/responses.py index 4ce9d6cfb6..ddc389563a 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/responses.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/responses.py @@ -32,7 +32,6 @@ from argilla_server.models import Dataset, Record, Response, User from argilla_server.search_engine import SearchEngine, get_search_engine from argilla_server.security import auth -from argilla_server.telemetry import TelemetryClient, get_telemetry_client from argilla_server.use_cases.responses.upsert_responses_in_bulk import ( UpsertResponsesInBulkUseCase, UpsertResponsesInBulkUseCaseFactory, @@ -47,18 +46,9 @@ async def create_current_user_responses_bulk( body: ResponsesBulkCreate, current_user: User = Security(auth.get_current_user), use_case: UpsertResponsesInBulkUseCase = Depends(UpsertResponsesInBulkUseCaseFactory()), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), -) -> ResponsesBulk: +): responses_bulk_items = await use_case.execute(body.items, user=current_user) - responses_bulk_items_filtered = [resp for resp in responses_bulk_items if resp.item] - await telemetry_client.track_crud_records_responses( - 
action="create", record_id=None, count=len(responses_bulk_items_filtered) - ) - for response in responses_bulk_items_filtered: - if response.item: - await telemetry_client.track_crud_records_responses(action="me/create", record_id=response.item.record_id) - return ResponsesBulk(items=responses_bulk_items) @@ -67,7 +57,6 @@ async def update_response( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), response_id: UUID, response_update: ResponseUpdate, current_user: User = Security(auth.get_current_user), @@ -82,11 +71,7 @@ async def update_response( await authorize(current_user, ResponsePolicy.update(response)) - response = await datasets.update_response(db, search_engine, response, response_update) - - await telemetry_client.track_crud_records_responses(action="update", record_id=response.record_id) - - return response + return await datasets.update_response(db, search_engine, response, response_update) @router.delete("/responses/{response_id}", response_model=ResponseSchema) @@ -94,7 +79,6 @@ async def delete_response( *, db: AsyncSession = Depends(get_async_db), search_engine=Depends(get_search_engine), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), response_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -108,8 +92,4 @@ async def delete_response( await authorize(current_user, ResponsePolicy.delete(response)) - response = await datasets.delete_response(db, search_engine, response) - - await telemetry_client.track_crud_records_responses(action="delete", record_id=response.record_id) - - return response + return await datasets.delete_response(db, search_engine, response) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/suggestions.py b/argilla-server/src/argilla_server/api/handlers/v1/suggestions.py index 212c7a3dbb..1257545749 100644 --- 
a/argilla-server/src/argilla_server/api/handlers/v1/suggestions.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/suggestions.py @@ -25,7 +25,6 @@ from argilla_server.models import Record, Suggestion, User from argilla_server.search_engine import SearchEngine, get_search_engine from argilla_server.security import auth -from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter(tags=["suggestions"]) @@ -35,7 +34,6 @@ async def delete_suggestion( *, db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), suggestion_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -50,8 +48,4 @@ async def delete_suggestion( await authorize(current_user, SuggestionPolicy.delete(suggestion)) - suggestion = await datasets.delete_suggestion(db, search_engine, suggestion) - - await telemetry_client.track_crud_records_suggestions(action="delete", record_id=suggestion.record_id) - - return suggestion + return await datasets.delete_suggestion(db, search_engine, suggestion) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/users.py b/argilla-server/src/argilla_server/api/handlers/v1/users.py index ab819e7599..0ee6a23ee2 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/users.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/users.py @@ -25,19 +25,14 @@ from argilla_server.database import get_async_db from argilla_server.models import User from argilla_server.security import auth -from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter(tags=["users"]) @router.get("/me", response_model=UserSchema) async def get_current_user( - request: Request, current_user: User = Security(auth.get_current_user), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): - await telemetry_client.track_user_login(request=request, user=current_user) - 
return current_user @@ -47,15 +42,10 @@ async def get_user( db: AsyncSession = Depends(get_async_db), user_id: UUID, current_user: User = Security(auth.get_current_user), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): await authorize(current_user, UserPolicy.get) - user = await User.get_or_raise(db, user_id) - - await telemetry_client.track_crud_user(action="read", user=user) - - return user + return await User.get_or_raise(db, user_id) @router.get("/users", response_model=Users) @@ -63,16 +53,11 @@ async def list_users( *, db: AsyncSession = Depends(get_async_db), current_user: User = Security(auth.get_current_user), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): await authorize(current_user, UserPolicy.list) users = await accounts.list_users(db) - await telemetry_client.track_crud_user(action="list", user=None, is_oauth=False, count=len(users)) - for user in users: - await telemetry_client.track_crud_user(action="read", user=user, is_oauth=False) - return Users(items=users) @@ -82,14 +67,11 @@ async def create_user( db: AsyncSession = Depends(get_async_db), user_create: UserCreate, current_user: User = Security(auth.get_current_user), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): await authorize(current_user, UserPolicy.create) user = await accounts.create_user(db, user_create.dict()) - await telemetry_client.track_crud_user(action="create", user=user, is_oauth=False) - return user @@ -99,17 +81,12 @@ async def delete_user( db: AsyncSession = Depends(get_async_db), user_id: UUID, current_user: User = Security(auth.get_current_user), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): user = await User.get_or_raise(db, user_id) await authorize(current_user, UserPolicy.delete) - user = await accounts.delete_user(db, user) - - await telemetry_client.track_crud_user(action="delete", user=user) - - return user + return await accounts.delete_user(db, user) 
@router.get("/users/{user_id}/workspaces", response_model=Workspaces) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/vectors_settings.py b/argilla-server/src/argilla_server/api/handlers/v1/vectors_settings.py index 384d078e60..511e9a5b99 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/vectors_settings.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/vectors_settings.py @@ -25,7 +25,6 @@ from argilla_server.database import get_async_db from argilla_server.models import User, VectorSettings from argilla_server.security import auth -from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter(tags=["vectors-settings"]) @@ -34,7 +33,6 @@ async def update_vector_settings( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), vector_settings_id: UUID, vector_settings_update: VectorSettingsUpdate, current_user: User = Security(auth.get_current_user), @@ -47,20 +45,13 @@ async def update_vector_settings( await authorize(current_user, VectorSettingsPolicy.update(vector_settings)) - vectors_setting = await datasets.update_vector_settings(db, vector_settings, vector_settings_update) - - await telemetry_client.track_crud_dataset_setting( - action="update", setting_name="vectors_settings", dataset=vectors_setting.dataset, setting=vectors_setting - ) - - return vector_settings + return await datasets.update_vector_settings(db, vector_settings, vector_settings_update) @router.delete("/vectors-settings/{vector_settings_id}", response_model=VectorSettingsSchema) async def delete_vector_settings( *, db: AsyncSession = Depends(get_async_db), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), vector_settings_id: UUID, current_user: User = Security(auth.get_current_user), ): @@ -72,10 +63,4 @@ async def delete_vector_settings( await authorize(current_user, VectorSettingsPolicy.delete(vector_settings)) - vectors_setting = await 
datasets.delete_vector_settings(db, vector_settings) - - await telemetry_client.track_crud_dataset_setting( - action="delete", setting_name="vectors_settings", dataset=vectors_setting.dataset, setting=vectors_setting - ) - - return vectors_setting + return await datasets.delete_vector_settings(db, vector_settings) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py index b2885e4678..1636998ea4 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py @@ -33,7 +33,6 @@ from argilla_server.errors.future import NotFoundError, UnprocessableEntityError from argilla_server.models import User, Workspace, WorkspaceUser from argilla_server.security import auth -from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter(tags=["workspaces"]) @@ -44,15 +43,10 @@ async def get_workspace( db: AsyncSession = Depends(get_async_db), workspace_id: UUID, current_user: User = Security(auth.get_current_user), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): await authorize(current_user, WorkspacePolicy.get(workspace_id)) - workspace = await Workspace.get_or_raise(db, workspace_id) - - await telemetry_client.track_crud_workspace(action="read", workspace=workspace) - - return workspace + return await Workspace.get_or_raise(db, workspace_id) @router.post("/workspaces", status_code=status.HTTP_201_CREATED, response_model=WorkspaceSchema) @@ -61,15 +55,10 @@ async def create_workspace( db: AsyncSession = Depends(get_async_db), workspace_create: WorkspaceCreate, current_user: User = Security(auth.get_current_user), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): await authorize(current_user, WorkspacePolicy.create) - workspace = await accounts.create_workspace(db, workspace_create.dict()) - - await 
telemetry_client.track_crud_workspace(action="create", workspace=workspace) - - return workspace + return await accounts.create_workspace(db, workspace_create.dict()) @router.delete("/workspaces/{workspace_id}", response_model=WorkspaceSchema) @@ -78,17 +67,12 @@ async def delete_workspace( db: AsyncSession = Depends(get_async_db), workspace_id: UUID, current_user: User = Security(auth.get_current_user), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): await authorize(current_user, WorkspacePolicy.delete) workspace = await Workspace.get_or_raise(db, workspace_id) - workspace = await accounts.delete_workspace(db, workspace) - - await telemetry_client.track_crud_workspace(action="delete", workspace=workspace) - - return workspace + return await accounts.delete_workspace(db, workspace) @router.get("/me/workspaces", response_model=Workspaces) @@ -96,7 +80,6 @@ async def list_workspaces_me( *, db: AsyncSession = Depends(get_async_db), current_user: User = Security(auth.get_current_user), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), ) -> Workspaces: await authorize(current_user, WorkspacePolicy.list_workspaces_me) @@ -105,10 +88,6 @@ async def list_workspaces_me( else: workspaces = await accounts.list_workspaces_by_user_id(db, current_user.id) - for workspace in workspaces: - await telemetry_client.track_crud_workspace(action="me/read", workspace=workspace) - await telemetry_client.track_crud_workspace(action="me/list", workspace=None, count=len(workspaces)) - return Workspaces(items=workspaces) diff --git a/argilla-server/src/argilla_server/errors/error_handler.py b/argilla-server/src/argilla_server/errors/error_handler.py index 0bc76f7425..a72807d4c8 100644 --- a/argilla-server/src/argilla_server/errors/error_handler.py +++ b/argilla-server/src/argilla_server/errors/error_handler.py @@ -34,7 +34,6 @@ WrongTaskError, ) from argilla_server.pydantic_v1 import BaseModel -from argilla_server.telemetry import 
get_telemetry_client class ErrorDetail(BaseModel): @@ -52,10 +51,6 @@ def __init__(self, error: ServerError): class APIErrorHandler: - @classmethod - async def track_error(cls, error: ServerError, request: Request): - await get_telemetry_client().track_error(error=error, request=request) - @classmethod async def common_exception_handler(cls, request: Request, error: Exception): """Wraps errors as custom generic error""" diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index 75d6027c59..aa246a9415 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -16,31 +16,12 @@ import json import logging import platform -from typing import Union from fastapi import Request, Response from huggingface_hub.utils import send_telemetry from argilla_server._version import __version__ from argilla_server.api.errors.v1.exception_handlers import get_request_error -from argilla_server.constants import DEFAULT_USERNAME -from argilla_server.errors.base_errors import ( - ServerError, -) -from argilla_server.models import ( - Dataset, - Field, - FloatMetadataPropertySettings, - IntegerMetadataPropertySettings, - MetadataProperty, - MetadataPropertySettings, - Question, - Record, - TermsMetadataPropertySettings, - User, - VectorSettings, - Workspace, -) from argilla_server.settings import settings from argilla_server.utils._fastapi import resolve_endpoint_path_for_request from argilla_server.utils._telemetry import ( @@ -69,90 +50,6 @@ def __post_init__(self, enable_telemetry: bool): _LOGGER.info(f"Context: {json.dumps(self._system_info, indent=2)}") self.enable_telemetry = enable_telemetry - @staticmethod - def _process_request_info(request: Request): - return {header: request.headers.get(header) for header in ["user-agent", "accept-language"]} - - @staticmethod - def _process_user_model(user: User): - return {"user_id": str(user.id), "role": user.role, 
"is_default_user": user.username == DEFAULT_USERNAME} - - @staticmethod - def _process_workspace_model(workspace: Workspace): - return {"workspace_id": str(workspace.id)} - - @staticmethod - def _process_dataset_model(dataset: Dataset): - return { - "dataset_id": str(dataset.id), - "workspace_id": str(dataset.workspace_id), - } - - @staticmethod - def _process_record_model(record: Record): - return { - "dataset_id": str(record.dataset_id), - "record_id": str(record.id), - } - - @staticmethod - def _process_dataset_settings(dataset: Dataset) -> dict: - user_data = {} - if dataset.is_relationship_loaded("guidelines"): - user_data["guidelines"] = True if getattr(dataset, "guidelines") else False - if dataset.is_relationship_loaded("guidelines"): - user_data["allow_extra_metadata"] = getattr(dataset, "allow_extra_metadata") - if dataset.is_relationship_loaded("distribution"): - distribution = getattr(dataset, "distribution") - user_data["distribution_strategy"] = distribution["strategy"] - if "min_submitted" in distribution: - user_data["distribution_min_submitted"] = distribution["min_submitted"] - - attributes: list[str] = [ - "fields", - "questions", - "vectors_settings", - "metadata_properties", - ] - for attr in attributes: - if dataset.is_relationship_loaded(attr): - user_data[f"count_{attr}"] = len(getattr(dataset, attr)) - - return user_data - - @staticmethod - def _process_dataset_setting_settings( - setting: Union[ - Field, - VectorSettings, - Question, - FloatMetadataPropertySettings, - TermsMetadataPropertySettings, - IntegerMetadataPropertySettings, - ], - ) -> dict: - user_data = {"dataset_id": str(setting.dataset_id)} - if isinstance(setting, (Field, Question)): - user_data["required"] = setting.required - user_data["type"] = setting.settings["type"] - elif isinstance( - setting, - ( - FloatMetadataPropertySettings, - TermsMetadataPropertySettings, - IntegerMetadataPropertySettings, - MetadataProperty, - ), - ): - user_data["type"] = setting.type.value - 
elif isinstance(setting, VectorSettings): - user_data["dimensions"] = setting.dimensions - user_data["type"] = "default" - else: - raise NotImplementedError("Expected a setting to be processed.") - - return user_data - async def track_data(self, topic: str, data: dict, include_system_info: bool = True, count: int = 1): library_name = "argilla/server" topic = f"{library_name}/{topic}" @@ -204,121 +101,6 @@ async def track_api_request(self, request: Request, response: Response) -> None: await self.track_data(topic=topic, data=data) - async def track_user_login(self, request: Request, user: User): - topic = "user/login" - user_agent = self._process_user_model(user=user) - user_agent.update(**self._process_request_info(request)) - await self.track_data(topic=topic, data=user_agent) - - async def track_crud_user( - self, - action: str, - user: Union[User, None] = None, - is_oauth: Union[bool, None] = None, - is_login: Union[bool, None] = None, - count: Union[int, None] = None, - ): - topic = f"user/{action}" - user_agent = {} - if user: - user_agent.update(self._process_user_model(user=user)) - if is_oauth is not None: - user_agent["is_oauth"] = is_oauth - if is_login is not None: - user_agent["is_login"] = is_login - await self.track_data(topic=topic, data=user_agent, count=count) - - async def track_crud_workspace( - self, action: str, workspace: Union[Workspace, None] = None, count: Union[int, None] = None - ): - topic: str = f"workspace/{action}" - user_agent = {} - if workspace: - user_agent.update(self._process_workspace_model(workspace=workspace)) - await self.track_data(topic=topic, data=user_agent, count=count) - - async def track_crud_dataset( - self, action: str, dataset: Union[Dataset, None] = None, count: Union[int, None] = None - ): - topic = f"dataset/{action}" - user_agent = {} - if dataset: - user_agent.update(self._process_dataset_model(dataset=dataset)) - user_agent.update(self._process_dataset_settings(dataset=dataset)) - await 
self.track_data(topic=topic, data=user_agent, count=count) - - attributes: list[str] = ["fields", "questions", "vectors_settings", "metadata_properties"] - if dataset: - for attr in attributes: - if dataset.is_relationship_loaded(attr): - obtained_attr_list = getattr(dataset, attr) - await self.track_crud_dataset_setting( - action=action, setting_name=attr, dataset=dataset, setting=None, count=len(obtained_attr_list) - ) - for obtained_attr in obtained_attr_list: - await self.track_crud_dataset_setting( - action=action, setting_name=attr, dataset=dataset, setting=obtained_attr - ) - - async def track_crud_dataset_setting( - self, - action: str, - setting_name: str, - dataset: Dataset, - setting: Union[Field, VectorSettings, Question, MetadataPropertySettings, None] = None, - count: Union[int, None] = None, - ): - topic = f"dataset/{setting_name}/{action}" - user_agent = self._process_dataset_model(dataset=dataset) - if setting: - user_agent.update(self._process_dataset_setting_settings(setting=setting)) - await self.track_data(topic=topic, data=user_agent, count=count) - - async def track_crud_records( - self, action: str, record_or_dataset: Union[Record, Dataset, None] = None, count: Union[int, None] = None - ): - topic = f"dataset/records/{action}" - if isinstance(record_or_dataset, Record): - user_agent = self._process_record_model(record=record_or_dataset) - elif isinstance(record_or_dataset, Dataset): - user_agent = self._process_dataset_model(dataset=record_or_dataset) - else: - raise NotImplementedError("Expected element of `Dataset` or `Record`") - await self.track_data(topic=topic, data=user_agent, count=count) - - async def track_crud_records_responses( - self, - action: str, - record_id: str, - count: Union[int, None] = None, - ): - topic = f"dataset/records/responses/{action}" - user_agent = {"record_id": record_id} - await self.track_data(topic=topic, data=user_agent, count=count) - - async def track_crud_records_suggestions( - self, - action: str, - 
record_id: Union[str, None] = None, - count: Union[int, None] = None, - ): - topic = f"dataset/records/suggestions/{action}" - user_agent = {} - if record_id: - user_agent["record_id"] = record_id - await self.track_data(topic=topic, data=user_agent, count=count) - - async def track_error(self, error: ServerError, request: Request): - topic = "error/server" - user_agent = { - "code": error.code, - "user-agent": request.headers.get("user-agent"), - "accept-language": request.headers.get("accept-language"), - "type": error.__class__.__name__, - } - - await self.track_data(topic=topic, data=user_agent) - _TELEMETRY_CLIENT = TelemetryClient() diff --git a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py index 761efd5a7c..d5293b8d66 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py @@ -209,10 +209,7 @@ async def test_list_current_user_datasets_by_workspace_id( response_body = response.json() assert [dataset["name"] for dataset in response_body["items"]] == ["dataset-a"] - @pytest.mark.skip - async def test_list_dataset_fields( - self, async_client: "AsyncClient", owner_auth_header: dict, test_telemetry: MagicMock - ): + async def test_list_dataset_fields(self, async_client: "AsyncClient", owner_auth_header: dict): dataset = await DatasetFactory.create() text_field_a = await TextFieldFactory.create( name="text-field-a", title="Text Field A", required=True, dataset=dataset @@ -250,11 +247,6 @@ async def test_list_dataset_fields( ], } - test_telemetry.track_crud_dataset_setting.assert_called_with( - action="read", dataset=dataset, setting_name="fields", count=len(response.json()["items"]) - ) - test_telemetry.track_data.assert_called() - async def test_list_dataset_fields_without_authentication(self, async_client: "AsyncClient"): dataset = await DatasetFactory.create() @@ -310,10 +302,7 @@ async def 
test_list_dataset_fields_with_nonexistent_dataset_id( assert response.status_code == 404 assert response.json() == {"detail": f"Dataset with id `{dataset_id}` not found"} - @pytest.mark.skip - async def test_list_dataset_questions( - self, async_client: "AsyncClient", owner_auth_header: dict, test_telemetry: MagicMock - ): + async def test_list_dataset_questions(self, async_client: "AsyncClient", owner_auth_header: dict): dataset = await DatasetFactory.create() text_question = await TextQuestionFactory.create( name="text-question", @@ -375,11 +364,6 @@ async def test_list_dataset_questions( ] } - test_telemetry.track_crud_dataset_setting.assert_called_with( - action="read", dataset=dataset, setting_name="questions", count=len(response.json()["items"]) - ) - test_telemetry.track_data.assert_called() - @pytest.mark.parametrize( "QuestionFactory, settings", [ @@ -638,11 +622,8 @@ async def test_list_current_user_dataset_metadata_properties_with_nonexistent_da assert response.status_code == 404 assert response.json() == {"detail": f"Dataset with id `{dataset_id}` not found"} - @pytest.mark.skip @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) - async def test_list_dataset_vectors_settings( - self, async_client: "AsyncClient", role: UserRole, test_telemetry: MagicMock - ): + async def test_list_dataset_vectors_settings(self, async_client: "AsyncClient", role: UserRole): dataset = await DatasetFactory.create() vectors_settings = await VectorSettingsFactory.create_batch(size=3, dataset=dataset) user = await UserFactory.create(workspaces=[dataset.workspace], role=role) @@ -667,11 +648,6 @@ async def test_list_dataset_vectors_settings( ] } - test_telemetry.track_crud_dataset_setting.assert_called_with( - action="read", dataset=dataset, setting_name="vectors_settings", count=len(response.json()["items"]) - ) - test_telemetry.track_data.assert_called() - @pytest.mark.parametrize("role", [UserRole.annotator, UserRole.admin]) async def 
test_list_dataset_vectors_settings_as_user_from_another_workspace( self, async_client: "AsyncClient", role: UserRole @@ -692,7 +668,7 @@ async def test_list_dataset_vectors_settings_without_authentication(self, async_ assert response.status_code == 401 - async def test_get_dataset(self, async_client: "AsyncClient", owner_auth_header: dict, test_telemetry: MagicMock): + async def test_get_dataset(self, async_client: "AsyncClient", owner_auth_header: dict): dataset = await DatasetFactory.create(name="dataset") response = await async_client.get(f"/api/v1/datasets/{dataset.id}", headers=owner_auth_header) @@ -714,9 +690,6 @@ async def test_get_dataset(self, async_client: "AsyncClient", owner_auth_header: "updated_at": dataset.updated_at.isoformat(), } - test_telemetry.track_crud_dataset.assert_called_once_with(action="read", dataset=dataset) - test_telemetry.track_data.assert_called() - async def test_get_dataset_without_authentication(self, async_client: "AsyncClient"): dataset = await DatasetFactory.create() @@ -1051,7 +1024,6 @@ async def test_create_dataset_field( owner_auth_header: dict, settings: dict, expected_settings: dict, - test_telemetry: MagicMock, ): dataset = await DatasetFactory.create() field_json = {"name": "name", "title": "title", "settings": settings} @@ -1076,11 +1048,6 @@ async def test_create_dataset_field( "updated_at": datetime.fromisoformat(response_body["updated_at"]).isoformat(), } - test_telemetry.track_crud_dataset_setting.assert_called_once_with( - action="create", setting_name="fields", dataset=dataset, setting=ANY - ) - test_telemetry.track_data.assert_called() - async def test_create_dataset_field_without_authentication(self, async_client: "AsyncClient", db: "AsyncSession"): dataset = await DatasetFactory.create() field_json = { @@ -1303,7 +1270,6 @@ async def test_create_dataset_metadata_property( owner_auth_header: dict, settings: dict, expected_settings: dict, - test_telemetry: MagicMock, ): dataset = await DatasetFactory.create() 
metadata_property_json = {"name": "name", "title": "title", "settings": settings} @@ -1328,11 +1294,6 @@ async def test_create_dataset_metadata_property( "updated_at": datetime.fromisoformat(response_body["updated_at"]).isoformat(), } - test_telemetry.track_crud_dataset_setting.assert_called_once_with( - action="create", setting_name="metadata_properties", dataset=dataset, setting=ANY - ) - test_telemetry.track_data.assert_called() - async def test_create_dataset_metadata_property_with_dataset_ready( self, async_client: "AsyncClient", @@ -1608,7 +1569,6 @@ async def test_create_dataset_vector_settings( mock_search_engine: SearchEngine, role: UserRole, dataset_status: DatasetStatus, - test_telemetry: MagicMock, ): dataset = await DatasetFactory.create(status=dataset_status) user = await UserFactory.create(role=role, workspaces=[dataset.workspace]) @@ -1643,11 +1603,6 @@ async def test_create_dataset_vector_settings( else: mock_search_engine.configure_index_vectors.assert_called_once_with(vector_settings) - test_telemetry.track_crud_dataset_setting.assert_called_once_with( - action="create", setting_name="vectors_settings", dataset=dataset, setting=ANY - ) - test_telemetry.track_data.assert_called() - @pytest.mark.parametrize( "payload", [ @@ -1765,7 +1720,6 @@ async def test_create_dataset_records( self, async_client: "AsyncClient", mock_search_engine: SearchEngine, - test_telemetry: MagicMock, db: "AsyncSession", owner: User, owner_auth_header: dict, @@ -1864,11 +1818,6 @@ async def test_create_dataset_records( records = (await db.execute(select(Record))).scalars().all() mock_search_engine.index_records.assert_called_once_with(dataset, records) - test_telemetry.track_crud_records.assert_called_once_with( - action="create", record_or_dataset=dataset, count=len(records) - ) - test_telemetry.track_data.assert_called() - async def test_create_dataset_records_with_response_for_multiple_users( self, async_client: "AsyncClient", @@ -2548,7 +2497,6 @@ async def 
test_create_dataset_records_as_admin( async_client: "AsyncClient", mock_search_engine: "SearchEngine", db: "AsyncSession", - test_telemetry: MagicMock, ): dataset = await DatasetFactory.create(status=DatasetStatus.ready) admin = await AdminFactory.create(workspaces=[dataset.workspace]) @@ -2621,11 +2569,6 @@ async def test_create_dataset_records_as_admin( records = (await db.execute(select(Record))).scalars().all() mock_search_engine.index_records.assert_called_once_with(dataset, records) - test_telemetry.track_crud_records.assert_called_once_with( - action="create", record_or_dataset=dataset, count=len(records) - ) - test_telemetry.track_data.assert_called() - async def test_create_dataset_records_as_annotator(self, async_client: "AsyncClient", db: "AsyncSession"): annotator = await AnnotatorFactory.create() dataset = await DatasetFactory.create(status=DatasetStatus.ready) @@ -2999,11 +2942,7 @@ async def test_create_dataset_records_with_nonexistent_dataset_id( @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) async def test_update_dataset_records( - self, - async_client: "AsyncClient", - mock_search_engine: "SearchEngine", - role: UserRole, - test_telemetry: MagicMock, + self, async_client: "AsyncClient", mock_search_engine: "SearchEngine", role: UserRole ): dataset = await DatasetFactory.create(status=DatasetStatus.ready) user = await UserFactory.create(workspaces=[dataset.workspace], role=role) @@ -3090,9 +3029,6 @@ async def test_update_dataset_records( mock_search_engine.index_records.assert_called_once_with(dataset, records[:4]) - test_telemetry.track_crud_records.assert_called_with(action="update", record_or_dataset=dataset, count=4) - test_telemetry.track_data.assert_called() - async def test_update_dataset_records_with_suggestions( self, async_client: "AsyncClient", mock_search_engine: "SearchEngine", owner_auth_header: dict ): @@ -3534,12 +3470,7 @@ async def test_update_dataset_records_without_authentication(self, async_client: 
@pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) async def test_delete_dataset_records( - self, - async_client: "AsyncClient", - db: "AsyncSession", - mock_search_engine: SearchEngine, - role: UserRole, - test_telemetry: MagicMock, + self, async_client: "AsyncClient", db: "AsyncSession", mock_search_engine: SearchEngine, role: UserRole ): dataset = await DatasetFactory.create() user = await UserFactory.create(workspaces=[dataset.workspace], role=role) @@ -3562,11 +3493,6 @@ async def test_delete_dataset_records( # than the ones created by the factory mock_search_engine.delete_records.assert_called_once_with(dataset=dataset, records=ANY) - test_telemetry.track_crud_records.assert_called_once_with( - action="delete", record_or_dataset=dataset, count=len(records_ids) + len(random_uuids) - ) - test_telemetry.track_data.assert_called() - async def test_delete_dataset_records_with_no_ids(self, async_client: "AsyncClient", owner_auth_header: dict): dataset = await DatasetFactory.create() @@ -4615,7 +4541,6 @@ async def test_publish_dataset( async_client: "AsyncClient", db: "AsyncSession", mock_search_engine: SearchEngine, - test_telemetry: MagicMock, owner_auth_header, ) -> None: dataset = await DatasetFactory.create() @@ -4630,7 +4555,6 @@ async def test_publish_dataset( response_body = response.json() assert response_body["status"] == "ready" - test_telemetry.track_crud_dataset.assert_called_once_with(action="create", dataset=dataset) mock_search_engine.create_index.assert_called_once_with(dataset) async def test_publish_dataset_without_authentication(self, async_client: "AsyncClient", db: "AsyncSession"): @@ -4737,9 +4661,7 @@ async def test_publish_dataset_with_nonexistent_dataset_id( ], ) @pytest.mark.parametrize("role", [UserRole.admin, UserRole.owner]) - async def test_update_dataset( - self, async_client: "AsyncClient", db: "AsyncSession", role: UserRole, payload: dict, test_telemetry: MagicMock - ): + async def test_update_dataset(self, 
async_client: "AsyncClient", db: "AsyncSession", role: UserRole, payload: dict): dataset = await DatasetFactory.create( name="Current Name", guidelines="Current Guidelines", status=DatasetStatus.ready ) @@ -4781,9 +4703,6 @@ async def test_update_dataset( assert dataset.guidelines == guidelines assert dataset.allow_extra_metadata is allow_extra_metadata - test_telemetry.track_crud_dataset.assert_called_once_with(action="update", dataset=dataset) - test_telemetry.track_data.assert_called() - @pytest.mark.parametrize( "dataset_json", [ @@ -4882,7 +4801,6 @@ async def test_delete_dataset( mock_search_engine: SearchEngine, owner: User, owner_auth_header: dict, - test_telemetry: MagicMock, ): dataset = await DatasetFactory.create() await TextFieldFactory.create(dataset=dataset) @@ -4910,9 +4828,6 @@ async def test_delete_dataset( mock_search_engine.delete_index.assert_called_once_with(dataset) - test_telemetry.track_crud_dataset.assert_called_once_with(action="delete", dataset=dataset) - test_telemetry.track_data.assert_called() - async def test_delete_published_dataset( self, async_client: "AsyncClient", db: "AsyncSession", owner: User, owner_auth_header: dict ): diff --git a/argilla-server/tests/unit/api/handlers/v1/test_fields.py b/argilla-server/tests/unit/api/handlers/v1/test_fields.py index 98a94f640f..17713ade47 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_fields.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_fields.py @@ -11,9 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ from datetime import datetime from typing import TYPE_CHECKING -from unittest.mock import MagicMock from uuid import uuid4 import pytest @@ -51,12 +51,7 @@ @pytest.mark.parametrize("role", [UserRole.admin, UserRole.owner]) @pytest.mark.asyncio async def test_update_field( - async_client: "AsyncClient", - db: "AsyncSession", - role: UserRole, - payload: dict, - expected_settings: dict, - test_telemetry: MagicMock, + async_client: "AsyncClient", db: "AsyncSession", role: UserRole, payload: dict, expected_settings: dict ): field = await TextFieldFactory.create() user = await UserFactory.create(role=role, workspaces=[field.dataset.workspace]) @@ -83,11 +78,6 @@ async def test_update_field( assert field.title == title assert field.settings == expected_settings - test_telemetry.track_crud_dataset_setting.assert_called_with( - action="update", dataset=field.dataset, setting_name="fields", setting=field - ) - test_telemetry.track_data.assert_called() - @pytest.mark.parametrize("title", [None, "", "t" * (FIELD_CREATE_TITLE_MAX_LENGTH + 1)]) @pytest.mark.asyncio @@ -190,9 +180,7 @@ async def test_update_field_without_authentication(async_client: "AsyncClient"): @pytest.mark.asyncio -async def test_delete_field( - async_client: "AsyncClient", db: "AsyncSession", owner_auth_header: dict, test_telemetry: MagicMock -): +async def test_delete_field(async_client: "AsyncClient", db: "AsyncSession", owner_auth_header: dict): field = await TextFieldFactory.create(name="name", title="title") response = await async_client.delete(f"/api/v1/fields/{field.id}", headers=owner_auth_header) @@ -212,11 +200,6 @@ async def test_delete_field( "updated_at": datetime.fromisoformat(response_body["updated_at"]).isoformat(), } - test_telemetry.track_crud_dataset_setting.assert_called_with( - action="delete", dataset=field.dataset, setting_name="fields", setting=field - ) - test_telemetry.track_data.assert_called() - @pytest.mark.asyncio async def 
test_delete_field_without_authentication(async_client: "AsyncClient", db: "AsyncSession"): diff --git a/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py b/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py index f757d4f73a..4f989e5399 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_list_dataset_records.py @@ -12,9 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import List, Optional, Tuple, Type, Union -from unittest.mock import MagicMock -from uuid import uuid4 +from typing import List, Optional, Tuple, Union import pytest from httpx import AsyncClient @@ -41,9 +39,7 @@ @pytest.mark.asyncio class TestSuiteListDatasetRecords: @pytest.mark.skip(reason="Factory integration with search engine") - async def test_list_dataset_records( - self, async_client: "AsyncClient", owner_auth_header: dict, test_telemetry: MagicMock - ): + async def test_list_dataset_records(self, async_client: "AsyncClient", owner_auth_header: dict): dataset = await DatasetFactory.create() record_a = await RecordFactory.create(fields={"record_a": "value_a"}, dataset=dataset) record_b = await RecordFactory.create( @@ -87,11 +83,6 @@ async def test_list_dataset_records( ], } - test_telemetry.track_crud_records.assert_called_with( - action="read", record_or_dataset=response.dataset, count=response.json()["total"] - ) - test_telemetry.track_data.assert_called() - @pytest.mark.parametrize( "includes", [[RecordInclude.responses], [RecordInclude.suggestions], [RecordInclude.responses, RecordInclude.suggestions]], diff --git a/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py b/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py index 7beb4f4485..9c77ac6765 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py +++ 
b/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py @@ -13,7 +13,6 @@ # limitations under the License. from typing import TYPE_CHECKING, Type -from unittest.mock import MagicMock from uuid import uuid4 import pytest @@ -161,9 +160,7 @@ async def test_get_metadata_property_metrics_as_restricted_user_role_from_differ @pytest.mark.asyncio -async def test_update_metadata_property( - async_client: "AsyncClient", db: "AsyncSession", owner_auth_header: dict, test_telemetry: MagicMock -): +async def test_update_metadata_property(async_client: "AsyncClient", db: "AsyncSession", owner_auth_header: dict): metadata_property = await IntegerMetadataPropertyFactory.create( name="name", title="title", allowed_roles=[UserRole.admin, UserRole.annotator] ) @@ -193,14 +190,6 @@ async def test_update_metadata_property( assert metadata_property.visible_for_annotators == False assert metadata_property.allowed_roles == [UserRole.admin] - test_telemetry.track_crud_dataset_setting.assert_called_with( - action="update", - setting_name="metadata_properties", - dataset=metadata_property.dataset, - setting=metadata_property, - ) - test_telemetry.track_data.assert_called() - @pytest.mark.asyncio async def test_update_metadata_property_without_authentication(async_client: "AsyncClient", db: "AsyncSession"): @@ -400,9 +389,7 @@ async def test_update_metadata_property_with_nonexistent_metadata_property_id( @pytest.mark.parametrize("user_role", [UserRole.owner, UserRole.admin]) @pytest.mark.asyncio -async def test_delete_metadata_property( - async_client: "AsyncClient", db: "AsyncSession", user_role: UserRole, test_telemetry: MagicMock -): +async def test_delete_metadata_property(async_client: "AsyncClient", db: "AsyncSession", user_role: UserRole): metadata_property = await IntegerMetadataPropertyFactory.create(name="name", title="title") user = await UserFactory.create(role=user_role, workspaces=[metadata_property.dataset.workspace]) @@ -426,14 +413,6 @@ async def 
test_delete_metadata_property( assert (await db.execute(select(func.count(MetadataProperty.id)))).scalar() == 0 - test_telemetry.track_crud_dataset_setting.assert_called_with( - action="delete", - setting_name="metadata_properties", - dataset=metadata_property.dataset, - setting=metadata_property, - ) - test_telemetry.track_data.assert_called() - @pytest.mark.asyncio async def test_delete_metadata_property_without_authentication(async_client: "AsyncClient", db: "AsyncSession"): diff --git a/argilla-server/tests/unit/api/handlers/v1/test_questions.py b/argilla-server/tests/unit/api/handlers/v1/test_questions.py index 879a920215..d4baacb5d7 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_questions.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_questions.py @@ -13,7 +13,6 @@ # limitations under the License. from typing import TYPE_CHECKING, Type -from unittest.mock import ANY, MagicMock from uuid import uuid4 import pytest @@ -249,7 +248,6 @@ async def test_update_question( payload: dict, expected_settings: dict, role: UserRole, - test_telemetry: MagicMock, ): question = await QuestionFactory.create() user = await UserFactory.create(role=role, workspaces=[question.dataset.workspace]) @@ -280,11 +278,6 @@ async def test_update_question( assert question.description == description assert question.settings == expected_settings - test_telemetry.track_crud_dataset_setting.assert_called_with( - action="update", dataset=question.dataset, setting_name="questions", setting=ANY - ) - test_telemetry.track_data.assert_called() - @pytest.mark.parametrize("title", [None, "", "t" * (QUESTION_CREATE_TITLE_MAX_LENGTH + 1)]) @pytest.mark.asyncio @@ -487,9 +480,7 @@ async def test_update_question_as_annotator(async_client: "AsyncClient"): @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) @pytest.mark.asyncio -async def test_delete_question( - async_client: "AsyncClient", db: "AsyncSession", role: UserRole, test_telemetry: MagicMock -): +async def 
test_delete_question(async_client: "AsyncClient", db: "AsyncSession", role: UserRole): question = await TextQuestionFactory.create(name="name", title="title", description="description") user = await UserFactory.create(role=role, workspaces=[question.dataset.workspace]) @@ -513,11 +504,6 @@ async def test_delete_question( "updated_at": question.updated_at.isoformat(), } - test_telemetry.track_crud_dataset_setting.assert_called_with( - action="delete", dataset=question.dataset, setting_name="questions", setting=ANY - ) - test_telemetry.track_data.assert_called() - @pytest.mark.asyncio async def test_delete_question_as_admin_from_different_workspace(async_client: "AsyncClient", db: "AsyncSession"): diff --git a/argilla-server/tests/unit/api/handlers/v1/test_records.py b/argilla-server/tests/unit/api/handlers/v1/test_records.py index 2d53476c9e..3c361b1666 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_records.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_records.py @@ -14,7 +14,7 @@ from datetime import datetime from typing import TYPE_CHECKING, Any, Awaitable, Callable, Type -from unittest.mock import MagicMock, call +from unittest.mock import call from uuid import UUID, uuid4 import pytest @@ -82,7 +82,7 @@ async def create_ranking_question(dataset: "Dataset") -> None: @pytest.mark.asyncio class TestSuiteRecords: @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin, UserRole.annotator]) - async def test_get_record(self, async_client: "AsyncClient", role: UserRole, test_telemetry: MagicMock): + async def test_get_record(self, async_client: "AsyncClient", role: UserRole): dataset = await DatasetFactory.create() record = await RecordFactory.create(dataset=dataset) user = await UserFactory.create(workspaces=[dataset.workspace], role=role) @@ -104,9 +104,6 @@ async def test_get_record(self, async_client: "AsyncClient", role: UserRole, tes "updated_at": record.updated_at.isoformat(), } - 
test_telemetry.track_crud_records.assert_called_with(action="read", record_or_dataset=record) - test_telemetry.track_data.assert_called() - async def test_get_record_without_authentication(self, async_client: "AsyncClient"): record = await RecordFactory.create() @@ -140,9 +137,7 @@ async def test_get_record_with_nonexistent_record_id(self, async_client: "AsyncC assert response.json() == {"detail": f"Record with id `{record_id}` not found"} @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) - async def test_update_record( - self, async_client: "AsyncClient", mock_search_engine: SearchEngine, role: UserRole, test_telemetry: MagicMock - ): + async def test_update_record(self, async_client: "AsyncClient", mock_search_engine: SearchEngine, role: UserRole): dataset = await DatasetFactory.create() user = await UserFactory.create(workspaces=[dataset.workspace], role=role) question_0 = await TextQuestionFactory.create(dataset=dataset) @@ -238,9 +233,6 @@ async def test_update_record( mock_search_engine.index_records.assert_called_once_with(dataset, [record]) - test_telemetry.track_crud_records.assert_called_with(action="update", record_or_dataset=record) - test_telemetry.track_data.assert_called() - async def test_update_record_with_null_metadata( self, async_client: "AsyncClient", mock_search_engine: SearchEngine, owner_auth_header: dict ): @@ -994,13 +986,7 @@ async def test_create_record_response_without_authentication(self, async_client: @pytest.mark.parametrize("status", ["submitted", "discarded", "draft"]) async def test_create_record_response( - self, - async_client: "AsyncClient", - db: "AsyncSession", - owner: User, - owner_auth_header: dict, - status: str, - test_telemetry: MagicMock, + self, async_client: "AsyncClient", db: "AsyncSession", owner: User, owner_auth_header: dict, status: str ): dataset = await DatasetFactory.create() await TextQuestionFactory.create(name="input_ok", dataset=dataset) @@ -1043,9 +1029,6 @@ async def 
test_create_record_response( "updated_at": datetime.fromisoformat(response_body["updated_at"]).isoformat(), } - test_telemetry.track_crud_records_responses.assert_called_with(action="create", record_id=record.id) - test_telemetry.track_data.assert_called() - @pytest.mark.parametrize( "status, expected_status_code, expected_response_count", [("submitted", 422, 0), ("discarded", 201, 1), ("draft", 201, 1)], @@ -1245,7 +1228,7 @@ async def test_create_record_response_with_nonexistent_record_id( assert (await db.execute(select(func.count(Response.id)))).scalar() == 0 @pytest.mark.parametrize("role", [UserRole.annotator, UserRole.admin, UserRole.owner]) - async def test_get_record_suggestions(self, async_client: "AsyncClient", role: UserRole, test_telemetry: MagicMock): + async def test_get_record_suggestions(self, async_client: "AsyncClient", role: UserRole): dataset = await DatasetFactory.create() user = await UserFactory.create(role=role, workspaces=[dataset.workspace]) record = await RecordFactory.create(dataset=dataset) @@ -1288,11 +1271,6 @@ async def test_get_record_suggestions(self, async_client: "AsyncClient", role: U ] } - test_telemetry.track_crud_records_suggestions.assert_called_with( - action="read", record_id=record.id, count=len(response.json()["items"]) - ) - test_telemetry.track_data.assert_called() - @pytest.mark.parametrize( "payload", [ @@ -1312,7 +1290,7 @@ async def test_get_record_suggestions(self, async_client: "AsyncClient", role: U ) @pytest.mark.parametrize("role", [UserRole.admin, UserRole.owner]) async def test_create_record_suggestion( - self, async_client: "AsyncClient", db: "AsyncSession", role: UserRole, payload: dict, test_telemetry: MagicMock + self, async_client: "AsyncClient", db: "AsyncSession", role: UserRole, payload: dict ): dataset = await DatasetFactory.create() question = await TextQuestionFactory.create(dataset=dataset) @@ -1342,9 +1320,6 @@ async def test_create_record_suggestion( assert (await 
db.execute(select(func.count(Suggestion.id)))).scalar() == 1 - test_telemetry.track_crud_records_suggestions.assert_called_with(action="create", record_id=record.id) - test_telemetry.track_data.assert_called() - async def test_create_record_suggestion_update( self, async_client: "AsyncClient", db: "AsyncSession", mock_search_engine: SearchEngine, owner_auth_header: dict ): @@ -1432,12 +1407,7 @@ async def test_create_record_suggestion_as_annotator(self, async_client: "AsyncC @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) async def test_delete_record( - self, - async_client: "AsyncClient", - db: "AsyncSession", - mock_search_engine: "SearchEngine", - role: UserRole, - test_telemetry: MagicMock, + self, async_client: "AsyncClient", db: "AsyncSession", mock_search_engine: "SearchEngine", role: UserRole ): dataset = await DatasetFactory.create() record = await RecordFactory.create(dataset=dataset) @@ -1461,9 +1431,6 @@ async def test_delete_record( assert (await db.execute(select(func.count(Record.id)))).scalar() == 0 mock_search_engine.delete_records.assert_called_once_with(dataset=dataset, records=[record]) - test_telemetry.track_crud_records.assert_called_with(action="delete", record_or_dataset=record) - test_telemetry.track_data.assert_called() - async def test_delete_record_as_admin_from_another_workspace(self, async_client: "AsyncClient", db: "AsyncSession"): dataset = await DatasetFactory.create() record = await RecordFactory.create(dataset=dataset) @@ -1499,12 +1466,7 @@ async def test_delete_record_non_existent(self, async_client: "AsyncClient", own @pytest.mark.parametrize("role", [UserRole.admin, UserRole.owner]) async def test_delete_record_suggestions( - self, - async_client: "AsyncClient", - db: "AsyncSession", - mock_search_engine: SearchEngine, - role: UserRole, - test_telemetry: MagicMock, + self, async_client: "AsyncClient", db: "AsyncSession", mock_search_engine: SearchEngine, role: UserRole ) -> None: dataset = await 
DatasetFactory.create() user = await UserFactory.create(workspaces=[dataset.workspace], role=role) @@ -1528,13 +1490,6 @@ async def test_delete_record_suggestions( expected_calls = [call(suggestion) for suggestion in suggestions] mock_search_engine.delete_record_suggestion.assert_has_calls(expected_calls) - test_telemetry.track_crud_records_suggestions.assert_called_with( - action="delete", - record_id=record.id, - count=len(suggestions_ids) + len(random_uuids), - ) - test_telemetry.track_data.assert_called() - async def test_delete_record_suggestions_with_no_ids( self, async_client: "AsyncClient", owner_auth_header: dict ) -> None: diff --git a/argilla-server/tests/unit/api/handlers/v1/test_responses.py b/argilla-server/tests/unit/api/handlers/v1/test_responses.py index 89d65a0f60..ceddf5201d 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_responses.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_responses.py @@ -14,7 +14,6 @@ from datetime import datetime from typing import TYPE_CHECKING, Any, Type -from unittest.mock import MagicMock from uuid import uuid4 import pytest @@ -72,7 +71,6 @@ async def test_update_response( mock_search_engine: SearchEngine, owner_auth_header: dict, response_json: dict, - test_telemetry: MagicMock, ): dataset = await DatasetFactory.create(status=DatasetStatus.ready) await TextQuestionFactory.create(name="input_ok", dataset=dataset, required=True) @@ -109,9 +107,6 @@ async def test_update_response( mock_search_engine.update_record_response.assert_called_once_with(response) - test_telemetry.track_crud_records_responses.assert_called_with(action="update", record_id=record.id) - test_telemetry.track_data.assert_called() - async def test_update_response_without_authentication(self, async_client: "AsyncClient", db: "AsyncSession"): response = await ResponseFactory.create( values={ @@ -413,12 +408,7 @@ async def test_update_response_with_nonexistent_response_id( } async def test_delete_response( - self, - async_client: 
"AsyncClient", - mock_search_engine: SearchEngine, - db: "AsyncSession", - owner_auth_header: dict, - test_telemetry: MagicMock, + self, async_client: "AsyncClient", mock_search_engine: SearchEngine, db: "AsyncSession", owner_auth_header: dict ): response = await ResponseFactory.create() dataset = response.record.dataset @@ -436,9 +426,6 @@ async def test_delete_response( mock_search_engine.delete_record_response.assert_called_once_with(response) - test_telemetry.track_crud_records_responses.assert_called_with(action="delete", record_id=response.record.id) - test_telemetry.track_data.assert_called() - async def test_delete_response_without_authentication(self, async_client: "AsyncClient", db: "AsyncSession"): response = await ResponseFactory.create() diff --git a/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py b/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py index d1e36dc18f..5f83800df0 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_suggestions.py @@ -14,7 +14,6 @@ from datetime import datetime from typing import TYPE_CHECKING -from unittest.mock import MagicMock from uuid import uuid4 import pytest @@ -34,12 +33,7 @@ class TestSuiteSuggestions: @pytest.mark.parametrize("role", [UserRole.admin, UserRole.owner]) async def test_delete_suggestion( - self, - async_client: "AsyncClient", - mock_search_engine: SearchEngine, - db: "AsyncSession", - role: UserRole, - test_telemetry: MagicMock, + self, async_client: "AsyncClient", mock_search_engine: SearchEngine, db: "AsyncSession", role: UserRole ) -> None: suggestion = await SuggestionFactory.create() user = await UserFactory.create(role=role, workspaces=[suggestion.record.dataset.workspace]) @@ -67,11 +61,6 @@ async def test_delete_suggestion( mock_search_engine.delete_record_suggestion.assert_called_once_with(suggestion) - test_telemetry.track_crud_records_suggestions.assert_called_with( - action="delete", 
record_id=suggestion.record.id - ) - test_telemetry.track_data.assert_called() - async def test_delete_suggestion_non_existent(self, async_client: "AsyncClient", owner_auth_header: dict) -> None: suggestion_id = uuid4() diff --git a/argilla-server/tests/unit/api/handlers/v1/test_vectors_settings.py b/argilla-server/tests/unit/api/handlers/v1/test_vectors_settings.py index df448d10f3..fb58a34e67 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_vectors_settings.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_vectors_settings.py @@ -13,7 +13,6 @@ # limitations under the License. from typing import TYPE_CHECKING -from unittest.mock import MagicMock from uuid import uuid4 import pytest @@ -30,7 +29,7 @@ @pytest.mark.asyncio class TestSuiteVectorsSettings: @pytest.mark.parametrize("role", [UserRole.owner, UserRole.admin]) - async def test_update_vector_settings(self, async_client: "AsyncClient", role: UserRole, test_telemetry: MagicMock): + async def test_update_vector_settings(self, async_client: "AsyncClient", role: UserRole): vector_settings = await VectorSettingsFactory.create() user = await UserFactory.create(role=role, workspaces=[vector_settings.dataset.workspace]) @@ -53,11 +52,6 @@ async def test_update_vector_settings(self, async_client: "AsyncClient", role: U assert vector_settings.title == "New Title" - test_telemetry.track_crud_dataset_setting.assert_called_with( - action="update", setting_name="vectors_settings", dataset=vector_settings.dataset, setting=vector_settings - ) - test_telemetry.track_data.assert_called() - @pytest.mark.parametrize("title", [None, "", "t" * (VECTOR_SETTINGS_CREATE_TITLE_MAX_LENGTH + 1)]) async def test_update_vector_settings_with_invalid_title( self, async_client: "AsyncClient", owner_auth_header: dict, title: str @@ -124,7 +118,7 @@ async def test_update_vector_settings_as_annotator(self, async_client: "AsyncCli assert response.status_code == 403 @pytest.mark.parametrize("role", [UserRole.owner, 
UserRole.admin]) - async def test_delete_vector_settings(self, async_client: "AsyncClient", role: UserRole, test_telemetry: MagicMock): + async def test_delete_vector_settings(self, async_client: "AsyncClient", role: UserRole): vector_settings = await VectorSettingsFactory.create() user = await UserFactory.create(role=role, workspaces=[vector_settings.dataset.workspace]) @@ -143,11 +137,6 @@ async def test_delete_vector_settings(self, async_client: "AsyncClient", role: U "updated_at": vector_settings.updated_at.isoformat(), } - test_telemetry.track_crud_dataset_setting.assert_called_with( - action="delete", setting_name="vectors_settings", dataset=vector_settings.dataset, setting=vector_settings - ) - test_telemetry.track_data.assert_called() - async def test_delete_vector_settings_non_existing(self, async_client: "AsyncClient", owner_auth_header: dict): vector_settings_id = uuid4() diff --git a/argilla-server/tests/unit/api/handlers/v1/test_workspaces.py b/argilla-server/tests/unit/api/handlers/v1/test_workspaces.py index 4b8c39b78b..9696326ca1 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_workspaces.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_workspaces.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from unittest.mock import MagicMock from uuid import uuid4 import pytest @@ -24,7 +23,7 @@ @pytest.mark.asyncio class TestSuiteWorkspaces: - async def test_get_workspace(self, async_client: AsyncClient, owner_auth_header: dict, test_telemetry: MagicMock): + async def test_get_workspace(self, async_client: AsyncClient, owner_auth_header: dict): workspace = await WorkspaceFactory.create(name="workspace") response = await async_client.get(f"/api/v1/workspaces/{workspace.id}", headers=owner_auth_header) @@ -37,9 +36,6 @@ async def test_get_workspace(self, async_client: AsyncClient, owner_auth_header: "updated_at": workspace.updated_at.isoformat(), } - test_telemetry.track_crud_workspace(action="read", workspace=workspace) - test_telemetry.track_data.assert_called() - async def test_get_workspace_without_authentication(self, async_client: AsyncClient): workspace = await WorkspaceFactory.create() @@ -84,9 +80,7 @@ async def test_get_workspace_with_nonexistent_workspace_id( assert response.status_code == 404 assert response.json() == {"detail": f"Workspace with id `{workspace_id}` not found"} - async def test_delete_workspace( - self, async_client: AsyncClient, owner_auth_header: dict, test_telemetry: MagicMock - ): + async def test_delete_workspace(self, async_client: AsyncClient, owner_auth_header: dict): workspace = await WorkspaceFactory.create(name="workspace_delete") other_workspace = await WorkspaceFactory.create() @@ -96,9 +90,6 @@ async def test_delete_workspace( assert response.status_code == 200 - test_telemetry.track_crud_workspace(action="delete", workspace=workspace) - test_telemetry.track_data.assert_called() - async def test_delete_workspace_with_feedback_datasets(self, async_client: AsyncClient, owner_auth_header: dict): workspace = await WorkspaceFactory.create(name="workspace_delete") diff --git a/argilla-server/tests/unit/commons/test_telemetry.py b/argilla-server/tests/unit/commons/test_telemetry.py index b5d140580c..7643b92a06 100644 --- 
a/argilla-server/tests/unit/commons/test_telemetry.py +++ b/argilla-server/tests/unit/commons/test_telemetry.py @@ -13,7 +13,6 @@ # limitations under the License. import uuid -from typing import Union from unittest.mock import MagicMock import pytest @@ -24,33 +23,10 @@ from argilla_server.api.errors.v1.exception_handlers import set_request_error from argilla_server.errors import ServerError -from argilla_server.models import ( - Record, - User, -) from argilla_server.telemetry import TelemetryClient -from tests.factories import ( - DatasetFactory, - IntegerMetadataPropertyFactory, - LabelSelectionQuestionFactory, - MultiLabelSelectionQuestionFactory, - RankingQuestionFactory, - RatingQuestionFactory, - RecordFactory, - ResponseFactory, - SpanQuestionFactory, - SuggestionFactory, - TextFieldFactory, - TextQuestionFactory, - UserFactory, - VectorSettingsFactory, - WorkspaceFactory, -) mock_request = Request(scope={"type": "http", "headers": {}}) -__CRUD__ = ["create", "read", "update", "delete"] - @pytest.mark.asyncio class TestSuiteTelemetry: @@ -142,64 +118,3 @@ async def test_track_api_request_call_with_error_and_exception( "response.error_code": "argilla.api.errors::ServerError", }, ) - - async def test_track_user_login(self, test_telemetry: MagicMock): - user = User(id=uuid.uuid4(), username="argilla") - await test_telemetry.track_user_login(request=mock_request, user=user) - test_telemetry.track_data.assert_called() - - @pytest.mark.parametrize("is_oauth", [True, False]) - async def test_track_crud_user(self, test_telemetry: MagicMock, is_oauth: bool): - user = await UserFactory.create() - await test_telemetry.track_crud_user(action="create", user=user, is_oauth=is_oauth) - test_telemetry.track_data.assert_called() - - async def test_track_track_crud_workspace(self, test_telemetry: MagicMock): - workspace = await WorkspaceFactory.create() - await test_telemetry.track_crud_workspace(action="create", workspace=workspace) - 
test_telemetry.track_data.assert_called() - - async def test_track_track_crud_dataset( - self, - test_telemetry: MagicMock, - ): - dataset = await DatasetFactory.create() - await test_telemetry.track_crud_dataset(action="create", dataset=dataset) - test_telemetry.track_data.assert_called() - - @pytest.mark.parametrize("record_or_dataset_factory", [RecordFactory, DatasetFactory]) - async def test_track_track_crud_records( - self, test_telemetry: MagicMock, record_or_dataset_factory: Union[DatasetFactory, RecordFactory] - ): - record_or_dataset = await record_or_dataset_factory.create() - if isinstance(record_or_dataset, Record): - await ResponseFactory.create(record=record_or_dataset) - await SuggestionFactory.create(record=record_or_dataset) - await test_telemetry.track_crud_records(action="create", record_or_dataset=record_or_dataset) - test_telemetry.track_data.assert_called() - - @pytest.mark.parametrize("action", __CRUD__) - @pytest.mark.parametrize( - "setting_factory_config", - [ - ("vectors_settings", VectorSettingsFactory), - ("metadata_properties", IntegerMetadataPropertyFactory), - ("fields", TextFieldFactory), - ("questions", RankingQuestionFactory), - ("questions", RatingQuestionFactory), - ("questions", LabelSelectionQuestionFactory), - ("questions", MultiLabelSelectionQuestionFactory), - ("questions", SpanQuestionFactory), - ("questions", TextQuestionFactory), - ], - ) - async def test_track_crud_dataset_setting(self, test_telemetry: MagicMock, action: str, setting_factory_config): - setting_name, setting_factory = setting_factory_config - setting = await setting_factory.create_batch(size=1) - setting_config = {setting_name: setting} - dataset = await DatasetFactory.create(**setting_config) - await test_telemetry.track_crud_dataset(action=action, dataset=dataset) - test_telemetry.track_crud_dataset_setting.assert_called_with( - action=action, setting_name=setting_name, setting=setting[0], dataset=dataset - ) - test_telemetry.track_data.assert_called() 
diff --git a/argilla-server/tests/unit/conftest.py b/argilla-server/tests/unit/conftest.py index 866ad1b290..ea1713cf8d 100644 --- a/argilla-server/tests/unit/conftest.py +++ b/argilla-server/tests/unit/conftest.py @@ -11,11 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import contextlib -from typing import TYPE_CHECKING, Dict, Generator - -import pytest -import pytest_asyncio import uuid from typing import TYPE_CHECKING, Dict, Generator, Optional diff --git a/argilla-server/tests/unit/errors/test_api_errors.py b/argilla-server/tests/unit/errors/test_api_errors.py deleted file mode 100644 index b85d7791f7..0000000000 --- a/argilla-server/tests/unit/errors/test_api_errors.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2021-present, the Recognai S.L. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from unittest.mock import MagicMock - -import pytest -from argilla_server.api.schemas.v1.datasets import Dataset -from argilla_server.errors.base_errors import ( - EntityAlreadyExistsError, - EntityNotFoundError, - GenericServerError, - ServerError, -) -from argilla_server.errors.error_handler import APIErrorHandler -from fastapi import Request - -mock_request = Request(scope={"type": "http", "headers": {}}) - - -@pytest.mark.asyncio -class TestAPIErrorHandler: - @pytest.mark.skip - @pytest.mark.asyncio - @pytest.mark.parametrize( - ["error", "expected_event"], - [ - ( - EntityNotFoundError(name="mock-name", type="MockType"), - { - "accept-language": None, - "code": "argilla.api.errors::EntityNotFoundError", - "type": "MockType", - "user-agent": None, - }, - ), - ( - EntityAlreadyExistsError(name="mock-name", type=Dataset, workspace="mock-workspace"), - { - "accept-language": None, - "code": "argilla.api.errors::EntityAlreadyExistsError", - "type": "Dataset", - "user-agent": None, - }, - ), - ( - GenericServerError(RuntimeError("This is a mock error")), - { - "accept-language": None, - "code": "argilla.api.errors::GenericServerError", - "type": "builtins.RuntimeError", - "user-agent": None, - }, - ), - ( - ServerError(), - { - "accept-language": None, - "code": "argilla.api.errors::ServerError", - "user-agent": None, - }, - ), - ], - ) - async def test_track_error(self, test_telemetry: MagicMock, error, expected_event): - await APIErrorHandler.track_error(error, request=mock_request) - - user_agent = { - "code": error.code, - "user-agent": mock_request.headers.get("user-agent"), - "accept-language": mock_request.headers.get("accept-language"), - "type": error.__class__.__name__, - "count": 1, - } - user_agent.update(test_telemetry._system_info) - - test_telemetry.track_data.assert_called_once_with(topic="error/server", user_agent=user_agent) From c7c22f8dd89c65c69cc41275d7496f0e56c0dfb0 Mon Sep 17 00:00:00 2001 From: Paco Aranda Date: Mon, 2 Sep 2024 14:11:03 
+0200 Subject: [PATCH 55/63] Update argilla/mkdocs.yml --- argilla/mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argilla/mkdocs.yml b/argilla/mkdocs.yml index 5e6e0d839b..64d8f08a9c 100644 --- a/argilla/mkdocs.yml +++ b/argilla/mkdocs.yml @@ -184,7 +184,7 @@ nav: - FastAPI Server: - Server configuration: reference/argilla-server/configuration.md - Telemetry: - - Server Telemetry: reference/telemetry.md + - Server Telemetry: reference/argilla-server/telemetry.md - Community: - community/index.md - How to contribute?: community/contributor.md From 32f3baa2eec7baf09cae577d3ee203fb2e11ecb0 Mon Sep 17 00:00:00 2001 From: Paco Aranda Date: Mon, 2 Sep 2024 14:13:21 +0200 Subject: [PATCH 56/63] chore: Revert doc change --- argilla/docs/reference/argilla-server/telemetry.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argilla/docs/reference/argilla-server/telemetry.md b/argilla/docs/reference/argilla-server/telemetry.md index d2e907f402..7cf081127e 100644 --- a/argilla/docs/reference/argilla-server/telemetry.md +++ b/argilla/docs/reference/argilla-server/telemetry.md @@ -45,7 +45,7 @@ The following usage and error information is reported: * The system’s release version, e.g. `Darwin Kernel Version 21.5.0: Tue Apr 26 21:08:22 PDT 2022; root:xnu-8020` * The machine type, e.g. `AMD64` * The underlying platform spec with as much useful information as possible. (eg. `macOS-10.16-x86_64-i386-64bit`) -* The type of deployment: `quickstart` or `server`, and if it is deployed on Hugging Face spaces. 
+* The type of deployment: `huggingface_space` or `server` * The dockerized deployment flag: `True` or `False` This is performed by registering counters for the create, read, update, delete (CRUD) and list operations for different API resources: From 0c9c608e84ad1afefb9f4abb5086d066cbf3b7e3 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Mon, 2 Sep 2024 14:16:48 +0200 Subject: [PATCH 57/63] chore: revert doc changes --- .../docs/reference/argilla-server/telemetry.md | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/argilla/docs/reference/argilla-server/telemetry.md b/argilla/docs/reference/argilla-server/telemetry.md index 7cf081127e..6126a54bd0 100644 --- a/argilla/docs/reference/argilla-server/telemetry.md +++ b/argilla/docs/reference/argilla-server/telemetry.md @@ -48,21 +48,6 @@ The following usage and error information is reported: * The type of deployment: `huggingface_space` or `server` * The dockerized deployment flag: `True` or `False` -This is performed by registering counters for the create, read, update, delete (CRUD) and list operations for different API resources: - -* Users -* Workspaces -* Datasets - * Settings - * Fields - * Questions - * Vector Settings - * Metadata Properties - * Records - * Suggestions - * Responses -* Raised server API errors - For transparency, you can inspect the source code where this is performed [here](https://github.com/argilla-io/argilla/argilla-server/src/argilla_server/telemetry.py). If you have any doubts, don't hesitate to join our [Discord channel](http://hf.co/join/discord) or open a GitHub issue. We'd be very happy to discuss how we can improve this. 
From 00b6caa8bad8c9eeb16bb6c2149fa9f05c6aa5e1 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Mon, 2 Sep 2024 14:18:40 +0200 Subject: [PATCH 58/63] chore: Remove unused attribute --- argilla-server/src/argilla_server/telemetry.py | 5 +---- argilla-server/tests/unit/commons/test_telemetry.py | 5 ----- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry.py index aa246a9415..2450eab00a 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry.py @@ -34,9 +34,7 @@ @dataclasses.dataclass class TelemetryClient: - enable_telemetry: dataclasses.InitVar[bool] = settings.enable_telemetry - - def __post_init__(self, enable_telemetry: bool): + def __post_init__(self): self._system_info = { "system": platform.system(), "machine": platform.machine(), @@ -48,7 +46,6 @@ def __post_init__(self, enable_telemetry: bool): _LOGGER.info("System Info:") _LOGGER.info(f"Context: {json.dumps(self._system_info, indent=2)}") - self.enable_telemetry = enable_telemetry async def track_data(self, topic: str, data: dict, include_system_info: bool = True, count: int = 1): library_name = "argilla/server" diff --git a/argilla-server/tests/unit/commons/test_telemetry.py b/argilla-server/tests/unit/commons/test_telemetry.py index 7643b92a06..ca864b850b 100644 --- a/argilla-server/tests/unit/commons/test_telemetry.py +++ b/argilla-server/tests/unit/commons/test_telemetry.py @@ -30,11 +30,6 @@ @pytest.mark.asyncio class TestSuiteTelemetry: - async def test_disable_telemetry(self): - telemetry_client = TelemetryClient(enable_telemetry=False) - - assert telemetry_client.enable_telemetry == False - async def test_track_api_request(self, test_telemetry: TelemetryClient, mocker: MockerFixture): mocker.patch("argilla_server.telemetry.resolve_endpoint_path_for_request", return_value="/api/test/endpoint") From 942455db7bce018a6f45e9550f1370350515234f Mon 
Sep 17 00:00:00 2001 From: Paco Aranda Date: Mon, 2 Sep 2024 19:07:45 +0200 Subject: [PATCH 59/63] [FEAT] `argilla server`: add user and server id on telemetry metrics (#5445) # Description This PR restores the server_id for telemetry purposes and also add the user.id and user.role when tracking API requests. **Type of change** - Improvement (change adding some improvement to an existing functionality) - Documentation update **How Has This Been Tested** **Checklist** - I added relevant documentation - I followed the style guidelines of this project - I did a self-review of my code - I made corresponding changes to the documentation - I confirm My changes generate no new warnings - I have added tests that prove my fix is effective or that my feature works - I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --- .../security/authentication/provider.py | 27 ++++++++++++ .../src/argilla_server/telemetry/__init__.py | 16 +++++++ .../{telemetry.py => telemetry/_client.py} | 36 ++++++++-------- .../_telemetry.py => telemetry/_helpers.py} | 32 ++++++++++++++ .../tests/unit/commons/test_telemetry.py | 43 ++++++++++++++++--- argilla-server/tests/unit/conftest.py | 2 +- .../tests/unit/telemetry/__init__.py | 14 ++++++ .../unit/telemetry/test_telemetry_helpers.py | 43 +++++++++++++++++++ .../tests/unit/test_api_telemetry.py | 14 ++++++ 9 files changed, 202 insertions(+), 25 deletions(-) create mode 100644 argilla-server/src/argilla_server/telemetry/__init__.py rename argilla-server/src/argilla_server/{telemetry.py => telemetry/_client.py} (78%) rename argilla-server/src/argilla_server/{utils/_telemetry.py => telemetry/_helpers.py} (78%) create mode 100644 argilla-server/tests/unit/telemetry/__init__.py create mode 100644 argilla-server/tests/unit/telemetry/test_telemetry_helpers.py diff --git a/argilla-server/src/argilla_server/security/authentication/provider.py b/argilla-server/src/argilla_server/security/authentication/provider.py index 
002faa73cc..de33a81a18 100644 --- a/argilla-server/src/argilla_server/security/authentication/provider.py +++ b/argilla-server/src/argilla_server/security/authentication/provider.py @@ -28,6 +28,32 @@ from argilla_server.security.authentication.userinfo import UserInfo +def set_request_user(request: Request, user: User): + """ + Set the request user in the request state. + + Parameters: + request: The request object. + user: The user. + + """ + + request.state.user = user + + +def get_request_user(request: Request) -> Optional[User]: + """ + Get the current user from the request. + + Parameters: + request (Request): The request object. + + Returns: + The user if available, None otherwise. + """ + return getattr(request.state, "user", None) + + class AuthenticationProvider: """Authentication provider for the API requests.""" @@ -58,6 +84,7 @@ async def get_current_user( if not user: raise UnauthorizedError() + set_request_user(request, user) return user async def _authenticate_request_user(self, db: AsyncSession, request: Request) -> Optional[UserInfo]: diff --git a/argilla-server/src/argilla_server/telemetry/__init__.py b/argilla-server/src/argilla_server/telemetry/__init__.py new file mode 100644 index 0000000000..64f5f3cc31 --- /dev/null +++ b/argilla-server/src/argilla_server/telemetry/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ._client import TelemetryClient, get_telemetry_client # noqa +from ._helpers import * # noqa diff --git a/argilla-server/src/argilla_server/telemetry.py b/argilla-server/src/argilla_server/telemetry/_client.py similarity index 78% rename from argilla-server/src/argilla_server/telemetry.py rename to argilla-server/src/argilla_server/telemetry/_client.py index 2450eab00a..de8835701a 100644 --- a/argilla-server/src/argilla_server/telemetry.py +++ b/argilla-server/src/argilla_server/telemetry/_client.py @@ -16,17 +16,19 @@ import json import logging import platform +import uuid from fastapi import Request, Response from huggingface_hub.utils import send_telemetry from argilla_server._version import __version__ from argilla_server.api.errors.v1.exception_handlers import get_request_error -from argilla_server.settings import settings +from argilla_server.security.authentication.provider import get_request_user from argilla_server.utils._fastapi import resolve_endpoint_path_for_request -from argilla_server.utils._telemetry import ( +from argilla_server.telemetry._helpers import ( is_running_on_docker_container, server_deployment_type, + get_server_id, ) _LOGGER = logging.getLogger(__name__) @@ -34,8 +36,12 @@ @dataclasses.dataclass class TelemetryClient: + _server_id: uuid.UUID = dataclasses.field(init=False) + def __post_init__(self): + self._server_id = get_server_id() self._system_info = { + "server_id": self._server_id.urn, "system": platform.system(), "machine": platform.machine(), "platform": platform.platform(), @@ -47,16 +53,11 @@ def __post_init__(self): _LOGGER.info("System Info:") _LOGGER.info(f"Context: {json.dumps(self._system_info, indent=2)}") - async def track_data(self, topic: str, data: dict, include_system_info: bool = True, count: int = 1): - library_name = "argilla/server" - topic = f"{library_name}/{topic}" - - user_agent = {**data} - if include_system_info: - user_agent.update(self._system_info) - if count is not None: - user_agent["count"] 
= count + async def track_data(self, topic: str, data: dict): + library_name = "argilla-server" + topic = f"argilla/server/{topic}" + user_agent = {**data, **self._system_info} send_telemetry(topic=topic, library_name=library_name, library_version=__version__, user_agent=user_agent) async def track_api_request(self, request: Request, response: Response) -> None: @@ -85,15 +86,16 @@ async def track_api_request(self, request: Request, response: Response) -> None: "response.status": str(response.status_code), } - if "Server-Timing" in response.headers: - duration_in_ms = response.headers["Server-Timing"] - duration_in_ms = duration_in_ms.removeprefix("total;dur=") - + if server_timing := response.headers.get("Server-Timing"): + duration_in_ms = server_timing.removeprefix("total;dur=") data["duration_in_milliseconds"] = duration_in_ms + if user := get_request_user(request=request): + data["user.id"] = str(user.id) + data["user.role"] = user.role + if response.status_code >= 400: - argilla_error: Exception = get_request_error(request=request) - if argilla_error: + if argilla_error := get_request_error(request=request): data["response.error_code"] = argilla_error.code # noqa await self.track_data(topic=topic, data=data) diff --git a/argilla-server/src/argilla_server/utils/_telemetry.py b/argilla-server/src/argilla_server/telemetry/_helpers.py similarity index 78% rename from argilla-server/src/argilla_server/utils/_telemetry.py rename to argilla-server/src/argilla_server/telemetry/_helpers.py index 8015ba5319..95a4986bf5 100644 --- a/argilla-server/src/argilla_server/utils/_telemetry.py +++ b/argilla-server/src/argilla_server/telemetry/_helpers.py @@ -13,11 +13,43 @@ # limitations under the License. 
import logging import os +import uuid +from uuid import UUID from argilla_server.integrations.huggingface.spaces import HUGGINGFACE_SETTINGS +from argilla_server.settings import settings _LOGGER = logging.getLogger(__name__) +_SERVER_ID_DAT_FILE = "server_id.dat" + + +def get_server_id() -> UUID: + """ + Returns the server ID. If it is not set, it generates a new one and stores it + in $ARGILLA_HOME/server_id.dat + + Returns: + UUID: The server ID + + """ + + server_id_file = os.path.join(settings.home_path, _SERVER_ID_DAT_FILE) + + if os.path.exists(server_id_file): + with open(server_id_file, "r") as f: + server_id = f.read().strip() + try: + return UUID(server_id) + except ValueError: + _LOGGER.warning(f"Invalid server ID in {server_id_file}. Generating a new one.") + + server_id = uuid.uuid4() + with open(server_id_file, "w") as f: + f.write(str(server_id)) + + return server_id + def server_deployment_type() -> str: """Returns the type of deployment of the server.""" diff --git a/argilla-server/tests/unit/commons/test_telemetry.py b/argilla-server/tests/unit/commons/test_telemetry.py index ca864b850b..ca89a57109 100644 --- a/argilla-server/tests/unit/commons/test_telemetry.py +++ b/argilla-server/tests/unit/commons/test_telemetry.py @@ -11,14 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- import uuid from unittest.mock import MagicMock import pytest -from fastapi import Request, APIRouter -from fastapi.routing import APIRoute -from pytest_mock import mocker, MockerFixture +from fastapi import Request +from pytest_mock import MockerFixture from starlette.responses import JSONResponse from argilla_server.api.errors.v1.exception_handlers import set_request_error @@ -30,8 +28,35 @@ @pytest.mark.asyncio class TestSuiteTelemetry: + async def test_create_client_with_server_id(self, mocker: MockerFixture): + mock_server_id = uuid.uuid4() + mocker.patch("argilla_server.telemetry._client.get_server_id", return_value=mock_server_id) + + test_telemetry = TelemetryClient() + + assert "server_id" in test_telemetry._system_info + assert test_telemetry._system_info["server_id"] == mock_server_id.urn + + async def test_track_data(self, mocker: MockerFixture): + from argilla_server._version import __version__ as version + + mock = mocker.patch("argilla_server.telemetry._client.send_telemetry") + + telemetry = TelemetryClient() + + await telemetry.track_data("test_topic", {"test": "test"}) + + mock.assert_called_once_with( + topic="argilla/server/test_topic", + library_name="argilla-server", + library_version=version, + user_agent={"test": "test", **telemetry._system_info}, + ) + async def test_track_api_request(self, test_telemetry: TelemetryClient, mocker: MockerFixture): - mocker.patch("argilla_server.telemetry.resolve_endpoint_path_for_request", return_value="/api/test/endpoint") + mocker.patch( + "argilla_server.telemetry._client.resolve_endpoint_path_for_request", return_value="/api/test/endpoint" + ) request = Request( scope={ @@ -60,7 +85,9 @@ async def test_track_api_request(self, test_telemetry: TelemetryClient, mocker: ) async def test_track_api_request_call_with_error(self, test_telemetry: TelemetryClient, mocker: MockerFixture): - mocker.patch("argilla_server.telemetry.resolve_endpoint_path_for_request", return_value="/api/test/endpoint") + 
mocker.patch( + "argilla_server.telemetry._client.resolve_endpoint_path_for_request", return_value="/api/test/endpoint" + ) request = Request( scope={ @@ -87,7 +114,9 @@ async def test_track_api_request_call_with_error(self, test_telemetry: Telemetry async def test_track_api_request_call_with_error_and_exception( self, test_telemetry: TelemetryClient, mocker: MockerFixture ): - mocker.patch("argilla_server.telemetry.resolve_endpoint_path_for_request", return_value="/api/test/endpoint") + mocker.patch( + "argilla_server.telemetry._client.resolve_endpoint_path_for_request", return_value="/api/test/endpoint" + ) request = Request( scope={ diff --git a/argilla-server/tests/unit/conftest.py b/argilla-server/tests/unit/conftest.py index ea1713cf8d..a702be6c36 100644 --- a/argilla-server/tests/unit/conftest.py +++ b/argilla-server/tests/unit/conftest.py @@ -118,7 +118,7 @@ def test_telemetry(mocker: "MockerFixture") -> "TelemetryClient": setattr(real_telemetry, attr_name, wrapped) # Patch the _TELEMETRY_CLIENT to use the real_telemetry - mocker.patch("argilla_server.telemetry._TELEMETRY_CLIENT", new=real_telemetry) + mocker.patch("argilla_server.telemetry._client._TELEMETRY_CLIENT", new=real_telemetry) return real_telemetry diff --git a/argilla-server/tests/unit/telemetry/__init__.py b/argilla-server/tests/unit/telemetry/__init__.py new file mode 100644 index 0000000000..4b6cecae7f --- /dev/null +++ b/argilla-server/tests/unit/telemetry/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/argilla-server/tests/unit/telemetry/test_telemetry_helpers.py b/argilla-server/tests/unit/telemetry/test_telemetry_helpers.py new file mode 100644 index 0000000000..a6753a06be --- /dev/null +++ b/argilla-server/tests/unit/telemetry/test_telemetry_helpers.py @@ -0,0 +1,43 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from unittest.mock import patch, mock_open +from uuid import UUID + +import pytest +from pytest_mock import MockerFixture + +from argilla_server.settings import settings +from argilla_server.telemetry import get_server_id + + +class TestTelemetryHelpers: + def test_get_server_id_without_existing_file(self, mocker: MockerFixture): + mocker.patch.object(os.path, "exists", return_value=False) + + with patch("builtins.open", mock_open()) as mock: + server_id = get_server_id() + another_server_id = get_server_id() + + assert server_id != another_server_id + assert mock.call_count == 2 + mock.assert_called_with(os.path.join(settings.home_path, "server_id.dat"), "w") + + def test_get_server_id_with_existing_file(self, mocker: MockerFixture): + mocker.patch.object(os.path, "exists", return_value=True) + + with patch("builtins.open", mock_open(read_data="00000000-0000-0000-0000-000000000000")) as mock: + server_id = get_server_id() + assert server_id == UUID(int=0) diff --git 
a/argilla-server/tests/unit/test_api_telemetry.py b/argilla-server/tests/unit/test_api_telemetry.py index 9f10aa14ae..7f0984cfcd 100644 --- a/argilla-server/tests/unit/test_api_telemetry.py +++ b/argilla-server/tests/unit/test_api_telemetry.py @@ -17,6 +17,7 @@ from unittest.mock import MagicMock, ANY import pytest +from pytest_mock import MockerFixture from starlette.testclient import TestClient from argilla_server._app import create_server_app @@ -44,6 +45,19 @@ def test_track_api_request_call_on_error(self, test_telemetry: TelemetryClient): test_telemetry.track_api_request.assert_called_once() + def test_track_api_request_with_unexpected_telemetry_error( + self, test_telemetry: TelemetryClient, mocker: "MockerFixture" + ): + with mocker.patch.object(test_telemetry, "track_api_request", side_effect=Exception("mocked error")): + settings.enable_telemetry = True + + client = TestClient(create_server_app()) + + response = client.get("/api/v1/version") + + test_telemetry.track_api_request.assert_called_once() + assert response.status_code == 200 + def test_not_track_api_request_call_when_disabled_telemetry(self, test_telemetry: TelemetryClient): settings.enable_telemetry = False From 0b825a05d1425a64f66951f74682d0781a1ce93f Mon Sep 17 00:00:00 2001 From: Paco Aranda Date: Tue, 3 Sep 2024 09:52:50 +0200 Subject: [PATCH 60/63] =?UTF-8?q?[FEAT]=C2=A0`argilla=20server`:=20track?= =?UTF-8?q?=20servert=20startup=20(#5447)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description This PR adds the track startup method defined in https://github.com/argilla-io/argilla/pull/5441 and include perstitent_storaged_enbled info as part of the system info **Type of change** - Improvement (change adding some improvement to an existing functionality) **How Has This Been Tested** **Checklist** - I added relevant documentation - I followed the style guidelines of this project - I did a self-review of my code - I made corresponding changes to 
the documentation - I confirm My changes generate no new warnings - I have added tests that prove my fix is effective or that my feature works - I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --- argilla-server/src/argilla_server/_app.py | 36 ++++++++++++------- .../src/argilla_server/telemetry/_client.py | 20 ++++++++--- .../tests/unit/commons/test_telemetry.py | 26 ++++++++++++-- argilla-server/tests/unit/test_app.py | 22 +++++++++++- 4 files changed, 82 insertions(+), 22 deletions(-) diff --git a/argilla-server/src/argilla_server/_app.py b/argilla-server/src/argilla_server/_app.py index a980586d27..7833cfb995 100644 --- a/argilla-server/src/argilla_server/_app.py +++ b/argilla-server/src/argilla_server/_app.py @@ -48,9 +48,9 @@ @contextlib.asynccontextmanager async def app_lifespan(app: FastAPI): # See https://fastapi.tiangolo.com/advanced/events/#lifespan - show_telemetry_warning() await configure_database() await configure_search_engine() + track_server_startup() yield @@ -185,18 +185,28 @@ def _create_statics_folder(path_from): ) -def show_telemetry_warning(): - if settings.enable_telemetry: - message = "\n" - message += inspect.cleandoc( - "Argilla uses telemetry to report anonymous usage and error information. You\n" - "can know more about what information is reported at:\n\n" - " https://docs.argilla.io/latest/reference/argilla-server/telemetry/\n\n" - "Telemetry is currently enabled. 
If you want to disable it, you can configure\n" - "the environment variable before relaunching the server:\n\n" - f'{"#set HF_HUB_DISABLE_TELEMETRY=1" if os.name == "nt" else "$>export HF_HUB_DISABLE_TELEMETRY=1"}' - ) - _LOGGER.warning(message) +def track_server_startup() -> None: + """ + Track server startup telemetry event if telemetry is enabled + """ + if not settings.enable_telemetry: + return + + _show_telemetry_warning() + get_telemetry_client().track_server_startup() + + +def _show_telemetry_warning(): + message = "\n" + message += inspect.cleandoc( + "Argilla uses telemetry to report anonymous usage and error information. You\n" + "can know more about what information is reported at:\n\n" + " https://docs.argilla.io/latest/reference/argilla-server/telemetry/\n\n" + "Telemetry is currently enabled. If you want to disable it, you can configure\n" + "the environment variable before relaunching the server:\n\n" + f'{"#set HF_HUB_DISABLE_TELEMETRY=1" if os.name == "nt" else "$>export HF_HUB_DISABLE_TELEMETRY=1"}' + ) + _LOGGER.warning(message) async def _create_oauth_allowed_workspaces(db: AsyncSession): diff --git a/argilla-server/src/argilla_server/telemetry/_client.py b/argilla-server/src/argilla_server/telemetry/_client.py index de8835701a..ada1544739 100644 --- a/argilla-server/src/argilla_server/telemetry/_client.py +++ b/argilla-server/src/argilla_server/telemetry/_client.py @@ -17,12 +17,14 @@ import logging import platform import uuid +from typing import Optional from fastapi import Request, Response from huggingface_hub.utils import send_telemetry from argilla_server._version import __version__ from argilla_server.api.errors.v1.exception_handlers import get_request_error +from argilla_server.integrations.huggingface.spaces import HUGGINGFACE_SETTINGS from argilla_server.security.authentication.provider import get_request_user from argilla_server.utils._fastapi import resolve_endpoint_path_for_request from argilla_server.telemetry._helpers import ( @@ 
-48,16 +50,17 @@ def __post_init__(self): "sys_version": platform.version(), "deployment": server_deployment_type(), "docker": is_running_on_docker_container(), + "persistent_storage_enabled": HUGGINGFACE_SETTINGS.space_persistent_storage_enabled, } _LOGGER.info("System Info:") _LOGGER.info(f"Context: {json.dumps(self._system_info, indent=2)}") - async def track_data(self, topic: str, data: dict): + def track_data(self, topic: str, data: Optional[dict] = None): library_name = "argilla-server" topic = f"argilla/server/{topic}" - user_agent = {**data, **self._system_info} + user_agent = {**(data or {}), **self._system_info} send_telemetry(topic=topic, library_name=library_name, library_version=__version__, user_agent=user_agent) async def track_api_request(self, request: Request, response: Response) -> None: @@ -76,8 +79,6 @@ async def track_api_request(self, request: Request, response: Response) -> None: if endpoint_path is None: return - topic = f"endpoints" - data = { "endpoint": f"{request.method} {endpoint_path}", "request.user-agent": request.headers.get("user-agent"), @@ -98,7 +99,16 @@ async def track_api_request(self, request: Request, response: Response) -> None: if argilla_error := get_request_error(request=request): data["response.error_code"] = argilla_error.code # noqa - await self.track_data(topic=topic, data=data) + self.track_data(topic="endpoints", data=data) + + def track_server_startup(self) -> None: + """ + This method is used to track the launch of the server. 
+ + Returns: + None + """ + self.track_data(topic="startup") _TELEMETRY_CLIENT = TelemetryClient() diff --git a/argilla-server/tests/unit/commons/test_telemetry.py b/argilla-server/tests/unit/commons/test_telemetry.py index ca89a57109..c78ed16c29 100644 --- a/argilla-server/tests/unit/commons/test_telemetry.py +++ b/argilla-server/tests/unit/commons/test_telemetry.py @@ -21,6 +21,7 @@ from argilla_server.api.errors.v1.exception_handlers import set_request_error from argilla_server.errors import ServerError +from argilla_server.integrations.huggingface.spaces import HUGGINGFACE_SETTINGS from argilla_server.telemetry import TelemetryClient mock_request = Request(scope={"type": "http", "headers": {}}) @@ -37,14 +38,29 @@ async def test_create_client_with_server_id(self, mocker: MockerFixture): assert "server_id" in test_telemetry._system_info assert test_telemetry._system_info["server_id"] == mock_server_id.urn - async def test_track_data(self, mocker: MockerFixture): + def test_create_client_with_persistent_storage_enabled(self): + HUGGINGFACE_SETTINGS.space_persistent_storage_enabled = True + + test_telemetry = TelemetryClient() + + assert "persistent_storage_enabled" in test_telemetry._system_info + assert test_telemetry._system_info["persistent_storage_enabled"] is True + + def test_create_client_with_persistent_storage_disabled(self): + HUGGINGFACE_SETTINGS.space_persistent_storage_enabled = False + + test_telemetry = TelemetryClient() + + assert "persistent_storage_enabled" in test_telemetry._system_info + assert test_telemetry._system_info["persistent_storage_enabled"] is False + + def test_track_data(self, mocker: MockerFixture): from argilla_server._version import __version__ as version mock = mocker.patch("argilla_server.telemetry._client.send_telemetry") telemetry = TelemetryClient() - - await telemetry.track_data("test_topic", {"test": "test"}) + telemetry.track_data("test_topic", {"test": "test"}) mock.assert_called_once_with( 
topic="argilla/server/test_topic", @@ -142,3 +158,7 @@ async def test_track_api_request_call_with_error_and_exception( "response.error_code": "argilla.api.errors::ServerError", }, ) + + def test_track_server_startup(self, test_telemetry: TelemetryClient): + test_telemetry.track_server_startup() + test_telemetry.track_data.assert_called_once_with(topic="startup") diff --git a/argilla-server/tests/unit/test_app.py b/argilla-server/tests/unit/test_app.py index 7472f661bf..48fbc82bff 100644 --- a/argilla-server/tests/unit/test_app.py +++ b/argilla-server/tests/unit/test_app.py @@ -13,12 +13,19 @@ # limitations under the License. from typing import cast from unittest import mock +from unittest.mock import MagicMock import pytest +from pytest_mock import MockerFixture from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from argilla_server._app import create_server_app, configure_database, _create_oauth_allowed_workspaces +from argilla_server._app import ( + create_server_app, + configure_database, + _create_oauth_allowed_workspaces, + track_server_startup, +) from argilla_server.models import Workspace from argilla_server.security.authentication.oauth2 import OAuth2Settings from argilla_server.security.authentication.oauth2.settings import AllowedWorkspace @@ -26,6 +33,7 @@ from starlette.routing import Mount from starlette.testclient import TestClient +from argilla_server.telemetry import TelemetryClient from tests.factories import WorkspaceFactory @@ -118,3 +126,15 @@ async def test_create_workspaces_with_existing_workspaces(self, db: AsyncSession workspaces = (await db.scalars(select(Workspace))).all() assert len(workspaces) == 1 + + def test_track_telemetry_on_startup(self, test_settings: Settings, test_telemetry: TelemetryClient): + settings.enable_telemetry = True + + track_server_startup() + test_telemetry.track_server_startup.assert_called_once() + + def test_track_telemetry_on_startup_disabled(self, test_settings: Settings, 
test_telemetry: TelemetryClient): + settings.enable_telemetry = False + + track_server_startup() + test_telemetry.track_server_startup.assert_not_called() From d93c27b5eae58ef6374429f58a5d695920677a2d Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Tue, 3 Sep 2024 10:00:53 +0200 Subject: [PATCH 61/63] chore: Align the user.id registration --- argilla-server/src/argilla_server/telemetry/_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argilla-server/src/argilla_server/telemetry/_client.py b/argilla-server/src/argilla_server/telemetry/_client.py index ada1544739..a9c677bc84 100644 --- a/argilla-server/src/argilla_server/telemetry/_client.py +++ b/argilla-server/src/argilla_server/telemetry/_client.py @@ -92,7 +92,7 @@ async def track_api_request(self, request: Request, response: Response) -> None: data["duration_in_milliseconds"] = duration_in_ms if user := get_request_user(request=request): - data["user.id"] = str(user.id) + data["user.id"] = user.id.urn data["user.role"] = user.role if response.status_code >= 400: From cd45e6b7de43928e349e1c84a35ec991bed9f3d3 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Tue, 3 Sep 2024 10:01:14 +0200 Subject: [PATCH 62/63] chore: review docs --- argilla/docs/reference/argilla-server/telemetry.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/argilla/docs/reference/argilla-server/telemetry.md b/argilla/docs/reference/argilla-server/telemetry.md index 6126a54bd0..cd8c57ac90 100644 --- a/argilla/docs/reference/argilla-server/telemetry.md +++ b/argilla/docs/reference/argilla-server/telemetry.md @@ -35,10 +35,11 @@ We do not collect any piece of information related to the source data you store The following usage and error information is reported: -* The code of the raised error and the entity type related to the error, if any (Dataset, Workspace,...) 
+* The code of the raised error * The `user-agent` and `accept-language` http headers * Task name and number of records for bulk operations * An anonymous generated user uuid +* An anonymous generated server uuid * The Argilla version running the server * The Python version, e.g. `3.8.13` * The system/OS name, such as `Linux`, `Darwin`, `Windows` From 0c51124cdd1ec850e8ed353381ccf0a38bf4a24a Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Tue, 3 Sep 2024 10:21:42 +0200 Subject: [PATCH 63/63] chore: Update CHANGELOG --- argilla-server/CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/argilla-server/CHANGELOG.md b/argilla-server/CHANGELOG.md index ad03419c32..637ba51202 100644 --- a/argilla-server/CHANGELOG.md +++ b/argilla-server/CHANGELOG.md @@ -20,6 +20,10 @@ These are the section headers that we use: - Added new endpoint `GET /api/v1/datsets/:dataset_id/users/progress` to compute the users progress. ([#5367](https://github.com/argilla-io/argilla/pull/5367)) +### Changed + +- Change the telemetry module to use the HuggingFace telemetry client ([#5218](https://github.com/argilla-io/argilla/pull/5218)) + ### Fixed - Fixed response duplicate checking ([#5357](https://github.com/argilla-io/argilla/issues/5357))