diff --git a/notebooks/Fixtures/Fixtures.ipynb b/notebooks/Fixtures/Fixtures.ipynb new file mode 100644 index 00000000000..46eb2cfbaf7 --- /dev/null +++ b/notebooks/Fixtures/Fixtures.ipynb @@ -0,0 +1,222 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "# What are main resources that need fixtures ?\n", + "# Launch Server - Datasite and Gateway\n", + "# Datasets\n", + "# Users - DO, DS, Admin\n", + "# UserCodes\n", + "# Create Jobs\n", + "# Clean Stores" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "# third party\n", + "from fixture_utils import DatasetType\n", + "from fixture_utils import ServiceRole\n", + "from fixture_utils import SyftFixture\n", + "\n", + "# syft absolute\n", + "import syft as sy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "server = sy.orchestra.launch(\"test-domain\", dev_mode=False, reset=True)\n", + "root_client = server.login(email=\"info@openmined.org\", password=\"changethis\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "config = {\n", + " \"user\": [\n", + " {\"role\": ServiceRole.DATA_OWNER, \"to_create\": 1},\n", + " {\"role\": ServiceRole.DATA_SCIENTIST, \"to_create\": 1},\n", + " ],\n", + " \"dataset\": [{\"type\": DatasetType.TABULAR, \"to_create\": 1}],\n", + " \"user_code\": {\n", + " \"to_create\": 5,\n", + " \"via_project\": False,\n", + " },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "fixture = SyftFixture(config, root_client)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "fixture.create()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "fixture.info()" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "#### Create code via project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "config_with_project = {\n", + " \"user\": [\n", + " {\"role\": ServiceRole.DATA_OWNER, \"to_create\": 1},\n", + " {\"role\": ServiceRole.DATA_SCIENTIST, \"to_create\": 1},\n", + " ],\n", + " \"dataset\": [{\"type\": DatasetType.TABULAR, \"to_create\": 1}],\n", + " \"user_code\": {\n", + " \"to_create\": 5,\n", + " \"via_project\": True,\n", + " },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "fixture = SyftFixture(config_with_project, root_client)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "fixture.create()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "root_client.users" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "root_client.datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "root_client.requests" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "root_client.code" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/Fixtures/fixture_utils.py b/notebooks/Fixtures/fixture_utils.py new file mode 100644 index 00000000000..f5730085827 --- /dev/null +++ b/notebooks/Fixtures/fixture_utils.py @@ -0,0 +1,226 @@ +# stdlib +import ast +from collections.abc import Callable +from dataclasses import dataclass +from enum import Enum +from random import choice + +# third party +from faker import Faker +import pandas as pd +from tqdm import tqdm + +# syft absolute +import syft as sy +from syft import autocache +from syft.client.client import SyftClient +from syft.service.api.api import register_fn_in_linecache +from syft.service.code.user_code import syft_function_single_use +from syft.service.project.project import Project +from syft.service.response import SyftError +from syft.service.user.user import UserView +from syft.service.user.user_roles import ServiceRole +from syft.types.base import SyftBaseModel + + +class BaseFixtureConfig(SyftBaseModel): + class Config: + arbitrary_types_allowed = True + + to_create: int + created: int = 0 + + +class DatasetType(Enum): + """Type of datasets one can create.""" + + IMAGE = "image" + TABULAR = "tabular" + + +class UserFixtureConfig(BaseFixtureConfig): + role: ServiceRole + + +class DatasetFixtureConfig(BaseFixtureConfig): + type: DatasetType + + +class CodeFixtureConfig(BaseFixtureConfig): + via_project: bool + # uni_distribution: bool + + +@dataclass +class UserEmailPassword: + email: str + password: str + + +class FixtureConfig(SyftBaseModel): + user: list[UserFixtureConfig] + dataset: list[DatasetFixtureConfig] + user_code: CodeFixtureConfig + + +class SyftFixture: + """ + A class to create sample data for the syft platform. 
+ """ + + def __init__(self, config: dict, root_client: SyftClient) -> None: + self.config = FixtureConfig(**config) + self.root_client = root_client + self.faker = Faker() + + def _add_users(self): + for user_config in tqdm(self.config.user, desc="Users", position=0): + users_to_create = user_config.to_create - user_config.created + for _ in range(users_to_create): + self._add_user(role=user_config.role) + user_config.created += 1 + + def _add_user(self, role: ServiceRole): + password = self.faker.password() + + user = self.root_client.api.services.user.create( + name=self.faker.name(), + email=self.faker.email(), + password=password, + password_verify=password, + institution=self.faker.company(), + website=self.faker.url(), + role=role, + ) + assert isinstance(user, UserView) + + @staticmethod + def _get_sample_tabular_data(split_ratio: float = 0.8): + tabular_data_url = autocache( + "https://github.com/OpenMined/datasets/blob/main/trade_flow/ca%20-%20feb%202021.csv?raw=True" + ) + tabular_data = pd.read_csv(tabular_data_url) + stringcols = tabular_data.select_dtypes(include="object").columns + tabular_data[stringcols] = tabular_data[stringcols].fillna("").astype(str) + columns = tabular_data.shape[0] + private_index = int(split_ratio * columns) + private_data = tabular_data[:private_index] + mock_data = tabular_data[private_index:] + return private_data, mock_data + + def _get_sample_data(self, type: DatasetType) -> tuple: + if type == DatasetType.TABULAR: + return self._get_sample_tabular_data() + else: + raise NotImplementedError + + def _add_dataset(self, data_type: DatasetType): + dataset_name = f"{self.faker.first_name()}-Dataset" + private_data, mock_data = self._get_sample_data(data_type) + asset = sy.Asset( + name=f"{dataset_name}-{self.faker.uuid4()[:6]}", + description=self.faker.text(), + data=private_data, + mock=mock_data, + ) + + dataset = sy.Dataset( + name=dataset_name, + description=self.faker.text(), + url=self.faker.url(), + asset_list=[asset], + ) + res = self.root_client.upload_dataset(dataset) + assert not isinstance(res, sy.SyftError), res + + def _add_datasets(self): + for dataset_config in tqdm(self.config.dataset, desc="Datasets:", position=0): + datasets_to_create = dataset_config.to_create - dataset_config.created + for _ in range(datasets_to_create): + self._add_dataset(data_type=dataset_config.type) + dataset_config.created += 1 + + def _gen_sample_func(self, syft_decorator: Callable): + func_name = self.faker.pystr(min_chars=None, max_chars=12) + func_str = f'def {func_name}() -> str:\n return "Hello -> {func_name}"\n' + src = ast.unparse(ast.parse(func_str).body[0]) + raw_byte_code = compile(src, func_name, "exec") + register_fn_in_linecache(func_name, src) + exec(raw_byte_code) + new_func = eval(func_name, None, locals()) + return syft_decorator()(new_func) + + def _submit_user_code(self, via_project: bool, ds_client: SyftClient): + new_func = self._gen_sample_func(syft_decorator=syft_function_single_use) + if via_project: + new_project = sy.Project( + name=f"Project-{self.faker.name()}", + description=self.faker.text(), + members=[ds_client], + ) + res = new_project.create_code_request(new_func, ds_client) + project = new_project.send() + assert isinstance(project, Project) + else: + res = self.root_client.code.request_code_execution( + new_func, reason=self.faker.text() + ) + assert not isinstance(res, SyftError), res + + def _add_user_code(self): + ds_users = self.root_client.users.search(role=ServiceRole.DATA_SCIENTIST) + + user_client_map = {} + + 
if len(ds_users) == 0: + print( + "No Data scientist available to add user code to. " + "Please create some users with Data Scientist role." + ) + return + + user_code_to_create = ( + self.config.user_code.to_create - self.config.user_code.created + ) + print(f"Creating {user_code_to_create} user code.") + for _ in range(user_code_to_create): + # Randomly choose a data scientist + ds_user = choice(ds_users) + if ds_user.email not in user_client_map: + user_client_map[ds_user.email] = self.root_client.login_as( + email=ds_user.email + ) + + # Get the DS client + ds_client = user_client_map[ds_user.email] + + # Create user code + self._submit_user_code( + self.config.user_code.via_project, + ds_client=ds_client, + ) + self.config.user_code.created += 1 + + def create(self) -> str: + self._add_users() + self._add_datasets() + self._add_user_code() + print(self.info()) + + def info(self): + _repr_ = "\nUsers: " + + for user_conf in self.config.user: + _repr_ += ( + f"\n\t{user_conf.role.name}: {user_conf.created}/{user_conf.to_create}" + ) + + _repr_ += "\nDatasets:" + for dataset_conf in self.config.dataset: + _repr_ += f"\n\t{dataset_conf.type.name}: {dataset_conf.created}/{dataset_conf.to_create}" + + user_code_conf = self.config.user_code + _repr_ += f"\nUserCode: {user_code_conf.created}/{user_code_conf.to_create}" + _repr_ += f"\n\tVia Project: {user_code_conf.via_project}, Distribution: Random" + + print(_repr_) diff --git a/packages/syft/src/syft/client/datasite_client.py b/packages/syft/src/syft/client/datasite_client.py index 6d25959b9fa..becfa35d1ce 100644 --- a/packages/syft/src/syft/client/datasite_client.py +++ b/packages/syft/src/syft/client/datasite_client.py @@ -133,7 +133,7 @@ def upload_dataset(self, dataset: CreateDataset) -> SyftSuccess | SyftError: prompt_warning_message(message=message, confirm=True) with tqdm( - total=len(dataset.asset_list), colour="green", desc="Uploading" + total=len(dataset.asset_list), colour="green", desc="Uploading", position=1 ) as pbar: for asset in dataset.asset_list: try: diff --git a/packages/syft/src/syft/protocol/protocol_version.json b/packages/syft/src/syft/protocol/protocol_version.json index aec411969b2..96f4d0adafe 100644 --- a/packages/syft/src/syft/protocol/protocol_version.json +++ b/packages/syft/src/syft/protocol/protocol_version.json @@ -37,6 +37,13 @@ "hash": "b087d0c62b7d304c6ca80e4fb0e8a7f2a444be8f8cba57490dc09aeb98033105", "action": "add" } + }, + "UserSearch": { + "2": { + "version": 2, + "hash": "4b85bcbffc077a18be805960cc6d658225270875ccea1fa2b46370988ea0b32d", + "action": "add" + } } } } diff --git a/packages/syft/src/syft/service/user/user.py b/packages/syft/src/syft/service/user/user.py index 9bf538d3386..30c6b265e0d 100644 --- a/packages/syft/src/syft/service/user/user.py +++ b/packages/syft/src/syft/service/user/user.py @@ -218,7 +218,7 @@ class UserCreate(SyftObject): @serializable() -class UserSearch(PartialSyftObject): +class UserSearchV1(PartialSyftObject): __canonical_name__ = "UserSearch" __version__ = SYFT_OBJECT_VERSION_1 @@ -228,6 +228,18 @@ class UserSearch(PartialSyftObject): name: str +@serializable() +class UserSearch(PartialSyftObject): + __canonical_name__ = "UserSearch" + __version__ = SYFT_OBJECT_VERSION_2 + + id: UID + email: EmailStr + verify_key: SyftVerifyKey + name: str + role: ServiceRole + + @serializable() class UserView(SyftObject): __canonical_name__ = "UserView" @@ -443,3 +455,13 @@ def userv1_to_user_verify() -> list[Callable]: @transform(User, UserPrivateKey) def user_to_user_verify() -> 
list[Callable]: return [keep(["email", "signing_key", "id", "role"])] + + +@migrate(UserSearch, UserSearchV1) +def downgrade_user_search() -> list[Callable]: + return [drop("role")] + + +@migrate(UserSearchV1, UserSearch) +def upgrade_user_search() -> list[Callable]: + return [] diff --git a/packages/syft/src/syft/service/user/user_service.py b/packages/syft/src/syft/service/user/user_service.py index fe48010b4f4..dd298ca1e50 100644 --- a/packages/syft/src/syft/service/user/user_service.py +++ b/packages/syft/src/syft/service/user/user_service.py @@ -592,6 +592,26 @@ def update( return user.to(UserView) + @service_method( + path="user.key_for_email", name="key_for_email", roles=ADMIN_ROLE_LEVEL + ) + def key_for_email( + self, context: AuthedServiceContext, email: str + ) -> UserPrivateKey | SyftError: + result = self.stash.get_by_email(credentials=context.credentials, email=email) + + if result.is_err(): + return SyftError( + message=f"Failed to get user: {email}. Error: {result.err()}" + ) + + user = result.ok() + + if user is None: + return SyftError(message=f"No user found with email: {email}") + + return user.to(UserPrivateKey) + def get_target_object( self, credentials: SyftVerifyKey, uid: UID ) -> User | SyftError:
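Taken together, the pieces above can be exercised roughly as follows. This is a minimal sketch rather than part of the patch: it assumes fixture_utils.py is importable (as in the notebook), reuses the default admin credentials from the notebook, and infers, without confirming, that login_as(email=...) is what the new admin-only user.key_for_email endpoint supports.

# Minimal usage sketch; the individual calls all appear in the notebook or diff above.
# third party
from fixture_utils import DatasetType
from fixture_utils import ServiceRole
from fixture_utils import SyftFixture

# syft absolute
import syft as sy

server = sy.orchestra.launch("test-domain", dev_mode=False, reset=True)
root_client = server.login(email="info@openmined.org", password="changethis")

config = {
    "user": [{"role": ServiceRole.DATA_SCIENTIST, "to_create": 2}],
    "dataset": [{"type": DatasetType.TABULAR, "to_create": 1}],
    "user_code": {"to_create": 3, "via_project": False},
}
SyftFixture(config, root_client).create()

# UserSearch v2 adds a `role` field, so an admin can now filter users by role ...
data_scientists = root_client.users.search(role=ServiceRole.DATA_SCIENTIST)

# ... and impersonate one of them; login_as(email=...) presumably goes through
# the new admin-only user.key_for_email service, which returns a UserPrivateKey.
ds_client = root_client.login_as(email=data_scientists[0].email)
print(ds_client.code)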