Spaces:
Runtime error
Runtime error
| # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= | |
| import os | |
| from typing import TYPE_CHECKING, List, Optional | |
| if TYPE_CHECKING: | |
| from apify_client.clients import DatasetClient | |
| from camel.utils import api_keys_required | |
class Apify:
    r"""Apify is a platform that allows you to automate any web workflow.

    This class is a thin wrapper around ``apify_client.ApifyClient``: every
    public method forwards to the client stored in ``self.client`` and
    re-raises any failure as a :obj:`RuntimeError` chained to the original
    exception.

    Args:
        api_key (Optional[str]): API key for authenticating with the Apify
            API. When omitted (or empty), the value of the
            ``APIFY_API_KEY`` environment variable is used instead; if that
            is unset as well, the client is created with ``token=None``.
            (default: :obj:`None`)
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
    ) -> None:
        # Imported here rather than at module level, so importing this
        # module does not by itself import `apify_client`.
        from apify_client import ApifyClient

        self._api_key = api_key or os.environ.get("APIFY_API_KEY")
        self.client = ApifyClient(token=self._api_key)

    def run_actor(
        self,
        actor_id: str,
        run_input: Optional[dict] = None,
        content_type: Optional[str] = None,
        build: Optional[str] = None,
        max_items: Optional[int] = None,
        memory_mbytes: Optional[int] = None,
        timeout_secs: Optional[int] = None,
        webhooks: Optional[list] = None,
        wait_secs: Optional[int] = None,
    ) -> Optional[dict]:
        r"""Run an actor on the Apify platform.

        All arguments are forwarded unchanged, as keyword arguments, to the
        actor client's ``call`` method.

        Args:
            actor_id (str): The ID of the actor to run.
            run_input (Optional[dict]): The input data for the actor.
                (default: :obj:`None`)
            content_type (str, optional): The content type of the input.
            build (str, optional): Specifies the Actor build to run. It can
                be either a build tag or build number. By default, the run
                uses the build specified in the default run configuration
                for the Actor (typically latest).
            max_items (int, optional): Maximum number of results that will
                be returned by this run. If the Actor is charged per
                result, you will not be charged for more results than the
                given limit.
            memory_mbytes (int, optional): Memory limit for the run, in
                megabytes. By default, the run uses a memory limit
                specified in the default run configuration for the Actor.
            timeout_secs (int, optional): Optional timeout for the run, in
                seconds. By default, the run uses timeout specified in the
                default run configuration for the Actor.
            webhooks (list, optional): Optional webhooks
                (https://docs.apify.com/webhooks) associated with the Actor
                run, which can be used to receive a notification, e.g. when
                the Actor finished or failed. If you already have a webhook
                set up for the Actor, you do not have to add it again here.
            wait_secs (int, optional): The maximum number of seconds the
                server waits for finish. If not provided, waits
                indefinitely.

        Returns:
            Optional[dict]: The output data from the actor if successful.
                Use its ``'defaultDatasetId'`` entry to get the dataset
                produced by the run.

        Raises:
            RuntimeError: If the actor fails to run.
        """
        try:
            actor_client = self.client.actor(actor_id)
            return actor_client.call(
                run_input=run_input,
                content_type=content_type,
                build=build,
                max_items=max_items,
                memory_mbytes=memory_mbytes,
                timeout_secs=timeout_secs,
                webhooks=webhooks,
                wait_secs=wait_secs,
            )
        except Exception as e:
            raise RuntimeError(f"Failed to run actor {actor_id}: {e}") from e

    def get_dataset_client(
        self,
        dataset_id: str,
    ) -> "DatasetClient":
        r"""Get a dataset client from the Apify platform.

        Args:
            dataset_id (str): The ID of the dataset to get the client for.

        Returns:
            DatasetClient: The dataset client.

        Raises:
            RuntimeError: If the dataset client fails to be retrieved.
        """
        try:
            return self.client.dataset(dataset_id)
        except Exception as e:
            # The message names the *client* explicitly so that, when this
            # error is re-wrapped by the dataset helpers below, it can be
            # told apart from a failure of the dataset operation itself.
            raise RuntimeError(
                f"Failed to get dataset client {dataset_id}: {e}"
            ) from e

    def get_dataset(
        self,
        dataset_id: str,
    ) -> Optional[dict]:
        r"""Get a dataset from the Apify platform.

        Args:
            dataset_id (str): The ID of the dataset to get.

        Returns:
            Optional[dict]: The dataset, as returned by the dataset
                client's ``get`` call. May be :obj:`None` (presumably when
                no such dataset exists; see the Apify client reference).

        Raises:
            RuntimeError: If the dataset fails to be retrieved.
        """
        try:
            dataset_client = self.get_dataset_client(dataset_id)
            return dataset_client.get()
        except Exception as e:
            raise RuntimeError(
                f"Failed to get dataset {dataset_id}: {e}"
            ) from e

    def update_dataset(
        self,
        dataset_id: str,
        name: str,
    ) -> dict:
        r"""Update a dataset on the Apify platform.

        Args:
            dataset_id (str): The ID of the dataset to update.
            name (str): The new name for the dataset.

        Returns:
            dict: The updated dataset.

        Raises:
            RuntimeError: If the dataset fails to be updated.
        """
        try:
            dataset_client = self.get_dataset_client(dataset_id)
            return dataset_client.update(name=name)
        except Exception as e:
            raise RuntimeError(
                f"Failed to update dataset {dataset_id}: {e}"
            ) from e

    def get_dataset_items(
        self,
        dataset_id: str,
    ) -> List:
        r"""Get items from a dataset on the Apify platform.

        Args:
            dataset_id (str): The ID of the dataset to get items from.

        Returns:
            list: The items in the dataset, i.e. the ``items`` attribute of
                the page returned by the dataset client's ``list_items``
                call (made without any paging arguments).

        Raises:
            RuntimeError: If the items fail to be retrieved.
        """
        try:
            dataset_client = self.get_dataset_client(dataset_id)
            return dataset_client.list_items().items
        except Exception as e:
            raise RuntimeError(
                f"Failed to get dataset items {dataset_id}: {e}"
            ) from e

    def get_datasets(
        self,
        unnamed: Optional[bool] = None,
        limit: Optional[int] = None,
        offset: Optional[int] = None,
        desc: Optional[bool] = None,
    ) -> List[dict]:
        r"""Get a list of datasets from the Apify platform.

        Args:
            unnamed (bool, optional): Whether to include unnamed datasets
                in the list.
            limit (int, optional): How many datasets to retrieve.
            offset (int, optional): What dataset to include as first when
                retrieving the list.
            desc (bool, optional): Whether to sort the datasets in
                descending order based on their modification date.

        Returns:
            List[dict]: The datasets.

        Raises:
            RuntimeError: If the datasets fail to be retrieved.
        """
        try:
            page = self.client.datasets().list(
                unnamed=unnamed,
                limit=limit,
                offset=offset,
                desc=desc,
            )
            return page.items
        except Exception as e:
            raise RuntimeError(f"Failed to get datasets: {e}") from e