Source code for google_pandas_load.loader_quick_setup
from google.cloud import bigquery, storage
from google.auth.credentials import Credentials
from google_pandas_load.loader import Loader
from typing import Optional
class LoaderQuickSetup(Loader):
    """
    The purpose of this class is to quickly set up a loader.

    An instance of LoaderQuickSetup is simply an instance of the base class
    built with the following arguments:
    ::

        bq_client=bq_client
        dataset_id=dataset_id
        gs_client=gs_client
        bucket_name=bucket_name
        bucket_dir_path=bucket_dir_path
        local_dir_path=local_dir_path
        separator=separator
        chunk_size=chunk_size
        timeout=timeout

    where
    ::

        bq_client = google.cloud.bigquery.Client(
            project=project_id,
            credentials=credentials)
        dataset_id = project_id + '.' + dataset_name
        gs_client = google.cloud.storage.Client(
            project=project_id,
            credentials=credentials)

    Note:
        If the optional argument bucket_dir_path is not given, data will be
        stored at the root of the bucket. It is a good practice to specify
        this argument so that data is stored in a defined bucket directory.

    Args:
        project_id (str, optional): The project id.
        dataset_name (str, optional): The dataset name.
        bucket_name (str, optional): The bucket name.
        bucket_dir_path (str, optional): See base class.
        credentials (google.auth.credentials.Credentials, optional):
            Credentials used to build the bq_client and the gs_client. If
            not passed, falls back to the default inferred from the
            environment.
        local_dir_path (str, optional): See base class.
        separator (str, optional): See base class.
        chunk_size (int, optional): See base class.
        timeout (int, optional): See base class.

    Raises:
        ValueError: If project_id is given without dataset_name or
            bucket_name, or if dataset_name or bucket_name is given
            without project_id.
    """

    def __init__(
            self,
            project_id: Optional[str] = None,
            dataset_name: Optional[str] = None,
            bucket_name: Optional[str] = None,
            bucket_dir_path: Optional[str] = None,
            credentials: Optional[Credentials] = None,
            local_dir_path: Optional[str] = None,
            separator: Optional[str] = '|',
            chunk_size: Optional[int] = 2**28,
            timeout: Optional[int] = 60):
        self._project_id = project_id
        # Fail fast on an inconsistent combination of identifiers before
        # any client object is built.
        self._check_project_id_dataset_name_bucket_name_consistency(
            dataset_name, bucket_name)
        bq_client = None
        dataset_id = None
        gs_client = None
        if dataset_name is not None:
            # A BigQuery client is only needed when a dataset is targeted.
            bq_client = bigquery.Client(
                project=self._project_id, credentials=credentials)
            dataset_id = f'{self._project_id}.{dataset_name}'
        if bucket_name is not None:
            # A Storage client is only needed when a bucket is targeted.
            gs_client = storage.Client(
                project=self._project_id, credentials=credentials)
        super().__init__(
            bq_client=bq_client,
            dataset_id=dataset_id,
            gs_client=gs_client,
            bucket_name=bucket_name,
            bucket_dir_path=bucket_dir_path,
            local_dir_path=local_dir_path,
            separator=separator,
            chunk_size=chunk_size,
            timeout=timeout)

    @property
    def project_id(self) -> str:
        """str: The project_id."""
        return self._project_id

    def _check_project_id_dataset_name_bucket_name_consistency(
            self, dataset_name, bucket_name):
        # project_id must be provided if and only if at least one of
        # dataset_name / bucket_name is provided.
        no_project = self._project_id is None
        no_dataset = dataset_name is None
        no_bucket = bucket_name is None
        if not no_project and no_dataset and no_bucket:
            msg = ('At least one of dataset_name or bucket_name '
                   'must be provided if project_id is provided')
            raise ValueError(msg)
        if not no_dataset and no_project:
            msg = 'project_id must be provided if dataset_name is provided'
            raise ValueError(msg)
        if not no_bucket and no_project:
            msg = 'project_id must be provided if bucket_name is provided'
            raise ValueError(msg)