skyplane.api.transfer_job#

Classes

Chunker(src_iface, dst_iface, transfer_config)

class that chunks the original files and makes the chunk requests

CopyJob(src_path, dst_path[, recursive, ...])

copy job that copies the source objects to the destination

SyncJob(src_path, dst_path[, recursive, ...])

sync job that copies the source objects that do not exist in the destination bucket to the destination

TransferJob(src_path, dst_path[, recursive, ...])

transfer job with transfer configurations

class skyplane.api.transfer_job.Chunker(src_iface, dst_iface, transfer_config, concurrent_multipart_chunk_threads=64)[source]#

Bases: object

class that chunks the original files and makes the chunk requests

Parameters:
  • src_iface (ObjectStoreInterface) – source object store interface

  • dst_iface (ObjectStoreInterface) – destination object store interface

  • transfer_config (TransferConfig) – the configuration during the transfer

  • concurrent_multipart_chunk_threads (int) – the maximum number of concurrent threads that dispatch multipart chunk requests (default: 64)

chunk(transfer_pair_generator)[source]#

Break transfer list into chunks.

Parameters:

transfer_pair_generator (Generator) – generator of pairs of objects to transfer

Return type:

Generator[Chunk, None, None]

static map_object_key_prefix(source_prefix, source_key, dest_prefix, recursive=False)[source]#

map_object_key_prefix computes the mapping of a source key in a bucket prefix to the destination. Users invoke a transfer via the CLI: aws s3 cp s3://bucket/source_prefix s3://bucket/dest_prefix. The CLI will query the object store for all objects in the source prefix and map them to the destination prefix using this function.

Parameters:
  • source_prefix (string) – source bucket folder prefix

  • source_key (string) – source file key to map in the folder prefix

  • dest_prefix (string) – destination bucket folder prefix

  • recursive (bool) – whether to copy all the objects matching the pattern (default: False)

to_chunk_requests(gen_in)[source]#

Converts a generator of chunks to a generator of chunk requests.

Parameters:

gen_in (Generator) – generator that generates the chunks to convert into chunk requests

Return type:

Generator[ChunkRequest, None, None]

transfer_pair_generator(src_prefix, dst_prefix, recursive, prefilter_fn=None)[source]#

Query the source region and yield pairs of (source, destination) objects to transfer.

Parameters:
  • src_prefix (string) – source bucket folder prefix

  • dst_prefix (string) – destination bucket folder prefix

  • recursive (bool) – if true, will copy objects at folder prefix recursively

  • prefilter_fn (Callable[[ObjectStoreObject], bool]) – filters out objects whose prefixes do not match the filter function (default: None)

Return type:

Generator[Tuple[ObjectStoreObject, ObjectStoreObject], None, None]

class skyplane.api.transfer_job.CopyJob(src_path, dst_path, recursive=False, requester_pays=False, transfer_list=<factory>, multipart_transfer_list=<factory>)[source]#

Bases: TransferJob

copy job that copies the source objects to the destination

Parameters:
  • transfer_list (list) – transfer list for later verification

  • multipart_transfer_list (list) – multipart transfer list for later verification

dispatch(dataplane, transfer_config, dispatch_batch_size=100)[source]#

Dispatch transfer job to specified gateways.

Parameters:
  • dataplane (Dataplane) – dataplane that starts the transfer job

  • transfer_config (TransferConfig) – the configuration during the transfer

  • dispatch_batch_size (int) – maximum size of the buffer to temporarily store the generators (default: 100)

Return type:

Generator[ChunkRequest, None, None]

property dst_iface: ObjectStoreInterface#

Return the destination object store interface

property dst_prefix: str | None#

Return the destination prefix

finalize()[source]#

Complete the multipart upload requests

gen_transfer_pairs(chunker=None)[source]#

Generate transfer pairs for the transfer job.

Parameters:

chunker (Chunker) – chunker that makes the chunk requests

Return type:

Generator[Tuple[ObjectStoreObject, ObjectStoreObject], None, None]

property http_pool#

http connection pool

size_gb()#

Return the size of the transfer in GB

property src_iface: ObjectStoreInterface#

Return the source object store interface

property src_prefix: str | None#

Return the source prefix

verify()[source]#

Verify the integrity of the transferred destination objects

class skyplane.api.transfer_job.SyncJob(src_path, dst_path, recursive=False, requester_pays=False, transfer_list=<factory>, multipart_transfer_list=<factory>)[source]#

Bases: CopyJob

sync job that copies the source objects that do not exist in the destination bucket to the destination

dispatch(dataplane, transfer_config, dispatch_batch_size=100)#

Dispatch transfer job to specified gateways.

Parameters:
  • dataplane (Dataplane) – dataplane that starts the transfer job

  • transfer_config (TransferConfig) – the configuration during the transfer

  • dispatch_batch_size (int) – maximum size of the buffer to temporarily store the generators (default: 100)

Return type:

Generator[ChunkRequest, None, None]

property dst_iface: ObjectStoreInterface#

Return the destination object store interface

property dst_prefix: str | None#

Return the destination prefix

finalize()#

Complete the multipart upload requests

gen_transfer_pairs(chunker=None)[source]#

Generate transfer pairs for the transfer job.

Parameters:

chunker (Chunker) – chunker that makes the chunk requests

Return type:

Generator[Tuple[ObjectStoreObject, ObjectStoreObject], None, None]

property http_pool#

http connection pool

size_gb()#

Return the size of the transfer in GB

property src_iface: ObjectStoreInterface#

Return the source object store interface

property src_prefix: str | None#

Return the source prefix

verify()#

Verify the integrity of the transferred destination objects

class skyplane.api.transfer_job.TransferJob(src_path, dst_path, recursive=False, requester_pays=False)[source]#

Bases: ABC

transfer job with transfer configurations

Parameters:
  • src_path (str) – source full path

  • dst_path (str) – destination full path

  • recursive (bool) – if true, will transfer objects at folder prefix recursively (default: False)

  • requester_pays (bool) – if set, will support requester pays buckets. (default: False)

  • uuid (str) – the uuid of one single transfer job

dispatch(dataplane, **kwargs)[source]#

Dispatch transfer job to specified gateways.

Return type:

Generator[ChunkRequest, None, None]

property dst_iface: ObjectStoreInterface#

Return the destination object store interface

property dst_prefix: str | None#

Return the destination prefix

finalize()[source]#

Complete the multipart upload requests

size_gb()[source]#

Return the size of the transfer in GB

property src_iface: ObjectStoreInterface#

Return the source object store interface

property src_prefix: str | None#

Return the source prefix

verify()[source]#

Verifies the transfer completed, otherwise raises TransferFailedException.