From 5fd5c41ebd1780dc895c24d9ec978f63dc848c6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi?= <remi.cresson@inrae.fr>
Date: Tue, 10 Sep 2024 22:00:50 +0200
Subject: [PATCH] add: push file

---
 dinamis_sdk/__init__.py |  4 +++-
 dinamis_sdk/s3.py       | 46 +++++++++++++++++++++++++++++++++--------
 dinamis_sdk/upload.py   | 34 ++++++++++++++++++++++++++++++
 tests/test_push.py      | 13 ++++++++++++
 4 files changed, 87 insertions(+), 10 deletions(-)
 create mode 100644 dinamis_sdk/upload.py
 create mode 100644 tests/test_push.py

diff --git a/dinamis_sdk/__init__.py b/dinamis_sdk/__init__.py
index dfa1b29..a20433c 100644
--- a/dinamis_sdk/__init__.py
+++ b/dinamis_sdk/__init__.py
@@ -7,6 +7,8 @@ from dinamis_sdk.s3 import (
     sign_urls,
     sign_item,
     sign_asset,
-    sign_item_collection
+    sign_item_collection,
+    sign_url_put
 )  # noqa
 from dinamis_sdk import auth  # noqa
+from dinamis_sdk.upload import push  # noqa
diff --git a/dinamis_sdk/s3.py b/dinamis_sdk/s3.py
index f544a13..b445a4b 100644
--- a/dinamis_sdk/s3.py
+++ b/dinamis_sdk/s3.py
@@ -28,11 +28,11 @@ import pydantic
 from .utils import (
     log,
-    SIGNED_URL_TTL_MARGIN, 
-    CREDENTIALS, 
+    SIGNED_URL_TTL_MARGIN,
+    CREDENTIALS,
     MAX_URLS,
-    S3_SIGNING_ENDPOINT, 
-    S3_STORAGE_DOMAIN, 
+    S3_SIGNING_ENDPOINT,
+    S3_STORAGE_DOMAIN,
     SIGNED_URL_DURATION_SECONDS,
     BYPASS_API
 )
@@ -154,7 +154,10 @@ def sign_string(url: str, copy: bool = True) -> str:
     return sign_urls(urls=[url])[url]
 
 
-def sign_urls(urls: List[str]) -> Dict[str, str]:
+def _generic_sign_urls(
+    urls: List[str],
+    route: str
+) -> Dict[str, str]:
     """Sign URLs with a S3 Token.
 
     Signing URL allows read access to files in storage.
@@ -165,6 +168,7 @@ def sign_urls(urls: List[str]) -> Dict[str, str]:
            Single URLs can be found on a STAC Item's Asset ``href`` value.
            Only URLs to assets in S3 Storage are signed, other URLs are
            returned unmodified.
+        route: signing API route ("sign_urls" or "sign_urls_put")
 
     Returns:
         dict of signed HREF: key = original URL, value = signed URL
@@ -193,18 +197,38 @@ def sign_urls(urls: List[str]) -> Dict[str, str]:
     not_signed_urls = [url for url in urls if url not in signed_urls]
     signed_urls.update({
         url: signed_url.href
-        for url, signed_url in get_signed_urls(not_signed_urls).items()
+        for url, signed_url in _generic_get_signed_urls(
+            urls=not_signed_urls,
+            route=route
+        ).items()
     })
     return signed_urls
 
 
+def sign_urls(urls: List[str]) -> Dict[str, str]:
+    return _generic_sign_urls(urls=urls, route="sign_urls")
+
+
+def sign_urls_put(urls: List[str]) -> Dict[str, str]:
+    return _generic_sign_urls(urls=urls, route="sign_urls_put")
+
+
+def sign_url_put(url: str) -> str:
+    """
+    Sign a single URL for upload (HTTP PUT).
+    """
+    urls = sign_urls_put([url])
+    return urls[url]
+
+
 def _repl_vrt(match: re.Match) -> str:
     # replace all blob-storages URLs with a signed version.
     url = match.string[slice(*match.span())]
     return sign_urls(url)[url]
 
 
-def sign_vrt_string(vrt: str, copy: bool = True) -> str:  # pylint: disable = W0613  # noqa: E501
+def sign_vrt_string(vrt: str,
+                    copy: bool = True) -> str:  # pylint: disable = W0613  # noqa: E501
     """Sign a VRT-like string containing URLs from the storage.
 
     Signing URLs allows read access to files in storage.
@@ -413,8 +437,9 @@ def sign_mapping(mapping: Mapping, copy: bool = True) -> Mapping:
 sign_reference_file = sign_mapping
 
 
-def get_signed_urls(
+def _generic_get_signed_urls(
     urls: List[str],
+    route: str,
     retry_total: int = 10,
     retry_backoff_factor: float = .8
 ) -> Dict[str, SignedURL]:
@@ -426,6 +451,7 @@ def get_signed_urls(
 
     Args:
         urls: urls
+        route: signing API route ("sign_urls" or "sign_urls_put")
         retry_total (int): The number of allowable retry attempts for REST
             API calls. Use retry_total=0 to disable retries. A backoff
             factor to apply between attempts.
@@ -501,8 +527,10 @@ def get_signed_urls(
         params = {"urls": not_signed_urls_chunk}
         if SIGNED_URL_DURATION_SECONDS:
             params["duration_seconds"] = SIGNED_URL_DURATION_SECONDS
+        post_url = f"{S3_SIGNING_ENDPOINT}{route}"
+        log.debug("POST %s", post_url)
         response = session.post(
-            f"{S3_SIGNING_ENDPOINT}sign_urls",
+            post_url,
             params=params,
             headers=headers,
             timeout=10
diff --git a/dinamis_sdk/upload.py b/dinamis_sdk/upload.py
new file mode 100644
index 0000000..df2492d
--- /dev/null
+++ b/dinamis_sdk/upload.py
@@ -0,0 +1,34 @@
+"""Upload local files to the S3 storage using presigned URLs."""
+import requests
+import urllib3.util.retry
+
+from .s3 import sign_url_put
+
+
+def push(
+    local_filename: str,
+    target_url: str,
+    retry_total: int = 5,
+    retry_backoff_factor: float = .8
+):
+    """Publish a local file to the cloud, returning the signed PUT URL."""
+    remote_presigned_url = sign_url_put(target_url)
+
+    # Retry the PUT on transient server or throttling errors
+    session = requests.Session()
+    retry = urllib3.util.retry.Retry(
+        total=retry_total,
+        backoff_factor=retry_backoff_factor,
+        status_forcelist=[404, 429, 500, 502, 503, 504],
+        allowed_methods=False,
+    )
+    adapter = requests.adapters.HTTPAdapter(max_retries=retry)
+    session.mount("http://", adapter)
+    session.mount("https://", adapter)
+
+    with open(local_filename, 'rb') as f:
+        ret = session.put(remote_presigned_url, data=f)
+
+    if ret.status_code == 200:
+        return remote_presigned_url
+    ret.raise_for_status()
diff --git a/tests/test_push.py b/tests/test_push.py
new file mode 100644
index 0000000..d7623ea
--- /dev/null
+++ b/tests/test_push.py
@@ -0,0 +1,13 @@
+import dinamis_sdk
+
+local_filename = "/tmp/toto.txt"
+
+with open(local_filename, "w") as f:
+    f.write("hello world")
+
+pushed = dinamis_sdk.push(
+    local_filename=local_filename,
+    target_url="https://s3-data.meso.umontpellier.fr/sm1-gdc-tests/titi.txt"
+)
+assert pushed
+print("Done")
-- 
GitLab
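
Usage note (not part of the patch): a minimal sketch of how the new push() and sign_url_put() entry points added by this patch are meant to be called. The local path and the target object key below are hypothetical placeholders; any key under a prefix the signing token is allowed to write to should work.

    import dinamis_sdk

    # Create a small local file to upload (hypothetical path, for illustration only)
    local_path = "/tmp/example.txt"
    with open(local_path, "w") as f:
        f.write("hello world")

    # Hypothetical target: any object URL under a writable prefix of the storage
    target = "https://s3-data.meso.umontpellier.fr/some-writable-prefix/example.txt"

    # push() asks the signing API for a presigned PUT URL, then uploads the
    # file with retries; it returns the presigned URL on success.
    signed_put_url = dinamis_sdk.push(local_filename=local_path, target_url=target)

    # sign_url_put() can also be called directly, e.g. to hand the upload
    # over to another HTTP client.
    presigned = dinamis_sdk.sign_url_put(target)
    print(signed_put_url, presigned)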