From ccb4fe2a073604079fabf8cd1145144d394f1b67 Mon Sep 17 00:00:00 2001 From: Remi Cresson <remi.cresson@inrae.fr> Date: Wed, 8 Nov 2023 17:07:20 +0100 Subject: [PATCH] DOC: add sumarize() documentation --- doc/index.md | 1 + doc/summarize.md | 167 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 doc/summarize.md diff --git a/doc/index.md b/doc/index.md index 7861878..d76ef33 100644 --- a/doc/index.md +++ b/doc/index.md @@ -22,6 +22,7 @@ to make OTB more Python friendly. ## Advanced use - [Comparison between pyotb and OTB native library](comparison_otb.md) +- [Summarize applications](summarize.md) - [OTB versions](otb_versions.md) - [Managing loggers](managing_loggers.md) - [Troubleshooting & limitations](troubleshooting.md) diff --git a/doc/summarize.md b/doc/summarize.md new file mode 100644 index 0000000..13f3021 --- /dev/null +++ b/doc/summarize.md @@ -0,0 +1,167 @@ +## Summarize applications + +pyotb can summarize applications as a dictionary with keys/values for +parameters. This feature can be used to keep track of a process, composed of +multiple applications chained together. + +### Single application + +Let's take the example of one single application. + +```python +import pyotb + +app = pyotb.RigidTransformResample({ + 'in': 'my_image.tif', + 'interpolator': 'linear', + 'transform.type.id.scaley': 0.5, + 'transform.type.id.scalex': 0.5 +}) +``` + +The application can be summarized using `pyotb.summarize()` or +`app.summarize()`, which are equivalent. 
+ +```python +print(app.summarize()) +``` + +Results in the following (lines have been pretty printed for the sake of +documentation): + +```json lines +{ + 'name': 'RigidTransformResample', + 'parameters': { + 'transform.type': 'id', + 'in': 'my_image.tif', + 'interpolator': 'linear', + 'transform.type.id.scaley': 0.5, + 'transform.type.id.scalex': 0.5 + } +} +``` + +Note that we can also summarize an application after it has been executed: + +```python +app.write('output.tif', pixel_type='uint16') +print(app.summarize()) +``` + +Which results in the following: + +```json lines +{ + 'name': 'RigidTransformResample', + 'parameters': { + 'transform.type': 'id', + 'in': 'my_image.tif', + 'interpolator': 'linear', + 'transform.type.id.scaley': 0.5, + 'transform.type.id.scalex': 0.5, + 'out': 'output.tif' + } +} +``` + +Now `'output.tif'` has been added to the application parameters. + +### Multiple applications chained together (pipeline) + +When multiple applications are chained together, the summary of the last +application will describe all upstream processes. + +```python +import pyotb + +app1 = pyotb.RigidTransformResample({ + 'in': 'my_image.tif', + 'interpolator': 'linear', + 'transform.type.id.scaley': 0.5, + 'transform.type.id.scalex': 0.5 +}) +app2 = pyotb.Smoothing(app1) +print(app2.summarize()) +``` + +Results in: + +```json lines +{ + 'name': 'Smoothing', + 'parameters': { + 'type': 'anidif', + 'type.anidif.timestep': 0.125, + 'type.anidif.nbiter': 10, + 'type.anidif.conductance': 1.0, + 'in': { + 'name': 'RigidTransformResample', + 'parameters': { + 'transform.type': 'id', + 'in': 'my_image.tif', + 'interpolator': 'linear', + 'transform.type.id.scaley': 0.5, + 'transform.type.id.scalex': 0.5 + } + } + } +} +``` + +### Remote files URL stripping + +Cloud-based raster URLs often include tokens or random strings resulting from +the URL signing. 
+Those can be removed from the summarized paths, using the `strip_inpath` +and/or `strip_outpath` arguments respectively for inputs and/or outputs. + +Here is an example with Microsoft Planetary Computer: + +```python +import planetary_computer +import pyotb + +url = ( + "https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/31/N/EA/2023/" + "11/03/S2A_MSIL2A_20231103T095151_N0509_R079_T31NEA_20231103T161409.SAFE/" + "GRANULE/L2A_T31NEA_A043691_20231103T100626/IMG_DATA/R10m/T31NEA_20231103" + "T095151_B02_10m.tif" +) +signed_url = planetary_computer.sign_inplace(url) +app = pyotb.Smoothing(signed_url) +``` + +By default, the summary does not strip the URL. + +```python +print(app.summarize()["parameters"]["in"]) +``` + +This results in: + +``` +/vsicurl/https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/31/N/EA/... +2023/11/03/S2A_MSIL2A_20231103T095151_N0509_R079_T31NEA_20231103T161409.SAFE... +/GRANULE/L2A_T31NEA_A043691_20231103T100626/IMG_DATA/R10m/T31NEA_20231103T... +095151_B02_10m.tif?st=2023-11-07T15%3A52%3A47Z&se=2023-11-08T16%3A37%3A47Z&... +sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=... +72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2023-11-08T11%3A41%3A41Z&ske=2023-... +11-15T11%3A41%3A41Z&sks=b&skv=2021-06-08&sig=xxxxxxxxxxx...xxxxx +``` + +Now we can strip the URL to keep only the resource identifier and get rid of +the token: + +```python +print(app.summarize(strip_inpath=True)["parameters"]["in"]) +``` + +Which now results in: + +``` +/vsicurl/https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/31/N/EA/... +2023/11/03/S2A_MSIL2A_20231103T095151_N0509_R079_T31NEA_20231103T161409.SAFE... +/GRANULE/L2A_T31NEA_A043691_20231103T100626/IMG_DATA/R10m/T31NEA_20231103T... +095151_B02_10m.tif +``` \ No newline at end of file -- GitLab