From fecc47d5b15b6a933c3cf19dcadd26ca0714dcea Mon Sep 17 00:00:00 2001 From: Daniel N <2color@users.noreply.github.com> Date: Fri, 23 Jan 2026 14:42:08 +0100 Subject: [PATCH 1/8] chore: move kubo specific guides into kubo categorty --- docs/.vuepress/config.js | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/docs/.vuepress/config.js b/docs/.vuepress/config.js index 539ae1a15..0236a3e84 100644 --- a/docs/.vuepress/config.js +++ b/docs/.vuepress/config.js @@ -215,6 +215,8 @@ module.exports = { '/how-to/troubleshooting-kubo', '/how-to/webtransport', '/install/run-ipfs-inside-docker', + '/how-to/observe-peers', + '/how-to/peering-with-content-providers' ] }, { @@ -242,15 +244,6 @@ module.exports = { '/how-to/move-ipfs-installation/move-ipfs-installation', ] }, - { - title: 'Work with peers', - sidebarDepth: 1, - collapsable: true, - children: [ - '/how-to/observe-peers', - '/how-to/peering-with-content-providers' - ] - }, { title: 'Websites on IPFS', sidebarDepth: 1, From 0aac21db9f7ec3931d95d315a0778a93c5dfe946 Mon Sep 17 00:00:00 2001 From: Daniel N <2color@users.noreply.github.com> Date: Fri, 23 Jan 2026 16:15:38 +0100 Subject: [PATCH 2/8] initial draft of geospatial guide --- docs/.vuepress/config.js | 8 + docs/how-to/publish-geospatial-data.md | 216 +++++++++++++++++++++++++ 2 files changed, 224 insertions(+) create mode 100644 docs/how-to/publish-geospatial-data.md diff --git a/docs/.vuepress/config.js b/docs/.vuepress/config.js index 0236a3e84..6f5b7350a 100644 --- a/docs/.vuepress/config.js +++ b/docs/.vuepress/config.js @@ -219,6 +219,14 @@ module.exports = { '/how-to/peering-with-content-providers' ] }, + { + title: 'Publish Scientific Data', + sidebarDepth: 1, + collapsable: true, + children: [ + '/how-to/publish-geospatial-data', + ] + }, { title: 'Troubleshooting', sidebarDepth: 1, diff --git a/docs/how-to/publish-geospatial-data.md b/docs/how-to/publish-geospatial-data.md new file mode 100644 index 000000000..229e47074 --- 
/dev/null +++ b/docs/how-to/publish-geospatial-data.md @@ -0,0 +1,216 @@ +--- +title: Publish Geospatial Data with IPFS +description: +--- + +# Publish Geospatial Data with IPFS + +In this guide, you will learn how to publish public geospatial data sets using IPFS, with a focus on the [Zarr](https://zarr.dev/) format. You'll learn how to leverage decentralized distribution with IPFS for better collaboration, data integrity, and open access. + +Note that while this guide focuses on Zarr, it's applicable to other data sets. + +By the end of this guide, you will publish a Zarr dataset to the IPFS network in a way that is retrievable directly within [Xarray](https://xarray.dev/) + +If you are interested in a real-world example following the patterns in this guide, check out the [The ORCESTRA campaign](https://orcestra-campaign.org/intro.html). + +- [Why IPFS for Geospatial Data?](#why-ipfs-for-geospatial-data) +- [Prerequisites](#prerequisites) +- [Step 1: Prepare Your Zarr Data Set](#step-1-prepare-your-zarr-data-set) +- [Step 2: Add Your Data Set to IPFS](#step-2-add-your-data-set-to-ipfs) + - [Step 3: Organizing Your Data](#step-3-organizing-your-data) +- [Step 4: Verify Providing Status](#step-4-verify-providing-status) +- [Step 5: Content Discovery](#step-5-content-discovery) + - [Option A: Share the CID Directly](#option-a-share-the-cid-directly) + - [Option B: Use IPNS for Updatable References](#option-b-use-ipns-for-updatable-references) + - [Option C: Use DNSLink for Human-Readable URLs](#option-c-use-dnslink-for-human-readable-urls) +- [Accessing Published Data](#accessing-published-data) +- [Choosing Your Approach](#choosing-your-approach) +- [Reference](#reference) + +## Why IPFS for Geospatial Data? + +Geospatial data sets such as weather observations, satellite imagery, and sensor readings, are typically stored as multidimensional arrays, also commonly known as tensors. 
+ +As these data sets grow larger and more distributed, traditional formats like NetCDF and HDF5 show their limitations: metadata interleaved with data requires large sequential reads before you can access the data you need. + +**[Zarr](https://zarr.dev/)** is a modern format that addresses these limitations and is optimized for networked and distributed storage characterized by high throughput with high latency. Zarr complements the popular [Xarray](https://xarray.dev/), which provides the data structures and operations for analyzing the data sets. + +Some of the key properties of Zarr include: + +- **Separated metadata**: A data catalogue/index lets you understand data set structure before fetching any data. +- **Chunked by default**: Arrays split into small chunks let you download only the subset you need. +- **Consolidated metadata**: All metadata in a single `zarr.json` file speeds reads for multi-array data sets. + +> **Note:** For a more elaborate explanation of the underlying principles and motivation for Zarr, check out [this blog post](https://tom-nicholas.com/blog/2025/cloud-optimized-scientific-data/) by one of the Zarr contributors. + +**IPFS** complements Zarr with decentralized distribution: + +- **Content addressing**: Data is identified by what it contains using CIDs, not where it's stored +- **Built-in integrity**: Cryptographic hashes verify data hasn't been corrupted or tampered with +- **Participatory sharing**: Anyone can help distribute data sets they've downloaded +- **Open access**: No vendor lock-in or centralized infrastructure required + +This combination has proven effective in real-world campaigns like [Orcestra](https://orcestra-campaign.org/orcestra.html), where scientists collaborated with limited internet connectivity in the field while sharing data globally. 
+ +## Prerequisites + +Before starting, ensure you have: + +- A Zarr data set for +- [Kubo](/install/command-line/) or [IPFS Desktop](/install/ipfs-desktop/) installed on a machine with a public IP +- Basic familiarity with the command line + +## Step 1: Prepare Your Zarr Data Set + +When preparing your Zarr data set for IPFS, aim for approximately 1 MiB chunks. This aligns well with IPFS's chunking strategy and provides a good balance between granularity and overhead. Note that this is not strictly required. + +To calculate chunk dimensions for a target byte size, work backwards from your datatype: + +```python +import xarray as xr + +ds = xr.open_dataset(filename) +# Example: targeting ~1 MB chunks with float32 data +ds.to_zarr('output.zarr', encoding={ + 'var_name': {'chunks': (1, 512, 512)} +}) + +# Total size: 1 × 512 × 512 × 4 bytes (float32) = ~1 MB per chunk +``` + +## Step 2: Add Your Data Set to IPFS + +Add your Zarr folder to IPFS using the `ipfs add` command: + +```bash +ipfs add --recursive \ + --hidden \ + --raw-leaves \ + --chunker=size-1048576 \ + --cid-version=1 \ + --pin-name="halo-measurements-2026-01-23" \ + --quieter \ + ./my-dataset.zarr +``` + +This command: + +1. **Merkleizes** the folder: converts files and directories into content-addressed blocks with UnixFS +2. **Pins** the data locally: prevents garbage collection from removing it +3. **Queues providing**: announces to the IPFS network that your node has this data +4. **Outputs the root CID**: the identifier for your entire dataset + +The `--quieter` flag outputs only the root CID, which identifies the complete dataset. + +> **Note:** + +### Step 3: Organizing Your Data + +Two options help manage multiple data sets on your node: + +**Named pins** (`--pin-name`): Label data sets for easy identification in `ipfs pin ls`. + +**MFS (Mutable File System)**: Create a human-readable directory structure for your CIDs: + +```bash +ipfs add ... 
--to-files=/datasets/halo-measurements-2026-01-23 +``` + +MFS gives you a familiar filesystem interface to organize content-addressed data. + +## Step 4: Verify Providing Status + +After adding, Kubo continuously announces your content to the network. Check the status: + +```bash +ipfs stats provide +``` + +For detailed diagnostics, see the [provide system documentation](https://github.com/ipfs/kubo/blob/master/docs/provide-stats.md). + +## Step 5: Content Discovery + +Users need a way to discover your datasets. Choose an approach based on your needs: + +### Option A: Share the CID Directly + +For one-off sharing, provide the CID directly: + +``` +ipfs://bafybeif52irmuurpb27cujwpqhtbg5w6maw4d7zppg2lqgpew25gs5eczm +``` + +### Option B: Use IPNS for Updatable References + +IPNS provides a stable identifier that you can update when datasets change: + +```bash +# Publish your dataset under your node's IPNS key +ipfs name publish /ipfs/<CID> + +# Update to a new version later +ipfs name publish /ipfs/<NEW_CID> +``` + +Users can subscribe to your IPNS name to always get the latest version. 
+ +### Option C: Use DNSLink for Human-Readable URLs + +Link a DNS name to your content by adding a TXT record: + +``` +_dnslink.data.example.org TXT "dnslink=/ipfs/<CID>" +``` + +Users can then access your data at: + +``` +https://data.example.org.ipfs.dweb.link/ +``` + +## Accessing Published Data + +Once published, users can access your Zarr datasets through multiple methods: + +**IPFS HTTP Gateways**: + +See the [retrieval guide](../quickstart/retrieve.md) + +**Python with ipfsspec**: + +```python +import xarray as xr + +ds = xr.open_dataset( + "ipfs://bafybeif52irmuurpb27cujwpqhtbg5w6maw4d7zppg2lqgpew25gs5eczm", + engine="zarr" +) +``` + +**JavaScript with Verified Fetch**: + +```javascript +import { verifiedFetch } from '@helia/verified-fetch' + +const response = await verifiedFetch('ipfs://<CID>/zarr.json') +``` + +## Choosing Your Approach + +Consider these factors when planning your publishing strategy: + +| Factor | Considerations | +| ------------------- | -------------------------------------------- | +| **Publishers** | Single node or multiple providers? | +| **Dataset size** | How large are individual datasets? | +| **Growth rate** | How frequently do you add new data? | +| **Content routing** | Public DHT, private DHT, or central indexer? | + +For most geospatial use cases, start with a single Kubo node publishing to the public Amino DHT. Scale to multiple providers or private infrastructure as your needs grow. 
+ +## Reference + +- [Kubo documentation](https://docs.ipfs.tech/install/command-line/) +- [Kubo configuration options](https://github.com/ipfs/kubo/blob/master/docs/config.md) +- [ipfsspec for Python](https://github.com/fsspec/ipfsspec/) +- [Cloud-Optimized Geospatial Data (Zarr deep-dive)](https://tom-nicholas.com/blog/2025/cloud-optimized-Geospatial-data/) From 12eab5398614a568d553f74817297a55da09448d Mon Sep 17 00:00:00 2001 From: Daniel Norman Date: Fri, 6 Feb 2026 14:34:59 +0100 Subject: [PATCH 3/8] Apply suggestion from @vmx Co-authored-by: Volker Mische --- docs/how-to/publish-geospatial-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/how-to/publish-geospatial-data.md b/docs/how-to/publish-geospatial-data.md index 229e47074..7f101bb51 100644 --- a/docs/how-to/publish-geospatial-data.md +++ b/docs/how-to/publish-geospatial-data.md @@ -75,7 +75,7 @@ ds.to_zarr('output.zarr', encoding={ 'var_name': {'chunks': (1, 512, 512)} }) -# Total size: 1 × 512 × 512 × 4 bytes (float32) = ~1 MB per chunk +# Total size: 1 × 512 × 512 × 4 bytes (float32) = 1048576 bytes = 1 MiB per chunk ``` ## Step 2: Add Your Data Set to IPFS From fd6337f69750f9575fbae3ecccfea344603d3544 Mon Sep 17 00:00:00 2001 From: Daniel Norman Date: Fri, 6 Feb 2026 14:42:10 +0100 Subject: [PATCH 4/8] Apply suggestion from @mishmosh Co-authored-by: Mosh <1306020+mishmosh@users.noreply.github.com> --- docs/how-to/publish-geospatial-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/how-to/publish-geospatial-data.md b/docs/how-to/publish-geospatial-data.md index 7f101bb51..f7f925eea 100644 --- a/docs/how-to/publish-geospatial-data.md +++ b/docs/how-to/publish-geospatial-data.md @@ -130,7 +130,7 @@ For detailed diagnostics, see the [provide system documentation](https://github. ## Step 5: Content Discovery -Users need a way to discover your datasets. 
Choose an approach based on your needs: +Now that your data is available on the public network, the next step is making it discoverable to others. Choose a sharing approach based on your needs: ### Option A: Share the CID Directly From 45d15d2de8a97ca4ee4d9e28eb44f84d32999eb4 Mon Sep 17 00:00:00 2001 From: Daniel Norman Date: Fri, 6 Feb 2026 14:42:24 +0100 Subject: [PATCH 5/8] Apply suggestion from @mishmosh Co-authored-by: Mosh <1306020+mishmosh@users.noreply.github.com> --- docs/how-to/publish-geospatial-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/how-to/publish-geospatial-data.md b/docs/how-to/publish-geospatial-data.md index f7f925eea..b2ec41414 100644 --- a/docs/how-to/publish-geospatial-data.md +++ b/docs/how-to/publish-geospatial-data.md @@ -142,7 +142,7 @@ ipfs://bafybeif52irmuurpb27cujwpqhtbg5w6maw4d7zppg2lqgpew25gs5eczm ### Option B: Use IPNS for Updatable References -IPNS provides a stable identifier that you can update when datasets change: +If you want to share a stable identifier but be able to update the underlying dataset, create an [IPNS](https://docs.ipfs.tech/concepts/ipns/) identifier and share that instead. This is useful for datasets that get updated regularly —users can bookmark your IPNS name and always retrieve the latest version. 
```bash # Publish your dataset under your node's IPNS key From 3e7510945f21034542e1605d990c4e26e5fa371a Mon Sep 17 00:00:00 2001 From: Daniel Norman Date: Fri, 6 Feb 2026 15:22:17 +0100 Subject: [PATCH 6/8] address feedback and refine guide --- docs/how-to/publish-geospatial-data.md | 40 ++++++++++++++++++-------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/docs/how-to/publish-geospatial-data.md b/docs/how-to/publish-geospatial-data.md index b2ec41414..60246c5a3 100644 --- a/docs/how-to/publish-geospatial-data.md +++ b/docs/how-to/publish-geospatial-data.md @@ -56,13 +56,17 @@ This combination has proven effective in real-world campaigns like [Orcestra](ht Before starting, ensure you have: -- A Zarr data set for -- [Kubo](/install/command-line/) or [IPFS Desktop](/install/ipfs-desktop/) installed on a machine with a public IP +- A Zarr data set ready for publishing - Basic familiarity with the command line +- [Kubo](/install/command-line/) or [IPFS Desktop](/install/ipfs-desktop/) installed on a machine. + +:::callout +See the [NAT and port forwarding guide](../how-to/nat-configuration.md) for more information on how to configure port forwarding so that your IPFS node is publicly reachable, thus allowing reliable retrievability of data by other nodes. +::: ## Step 1: Prepare Your Zarr Data Set -When preparing your Zarr data set for IPFS, aim for approximately 1 MiB chunks. This aligns well with IPFS's chunking strategy and provides a good balance between granularity and overhead. Note that this is not strictly required. +When preparing your Zarr data set for IPFS, aim for approximately 1 MiB chunks to align with IPFS's 1 MiB maximum block size. While this is not a strict requirement, using larger Zarr chunks will cause IPFS to split them into multiple blocks, potentially increasing retrieval latency. Chunking in Zarr is a nuanced topic beyond the scope of this guide. 
To calculate chunk dimensions for a target byte size, work backwards from your datatype: @@ -78,6 +82,15 @@ ds.to_zarr('output.zarr', encoding={ # Total size: 1 × 512 × 512 × 4 bytes (float32) = 1048576 bytes = 1 MiB per chunk ``` +:::callout +Chunking in Zarr is a nuanced topic beyond the scope of this guide. For more information on optimizing chunk sizes, see: + +- [Zarr performance guide](https://zarr.readthedocs.io/en/stable/user-guide/performance/) +- [Chunks and chunkability](https://element84.com/software-engineering/chunks-and-chunkability-tyranny-of-the-chunk/) +- [Zarr chunking introduction](https://eopf-toolkit.github.io/eopf-101/03_about_chunking/31_zarr_chunking_intro.html) +- [Cloud optimization practices](https://esipfed.github.io/cloud-computing-cluster/optimization-practices.html) +::: + ## Step 2: Add Your Data Set to IPFS Add your Zarr folder to IPFS using the `ipfs add` command: @@ -96,13 +109,13 @@ ipfs add --recursive \ This command: 1. **Merkleizes** the folder: converts files and directories into content-addressed blocks with UnixFS -2. **Pins** the data locally: prevents garbage collection from removing it -3. **Queues providing**: announces to the IPFS network that your node has this data -4. **Outputs the root CID**: the identifier for your entire dataset +1. **Pins** the data locally: prevents garbage collection from removing it +1. **Starts providing**: to the IPFS network that your IPFS node has this data +1. **Outputs the root CID**: the identifier for your entire dataset The `--quieter` flag outputs only the root CID, which identifies the complete dataset. -> **Note:** +> **Note:** Check out the [lifecycle of data in IPFS](../../concepts/lifecycle.md), to learn more about how how merkleizing, pinning, and providing work under the hood. 
### Step 3: Organizing Your Data @@ -142,7 +155,7 @@ ipfs://bafybeif52irmuurpb27cujwpqhtbg5w6maw4d7zppg2lqgpew25gs5eczm ### Option B: Use IPNS for Updatable References -If you want to share a stable identifier but be able to update the underlying dataset, create an [IPNS](https://docs.ipfs.tech/concepts/ipns/) identifier and share that instead. This is useful for datasets that get updated regularly —users can bookmark your IPNS name and always retrieve the latest version. +If you want to share a stable identifier but be able to update the underlying dataset, create an [IPNS](https://docs.ipfs.tech/concepts/ipns/) identifier and share that instead. This is useful for datasets that get updated regularly — users can bookmark your IPNS name and always retrieve the latest version. ```bash # Publish your dataset under your node's IPNS key @@ -152,7 +165,7 @@ ipfs name publish /ipfs/ ipfs name publish /ipfs/ ``` -Users can subscribe to your IPNS name to always get the latest version. +IPNS is supported by all the retrieval methods in the [Accessing Published Data](#accessing-published-data) section below. Keep in mind that IPNS name resolution adds latency to the retrieval process. ### Option C: Use DNSLink for Human-Readable URLs @@ -172,17 +185,20 @@ https://data.example.org.ipfs.dweb.link/ Once published, users can access your Zarr datasets through multiple methods: -**IPFS HTTP Gateways**: +### IPFS HTTP Gateways See the [retrieval guide](../quickstart/retrieve.md) -**Python with ipfsspec**: +### Python with ipfsspec + +[ipfsspec](https://pypi.org/project/ipfsspec/) brings verified IPFS retrieval to the Python ecosystem by implementing the [fsspec](https://github.com/fsspec/filesystem_spec) interface, the same abstraction layer used by xarray, pandas, Dask, and Zarr for remote data access. 
```python import xarray as xr +# after the installation of ipfsspec, `ipfs://` urls are automatically recognized ds = xr.open_dataset( - "ipfs://bafybeif52irmuurpb27cujwpqhtbg5w6maw4d7zppg2lqgpew25gs5eczm", + "ipfs://bafybeiesyutuduzqwvu4ydn7ktihjljicywxeth6wtgd5zi4ynxzqngx4m", engine="zarr" ) ``` From 8811dd56c2085dad855d2d89519798211c4ce9ec Mon Sep 17 00:00:00 2001 From: Daniel Norman Date: Fri, 6 Feb 2026 15:35:58 +0100 Subject: [PATCH 7/8] reorg into folder --- docs/.vuepress/config.js | 4 +-- .../publish-geospatial-zarr-data.md} | 26 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) rename docs/how-to/{publish-geospatial-data.md => scientific-data/publish-geospatial-zarr-data.md} (89%) diff --git a/docs/.vuepress/config.js b/docs/.vuepress/config.js index 6f5b7350a..294c057cb 100644 --- a/docs/.vuepress/config.js +++ b/docs/.vuepress/config.js @@ -220,11 +220,11 @@ module.exports = { ] }, { - title: 'Publish Scientific Data', + title: 'Scientific Data', sidebarDepth: 1, collapsable: true, children: [ - '/how-to/publish-geospatial-data', + '/how-to/scientific-data/publish-geospatial-zarr-data', ] }, { diff --git a/docs/how-to/publish-geospatial-data.md b/docs/how-to/scientific-data/publish-geospatial-zarr-data.md similarity index 89% rename from docs/how-to/publish-geospatial-data.md rename to docs/how-to/scientific-data/publish-geospatial-zarr-data.md index 60246c5a3..629a2f6a4 100644 --- a/docs/how-to/publish-geospatial-data.md +++ b/docs/how-to/scientific-data/publish-geospatial-zarr-data.md @@ -1,15 +1,15 @@ --- -title: Publish Geospatial Data with IPFS -description: +title: Publish Geospatial Zarr Data with IPFS +description: Learn how to publish geospatial datasets using IPFS and Zarr for decentralized distribution, data integrity, and open access. 
--- -# Publish Geospatial Data with IPFS +# Publish Geospatial Zarr Data with IPFS In this guide, you will learn how to publish public geospatial data sets using IPFS, with a focus on the [Zarr](https://zarr.dev/) format. You'll learn how to leverage decentralized distribution with IPFS for better collaboration, data integrity, and open access. Note that while this guide focuses on Zarr, it's applicable to other data sets. -By the end of this guide, you will publish a Zarr dataset to the IPFS network in a way that is retrievable directly within [Xarray](https://xarray.dev/) +By the end of this guide, you will publish a Zarr dataset to the IPFS network in a way that is retrievable directly within [Xarray](https://xarray.dev/). If you are interested in a real-world example following the patterns in this guide, check out the [The ORCESTRA campaign](https://orcestra-campaign.org/intro.html). @@ -17,7 +17,7 @@ If you are interested in a real-world example following the patterns in this gui - [Prerequisites](#prerequisites) - [Step 1: Prepare Your Zarr Data Set](#step-1-prepare-your-zarr-data-set) - [Step 2: Add Your Data Set to IPFS](#step-2-add-your-data-set-to-ipfs) - - [Step 3: Organizing Your Data](#step-3-organizing-your-data) +- [Step 3: Organizing Your Data](#step-3-organizing-your-data) - [Step 4: Verify Providing Status](#step-4-verify-providing-status) - [Step 5: Content Discovery](#step-5-content-discovery) - [Option A: Share the CID Directly](#option-a-share-the-cid-directly) @@ -61,12 +61,12 @@ Before starting, ensure you have: - [Kubo](/install/command-line/) or [IPFS Desktop](/install/ipfs-desktop/) installed on a machine. :::callout -See the [NAT and port forwarding guide](../how-to/nat-configuration.md) for more information on how to configure port forwarding so that your IPFS node is publicly reachable, thus allowing reliable retrievability of data by other nodes. 
+See the [NAT and port forwarding guide](../nat-configuration.md) for more information on how to configure port forwarding so that your IPFS node is publicly reachable, thus allowing reliable retrievability of data by other nodes. ::: ## Step 1: Prepare Your Zarr Data Set -When preparing your Zarr data set for IPFS, aim for approximately 1 MiB chunks to align with IPFS's 1 MiB maximum block size. While this is not a strict requirement, using larger Zarr chunks will cause IPFS to split them into multiple blocks, potentially increasing retrieval latency. Chunking in Zarr is a nuanced topic beyond the scope of this guide. +When preparing your Zarr data set for IPFS, aim for approximately 1 MiB chunks to align with IPFS's 1 MiB maximum block size. While this is not a strict requirement, using larger Zarr chunks will cause IPFS to split them into multiple blocks, potentially increasing retrieval latency. To calculate chunk dimensions for a target byte size, work backwards from your datatype: @@ -110,14 +110,14 @@ This command: 1. **Merkleizes** the folder: converts files and directories into content-addressed blocks with UnixFS 1. **Pins** the data locally: prevents garbage collection from removing it -1. **Starts providing**: to the IPFS network that your IPFS node has this data +1. **Announces** to the IPFS network that your node has this data 1. **Outputs the root CID**: the identifier for your entire dataset The `--quieter` flag outputs only the root CID, which identifies the complete dataset. -> **Note:** Check out the [lifecycle of data in IPFS](../../concepts/lifecycle.md), to learn more about how how merkleizing, pinning, and providing work under the hood. +> **Note:** Check out the [lifecycle of data in IPFS](../../concepts/lifecycle.md) to learn more about how merkleizing, pinning, and providing work under the hood. 
-### Step 3: Organizing Your Data +## Step 3: Organizing Your Data Two options help manage multiple data sets on your node: @@ -187,7 +187,7 @@ Once published, users can access your Zarr datasets through multiple methods: ### IPFS HTTP Gateways -See the [retrieval guide](../quickstart/retrieve.md) +See the [retrieval guide](../../quickstart/retrieve.md). ### Python with ipfsspec @@ -203,7 +203,7 @@ ds = xr.open_dataset( ) ``` -**JavaScript with Verified Fetch**: +### JavaScript with Verified Fetch ```javascript import { verifiedFetch } from '@helia/verified-fetch' @@ -229,4 +229,4 @@ For most Geospatial use cases, start with a single Kubo node publishing to the p - [Kubo documentation](https://docs.ipfs.tech/install/command-line/) - [Kubo configuration options](https://github.com/ipfs/kubo/blob/master/docs/config.md) - [ipfsspec for Python](https://github.com/fsspec/ipfsspec/) -- [Cloud-Optimized Geospatial Data (Zarr deep-dive)](https://tom-nicholas.com/blog/2025/cloud-optimized-Geospatial-data/) +- [Cloud-Optimized Scientific Data (Zarr deep-dive)](https://tom-nicholas.com/blog/2025/cloud-optimized-scientific-data/) From 3be1cd38fbcf49892a88d58af4a7b11ff3291f32 Mon Sep 17 00:00:00 2001 From: Daniel Norman Date: Fri, 6 Feb 2026 15:39:36 +0100 Subject: [PATCH 8/8] chore: add TODO --- .../scientific-data/publish-geospatial-zarr-data.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/how-to/scientific-data/publish-geospatial-zarr-data.md b/docs/how-to/scientific-data/publish-geospatial-zarr-data.md index 629a2f6a4..389b1f718 100644 --- a/docs/how-to/scientific-data/publish-geospatial-zarr-data.md +++ b/docs/how-to/scientific-data/publish-geospatial-zarr-data.md @@ -62,6 +62,7 @@ Before starting, ensure you have: :::callout See the [NAT and port forwarding guide](../nat-configuration.md) for more information on how to configure port forwarding so that your IPFS node is publicly reachable, thus allowing reliable retrievability of data by 
other nodes. + ::: ## Step 1: Prepare Your Zarr Data Set @@ -89,6 +90,7 @@ Chunking in Zarr is a nuanced topic beyond the scope of this guide. For more inf - [Chunks and chunkability](https://element84.com/software-engineering/chunks-and-chunkability-tyranny-of-the-chunk/) - [Zarr chunking introduction](https://eopf-toolkit.github.io/eopf-101/03_about_chunking/31_zarr_chunking_intro.html) - [Cloud optimization practices](https://esipfed.github.io/cloud-computing-cluster/optimization-practices.html) + ::: ## Step 2: Add Your Data Set to IPFS @@ -175,11 +177,10 @@ Link a DNS name to your content by adding a TXT record: _dnslink.data.example.org TXT "dnslink=/ipfs/" ``` -Users can then access your data at: +Users can then access your data by using the `ipns://` prefix followed by the DNS name. + +For example: `ipns://data.example.org`. -``` -https://data.example.org.ipfs.dweb.link/ -``` ## Accessing Published Data