# Source code for hats.io.paths
"""Methods for creating partitioned data paths"""
from __future__ import annotations
import re
from pathlib import Path
from urllib.parse import urlencode
from fsspec.implementations.http import HTTPFileSystem
from upath import UPath
from hats.io.file_io.file_pointer import get_upath
from hats.pixel_math.healpix_pixel import INVALID_PIXEL, HealpixPixel
[docs]
# Hive-style partitioning keys. Each is the ``<key>`` half of a ``<key>=<value>``
# path segment, e.g. ``Norder=<order>/Dir=<directory>/Npix=<pixel>.parquet``.
PARTITION_ORDER = "Norder"
# The directory key is used by ``HIVE_COLUMNS`` and by the path builders in
# this module, so it must be defined alongside the order and pixel keys.
PARTITION_DIR = "Dir"
PARTITION_PIXEL = "Npix"

# ``margin_``-prefixed counterparts of the three partitioning keys.
# NOTE(review): not used by any function in this module - presumably consumed
# by margin-catalog code elsewhere; confirm against callers.
MARGIN_ORDER = "margin_Norder"
MARGIN_DIR = "margin_Dir"
MARGIN_PIXEL = "margin_Npix"

# All hive column names declared above: partition keys first, then margin keys.
HIVE_COLUMNS = [
    PARTITION_ORDER,
    PARTITION_DIR,
    PARTITION_PIXEL,
    MARGIN_ORDER,
    MARGIN_DIR,
    MARGIN_PIXEL,
]

# Sub-directory of the catalog base directory that holds the partitioned data.
DATASET_DIR = "dataset"

# File name of the partition info file, located directly in the catalog base directory.
PARTITION_INFO_FILENAME = "partition_info.csv"

# File name of the data thumbnail, located inside the dataset directory.
DATA_THUMBNAIL_FILENAME = "data_thumbnail.parquet"

# File name of the point map FITS image, located directly in the catalog base directory.
POINT_MAP_FILENAME = "point_map.fits"

# File name of the full-resolution skymap FITS image, located directly in the
# catalog base directory.
SKYMAP_FILENAME = "skymap.fits"
[docs]
def dataset_directory(catalog_base_dir: str | Path | UPath | None) -> UPath:
    """Build the path pointer to a catalog's dataset directory.

    Nothing is created on disk; only the path object is assembled. The
    resulting path has the form::

        <catalog_base_dir>/dataset/

    Parameters
    ----------
    catalog_base_dir : str | Path | UPath | None
        base directory of the catalog (includes catalog name)

    Returns
    -------
    UPath
        path of the dataset directory
    """
    catalog_root = get_upath(catalog_base_dir)
    return catalog_root / DATASET_DIR
[docs]
def pixel_directory(
    catalog_base_dir: str | Path | UPath | None,
    pixel_order: int,
    pixel_number: int | None = None,
    directory_number: int | None = None,
) -> UPath:
    """Create path pointer for a pixel directory. This will not create the directory.

    One of pixel_number or directory_number is required. When both are given,
    ``directory_number`` takes precedence. The directory name will take the
    HiPS standard form of::

        <catalog_base_dir>/dataset/Norder=<pixel_order>/Dir=<directory number>

    Where the directory number is calculated using integer division as::

        (pixel_number/10000)*10000

    Parameters
    ----------
    catalog_base_dir : str | Path | UPath | None
        base directory of the catalog (includes catalog name)
    pixel_order : int
        the healpix order of the pixel
    pixel_number : int | None
        the number of the healpix pixel at ``pixel_order``
    directory_number : int | None
        directory number (or inferred from pixel number)

    Returns
    -------
    UPath
        directory name

    Raises
    ------
    ValueError
        if neither ``pixel_number`` nor ``directory_number`` is provided
    """
    norder = int(pixel_order)
    if directory_number is not None:
        ndir = directory_number
    elif pixel_number is not None:
        npix = int(pixel_number)
        # Use exact integer arithmetic. Float division (``npix / 10_000``) loses
        # precision for very large pixel numbers and can round the quotient up
        # to the next integer, yielding a directory number greater than the
        # pixel number. Dividing the magnitude keeps the truncate-toward-zero
        # result that ``int(npix / 10_000)`` gives for negative input.
        ndir = (abs(npix) // 10_000) * 10_000
        if npix < 0:
            ndir = -ndir
    else:
        raise ValueError("One of pixel_number or directory_number is required to create pixel directory")
    return (
        get_upath(catalog_base_dir) / DATASET_DIR / f"{PARTITION_ORDER}={norder}" / f"{PARTITION_DIR}={ndir}"
    )
[docs]
def get_healpix_from_path(path: str) -> HealpixPixel:
    """Find the `pixel_order` and `pixel_number` from a string like the
    following::

        Norder=<pixel_order>/Dir=<directory number>/Npix=<pixel_number>.parquet

    NB: This expects the format generated by the `pixel_catalog_file` method

    Parameters
    ----------
    path : str
        path to parse

    Returns
    -------
    HealpixPixel
        Constructed HealpixPixel object representing the pixel in the path.
        ``INVALID_PIXEL`` if the path doesn't match the expected pattern for any reason.
    """
    # Require at least one digit after each key (``\d+`` rather than ``\d*``).
    # Otherwise a path such as "Norder=/Npix=" would match with empty groups
    # and ``int("")`` would raise ValueError instead of returning INVALID_PIXEL.
    healpix_path_pattern = re.compile(r".*Norder=(\d+).*Npix=(\d+).*")
    match = healpix_path_pattern.match(path)
    if not match:
        return INVALID_PIXEL
    order, pixel = match.groups()
    return HealpixPixel(int(order), int(pixel))
[docs]
def dict_to_query_urlparams(query_params: dict | None = None) -> str:
    """Converts a dictionary to a url query parameter string

    Entries whose key or value is falsy (``None``, empty string, empty list,
    ``0``, ...) are skipped. List values are collapsed into a single
    comma-separated string with all spaces removed; list items that are not
    strings are converted with ``str``. The result is percent-encoded.

    Parameters
    ----------
    query_params: dict | None
        (Default value = None) set of URL query parameters.

    Returns
    -------
    str
        query parameter string to append to a url, including the leading ``?``.
        Empty string when there is nothing to encode.
    """
    if not query_params:
        return ""
    query = {}
    for key, value in query_params.items():
        if not (key and value):
            continue
        if isinstance(value, list):
            # Coerce each item to str so that lists of numbers (or other
            # non-string values) do not make ``str.join`` raise TypeError.
            value = ",".join(str(item) for item in value).replace(" ", "")
        query[key] = value
    if not query:
        return ""
    # Build the query string and add the "?" prefix
    return "?" + urlencode(query, doseq=True)
[docs]
def pixel_catalog_file(
    catalog_base_dir: str | Path | UPath | None,
    pixel: HealpixPixel,
    query_params: dict | None = None,
    npix_suffix: str = ".parquet",
) -> UPath:
    """Build the path *pointer* for a single pixel's catalog file.

    Neither the file nor any directory is created. The returned path has
    the form::

        <catalog_base_dir>/dataset/Norder=<pixel_order>/Dir=<directory number>/Npix=<pixel_number>.parquet

    The order, directory number and pixel number are read from ``pixel``
    (its ``order``, ``dir`` and ``pixel`` attributes).

    Parameters
    ----------
    catalog_base_dir : str | Path | UPath | None
        base directory of the catalog (includes catalog name)
    pixel : HealpixPixel
        the healpix pixel to create path to
    query_params: dict | None
        (Default value = None) Params to append to URL. Only applied when the
        base directory lives on an ``HTTPFileSystem``. Ex::

            {'cols': ['ra', 'dec'], 'fltrs': ['r>=10', 'g<18']}

    npix_suffix: str
        (Default value = ".parquet") extension for the parquet file (or `/` if a directory)

    Returns
    -------
    UPath
        catalog file name
    """
    base_path = get_upath(catalog_base_dir)

    # A bare path separator means the pixel is stored as a directory, in
    # which case nothing is appended to the ``Npix=<pixel>`` segment.
    file_suffix = "" if npix_suffix in ["/", "\\"] else npix_suffix

    # Query parameters are only appended for HTTP-backed locations.
    query_string = ""
    if isinstance(base_path.fs, HTTPFileSystem) and query_params:
        query_string = dict_to_query_urlparams(query_params)

    order_segment = f"{PARTITION_ORDER}={pixel.order}"
    dir_segment = f"{PARTITION_DIR}={pixel.dir}"
    file_segment = f"{PARTITION_PIXEL}={pixel.pixel}{file_suffix}{query_string}"
    return base_path / DATASET_DIR / order_segment / dir_segment / file_segment
[docs]
def get_partition_info_pointer(catalog_base_dir: str | Path | UPath) -> UPath:
    """Build the file pointer to the ``partition_info.csv`` metadata file.

    The file is addressed directly under the catalog base directory.

    Parameters
    ----------
    catalog_base_dir: str | Path | UPath
        base directory of the catalog (includes catalog name)

    Returns
    -------
    UPath
        File Pointer to the catalog's ``partition_info.csv`` file
    """
    catalog_root = get_upath(catalog_base_dir)
    return catalog_root / PARTITION_INFO_FILENAME
[docs]
def get_data_thumbnail_pointer(catalog_base_dir: str | Path | UPath) -> UPath:
    """Build the file pointer to the `data_thumbnail` parquet file.

    The file is addressed inside the catalog's ``dataset`` sub-directory.

    Parameters
    ----------
    catalog_base_dir: str | Path | UPath
        base directory of the catalog (includes catalog name)

    Returns
    -------
    UPath
        File Pointer to the catalog's `data_thumbnail` file
    """
    dataset_root = get_upath(catalog_base_dir) / DATASET_DIR
    return dataset_root / DATA_THUMBNAIL_FILENAME
[docs]
def get_point_map_file_pointer(catalog_base_dir: str | Path | UPath) -> UPath:
    """Build the file pointer to the `point_map.fits` FITS image file.

    The file is addressed directly under the catalog base directory.

    Parameters
    ----------
    catalog_base_dir: str | Path | UPath
        base directory of the catalog (includes catalog name)

    Returns
    -------
    UPath
        File Pointer to the catalog's `point_map.fits` FITS image file.
    """
    catalog_root = get_upath(catalog_base_dir)
    return catalog_root / POINT_MAP_FILENAME
[docs]
def get_skymap_file_pointer(catalog_base_dir: str | Path | UPath, order: int | None = None) -> UPath:
    """Build the file pointer to the `skymap.fits` or `skymap.K.fits` FITS image file.

    When ``order`` is a non-negative number the order-specific file name
    ``skymap.<order>.fits`` is used; otherwise the default ``skymap.fits``
    name is used.

    Parameters
    ----------
    catalog_base_dir: str | Path | UPath
        base directory of the catalog (includes catalog name)
    order: int | None
        (Default value = None) desired order for the map, if looking for a down-sampled map.

    Returns
    -------
    UPath
        File Pointer to the FITS image file.
    """
    # Pick the file name first, then resolve it against the catalog base directory.
    filename = f"skymap.{order}.fits" if order is not None and order >= 0 else SKYMAP_FILENAME
    return get_upath(catalog_base_dir) / filename