Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PI24 run script: metadata generation added, refactoring #632

Merged
merged 5 commits into from
Nov 28, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 164 additions & 29 deletions karabo/examples/SRCNet_v0.1_simulation_2_AAstar.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# This script generates simulated visibilities and images resembling SKAO data.
# It also outputs corresponding ObsCore metadata ready to be ingested to Rucio.
#
# Images: dirty image and cleaned image using WSClean.
# These are MFS images (frequency channels aggregated into one channel),
Expand All @@ -9,8 +10,12 @@
# - 3 TB visibilities (after image cleaning)
# - 12 GB images
import math
import os
from datetime import datetime, timedelta, timezone

from karabo.data.obscore import ObsCoreMeta
from karabo.data.src import RucioMeta
from karabo.imaging.image import Image
from karabo.imaging.imager_base import DirtyImagerConfig
from karabo.imaging.imager_wsclean import (
WscleanDirtyImager,
Expand All @@ -22,26 +27,59 @@
from karabo.simulation.sky_model import SkyModel
from karabo.simulation.telescope import Telescope
from karabo.simulation.telescope_versions import SKAMidAAStarVersions
from karabo.simulation.visibility import Visibility
from karabo.simulator_backend import SimulatorBackend
from karabo.util.helpers import get_rnd_str

if __name__ == "__main__":
print(f"{datetime.now()} Script started")
# Simulation
# Phase center: should be mean of coverage
# Means of values from sky model description
PHASE_CENTER_RA = 150.12
PHASE_CENTER_DEC = 2.21

# Imaging
# Image size in degrees should be smaller than FOV
# Bigger baseline -> higher resolution
# Image resolution from SKAO's generate_visibilities.ipynb
IMAGING_NPIXEL = 20000
# -> Cellsize < FOV / 20000 -> 9.32190458333e-7
IMAGING_CELLSIZE = 9.3e-7

# Rucio
RUCIO_NAMESPACE = "chocolate"
# in seconds
RUCIO_LIFETIME = 31536000

# Metadata
OBS_COLLECTION = "SKAO/SKAMID"

obs_sim_id = 0 # inc/change for new simulation
user_rnd_str = get_rnd_str(k=7, seed=os.environ.get("USER"))
OBS_ID = f"karabo-{user_rnd_str}-{obs_sim_id}" # unique ID per user & simulation

# Simulate using OSKAR
SIMULATOR_BACKEND = SimulatorBackend.OSKAR
# Prefix for name part of Rucio DIDs.
# Example: visibilities DID will be [RUCIO_NAMESPACE]:[RUCIO_NAME_PREFIX]measurements.MS
# Keep in mind that DIDs need to be globally unique.
# Would probably make sense to make OBS_ID part of this for future runs.
RUCIO_NAME_PREFIX = "pi24_run_1_"

# Output root dir, this is just a default, set to your liking
OUTPUT_ROOT_DIR = os.path.join(os.environ["SCRATCH"], f"{RUCIO_NAME_PREFIX}output")
os.makedirs(OUTPUT_ROOT_DIR, exist_ok=False)
print(f"Output will be written under output root dir {OUTPUT_ROOT_DIR}")


def generate_visibilities() -> Visibility:
simulator_backend = SimulatorBackend.OSKAR

# Phase center: should be mean of coverage
# Link to metadata of survey:
# https://archive.sarao.ac.za/search/MIGHTEE%20COSMOS/target/J0408-6545/captureblockid/1587911796/
sky_model = SkyModel.get_MIGHTEE_Sky()
# Means of values from sky model description
phase_center_ra = 150.12
phase_center_dec = 2.21

telescope = Telescope.constructor( # type: ignore[call-overload]
name="SKA-MID-AAstar",
version=SKAMidAAStarVersions.SKA_OST_ARRAY_CONFIG_2_3_1,
backend=SIMULATOR_BACKEND,
backend=simulator_backend,
)

# From sky model description
Expand Down Expand Up @@ -79,8 +117,8 @@
)

observation = Observation(
phase_centre_ra_deg=phase_center_ra,
phase_centre_dec_deg=phase_center_dec,
phase_centre_ra_deg=PHASE_CENTER_RA,
phase_centre_dec_deg=PHASE_CENTER_DEC,
# During the chosen time range [start, start + length]
# sources shouldn't be behind horizon, otherwise we won't see much.
# Original survey: 2020-04-26 14:36:50.820 UTC to 2020-04-26 22:35:42.665 UTC
Expand All @@ -93,42 +131,139 @@
frequency_increment_hz=frequency_increment_hz,
)

print(f"{datetime.now()} Starting simulation")
visibility = simulation.run_simulation(
return simulation.run_simulation( # type: ignore[no-any-return]
telescope,
sky_model,
observation,
backend=SIMULATOR_BACKEND,
backend=simulator_backend,
visibility_path=os.path.join(
OUTPUT_ROOT_DIR,
f"{RUCIO_NAME_PREFIX}measurements.MS",
),
) # type: ignore[call-overload]

# Imaging using WSClean
# Image size in degrees should be smaller than FOV
# Bigger baseline -> higher resolution
# Image resolution from SKAO's generate_visibilities.ipynb
imaging_npixel = 20000
# -> Cellsize < FOV / 20000 -> 9.32190458333e-7
imaging_cellsize = 9.3e-7

print(f"{datetime.now()} Creating dirty image")
def create_visibilities_metadata(visibility: Visibility) -> None:
ocm = ObsCoreMeta.from_visibility(
vis=visibility,
calibrated=False,
)
# remove path-infos for `name`
name = os.path.split(visibility.path)[-1]
rm = RucioMeta(
namespace=RUCIO_NAMESPACE, # needs to be specified by Rucio service
name=name,
lifetime=RUCIO_LIFETIME,
dataset_name=None,
meta=ocm,
)
# ObsCore mandatory fields
ocm.obs_collection = OBS_COLLECTION
ocm.obs_id = OBS_ID
obs_publisher_did = RucioMeta.get_ivoid( # rest args are defaults
namespace=rm.namespace,
name=rm.name,
)
ocm.obs_publisher_did = obs_publisher_did

ocm.access_url = create_access_url(RUCIO_NAMESPACE, name)

path_meta = RucioMeta.get_meta_fname(fname=visibility.path)
_ = rm.to_dict(fpath=path_meta)
print(f"Created {path_meta=}")


def create_dirty_image(visibility: Visibility) -> Image:
dirty_imager = WscleanDirtyImager(
DirtyImagerConfig(
imaging_npixel=imaging_npixel,
imaging_cellsize=imaging_cellsize,
imaging_npixel=IMAGING_NPIXEL,
imaging_cellsize=IMAGING_CELLSIZE,
combine_across_frequencies=True,
)
)
dirty_image = dirty_imager.create_dirty_image(visibility)

print(f"{datetime.now()} Creating cleaned image")
return dirty_imager.create_dirty_image(
visibility,
output_fits_path=os.path.join(
OUTPUT_ROOT_DIR,
f"{RUCIO_NAME_PREFIX}dirty.fits",
),
)


def create_cleaned_image(visibility: Visibility, dirty_image: Image) -> Image:
image_cleaner = WscleanImageCleaner(
WscleanImageCleanerConfig(
imaging_npixel=imaging_npixel,
imaging_cellsize=imaging_cellsize,
imaging_npixel=IMAGING_NPIXEL,
imaging_cellsize=IMAGING_CELLSIZE,
)
)
cleaned_image = image_cleaner.create_cleaned_image(

return image_cleaner.create_cleaned_image(
visibility,
dirty_fits_path=dirty_image.path,
output_fits_path=os.path.join(
OUTPUT_ROOT_DIR,
f"{RUCIO_NAME_PREFIX}cleaned.fits",
),
)


def create_image_metadata(image: Image) -> None:
# Create image metadata
ocm = ObsCoreMeta.from_image(img=image)
# remove path-infos for `name`
name = os.path.split(image.path)[-1]
rm = RucioMeta(
namespace=RUCIO_NAMESPACE, # needs to be specified by Rucio service
name=name,
lifetime=RUCIO_LIFETIME,
dataset_name=None,
meta=ocm,
)
# ObsCore mandatory fields
# some of the metadata is taken from the visibilities, since both data-products
# originate from the same observation
ocm.obs_collection = OBS_COLLECTION
ocm.obs_id = OBS_ID
obs_publisher_did = RucioMeta.get_ivoid( # rest args are defaults
namespace=rm.namespace,
name=rm.name,
)
ocm.obs_publisher_did = obs_publisher_did

ocm.s_ra = PHASE_CENTER_RA
ocm.s_dec = PHASE_CENTER_DEC
ocm.access_url = create_access_url(RUCIO_NAMESPACE, name)

path_meta = RucioMeta.get_meta_fname(fname=image.path)
_ = rm.to_dict(fpath=path_meta)
print(f"Created {path_meta=}")


def create_access_url(namespace: str, name: str) -> str:
return f"https://datalink.ivoa.srcdev.skao.int/rucio/links?id={namespace}:{name}"


if __name__ == "__main__":
print(f"{datetime.now()} Script started")

print(f"{datetime.now()} Starting simulation")
visibility = generate_visibilities()

print(f"{datetime.now()} Creating visibility metadata")
create_visibilities_metadata(visibility)

print(f"{datetime.now()} Creating dirty image")
dirty_image = create_dirty_image(visibility)

print(f"{datetime.now()} Creating dirty image metadata")
create_image_metadata(dirty_image)

print(f"{datetime.now()} Creating cleaned image")
cleaned_image = create_cleaned_image(visibility, dirty_image)

print(f"{datetime.now()} Creating cleaned image metadata")
create_image_metadata(cleaned_image)

print(f"{datetime.now()} Script finished")
Loading