Source code for data_buttons.hst
# Ensure python3 compatibility
from __future__ import absolute_import, print_function, division
import fnmatch
import glob
import os
import shutil
import multiprocessing as mp
import logging
import resource
import astropy.units as u
import numpy as np
from astropy.io import fits
from astropy.table import Table
from astroquery.mast import Observations
from astroquery.gaia import Gaia
from astroquery.ned import Ned
from drizzlepac import astrodrizzle, photeq, tweakreg, tweakback
import stwcs
import crds
import pickle
import wget
from . import alignimages
[docs]def hst_button(
galaxies,
skymethod='globalmin+match',
instruments="ACS/WFC",
prop_ids=None,
filters=None,
radius=None,
filepath=None,
download_data=True,
correct_astrometry=True,
create_mosaic=True,
jy_conversion=True,
verbose=False,
log_filename='hst.log',
):
"""Create a HST mosaic, given a galaxy name.
Using a galaxy name and radius, queries around that object,
downloads available HST data and mosaics into a final product. It
will create separate mosaics for each proposal ID, and the file structure
will look like ``/galaxy/HST/proposal_id/galaxy_instrument_filter_proposal_id.fits``.
N.B. I must confess to not being well-versed with HST data, so if
anyone can help improve this please let me know.
This data button uses a number of tools included in the drizzlepac
Python package. This includes alignimages/tweakreg and astrodrizzle,
which correct astrometry and are specifically tailored for the setup
of HST data. This means that 1) creating mosaics with this will likely
take a long time and 2) you will need a beefy computer (especially with
regards to hard drive space).
Args:
galaxies (str or list): Names of galaxies to create mosaics for.
Resolved by NED.
skymethod (str, optional): Method used for AstroDrizzle's background
matching step. In general, this can be left untouched but for
mosaics with little overlap, it may be worth playing around
with this. For instance, I've had some luck when there isn't
much overlap between exposures using 'globalmin'. Options are
'localmin', 'globalmin+match', 'globalmin', and 'match'. Defaults
to 'globalmin+match'.
instruments (str or list, optional): Instrument to download data
for. Can be any combination of 'ACS/WFC', 'WFC3/IR',
'WFC3/UVIS', 'WFPC2/PC', or 'WFPC2/WFC'. If you want all
available data for all these instruments, select 'all', but
this is not recommended! Defaults to 'ACS/WFC'.
prop_ids (str or list, optional): Proposal IDs to download data for.
Defaults to None, which will pull out all proposal IDs for each
instrument.
filters (str or list, optional): Filters to download data for.
The script will look for each filter, for each instrument.
Defaults to None, which will pull out all applicable filters
for each instrument, for each proposal ID.
radius (astropy.units.Quantity, optional): Radius around the
galaxy to search for observations. Defaults to None, where
it will query Ned to get size.
filepath (str, optional): Path to save the working and output
files to. If not specified, saves to current working
directory.
download_data (bool, optional): If True, will download data from
MAST. Defaults to True.
correct_astrometry (bool, optional): If True, will perform astrometric
corrections to the downloaded data using alignimages. Defaults
to True.
create_mosaic (bool, optional): Switching this to True will
mosaic the data using astrodrizzle as appropriate. Defaults
to True.
jy_conversion (bool, optional): Convert the mosaicked file from
raw units to Jy/pix. Defaults to True.
verbose (bool, optional): Can be used to suppress most of the
output messages produced during the process. Mainly useful
for debugging. Defaults to False.
log_filename (str, optional): Will produce a stripped down log
of what data the code is reducing. By default, will save to
galaxy/hst.log.
"""
if isinstance(galaxies, str):
galaxies = [galaxies]
if isinstance(instruments,str):
instruments = [instruments]
if instruments == 'all':
instruments = ['ACS/WFC',
'WFC3/IR','WFC3/UVIS',
'WFPC2/PC','WFPC2/WFC']
if isinstance(filters,str):
filters = [filters]
if isinstance(prop_ids,str):
prop_ids = [prop_ids]
if filepath is not None:
os.chdir(filepath)
orig_dir = os.getcwd()
if radius is not None:
original_radius = radius.copy()
else:
original_radius = None
steps = []
if download_data:
steps.append(1)
if correct_astrometry:
steps.append(2)
if create_mosaic:
steps.append(3)
if jy_conversion:
steps.append(4)
# Set up folders for various corrections
os.environ['CRDS_SERVER_URL'] = 'https://hst-crds.stsci.edu'
os.environ['CRDS_PATH'] = orig_dir+'/reference_files'
os.environ['iref'] = orig_dir+'/reference_files/references/hst/wfc3/'
os.environ['jref'] = orig_dir+'/reference_files/references/hst/acs/'
os.environ['uref'] = orig_dir+'/reference_files/references/hst/wfpc2/'
# For large proposals, astrodrizzle can run into file open
# issues so raise the max file open amount.
_, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
resource.setrlimit(resource.RLIMIT_NOFILE,(hard,hard))
# Change the temp directory -- if this gets filled up it can cause
# problems.
orig_tmpdir = os.environ['TMPDIR']
if not os.path.exists('tmp'):
os.mkdir('tmp')
os.environ['TMPDIR'] = orig_dir+'/tmp'
for galaxy in galaxies:
if not os.path.exists(galaxy):
os.mkdir(galaxy)
if not os.path.exists(galaxy+'/HST'):
os.mkdir(galaxy+'/HST')
if not verbose:
# Various packages used here put out a lot of messages. Silence info messages.
loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict]
for logger in loggers:
logger.setLevel(logging.ERROR)
# Even if verbose is not True, still print out some useful messages to the
# console.
hst_logger = logging.getLogger('data_buttons')
handler = logging.FileHandler(galaxy+'/'+log_filename,mode='w')
hst_logger.addHandler(handler)
hst_logger.addHandler(logging.StreamHandler())
hst_logger.setLevel(logging.INFO)
hst_logger.info('Beginning '+galaxy)
hst_logger.info(' ')
hst_logger.info(' ')
if radius is None:
try:
size_query = Ned.get_table(galaxy,table='diameters')
radius = np.max(size_query['NED Major Axis'])/2*u.arcsec
radius = radius.to(u.deg)
except:
hst_logger.warning(galaxy+' not resolved by Ned, using 0.2deg radius.')
radius = 0.2*u.degree
obs_table = Observations.query_criteria(objectname=galaxy,
radius=radius,
obs_type='all',
obs_collection='HST')
# Ignore any calibration observations.
obs_table = obs_table[obs_table['intentType'] == 'science']
for instrument in instruments:
# Pixel sizes for final mosaics selected to match the HLA.
pix_size = {'ACS/HRC':0.025,
'ACS/SBC':0.03,
'ACS/WFC':0.05,
'NICMOS/NIC1':0.025,
'NICMOS/NIC2':0.05,
'NICMOS/NIC3':0.1,
'WFC3/IR':0.09,
'WFC3/UVIS':0.04,
'WFPC2/PC':0.05,
'WFPC2/WFC':0.1}[instrument]
# Bits to consider good for drizzling.
bits = {'ACS/HRC':256,
'ACS/SBC':256,
'ACS/WFC':256,
'NICMOS/NIC1':0,
'NICMOS/NIC2':0,
'NICMOS/NIC3':0,
'WFC3/IR':768,
'WFC3/UVIS':256,
'WFPC2/PC':'8,1024',
'WFPC2/WFC':'8,1024'}[instrument]
# Filename extension, in order of preference.
suffixes = {'ACS/WFC':['FLC','FLT'],
'WFC3/IR':['FLT'],
'WFC3/UVIS':['FLC','FLT'],
'WFPC2/PC':[['C0M','C1M']],
'WFPC2/WFC':[['C0M','C1M']],
}[instrument]
# The instruments often have / in the name, so account for
# this in making folders and files.
hst_logger.info('Beginning '+instrument)
if not os.path.exists(galaxy+'/HST/'+instrument.replace('/','_')):
os.mkdir(galaxy+'/HST/'+instrument.replace('/','_'))
reset_filters = False
instrument_table = obs_table[obs_table['instrument_name'] == instrument]
reset_prop_ids = False
if not prop_ids:
prop_ids = list(np.unique(instrument_table['proposal_id']))
reset_prop_ids = True
hst_logger.info('Available proposal IDs: '+','.join(prop_ids))
hst_logger.info(' ')
for prop_id in prop_ids:
hst_logger.info('Proposal ID: '+str(prop_id))
prop_table = instrument_table[instrument_table['proposal_id'] == prop_id]
if not filters:
filters = list(np.unique(prop_table['filters']))
reset_filters = True
hst_logger.info('Available filters: '+','.join(filters))
for hst_filter in filters:
# If we have a highly illegal filter, just skip.
# TODO: This needs to be sorted for some fringe
# cases, probably.
if not hst_filter[0] == 'F':
continue
hst_logger.info('Filter: '+str(hst_filter))
# Pull out available data and download.
filter_table = prop_table[prop_table['filters'] == hst_filter]
if len(filter_table) == 0:
hst_logger.warning('No available data to download. Skipping...')
continue
data_products_id = Observations.get_product_list(filter_table)
for suffix in suffixes:
download_table = Observations.filter_products(data_products_id,
productSubGroupDescription=suffix,
mrp_only=False)
if len(download_table) > 0:
break
if isinstance(suffix,list):
filename_exts = [ext.lower() for ext in suffix]
else:
filename_exts = [suffix.lower()]
hst_logger.info(instrument+'/'+prop_id+'/'+hst_filter)
if not os.path.exists(galaxy+
'/HST/'+
instrument.replace('/','_')+
'/'+
hst_filter):
os.mkdir(galaxy+
'/HST/'+
instrument.replace('/','_')+
'/'+
hst_filter)
if not os.path.exists(galaxy+'/HST/'+prop_id):
os.mkdir(galaxy+'/HST/'+prop_id)
full_filepath = (galaxy+
'/HST/'+
instrument.replace('/','_')+
'/'+
hst_filter+
'/'
+prop_id)
if not os.path.exists(full_filepath):
os.mkdir(full_filepath)
if 1 in steps:
# Download files
download_mast(download_table,
download_dir="hst_temp/" + galaxy)
if not os.path.exists(full_filepath+'/raw'):
os.mkdir(full_filepath+'/raw')
if not os.path.exists(full_filepath+'/outputs'):
os.mkdir(full_filepath+'/outputs')
# Pull out the relevant files, and move to base folder.
for filename_ext in filename_exts:
matches = []
for root, _, filenames in os.walk("hst_temp/" + galaxy):
for filename in fnmatch.filter(
filenames, "*_"+filename_ext+".fits"
):
matches.append(os.path.join(root, filename))
for match in matches:
filename = match.split('/')
os.rename(match,full_filepath+'/raw/'+filename[-1])
# Clean up any temporary files.
shutil.rmtree("hst_temp/" + galaxy, ignore_errors=True)
filename_ext = filename_exts[0]
hst_files = glob.glob(full_filepath+'/raw/*_'+filename_ext+'.fits')
if 2 in steps:
# First, update the WCS information in case it's
# required.
for filename_ext in filename_exts:
hst_files = glob.glob(full_filepath+'/raw/*_'+filename_ext+'.fits')
crds.assign_bestrefs(hst_files,
sync_references=True)
# For WFPC2, the CRDS doesn't download everything
# needed. Download the GEIS data files and
# rerun the bestrefs assignment.
if 'WFPC2' in instrument:
geis_hdrs = glob.glob(os.environ['uref']+'/*h')
for geis_hdr in geis_hdrs:
geis_data = geis_hdr[:-1]+'d'
if not os.path.exists(geis_data):
geis_data = geis_data.split('/')[-1]
print(geis_data)
print(os.environ['uref'])
wget.download(
os.environ['CRDS_SERVER_URL']+'/unchecked_get/references/hst/'+geis_data,
out=os.environ['uref'])
crds.assign_bestrefs(hst_files,sync_references=True)
for hst_file in hst_files:
stwcs.updatewcs.updatewcs(hst_file,
use_db=False)
os.chdir(full_filepath+'/raw')
filename_ext = filename_exts[0]
hst_files = glob.glob('*_'+filename_ext+'.fits')
# Normalize all files.
photeq.photeq(', '.join(hst_files),readonly=False)
os.rename('photeq.log','../outputs/photeq.log')
if 'WFPC' in instrument:
# Using tweakreg, align each frame to GAIA.
gaia_table = Gaia.query_object_async(coordinate=galaxy,
radius=2*radius)
ras = gaia_table['ra']
decs = gaia_table['dec']
source_table = Table([ras,decs])
source_table.write('gaia.cat',
format='ascii.fast_commented_header')
tweakreg.TweakReg(hst_files,
imagefindcfg={'threshold':5,'conv_width':3},
refcat='gaia.cat',
#expand_refcat=True,
enforce_user_order=False,
shiftfile=True,
outshifts='shifts.txt',
searchrad=10,
minobj=5,
separation=0,
updatehdr=True,
reusename=True,
wcsname='TWEAK',
interactive=False,
fitgeometry='general',
clean=True,
see2dplot=False
)
# Update the c1m files to use the TWEAK
# wcs
for hst_file in hst_files:
dq_file = hst_file.replace('c0','c1')
tweakback.tweakback(hst_file,
dq_file,
newname='TWEAK')
plot_files = glob.glob('*.png')
for plot_file in plot_files:
os.remove(plot_file)
cat_files = glob.glob('*.coo')
for cat_file in cat_files:
os.remove(cat_file)
os.rename('shifts_wcs.fits','../outputs/shifts_wcs.fits')
os.rename('tweakreg.log','../outputs/tweakreg.log')
os.rename('shifts.txt','../outputs/shifts.txt')
elif 'ACS' in instrument or 'WFC3' in instrument:
# Correct astrometry using alignimages. First,
# correct each frame separately.
pool = mp.Pool(mp.cpu_count())
suitable_hst_files = pool.map(astrometric_correction,
hst_files)
pool.close()
suitable_hst_files = [x for x in suitable_hst_files
if x is not None]
if len(suitable_hst_files) == 0:
hst_logger.warning('Failure with astrometry corrections. Skipping')
os.chdir(orig_dir)
continue
# Now, align every suitable frame simultaneously.
output_table = astrometric_correction(suitable_hst_files)
with open('../outputs/astrometry.pkl','wb') as table_file:
pickle.dump(output_table,table_file)
else:
raise Exception('Unknown instrument!')
os.chdir(orig_dir)
os.chdir(full_filepath)
if 3 in steps:
os.chdir('raw')
if 'WFPC2' in instrument:
hst_files = glob.glob('*_c0m.fits')
wcskey = 'TWEAK'
elif 'ACS' in instrument or 'WFC3' in instrument:
with open('../outputs/astrometry.pkl','rb') as table_file:
output_table = pickle.load(table_file)
# We only want fits where an acceptable astrometric
# solution has been found.
suitable_fits = np.where(output_table['fit_qual'] < 5)
# (output_table['fit_qual'] >= 1)
hst_files = list(output_table[suitable_fits]['imageName'])
if len(output_table[suitable_fits]) == 0:
hst_logger.warning('Failure with astrometry corrections. Skipping')
os.chdir(orig_dir)
continue
wcskey = ' '
else:
raise Exception('Unknown instrument!')
# Following Dalcanton+ (2012), group exposures into
# long (>50s) and short (<=50s), and process for cosmic
# rays separately
exp_times = []
for hst_file in hst_files:
hdu = fits.open(hst_file)[0]
exp_time = hdu.header['EXPTIME']
exp_times.append(exp_time)
for exp_group in ['short','long']:
hst_files_group = []
for i in range(len(exp_times)):
if exp_times[i] > 50 and exp_group == 'long':
hst_files_group.append(hst_files[i])
elif exp_times[i] <= 50 and exp_group == 'short':
hst_files_group.append(hst_files[i])
if len(hst_files_group) == len(hst_files):
exp_group = ''
if len(hst_files_group) == 0:
continue
if len(exp_group) > 0:
output_name = '../outputs/'+galaxy+'_'+exp_group
drizzle_log_name = '../outputs/astrodrizzle_'+exp_group+'.log'
else:
output_name = '../outputs/'+galaxy
drizzle_log_name = '../outputs/astrodrizzle.log'
# Perform the mosaicking. Generally, use iminmed.
# However, sometimes iminmed will fail so
# for the other instruments we'll use imedian as
# a fallback.
combine_types = ['iminmed','imedian']
if 'WFPC2' in instrument:
combine_nhigh = 1
else:
combine_nhigh = 0
for combine_type in combine_types:
try:
astrodrizzle.AstroDrizzle(
input=hst_files_group,
output=output_name,
preserve=False,
clean=True,
combine_type=combine_type,
combine_nhigh=combine_nhigh,
skymethod=skymethod,
sky_bits=bits,
driz_sep_bits=bits,
driz_sep_fillval=99999,
combine_hthresh=90000,
final_scale=pix_size,
final_bits=bits,
final_fillval=0,
wcskey=wcskey,
final_rot=0,
)
break
except ValueError:
pass
# Move the AstroDrizzle log.
os.rename('astrodrizzle.log',
drizzle_log_name)
# Move back to the original directory.
os.chdir(orig_dir)
if 4 in steps:
mosaic_outputs = glob.glob(full_filepath+'/outputs/*_sci.fits')
for mosaic_output in mosaic_outputs:
# Replace any fillvals with NaNs.
hdu = fits.open(mosaic_output)[0]
hdu.data[hdu.data == 0] = np.nan
fits.writeto(mosaic_output,
hdu.data,hdu.header,
overwrite=True)
if '_long_' in mosaic_output.split('/')[-1]:
new_filename = (galaxy+
'/HST/'
+prop_id
+'/'
+galaxy
+'_'
+instrument.replace('/','_')
+'_'
+hst_filter
+'_'
+prop_id
+'_long.fits')
elif '_short_' in mosaic_output.split('/')[-1]:
new_filename = (galaxy+
'/HST/'
+prop_id
+'/'
+galaxy
+'_'
+instrument.replace('/','_')
+'_'
+hst_filter
+'_'
+prop_id
+'_short.fits')
else:
new_filename = (galaxy+
'/HST/'
+prop_id
+'/'
+galaxy
+'_'
+instrument.replace('/','_')
+'_'
+hst_filter
+'_'
+prop_id
+'.fits')
convert_to_jy(mosaic_output,
new_filename)
if reset_filters:
filters = None
hst_logger.info(' ')
if reset_prop_ids:
prop_ids = None
hst_logger.info(' ')
if original_radius is None:
radius = None
else:
radius = original_radius.copy()
# Clear out the tmp folder and reset to the original.
shutil.rmtree('tmp/', ignore_errors=True)
os.environ['TMPDIR'] = orig_tmpdir
[docs]def download_mast(obs_table,**kwargs):
"""Download data from the MAST archive.
This function gives you an easy way to download data from the MAST
archive, if you already have a table of observations (from Astroquery)
to hand. By default, this will download everything into your current
working directory.
Args:
obs_table (astropy.table.Table): The result of an astroquery.mast
query.
"""
Observations.download_products(obs_table,**kwargs
)
[docs]def astrometric_correction(filename):
"""Perform astrometric corrections on HST data.
Using the astrodrizzle alignimages package, will perform astrometric
corrections on either a single file or a set of files. If a single
file, then it will only provide absolute astrometric corrections
based on Gaia observations. If multiple files, it will also correct
for relative corrections between observations.
Args:
filename (str or list): The filenames for the data to correct.
Can either be a single string or a list of filenames.
IMPORTANT: These filenames should be just the filename with
no directory structure beforehand. Else alignimages gets
confused!
Returns:
str or astropy.table.Table:
The filename for which an astrometric solution has been found
(if input is a string), or information regarding all of the
astrometric fits (if input is a list).
"""
single_file = False
if isinstance(filename,str):
single_file = True
runfile = 'alignimages_'+filename+'.log'
filename_list = [filename]
else:
filename_list = filename.copy()
runfile = 'alignimages.log'
output_table = alignimages.perform_align(filename_list,
runfile=runfile,
print_fit_parameters=False,
update_hdr_wcs=True,
)
os.rename(runfile,
'../outputs/'+runfile)
if single_file:
# Filter the table down to only one where sources have been detected
# and the fit is not totally broken. Else alignimages on the whole
# set can cause issues.
if output_table['fit_qual'] < 5 and output_table['foundSources'] > 0:
return filename
else:
return output_table
[docs]def convert_to_jy(hdu_in,hdu_out=None):
"""Convert from HST electrons/s to Jy/pixel.
HST maps are provided in units of electrons/s. These can be converted
to more helpful units using the keywords in the .fits header, specifically
PHOTFLAM (the sensitivity in ergs/cm^2/Ang/electron), and the pivot
wavelength PHOTPLAM.
Args:
hdu_in (str or astropy.io.fits.PrimaryHDU): File name of HST
.fits file, or an Astropy PrimaryHDU instance (i.e. the
result of ``fits.open(file)[0]``).
hdu_out (str, optional). If not None, will save the hdu to the
string provided. Defaults to None.
Returns:
astropy.io.fits.PrimaryHDU: The HDU in units of Jy/pix.
"""
if isinstance(hdu_in,str):
hdu = fits.open(hdu_in)[0]
else:
hdu = hdu_in.copy()
data = hdu.data.copy()
header = hdu.header.copy()
data *= header['PHOTFLAM']
data *= 3.33564095e4 * header['PHOTPLAM']**2
header['BUNIT'] = 'Jy/pix'
if hdu_out is not None:
fits.writeto(hdu_out,
data,header,
overwrite=True)
return fits.PrimaryHDU(data=data,header=header)