diff --git a/CHANGES.rst b/CHANGES.rst index a76ced54bb..501325fed1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,8 +4,6 @@ New Tools and Services ---------------------- - - API changes ----------- @@ -35,6 +33,7 @@ heasarc - Add ``query_constraints`` to allow querying of different catalog columns. [#3403] - Add support for uploading tables when using TAP directly through ``query_tap``. [#3403] - Add automatic guessing for the data host in ``download_data``. [#3403] +- Adding method heasarc.query_all(). [#3499] gaia ^^^^ diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index f92121ff70..3da1ef16d2 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -8,7 +8,7 @@ from astropy import coordinates from astropy import units as u from astropy.utils.decorators import deprecated, deprecated_renamed_argument - +from astropy.time import Time import pyvo from astroquery import log @@ -612,6 +612,270 @@ def query_object(self, object_name, mission, *, return self.query_region(pos, catalog=mission, spatial='cone', get_query_payload=get_query_payload) + def _get_vector(ra=None, dec=None): + """ + If the input is a string name of a column like "a.ra", then this routine + constructs the unit vector column names that can be added to the SQL query + to represent the unit vector. If the input is a number, then it will actually + calculate the unit vector and return the values as strings to be added to the + SQL query. + + The former is used to fetch pre-computed unit vectors columns associated with + the table being queried. The latter is used to compute the input position unit + vector only once and put the numeric value in the query constraint. + """ + # Note that Astropy flips x and y compared to this, which is used internally + # despite the fact that our RA, DEC values are in ICRS. + try: + r, d = np.radians([float(ra), float(dec)]) + return ( + np.cos(d) * np.sin(r), + np.cos(d) * np.cos(r), + np.sin(d) + ) + except ValueError: + prefix = ra.split('.')[0] # e.g., 'a' from 'a.ra' + return (f"{prefix}.__x_ra_dec", f"{prefix}.__y_ra_dec", f"{prefix}.__z_ra_dec") + except Exception as e: + raise e + + def _fast_geometry_constraint(ra, dec, large=False, radius=None): + """ + Construct the spatial constraint to be added to the WHERE clause. It compares + the input position with the catalog's pre-computed unit vector columns + with the computation optimized for speed. The optimization was done by Tom McGlynn + for the Xamin GUI and the algorithm copied here. + + The master position tables are split into those where the default sensible search + radius is larger or smaller than 1 degree. + """ + vec0 = HeasarcClass._get_vector("a.ra", "a.dec") + vec1 = HeasarcClass._get_vector(ra, dec) + dot_product = " + ".join([f"{vec0[i]}*{vec1[i]}" for i in range(3)]) + if radius is not None: + if not isinstance(radius, (int, float)): + radius = radius.value + radius_condition = f"{dot_product} > (cos(radians(({radius}))))" + dec_condition = f"a.dec between {dec} - {radius} and {dec} + {radius}" + else: + # Assuming 'a.dsr' is the default search radius column in degrees. This value is + # defined by HEASARC curators for each table. + radius_condition = f"{dot_product} > (cos(radians((a.dsr))))" + dec_condition = f"a.dec between {dec} - a.dsr and {dec} + a.dsr" + if large: + return f""" + ( ({radius_condition}) + and ({dec_condition}) ) + """ + else: + # Additional constraints on tables with search radii less than 1 deg, + # which speeds up the whole thing. + radius_condition_1deg = f"{dot_product} > {np.cos(np.radians(1.0))}" + dec_condition_1deg = f"a.dec between {float(dec) - 1} and {float(dec)+1}" + return f""" + ( ({radius_condition}) + and ({dec_condition}) + and ({radius_condition_1deg}) + and ({dec_condition_1deg}) + ) + """ + + def _time_constraint(start_time=None, end_time=None): + """" + Converts input string like "2025-01-02T01:00:00..2025-01-05T23:59:59" + into a decimal MJD constraint. + """ + start_mjd = Time(start_time, format='isot').mjd + end_mjd = Time(end_time, format='isot').mjd + return f"end_time > {start_mjd:.6f} AND start_time < {end_mjd:.6f}" + + def _query_matches(ra=None, dec=None, start_time=None, end_time=None, radius=None): + """ + Constructs the full SQL query including the spatial and time constraints. + Note that this queries multiple tables, as the HEASARC database has split + the master tables for efficiency. + """ + offset_def = '' + if ra is not None: + constraint_small = HeasarcClass._fast_geometry_constraint(ra, dec, large=False, radius=radius) + constraint_big = HeasarcClass._fast_geometry_constraint(ra, dec, large=True, radius=radius) + # Note that at least in HEASARC implementation, using DISTANCE in a column + # definition is fine, but it's very slow in a WHERE clause. + offset_def = f''', MAX(DISTANCE(POINT('ICRS', a.ra, a.dec), + POINT('ICRS', {ra}, {dec}))) as max_offset_deg + ''' + + if start_time is not None: + constraint_time = HeasarcClass._time_constraint(start_time, end_time) + + tname1, tname2 = None, None + if ra is not None and start_time is None: + tname1, tname2 = 'pos_small', 'pos_big' + elif ra is not None and start_time is not None: + tname1, tname2 = 'pos_time_small', 'pos_time_big' + constraint_small += f" AND {constraint_time}" + constraint_big += f" AND {constraint_time}" + elif ra is None and start_time is not None: + tname1 = 'time' + else: + raise ValueError("You must specify either a position or time range or both") + + # These columns are only in b, so can remove the b.column + select_block = f''' + SELECT table_name, count(*) AS count, b.description, + b.regime, b.mission, b.type AS obj_type{offset_def} + ''' + + groupby_block = "GROUP BY table_name, b.description, b.regime, b.mission, b.type" + + if ra is not None: + full_query = f''' + {select_block} + FROM master_table.{tname1} AS a, + master_table.indexview AS b + WHERE a.table_name = b.name AND {constraint_small} + {groupby_block} + UNION ALL + {select_block} + FROM master_table.{tname2} AS a, + master_table.indexview AS b + WHERE a.table_name = b.name AND {constraint_big} + {groupby_block} + ORDER BY count DESC + ''' + else: + full_query = f''' + {select_block} + FROM master_table.{tname1} AS a, master_table.indexview AS b + WHERE a.table_name = b.name AND {constraint_time} + {groupby_block} + ORDER BY count DESC + ''' + + # rename for readability + full_query = f""" + select r.table_name as table_name, r.count as count, r.description as description, + r.regime as regime, r.mission as mission, r.obj_type as obj_type, + r.max_offset_deg as max_offset_deg from ({full_query}) as r""" + + # Join all parts of the query, ensuring simple spacing + return HeasarcClass._fix_sql_whitespace(full_query) + + def _fix_sql_whitespace(insql): + import re + return re.sub(r'\s+', ' ', insql).replace("\n", " ").strip() + + def _set_print_formats(table): + """ + Set the Astropy format (e.g., '.5f' or '.3e') so that + the columns look sensible. + """ + for colname in table.columns: + col = table[colname] + if col.dtype.kind not in 'f': + continue + if (abs(col.min()) < 1e-10 and abs(col.min()) > 0.0) or abs(col.max() > 1e10): + col.format = "%10e" + else: + col.format = "%10f" + return (table) + + def query_all(self, position=None, get_query_payload=False, start_time=None, + end_time=None, verbose=False, maxrec=None, radius=None): + """ + Query the HEASARC database to count matches at a given position for all available catalogs. + + Parameters + ---------- + position : str, `astropy.coordinates` object + The position around which to search. Must be a SkyCoord object or a string + that Astropy can convert. + start_time : str, `astropy.time` object, optional + Beginning of time range of interest as a string in ISOT format + or Time object. + end_time : str, `astropy.time` object, optional + End of time range of interest as a string in ISOT format + or Time object. + get_query_payload : bool, optional, optional + If `True` then returns the generated ADQL query as str and does not send the query. + Defaults to `False`. + radius : str or `~astropy.units.Quantity` object, optional + If this radius is None, the specified coordinate is compared to each mission + catalog entry using that catalog's default radius. (See get_default_radius().) + This is based on the approximate location uncertainty for each mission. If you + specify a radius in degrees, it uses that instead. + verbose : bool, optional + If True, prints additional information about the query. Default is False. + maxrec : int, optional + The maximum number of records to return. If None, all matching records are returned up to the server limit. + **kwargs : dict, optional + Additional keyword arguments: + + Returns + ------- + result : `~astropy.table.Table` + A table containing the results of the query, i.e. a list of catalogs + that have entries near the specified position, how many, and quick catalog + information included the computed offset between the two positions in degrees. + If no results are found, an empty table is returned and a warning is issued. + + Raises + ------ + ValueError + If the position is not provided or is not a SkyCoord object. + + Notes + ----- + This method queries all HEASARC catalogs for sources near the specified position. + The results include the table name, number of matches, table description, regime, + mission, and object type for each catalog. + + By default, the search radius foreach table is adjusted according to the positional + accuracy in that table. This gives you results most likely to be relevant to your + search. But if you specify a radius, that will be used in all catalogs. + Be aware that for missions with large uncertainties, e.g., 40 degrees for hete2, + when you search within a very small radius, you may not find some relevant catalog + entries that might be of interest. And conversely, if you specify a larger + radius than the default, you will get more results further from the position specified. + + The user can then select the table name(s) of interest and use the query_object(), + query_region(), etc. + + The query uses the HEASARC TAP service to search position-only master tables efficiently. + + """ + if position is not None: + coords_icrs = parse_coordinates(position).icrs + ra, dec = coords_icrs.ra.deg, coords_icrs.dec.deg + elif position is None and start_time is not None: + ra = None + dec = None + elif ((position is None and start_time is None)): + raise ValueError("A valid position and/or a time range must be provided.") + + full_query = HeasarcClass._query_matches(ra=ra, dec=dec, + start_time=start_time, + end_time=end_time, radius=radius) + + if get_query_payload: + return full_query + + response = self.query_tap(query=full_query, maxrec=maxrec) + + # save the response in case we want to use it later + self._last_result = response + + table = response.to_table() + if len(table) == 0: + warnings.warn( + NoResultsWarning("No matching rows were found in the query.") + ) + return table + + # Because astropy Tables don't keep all the VOTable metadata, + # this prints in more sensible formats and avoids confusion. + return HeasarcClass._set_print_formats(table) + def locate_data(self, query_result=None, catalog_name=None): """Get links to data products Use vo/datalinks to query the data products for some query_results. diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index 0091190f06..9dee1f6981 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -7,6 +7,7 @@ from astropy.coordinates import SkyCoord from astropy.table import Table import astropy.units as u +from astropy.time import Time from astroquery.heasarc import Heasarc, HeasarcClass from astroquery.exceptions import InvalidQueryError @@ -719,3 +720,108 @@ def test_s3_mock_directory(s3_mock): assert os.path.exists(f"{tmpdir}/location/file1.txt") assert os.path.exists(f"{tmpdir}/location/sub/file2.txt") assert os.path.exists(f"{tmpdir}/location/sub/sub2/file3.txt") + + +def test__get_vector(): + # Test column name input + assert HeasarcClass._get_vector("a.ra", "a.dec") == \ + ("a.__x_ra_dec", "a.__y_ra_dec", "a.__z_ra_dec") + # Test numeric input + actual = HeasarcClass._get_vector("217.0", "-31.7") + desired = (-0.5120309075160554, -0.6794879643287802, -0.5254716510722678) + # Convert to float for comparison + assert all(abs(d - a) < 0.5 * (10 ** (-6)) for d, a in zip(desired, actual)) + + +def adql_str_comp(testing=str, reference=str): + return HeasarcClass._fix_sql_whitespace(testing) == HeasarcClass._fix_sql_whitespace(reference) + + +def test__constraint_matches(): + # Testing all together because it's easier to read this way. + constraint_small = HeasarcClass._fast_geometry_constraint("217.0", "-31.7", large=False) + desired_small = """ + ( (a.__x_ra_dec*-0.5120309075160554 + a.__y_ra_dec*-0.6794879643287802 + + a.__z_ra_dec*-0.5254716510722678 > (cos(radians((a.dsr))))) and + (a.dec between -31.7 - a.dsr and -31.7 + a.dsr) and + (a.__x_ra_dec*-0.5120309075160554 + a.__y_ra_dec*-0.6794879643287802 + + a.__z_ra_dec*-0.5254716510722678 > 0.9998476951563913) + and (a.dec between -32.7 and -30.7) ) + """ + + assert HeasarcClass._fix_sql_whitespace(constraint_small) == \ + HeasarcClass._fix_sql_whitespace(desired_small) + + constraint_large = HeasarcClass._fast_geometry_constraint("217.0", "-31.7", large=True) + desired_large = """ + ( (a.__x_ra_dec*-0.5120309075160554 + a.__y_ra_dec*-0.6794879643287802 + + a.__z_ra_dec*-0.5254716510722678 > (cos(radians((a.dsr))))) + and (a.dec between -31.7 - a.dsr and -31.7 + a.dsr) ) + """ + assert adql_str_comp(constraint_large, desired_large) + + constraint_large_rad = HeasarcClass._fast_geometry_constraint("217.0", "-31.7", + radius=0.5*u.deg, large=True) + assert "(a.dec between -31.7 - 0.5 and -31.7 + 0.5)" in constraint_large_rad + + constraint_time = HeasarcClass._time_constraint(start_time=Time("2017-01-01"), + end_time=Time("2017-01-02")) + desired_time = "end_time > 57754.000000 AND start_time < 57755.000000" + assert adql_str_comp(constraint_time, desired_time) + + constraint_full = HeasarcClass._query_matches("217.025", "-31.725") + desired_full = """ + select r.table_name as table_name, r.count as count, + r.description as description, r.regime as regime, + r.mission as mission, r.obj_type as obj_type, r.max_offset_deg + as max_offset_deg from ( SELECT table_name, count(*) + AS count, b.description, b.regime, b.mission, b.type + AS obj_type, MAX(DISTANCE(POINT('ICRS', a.ra, a.dec), + POINT('ICRS', 217.025, -31.725))) as max_offset_deg + FROM master_table.pos_small AS a, master_table.indexview + AS b WHERE a.table_name = b.name AND + ( (a.__x_ra_dec*-0.5121892283646801 + a.__y_ra_dec*-0.6790813682341418 + + a.__z_ra_dec*-0.5258428374185955 > (cos(radians((a.dsr))))) + and (a.dec between -31.725 - a.dsr and -31.725 + a.dsr) and + (a.__x_ra_dec*-0.5121892283646801 + a.__y_ra_dec*-0.6790813682341418 + + a.__z_ra_dec*-0.5258428374185955 > 0.9998476951563913) and + (a.dec between -32.725 and -30.725) ) GROUP BY table_name, + b.description, b.regime, b.mission, b.type UNION ALL + SELECT table_name, count(*) AS count, b.description, b.regime, + b.mission, b.type AS obj_type, MAX(DISTANCE(POINT('ICRS', + a.ra, a.dec), POINT('ICRS', 217.025, -31.725))) as max_offset_deg + FROM master_table.pos_big AS a, master_table.indexview AS b + WHERE a.table_name = b.name AND ( (a.__x_ra_dec*-0.5121892283646801 + + a.__y_ra_dec*-0.6790813682341418 + a.__z_ra_dec*-0.5258428374185955 > + (cos(radians((a.dsr))))) and (a.dec between -31.725 - a.dsr and -31.725 + a.dsr) + ) GROUP BY table_name, b.description, b.regime, b.mission, b.type + ORDER BY count DESC ) as r + """ + + assert HeasarcClass._fix_sql_whitespace(constraint_full) == HeasarcClass._fix_sql_whitespace(desired_full) + + constraint_with_time = HeasarcClass._query_matches("217.025", "-31.725", + start_time="2017-01-01", + end_time="2020-01-02") + assert "end_time > 57754.000000 AND start_time < 58850.000000" in constraint_with_time + + constraint_with_radius = HeasarcClass._query_matches("217.0", "-31.7", radius=1.0) + assert "-31.7 - 1.0 and -31.7 + 1.0" in constraint_with_radius + assert "cos(radians((1.0)))" in constraint_with_radius + + +def test__query_all(): + # For some reason, the significant digits here don't give the same result as above. + full_with_strpos = Heasarc.query_all("217.0 -31.7", get_query_payload=True) + # in _query_matches and query_all, whitespaces get removed. + assert HeasarcClass._fix_sql_whitespace("""( (a.__x_ra_dec*-0.5120309075160554 + + a.__y_ra_dec*-0.6794879643287802 + + a.__z_ra_dec*-0.5254716510722678 > + (cos(radians((a.dsr))))) + """) in HeasarcClass._fix_sql_whitespace(full_with_strpos) + + full_with_strtimes = Heasarc.query_all("217.0 -31.7", + start_time="2017-01-01", + end_time="2020-01-02", get_query_payload=True) + assert "end_time > 57754.0" in full_with_strtimes and \ + "start_time < 58850.0" in full_with_strtimes diff --git a/astroquery/heasarc/tests/test_heasarc_remote.py b/astroquery/heasarc/tests/test_heasarc_remote.py index 4d765c7583..0942b22c54 100644 --- a/astroquery/heasarc/tests/test_heasarc_remote.py +++ b/astroquery/heasarc/tests/test_heasarc_remote.py @@ -360,3 +360,20 @@ def test_query_region_nohits(self): assert warnings[0].category == AstropyDeprecationWarning assert warnings[1].category == NoResultsWarning assert len(catalog) == 0 + + +@pytest.mark.remote_data +def test__query_all(): + result = Heasarc.query_all("217.0 -31.70", + start_time="2017-01-01", + end_time="2020-01-01") + assert len(result) == 7 + assert result[0]['table_name'] == 'intscw' + assert result[1]['count'] == 556 + + result = Heasarc.query_all("217.0 -31.70", radius=1, + start_time="2017-01-01", + end_time="2020-01-01") + assert len(result) == 6 + assert result[0]['table_name'] == 'swiftbalog' + assert result[1]['count'] == 45 diff --git a/docs/heasarc/heasarc.rst b/docs/heasarc/heasarc.rst index 5aa7a28b36..fc468b2fb5 100644 --- a/docs/heasarc/heasarc.rst +++ b/docs/heasarc/heasarc.rst @@ -201,6 +201,28 @@ following for instance will find master catalogs that have keywords 'nicer' or ' nicermastr NICER Master Catalog swiftmastr Swift Master Catalog +Query All Available Catalogs +---------------------------- +If you need to know which catalogs are worth querying for your source, you can +use the query_all() function that takes advantage of a fast but limited HEASARC +`trick `. + +.. doctest-remote-data:: + >>> from astroquery.heasarc import Heasarc + >>> from astropy.coordinates import SkyCoord + >>> from astropy import units as u + >>> pos = SkyCoord('217.0 -31.7', unit=u.deg) + >>> matches = Heasarc.query_all(pos) + >>> matches[0:5].pprint() + table_name count description regime mission obj_type max_offset_deg + ---------- ----- ---------------------------------------------------------------- ---------------- -------- -------- ------------------ + hete2tl 6971 HETE-2 Timeline Gamma-ray, X-ray hete-2 39.99700793332599 + intscw 4088 INTEGRAL Science Window Data Gamma-ray, X-ray integral 9.999106184550586 + intscwpub 2039 INTEGRAL Public Pointed Science Window Data Gamma-ray, X-ray integral 9.999106184550586 + icecubepsc 90 IceCube All-Sky Point-Source Neutrino Events Catalog (2008-2018) icecube 0.997001295956274 + comptel 52 CGRO/COMPTEL Low-Level Data and Maps Gamma-ray cgro 39.325681970373786 + + Then as above, you query the table(s) that look likely individually. Adding Column Constraints ---------------------------------------- @@ -221,7 +243,6 @@ Note that when column filters are given and no position is specified, the search defaults to an all-sky search. .. doctest-remote-data:: - >>> from astroquery.heasarc import Heasarc >>> tab = Heasarc.query_region( ... catalog='chanmaster', column_filters={'exposure': ('>', '190000')}