diff --git a/src/GMT.jl b/src/GMT.jl index a34bcab3e..1a80d75b8 100644 --- a/src/GMT.jl +++ b/src/GMT.jl @@ -196,6 +196,7 @@ export # Reexport some from Dates Year, Month, Week, Day, Hour, Minute, Second, year, month, week, day, hour, minute, second, now, today, + bissextile, @?, @G, @dir diff --git a/src/extras/weather.jl b/src/extras/weather.jl index 0977e3a5b..d6f977c37 100644 --- a/src/extras/weather.jl +++ b/src/extras/weather.jl @@ -1,6 +1,6 @@ """ [D =] weather(lon=0.0, lat=0.0; city::String="", last=0, days=7, year::Int=0, starttime::Union{DateTime, String}="", - endtime::Union{DateTime, String}="", variable="temperature_2m", debug=false, show=false, kw...) + endtime::Union{DateTime, String}="", variable="temperature_2m", dryrun=false, show=false, kw...) Plots and/or retrieve weather data obtained from the [Open-Meteo](https://open-meteo.com/en/docs) API. Please consult the site for further details. You will find that there are many variables available to plot and with @@ -25,7 +25,7 @@ that inspired this function, that is much smaller (~25 LOC) and has no further d - `last`: If value is an integer (*e.g.* `last=90`), select the events in the last n days. If it is a string than we expect that it ends with a 'w'(eek), 'm'(onth) or 'y'(ear). Example: `last="2Y"` (period code is caseless) - `year`: An integer, restrict data download to this year. -- `debug`: Print the url of the requested data and return. +- `dryrun`: Print the url of the requested data and return. - `data`: The default is to make a seismicity map but if the `data` option is used (containing whatever) we return the data in a ``GMTdataset`` - `figname`: $(opt_savefig) @@ -43,8 +43,9 @@ weather(city="Copenhagen", year=2023, variable="rain_sum", show=true) ``` """ function weather(lon=0.0, lat=0.0; city::String="", last=0, days=7, year::Int=0, starttime::Union{DateTime, String}="", - endtime::Union{DateTime, String}="", variable="temperature_2m", debug=false, show=false, kw...) + endtime::Union{DateTime, String}="", variable="temperature_2m", dryrun=false, debug=false, show=false, kw...) + # Keep the deprecated option 'debug' for now but remove it in the future (17-May-2025) d = KW(kw) url_forcast = "https://api.open-meteo.com/v1/forecast?format=csv&" url_archive = "https://archive-api.open-meteo.com/v1/archive?format=csv&" @@ -60,7 +61,7 @@ function weather(lon=0.0, lat=0.0; city::String="", last=0, days=7, year::Int=0, hourly = any(variable .== daily_vars) ? false : true url *= hourly ? "&hourly=" * variable : "&daily=" * variable - (debug != 0) && (println(url); return) + (dryrun != 0 || debug != 0) && (println(url); return) file = Downloads.download(url, "_query.csv") D = gmtread(file, h=4) # File has 4 header rows but only the 4rth matters @@ -76,37 +77,37 @@ end # ------------------------------------------------------------------------------------------------------------------------ """ ecmwf(; filename="", cb::Bool=false, dataset="", params::AbstractString="", key::String="", - url::String="", region="", format="netcdf", debug::Bool=false, verbose::Bool=true) + url::String="", region="", format="netcdf", dryrun::Bool=false, verbose::Bool=true) This function retrieves data from the Climate Data Store (CDS) (https://cds.climate.copernicus.eu) service. - `filename`: The name of the file to save the data to. If not provided, the function will generate a - name based on the dataset and the data format. + name based on the dataset and the data format. - `cb`: A boolean indicating whether to use the clipboard contents. If true, the function will use the ``clipboard()`` to fetch its contents. The clipboard should contain a valid API request code as generated - by the CDS website. This site provides a ``Show API request`` button at the bottom of the download tab - of each dataset. After clicking it, the user can copy the request code with a Control-C (or click ``Copy`` - button) which will and paste it into the clipboard. + by the CDS website. This site provides a ``Show API request`` button at the bottom of the download tab + of each dataset. After clicking it, the user can copy the request code with a Control-C (or click ``Copy`` + button) which will and paste it into the clipboard. - `dataset`: The name of the dataset to retrieve. This option can be skipped if the `dataset` option - is provided in the `params` argument, or is included clipboard copy (the `cb` option is set to true). + is provided in the `params` argument, or is included clipboard copy (the `cb` option is set to true). - `params`: A JSON string containing the request parameters. This string should be in the format expected - by the CDSAPI. When using input via this option the `dataset` option is mandatory. - If you feel brave, you can create the request parametrs yourself and pass them as a two elements string - vector with the output of the ``era5vars()`` and ``era5time()`` functions. In this case, a region selection - and pressure levels, if desired, must be provided via the `region` and `levlist` options. The `region` - option has the same syntax in all other GMT.jl modules that use it, _e.g._ the ``coast`` function. + by the CDSAPI. When using input via this option the `dataset` option is mandatory. + If you feel brave, you can create the request parametrs yourself and pass them as a two elements string + vector with the output of the ``era5vars()`` and ``era5time()`` functions. In this case, a region selection + and pressure levels, if desired, must be provided via the `region` and `levlist` options. The `region` + option has the same syntax in all other GMT.jl modules that use it, _e.g._ the ``coast`` function. - `key`: The API key for the CDSAPI server. Default is the value in the ``.cdsapirc`` file in the home directory. - but if that file does not exist, the user can provide the `key` and `url` as arguments. Instructions on how - to create the ``.cdsapirc`` file for your user account can be found at https://cds.climate.copernicus.eu/how-to-api + but if that file does not exist, the user can provide the `key` and `url` as arguments. Instructions on how + to create the ``.cdsapirc`` file for your user account can be found at https://cds.climate.copernicus.eu/how-to-api - `url`: The URL of the CDS API server. Default is https://cds.climate.copernicus.eu/api - `levlist`: List of pressure levels to retrieve. It can be a string to select a unique level, or a vector - of strings or Ints to select multiple levels. But it can also be a range of levels, e.g. "1000:-100:500". - This option is only used when the `params` argument is provided as a string vector. + of strings or Ints to select multiple levels. But it can also be a range of levels, e.g. "1000:-100:500". + This option is only used when the `params` argument is provided as a string vector. - `region`: Specify a region of a specific geographic area. It can be provided as a string with form "N/W/S/E" - or a 4-element vector or tuple with numeric data. This option is only used when the `params` argument is - provided as a string vector. + or a 4-element vector or tuple with numeric data. This option is only used when the `params` argument is + provided as a string vector. - `format`: The format of the data to download. Default is "netcdf". Other options is "grib". -- `debug`: A boolean indicating whether to print the `params` from the outputs of the `era5vars()` and +- `dryrun`: A boolean indicating whether to print the `params` from the outputs of the `era5vars()` and `era5time()` functions. I this case, we just print the `params` and return without trying to download any file. - `verbose`: A boolean indicating whether to print the attemps to connect to the CDS server. Default is true. @@ -148,24 +149,25 @@ ecmwf(dataset="reanalysis-era5-land", params=[var, datetime], region=(-10, 0, 30 Download a forecast dataset from the ECMWF. ### Kwargs -- `cube`: If true, when downloading pressure levels variables, save them data as a netCDF 3D cubes instead - of one file per layer. Unfortunately, currently that cube file handle is not closed in Julia and we must close - the Julia session to get access to the data. Because of this, the default for the `cube` option is false. +- `cube`: If true [the default], when downloading pressure levels variables, save them data as a netCDF 3D cubes instead + of one file per layer (when `cube=false`). - `date`: The date to select. It can be a string to select a unique date, a ``DateTime`` object, or a Int. - Where the Int is the number of days to go back from today. If the Int is greater than 3, an error is raised. - If the date is a string, it must be in the form YYYYMMDD or YYYY-MM-DD. + Where the Int is the number of days to go back from today. If the Int is greater than 3, an error is raised. + If the date is a string, it must be in the form YYYYMMDD or YYYY-MM-DD. +- `dryrun`: Print the URL of the requested data and return without trying to download anything. - `format`: The format in which to save the downloaded data. It can be "grib" or "netcdf". Default is "netcdf". - `levlist`: The pressure levels to select. It can be a string to select a unique pressure level, - or a vector of strings or Ints to select multiple pressure levels. + or a vector of strings or Ints to select multiple pressure levels. - `model`: A string with the model to select. Either "ifs" or "aifs". Default is "ifs". -- `param, variable, var, vars`: The variable(s) to select. It can be a string to select a unique variable, - or a vector of strings or Ints to select multiple variables. When variable(s) is requested, we download only those - variables as separate files. The names of those files are the same as the variable names with the .grib2 extension. +- `param, variable, var, vars`: The variable(s) to select. It can be a string to select a unique variable, or a vector + of strings or Ints to select multiple variables. When variable(s) is requested, we download only those + variables as separate files. The names of those files are the same as the variable names with the .grib2 extension. + NOTE: Not specifying a variable will download the entire forecast grib file for each forecast step selected with the `step` option. - `root`: The root URL of the CDS ERA5 dataset. Default is "https://data.ecmwf.int/forecasts". - `step`: An Int with the forecast step to select. - `stream`: The stream to select. It can be one of: "oper", "enfo", "waef", "wave", "scda", "scwv", "mmsf". Default is "oper". - `time`: The time in hours to select. It can be a string a ``Time`` object, or a Int. What ever it is, - it will floored to 0, 6, 12 or 18. The default is the current hour. + it will floored to 0, 6, 12 or 18. The default is the current hour. - `stream`: A string with the stream to select, it must be one of: "oper", "enfo", "waef", "wave", "scda", "scwv", "mmsf". Default is "oper". - `type`: A string with the type of forecast to select, it must be one of: "fc", "ef", "ep", "tf". Default is "fc". @@ -176,9 +178,9 @@ the data is likely not available yet. Adding a good `date` will make it work. ecmwf(:forecast, vars=["10u", "2t"]) ``` """ -function ecmwf(source::Symbol=:reanalysis; filename="", cb::Bool=false, dataset="", params::Union{AbstractString, Vector{String}}="", key::String="", url::String="", wait=1.0, levlist="", region="", format="netcdf", debug::Bool=false, verbose::Bool=true, kw...) +function ecmwf(source::Symbol=:reanalysis; filename="", cb::Bool=false, dataset="", params::Union{AbstractString, Vector{String}}="", key::String="", url::String="", wait=1.0, levlist="", region="", format="netcdf", dryrun=false, verbose::Bool=true, kw...) - (source == :forecast) && return ecmwf_fc(; filename=filename, levlist=levlist, kw...) + (source == :forecast) && return ecmwf_fc(; levlist=levlist, dryrun=dryrun, kw...) function cdsapikey()::Tuple{String, String} # Get the API key and URL from the ~/.cdsapirc file @@ -260,7 +262,7 @@ function ecmwf(source::Symbol=:reanalysis; filename="", cb::Bool=false, dataset= params *= "\"area\": [" * optR[1][4:end] * ", " * optR[4] * ", " * optR[2] * ", " * optR[3] * "]\n" end params = "{\"product_type\": [\"reanalysis\"],\n" * params * "}" - debug && (println(params); return nothing) # <== EXIT here + dryrun && (println(params); return nothing) # <== EXIT here end body, _dataset = parse_request(params) end @@ -269,7 +271,7 @@ function ecmwf(source::Symbol=:reanalysis; filename="", cb::Bool=false, dataset= s = curl_post(URL * "/retrieve/v1/processes/$dataset/execute", body, KEY) ind::Union{Nothing, Int} = findfirst(startswith.(s,"\"status")) # Annotate it otherwise it's a spee Anys - (ind === nothing) && throw(ArgumentError("The request was not accepted, probably a malformed one. Check it the 'debug' option.")) + (ind === nothing) && throw(ArgumentError("The request was not accepted, probably a malformed one. Check it the 'dryrun' option.")) status = s[ind][11:end-1] # It has the form "{\"status\":\"accepted\"" if (contains(s[ind], ":4")) ind = findfirst(startswith.(s,"\"title")) @@ -303,152 +305,8 @@ function ecmwf(source::Symbol=:reanalysis; filename="", cb::Bool=false, dataset= run(`curl --show-error $urlname -o $filename`) end -# ------------------------------------------------------------------------------------------------------------------------ -""" - listecmwfvars(source::Symbol=:reanalysis; single::Bool=true, levlist::Bool=false, contain::AbstractString="") - -Print a list of CDS ERA5 variables. - -### Args -- `source`: The source of the data. It can be either ``:reanalysis`` or ``:forecast``. Default is `:reanalysis`. - -### Kwargs -- `single`: If true, only single-level variables are listed. If false, pressure-level variables are listed [Default is true]. -- `pressure`: If true, only pressure-level variables are listed. If false, single-level variables are listed. -- `contain`: A string to filter the variables by their Name. Only those containing this string (case sensitive) - are listed. If not provided, all variables are listed. - -#### Example -```julia -# Print all pressure-level variables. -listecmwfvars(pressure=true) - -# Print only single-level variables containing "Temperature" in their name from the foorecast datasets. -listecmwfvars(:forecast, contain="Temperature") -``` -""" -function listecmwfvars(source::Symbol=:reanalysis; single::Bool=true, pressure::Bool=false, contain::AbstractString="", test::Bool=false) - what = (source == :reanalysis) ? "era5" : "fc" - d, title_str = helper_ecmwf_vars(single, pressure, what) - if (contain != "") - d = filter(((k,v),) -> contains(v[2], contain), d) - isempty(d) && (@info "No variables found in the dataset for the search string \"$contain\""; return nothing) - end - ds = sort(d, by=first) - header = (what == "fc") ? ["ID","Name","Units"] : ["ID","Long-Name (nc var name)","Name","Units"] - align = (what == "fc") ? [:c,:l,:c] : [:c,:l,:l,:c] - t = (what == "fc") ? " (Forecast)" : " (ERA5)"; - test && return nothing - pretty_table(hcat(collect(keys(ds)),stack(values(ds), dims=1)), header=header, alignment=align, - title=title_str * "-level variables" * t, title_alignment=:l, crop=:horizontal) -end - -# ------------------------------------------------------------------------------------------------------------------------ -""" - era5vars(varID; single::Bool=true, pressure::Bool=false) -> String - -Selec one or more variables from a CDS ERA5 dataset. - -This function returns a JSON formatted string that can be used as an input to the ``ecmwf()`` function `params` option. -See the ``listecmwfvars()`` function for a list of available variables. - -### Args -- `varID`: The variable name. It can be a string or a symbol to select a unique variavle, or a vector of - strings/symbols to select multiple variables. - -### Kwargs -- `single`: If true, only single-level variables are listed. If false, pressure-level variables are listed [Default is true]. -- `pressure`: If true, only pressure-level variables are listed. If false, single-level variables are listed. - [Default is false]. - -### Returns -A string with the JSON formatted variable name. - -### Example -```julia -# "t2m" is the 2m temperature and "skt" is the skin temperature -var = era5vars(["t2m", "skt"]) -``` -""" -era5vars(varID::Union{String, Symbol}; single::Bool=true, pressure::Bool=false) = era5vars([string(varID)], single=single, pressure=pressure) -function era5vars(varID::Union{Vector{String}, Vector{Symbol}}; single::Bool=true, pressure::Bool=false)::String - d = helper_ecmwf_vars(single, pressure, "era5")[1] - _vars = (eltype(varID) == Symbol) ? string.(varID) : varID - for k = 1:numel(_vars) - !haskey(d, _vars[k]) && error("Variable \"$_vars[$k]\" not found in the dataset") - end - # Next line took me HOURS to figure it out. Shame on your printf Julia, shame on you. - @sprintf("\"variable\": [\n\t\"%s\"\n],", join([d[k][1] for k in _vars], "\",\n\t\"")) -end - -# --------------------------------------------------------------------------------------------------------------- -function helper_ecmwf_vars(single::Bool, pressure::Bool, prefix::String)::Tuple{Dict{String, Vector{String}}, String} - pressure && (single = false); !single && (pressure = true) - dim_char = (single) ? "2" : "3"; title_str = ((single) ? "\nSingle" : "\nPressure") - return include(joinpath(dirname(pathof(GMT)), "extras/" * prefix * "vars" * dim_char * "d.jl")), title_str -end - -# --------------------------------------------------------------------------------------------------------------- -""" - era5time(; year="", month="", day="", hour="") -> String - -Select one or more date-times from a CDS ERA5 dataset. - -This function returns a JSON formatted string that can be used as an input to the ``ecmwf()`` function `params` option. - -### Kwargs -- `year`: The year(s) to select. It can be a string to select a unique year, or a vector of strings or Ints to select multiple years. - It can also be a range of years, e.g. "2010:2020". -- `month`: The month(s) to select. It can be a string to select a unique month, or a vector of strings or Ints to select multiple months. - It can also be a range of months, e.g. "01:06". -- `day`: The day(s) to select. It can be a string to select a unique day, or a vector of strings or Ints to select multiple days. - It can also be a range of days, e.g. "01:20". -- `hour`: The hour(s) to select. It can be a string to select a unique hour, or a vector of strings or Ints to select multiple hours. - It can also be a range of hours, e.g. "01:10". - -### Returns -A string with the JSON formatted date-time. - -### Example -```julia -# All times in 2023 -var = era5time(year="2023") -``` -""" -function era5time(; year="", month="", day="", hour="") - _y, _m, _d, _h = agora() - yr = getdtp(year, [_y]); (yr === nothing) && error("Unknown type for 'year'") - mo = getdtp(month, [_m]); (mo === nothing) && error("Unknown type for 'month'") - dy = getdtp(day, [_d]); (dy === nothing) && error("Unknown type for 'day'") - hr = getdtp(hour, [_h]); (hr === nothing) && error("Unknown type for 'hour'") - s = @sprintf("\"year\": [\"%s\"],\n\"month\": [\"%s\"],\n\"day\": [\"%s\"],\n\"time\": [\"%s\"],\n", yr, mo, dy, hr) - s = replace(s, "[\"[" => "["); s = replace(s, "]\"]" => "]"); # Remove double [[ & ]] - return s -end - -function getdtp(x, def)::Union{Vector{String}, Nothing} # used also in ecmwf() to get the pressure levels - (x == "") ? def : (typeof(x) <: OrdinalRange) ? string.(collect(x)) : isa(x, Vector{Int}) ? string.(x) : isa(x, Vector{String}) ? x : isa(x, String) ? [x] : nothing -end - -function agora() # Must put this in a separate function because I want to use the keywords year, month, etc - t = now() - return string(year(t)), string(month(t)), string(day(t)), string(hour(t)) -end - # --------------------------------------------------------------------------------------------------------------- -#ecmwf(:forecast, vars=["10u", "2t"]) -""" -- `format`: The format in which to save the downloaded data. It can be "grib" or "netcdf". Default is "netcdf". -- `root`: The root URL of the CDS ERA5 dataset. Default is "https://data.ecmwf.int/forecasts". -- `levlist`: The pressure levels to select. It can be a string to select a unique pressure level, - or a vector of strings or Ints to select multiple pressure levels. -- `stream`: The stream to select. It can be one of: "oper", "enfo", "waef", "wave", "scda", "scwv", "mmsf". Default is "oper". -- `type`: The type of forecast to select. -- `cube`: If true, when downloading pressure levels variables, save them data as a netCDF 3D cubes instead - of one file per layer. Unfortunately, currently that cube file handle is not closed in Julia and we must close - the Julia session to get access to the data. Because of this, the default for the `cube` option is false. -""" -function ecmwf_fc(; filename="", levlist="", kw...) +function ecmwf_fc(; levlist="", kw...) d = KW(kw) root = "https://data.ecmwf.int/forecasts" @@ -498,8 +356,8 @@ function ecmwf_fc(; filename="", levlist="", kw...) step = ((val = find_in_dict(d, [:step :steps])[1]) === nothing) ? ["0"] : getdtp(val, ["0"]) (step === nothing) && error("Unknown type for 'step'") - # Check if we want to save multi-layer variables as cubes - cubeit = (find_in_dict(d, [:cube])[1] !== nothing) ? true : false + # Check if we want to save multi-layer variables as cubes. Default is TRUE + cubeit = ((val = find_in_dict(d, [:cube])[1]) !== nothing && val == 0) ? false : true if (cubeit && length(step) > 1 && length(levels) > 1) @warn "Sorry, selecting multi-time-steps and multi-pressure-layers cubes are not supported yet." return nothing @@ -520,95 +378,39 @@ function ecmwf_fc(; filename="", levlist="", kw...) end format = (type == "tf") ? "bufr" : "grib2" # No choices here - # ---------- Date ------------------------------------------------------- - date::String, tim::String, _h::Int = "", "", 0 - if ((val = find_in_dict(d, [:date])[1]) !== nothing) - if (isa(val, String)) - data::String = val # To f the Any - date = (length(data) == 8) ? data : (length(data) == 10 && contains(data, "-")) ? data[1:4] * data[6:7] * data[9:10] : error("When string, 'date' must be in the form YYYYMMDD") - elseif (isa(val, Date) || isa(val, DateTime)) - date = Dates.format(val, dateformat"yyyymmdd") - isa(val, DateTime) && (_h = hour(val)) - elseif (isa(val, Int)) - n::Int = Int(abs(val))::Int - (n > 3) && error(" Forecasts older than 3 days are no longer available") - date = Dates.format(now()-Day(n), dateformat"yyyymmdd"); - end - autodate = false - else # If no date is given, use the current date - date = Dates.format(now(), dateformat"yyyymmdd"); _h = hour(now()) - !check_url(root * "/$date") && (date = Dates.format(now()-Day(1), dateformat"yyyymmdd"); _h = 18) # Too soon? Get yesterday's date - # And the 'ifs' dir exists too? - (_h == 18 && model == "ifs" && !check_url(root * "/$date/$tim" * "z/ifs")) && (tim = @sprintf("%.02d", (_h -= 6))) - autodate = true # To know if we need to check the time too. - end - - # ---------- Time ------------------------------------------------------- - if ((val = find_in_dict(d, [:time])[1]) !== nothing) # Even if gotten above, a explicit mention to time takes precedence - if (isa(val, String)) _h = parse(Int, val)::Int - elseif (isa(val, Time)) _h = hour(val)::Int - elseif (isa(val, Int)) _h = val - end - end - _h -= rem(_h, 6) # To get only 0, 6, 12 or 18 hours - tim = @sprintf("%.02d", _h) - - # --------- Now test if this date/time is available in the server -------- - if (!check_url(root * "/$date/$(tim)z")) # Check if the URL exists - if (autodate) - found = false - while (_h > 0 && !found) # Check if the date is available in the server. If not, go back 6 hours and check again. - tim = @sprintf("%.02d", (_h -= 6)) - found = check_url(root * "/$date/$(tim)z") - end - # OK, but does the 'model' have this date/time? If not, go back another 6 hours and check again. - @label try_again - if (!check_url(root * "/$date/$(tim)z/$model")) - tim = @sprintf("%.02d", (_h -= 6)) - (!check_url(root * "/$date/$(tim)z/$(model)/0p25/$stream")) && (tim = @sprintf("%.02d", (_h -= 6))) - (_h < 0) && (@info "Sorry, giving up trying to find the latest dataset for the requested parameters. Must pass a date/time manually."; return nothing) - else # And does the 'stream' exist? - if !check_url(root * "/$date/$(tim)z/$(model)/0p25/$stream") # Ghrrr!, no. Must go back to another time - tim = @sprintf("%.02d", (_h -= 6)) - (!check_url(root * "/$date/$(tim)z/$(model)/0p25/$stream")) && (tim = @sprintf("%.02d", (_h -= 6))) - else # OK, but does the .index file exist? - if !check_url(root * "/$date/$(tim)z/$(model)/0p25/$(stream)/$(date)$(tim)0000-$(step[1])h-$(stream)-$(type).index") # F... - tim = @sprintf("%.02d", (_h -= 6)) - @goto try_again - end - end - end - else - @info "The requested combination date $date and time $tim is not available in the server. Either too soon or too old. \nManually check the $root to see what is available." - return nothing - end - end - - # All in all, does the base directory exist? (Above checks are not complete) - if (!check_url(root * "/$date/$(tim)z/$(model)/0p25/$stream")) - @info "The requested $root/$date/$(tim)z/$(model)/0p25/$stream directory does not (yet?) exists. You must manually check what's available." - return nothing - end + # ---------- Date-Time --------------------------------------------------- + date, tim, msg = parse_date_time_ecmwf(d, step, root, model, stream, type) + (msg != "") && (@info msg; return nothing) # Requested a non existing thing # [ROOT]/[yyyymmdd]/[HH]z/[model]/[resol]/[stream]/[yyyymmdd][HH]0000-[step][U]-[stream]-[type].[format] + dryrun = ((val = find_in_dict(d, [:dryrun])[1]) !== nothing && val == 1) ? true : false if (vars == "") - println(root * "/$date/$(tim)z$(model)/0p25/$stream/$(date)$(tim)0000-$(step[1])h-$(stream)-$(type).$format") + destdir::String = ((val = find_in_dict(d, [:destdir])[1]) !== nothing) ? string(val)::String * "/" : "" + for ns = 1:numel(step) + fname = "$(date)/$(tim)z/$(model)/0p25/$(stream)/$(date)$(tim)0000-$(step[ns])h-$(stream)-$(type).$format" + dryrun ? println("$(root)/$(fname)") : run(`curl --show-error $(root)/$(fname) -o $(destdir)$fname`) + end return nothing end - grdclip_cmd = parse_R(d, "")[1] * " -Sr1/1 -G" + opt_R = parse_R(d, "")[1] + grdclip_cmd = opt_R * " -Sr1/1 -G" # Not always used. To be replaced when grdconvert GMT bug is fixed + (opt_R != "") && (opt_R = opt_R[4:end]) EXT = (((val = find_in_dict(d, [:format])[1]) !== nothing) && val == "grib") ? ".grib" : ".grd" fmt_info = (EXT == ".grib") ? "GRIB" : "netCDF" # For printing info local mat, G::GMTgrid{Float32,2}, x, y, range, inc tmp = tempname() for ns = 1:numel(step) # Loop over the steps - Downloads.download(root * "/$date/$(tim)z/$(model)/0p25/$stream/$(date)$(tim)0000-$(step[ns])h-$(stream)-$(type).index", tmp) + thisFile = root * "/$(date)/$(tim)z/$(model)/0p25/$(stream)/$(date)$(tim)0000-$(step[ns])h-$(stream)-$(type)." + dryrun && (println(thisFile * "grib2"); continue) # Dry run + + Downloads.download(thisFile * "index", tmp) # Find the 'offset' and 'length' of each variable off_len = fish_var_off_len_in_index_file(tmp, vars, levels, check_var) - url = root * "/$date/$(tim)z/$(model)/0p25/$stream/$(date)$(tim)0000-$(step[ns])h-$(stream)-$(type).$format" + url = thisFile * format for k = 1:numel(vars) # Loop over the vars @info "Downloading $(vars[k]) from $url and saving to $fmt_info format" @@ -617,13 +419,14 @@ function ecmwf_fc(; filename="", levlist="", kw...) n_levels = numel(levels) for nl = 1:n_levels start, len, stop = off_len[k,1,nl], off_len[k,2,nl], off_len[k,1,nl] + off_len[k,2,nl] - 1 # This var byte range + uname = "/vsisubfile/$(start)_$(len),/vsicurl/" * url # URL name if (cubeit && n_levels > 1) # Create a cube with all levels if (nl == 1) # First time, read the first level and create a 3d array with the right dims - G = gmt("grdclip /vsisubfile/$(start)_$(len)" * ",/vsicurl/" * url * grdclip_cmd) + G = gmtread(uname, R=opt_R, grd=true, layout="TRB") x, y, range, inc = G.x, G.y, G.range, G.inc mat = Array{Float32, 3}(undef, size(G,1), size(G,2), length(levels)) else - G = gmt("grdclip /vsisubfile/$(start)_$(len)" * ",/vsicurl/" * url * grdclip_cmd) + G = gmtread(uname, R=opt_R, grd=true, layout="TRB") end mat[:,:,nl] .= G.z else @@ -633,27 +436,25 @@ function ecmwf_fc(; filename="", levlist="", kw...) end end - if (cubeit && n_levels > 1) # Save the cube on a file + if (cubeit && n_levels > 1) # Save the cube (implied by 'cubeit') on a 3D file fname = vars[k] * "_step$(step[ns])_$(model)_$(stream)_$(type)_$(date)_$(tim).nc" # This cube fname make_cube_ecmwf(G, mat, x, y, range, inc, levels, "millibars", levels, "Pressure", vars[1], fname) end else # A surface variable start, len, stop = off_len[k,1,1], off_len[k,2,1], off_len[k,1,1] + off_len[k,2,1] - 1 # This var byte range - fname = vars[k] * "_step$(step[ns])_$(model)_$(stream)_$(type)_$(date)_$(tim)$(EXT)" # This var fname + fname = vars[k] * "_step$(step[ns])_$(model)_$(stream)_$(type)_$(date)_$(tim)$(EXT)" # This var fname (EXT == ".grd") ? gmt("grdclip /vsisubfile/$(start)_$(len),/vsicurl/" * url * grdclip_cmd * fname) : run(`curl --show-error --range $(start)-$(stop) $url -o $fname`) end - #run(`curl --show-error --range $(start)-$(stop) $url -o $fname`) - #gdaltranslate("/vsisubfile/$(start)_$(len)" * ",/vsicurl/" * url, save=vars[k] * "_step$(step[ns]).nc") if (cubeit && multi_steps) if (ns == 1) - G = gmtread(fname) + G = gmtread(fname, layout="TRB") x, y, range, inc = G.x, G.y, G.range, G.inc mat = Array{Float32, 3}(undef, size(G,1), size(G,2), length(step)) else - G = gmtread(fname) + G = gmtread(fname, layout="TRB") end mat[:,:,ns] .= G.z rm(fname) @@ -665,9 +466,86 @@ function ecmwf_fc(; filename="", levlist="", kw...) fname = vars[1] * "_step$(step[1])-$(step[end])_$(model)_$(stream)_$(type)_$(date)_$(tim).nc" # This cube fname make_cube_ecmwf(G, mat, x, y, range, inc, step, "hour", step, "time", vars[1], fname) end - rm(tmp) + !dryrun && rm(tmp) end +# -------------------------------------------------------------------------------------------------------- +function parse_date_time_ecmwf(d, step, root::String, model::String, stream::String, type::String) + # Parse the date and time and mostrly check that the dir/data exists. When no date is given + # we use the current date and try to find the directory of the most recent data. + + # ---------- Date ------------------------------------------------------- + date::String, tim::String, _h::Int = "", "", 0 + if ((val = find_in_dict(d, [:date])[1]) !== nothing) + if (isa(val, String)) + data::String = val # To f the Any + date = (length(data) == 8) ? data : (length(data) == 10 && contains(data, "-")) ? data[1:4] * data[6:7] * data[9:10] : error("When string, 'date' must be in the form YYYYMMDD") + elseif (isa(val, Date) || isa(val, DateTime)) + date = Dates.format(val, dateformat"yyyymmdd") + isa(val, DateTime) && (_h = hour(val)) + elseif (isa(val, Int)) + n::Int = Int(abs(val))::Int + (n > 3) && error(" Forecasts older than 3 days are no longer available") + date = Dates.format(now()-Day(n), dateformat"yyyymmdd"); + end + autodate = false + else # If no date is given, use the current date + date = Dates.format(now(), dateformat"yyyymmdd"); _h = hour(now()) + !check_url(root * "/$date") && (date = Dates.format(now()-Day(1), dateformat"yyyymmdd"); _h = 18) # Too soon? Get yesterday's date + # And the 'ifs' dir exists too? + (_h == 18 && model == "ifs" && !check_url(root * "/$(date)/$(tim)z/ifs")) && (tim = @sprintf("%.02d", (_h -= 6))) + autodate = true # To know if we need to check the time too. + end + + # ---------- Time ------------------------------------------------------- + if ((val = find_in_dict(d, [:time])[1]) !== nothing) # Even if gotten above, a explicit mention to time takes precedence + if (isa(val, String)) _h = parse(Int, val)::Int + elseif (isa(val, Time)) _h = hour(val)::Int + elseif (isa(val, Int)) _h = val + end + end + _h -= rem(_h, 6) # To get only 0, 6, 12 or 18 hours + tim = @sprintf("%.02d", _h) + + # --------- Now test if this date/time is available in the server -------- + if (!check_url(root*"/$(date)/$(tim)z/$(model)/0p25/$(stream)")) # Check if the URL exists + if (autodate) + found = false + while (_h > 0 && !found) # Check if the date is available in the server. If not, go back 6 hours and check again. + tim = @sprintf("%.02d", (_h -= 6)) + found = check_url(root * "/$(date)/$(tim)z") + end + # OK, but does the 'model' have this date/time? If not, go back another 6 hours and check again. + @label try_again + if (!check_url(root * "/$(date)/$(tim)z/$model")) + tim = @sprintf("%.02d", (_h -= 6)) + (!check_url(root * "/$(date)/$(tim)z/$(model)/0p25/$stream")) && (tim = @sprintf("%.02d", (_h -= 6))) + (_h < 0) && return "","", "Sorry, giving up trying to find the latest dataset for the requested parameters. Must pass a date/time manually." + else # And does the 'stream' exist? + if !check_url(root * "/$(date)/$(tim)z/$(model)/0p25/$stream") # Ghrrr!, no. Must go back to another time + tim = @sprintf("%.02d", (_h -= 6)) + (!check_url(root * "/$(date)/$(tim)z/$(model)/0p25/$stream")) && (tim = @sprintf("%.02d", (_h -= 6))) + else # OK, but does the .index file exist? + if !check_url(root * "/$(date)/$(tim)z/$(model)/0p25/$(stream)/$(date)$(tim)0000-$(step[1])h-$(stream)-$(type).index") # F... + tim = @sprintf("%.02d", (_h -= 6)) + @goto try_again + end + end + end + else + return "", "", "The requested combination date $date and time $tim is not available in the server. Either too soon or too old. \nManually check the $root to see what is available." + end + end + + # All in all, does the base directory exist? (Above checks are not complete) + if (!check_url(root * "/$(date)/$(tim)z/$(model)/0p25/$(stream)")) + return "", "", "The requested $(root)/$(date)/$(tim)z/$(model)/0p25/$(stream) directory does not (yet?) exists. You must manually check what's available." + end + + return date, tim, "" +end + +# -------------------------------------------------------------------------------------------------------- function make_cube_ecmwf(G, mat, x, y, range, inc, the_levels, z_unit, names, dim_name, band_name, cube_name) # Make a cube from a 3D array of size (size(G,1), size(G,2), length(names)) cube = mat2grid(mat, G) @@ -677,6 +555,7 @@ function make_cube_ecmwf(G, mat, x, y, range, inc, the_levels, z_unit, names, di gdalwrite(cube, cube_name, cube.v, dim_name=dim_name, band_name=band_name) end +# -------------------------------------------------------------------------------------------------------- function check_url(url::AbstractString)::Bool # Check if the URL exists. Returns true if it does, false otherwise. ret = true @@ -689,6 +568,7 @@ function check_url(url::AbstractString)::Bool return ret end +# -------------------------------------------------------------------------------------------------------- function fish_var_off_len_in_index_file(fname, vars, levels, check_var) # Find the 'offset' and 'length' of each variable in 'vars' in the index file 'fname' # Return a 3D array of size (length(vars), 2, length(levels)) with the offsets and lengths in col1 and col2 @@ -722,3 +602,135 @@ function fish_var_off_len_in_index_file(fname, vars, levels, check_var) close(fid) return off_len end + +# ------------------------------------------------------------------------------------------------------------------------ +""" + listecmwfvars(source::Symbol=:reanalysis; single::Bool=true, levlist::Bool=false, contain::AbstractString="") + +Print a list of CDS ERA5 variables. + +### Args +- `source`: The source of the data. It can be either ``:reanalysis`` or ``:forecast``. Default is `:reanalysis`. + +### Kwargs +- `single`: If true, only single-level variables are listed. If false, pressure-level variables are listed [Default is true]. +- `pressure`: If true, only pressure-level variables are listed. If false, single-level variables are listed. +- `contain`: A string to filter the variables by their Name. Only those containing this string (case sensitive) + are listed. If not provided, all variables are listed. + +#### Example +```julia +# Print all pressure-level variables. +listecmwfvars(pressure=true) + +# Print only single-level variables containing "Temperature" in their name from the foorecast datasets. +listecmwfvars(:forecast, contain="Temperature") +``` +""" +function listecmwfvars(source::Symbol=:reanalysis; single::Bool=true, pressure::Bool=false, contain::AbstractString="", test::Bool=false) + what = (source == :reanalysis) ? "era5" : "fc" + d, title_str = helper_ecmwf_vars(single, pressure, what) + if (contain != "") + d = filter(((k,v),) -> contains(v[2], contain), d) + isempty(d) && (@info "No variables found in the dataset for the search string \"$contain\""; return nothing) + end + ds = sort(d, by=first) + header = (what == "fc") ? ["ID","Name","Units"] : ["ID","Long-Name (nc var name)","Name","Units"] + align = (what == "fc") ? [:c,:l,:c] : [:c,:l,:l,:c] + t = (what == "fc") ? " (Forecast)" : " (ERA5)"; + test && return nothing + pretty_table(hcat(collect(keys(ds)),stack(values(ds), dims=1)), header=header, alignment=align, + title=title_str * "-level variables" * t, title_alignment=:l, crop=:horizontal) +end + +# ------------------------------------------------------------------------------------------------------------------------ +""" + era5vars(varID; single::Bool=true, pressure::Bool=false) -> String + +Selec one or more variables from a CDS ERA5 dataset. + +This function returns a JSON formatted string that can be used as an input to the ``ecmwf()`` function `params` option. +See the ``listecmwfvars()`` function for a list of available variables. + +### Args +- `varID`: The variable name. It can be a string or a symbol to select a unique variavle, or a vector of + strings/symbols to select multiple variables. + +### Kwargs +- `single`: If true, only single-level variables are listed. If false, pressure-level variables are listed [Default is true]. +- `pressure`: If true, only pressure-level variables are listed. If false, single-level variables are listed. + [Default is false]. + +### Returns +A string with the JSON formatted variable name. + +### Example +```julia +# "t2m" is the 2m temperature and "skt" is the skin temperature +var = era5vars(["t2m", "skt"]) +``` +""" +era5vars(varID::Union{String, Symbol}; single::Bool=true, pressure::Bool=false) = era5vars([string(varID)], single=single, pressure=pressure) +function era5vars(varID::Union{Vector{String}, Vector{Symbol}}; single::Bool=true, pressure::Bool=false)::String + d = helper_ecmwf_vars(single, pressure, "era5")[1] + _vars = (eltype(varID) == Symbol) ? string.(varID) : varID + for k = 1:numel(_vars) + !haskey(d, _vars[k]) && error("Variable \"$_vars[$k]\" not found in the dataset") + end + # Next line took me HOURS to figure it out. Shame on your printf Julia, shame on you. + @sprintf("\"variable\": [\n\t\"%s\"\n],", join([d[k][1] for k in _vars], "\",\n\t\"")) +end + +# --------------------------------------------------------------------------------------------------------------- +function helper_ecmwf_vars(single::Bool, pressure::Bool, prefix::String)::Tuple{Dict{String, Vector{String}}, String} + pressure && (single = false); !single && (pressure = true) + dim_char = (single) ? "2" : "3"; title_str = ((single) ? "\nSingle" : "\nPressure") + return include(joinpath(dirname(pathof(GMT)), "extras/" * prefix * "vars" * dim_char * "d.jl")), title_str +end + +# --------------------------------------------------------------------------------------------------------------- +""" + era5time(; year="", month="", day="", hour="") -> String + +Select one or more date-times from a CDS ERA5 dataset. + +This function returns a JSON formatted string that can be used as an input to the ``ecmwf()`` function `params` option. + +### Kwargs +- `year`: The year(s) to select. It can be a string to select a unique year, or a vector of strings or Ints to select multiple years. + It can also be a range of years, e.g. "2010:2020". +- `month`: The month(s) to select. It can be a string to select a unique month, or a vector of strings or Ints to select multiple months. + It can also be a range of months, e.g. "01:06". +- `day`: The day(s) to select. It can be a string to select a unique day, or a vector of strings or Ints to select multiple days. + It can also be a range of days, e.g. "01:20". +- `hour`: The hour(s) to select. It can be a string to select a unique hour, or a vector of strings or Ints to select multiple hours. + It can also be a range of hours, e.g. "01:10". + +### Returns +A string with the JSON formatted date-time. + +### Example +```julia +# All times in 2023 +var = era5time(year="2023") +``` +""" +function era5time(; year="", month="", day="", hour="") + _y, _m, _d, _h = agora() + yr = getdtp(year, [_y]); (yr === nothing) && error("Unknown type for 'year'") + mo = getdtp(month, [_m]); (mo === nothing) && error("Unknown type for 'month'") + dy = getdtp(day, [_d]); (dy === nothing) && error("Unknown type for 'day'") + hr = getdtp(hour, [_h]); (hr === nothing) && error("Unknown type for 'hour'") + s = @sprintf("\"year\": [\"%s\"],\n\"month\": [\"%s\"],\n\"day\": [\"%s\"],\n\"time\": [\"%s\"],\n", yr, mo, dy, hr) + s = replace(s, "[\"[" => "["); s = replace(s, "]\"]" => "]"); # Remove double [[ & ]] + return s +end + +function getdtp(x, def)::Union{Vector{String}, Nothing} # used also in ecmwf() to get the pressure levels + (x == "") ? def : (typeof(x) <: OrdinalRange) ? string.(collect(x)) : isa(x, Vector{Int}) ? string.(x) : isa(x, Vector{String}) ? x : isa(x, String) ? [x] : nothing +end + +function agora() # Must put this in a separate function because I want to use the keywords year, month, etc + t = now()-Day(5) # 5 days ago because the 5 in ERA5 means that the most recent data is 5 days ago. + return string(year(t)), string(month(t)), string(day(t)), string(hour(t)) +end diff --git a/src/utils.jl b/src/utils.jl index ae6ade43f..6b56099dc 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1400,6 +1400,20 @@ function bituncat2(N::Int64)::Tuple{Int, Int} return parse(Int, s[1:32]; base=2), parse(Int, s[33:64]; base=2) end +# --------------------------------------------------------------------------------------------------- +""" + bissextile(year::Integer) -> Bool + +Extremly fast function to check if a year is bissextile (leap year). +See https://hueffner.de/falk/blog/a-leap-year-check-in-three-instructions.html + +It falls back to classic method for years < 0. +""" +function bissextile(year::Integer)::Bool + year >= 0 && return ((year * 1073750999) & 3221352463) <= 126976 + (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0)) +end + # --------------------------------------------------------------------------------------------------- include("makeDCWs.jl") diff --git a/test/test_misc.jl b/test/test_misc.jl index 850c0dc67..b36e0ce50 100644 --- a/test/test_misc.jl +++ b/test/test_misc.jl @@ -308,9 +308,9 @@ var = era5vars(["t2m", "skt"]); # "t2m" is the 2m temperature and "skt" is the skin temperature dt = era5time(hour=10:14); @test_throws ArgumentError ecmwf(dataset="reanalysis-era5-land", params=[var, dt], pressure=[1000, 900], region=(-10, 0, 30, 45), key="blabla") - ecmwf(:forecast, time=0, type="fc", stream="oper", levels=["1000", "925"]); - ecmwf(:forecast, var="t", R="PTC", levlist=["1000", "925", "850"], cube=1) - ecmwf(:forecast, var="t", R="PTC", steps=0:3:6, cube=1) + ecmwf(:forecast, time=0, type="fc", stream="oper", levels=["1000", "925"], dryrun=true); + ecmwf(:forecast, var="t", R="PTC", levlist=["1000", "925", "850"]) + ecmwf(:forecast, var="t", R="PTC", steps=0:3:6) # MB-System println(" MB-System") diff --git a/test/test_utils.jl b/test/test_utils.jl index 2be48ef4b..9e2e82d5c 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -39,4 +39,7 @@ n = GMT.bitcat2(10, 20); n1, n2 = GMT.bituncat2(n); @test n1 == 10 && n2 == 20 + + @test !bissextile(100) + @test bissextile(-4) end diff --git a/test/test_views.jl b/test/test_views.jl index db3d557ea..19561926b 100644 --- a/test/test_views.jl +++ b/test/test_views.jl @@ -69,11 +69,11 @@ imshow(I, coast=(land=:red,), show=false) I = mat2img(rand(UInt16,32,32),x=[220800 453600], y=[3.5535e6 3.7902e6]); imshow(I, show=false) x = range(-10, 10, length = 30); -f(x,y) = sqrt(x^2 + y^2); -imshow(x,x,f, Vd=dbg2); -imshow(x,f, Vd=dbg2); -imshow(f, x, Vd=dbg2); -imshow(f, x, x, Vd=dbg2); +f2(x,y) = sqrt(x^2 + y^2); +imshow(x,x,f2, Vd=dbg2); +imshow(x,f2, Vd=dbg2); +imshow(f2, x, Vd=dbg2); +imshow(f2, x, x, Vd=dbg2); imshow(-2:0.1:2, -1:0.1:3,"rosenbrock", Vd=dbg2); imshow(-2:0.1:2, "rosenbrock", Vd=dbg2); imshow("lixo", Vd=dbg2);