Skip to content

Commit c3c66be

Browse files
authored
Merge pull request #114 from MetOffice/update/aws-input
Update/aws input
2 parents 2ca552e + 95f8155 commit c3c66be

7 files changed

Lines changed: 132 additions & 58 deletions

File tree

notebooks/utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ def copy_s3_files(in_fileglob, out_folder):
8181
in_fileglob: s3 uri of files (wild card can be used)
8282
out_folder: local path where data will be stored
8383
'''
84+
if os.path.isdir(out_folder) == 0:
85+
mode = 0o777
86+
os.makedirs(out_folder, mode, exist_ok = False)
8487
matching_keys = find_matching_s3_keys(in_fileglob)
8588
in_bucket_name = _split_s3_uri(in_fileglob)[0]
8689
out_scheme = urlparse(out_folder).scheme
@@ -120,7 +123,6 @@ def load_data(inpath):
120123
for key in keys:
121124
file = key.split('/')[-1]
122125
if os.path.exists(os.path.join(temp_path, file)) == 0:
123-
print(os.path.join(s3dir, file))
124126
copy_s3_files(os.path.join(s3dir, file), temp_path)
125127
else:
126128
print(key, ' already exist')

notebooks/worksheet1.ipynb

Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -317,9 +317,19 @@
317317
{
318318
"cell_type": "code",
319319
"execution_count": null,
320-
"metadata": {
321-
"scrolled": false
322-
},
320+
"metadata": {},
321+
"outputs": [],
322+
"source": [
323+
"# download data from S3 bucket in data directory\n",
324+
"from utils import copy_s3_files, flush_data\n",
325+
"\n",
326+
"copy_s3_files('s3://ias-pyprecis/data/sample_data.nc', 'data/')"
327+
]
328+
},
329+
{
330+
"cell_type": "code",
331+
"execution_count": null,
332+
"metadata": {},
323333
"outputs": [],
324334
"source": [
325335
"# import the necessary modules\n",
@@ -331,7 +341,7 @@
331341
"%matplotlib inline \n",
332342
"\n",
333343
"# provide the path of your sample data\n",
334-
"sample_data = '/project/ciid/projects/PRECIS/worksheets/data/sample_data.nc'\n",
344+
"sample_data = 'data/sample_data.nc'\n",
335345
"\n",
336346
"# Constrain the reading to a single variable and load it into an Iris cube\n",
337347
"cube = iris.load_cube(sample_data)\n",
@@ -454,18 +464,29 @@
454464
{
455465
"cell_type": "code",
456466
"execution_count": null,
457-
"metadata": {
458-
"scrolled": false
459-
},
467+
"metadata": {},
468+
"outputs": [],
469+
"source": [
470+
"# download data from S3 bucket to data directory\n",
471+
"from utils import copy_s3_files\n",
472+
"\n",
473+
"copy_s3_files('s3://ias-pyprecis/data/pp/cahpa/*', 'data/pp/cahpa/')\n",
474+
"copy_s3_files('s3://ias-pyprecis/data/pp/cahpb/*', 'data/pp/cahpb/')"
475+
]
476+
},
477+
{
478+
"cell_type": "code",
479+
"execution_count": null,
480+
"metadata": {},
460481
"outputs": [],
461482
"source": [
462-
"datadir = '/project/ciid/projects/PRECIS/worksheets/data'\n",
483+
"datadir = 'data/'\n",
463484
"\n",
464485
"rim_width = 8 # width of rim (in number of grid boxes)\n",
465486
"\n",
466487
"for runid in ['cahpa', 'cahpb']:\n",
467488
" ppdir = os.path.join(datadir, 'pp', runid)\n",
468-
" \n",
489+
"\n",
469490
" # find all the files from which to remove the rim\n",
470491
" file_list = glob.glob(ppdir + '/*pm[ghij]*.pp')\n",
471492
" \n",
@@ -483,13 +504,28 @@
483504
" # add meta data stating that rim has been removed\n",
484505
" rrcube.attributes['rim_removed'] = '{} point rim removed'.format(rim_width)\n",
485506
" trimmed_cubes.append(rrcube)\n",
507+
" \n",
486508
" rrcubes = iris.cube.CubeList(trimmed_cubes)\n",
487509
" # Write out the trimmed data file\n",
488-
" outfile = os.path.join(datadir, 'historical', runid + '.mon.1961_1990.rr.nc')\n",
510+
" #outfile = os.path.join(datadir, 'historical', runid + '.mon.1961_1990.rr.nc')\n",
511+
" outfile = os.path.join(datadir, runid + '.mon.1961_1990.rr.nc')\n",
512+
"\n",
489513
" iris.save(rrcubes, outfile)\n",
490514
" print('Saved {}'.format(outfile))"
491515
]
492516
},
517+
{
518+
"cell_type": "code",
519+
"execution_count": null,
520+
"metadata": {},
521+
"outputs": [],
522+
"source": [
523+
"# Delete pp data from the disk\n",
524+
"from utils import flush_data\n",
525+
"flush_data('data/pp/cahpa/*')\n",
526+
"flush_data('data/pp/cahpb/*')"
527+
]
528+
},
493529
{
494530
"cell_type": "markdown",
495531
"metadata": {},
@@ -564,14 +600,14 @@
564600
"\n",
565601
"for runid in ['cahpa', 'cahpb']:\n",
566602
" # Get data directory\n",
567-
" infile = os.path.join(datadir, 'historical', runid + '.mon.1961_1990.rr.nc')\n",
603+
" infile = os.path.join(datadir, runid + '.mon.1961_1990.rr.nc')\n",
568604
" # This will load all the variables in the file into a CubeList\n",
569605
" datacubes = iris.load(infile)\n",
570606
" for cube in datacubes:\n",
571607
" # get the STASH code\n",
572608
" cubeSTASH = cube.attributes['STASH']\n",
573609
" # Make the output file name\n",
574-
" outfile = os.path.join(datadir, 'historical', runid + '.mon.1961_1990.' + stash_codes[str(cubeSTASH)] + '.rr.nc')\n",
610+
" outfile = os.path.join(datadir, runid + '.mon.1961_1990.' + stash_codes[str(cubeSTASH)] + '.rr.nc')\n",
575611
" # Save the file\n",
576612
" iris.save(cube, outfile)\n",
577613
" print('Saved {}'.format(outfile)) "
@@ -653,10 +689,11 @@
653689
}
654690
],
655691
"metadata": {
692+
"instance_type": "ml.t3.medium",
656693
"kernelspec": {
657-
"display_name": "pyprecis-environment",
694+
"display_name": "Python [conda env:pyprecis-environment] (arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1)",
658695
"language": "python",
659-
"name": "pyprecis-environment"
696+
"name": "conda-env-pyprecis-environment-py__SAGEMAKER_INTERNAL__arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1"
660697
},
661698
"language_info": {
662699
"codemirror_mode": {
@@ -676,5 +713,5 @@
676713
}
677714
},
678715
"nbformat": 4,
679-
"nbformat_minor": 2
716+
"nbformat_minor": 4
680717
}

notebooks/worksheet2.ipynb

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@
7575
"import iris.quickplot as qplt\n",
7676
"import cartopy.crs as ccrs\n",
7777
"from mpl_toolkits.axes_grid1 import AxesGrid\n",
78-
"from cartopy.mpl.geoaxes import GeoAxes"
78+
"from cartopy.mpl.geoaxes import GeoAxes\n",
79+
"from utils import copy_s3_files, flush_data"
7980
]
8081
},
8182
{
@@ -104,21 +105,31 @@
104105
"Before running the code, take a look at it line-by-line to understand what steps are being taken. Then click in the box and press <kbd>ctrl</kbd> + <kbd>enter</kbd> to run the code."
105106
]
106107
},
108+
{
109+
"cell_type": "code",
110+
"execution_count": null,
111+
"metadata": {},
112+
"outputs": [],
113+
"source": [
114+
"# we first need to download APHRODITE data\n",
115+
"copy_s3_files('s3://ias-pyprecis/data/APHRODITE/*.nc', 'data/APHRODITE/')"
116+
]
117+
},
107118
{
108119
"cell_type": "code",
109120
"execution_count": null,
110121
"metadata": {},
111122
"outputs": [],
112123
"source": [
113124
"# Provide the names of the directories where the netCDF model files are stored\n",
114-
"DATADIR = '/project/ciid/projects/PRECIS/worksheets/data/'\n",
125+
"DATADIR = 'data/'\n",
115126
"\n",
116127
"# Load and print the HadCM3Q0 (cahpa) model cube data\n",
117-
"infile = os.path.join(DATADIR, 'historical', 'cahpa.mon.1961_1990.pr.rr.nc')\n",
128+
"infile = os.path.join(DATADIR, 'cahpa.mon.1961_1990.pr.rr.nc')\n",
118129
"cahpaData = iris.load_cube(infile)\n",
119130
"\n",
120131
"# Load and print the ECHAM5 (cahpb) model cube data\n",
121-
"infile = os.path.join(DATADIR, 'historical', 'cahpb.mon.1961_1990.pr.rr.nc')\n",
132+
"infile = os.path.join(DATADIR, 'cahpb.mon.1961_1990.pr.rr.nc')\n",
122133
"cahpbData = iris.load_cube(infile)\n",
123134
"\n",
124135
"# Load and print the APHRODITE observation cube data\n",
@@ -309,7 +320,7 @@
309320
"cahpaData.remove_coord('forecast_period')\n",
310321
"cahpaData.remove_coord('forecast_reference_time')\n",
311322
"# Save the new cube as a new netCDF file\n",
312-
"outfile = os.path.join(DATADIR, 'historical', 'cahpa.mon.1961_1990.pr.rr.mmday-1.nc')\n",
323+
"outfile = os.path.join(DATADIR, 'cahpa.mon.1961_1990.pr.rr.mmday-1.nc')\n",
313324
"iris.save(cahpaData, outfile)"
314325
]
315326
},
@@ -338,7 +349,7 @@
338349
"# Remove extraneous cube metadata. This helps make cube comparisons easier later.\n",
339350
"\n",
340351
"# Save the new cube as a new netCDF file using the `outfile` filename we've provided below!\n",
341-
"outfile = os.path.join(DATADIR, 'historical', 'cahpb.mon.1961_1990.pr.rr.mmday-1.nc')\n",
352+
"outfile = os.path.join(DATADIR, 'cahpb.mon.1961_1990.pr.rr.mmday-1.nc')\n",
342353
"\n"
343354
]
344355
},
@@ -373,7 +384,7 @@
373384
"\n",
374385
"# Loop through two model runs\n",
375386
"for jobid in ['cahpa', 'cahpb']:\n",
376-
" infile = os.path.join(DATADIR, 'historical', jobid + '.mon.1961_1990.pr.rr.mmday-1.nc')\n",
387+
" infile = os.path.join(DATADIR, jobid + '.mon.1961_1990.pr.rr.mmday-1.nc')\n",
377388
"\n",
378389
" # Load the data\n",
379390
" data = iris.load_cube(infile)\n",
@@ -437,14 +448,24 @@
437448
"Follow step d) and complete the code yourself. The file name to load is: `aphro.mon.1961_1990.nc`. We've given you the infile and outfile names to make sure you load and save it in the right place for later!"
438449
]
439450
},
451+
{
452+
"cell_type": "code",
453+
"execution_count": null,
454+
"metadata": {},
455+
"outputs": [],
456+
"source": [
457+
"# we first need to download APHRODITE data\n",
458+
"copy_s3_files('s3://ias-pyprecis/data/climatology/*.nc', 'data/climatology/')"
459+
]
460+
},
440461
{
441462
"cell_type": "code",
442463
"execution_count": null,
443464
"metadata": {},
444465
"outputs": [],
445466
"source": [
446467
"# Directory names where data is read from and stored to\n",
447-
"infile = os.path.join(DATADIR, 'APHRODITE', 'aphro.mon.1961_1990.nc')\n",
468+
"infile = os.path.join(DATADIR, 'climatology', 'aphro.mon.1961_1990.nc')\n",
448469
"\n",
449470
"\n",
450471
"# Load the aphrodite data\n",
@@ -460,7 +481,7 @@
460481
"\n",
461482
"\n",
462483
"# save the seasonal mean cube as a NetCDF file\n",
463-
"outfile = os.path.join(DATADIR, 'climatology', 'aphro.OND.mean.1961_1990.pr.mmday-1.nc')\n",
484+
"outfile = os.path.join(DATADIR, 'aphro.OND.mean.1961_1990.pr.mmday-1.nc')\n",
464485
"\n",
465486
"\n",
466487
"# print the APHRODITE seasonal mean cube\n",
@@ -550,7 +571,7 @@
550571
"outputs": [],
551572
"source": [
552573
"# Directory name where data is read from\n",
553-
"indir = os.path.join(DATADIR, 'climatology')\n",
574+
"indir = DATADIR\n",
554575
"\n",
555576
"# load cahpa model data\n",
556577
"infile = os.path.join(indir, 'cahpa.OND.mean.1961_1990.pr.mmday-1.nc')\n",
@@ -663,10 +684,11 @@
663684
}
664685
],
665686
"metadata": {
687+
"instance_type": "ml.t3.medium",
666688
"kernelspec": {
667-
"display_name": "pyprecis-environment",
689+
"display_name": "Python [conda env:pyprecis-environment] (arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1)",
668690
"language": "python",
669-
"name": "pyprecis-environment"
691+
"name": "conda-env-pyprecis-environment-py__SAGEMAKER_INTERNAL__arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1"
670692
},
671693
"language_info": {
672694
"codemirror_mode": {
@@ -686,5 +708,5 @@
686708
}
687709
},
688710
"nbformat": 4,
689-
"nbformat_minor": 1
711+
"nbformat_minor": 4
690712
}

notebooks/worksheet3.ipynb

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,11 @@
6363
"import cartopy.crs as ccrs\n",
6464
"from mpl_toolkits.axes_grid1 import AxesGrid\n",
6565
"from cartopy.mpl.geoaxes import GeoAxes\n",
66+
"from utils import copy_s3_files, flush_data\n",
67+
"\n",
6668
"\n",
6769
"# Provide the names of the directories where the netCDF model files are stored\n",
68-
"DATADIR = '/project/ciid/projects/PRECIS/worksheets/data/'\n",
70+
"DATADIR = 'data/'\n",
6971
"\n",
7072
"# Directory name where data is read from\n",
7173
"HISTDIR = os.path.join(DATADIR, 'historical')\n",
@@ -179,7 +181,7 @@
179181
"outputs": [],
180182
"source": [
181183
"# Load the HadCM3Q0 (cahpa) model cube data as need grid information from it\n",
182-
"infile = os.path.join(HISTDIR, 'cahpa.mon.1961_1990.pr.rr.nc')\n",
184+
"infile = os.path.join(DATADIR, 'cahpa.mon.1961_1990.pr.rr.nc')\n",
183185
"cahpa_cube = iris.load_cube(infile)\n",
184186
"\n",
185187
"pole_lat = cahpa_cube.coord_system().grid_north_pole_latitude\n",
@@ -226,8 +228,8 @@
226228
"\n",
227229
"for jobid in ['cahpa', 'cahpb']:\n",
228230
" # Directory name where data are read from and stored to\n",
229-
" infile = os.path.join(DATADIR, 'historical', jobid + '.mon.1961_1990.pr.rr.mmday-1.nc')\n",
230-
" \n",
231+
" infile = os.path.join(DATADIR, jobid + '.mon.1961_1990.pr.rr.mmday-1.nc')\n",
232+
" print(infile)\n",
231233
" # Load the baseline precipitation data using the KL_constraint - the command below\n",
232234
" # loads the data into a cube constrained by the area chosen\n",
233235
" data = iris.load_cube(infile)\n",
@@ -236,7 +238,7 @@
236238
" grid_latitude=rotated_lats)\n",
237239
"\n",
238240
" # save the constrained cube\n",
239-
" outfile = os.path.join(DATADIR, 'historical', jobid + '.mon.1961_1990.pr.rr.mmday-1.KL.nc')\n",
241+
" outfile = os.path.join(DATADIR, jobid + '.mon.1961_1990.pr.rr.mmday-1.KL.nc')\n",
240242
" iris.save(data_KL, outfile)\n",
241243
" print('Saved: {}'.format(outfile))"
242244
]
@@ -302,7 +304,7 @@
302304
"source": [
303305
"for jobid in ['cahpa', 'cahpb']:\n",
304306
" # Set up the path to the data\n",
305-
" infile = os.path.join(DATADIR, 'historical', jobid + '.mon.1961_1990.pr.rr.mmday-1.KL.nc')\n",
307+
" infile = os.path.join(DATADIR, jobid + '.mon.1961_1990.pr.rr.mmday-1.KL.nc')\n",
306308
" \n",
307309
" # Load the data extracted around Kuala Lumpur created in previous step\n",
308310
" data = iris.load_cube(infile)\n",
@@ -745,6 +747,17 @@
745747
"**j) Plot a series of figures** that shows 1) the monthly cycles of temperature and rainfall comparing the 6 models and the observations; and 2) the monthly differences between the models and observations"
746748
]
747749
},
750+
{
751+
"cell_type": "code",
752+
"execution_count": null,
753+
"metadata": {},
754+
"outputs": [],
755+
"source": [
756+
"# we first need to download CRU and netcdf data\n",
757+
"copy_s3_files('s3://ias-pyprecis/data/CRU/*.nc', 'data/CRU/')\n",
758+
"copy_s3_files('s3://ias-pyprecis/data/netcdf/*.nc', 'data/netcdf/')"
759+
]
760+
},
748761
{
749762
"cell_type": "code",
750763
"execution_count": null,
@@ -755,7 +768,7 @@
755768
"Here are some useful variables you might like to use in your scripts\n",
756769
"'''\n",
757770
"# Some helpful data locations\n",
758-
"DATADIR = '/project/precis/worksheets/data'\n",
771+
"DATADIR = 'data'\n",
759772
"APHRODIR = os.path.join(DATADIR, 'APHRODITE')\n",
760773
"CRUDIR = os.path.join(DATADIR, 'CRU')\n",
761774
"CLIMDIR = os.path.join(DATADIR, 'climatology')\n",
@@ -991,10 +1004,11 @@
9911004
}
9921005
],
9931006
"metadata": {
1007+
"instance_type": "ml.t3.medium",
9941008
"kernelspec": {
995-
"display_name": "pyprecis-environment",
1009+
"display_name": "Python [conda env:pyprecis-environment] (arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1)",
9961010
"language": "python",
997-
"name": "pyprecis-environment"
1011+
"name": "conda-env-pyprecis-environment-py__SAGEMAKER_INTERNAL__arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1"
9981012
},
9991013
"language_info": {
10001014
"codemirror_mode": {
@@ -1014,5 +1028,5 @@
10141028
}
10151029
},
10161030
"nbformat": 4,
1017-
"nbformat_minor": 1
1031+
"nbformat_minor": 4
10181032
}

0 commit comments

Comments
 (0)