-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathusingr.html
More file actions
802 lines (713 loc) · 36.2 KB
/
usingr.html
File metadata and controls
802 lines (713 loc) · 36.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<!-- Human-readable title (same wording as the page's h1) in place of the
     file-name-like placeholder "usingr.utf8.md". The lang attribute on the
     html element declares the page language for assistive technology. -->
<title>Using R for bioassessment data: What, why, and how</title>
<script src="site_libs/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script>
<link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<link href="site_libs/highlightjs-9.12.0/textmate.css" rel="stylesheet" />
<script src="site_libs/highlightjs-9.12.0/highlight.js"></script>
<link href="site_libs/font-awesome-5.1.0/css/all.css" rel="stylesheet" />
<link href="site_libs/font-awesome-5.1.0/css/v4-shims.css" rel="stylesheet" />
<style type="text/css">
/* Keep whitespace exactly as written inside code elements. */
code {
  white-space: pre;
}
/* Code blocks that carry no class attribute get a white background. */
pre:not([class]) {
  background-color: white;
}
</style>
<script type="text/javascript">
// Syntax-highlighting bootstrap: does nothing unless highlight.js has
// been loaded and exposed as window.hljs.
(function () {
  if (!window.hljs) {
    return;
  }
  hljs.configure({languages: []});
  hljs.initHighlightingOnLoad();
  // If the document has already finished loading, also trigger a
  // highlighting pass explicitly on the next tick.
  var alreadyComplete = document.readyState === "complete";
  if (alreadyComplete) {
    window.setTimeout(function () {
      hljs.initHighlighting();
    }, 0);
  }
})();
</script>
<style type="text/css">
/* Heading scale: the page title (h1.title) is the largest, then sizes
   step down from h1 to h6. */
h1 {
  font-size: 34px;
}
h1.title {
  font-size: 38px;
}
h2 {
  font-size: 30px;
}
h3 {
  font-size: 24px;
}
h4 {
  font-size: 18px;
}
h5 {
  font-size: 16px;
}
h6 {
  font-size: 12px;
}
/* Table header cells without an explicit align attribute are left-aligned. */
.table th:not([align]) {
  text-align: left;
}
</style>
<style type = "text/css">
/* Centered main column. */
.main-container {
  max-width: 940px;
  margin-left: auto;
  margin-right: auto;
}
/* Inline code inherits the text color and gets a faint grey background. */
code {
  color: inherit;
  background-color: rgba(0, 0, 0, 0.04);
}
/* Images never overflow their container. */
img {
  max-width:100%;
}
.tabbed-pane {
  padding-top: 12px;
}
.html-widget {
  margin-bottom: 20px;
}
/* Suppress the focus ring only when focus did NOT come from the keyboard.
   The previous rule removed the outline for every focus, leaving keyboard
   users with no visible focus indicator. Browsers that do not understand
   :focus-visible drop this rule entirely and keep their default outline. */
button.code-folding-btn:focus:not(:focus-visible) {
  outline: none;
}
/* Show the disclosure marker on summary elements. */
summary {
  display: list-item;
}
</style>
<style type="text/css">
/* padding for bootstrap navbar */
body {
  padding-top: 51px;
  padding-bottom: 40px;
}
/* offset scroll position for anchor links (for fixed navbar);
   the same offset applies to every heading level inside a section */
.section h1,
.section h2,
.section h3,
.section h4,
.section h5,
.section h6 {
  padding-top: 56px;
  margin-top: -56px;
}
/* nested dropdown menus: the submenu opens to the right of its parent item */
.dropdown-submenu {
  position: relative;
}
.dropdown-submenu>.dropdown-menu {
  top: 0;
  left: 100%;
  margin-top: -6px;
  margin-left: -1px;
  border-radius: 0 6px 6px 6px;
}
.dropdown-submenu:hover>.dropdown-menu {
  display: block;
}
/* small right-pointing triangle drawn with borders after the submenu link */
.dropdown-submenu>a:after {
  display: block;
  content: " ";
  float: right;
  width: 0;
  height: 0;
  border-color: transparent;
  border-style: solid;
  border-width: 5px 0 5px 5px;
  border-left-color: #cccccc;
  margin-top: 5px;
  margin-right: -10px;
}
.dropdown-submenu:hover>a:after {
  border-left-color: #ffffff;
}
/* pull-left variant: the submenu opens to the left instead */
.dropdown-submenu.pull-left {
  float: none;
}
.dropdown-submenu.pull-left>.dropdown-menu {
  left: -100%;
  margin-left: 10px;
  border-radius: 6px 0 6px 6px;
}
</style>
<script>
// manage active state of menu based on current page
$(document).ready(function () {
  // Last path segment of the current URL. Declared with `var` so it stays
  // local to this handler; the previous bare assignment created an implicit
  // global `href` on window.
  var href = window.location.pathname;
  // slice() replaces the deprecated substr(); with a single non-negative
  // start index the two return the same string.
  href = href.slice(href.lastIndexOf('/') + 1);
  // An empty last segment (path ends in "/") is treated as index.html.
  if (href === "") {
    href = "index.html";
  }
  // active menu anchor: every link whose href attribute equals that segment
  var menuAnchor = $('a[href="' + href + '"]');
  // mark it active
  menuAnchor.parent().addClass('active');
  // if it's got a parent navbar menu mark it active as well
  menuAnchor.closest('li.dropdown').addClass('active');
});
</script>
<!-- tabsets -->
<style type="text/css">
/* Tabset rendered as a dropdown: only the active tab is shown until the
   list is opened (.nav-tabs-open), at which point every tab is listed. */
.tabset-dropdown > .nav-tabs {
  display: inline-table;
  max-height: 500px;
  min-height: 44px;
  overflow-y: auto;
  background: white;
  border: 1px solid #ddd;
  border-radius: 4px;
}
/* NOTE(review): the three `content` values below were empty strings even
   though the rules select the Glyphicons icon font, so no indicator glyph
   could render. The code points are restored as CSS escapes on the
   assumption that they match the upstream R Markdown template (\e259 and
   \e258 in Glyphicons Halflings) - confirm against that template. */
.tabset-dropdown > .nav-tabs > li.active:before {
  content: "\e259";
  font-family: 'Glyphicons Halflings';
  display: inline-block;
  padding: 10px;
  border-right: 1px solid #ddd;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open > li.active:before {
  content: "\e258";
  border: none;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open:before {
  content: "\e259";
  font-family: 'Glyphicons Halflings';
  display: inline-block;
  padding: 10px;
  border-right: 1px solid #ddd;
}
.tabset-dropdown > .nav-tabs > li.active {
  display: block;
}
.tabset-dropdown > .nav-tabs > li > a,
.tabset-dropdown > .nav-tabs > li > a:focus,
.tabset-dropdown > .nav-tabs > li > a:hover {
  border: none;
  display: inline-block;
  border-radius: 4px;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open > li {
  display: block;
  float: none;
}
/* Tabs are hidden by default; the more specific rules above reveal the
   active tab and, when open, all tabs. */
.tabset-dropdown > .nav-tabs > li {
  display: none;
}
</style>
<!-- code folding -->
<style type="text/css">
/* Table-of-contents container spacing. */
#TOC {
  margin: 25px 0px 20px 0px;
}
/* At widths up to 768px the TOC is positioned relatively at full width. */
@media (max-width: 768px) {
  #TOC {
    position: relative;
    width: 100%;
  }
}
/* Horizontal padding for the main content column next to the TOC. */
.toc-content {
  padding-left: 30px;
  padding-right: 40px;
}
div.main-container {
  max-width: 1200px;
}
/* TOC widget sizing: 20% wide by default (capped at 260px), 25% between
   768px and 991px, and full width with no cap at 767px and below. */
div.tocify {
  width: 20%;
  max-width: 260px;
  max-height: 85%;
}
@media (min-width: 768px) and (max-width: 991px) {
  div.tocify {
    width: 25%;
  }
}
@media (max-width: 767px) {
  div.tocify {
    width: 100%;
    max-width: none;
  }
}
/* TOC entry typography and square-cornered list items. */
.tocify ul, .tocify li {
  line-height: 20px;
}
.tocify-subheader .tocify-item {
  font-size: 0.90em;
}
.tocify .list-group-item {
  border-radius: 0px;
}
</style>
</head>
<body>
<div class="container-fluid main-container">
<!-- setup 3col/9col grid for toc_float and main content -->
<div class="row-fluid">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>
<div class="toc-content col-xs-12 col-sm-8 col-md-9">
<!-- Site navigation bar. The collapse toggle is an icon-only button, so it
     carries an accessible name (aria-label) plus aria-expanded/aria-controls
     describing the menu it opens; purely decorative icons are hidden from
     assistive technology with aria-hidden. -->
<div class="navbar navbar-inverse navbar-fixed-top" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-label="Toggle navigation" aria-expanded="false" aria-controls="navbar">
        <span class="icon-bar" aria-hidden="true"></span>
        <span class="icon-bar" aria-hidden="true"></span>
        <span class="icon-bar" aria-hidden="true"></span>
      </button>
      <a class="navbar-brand" href="index.html">CABW 2018 R training</a>
    </div>
    <div id="navbar" class="navbar-collapse collapse">
      <ul class="nav navbar-nav">
        <li>
          <a href="index.html">Home</a>
        </li>
        <li>
          <a href="usingr.html">What, why, and how of R</a>
        </li>
        <li>
          <a href="wrangleplot.html">Wrangling and plotting</a>
        </li>
        <li>
          <a href="mapping.html">Mapping</a>
        </li>
        <li>
          <a href="resources.html">
            <span class="fa fa-list" aria-hidden="true"></span>
            Resources
          </a>
        </li>
      </ul>
      <ul class="nav navbar-nav navbar-right">
      </ul>
    </div><!--/.nav-collapse -->
  </div><!--/.container -->
</div><!--/.navbar -->
<div class="fluid-row" id="header">
</div>
<div id="using-r-for-bioassessment-data-what-why-and-how" class="section level1">
<h1>Using R for bioassessment data: What, why, and how</h1>
<div id="lesson-outline" class="section level2">
<h2>Lesson Outline</h2>
<ul>
<li><a href="#goals-and-motivation">Goals and Motivation</a></li>
<li><a href="#rstudio">RStudio</a></li>
<li><a href="#r-language-fundamentals">R language fundamentals</a></li>
<li><a href="#data-structures-in-r">Data structures in R</a></li>
<li><a href="#getting-your-data-into-r">Getting your data into R</a></li>
</ul>
</div>
<div id="lesson-exercises" class="section level2">
<h2>Lesson Exercises</h2>
<ul>
<li><a href="#exercise-1">Exercise 1</a></li>
<li><a href="#exercise-2">Exercise 2</a></li>
<li><a href="#exercise-3">Exercise 3</a></li>
</ul>
</div>
<div id="goals-and-motivation" class="section level2">
<h2>Goals and Motivation</h2>
<p><a href="https://www.r-project.org/">R</a> is a language for statistical computing as well as a general purpose programming language. Increasingly, it has become one of the primary languages used in data science and for data analysis across many of the natural sciences.</p>
<p>The goals of this training are to expose you to fundamentals and to develop an appreciation of what’s possible with this software. We also provide resources that you can use for follow-up learning on your own. You should be able to answer these questions at the end of this session:</p>
<ul>
<li>What is R and why should I use it?</li>
<li>Why would I use RStudio and RStudio projects?</li>
<li>How can I write, save, and run scripts in RStudio?</li>
<li>Where can I go for help?</li>
<li>What are the basic data structures in R?</li>
<li>How do I import data?</li>
</ul>
<div id="why-should-i-invest-time-in-r" class="section level3">
<h3>Why should I invest time in R?</h3>
<p>There are many programming languages available and each has its specific benefits. R was originally created as a statistical programming language but now it is largely viewed as a ‘data science’ language. Why would you invest time in learning R compared to other languages?</p>
<ul>
<li>The growth of R as explained in the <a href="https://stackoverflow.blog/2017/10/10/impressive-growth-r/">Stack Overflow blog</a>, <a href="http://blog.revolutionanalytics.com/2016/07/r-moves-up-to-5th-place-in-ieee-language-rankings.html">IEEE rating</a></li>
</ul>
<p>R is also an open-source programming language - not only is it free, but this means anybody can contribute to its development. As of 2020-01-05, there are 15316 supplemental packages for R on CRAN!</p>
</div>
</div>
<div id="rstudio" class="section level2">
<h2>RStudio</h2>
<p>In the old days, the only way to use R was directly from the Console - this is a bare bones way of running R only with direct input of commands. Now, <a href="https://www.rstudio.com/">RStudio</a> is the go-to Integrated Development Environment (IDE) for R. Think of it like a car that is built around an engine. It is integrated with the console (engine) and includes many other features to improve the user’s experience, such as version control, debugging, dynamic documents, package manager and creation, and code highlighting and completion.</p>
<p>Let’s get familiar with RStudio before we go on.</p>
<div id="open-r-and-rstudio" class="section level3">
<h3>Open R and RStudio</h3>
<p>If you haven’t done so, download and install RStudio from the link above. After it’s installed, find the RStudio shortcut and fire it up (just watch for now). You should see something like this:</p>
<p><img src="figure/rstudio.png" alt="The RStudio window as it appears after startup" /></p>
<p>There are four panes in RStudio:</p>
<ul>
<li>Source</li>
<li>Console</li>
<li>Environment, History, etc.</li>
<li>Files, plots, etc.</li>
</ul>
</div>
<div id="rstudio-projects" class="section level3">
<h3>RStudio projects</h3>
<p>I strongly encourage you to use RStudio projects when you are working with R. The RStudio project provides a central location for working on a particular task. It helps with file management and is portable because all the files live in the same project. RStudio projects also remember history - what commands you used and what data objects are in your environment.</p>
<p>To create a new project, click on the File menu at the top and select ‘New project…’</p>
<p><img src="figure/rstudio_proj.jpg" alt="Creating a new project from the RStudio File menu" /></p>
<p>Now we can use this project for our data and any scripts we create.</p>
</div>
<div id="scripting" class="section level3">
<h3>Scripting</h3>
<p>In most cases, you will not enter and execute code directly in the console. Code can be written in a script and then sent directly to the console when you’re ready to run it. The key difference here is that a script can be saved and shared.</p>
<p>Open a new script from the File menu…</p>
<p><img src="figure/rstudio_script.jpg" alt="Opening a new script from the RStudio File menu" /></p>
</div>
<div id="executing-code-in-rstudio" class="section level3">
<h3>Executing code in RStudio</h3>
<p>After you write your script it can be sent to the Console to run the code in R. Any variables you create in your script will not be available in your working environment until this is done. There are two ways to send code to the console. First, you can hit the <code>Run</code> button at the top right of the scripting window. Second, and preferred, you can use <code>ctrl+enter</code> (<code>cmd+enter</code> on a Mac). Both approaches will send the selected line to the console, then move to the next line in your script. You can also highlight and send an entire block of code.</p>
<p><img src="figure/rstudio_run.jpg" alt="Sending code from a script to the console in RStudio" /></p>
</div>
<div id="what-is-the-environment" class="section level3">
<h3>What is the environment?</h3>
<p>There are two outcomes when you run code. First, the code will simply print output directly in the console. Second, there is no output because you have stored it as a variable (we’ll talk about variable assignment later). Output that is stored is actually saved in the <code>environment</code>. The environment is the collection of named objects that are stored in memory for your current R session. Anything stored in memory will be accessible by its name without running the original script that was used to create it.</p>
</div>
</div>
<div id="exercise-1" class="section level2">
<h2>Exercise 1</h2>
<p>This exercise will make sure R and RStudio are working and that you can get around the basics in RStudio. Use the blue stickies when you have completed, and red stickies if you are running into problems.</p>
<ol style="list-style-type: decimal">
<li><p>Start RStudio: To start both R and RStudio requires only firing up RStudio. RStudio should be available from All Programs at the Start Menu. Fire up RStudio.</p></li>
<li><p>Create a new project. Name it “cabw_r_workshop”. We will use this for the rest of the workshop.</p></li>
<li><p>Create a new “R Script” in the Source Pane, save that file into your newly created project and name it “cabw_script.R”. It’ll just be a blank text file at this point.</p></li>
<li><p>Add in a comment line to separate this section. It should look something like: <code># Exercise 1: Just Getting used to RStudio and Scripts</code>.</p></li>
<li><p>Lastly, we need to get this project set up with some example data for our exercises. You should have downloaded this already, but if not, the data are available <a href="https://SCCWRP.github.io/CABW2018_R_training/data/datazip.zip">here</a>. The data are in a zipped folder. Download the file to your computer (anywhere). Create a folder in your new project named <code>data</code> and extract the files into this location.</p></li>
</ol>
</div>
<div id="r-language-fundamentals" class="section level2">
<h2>R language fundamentals</h2>
<p>The basic syntax of a function follows the form: <code>function_name(arg1, arg2, ...)</code>.</p>
<p>With the base install, you will gain access to many functions (2682, to be exact). Some examples:</p>
<pre class="r"><code># print
print('hello world!')</code></pre>
<pre><code>## [1] "hello world!"</code></pre>
<pre class="r"><code># sequence
seq(1, 10)</code></pre>
<pre><code>## [1] 1 2 3 4 5 6 7 8 9 10</code></pre>
<pre class="r"><code># random numbers
rnorm(100, mean = 10, sd = 2)</code></pre>
<pre><code>## [1] 15.029863 9.504855 8.495452 10.735446 10.147796 11.155330 8.059115
## [8] 11.183410 7.636009 13.084366 8.176553 11.556673 10.870692 11.791386
## [15] 9.911749 10.933113 9.625723 8.604161 10.833915 11.185085 14.206069
## [22] 9.738960 9.776363 10.969604 8.976868 9.456172 8.813325 13.220348
## [29] 6.255681 11.233555 8.403339 7.312847 10.753612 11.922396 7.476220
## [36] 9.773417 9.397070 9.161689 9.166348 15.131267 9.604910 10.148325
## [43] 8.029399 10.743290 10.391792 11.176691 7.758645 11.497596 10.710358
## [50] 11.530855 6.672026 9.071498 12.772938 10.127421 11.664292 9.122088
## [57] 7.476628 9.210463 8.123345 9.385807 9.547906 10.980478 10.760303
## [64] 11.153934 9.599652 9.204581 6.608733 6.840840 10.146724 9.660492
## [71] 9.772478 7.465127 9.246003 11.814800 9.514882 7.540854 9.893602
## [78] 10.258499 8.565367 12.852896 12.131330 10.838035 10.210317 12.011242
## [85] 9.407239 12.972351 10.379250 8.412654 9.893556 13.424042 8.356429
## [92] 11.928442 10.658048 7.511820 12.253362 10.378984 10.950845 9.504111
## [99] 6.661446 12.076945</code></pre>
<pre class="r"><code># average
mean(rnorm(100))</code></pre>
<pre><code>## [1] -0.006075047</code></pre>
<pre class="r"><code># sum
sum(rnorm(100))</code></pre>
<pre><code>## [1] -7.906235</code></pre>
<p>Very often you will see functions used like this:</p>
<pre class="r"><code>my_random_sum &lt;- sum(rnorm(100))</code></pre>
<p>In this case the first part of the line is the name of an object. You make this up. Ideally it should have some meaning, but the only rules are that it can’t start with a number and must not have any spaces. The second bit, <code>&lt;-</code>, is the assignment operator. This tells R to take the result of <code>sum(rnorm(100))</code> and store it in an object named <code>my_random_sum</code>. It is stored in the environment and can be used by just executing its name in the console.</p>
<pre class="r"><code>my_random_sum</code></pre>
<pre><code>## [1] -8.787773</code></pre>
<p>With this, you have the very basics of how we write R code and save objects that can be used later.</p>
<div id="packages" class="section level3">
<h3>Packages</h3>
<p>The base install of R is quite powerful, but you will soon have a need or desire to go beyond this. Packages provide this ability. They are a standardized way of extending R with new methods, techniques, and programming functionality. There is a lot to say about packages regarding finding them, using them, etc., but for now let’s focus just on the basics.</p>
</div>
<div id="cran" class="section level3">
<h3>CRAN</h3>
<p>One of the reasons for R’s popularity is CRAN, <a href="https://cran.r-project.org/">The Comprehensive R Archive Network</a>. This is where you download R and also where most will gain access to packages (there are other places, but that is for later). Not much else to say about this now other than to be aware of it. As of 2020-01-05, there are 15316 packages on CRAN!</p>
</div>
<div id="installing-packages" class="section level3">
<h3>Installing packages</h3>
<p>When a package gets installed, that means the source code is downloaded and put into your library. A default library location is set for you so no need to worry about that. In fact, on Windows most of this is pretty automatic. Let’s give it a shot.</p>
</div>
</div>
<div id="exercise-2" class="section level2">
<h2>Exercise 2</h2>
<p>We’re going to install some packages from CRAN that will give us the tools for our workshop today. We’ll use the tidyverse, sf, mapview, viridis, and USAboundaries packages. Later, we’ll explain in detail what each of these packages provides.</p>
<ol style="list-style-type: decimal">
<li><p>At the top of the script you just created, type the following functions.</p>
<pre class="r"><code># install packages from CRAN
install.packages("tidyverse")
install.packages("sf")
install.packages("mapview")
install.packages("viridis")
install.packages("USAboundaries")</code></pre></li>
<li><p>Select all the lines by clicking and dragging the mouse pointer over the text.</p></li>
<li><p>Send all the commands to the console using <code>ctrl+enter</code>. You should see some text output on the console about the installation process. The installation may take a few minutes so don’t be alarmed.</p></li>
<li><p>After the packages are done installing, verify that there were no errors during the process (this should be pretty obvious, i.e., error text in big scary red letters).</p></li>
<li><p>Load the packages after they’ve installed.</p>
<pre class="r"><code>library("tidyverse")
library("sf")
library("mapview")
library("viridis")
library("USAboundaries")</code></pre></li>
</ol>
<div id="getting-help" class="section level3">
<h3>Getting Help</h3>
<p>Being able to find help and interpret that help is probably one of the most important skills for learning a new language. R is no different. Help on functions and packages can be accessed directly from R, can be found on CRAN and other official R resources, searched on Google, found on StackOverflow, or from any number of fantastic online resources. I will cover a few of these here.</p>
</div>
<div id="help-from-the-console" class="section level3">
<h3>Help from the console</h3>
<p>Getting help from the console is straightforward and can be done numerous ways.</p>
<pre class="r"><code># Using the help command/shortcut
# When you know the name of a function
help("print") # Help on the print command
?print # Help on the print command using the `?` shortcut
# When you know the name of the package
help(package = "sf") # Help on the package `sf`
# Don't know the exact name or just part of it
apropos("print") # Returns all available functions with "print" in the name
??print # shortcut, but also searches demos and vignettes in a formatted page</code></pre>
</div>
<div id="official-r-resources" class="section level3">
<h3>Official R Resources</h3>
<p>In addition to help from within R itself, CRAN and the R-Project have many resources available for support. Two of the most notable are the mailing lists and the <a href="http://cran.r-project.org/web/views/">task views</a>.</p>
<ul>
<li><a href="https://stat.ethz.ch/mailman/listinfo/r-help">R Help Mailing List</a>: The main mailing list for R help. Can be a bit daunting and some (although not most) senior folks can be, um, curmudgeonly…</li>
<li><a href="https://stat.ethz.ch/mailman/listinfo/r-sig-ecology">R-sig-ecology</a>: A special interest group for use of R in ecology. Less daunting than the main help with participation from some big names in ecological modelling and statistics (e.g., Ben Bolker, Gavin Simpson, and Phil Dixon).</li>
<li><a href="http://cran.r-project.org/web/views/Environmetrics.html">Environmetrics Task View</a>: Task views are great in that they provide an annotated list of packages relevant to a particular field. This one is maintained by Gavin Simpson and has great info on packages relevant to much of the work at EPA.</li>
<li><a href="http://cran.r-project.org/web/views/Spatial.html">Spatial Analysis Task View</a>: One I use a lot that lists all the relevant packages for spatial analysis, GIS, and Remote Sensing in R.</li>
</ul>
</div>
<div id="google-and-stackoverflow" class="section level3">
<h3>Google and StackOverflow</h3>
<p>While the resources already mentioned are useful, often the quickest way is to just turn to Google. However, a search for “R” is a bit challenging. There are a few ways around this. Google works great if you search for a given package or function name. You can also search for mailing lists directly (e.g., “R-sig-geo”), although Google often finds results from these sources.</p>
<p>Blind googling can require a bit of strategy to get the info you want. Some pointers:</p>
<ul>
<li>Always preface the search with “r”</li>
<li>Understand which sources are reliable</li>
<li>Take note of the number of hits and date of a web page</li>
<li>When in doubt, search with the exact error message (see here for <a href="https://cran.r-project.org/doc/manuals/R-lang.html#Exception-handling">details</a> about warnings vs errors)</li>
</ul>
<p>One specific resource that I use quite a bit is <a href="http://stackoverflow.com/questions/tagged/r">StackOverflow with the ‘r’ tag</a>. StackOverflow is a discussion forum for all things related to programming. You can then use this tag and the search functions in StackOverflow and find answers to almost anything you can think of. However, these forums are also very strict and I typically use them to find answers not to ask questions.</p>
</div>
<div id="other-resources" class="section level3">
<h3>Other Resources</h3>
<p>As I mentioned earlier, there are TOO many resources to list here and everyone has their favorites. Below are just a few that I like.</p>
<ul>
<li><a href="http://rforcats.net/">R For Cats</a>: Basic introduction site, meant to be a gentle and light-hearted introduction</li>
<li><a href="http://adv-r.had.co.nz/">Advanced R</a>: Web home of Hadley Wickham’s new book. Gets into more advanced topics, but also covers the basics in a great way.</li>
<li><a href="http://cran.r-project.org/doc/contrib/Short-refcard.pdf">CRAN Cheatsheets</a>: A good cheat sheet from the official source</li>
<li><a href="http://www.rstudio.com/resources/cheatsheets/">RStudio Cheatsheets</a>: Additional cheat sheets from RStudio. I am especially fond of the data wrangling one.</li>
</ul>
</div>
</div>
<div id="data-structures-in-r" class="section level2">
<h2>Data structures in R</h2>
<p>Now that you know how to get started in R and where to find resources, we can begin talking about R data structures. Simply put, a data structure is a way for programming languages to handle information storage.</p>
<p>There is a bewildering number of formats for storing data and R is no exception. Understanding the basic building blocks that make up data types is essential. All functions in R require specific types of input data and the key to using functions is knowing how these types relate to each other.</p>
<div id="vectors-one-dimensional-data" class="section level3">
<h3>Vectors (one-dimensional data)</h3>
<p>The basic data format in R is a vector - a one-dimensional grouping of elements that have the same type. These are all vectors and they are created with the <code>c</code> function:</p>
<pre class="r"><code>dbl_var &lt;- c(1, 2.5, 4.5)
int_var &lt;- c(1L, 6L, 10L)
log_var &lt;- c(TRUE, FALSE, T, F)
chr_var &lt;- c("a", "b", "c")</code></pre>
<p>The four types of atomic vectors (think atoms that make up a molecule aka vector) are <code>double</code> (or numeric), <code>integer</code>, <code>logical</code>, and <code>character</code>. For most purposes you can ignore the <code>integer</code> class, so there are basically three types. Each type has some useful properties:</p>
<pre class="r"><code>class(dbl_var)</code></pre>
<pre><code>## [1] "numeric"</code></pre>
<pre class="r"><code>length(log_var)</code></pre>
<pre><code>## [1] 4</code></pre>
<p>These properties are useful not only for describing an object; they also define limits on which functions or types of operations can be used. That is, some functions require a character string input while others require a numeric input. Similarly, vectors of different types or properties may not play well together. Let’s look at some examples:</p>
<pre class="r"><code># taking the mean of a character vector
mean(chr_var)
# adding two numeric vectors of different lengths
vec1 &lt;- c(1, 2, 3, 4)
vec2 &lt;- c(2, 3, 5)
vec1 + vec2</code></pre>
</div>
<div id="dimensional-data" class="section level3">
<h3>2-dimensional data</h3>
<p>A collection of vectors represented as a single data object is often described as two-dimensional data. A more common way of storing two-dimensional data is in a data frame (i.e., <code>data.frame</code>). Think of them like your standard spreadsheet, where each column describes a variable and rows link observations between columns. Here’s a simple example:</p>
<pre class="r"><code>ltrs &lt;- c('a', 'b', 'c')
nums &lt;- c(1, 2, 3)
logs &lt;- c(T, F, T)
mydf &lt;- data.frame(ltrs, nums, logs)
mydf</code></pre>
<pre><code>## ltrs nums logs
## 1 a 1 TRUE
## 2 b 2 FALSE
## 3 c 3 TRUE</code></pre>
<p>The only constraints required to make a data frame are:</p>
<ol style="list-style-type: decimal">
<li><p>Each column contains a single type of data (different columns may have different types).</p></li>
<li><p>The number of observations in each column is equal.</p></li>
</ol>
</div>
</div>
<div id="getting-your-data-into-r" class="section level2">
<h2>Getting your data into R</h2>
<p>It is the rare case when you manually enter your data in R, not to mention impractical for most datasets. Most data analysis workflows typically begin with importing a dataset from an external source. Literally, this means committing a dataset to memory (i.e., storing it as a variable) as one of R’s data structure formats.</p>
<p>Flat data files (text only, rectangular format) present the fewest complications on import because there is very little to assume about the structure of the data. On import, R tries to guess the data type for each column and this is fairly unambiguous with flat files. The base installation of R comes with some easy to use functions for importing flat files, such as <code>read.table()</code> and <code>read.csv()</code>.</p>
<div id="exercise-3" class="section level3">
<h3>Exercise 3</h3>
<p>Now that we have the data downloaded and extracted to our data folder, we’ll use <code>read.csv</code> to import two files.</p>
<ol style="list-style-type: decimal">
<li><p>Type the following in your script. Note the use of <em>relative</em> file paths within your project.</p>
<pre class="r"><code>cscidat <- read.csv('data/cscidat.csv', stringsAsFactors = F)
ascidat <- read.csv('data/ascidat.csv', stringsAsFactors = F)</code></pre></li>
<li><p>Send the commands to the console with <code>ctrl+enter</code>.</p></li>
<li><p>Verify that the data imported correctly by viewing the first six rows of each dataset. Use the <code>head()</code> function directly in the console, e.g., <code>head(cscidat)</code></p></li>
</ol>
<p>Let’s explore the datasets a bit. There are many useful functions for exploring the characteristics of a dataset. This is always a good idea when you first import something.</p>
<pre class="r"><code># get the dimensions
dim(cscidat)</code></pre>
<pre><code>## [1] 1613 10</code></pre>
<pre class="r"><code>dim(ascidat)</code></pre>
<pre><code>## [1] 2585 3</code></pre>
<pre class="r"><code># get the column names
names(cscidat)</code></pre>
<pre><code>## [1] "SampleID_old" "StationCode" "New_Lat" "New_Long"
## [5] "COMID" "E" "OE" "pMMI"
## [9] "CSCI" "SampleID_old.1"</code></pre>
<pre class="r"><code>names(ascidat)</code></pre>
<pre><code>## [1] "id" "site_type" "ASCI"</code></pre>
<pre class="r"><code># see the first six rows
head(cscidat)</code></pre>
<pre><code>## SampleID_old StationCode New_Lat New_Long COMID E
## 1 000CAT148_8.10.10_1 000CAT148 39.07523 -119.8994 8942501 16.05804
## 2 000CAT228_8.10.10_1 000CAT228 39.07307 -119.9201 8942503 16.08960
## 3 102PS0139_8.9.10_1 102PS0139 41.99595 -122.9597 23936337 15.46439
## 4 103CDCHHR_9.14.10_1 103CDCHHR 41.78890 -124.0778 22226836 21.10443
## 5 103FC1106_7.15.14_1 103FC1106 41.93407 -124.1081 22226634 16.83757
## 6 103FCA168_7.24.13_1 103FCA168 41.64962 -124.0912 22226990 19.07408
## OE pMMI CSCI SampleID_old.1
## 1 0.9309977 1.0449580 0.9879779 000CAT148_8.10.10_1
## 2 0.9726777 0.9896232 0.9811505 000CAT228_8.10.10_1
## 3 1.0896002 1.0535386 1.0715694 102PS0139_8.9.10_1
## 4 1.0898184 1.0834653 1.0866419 103CDCHHR_9.14.10_1
## 5 1.0779468 0.9163731 0.9971599 103FC1106_7.15.14_1
## 6 1.0931064 1.0335179 1.0633122 103FCA168_7.24.13_1</code></pre>
<pre class="r"><code>head(ascidat)</code></pre>
<pre><code>## id site_type ASCI
## 1 000CAT148_8.10.10_1 Reference 1.1950555
## 2 000CAT228_8.10.10_1 Reference 1.1514480
## 3 102PS0139_8.9.10_1 Intermediate 0.9345882
## 4 102PS0177_8.28.12_1 Reference 1.1965128
## 5 102PS0177_8.28.12_2 Reference 1.2091360
## 6 103CDCHHR_9.14.10_1 Reference 0.8369236</code></pre>
<pre class="r"><code># get the overall structure
str(cscidat)</code></pre>
<pre><code>## 'data.frame': 1613 obs. of 10 variables:
## $ SampleID_old : chr "000CAT148_8.10.10_1" "000CAT228_8.10.10_1" "102PS0139_8.9.10_1" "103CDCHHR_9.14.10_1" ...
## $ StationCode : chr "000CAT148" "000CAT228" "102PS0139" "103CDCHHR" ...
## $ New_Lat : num 39.1 39.1 42 41.8 41.9 ...
## $ New_Long : num -120 -120 -123 -124 -124 ...
## $ COMID : int 8942501 8942503 23936337 22226836 22226634 22226990 22227592 22226948 22226612 22226750 ...
## $ E : num 16.1 16.1 15.5 21.1 16.8 ...
## $ OE : num 0.931 0.973 1.09 1.09 1.078 ...
## $ pMMI : num 1.045 0.99 1.054 1.083 0.916 ...
## $ CSCI : num 0.988 0.981 1.072 1.087 0.997 ...
## $ SampleID_old.1: chr "000CAT148_8.10.10_1" "000CAT228_8.10.10_1" "102PS0139_8.9.10_1" "103CDCHHR_9.14.10_1" ...</code></pre>
<pre class="r"><code>str(ascidat)</code></pre>
<pre><code>## 'data.frame': 2585 obs. of 3 variables:
## $ id : chr "000CAT148_8.10.10_1" "000CAT228_8.10.10_1" "102PS0139_8.9.10_1" "102PS0177_8.28.12_1" ...
## $ site_type: chr "Reference" "Reference" "Intermediate" "Reference" ...
## $ ASCI : num 1.195 1.151 0.935 1.197 1.209 ...</code></pre>
<p>You can also view each dataset in a spreadsheet style in the scripting window:</p>
<pre class="r"><code>View(cscidat)
View(ascidat)</code></pre>
</div>
<div id="other-ways-to-import-data" class="section level3">
<h3>Other ways to import data</h3>
<p>More often you will probably have an Excel spreadsheet to import. In the old days, importing spreadsheets into R was almost impossible given the proprietary data structure of Excel. The tools available in R have since matured and it’s now pretty painless to import a spreadsheet. The <code>readxl</code> package is the most recent and by far the most flexible data import package for Excel files. It comes with the <code>tidyverse</code> family of packages.</p>
<p>Once it is installed, we can load it to access the import functions.</p>
<pre class="r"><code>library(readxl)
dat <- read_excel('location/of/excel/file.xlsx')</code></pre>
</div>
</div>
<div id="summary" class="section level2">
<h2>Summary</h2>
<p>In this lesson we learned about R and RStudio, some of the basic syntax and data structures in R, and how to import files. We’ve just imported some provisional data for the California Stream Condition Index (CSCI) and the Algal Stream Condition Index (ASCI) that we’ll continue to use for the rest of the workshop. These data represent a portion of the sampling sites that were used to develop each index. Next we’ll learn how to process and plot these data to gain insight into bioassessment patterns throughout the state.</p>
</div>
</div>
</div>
</div>
</div>
<script>
// Give pandoc tables the Bootstrap "table table-condensed" classes.
// A table is matched when it is the parent of a <thead> that is itself the
// parent of a <tr class="header"> row.
function bootstrapStylePandocTables() {
  var headerRows = $('tr.header');
  var pandocTables = headerRows.parent('thead').parent('table');
  pandocTables.addClass('table table-condensed');
}

// Run the styling pass once the DOM is ready.
$(function () {
  bootstrapStylePandocTables();
});
</script>
<!-- tabsets -->
<script>
// Once the DOM is ready, call the page-level buildTabsets helper with "TOC".
$(function () {
  window.buildTabsets("TOC");
});

// Once the DOM is ready, make each tab item of a dropdown-style tabset toggle
// the 'nav-tabs-open' class on its parent element when clicked.
$(function () {
  var dropdownTabs = $('.tabset-dropdown > .nav-tabs > li');
  dropdownTabs.click(function () {
    var tabList = $(this).parent();
    tabList.toggleClass('nav-tabs-open');
  });
});
</script>
<!-- table of contents (tocify) -->
<script>
// Once the DOM is ready, build the table of contents inside #TOC with the
// tocify jQuery plugin.
$(document).ready(function () {
  // Move the toc-ignore marker from each section <div> onto its direct heading
  // children (h1-h5). The plugin is configured below with heading selectors
  // and ignoreSelector ".toc-ignore", so presumably the class must sit on the
  // headings themselves to take effect.
  $('div.section.toc-ignore')
    .removeClass('toc-ignore')
    .children('h1,h2,h3,h4,h5').addClass('toc-ignore');

  // Options handed to tocify, declared in a single literal.
  var options = {
    selectors: "h1,h2,h3",
    theme: "bootstrap3",
    context: '.toc-content',
    // Build an anchor hash from the heading text: strip . \ / ? & ! # < >,
    // replace every whitespace character with "_", then lower-case the result.
    hashGenerator: function (text) {
      return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase();
    },
    ignoreSelector: ".toc-ignore",
    scrollTo: 0,
    showAndHide: true,
    smoothScroll: true
  };

  // Initialise the plugin. The plugin instance was previously fetched into a
  // local that nothing read, so it is no longer captured.
  $("#TOC").tocify(options);
});
</script>
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
// Create a <script> element pointing at the MathJax bundle and append it to
// the first <head> element, so the library is requested at runtime.
(function () {
  var mathjaxUrl = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
  var loader = document.createElement("script");
  loader.type = "text/javascript";
  loader.src = mathjaxUrl;
  var headElement = document.getElementsByTagName("head")[0];
  headElement.appendChild(loader);
})();
</script>
</body>
</html>