Duke-GCB
diff --git a/‎intro-programming-python-instructor.ipynb‎
Lines changed: 63 additions & 68 deletions b/‎intro-programming-python-instructor.ipynb‎
Lines changed: 63 additions & 68 deletions
@@ -30,7 +30,7 @@
     }
    },
    "source": [
-    "## Python & Notebook"
+    "## Python in the Jupyter Notebook Environment"
    ]
   },
   {
@@ -46,7 +46,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The gray boxes are cells. We write code in the cells with instructions for the computer to execute. Let's start by telling the computer to add 5 and 3 and print the result. "
+    "The gray boxes are cells. We write code in the cells with instructions for the computer to execute. Let's start by adding 5 and 3 and printing the result. "
    ]
   },
   {
@@ -87,10 +87,10 @@
    "outputs": [
     {
      "ename": "SyntaxError",
-     "evalue": "invalid syntax (<ipython-input-2-50b4ae29d403>, line 1)",
+     "evalue": "invalid syntax (3449611317.py, line 1)",
      "output_type": "error",
      "traceback": [
-      "\u001b[0;36m  File \u001b[0;32m\"<ipython-input-2-50b4ae29d403>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m    print(Hello world)\u001b[0m\n\u001b[0m                    ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
+      "\u001b[0;36m  Input \u001b[0;32mIn [2]\u001b[0;36m\u001b[0m\n\u001b[0;31m    print(Hello world)\u001b[0m\n\u001b[0m                ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
      ]
     }
    ],
@@ -102,7 +102,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "OK so Python didn't like that, but there's a lot to learn here\n",
+    "OK so Python returned an error, but there's a lot to learn here:\n",
     "\n",
     "- Errors can be really helpful.\n",
     "- It's common to make mistakes, don't be discouraged\n",
@@ -205,7 +205,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "And we can print multiple things by inserting commas. So let's all greet our neighbor:"
+    "And we can print multiple items by inserting commas. So we can combine our greeting and neighbor to greet our neighbor:"
    ]
   },
   {
@@ -340,7 +340,7 @@
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
       "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-11-6fb37bd547ba>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mmy_float\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m50.0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mmy_string\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Hello\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmy_float\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mmy_string\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "Input \u001b[0;32mIn [11]\u001b[0m, in \u001b[0;36m<cell line: 3>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m my_float \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m50.0\u001b[39m\n\u001b[1;32m      2\u001b[0m my_string \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHello\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mmy_float\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mmy_string\u001b[49m)\n",
       "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for +: 'float' and 'str'"
      ]
     }
@@ -508,7 +508,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "One more optional parameter you can use is called a step in the format **[start:end:step]**. This parameter determines what you \"count by\" and is 1 by default. "
+    "One more optional parameter you can use is called a **step** in the format **[start:end:step]**. This parameter determines what you \"count by\" and is 1 by default. "
    ]
   },
   {
@@ -793,10 +793,10 @@
    "outputs": [
     {
      "ename": "SyntaxError",
-     "evalue": "invalid syntax (<ipython-input-28-3051ded84a5c>, line 1)",
+     "evalue": "invalid syntax (3233142308.py, line 1)",
      "output_type": "error",
      "traceback": [
-      "\u001b[0;36m  File \u001b[0;32m\"<ipython-input-28-3051ded84a5c>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m    1a\u001b[0m\n\u001b[0m     ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
+      "\u001b[0;36m  Input \u001b[0;32mIn [28]\u001b[0;36m\u001b[0m\n\u001b[0;31m    1a\u001b[0m\n\u001b[0m     ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
      ]
     }
    ],
@@ -866,7 +866,7 @@
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
       "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-31-74803fcdf01e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mr3\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'G'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "Input \u001b[0;32mIn [31]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m r3[\u001b[38;5;241m2\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mG\u001b[39m\u001b[38;5;124m'\u001b[39m\n",
       "\u001b[0;31mTypeError\u001b[0m: 'str' object does not support item assignment"
      ]
     }
@@ -1110,7 +1110,7 @@
     }
    ],
    "source": [
-    "my_dictionary = {\"key\":\"value\", \"second_key\":2, 3:\"third_value\"}\n",
+    "my_dictionary = {\"key\": \"value\", \"second_key\": 2, 3: \"third_value\"}\n",
     "print(my_dictionary)"
    ]
   },
@@ -1281,7 +1281,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "As an exercise, let's get the total quantity of nucleotides we have. This loop should look a lot like counting the bases in the sequence"
+    "As an exercise, let's get the total quantity of nucleotides we have in the dictionary counts. This loop should look a lot like counting the bases in the sequence."
    ]
   },
   {
@@ -1404,7 +1404,7 @@
     "\n",
     "We've looked at strings and dictionaries as collection types. There's another one that's really useful, and it's called a list. It's an ordered collection of elements. Lists use the `[]` square brackets, and elments are separated by commas.\n",
     "\n",
-    "Here's one that contains our 3 sequences.:"
+    "Here's one that contains our 3 sequences."
    ]
   },
   {
@@ -1516,7 +1516,7 @@
    "source": [
     "### Making choices / Conditionals\n",
     "\n",
-    "When writing programs, it's very handy to examine some data or some results, and make a decision about what to do next. This is called a conditional, and most common version is the `if-else`.\n",
+    "When writing programs, it's useful to examine some data or some results and make a decision about what to do next. This is called a conditional, and the most common version is the `if-else`.\n",
     "\n",
     "Here's how it works. we start with an `if` keyword, and then we write some expression that will either be **True** or **False**. Then we write a colon, and indent what should happen **if** the expression was **True**. \n",
     "\n",
@@ -2034,7 +2034,7 @@
       " |  index(...)\n",
       " |      S.index(sub[, start[, end]]) -> int\n",
       " |      \n",
-      " |      Return the lowest index in S where substring sub is found, \n",
+      " |      Return the lowest index in S where substring sub is found,\n",
       " |      such that sub is contained within S[start:end].  Optional\n",
       " |      arguments start and end are interpreted as in slice notation.\n",
       " |      \n",
@@ -2073,8 +2073,8 @@
       " |  isidentifier(self, /)\n",
       " |      Return True if the string is a valid Python identifier, False otherwise.\n",
       " |      \n",
-      " |      Use keyword.iskeyword() to test for reserved identifiers such as \"def\" and\n",
-      " |      \"class\".\n",
+      " |      Call keyword.iskeyword(s) to test whether string s is a reserved identifier,\n",
+      " |      such as \"def\" or \"class\".\n",
       " |  \n",
       " |  islower(self, /)\n",
       " |      Return True if the string is a lowercase string, False otherwise.\n",
@@ -2143,6 +2143,19 @@
       " |      If the separator is not found, returns a 3-tuple containing the original string\n",
       " |      and two empty strings.\n",
       " |  \n",
+      " |  removeprefix(self, prefix, /)\n",
+      " |      Return a str with the given prefix string removed if present.\n",
+      " |      \n",
+      " |      If the string starts with the prefix string, return string[len(prefix):].\n",
+      " |      Otherwise, return a copy of the original string.\n",
+      " |  \n",
+      " |  removesuffix(self, suffix, /)\n",
+      " |      Return a str with the given suffix string removed if present.\n",
+      " |      \n",
+      " |      If the string ends with the suffix string and that suffix is not empty,\n",
+      " |      return string[:-len(suffix)]. Otherwise, return a copy of the original\n",
+      " |      string.\n",
+      " |  \n",
       " |  replace(self, old, new, count=-1, /)\n",
       " |      Return a copy with all occurrences of substring old replaced by new.\n",
       " |      \n",
@@ -2268,7 +2281,7 @@
       " |  __new__(*args, **kwargs) from builtins.type\n",
       " |      Create and return a new object.  See help(type) for accurate signature.\n",
       " |  \n",
-      " |  maketrans(x, y=None, z=None, /)\n",
+      " |  maketrans(...)\n",
       " |      Return a translation table usable for str.translate().\n",
       " |      \n",
       " |      If there is only one argument, it must be a dictionary mapping Unicode\n",
@@ -2303,7 +2316,7 @@
     "3. Reverses their order\n",
     "4. (Bonus) Print the first letter from each word\n",
     "\n",
-    "Hint: Use the `help()` function on `str` and `list` to see what methods are available to assist with the task. "
+    "**Hint:** Use the `help()` function on `str` and `list` to see what methods are available to assist with the task. "
    ]
   },
   {
@@ -2536,7 +2549,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "To test that the function works, we could eyeball it, and we can also test that double-reversing gives us the original sequence:"
+    "To test that the function works, we could:\n",
+    "- see if the output is correct ourselves\n",
+    "- test that double-reversing gives us the original sequence"
    ]
   },
   {
@@ -3037,14 +3052,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We've done a good job of organizing our code into functions here, but we've only been running them from this notebook. So next, we're going to take our code and put it in a script - starting with the `read_sequence` function.\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "This is going to be very familiar, since we're doing the same thing we did in bash, just with a different language!"
+    "We've done a good job of organizing our code into functions here, but we've only been running them from this notebook. So next, we're going to take our code and put it in a script - starting with the `read_fasta` function."
    ]
   },
   {
@@ -3081,7 +3089,7 @@
     "    f = open(filename)\n",
     "    for line in f:\n",
     "        line = line.strip()\n",
-    "        if not '>' in line:\n",
+    "        if '>' not in line:\n",
     "            # Append to the last sequence\n",
     "            sequence = sequence + line\n",
     "    f.close()\n",
@@ -3090,6 +3098,19 @@
     "print(read_fasta('ae.fa'))\n"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Our script reads our `ae.fa` file every time we run it, but we know most programs don't work that way. The programs we used in bash expected a data file as an *argument*, and that's a good convention for programs we write too.\n",
+    "\n",
+    "In Python, our program can get these arguments, but we have to load a module called `sys` from the standard library, a collection of modules included with Python but not loaded by default. The documentation for these modules is part of the Python documentation: https://docs.python.org/3/library/sys.html\n",
+    "\n",
+    "Libraries are incredibly useful - there are libraries for working with numeric and scientific data, generating plots, fetching data from the web, working with image and document files, databases, etc. And of course, there's a library for getting things like your script's command-line arguments.\n",
+    "\n",
+    "So, let's change our `read_fasta.py` program slightly."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -3105,7 +3126,7 @@
     "    f = open(filename)\n",
     "    for line in f:\n",
     "        line = line.strip()\n",
-    "        if not '>' in line:\n",
+    "        if '>' not in line:\n",
     "            # Append to the last sequence\n",
     "            sequence = sequence + line\n",
     "    f.close()\n",
@@ -3114,6 +3135,13 @@
     "print(read_fasta(sys.argv[1]))\n"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "But what happens if we don't have an input file name? According to the documentation, `sys.argv` is a list whose first item, `sys.argv[0]`, is the name of the script, and whose remaining items are the command-line arguments. If no argument was passed, `sys.argv` is a list containing just the script name."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -3129,52 +3157,19 @@
     "    f = open(filename)\n",
     "    for line in f:\n",
     "        line = line.strip()\n",
-    "        if not '>' in line:\n",
+    "        if '>' not in line:\n",
     "            # Append to the last sequence\n",
     "            sequence = sequence + line\n",
     "    f.close()\n",
     "    return sequence\n",
     "\n",
     "if len(sys.argv) < 2:\n",
     "    print('Usage:', sys.argv[0], '<sequence.fa>')\n",
-    "    exit(1)\n",
+    "    sys.exit(1)\n",
     "\n",
     "print(read_fasta(sys.argv[1]))\n"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "`$ python read_fasta.py`"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Our script reads our `ae.fa` file every time we run it, but we know most programs don't work that way. The programs we used in bash expected a data file as an *argument*, and that's a good convention for programs we write too.\n",
-    "\n",
-    "In Python, our program can get these arguments, but we have to load a library to access them. Google it!\n",
-    "\n",
-    "`import sys`\n",
-    "\n",
-    "https://docs.python.org/3/library/sys.html\n",
-    "\n",
-    "Libraries are incredibly useful - there are libraries for working with numeric and scientific data, generating plots, fetching data from the web, working with image and document files, databases, etc. And of course, there's a library for getting things like your script's command-line arguments.\n",
-    "\n",
-    "So, let's change our `read_fasta.py` program slightly\n",
-    "\n",
-    "    import sys\n",
-    "    read_fasta(sys.argv[1])\n",
-    "\n",
-    "And run it. But what happens if we don't have a file name\n",
-    "\n",
-    "    if len(sys.argv) < 2:\n",
-    "        print 'Usage:', sys.argv[0], '<sequence.fa>'\n",
-    "        exit(1)\n"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -3186,7 +3181,7 @@
  "metadata": {
   "anaconda-cloud": {},
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -3200,7 +3195,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.7"
+   "version": "3.9.12"
   }
  },
  "nbformat": 4,