|
30 | 30 | } |
31 | 31 | }, |
32 | 32 | "source": [ |
33 | | - "## Python & Notebook" |
| 33 | + "## Python in the Jupyter Notebook Environment" |
34 | 34 | ] |
35 | 35 | }, |
36 | 36 | { |
|
46 | 46 | "cell_type": "markdown", |
47 | 47 | "metadata": {}, |
48 | 48 | "source": [ |
49 | | - "The gray boxes are cells. We write code in the cells with instructions for the computer to execute. Let's start by telling the computer to add 5 and 3 and print the result. " |
| 49 | + "The gray boxes are cells. We write code in the cells with instructions for the computer to execute. Let's start by adding 5 and 3 and printing the result. " |
50 | 50 | ] |
51 | 51 | }, |
52 | 52 | { |
|
87 | 87 | "outputs": [ |
88 | 88 | { |
89 | 89 | "ename": "SyntaxError", |
90 | | - "evalue": "invalid syntax (<ipython-input-2-50b4ae29d403>, line 1)", |
| 90 | + "evalue": "invalid syntax (3449611317.py, line 1)", |
91 | 91 | "output_type": "error", |
92 | 92 | "traceback": [ |
93 | | - "\u001b[0;36m File \u001b[0;32m\"<ipython-input-2-50b4ae29d403>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m print(Hello world)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" |
| 93 | + "\u001b[0;36m Input \u001b[0;32mIn [2]\u001b[0;36m\u001b[0m\n\u001b[0;31m print(Hello world)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" |
94 | 94 | ] |
95 | 95 | } |
96 | 96 | ], |
|
102 | 102 | "cell_type": "markdown", |
103 | 103 | "metadata": {}, |
104 | 104 | "source": [ |
105 | | - "OK so Python didn't like that, but there's a lot to learn here\n", |
| 105 | + "OK so Python returned an error, but there's a lot to learn here:\n", |
106 | 106 | "\n", |
107 | 107 | "- Errors can be really helpful.\n", |
108 | 108 | "- It's common to make mistakes, so don't be discouraged.\n", |
|
205 | 205 | "cell_type": "markdown", |
206 | 206 | "metadata": {}, |
207 | 207 | "source": [ |
208 | | - "And we can print multiple things by inserting commas. So let's all greet our neighbor:" |
| 208 | + "And we can print multiple items by inserting commas. So we can combine our greeting and neighbor to greet our neighbor:" |
209 | 209 | ] |
210 | 210 | }, |
211 | 211 | { |
|
340 | 340 | "traceback": [ |
341 | 341 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
342 | 342 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", |
343 | | - "\u001b[0;32m<ipython-input-11-6fb37bd547ba>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mmy_float\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m50.0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mmy_string\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Hello\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmy_float\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mmy_string\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", |
| 343 | + "Input \u001b[0;32mIn [11]\u001b[0m, in \u001b[0;36m<cell line: 3>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m my_float \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m50.0\u001b[39m\n\u001b[1;32m 2\u001b[0m my_string \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHello\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mmy_float\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mmy_string\u001b[49m)\n", |
344 | 344 | "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for +: 'float' and 'str'" |
345 | 345 | ] |
346 | 346 | } |
|
508 | 508 | "cell_type": "markdown", |
509 | 509 | "metadata": {}, |
510 | 510 | "source": [ |
511 | | - "One more optional parameter you can use is called a step in the format **[start:end:step]**. This parameter determines what you \"count by\" and is 1 by default. " |
| 511 | + "One more optional parameter you can use is called a **step** in the format **[start:end:step]**. This parameter determines what you \"count by\" and is 1 by default. " |
512 | 512 | ] |
513 | 513 | }, |
514 | 514 | { |
|
793 | 793 | "outputs": [ |
794 | 794 | { |
795 | 795 | "ename": "SyntaxError", |
796 | | - "evalue": "invalid syntax (<ipython-input-28-3051ded84a5c>, line 1)", |
| 796 | + "evalue": "invalid syntax (3233142308.py, line 1)", |
797 | 797 | "output_type": "error", |
798 | 798 | "traceback": [ |
799 | | - "\u001b[0;36m File \u001b[0;32m\"<ipython-input-28-3051ded84a5c>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m 1a\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" |
| 799 | + "\u001b[0;36m Input \u001b[0;32mIn [28]\u001b[0;36m\u001b[0m\n\u001b[0;31m 1a\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" |
800 | 800 | ] |
801 | 801 | } |
802 | 802 | ], |
|
866 | 866 | "traceback": [ |
867 | 867 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
868 | 868 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", |
869 | | - "\u001b[0;32m<ipython-input-31-74803fcdf01e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mr3\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'G'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", |
| 869 | + "Input \u001b[0;32mIn [31]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m r3[\u001b[38;5;241m2\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mG\u001b[39m\u001b[38;5;124m'\u001b[39m\n", |
870 | 870 | "\u001b[0;31mTypeError\u001b[0m: 'str' object does not support item assignment" |
871 | 871 | ] |
872 | 872 | } |
|
1110 | 1110 | } |
1111 | 1111 | ], |
1112 | 1112 | "source": [ |
1113 | | - "my_dictionary = {\"key\":\"value\", \"second_key\":2, 3:\"third_value\"}\n", |
| 1113 | + "my_dictionary = {\"key\": \"value\", \"second_key\": 2, 3: \"third_value\"}\n", |
1114 | 1114 | "print(my_dictionary)" |
1115 | 1115 | ] |
1116 | 1116 | }, |
|
1281 | 1281 | "cell_type": "markdown", |
1282 | 1282 | "metadata": {}, |
1283 | 1283 | "source": [ |
1284 | | - "As an exercise, let's get the total quantity of nucleotides we have. This loop should look a lot like counting the bases in the sequence" |
| 1284 | + "As an exercise, let's get the total quantity of nucleotides we have in the dictionary counts. This loop should look a lot like counting the bases in the sequence." |
1285 | 1285 | ] |
1286 | 1286 | }, |
1287 | 1287 | { |
|
1404 | 1404 | "\n", |
1405 | 1405 | "We've looked at strings and dictionaries as collection types. There's another one that's really useful, and it's called a list. It's an ordered collection of elements. Lists use the `[]` square brackets, and elements are separated by commas.\n", |
1406 | 1406 | "\n", |
1407 | | - "Here's one that contains our 3 sequences.:" |
| 1407 | + "Here's one that contains our 3 sequences." |
1408 | 1408 | ] |
1409 | 1409 | }, |
1410 | 1410 | { |
|
1516 | 1516 | "source": [ |
1517 | 1517 | "### Making choices / Conditionals\n", |
1518 | 1518 | "\n", |
1519 | | - "When writing programs, it's very handy to examine some data or some results, and make a decision about what to do next. This is called a conditional, and most common version is the `if-else`.\n", |
| 1519 | + "When writing programs, it's useful to examine some data or some results, and make a decision about what to do next. This is called a conditional, and the most common version is the `if-else`.\n", |
1520 | 1520 | "\n", |
1521 | 1521 | "Here's how it works. We start with an `if` keyword, and then we write some expression that will either be **True** or **False**. Then we write a colon, and indent what should happen **if** the expression was **True**. \n", |
1522 | 1522 | "\n", |
|
2034 | 2034 | " | index(...)\n", |
2035 | 2035 | " | S.index(sub[, start[, end]]) -> int\n", |
2036 | 2036 | " | \n", |
2037 | | - " | Return the lowest index in S where substring sub is found, \n", |
| 2037 | + " | Return the lowest index in S where substring sub is found,\n", |
2038 | 2038 | " | such that sub is contained within S[start:end]. Optional\n", |
2039 | 2039 | " | arguments start and end are interpreted as in slice notation.\n", |
2040 | 2040 | " | \n", |
|
2073 | 2073 | " | isidentifier(self, /)\n", |
2074 | 2074 | " | Return True if the string is a valid Python identifier, False otherwise.\n", |
2075 | 2075 | " | \n", |
2076 | | - " | Use keyword.iskeyword() to test for reserved identifiers such as \"def\" and\n", |
2077 | | - " | \"class\".\n", |
| 2076 | + " | Call keyword.iskeyword(s) to test whether string s is a reserved identifier,\n", |
| 2077 | + " | such as \"def\" or \"class\".\n", |
2078 | 2078 | " | \n", |
2079 | 2079 | " | islower(self, /)\n", |
2080 | 2080 | " | Return True if the string is a lowercase string, False otherwise.\n", |
|
2143 | 2143 | " | If the separator is not found, returns a 3-tuple containing the original string\n", |
2144 | 2144 | " | and two empty strings.\n", |
2145 | 2145 | " | \n", |
| 2146 | + " | removeprefix(self, prefix, /)\n", |
| 2147 | + " | Return a str with the given prefix string removed if present.\n", |
| 2148 | + " | \n", |
| 2149 | + " | If the string starts with the prefix string, return string[len(prefix):].\n", |
| 2150 | + " | Otherwise, return a copy of the original string.\n", |
| 2151 | + " | \n", |
| 2152 | + " | removesuffix(self, suffix, /)\n", |
| 2153 | + " | Return a str with the given suffix string removed if present.\n", |
| 2154 | + " | \n", |
| 2155 | + " | If the string ends with the suffix string and that suffix is not empty,\n", |
| 2156 | + " | return string[:-len(suffix)]. Otherwise, return a copy of the original\n", |
| 2157 | + " | string.\n", |
| 2158 | + " | \n", |
2146 | 2159 | " | replace(self, old, new, count=-1, /)\n", |
2147 | 2160 | " | Return a copy with all occurrences of substring old replaced by new.\n", |
2148 | 2161 | " | \n", |
|
2268 | 2281 | " | __new__(*args, **kwargs) from builtins.type\n", |
2269 | 2282 | " | Create and return a new object. See help(type) for accurate signature.\n", |
2270 | 2283 | " | \n", |
2271 | | - " | maketrans(x, y=None, z=None, /)\n", |
| 2284 | + " | maketrans(...)\n", |
2272 | 2285 | " | Return a translation table usable for str.translate().\n", |
2273 | 2286 | " | \n", |
2274 | 2287 | " | If there is only one argument, it must be a dictionary mapping Unicode\n", |
|
2303 | 2316 | "3. Reverses their order\n", |
2304 | 2317 | "4. (Bonus) Print the first letter from each word\n", |
2305 | 2318 | "\n", |
2306 | | - "Hint: Use the `help()` function on `str` and `list` to see what methods are available to assist with the task. " |
| 2319 | + "**Hint:** Use the `help()` function on `str` and `list` to see what methods are available to assist with the task. " |
2307 | 2320 | ] |
2308 | 2321 | }, |
2309 | 2322 | { |
|
2536 | 2549 | "cell_type": "markdown", |
2537 | 2550 | "metadata": {}, |
2538 | 2551 | "source": [ |
2539 | | - "To test that the function works, we could eyeball it, and we can also test that double-reversing gives us the original sequence:" |
| 2552 | + "To test that the function works, we could:\n", |
| 2553 | + "- see if the output is correct ourselves\n", |
| 2554 | + "- test that double-reversing gives us the original sequence" |
2540 | 2555 | ] |
2541 | 2556 | }, |
2542 | 2557 | { |
|
3037 | 3052 | "cell_type": "markdown", |
3038 | 3053 | "metadata": {}, |
3039 | 3054 | "source": [ |
3040 | | - "We've done a good job of organizing our code into functions here, but we've only been running them from this notebook. So next, we're going to take our code and put it in a script - starting with the `read_sequence` function.\n" |
3041 | | - ] |
3042 | | - }, |
3043 | | - { |
3044 | | - "cell_type": "markdown", |
3045 | | - "metadata": {}, |
3046 | | - "source": [ |
3047 | | - "This is going to be very familiar, since we're doing the same thing we did in bash, just with a different language!" |
| 3055 | + "We've done a good job of organizing our code into functions here, but we've only been running them from this notebook. So next, we're going to take our code and put it in a script - starting with the `read_fasta` function." |
3048 | 3056 | ] |
3049 | 3057 | }, |
3050 | 3058 | { |
|
3081 | 3089 | " f = open(filename)\n", |
3082 | 3090 | " for line in f:\n", |
3083 | 3091 | " line = line.strip()\n", |
3084 | | - " if not '>' in line:\n", |
| 3092 | + " if '>' not in line:\n", |
3085 | 3093 | " # Append to the last sequence\n", |
3086 | 3094 | " sequence = sequence + line\n", |
3087 | 3095 | " f.close()\n", |
|
3090 | 3098 | "print(read_fasta('ae.fa'))\n" |
3091 | 3099 | ] |
3092 | 3100 | }, |
| 3101 | + { |
| 3102 | + "cell_type": "markdown", |
| 3103 | + "metadata": {}, |
| 3104 | + "source": [ |
| 3105 | + "Our script reads our `ae.fa` file every time we run it, but we know most programs don't work that way. The programs we used in bash expected a data file as an *argument*, and that's a good convention for programs we write too.\n", |
| 3106 | + "\n", |
| 3107 | + "In Python, our program can get these arguments, but we have to load a module called `sys` from the standard library, a collection of modules that ship with Python but must be imported before they can be used. The documentation for these modules is part of the Python documentation: https://docs.python.org/3/library/sys.html\n", |
| 3108 | + "\n", |
| 3109 | + "Libraries are incredibly useful - there are libraries for working with numeric and scientific data, generating plots, fetching data from the web, working with image and document files, databases, etc. And of course, there's a library for getting things like your script's command-line arguments.\n", |
| 3110 | + "\n", |
| 3111 | + "So, let's change our `read_fasta.py` program slightly." |
| 3112 | + ] |
| 3113 | + }, |
3093 | 3114 | { |
3094 | 3115 | "cell_type": "code", |
3095 | 3116 | "execution_count": null, |
|
3105 | 3126 | " f = open(filename)\n", |
3106 | 3127 | " for line in f:\n", |
3107 | 3128 | " line = line.strip()\n", |
3108 | | - " if not '>' in line:\n", |
| 3129 | + " if '>' not in line:\n", |
3109 | 3130 | " # Append to the last sequence\n", |
3110 | 3131 | " sequence = sequence + line\n", |
3111 | 3132 | " f.close()\n", |
|
3114 | 3135 | "print(read_fasta(sys.argv[1]))\n" |
3115 | 3136 | ] |
3116 | 3137 | }, |
| 3138 | + { |
| 3139 | + "cell_type": "markdown", |
| 3140 | + "metadata": {}, |
| 3141 | + "source": [ |
| 3142 | + "But what happens if we don't have an input file name? According to the documentation, `sys.argv` is a list whose first item, `sys.argv[0]`, is the name of the script, and each additional item is a command-line argument. If no argument was passed, `sys.argv` is a list containing just the script name." |
| 3143 | + ] |
| 3144 | + }, |
3117 | 3145 | { |
3118 | 3146 | "cell_type": "code", |
3119 | 3147 | "execution_count": null, |
|
3129 | 3157 | " f = open(filename)\n", |
3130 | 3158 | " for line in f:\n", |
3131 | 3159 | " line = line.strip()\n", |
3132 | | - " if not '>' in line:\n", |
| 3160 | + " if '>' not in line:\n", |
3133 | 3161 | " # Append to the last sequence\n", |
3134 | 3162 | " sequence = sequence + line\n", |
3135 | 3163 | " f.close()\n", |
3136 | 3164 | " return sequence\n", |
3137 | 3165 | "\n", |
3138 | 3166 | "if len(sys.argv) < 2:\n", |
3139 | 3167 | " print('Usage:', sys.argv[0], '<sequence.fa>')\n", |
3140 | | - " exit(1)\n", |
| 3168 | + " sys.exit(1)\n", |
3141 | 3169 | "\n", |
3142 | 3170 | "print(read_fasta(sys.argv[1]))\n" |
3143 | 3171 | ] |
3144 | 3172 | }, |
3145 | | - { |
3146 | | - "cell_type": "markdown", |
3147 | | - "metadata": {}, |
3148 | | - "source": [ |
3149 | | - "`$ python read_fasta.py`" |
3150 | | - ] |
3151 | | - }, |
3152 | | - { |
3153 | | - "cell_type": "markdown", |
3154 | | - "metadata": {}, |
3155 | | - "source": [ |
3156 | | - "Our script reads our `ae.fa` file every time we run it, but we know most programs don't work that way. The programs we used in bash expected a data file as an *argument*, and that's a good convention for programs we write too.\n", |
3157 | | - "\n", |
3158 | | - "In Python, our program can get these arguments, but we have to load a library to access them. Google it!\n", |
3159 | | - "\n", |
3160 | | - "`import sys`\n", |
3161 | | - "\n", |
3162 | | - "https://docs.python.org/3/library/sys.html\n", |
3163 | | - "\n", |
3164 | | - "Libraries are incredibly useful - there are libraries for working with numeric and scientific data, generating plots, fetching data from the web, working with image and document files, databases, etc. And of course, there's a library for getting things like your script's command-line arguments.\n", |
3165 | | - "\n", |
3166 | | - "So, let's change our `read_fasta.py` program slightly\n", |
3167 | | - "\n", |
3168 | | - " import sys\n", |
3169 | | - " read_fasta(sys.argv[1])\n", |
3170 | | - "\n", |
3171 | | - "And run it. But what happens if we don't have a file name\n", |
3172 | | - "\n", |
3173 | | - " if len(sys.argv) < 2:\n", |
3174 | | - " print 'Usage:', sys.argv[0], '<sequence.fa>'\n", |
3175 | | - " exit(1)\n" |
3176 | | - ] |
3177 | | - }, |
3178 | 3173 | { |
3179 | 3174 | "cell_type": "markdown", |
3180 | 3175 | "metadata": {}, |
|
3186 | 3181 | "metadata": { |
3187 | 3182 | "anaconda-cloud": {}, |
3188 | 3183 | "kernelspec": { |
3189 | | - "display_name": "Python 3", |
| 3184 | + "display_name": "Python 3 (ipykernel)", |
3190 | 3185 | "language": "python", |
3191 | 3186 | "name": "python3" |
3192 | 3187 | }, |
|
3200 | 3195 | "name": "python", |
3201 | 3196 | "nbconvert_exporter": "python", |
3202 | 3197 | "pygments_lexer": "ipython3", |
3203 | | - "version": "3.7.7" |
| 3198 | + "version": "3.9.12" |
3204 | 3199 | } |
3205 | 3200 | }, |
3206 | 3201 | "nbformat": 4, |
|
0 commit comments