Professional Documents
Culture Documents
solutionsExerciseMaster11 23
solutionsExerciseMaster11 23
11.py
Code
#!/usr/bin/python3
import sys
counter = 0
else:
1
12.py
Code
#!/usr/bin/python3
import sys
# Both an input path and an output path must be given on the command line.
if len(sys.argv) < 3:
    # sys.exit(message) writes the message to stderr and exits with status 1,
    # so a calling shell or pipeline can detect the failure.
    sys.exit('Error: Not enough arguments. Usage: 12.py inputfile outputfile')
inputname = sys.argv[1]   # first positional argument: file to read
outputname = sys.argv[2]  # second positional argument: file to write
2
13.py
Code
#!/usr/bin/python3
# Exercise: Calculate GC content in multi-fasta file
# Author: Martin Basterrechea
import sys
GC = 0
AT = 0
# The multi-fasta input path is the only required argument.
if len(sys.argv) < 2:
    # sys.exit(message) writes the message to stderr and exits with status 1,
    # so a calling shell or pipeline can detect the failure.
    sys.exit('Error: Not enough arguments. Usage: 13.py inputfile')
inputname = sys.argv[1]
print(round(GC/(GC+AT), 2))
3
14.py
Code
#!/usr/bin/python3
# Exercise: Calculate average length of sequences in multi-fasta file
# Author: Martin Basterrechea
import sys
inputname = sys.argv[1]
# We get the sum of the sequence lengths and divide it by the number of sequences
total_length = 0
seq_count = 0
print(round(total_length/seq_count,1))
4
15.py
Code
#!/usr/bin/python3
# Exercise: Calculate GC content in fastq file
import sys
GC = 0
AT = 0
# The fastq input path is the only required argument.
if len(sys.argv) < 2:
    # sys.exit(message) writes the message to stderr and exits with status 1,
    # so a calling shell or pipeline can detect the failure.
    sys.exit('Error: Not enough arguments. Usage: 15.py fastqFile')
inputname = sys.argv[1]
countLines = 0
print(round(GC/(GC+AT), 2))
5
16.py
Code
#!/usr/bin/python3
import sys
import re
# Compiling the pattern is not necessary, but faster than using the string directly.
# The pattern is a raw string so that \d reaches the regex engine untouched;
# in a plain string literal '\d' is an invalid escape sequence and triggers a
# warning on recent Python versions.
lengthPattern = re.compile(r'length=(\d+)')
countId = 0       # reported as "IDs" in the summary
countNt = 0       # reported as "Total counted nucl"
headerLength = 0  # reported as "Total label length"
print('IDs: {}'.format(countId))
print('Total counted nucl: {} nt, Average: {} nt'.format(countNt, round(countNt / countId, 1)))
print('Total label length: {} nt, Average: {} nt'.format(headerLength, round(headerLength / cou
6
17.py
Code
#!/usr/bin/python3
import sys
import re
target_id = sys.argv[1]
found_id = False
'''
# A regular expression solution
# The added \s to the pattern match a trailing space, tab or newline,
# preventing a match with only part of the ID
search_pattern = re.compile('>' + target_id + '\s')
7
18.py
Code
#!/usr/bin/python3
"""Print an interleaved (line-wrapped) fasta file with one line per sequence."""
import sys


def linearize_fasta(lines):
    """Yield the output lines of a fasta stream with every sequence unwrapped.

    Args:
        lines: iterable of text lines (e.g. an open file); trailing
            whitespace and newlines are stripped from each one.

    Yields:
        Each header line (starting with '>') unchanged, followed by a single
        line holding the concatenation of all sequence lines up to the next
        header. Empty lines are ignored, and no line is emitted for a header
        that has no sequence.
    """
    pieces = []
    for raw in lines:
        text = raw.rstrip()
        if text.startswith('>'):
            # A new record starts: flush the sequence collected so far.
            if pieces:
                yield ''.join(pieces)
                pieces = []
            yield text
        elif text:
            pieces.append(text)
    # The last record has no following header to trigger the flush.
    if pieces:
        yield ''.join(pieces)


def main(argv):
    """Read the fasta file named in argv[1] and print it linearized."""
    if len(argv) < 2:
        # sys.exit(message) writes to stderr and exits with status 1.
        sys.exit('Error: Not enough arguments. Usage: 18.py inputfile')
    with open(argv[1], 'r') as f:
        for out_line in linearize_fasta(f):
            print(out_line)


if __name__ == '__main__':
    main(sys.argv)
8
19.py
Code
#!/usr/bin/python3
# Exercise: Calculate the total mass of a protein sequence
# Author: Martin Basterrechea
import sys
# Sum the mass of every residue in prot, aborting on the first unknown letter.
total_mass = 0
for aa in prot:
    # dict.get returns the stored value, or None when the key is missing
    mass = mass_dict.get(aa.upper())
    # Compare against None explicitly: a plain truthiness test would also
    # reject a legitimate mass of 0.
    if mass is None:
        # sys.exit(message) writes to stderr and exits with status 1
        sys.exit('Invalid amino acid!: {}'.format(aa))
    total_mass += mass
print('Total mass: ' + str(total_mass))
9
20.py
Code
#!/usr/bin/python3
import sys
import re
targetOligo = sys.argv[1]
reverseOligo = targetOligo[::-1]
reverseOligo = reverseOligo.translate(str.maketrans('acgtACGT', 'tgcaTGCA'))
print('#Oligo\t{}'.format(targetOligo))
print('#id\tabundance')
if line[0] == '>':
line = line.rstrip() # Save for future use
idLine = line.split()[0]
numberOfOligos = 0
else:
numberOfOligos += len(re.findall(targetOligo, line, re.I))
numberOfOligos += len(re.findall(reverseOligo, line, re.I))
if numberOfOligos > 0:
print('{}\t{}'.format(idLine, numberOfOligos))
10
21.py
Code
#!/usr/bin/env python3
#Exercise: Translate DNA into AA
#Note that this script only works with non interleaved sequences
import sys
# Both the input fasta path and the output path are required.
if len(sys.argv) < 3:
    # sys.exit(message) writes the message to stderr and exits with status 1,
    # so a calling shell or pipeline can detect the failure.
    sys.exit('Error, not enough arguments. Usage: 21.py input.fasta output.faa')
11
22.py
Code
#!/usr/bin/python3
import sys
12
23.py
Code
#!/usr/bin/python3
import sys
# Exactly one argument (the gene fasta file) is expected. Stop here on a
# wrong argument count instead of falling through to open(sys.argv[1]).
if len(sys.argv) != 2:
    # sys.exit(message) writes the message to stderr and exits with status 1
    sys.exit('Usage: {} gene_fasta_file'.format(sys.argv[0]))
codon_total_count = [0, 0, 0]
codon_gc_count = [0, 0, 0]
allowed_letters = ['a', 'c', 'g', 't']
gc_letters = ['c', 'g']
countLines = 0
with open(sys.argv[1], 'r') as in_fh:
for line in in_fh:
line = line.lower().rstrip()
countLines += 1
if countLines % 10000 == 0:
print('{} lines processed'.format(countLines), file=sys.stderr, end='\r')
if letter in allowed_letters:
codon_total_count[current_pos] += 1
if letter in gc_letters:
codon_gc_count[current_pos] += 1
13