# Source code for pydna.download
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''Provides a class for downloading sequences from genbank.
'''
import re
import os
import urllib2
from urlparse import urlparse
from urlparse import urlunparse
from Bio import SeqIO
from Bio import Entrez
from Bio.SeqUtils.CheckSum import seguid
class Genbank():
    '''Class to facilitate download from genbank.

    Genbank(users_email, proxy = None, tool="biopython")

    users_email -- e-mail address reported to NCBI with every request.
                   A ValueError is raised if the whole string does not
                   look like an e-mail address.
    proxy       -- optional proxy URL, e.g. "http://proxy.com:3128".
                   When given, the proxy host is probed once (1 s
                   timeout) and a ValueError is raised if the probe
                   fails.
    tool        -- tool name reported to NCBI together with the e-mail
                   address (assigned to Bio.Entrez.tool on download).

    Creating an instance builds a urllib2 opener around the chosen proxy
    handler and installs it globally with urllib2.install_opener.

    Example:

    import pydna
    gb=pydna.Genbank("me@mail.se", proxy = "http://proxy.com:3128")
    gb.nucleotide("L09137") <- this method does the downloading from genbank
    SeqRecord(seq=Seq('TCGCGCGTTTCGGTGATGACGGTGAAAACCTCT.....
    '''

    def __init__(self, users_email, proxy = None, tool="biopython"):
        # \Z anchors the pattern at the very end of the string, so the
        # complete argument has to be an address and not merely start
        # with one. The top-level domain length is left open-ended.
        if not re.match(r"[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\Z",
                        users_email, re.IGNORECASE):
            raise ValueError("Not a valid user email!")
        self.email = users_email  # Always tell NCBI who you are
        self.tool = tool          # forwarded to Entrez in nucleotide()
        if proxy:
            parsed = urlparse(proxy)
            scheme = parsed.scheme
            hostname = parsed.hostname
            # NOTE(review): the probe URL is built from scheme and host
            # name only, so the port and any credentials in `proxy` are
            # not part of the reachability check - confirm this is the
            # intended behaviour.
            test = urlunparse((scheme, hostname, '', '', '', ''))
            try:
                # Only reachability matters; close the response at once.
                urllib2.urlopen(test, timeout=1).close()
            except urllib2.URLError:
                raise ValueError("could not contact proxy server: %s" % test)
            self.proxy = urllib2.ProxyHandler({scheme: parsed.geturl()})
        else:
            # No proxy requested: blank the http_proxy variable in this
            # process' environment before creating the default handler.
            os.environ['http_proxy'] = ''
            self.proxy = urllib2.ProxyHandler()
        self.opener = urllib2.build_opener(self.proxy)
        urllib2.install_opener(self.opener)

    def test(self):
        '''Test downloading the pUC19 plasmid sequence from genbank.

        Returns True when the SEGUID checksum of the downloaded sequence
        has the expected value, raises AssertionError otherwise.
        '''
        result = self.nucleotide("L09137")  # pUC19
        # Raised explicitly so the check is not removed when Python runs
        # with -O (which strips assert statements).
        if seguid(result.seq) != "71B4PwSgBZ3htFjJXwHPxtUIPYE":
            raise AssertionError(
                "downloaded pUC19 sequence has an unexpected checksum")
        return True

    def nucleotide(self, item):
        '''Download a genbank record using an instance of the Genbank
        class.

        item is a string containing one genbank accession number
        for a nucleotide file:

        A12345   = 1 letter  + 5 numerals
        AB123456 = 2 letters + 6 numerals

        http://www.dsimb.inserm.fr/~fuchs/M2BI/AnalSeq/Annexes/Sequences/Accession_Numbers.htm

        Returns the record obtained by parsing the download with
        Bio.SeqIO.read in "genbank" format.
        '''
        Entrez.email = self.email
        Entrez.tool = self.tool
        handle = Entrez.efetch(db="nucleotide",
                               id=item,
                               rettype="gb",
                               retmode="text")
        try:
            return SeqIO.read(handle, "genbank")
        finally:
            # Release the connection even when parsing fails.
            handle.close()
if __name__ == "__main__":
    # Executed as a script: run the doctest examples (if any) found in
    # this module's docstrings.
    import doctest
    doctest.testmod()
#a=genbank("bjornjobb@gmail.com")
#a.test()
# a=genbank("bjornjobb@gmail.com","http://proxy.uminho.pt:3128")
# a.test()
# print a.proxy.proxies
# a=genbank("bjornjobb@gmail.com","http://username:password@proxy.uminho.pt:1000")
# print a.proxy.proxies
# a=genbank("bjornjobb@gmail.com")
# a.test()