Source code for pydna.download

#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''Provides a class for downloading sequences from genbank.


'''
import re
import os
import urllib2
from urlparse import urlparse
from urlparse import urlunparse
from Bio import SeqIO
from Bio import Entrez
from Bio.SeqUtils.CheckSum import seguid

class Genbank(object):
    '''Class to facilitate download from genbank.

    Genbank(users_email, proxy = None, tool="biopython")

    Example (plain text on purpose, it needs network access):

        import pydna
        gb=pydna.Genbank("me@mail.se", proxy = "http://proxy.com:3128")
        gb.nucleotide("L09137")  <- this method does the downloading from genbank
        SeqRecord(seq=Seq('TCGCGCGTTTCGGTGATGACGGTGAAAACCTCT.....

    Creating an instance has process-wide side effects: the urllib2 opener
    built here is installed globally with urllib2.install_opener, and when
    no proxy is given the ``http_proxy`` environment variable is set to an
    empty string.
    '''

    # The whole string has to be an address: the pattern is applied with
    # re.match (anchored at the start) and ends in \Z, so trailing text
    # after an otherwise valid address is rejected.
    _EMAIL_RE = re.compile(
        r"[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z0-9-]{2,}\Z", re.IGNORECASE)

    def __init__(self, users_email, proxy = None, tool="biopython"):
        '''Validate the email address and set up the urllib2 opener.

        users_email -- address reported to NCBI with every request
        proxy       -- optional proxy url, e.g. "http://proxy.com:3128"
        tool        -- tool name reported to NCBI alongside the email

        Raises ValueError if the email address is not valid or if the
        proxy host does not answer within one second.
        '''
        if not self._EMAIL_RE.match(users_email):
            raise ValueError("Not a valid user email!")
        self.email = users_email  # Always tell NCBI who you are
        self.tool = tool
        if proxy:
            parsed = urlparse(proxy)
            scheme = parsed.scheme
            hostname = parsed.hostname
            # NOTE(review): the probe url is built from scheme and hostname
            # only, so the port and any credentials of the proxy url are
            # left out of the reachability check -- confirm this is intended.
            test = urlunparse((scheme, hostname, '', '', '', ''))
            try:
                response = urllib2.urlopen(test, timeout=1)
            except urllib2.URLError:
                # Echo the url that was probed before failing.
                print(test)
                raise ValueError("could not contact proxy server.")
            else:
                # Only reachability matters; release the connection.
                response.close()
            self.proxy = urllib2.ProxyHandler({scheme: parsed.geturl()})
        else:
            os.environ['http_proxy'] = ''
            self.proxy = urllib2.ProxyHandler()
        self.opener = urllib2.build_opener(self.proxy)
        urllib2.install_opener(self.opener)

    def test(self):
        '''Test downloading the pUC19 plasmid sequence from genbank

        Returns True when the seguid checksum of the downloaded sequence
        matches the expected value, raises AssertionError otherwise.
        '''
        result = self.nucleotide("L09137")  # pUC19
        # Raised explicitly so the check is not stripped by "python -O".
        if seguid(result.seq) != "71B4PwSgBZ3htFjJXwHPxtUIPYE":
            raise AssertionError(
                "unexpected seguid checksum for genbank record L09137")
        return True

    def nucleotide(self, item):
        '''Download a genbank record using an instance of the Genbank class.

        item is a string containing one genbank accession number
        for a nucleotide file:

        A12345   = 1 letter  + 5 numerals
        AB123456 = 2 letters + 6 numerals

        http://www.dsimb.inserm.fr/~fuchs/M2BI/AnalSeq/Annexes/Sequences/Accession_Numbers.htm

        Returns the object produced by SeqIO.read(handle, "genbank").
        '''
        Entrez.email = self.email
        if self.tool:
            Entrez.tool = self.tool
        handle = Entrez.efetch(db="nucleotide",
                               id=item,
                               rettype="gb",
                               retmode="text")
        try:
            return SeqIO.read(handle, "genbank")
        finally:
            # Release the connection even when parsing fails.
            handle.close()
if __name__ == "__main__":
    # Run whatever doctest examples the docstrings of this module contain.
    import doctest

    doctest.testmod()

    # For a manual check against the live service, create a Genbank
    # instance (with or without a proxy url) and call its test() method.