00001 """
00002 Author: Marco Biasini
00003 """
00004
00005 import re
00006 from ost import seq
00007
class HHSearchHit:
    """
    Pairs the summary of one hit with the alignment belonging to it.

    Both constructor arguments are stored unchanged in the attributes
    ``summary`` and ``alignment``.
    """
    def __init__(self, summary, alignment):
        self.summary, self.alignment = summary, alignment
00012
class HitSummary:
    """
    Plain record holding the values of one hit summary entry.

    Every constructor argument is stored unchanged in an attribute of the
    same name: ``pdb_id``, ``chain``, ``prob``, ``e_value``, ``query_start``,
    ``query_end``, ``template_start`` and ``template_end``.
    """
    def __init__(self, pdb_id, chain, prob, e_value, query_start, query_end,
                 template_start, template_end):
        # (attribute name, value) pairs, assigned in this order.
        fields = (
            ('prob', prob),
            ('pdb_id', pdb_id),
            ('chain', chain),
            ('e_value', e_value),
            ('query_start', query_start),
            ('query_end', query_end),
            ('template_start', template_start),
            ('template_end', template_end),
        )
        for name, value in fields:
            setattr(self, name, value)
00024
class HHSearchResult:
    """
    Read an HHSearch result file. The result is stored as a list of
    HHSearchHit objects in the ``hits`` attribute; every hit carries a
    ``summary`` (HitSummary) and an ``alignment``.

    In addition, each line of the file header is stored on the instance as a
    string attribute: the first whitespace-separated word of the line,
    lower-cased, is the attribute name and the remainder of the line is its
    value.

    Usage:

        result=HHSearchResult('output.hhr')
        for hit in result.hits:
            print('%s %s' % (hit.summary.pdb_id, hit.summary.chain))
            print(hit.alignment.ToString(80))
    """

    # Compiled once; used to split header and alignment lines into columns.
    _WHITESPACE = re.compile(r'\s+')

    # Alignment-block lines starting with one of these prefixes carry no
    # residues of the query or target sequence and are ignored.
    _IGNORED_PREFIXES = ('Q ss_pred', 'Q Consensus',
                         'T ss_pred', 'T Consensus', ' ')

    def __init__(self, filename, pipe_separated=False):
        """
        Parse the result file at *filename*.

        :param filename: path of the HHSearch result file to read.
        :param pipe_separated: when true, the hit name in the summary table is
            split on '|' and the PDB id and chain are taken from its second
            field; otherwise they are read from fixed columns.
        """
        self.pipe_separated = pipe_separated
        self._Read(filename)

    def _Read(self, filename):
        """Parse header, hit summaries and hit alignments, in that order."""
        # The context manager closes the file even when parsing raises.
        with open(filename) as ifile:
            self._ReadHeader(ifile)
            summaries = self._ReadHitSummaries(ifile)
            self.hits = self._ReadHitDetails(ifile, summaries)

    def _ReadHeader(self, ifile):
        """
        Consume lines up to and including the first blank line and store each
        of them as an attribute on self.

        A line consisting of a key only (no value) is stored with an empty
        string as value.
        """
        header_lines = []
        for line in ifile:
            stripped_line = line.strip()
            if not stripped_line:
                break
            header_lines.append(stripped_line)
        for header_line in header_lines:
            parts = self._WHITESPACE.split(header_line, 1)
            value = parts[1] if len(parts) > 1 else ''
            setattr(self, parts[0].lower(), value)

    def _ReadHitSummaries(self, ifile):
        """
        Read the summary table and return a list of HitSummary objects.

        The first line read is skipped (presumably the column titles of the
        table); reading stops at the first blank line after it.
        """
        summary_lines = []
        for index, line in enumerate(ifile):
            if index == 0:
                continue
            if line.strip() == '':
                break
            summary_lines.append(line)
        return [self._ParseSummaryLine(line) for line in summary_lines]

    def _ParseSummaryLine(self, summary_line):
        """Build a HitSummary from the fixed columns of one summary line."""
        if self.pipe_separated:
            # Hit name looks like 'xx|<4-char id><chain>...'; use field two.
            parts = summary_line[4:37].split('|')
            pdb_id = parts[1][:4]
            chain = parts[1][4]
        else:
            pdb_id = summary_line[4:8]
            chain = summary_line[9]
        prob = float(summary_line[36:40])
        # The E-value column is not parsed; 0.0 is stored as a placeholder.
        e_value = 0.0
        query_start, query_end = self._ParseRange(summary_line[76:84])
        template_start, template_end = self._ParseRange(summary_line[86:94])
        return HitSummary(pdb_id, chain, prob, e_value, query_start,
                          query_end, template_start, template_end)

    @staticmethod
    def _ParseRange(text):
        """Convert a 'start-end' column into a tuple of two ints."""
        bounds = text.split('-')
        return int(bounds[0].strip()), int(bounds[1].strip())

    def _ReadHitDetails(self, ifile, summaries):
        """
        Read one alignment per summary, in order, and return the resulting
        list of HHSearchHit objects.
        """
        return [HHSearchHit(summary, self._ReadHitDetail(ifile))
                for summary in summaries]

    def _ReadHitDetail(self, ifile):
        """
        Read the next alignment block from *ifile* and return it as an
        alignment of two sequences named 'query' and 'target'.

        Everything up to and including the next line starting with '>' is
        skipped. Reading then continues until a line starting with 'No' or
        the end of the file is reached.
        """
        in_header = True
        q_chunks = []
        t_chunks = []
        for line in ifile:
            if in_header:
                if line.startswith('>'):
                    in_header = False
                continue
            if line.startswith('No'):
                # Start of the next hit; the line itself is consumed here.
                break
            if line.strip() == '':
                continue
            if line.startswith(self._IGNORED_PREFIXES):
                continue
            # The residues are the fourth whitespace-separated column.
            if line.startswith('Q'):
                q_chunks.append(self._WHITESPACE.split(line)[3])
            if line.startswith('T'):
                t_chunks.append(self._WHITESPACE.split(line)[3])
        ali = seq.AlignmentHandle()
        ali.AddSequence(seq.Sequence.FromString('query', ''.join(q_chunks)))
        ali.AddSequence(seq.Sequence.FromString('target', ''.join(t_chunks)))
        return ali