# 深圳的seo网站排名优化,销售网站设计方案,怎么才能成功做网站,套餐
# template_hits(Sequence[TemplateHit] 数据格式)来自结构数据库搜索结果(python 运行 hhsearch 二进制命令的包装器类)。
# 映射索引计算:TemplateHit 中含有 indices_query,需要换算成在原始 query 序列中的 index;hit 中 indices_hit 需要减去最小 index(-1 表示 gap,除外)。
import pickle
import dataclasses
from typing import Optional, List, Sequence, Mapping


@dataclasses.dataclass(frozen=True)
class TemplateHit:
    """Class representing a template hit."""

    # Position/identifier of the hit and its name, as stored by the producer
    # of the hit list (exact semantics are defined by that producer).
    index: int
    name: str
    aligned_cols: int
    sum_probs: Optional[float]
    # Aligned query and hit sequences; both may contain '-' gap characters.
    query: str
    hit_sequence: str
    # Per-column residue indices for the query and the hit; -1 marks a gap.
    indices_query: List[int]
    indices_hit: List[int]


### Load the Sequence[TemplateHit] data
# NOTE: pickle.load can execute arbitrary code; only load files you created
# yourself or otherwise trust.
with open("test_pdb_hits.pkl", "rb") as file:
    # Use pickle.load to restore the object from the file.
    test_pdb_hits = pickle.load(file)
# test_pdb_hits.pkl was produced by saving template_hits, the result of the
# Python wrapper class that runs the hhsearch binary, like this:
# import pickle
# with open("test_pdb_hits.pkl", "wb") as file:
#     pickle.dump(template_hits, file)


def build_query_to_hit_index_mapping(
        hit_query_sequence: str,
        hit_sequence: str,
        indices_hit: Sequence[int],
        indices_query: Sequence[int],
        original_query_sequence: str) -> Mapping[int, int]:
    """Gets mapping from indices in original query sequence to indices in the hit.

    hit_query_sequence and hit_sequence are two aligned sequences containing
    gap characters. hit_query_sequence contains only the part of the original
    query sequence that matched the hit. When interpreting the indices from
    the .hhr, we need to correct for this to recover a mapping from original
    query sequence to the hit sequence.

    Args:
        hit_query_sequence: The portion of the query sequence that is in the
            .hhr hit.
        hit_sequence: The portion of the hit sequence that is in the .hhr.
        indices_hit: The indices for each aminoacid relative to the hit
            sequence (-1 for gap columns).
        indices_query: The indices for each aminoacid relative to the original
            query sequence (-1 for gap columns).
        original_query_sequence: String describing the original query
            sequence.

    Returns:
        Dictionary with indices in the original query sequence as keys and
        indices in the hit sequence as values.

    Raises:
        ValueError: If hit_query_sequence is non-empty but indices_hit or
            indices_query contains no non-gap index (raised by min()).
    """
    # If the hit is empty (no aligned residues), return empty mapping.
    if not hit_query_sequence:
        return {}

    # Remove gaps and find the offset of hit.query relative to original query.
    # str.find returns -1 when the fragment is absent; that value is used
    # as-is for the offset.
    hhsearch_query_sequence = hit_query_sequence.replace('-', '')
    hit_sequence = hit_sequence.replace('-', '')
    hhsearch_query_offset = original_query_sequence.find(
        hhsearch_query_sequence)
    print(f"hhsearch_query_offset:{hhsearch_query_offset}")

    # Index of -1 used for gap characters. Subtract the min index ignoring
    # gaps so that both index lists start at 0.
    min_idx = min(x for x in indices_hit if x > -1)
    fixed_indices_hit = [x - min_idx if x > -1 else -1 for x in indices_hit]
    print(f"fixed_indices_hit:{fixed_indices_hit}")

    min_idx = min(x for x in indices_query if x > -1)
    fixed_indices_query = [
        x - min_idx if x > -1 else -1 for x in indices_query
    ]
    print(f"fixed_indices_query:{fixed_indices_query}")

    # Zip the corrected indices, skipping every column where either sequence
    # has a gap.
    mapping = {}
    for q_i, q_t in zip(fixed_indices_query, fixed_indices_hit):
        if q_t != -1 and q_i != -1:
            # Drop pairs that would point past the end of either sequence.
            if (q_t >= len(hit_sequence) or
                    q_i + hhsearch_query_offset
                    >= len(original_query_sequence)):
                continue
            mapping[q_i + hhsearch_query_offset] = q_t
    return mapping


if __name__ == "__main__":
    hit = test_pdb_hits[0]
    input_fasta_file = "Q94K49.fasta"

    ## Extract query_sequence (str) from the FASTA file.
    # Header lines (starting with ">") are skipped; all remaining lines are
    # concatenated, so the file is expected to hold a single record.
    query_sequence = ""
    with open(input_fasta_file) as f:
        for line in f.readlines():
            if line.startswith(">"):
                continue
            query_sequence += line.strip()

    print(f"hit.query:{hit.query}")
    print(f"hit.hit_sequence:{hit.hit_sequence}")
    print(f"hit.indices_hit:{hit.indices_hit}")
    print(f"hit.indices_query:{hit.indices_query}")
    print(f"query_sequence:{query_sequence}")

    ## Dictionary mapping the index of each aligned amino acid in the query
    ## chain to the index of its partner in the hit chain.
    mapping = build_query_to_hit_index_mapping(
        hit.query, hit.hit_sequence, hit.indices_hit, hit.indices_query,
        query_sequence)
    print(mapping)