[ PROMPT_NODE_26794 ]
Matchms 工作流
[ SKILL_DOCUMENTATION ]
# Matchms 常用工作流
本文档提供了使用 matchms 进行质谱分析的常用工作流详细示例。
## 工作流 1:基础谱图库匹配
将未知谱图与参考库进行匹配以鉴定化合物。
python
from matchms.importing import load_from_mgf
from matchms.filtering import default_filters, normalize_intensities
from matchms.filtering import select_by_relative_intensity, require_minimum_number_of_peaks
from matchms import calculate_scores
from matchms.similarity import CosineGreedy
# Workflow 1: match unknown spectra against a reference library and report
# the five best-scoring library entries for every query spectrum.

# Load the reference library.
print("正在加载参考库...")
library = list(load_from_mgf("reference_library.mgf"))

# Load the query (unknown) spectra.
print("正在加载查询谱图...")
queries = list(load_from_mgf("unknown_spectra.mgf"))


def _preprocess(spectra):
    """Run the cleaning pipeline on each spectrum and keep the survivors.

    Each spectrum goes through metadata defaults, intensity normalisation,
    removal of peaks below 1% relative intensity, and a minimum-peak-count
    check (at least 5 peaks). Spectra for which the pipeline yields ``None``
    are dropped.

    Args:
        spectra: Iterable of spectra as produced by ``load_from_mgf``.

    Returns:
        A list with the processed spectra that passed every filter.
    """
    kept = []
    for spectrum in spectra:
        # NOTE(review): relies on matchms filters passing ``None`` through
        # unchanged when an earlier filter rejected the spectrum - confirm
        # for the installed matchms version.
        spectrum = default_filters(spectrum)
        spectrum = normalize_intensities(spectrum)
        spectrum = select_by_relative_intensity(spectrum, intensity_from=0.01)
        spectrum = require_minimum_number_of_peaks(spectrum, n_required=5)
        if spectrum is not None:
            kept.append(spectrum)
    return kept


# Process the library spectra.
print("正在处理库...")
processed_library = _preprocess(library)

# Process the query spectra.
print("正在处理查询...")
processed_queries = _preprocess(queries)

# Compute the similarity of every query against every library spectrum.
print("正在计算相似度...")
scores = calculate_scores(
    references=processed_library,
    queries=processed_queries,
    similarity_function=CosineGreedy(tolerance=0.1),
)

# scores_by_query() yields (reference_spectrum, score) pairs rather than
# integer indices, so map each library spectrum back to its position; the
# position is only used for the fallback display name.
library_position = {id(spec): idx for idx, spec in enumerate(processed_library)}

# Report the top matches for each query.
print("\n顶级匹配:")
for i, query in enumerate(processed_queries):
    top_matches = scores.scores_by_query(query, sort=True)[:5]
    query_name = query.get("compound_name", f"Query {i}")
    print(f"\n{query_name}:")
    for ref_spectrum, score in top_matches:
        ref_idx = library_position.get(id(ref_spectrum), "?")
        ref_name = ref_spectrum.get("compound_name", f"Ref {ref_idx}")
        # CosineGreedy produces a structured score (similarity, number of
        # matched peaks); the first field is the similarity value.
        print(f" {ref_name}: {float(score[0]):.4f}")
---
## 工作流 2:质量控制与数据清洗
在分析前对谱图数据进行过滤和清洗。
python
from matchms.importing import load_from_mgf
from matchms.exporti