[ PROMPT_NODE_22220 ]
data_connectors
[ SKILL_DOCUMENTATION ]
# LlamaIndex 数据连接器指南
LlamaIndex 通过 LlamaHub 提供 300 多个数据连接器,可从文件、网页、数据库和各类 SaaS 服务中加载数据。
## 内置加载器
### SimpleDirectoryReader
python
from llama_index.core import SimpleDirectoryReader
# Load all files from ./data
all_files_reader = SimpleDirectoryReader(input_dir="./data")
documents = all_files_reader.load_data()

# Filter by file extension
wanted_exts = [".pdf", ".docx", ".txt"]
filtered_reader = SimpleDirectoryReader(
    input_dir="./data",
    required_exts=wanted_exts,
)
documents = filtered_reader.load_data()

# Load recursively
recursive_reader = SimpleDirectoryReader(input_dir="./data", recursive=True)
documents = recursive_reader.load_data()
### 网页
python
from llama_index.readers.web import SimpleWebPageReader, BeautifulSoupWebReader
# Simple reader
example_urls = ["https://example.com"]
reader = SimpleWebPageReader()
documents = reader.load_data(example_urls)

# Advanced reader (BeautifulSoup)
doc_site_urls = [
    "https://docs.python.org",
    "https://numpy.org",
]
reader = BeautifulSoupWebReader()
documents = reader.load_data(urls=doc_site_urls)
### PDF
python
from llama_index.readers.file import PDFReader
# Parse a single PDF file into documents
pdf_path = "paper.pdf"
reader = PDFReader()
documents = reader.load_data(file=pdf_path)
### GitHub
python
import os

from llama_index.readers.github import GithubClient, GithubRepositoryReader

# GithubRepositoryReader requires an authenticated client as its first
# argument. The token is read from the environment so it is not hard-coded.
github_client = GithubClient(github_token=os.environ["GITHUB_TOKEN"], verbose=True)

reader = GithubRepositoryReader(
    github_client=github_client,
    owner="facebook",
    repo="react",
    # Filters are passed as a (values, FilterType) pair; INCLUDE keeps only
    # files with the listed extensions.
    filter_file_extensions=(
        [".js", ".jsx"],
        GithubRepositoryReader.FilterType.INCLUDE,
    ),
    verbose=True,
)
documents = reader.load_data(branch="main")
## LlamaHub 连接器
访问 https://llamahub.ai 获取 300+ 连接器:
- Notion, Google Docs, Confluence
- Slack, Discord, Twitter
- PostgreSQL, MongoDB, MySQL
- S3, GCS, Azure Blob
- Stripe, Shopify, Salesforce
### 从 LlamaHub 安装
bash
pip install llama-index-readers-notion
python
from llama_index.readers.notion import NotionPageReader
# Build the reader with a Notion integration token, then load the listed pages
notion_token = "your-token"
target_page_ids = ["page-id"]
reader = NotionPageReader(integration_token=notion_token)
documents = reader.load_data(page_ids=target_page_ids)
## 自定义加载器
python
from llama_index.core.readers.base import BaseReader
from llama_index.core import Document
class CustomReader(BaseReader):
    """Example reader that loads one text file into a single Document."""

    def load_data(self, file_path: str) -> list[Document]:
        """Read ``file_path`` and wrap its contents in a Document.

        Args:
            file_path: Path of the text file to read.

        Returns:
            A one-element list holding a Document whose text is the file
            contents and whose metadata stores the path under "source".

        Raises:
            OSError: If the file cannot be opened or read.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        # Replace this with your own loading logic.
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        with open(file_path, encoding="utf-8") as f:
            text = f.read()
        return [Document(text=text, metadata={"source": file_path})]
# Instantiate the custom reader and load one file through it
source_file = "data.txt"
reader = CustomReader()
documents = reader.load_data(file_path=source_file)
## 资源
- **LlamaHub**: https://llamahub.ai
- **数据连接器文档**: https://developers.llamaindex.ai/python/framework/module_guides/loading/connector/