shelve的优势在于:
• 使用简单,学习成本极低
• 支持几乎所有可被pickle序列化的Python对象
• 性能优秀,适合中小型数据存储
• 无需额外依赖,跨平台兼容性好

使用建议:
- 总是使用with语句安全关闭文件
- 需要原地修改已存储的可变对象(如列表、字典)时,使用writeback=True;它会把访问过的条目缓存在内存中,并在sync()或close()时统一写回,因此数据量大时会占用更多内存、关闭也会变慢
- 注意键必须是字符串类型
- 大规模数据建议使用专业数据库替代

shelve虽然低调,但确实是Python世界中数据持久化的一颗明珠。它平衡了易用性和功能性,特别适合需要快速实现数据存储的场景。下次遇到类似需求,不妨试试这个内置神器!
持久化
shelve最强大之处在于它能像字典一样操作,但数据会自动持久化到磁盘:
代码
自动持久化到磁盘
import shelve

# A shelf is used like a dict, but its contents are persisted to disk.
with shelve.open('mydata') as db:
    # Store assorted Python objects under string keys.
    db['list'] = [1, 2, 3, 4]
    db['dict'] = {'name': 'Python', 'age': 30}
    db['set'] = {1, 2, 3}

    # Read a value back exactly as with a dict.
    my_list = db['list']
    print(my_list)  # [1, 2, 3, 4]

    # Common mapping operations are supported as well.
    print(len(db))  # number of stored key/value pairs
    print(list(db.keys()))  # all keys
计数器更新(读取-修改-写回;shelve本身不保证原子性,也不支持并发读写)
import shelve

# Increment a persistent counter stored in the 'mydata' shelf.
# writeback=True makes the shelf keep accessed entries in an in-memory cache
# and write them back on sync()/close(); it exists so that in-place mutations
# of stored mutable objects are not lost. It is not a general speed-up.
with shelve.open('mydata', writeback=True) as db:
    # Read the current value (0 when the key is absent) and assign the new
    # value back to the key. This read-modify-write is not atomic.
    db['counter'] = db.get('counter', 0) + 1
批量操作优化
def batch_save(items, batch_size=1000, filename='mydata'):
    """Store every item in a shelf, flushing to disk once per full batch.

    Each item is stored under the key ``item_<index>``, where ``index`` is the
    item's zero-based position in ``items``.

    Args:
        items: Iterable of picklable objects to store.
        batch_size: Number of writes between explicit ``sync()`` calls.
            Must be non-zero, because it is used as a modulus.
        filename: Shelf file to write to. Defaults to ``'mydata'``.
    """
    with shelve.open(filename) as db:
        for i, item in enumerate(items):
            db[f'item_{i}'] = item
            # i is zero-based, so i + 1 is the number of items written so far.
            # Testing i itself would sync right after the first item and never
            # on a full batch boundary.
            if (i + 1) % batch_size == 0:
                db.sync()  # flush this batch to disk
爬网页
import shelve
from datetime import datetime
from dataclasses import dataclass
@dataclass
class WebPage:
    """One cached web page: its address, its body and when it was stored."""

    url: str  # address the page is stored under
    content: str  # page body as text
    timestamp: datetime  # moment the page was saved to the cache
class CacheManager:
    """Shelve-backed cache of web page contents, keyed by URL.

    The shelf is opened and closed around every operation, so no file handle
    is held between calls.
    """

    def __init__(self, filename):
        """Remember which shelf file to use.

        Args:
            filename: Path passed to ``shelve.open`` on every operation.
        """
        self.filename = filename

    def save_page(self, url, content):
        """Store ``content`` under ``url``, stamped with the current time.

        Args:
            url: Cache key; shelve requires it to be a string.
            content: Page body to store.
        """
        with shelve.open(self.filename) as cache:
            cache[url] = WebPage(
                url=url,
                content=content,
                timestamp=datetime.now(),
            )

    def get_page(self, url, max_age_hours=24):
        """Return the cached content for ``url`` if it is still fresh.

        Args:
            url: Cache key to look up.
            max_age_hours: Maximum accepted age of the entry, in hours.

        Returns:
            The stored page content, or ``None`` when the URL is not cached
            or the entry is ``max_age_hours`` old or older.
        """
        # Single lookup; the shelf is closed before the age check runs.
        with shelve.open(self.filename) as cache:
            page = cache.get(url)
        if page is None:
            return None
        age = datetime.now() - page.timestamp
        if age.total_seconds() < max_age_hours * 3600:
            return page.content
        return None
# Usage example: store a page in the 'webcache' shelf, then read it back.
cache = CacheManager('webcache')
cache.save_page('https://python.org', '<!DOCTYPE html>...')
# No max_age_hours is passed, so get_page's default of 24 hours applies.
content = cache.get_page('https://python.org')