数据验证与 Pydantic
2026/3/20 · 大约 11 分钟
数据验证与 Pydantic
第一章:Pydantic 基础
什么是 Pydantic?
Pydantic 是 Python 中最流行的数据验证库,FastAPI 的核心依赖之一。它使用 Python 类型提示来进行数据验证、序列化和文档生成。
基础模型定义
from pydantic import BaseModel, Field
from typing import Optional, List
from datetime import datetime
from enum import Enum
# Basic model: three required fields plus two fields that carry defaults.
class User(BaseModel):
    id: int
    username: str
    email: str
    # Plain literal default.
    is_active: bool = True
    # default_factory is a callable, so the timestamp is produced when the
    # default is needed rather than once at class-definition time.
    created_at: datetime = Field(default_factory=datetime.now)
# Build an instance from keyword arguments and serialise it.
user = User(id=1, username="john", email="john@example.com")
print(user.model_dump())       # convert to a dict
print(user.model_dump_json())  # convert to a JSON string

# Create an instance from a dict.
data = {"id": 2, "username": "jane", "email": "jane@example.com"}
user2 = User.model_validate(data)

# Create an instance from a JSON document.
json_str = '{"id": 3, "username": "bob", "email": "bob@example.com"}'
user3 = User.model_validate_json(json_str)
常用字段类型
from pydantic import BaseModel, EmailStr, HttpUrl, IPvAnyAddress
from typing import Optional, List, Dict, Set, Tuple, Union, Any
from datetime import datetime, date, time, timedelta
from decimal import Decimal
from uuid import UUID
from pathlib import Path
# Catalogue of commonly used field types, grouped by category.
class AllFieldTypes(BaseModel):
    # --- basic types ---
    name: str
    age: int
    score: float
    is_active: bool

    # --- optional type ---
    nickname: Optional[str] = None

    # --- collection types ---
    # NOTE(review): these mutable literals rely on Pydantic not sharing the
    # default object between instances — confirm against the Pydantic docs.
    tags: List[str] = []
    metadata: Dict[str, Any] = {}
    unique_ids: Set[int] = set()
    coordinates: Tuple[float, float] = (0.0, 0.0)

    # --- union type ---
    value: Union[int, str, None] = None

    # --- date / time types ---
    created_at: datetime
    birth_date: date
    start_time: time
    duration: timedelta

    # --- special types ---
    price: Decimal
    user_id: UUID
    file_path: Path

    # --- validated types (original note: requires `pip install pydantic[email]`) ---
    email: EmailStr
    website: HttpUrl
    ip_address: IPvAnyAddress
枚举和字面量类型
from pydantic import BaseModel
from typing import Literal
from enum import Enum, IntEnum
# String enum: mixes in `str`, so members compare equal to their raw values.
class UserRole(str, Enum):
    admin = "admin"
    moderator = "moderator"
    user = "user"


# Integer enum: members are ints and therefore orderable.
class Priority(IntEnum):
    low = 1
    medium = 2
    high = 3
# Model combining a Literal-constrained field with the two enum types.
class Task(BaseModel):
    title: str
    # Only these three exact strings are listed as valid statuses.
    status: Literal["pending", "in_progress", "completed"]
    priority: Priority
    assigned_role: UserRole


# Validation example
task = Task(
    title="Fix bug",
    status="pending",
    priority=Priority.high,
    assigned_role=UserRole.admin,
)
第二章:字段验证
Field 配置
from pydantic import BaseModel, Field
from typing import Optional
from decimal import Decimal
# Product model demonstrating the constraint options of `Field`.
class Product(BaseModel):
    # Pydantic v2 configuration. This replaces the v1-style nested
    # `class Config`, matching the `model_config` usage elsewhere in this file.
    # populate_by_name: accept either the field name or its alias.
    model_config = {"populate_by_name": True}

    # Required field with a title and description.
    name: str = Field(
        ...,  # required
        min_length=1,
        max_length=100,
        title="商品名称",
        description="商品的显示名称"
    )

    # String validation.
    sku: str = Field(
        ...,
        pattern=r"^[A-Z]{3}-\d{4}$",  # regular-expression check
        examples=["ABC-1234"]
    )

    # Numeric validation.
    price: Decimal = Field(
        ...,
        gt=0,       # greater than 0
        le=10000,   # less than or equal to 10000
        decimal_places=2
    )

    # Integer validation.
    quantity: int = Field(
        default=0,
        ge=0,       # greater than or equal to 0
        lt=1000000  # less than 1000000
    )

    # Optional field.
    description: Optional[str] = Field(
        None,
        max_length=1000
    )

    # Field with an alias (a different name is used in JSON).
    product_id: int = Field(..., alias="id")

    # Deprecated field.
    old_code: Optional[str] = Field(None, deprecated=True)


# Numeric constraint options
# gt: greater than
# ge: greater than or equal to
# lt: less than
# le: less than or equal to
# multiple_of: must be a multiple of the given value
# strict: strict type checking
自定义验证器
from pydantic import BaseModel, field_validator, model_validator, ValidationError
from typing import Optional, List
import re
# Registration payload with per-field validators and one cross-field check.
class UserRegistration(BaseModel):
    username: str
    email: str
    password: str
    password_confirm: str
    phone: Optional[str] = None
    tags: List[str] = []

    # Field validators
    @field_validator("username")
    @classmethod
    def validate_username(cls, v: str) -> str:
        """Require an alphanumeric name of 3-20 characters; return it lower-cased."""
        if not v.isalnum():
            raise ValueError("用户名只能包含字母和数字")
        if len(v) < 3 or len(v) > 20:
            raise ValueError("用户名长度必须在 3-20 之间")
        return v.lower()  # normalise to lower case

    @field_validator("email")
    @classmethod
    def validate_email(cls, v: str) -> str:
        """Check the e-mail shape against a regex; return it lower-cased."""
        pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
        # fullmatch, not match: with match() the `$` anchor also matches just
        # before a trailing newline, so "a@b.com\n" would be accepted.
        if not re.fullmatch(pattern, v):
            raise ValueError("邮箱格式不正确")
        return v.lower()

    @field_validator("password")
    @classmethod
    def validate_password(cls, v: str) -> str:
        """Enforce length >= 8 plus upper-case, lower-case, digit and special character."""
        if len(v) < 8:
            raise ValueError("密码至少 8 位")
        if not re.search(r"[A-Z]", v):
            raise ValueError("密码必须包含大写字母")
        if not re.search(r"[a-z]", v):
            raise ValueError("密码必须包含小写字母")
        if not re.search(r"\d", v):
            raise ValueError("密码必须包含数字")
        if not re.search(r"[!@#$%^&*(),.?\":{}|<>]", v):
            raise ValueError("密码必须包含特殊字符")
        return v

    @field_validator("phone")
    @classmethod
    def validate_phone(cls, v: Optional[str]) -> Optional[str]:
        """Accept None, otherwise require an 11-digit number matching 1[3-9]XXXXXXXXX."""
        if v is None:
            return v
        # Mainland-China mobile number format.
        pattern = r"^1[3-9]\d{9}$"
        # fullmatch rejects a trailing newline that match() + `$` would let through.
        if not re.fullmatch(pattern, v):
            raise ValueError("手机号格式不正确")
        return v

    @field_validator("tags")
    @classmethod
    def validate_tags(cls, v: List[str]) -> List[str]:
        """Reject more than 10 tags, then strip/lower-case each and drop blank ones."""
        if len(v) > 10:
            raise ValueError("最多 10 个标签")
        return [tag.strip().lower() for tag in v if tag.strip()]

    # Model validator (checks several fields together)
    @model_validator(mode="after")
    def validate_passwords_match(self):
        """Raise if password and password_confirm differ; otherwise return the model."""
        if self.password != self.password_confirm:
            raise ValueError("两次密码输入不一致")
        return self
# Usage example: validation failures surface as ValidationError.
try:
    user = UserRegistration(
        username="john123",
        email="john@example.com",
        password="Password123!",
        password_confirm="Password123!",
        phone="13800138000",
        tags=["python", "fastapi"],
    )
    print(user.model_dump())
except ValidationError as e:
    # errors() returns the collected error details.
    print(e.errors())
验证模式
from pydantic import BaseModel, field_validator, model_validator
from typing import Any
# Shows the two field-validator modes on a single int field.
class Data(BaseModel):
    value: int

    # mode="before": runs ahead of type conversion.
    @field_validator("value", mode="before")
    @classmethod
    def parse_value(cls, v: Any) -> Any:
        """Expand a trailing "k"/"m" suffix on string input, e.g. "100k" -> 100000."""
        if not isinstance(v, str):
            return v
        # Same check order as before: "k" first, then "m".
        for suffix, factor in (("k", 1000), ("m", 1000000)):
            if v.endswith(suffix):
                return int(v[:-1]) * factor
        return v

    # mode="after": runs once the value has been converted (the default mode).
    @field_validator("value", mode="after")
    @classmethod
    def validate_positive(cls, v: int) -> int:
        """Reject negative values; zero and above pass through unchanged."""
        if v >= 0:
            return v
        raise ValueError("必须为正数")
# Shows the two model-validator modes.
class Order(BaseModel):
    items: list
    total: float = 0

    # mode="before": runs before any field is parsed, on the raw input.
    @model_validator(mode="before")
    @classmethod
    def calculate_total(cls, data: Any) -> Any:
        """Fill in a missing "total" from the item prices.

        Only dict input is touched. The result is a shallow copy, so the
        dict passed in by the caller is never mutated.
        """
        if isinstance(data, dict):
            items = data.get("items", [])
            if items and "total" not in data:
                data = {
                    **data,
                    "total": sum(item.get("price", 0) for item in items),
                }
        return data

    # mode="after": runs after all fields have been parsed.
    @model_validator(mode="after")
    def validate_order(self):
        """Reject an order that has no items but a positive total."""
        if not self.items and self.total > 0:
            raise ValueError("空订单不能有金额")
        return self
验证上下文
from pydantic import BaseModel, field_validator, ValidationInfo
from typing import Optional
# Validator that consults the per-call validation context.
class User(BaseModel):
    username: str
    role: str

    @field_validator("role")
    @classmethod
    def validate_role(cls, v: str, info: ValidationInfo) -> str:
        """Check the role against context["allowed_roles"] when a context is given."""
        # Read the validation context supplied by the caller (may be empty).
        context = info.context
        if not context:
            return v
        allowed_roles = context.get("allowed_roles", [])
        if v in allowed_roles:
            return v
        raise ValueError(f"角色必须是 {allowed_roles} 之一")


# Validate with a context
user = User.model_validate(
    {"username": "john", "role": "admin"},
    context={"allowed_roles": ["admin", "user"]},
)
第三章:模型配置
模型配置选项
from datetime import datetime
from typing import Optional

from pydantic import BaseModel, ConfigDict
# Reference listing of commonly used ConfigDict options.
class User(BaseModel):
    model_config = ConfigDict(
        # String handling
        str_strip_whitespace=True,      # strip leading/trailing whitespace
        str_min_length=1,               # minimum string length
        str_max_length=None,            # maximum string length

        # Validation
        strict=False,                   # strict mode (no automatic type coercion)
        extra="forbid",                 # extra fields: forbid, ignore, allow
        validate_default=True,          # validate default values
        validate_assignment=True,       # validate on attribute assignment
        revalidate_instances="always",  # re-validate model instances

        # Serialization
        from_attributes=True,           # allow building from ORM-style objects
        populate_by_name=True,          # accept field names as well as aliases
        use_enum_values=True,           # use enum values rather than enum members

        # JSON
        ser_json_timedelta="iso8601",   # timedelta serialization format
        ser_json_bytes="base64",        # bytes serialization format
        ser_json_inf_nan="null",        # serialize inf/nan as null

        # Miscellaneous
        frozen=False,                   # whether instances are immutable
        arbitrary_types_allowed=True,   # allow arbitrary types
        coerce_numbers_to_str=False,    # coerce numbers to strings
    )

    id: int
    username: str
    email: str
    # Must be Optional: validate_default=True validates the None default too,
    # and a bare `datetime` annotation would reject it.
    created_at: Optional[datetime] = None
不可变模型
from pydantic import BaseModel, ConfigDict
# frozen=True configures the model as immutable.
class ImmutableUser(BaseModel):
    model_config = ConfigDict(frozen=True)

    id: int
    username: str


# Cannot be modified after creation
user = ImmutableUser(id=1, username="john")
# user.username = "jane"  # this would raise an error

# Usable as a set element and as a dict key
users_set = {user}
users_dict = {user: "value"}
私有属性
from pydantic import BaseModel, PrivateAttr
from datetime import datetime
# Model carrying private attributes alongside its public fields.
class User(BaseModel):
    username: str
    email: str

    # Private attributes (excluded from validation and serialization)
    _created_at: datetime = PrivateAttr(default_factory=datetime.now)
    _internal_id: str = PrivateAttr(default="")

    def __init__(self, **data):
        # Let Pydantic populate the fields first; the private id below is
        # derived from the username.
        super().__init__(**data)
        self._internal_id = f"user_{self.username}_{id(self)}"


user = User(username="john", email="john@example.com")
print(user._created_at)   # accessible on the instance
print(user.model_dump())  # private attributes are not included
计算字段
from pydantic import BaseModel, computed_field
from typing import List
# Order whose subtotal, discount and total are derived from the stored fields.
# The property docstrings are left untouched: they may surface as field
# descriptions in the generated schema, so they are treated as runtime text.
class Order(BaseModel):
    items: List[dict]
    discount_rate: float = 0

    @computed_field
    @property
    def subtotal(self) -> float:
        """计算小计"""
        # Accumulate price * quantity over every line item.
        running = 0
        for entry in self.items:
            running += entry["price"] * entry["quantity"]
        return running

    @computed_field
    @property
    def discount(self) -> float:
        """计算折扣"""
        return self.subtotal * self.discount_rate

    @computed_field
    @property
    def total(self) -> float:
        """计算总价"""
        return self.subtotal - self.discount
order = Order(
    items=[
        {"name": "商品A", "price": 100, "quantity": 2},
        {"name": "商品B", "price": 50, "quantity": 3},
    ],
    discount_rate=0.1,
)
print(order.subtotal)      # 350   (100*2 + 50*3)
print(order.discount)      # 35.0  (350 * 0.1)
print(order.total)         # 315.0 (350 - 35.0)
print(order.model_dump())  # includes the computed fields
第四章:模型继承与组合
模型继承
from pydantic import BaseModel, Field
from typing import Optional
from datetime import datetime
# 基础模型
# Shared base: identity plus audit timestamps inherited by every entity below.
class BaseEntity(BaseModel):
    id: int
    created_at: datetime = Field(default_factory=datetime.now)
    updated_at: Optional[datetime] = None


# Inherit from the base model
class User(BaseEntity):
    username: str
    email: str
    is_active: bool = True


class Product(BaseEntity):
    name: str
    price: float
    stock: int = 0


# Multi-level inheritance: AdminUser -> User -> BaseEntity
class AdminUser(User):
    permissions: list[str] = []
    department: str
# A plain user only needs the inherited id plus its own required fields.
user = User(id=1, username="john", email="john@example.com")

# The admin additionally supplies the fields declared on AdminUser.
admin = AdminUser(
    id=2,
    username="admin",
    email="admin@example.com",
    permissions=["read", "write", "delete"],
    department="IT",
)
模型组合
from pydantic import BaseModel
from typing import Optional, List
# 组件模型
# Component models
class Address(BaseModel):
    street: str
    city: str
    country: str = "中国"
    zip_code: Optional[str] = None


class ContactInfo(BaseModel):
    email: str
    phone: Optional[str] = None
    # Nested model; may be omitted.
    address: Optional[Address] = None


class SocialLinks(BaseModel):
    twitter: Optional[str] = None
    github: Optional[str] = None
    linkedin: Optional[str] = None


# Composite model assembled from the components above
class UserProfile(BaseModel):
    username: str
    display_name: str
    bio: Optional[str] = None
    contact: ContactInfo
    social: Optional[SocialLinks] = None
    tags: List[str] = []
# 使用
# Build the nested pieces first, then assemble the profile from them.
address = Address(
    street="123 Main St",
    city="北京",
)
contact = ContactInfo(
    email="john@example.com",
    phone="13800138000",
    address=address,
)
profile = UserProfile(
    username="john",
    display_name="John Doe",
    contact=contact,
    social=SocialLinks(github="johndoe"),
)
泛型模型
from pydantic import BaseModel
from typing import TypeVar, Generic, List, Optional
# 定义类型变量
T = TypeVar("T")
# 泛型响应模型
# Generic response envelope; T is the payload type.
class Response(BaseModel, Generic[T]):
    success: bool = True
    message: str = ""
    data: Optional[T] = None


# Generic paginated response; T is the type of each item.
class PaginatedResponse(BaseModel, Generic[T]):
    items: List[T]
    total: int
    page: int
    page_size: int
    total_pages: int

    @property
    def has_next(self) -> bool:
        """True while the current page is before the last page."""
        return self.total_pages > self.page

    @property
    def has_prev(self) -> bool:
        """True for any page after the first."""
        return 1 < self.page


# Payload types used in the usage example
class User(BaseModel):
    id: int
    username: str


class Product(BaseModel):
    id: int
    name: str
# 类型安全的响应
# Parametrize the generic at construction time. A bare `Response(...)` leaves
# T unbound, so the payload would not be validated against `User` even though
# the variable is annotated as Response[User].
user_response: Response[User] = Response[User](
    data=User(id=1, username="john")
)
users_response: PaginatedResponse[User] = PaginatedResponse[User](
    items=[User(id=1, username="john")],
    total=100,
    page=1,
    page_size=10,
    total_pages=10
)
第五章:序列化与反序列化
模型序列化
from pydantic import BaseModel, Field
from datetime import datetime
from typing import Optional
# Model used to demonstrate the serialization options.
class User(BaseModel):
    id: int
    username: str
    email: str
    # Sensitive field — excluded explicitly in the examples that follow.
    password: str
    created_at: datetime
    deleted_at: Optional[datetime] = None
user = User(
    id=1,
    username="john",
    email="john@example.com",
    password="secret123",
    created_at=datetime.now(),
)

# Basic serialization
print(user.model_dump())       # to a dict
print(user.model_dump_json())  # to a JSON string

# Exclude fields
print(user.model_dump(exclude={"password"}))
print(user.model_dump(exclude={"password", "deleted_at"}))

# Include only the listed fields
print(user.model_dump(include={"id", "username"}))

# Exclude fields that were never set
print(user.model_dump(exclude_unset=True))

# Exclude fields still holding their default
print(user.model_dump(exclude_defaults=True))

# Exclude fields whose value is None
print(user.model_dump(exclude_none=True))


# Using aliases
class Item(BaseModel):
    item_id: int = Field(..., alias="id")


item = Item(id=1)
print(item.model_dump())               # {"item_id": 1}
print(item.model_dump(by_alias=True))  # {"id": 1}
自定义序列化
from pydantic import BaseModel, field_serializer, model_serializer
from datetime import datetime
from typing import Any
from decimal import Decimal
# Order with one custom serializer per field that needs special output.
class Order(BaseModel):
    id: int
    total: Decimal
    created_at: datetime
    items: list

    # Field serializers
    @field_serializer("total")
    def serialize_total(self, v: Decimal) -> str:
        """Render the amount with a currency sign and two decimals."""
        return f"¥{v:.2f}"

    @field_serializer("created_at")
    def serialize_created_at(self, v: datetime) -> str:
        """Render the timestamp as YYYY-MM-DD HH:MM:SS."""
        return v.strftime("%Y-%m-%d %H:%M:%S")

    @field_serializer("items")
    def serialize_items(self, v: list, _info) -> list:
        """Keep only the "name" key of every item."""
        trimmed = []
        for entry in v:
            trimmed.append({"name": entry["name"]})
        return trimmed


order = Order(
    id=1,
    total=Decimal("99.99"),
    created_at=datetime.now(),
    items=[{"name": "商品A", "price": 99.99}],
)
print(order.model_dump())
# {"id": 1, "total": "¥99.99", "created_at": "2024-01-01 12:00:00", "items": [{"name": "商品A"}]}
# (the created_at value shown is illustrative; it reflects the time of the run)
# Model serializer (full control over the serialized shape)
class CustomOrder(BaseModel):
    id: int
    data: dict

    @model_serializer
    def serialize_model(self) -> dict[str, Any]:
        """Return the custom dict: renamed keys plus a serialization timestamp."""
        stamp = datetime.now().isoformat()
        return {
            "order_id": self.id,
            "order_data": self.data,
            "serialized_at": stamp,
        }
JSON Schema 生成
from pydantic import BaseModel, Field
from typing import Optional, List
import json
# The class docstrings below are kept verbatim: they may be emitted as the
# "description" of the generated JSON Schema, so they count as runtime text.
class Address(BaseModel):
    """地址信息"""

    street: str = Field(..., description="街道地址")
    city: str = Field(..., description="城市")
    country: str = Field("中国", description="国家")


class User(BaseModel):
    """用户模型"""

    id: int = Field(..., description="用户ID", examples=[1, 2, 3])
    username: str = Field(..., min_length=3, max_length=20, description="用户名")
    email: str = Field(..., description="邮箱地址")
    age: Optional[int] = Field(None, ge=0, le=150, description="年龄")
    tags: List[str] = Field(default=[], description="用户标签")
    address: Optional[Address] = Field(None, description="地址")

    # Extra content merged into the generated schema: one complete example.
    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "id": 1,
                    "username": "john",
                    "email": "john@example.com",
                    "age": 25,
                    "tags": ["developer", "python"],
                }
            ]
        }
    }


# Generate the JSON Schema; ensure_ascii=False keeps the Chinese text readable.
schema = User.model_json_schema()
print(json.dumps(schema, indent=2, ensure_ascii=False))
第六章:FastAPI 中的 Pydantic 应用
请求和响应模型
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, EmailStr, Field
from typing import Optional, List
from datetime import datetime
app = FastAPI()
# Request model for creating a user
class UserCreate(BaseModel):
    username: str = Field(..., min_length=3, max_length=20)
    email: EmailStr
    password: str = Field(..., min_length=8)
    full_name: Optional[str] = None


# Request model for updating a user: every field is optional
class UserUpdate(BaseModel):
    email: Optional[EmailStr] = None
    full_name: Optional[str] = None
    is_active: Optional[bool] = None


# Response model for a user (contains no password field)
class UserResponse(BaseModel):
    id: int
    username: str
    email: EmailStr
    full_name: Optional[str] = None
    is_active: bool = True
    created_at: datetime

    model_config = {"from_attributes": True}


# Response model for a list of users
class UserListResponse(BaseModel):
    users: List[UserResponse]
    total: int
    page: int
    page_size: int
# API endpoints
@app.post("/users/", response_model=UserResponse, status_code=201)
async def create_user(user: UserCreate):
    # User-creation logic would go here; a hard-coded record is returned.
    created = {
        "id": 1,
        "username": user.username,
        "email": user.email,
        "full_name": user.full_name,
        "is_active": True,
        "created_at": datetime.now(),
    }
    return created


@app.get("/users/", response_model=UserListResponse)
async def list_users(page: int = 1, page_size: int = 10):
    # Empty result set that echoes the requested paging parameters.
    listing = {
        "users": [],
        "total": 0,
        "page": page,
        "page_size": page_size,
    }
    return listing
@app.patch("/users/{user_id}", response_model=UserResponse)
async def update_user(user_id: int, user: UserUpdate):
    # The original body was `pass`, which returns None and cannot satisfy
    # response_model=UserResponse. Like create_user, this uses a hard-coded
    # record standing in for a stored user.
    stored_user = {
        "id": user_id,
        "username": "john",
        "email": "john@example.com",
        "full_name": None,
        "is_active": True,
        "created_at": datetime.now(),
    }
    # PATCH semantics: overwrite only the fields the client actually sent.
    stored_user.update(user.model_dump(exclude_unset=True))
    return stored_user
响应模型过滤
from fastapi import FastAPI
from pydantic import BaseModel
from typing import Optional
app = FastAPI()
# Item model; internal_code is the field the endpoints below filter out.
class Item(BaseModel):
    name: str
    description: Optional[str] = None
    price: float
    tax: Optional[float] = None
    internal_code: str  # internal field


# Exclude specific fields
@app.get(
    "/items/{item_id}",
    response_model=Item,
    response_model_exclude={"internal_code"},
)
async def read_item(item_id: int):
    item = Item(
        name="商品",
        price=100,
        internal_code="INT123",
    )
    return item


# Include only specific fields
@app.get(
    "/items/{item_id}/summary",
    response_model=Item,
    response_model_include={"name", "price"},
)
async def read_item_summary(item_id: int):
    item = Item(
        name="商品",
        description="描述",
        price=100,
        internal_code="INT123",
    )
    return item


# Exclude fields that were never set
@app.get(
    "/items/{item_id}/minimal",
    response_model=Item,
    response_model_exclude_unset=True,
)
async def read_item_minimal(item_id: int):
    item = Item(
        name="商品",
        price=100,
        internal_code="INT123",
    )
    return item
多种输入输出模型
from fastapi import FastAPI
from pydantic import BaseModel, EmailStr
from typing import Union
app = FastAPI()
# Fields shared by the input, storage and output models.
class UserBase(BaseModel):
    email: EmailStr
    full_name: str


# Input model: the shared fields plus the plain-text password.
class UserCreate(UserBase):
    password: str
# Partial-update model: every field may be omitted.
# Spelled as Union[..., None] because this snippet imports only `Union` from
# typing; the original `Optional[...]` was an undefined name here.
class UserUpdate(BaseModel):
    email: Union[EmailStr, None] = None
    full_name: Union[str, None] = None
# Storage model: the shared fields plus id and the hashed password.
class UserInDB(UserBase):
    id: int
    hashed_password: str


# Output model: the shared fields plus id (no password-related field).
class UserResponse(UserBase):
    id: int


# Input is validated as UserCreate, while the response is declared as
# UserResponse.
@app.post("/users/", response_model=UserResponse)
async def create_user(user: UserCreate):
    # Placeholder password "hashing" for the example.
    return UserInDB(
        id=1,
        email=user.email,
        full_name=user.full_name,
        hashed_password="hashed_" + user.password,
    )
# Union-typed response
class Cat(BaseModel):
    type: str = "cat"
    name: str
    meows: bool


class Dog(BaseModel):
    type: str = "dog"
    name: str
    barks: bool


@app.get("/pets/{pet_id}", response_model=Union[Cat, Dog])
async def get_pet(pet_id: int):
    # Odd ids yield a Dog, even ids a Cat.
    is_odd = pet_id % 2 != 0
    if is_odd:
        return Dog(name="Buddy", barks=True)
    return Cat(name="Whiskers", meows=True)
常见问题
Q1:如何处理复杂的嵌套验证?
from pydantic import BaseModel, model_validator
from typing import List
# Line item nested inside the order model.
class Item(BaseModel):
    name: str
    quantity: int
    price: float
# Order whose cross-field rules are enforced after all fields are parsed.
class Order(BaseModel):
    items: List[Item]
    discount: float = 0

    @model_validator(mode="after")
    def validate_order(self):
        """Require at least one item and a discount no larger than the order total.

        Raises:
            ValueError: if the order is empty, or the discount exceeds the total.
        """
        # Check emptiness first: an empty order has a total of 0, so any
        # positive discount would otherwise be reported as "discount exceeds
        # total", hiding the real problem.
        if not self.items:
            raise ValueError("订单至少需要一个商品")
        # The discount must not exceed the order total.
        total = sum(item.price * item.quantity for item in self.items)
        if self.discount > total:
            raise ValueError("折扣不能超过总价")
        return self
Q2:如何实现条件必填字段?
from pydantic import BaseModel, model_validator
from typing import Optional
# Payment whose required detail field depends on the chosen method.
class Payment(BaseModel):
    method: str  # credit_card, bank_transfer, cash
    card_number: Optional[str] = None
    bank_account: Optional[str] = None

    @model_validator(mode="after")
    def validate_payment_details(self):
        """Require the detail field that belongs to the selected payment method."""
        # method -> (attribute that must be filled in, error message if it is not)
        requirements = {
            "credit_card": ("card_number", "信用卡支付需要提供卡号"),
            "bank_transfer": ("bank_account", "银行转账需要提供银行账号"),
        }
        rule = requirements.get(self.method)
        if rule is not None:
            attr_name, message = rule
            if not getattr(self, attr_name):
                raise ValueError(message)
        return self
Q3:如何处理循环引用?
from pydantic import BaseModel
from typing import Optional, List, TYPE_CHECKING
if TYPE_CHECKING:
from .department import Department
# NOTE(review): Department is defined further down in this same snippet, so
# the TYPE_CHECKING import from `.department` above looks redundant — confirm
# which layout (single module or split modules) the example intends.
class User(BaseModel):
    id: int
    name: str
    # Quoted forward reference: Department does not exist yet at this point.
    department: Optional["Department"] = None


class Department(BaseModel):
    id: int
    name: str
    manager: Optional[User] = None
    members: List[User] = []


# Rebuild the models to resolve the circular reference
User.model_rebuild()
Department.model_rebuild()
学习资源
- Pydantic 官方文档:https://docs.pydantic.dev/
- Pydantic V2 迁移指南:https://docs.pydantic.dev/latest/migration/
- FastAPI 请求体文档:https://fastapi.tiangolo.com/tutorial/body/
- JSON Schema 规范:https://json-schema.org/