ensure_track_schema(df: DataFrame, schema_name: str, strict: bool = False) -> tuple[pd.DataFrame, Dict[str, Iterable[str]]]
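Validate that df satisfies the schema registered under schema_name. Returns (df, report_dict).
report_dict contains keys: missing_required, missing_prefixes, missing_recommended; it is empty when no schema is registered under schema_name.
If strict=True and any required columns or required prefixes are missing, raises ValueError.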
Source code in src/mosaic/core/schema.py
def ensure_track_schema(df: pd.DataFrame, schema_name: str, strict: bool = False) -> tuple[pd.DataFrame, Dict[str, Iterable[str]]]:
    """
    Validate that ``df`` satisfies the schema registered under ``schema_name``.

    The frame itself is never modified; it is returned as-is together with a
    report describing which schema expectations are not met.

    Args:
        df: Frame whose column labels are checked.
        schema_name: Key into ``TRACK_SCHEMAS``.
        strict: When True, missing required columns or required prefixes
            raise instead of only being reported.

    Returns:
        ``(df, report)``. ``report`` is an empty dict when no schema is
        registered under ``schema_name``; otherwise it has the keys
        ``missing_required``, ``missing_prefixes`` and ``missing_recommended``,
        each mapping to a list of names.

    Raises:
        ValueError: If ``strict`` is True and any required column or required
            prefix is missing. Missing recommended columns never raise.
    """
    if schema_name not in TRACK_SCHEMAS:
        # no schema registered -> nothing to validate
        return df, {}
    sch = TRACK_SCHEMAS[schema_name]

    missing_required = sorted(c for c in (sch.required or set()) if c not in df.columns)

    # Only string labels can match a prefix. Filtering them once up front keeps
    # non-string labels (e.g. integer columns) from raising AttributeError on
    # .startswith, and avoids re-scanning df.columns for every prefix.
    str_columns = [col for col in df.columns if isinstance(col, str)]
    missing_prefixes = [
        pref
        for pref in (sch.required_prefixes or ())
        if not any(col.startswith(pref) for col in str_columns)
    ]

    missing_recommended = sorted(c for c in (sch.recommended or set()) if c not in df.columns)

    report = {
        "missing_required": missing_required,
        "missing_prefixes": missing_prefixes,
        "missing_recommended": missing_recommended,
    }

    # Recommended columns are advisory: they are reported but never fatal.
    if strict and (missing_required or missing_prefixes):
        raise ValueError(f"Schema '{schema_name}' validation failed: {report}")
    if missing_required or missing_prefixes or missing_recommended:
        print(f"[schema:{schema_name}] Validation report -> {report}")
    return df, report
|