Files
FiDA-3D-Trellis/multi_image_to_3D.py
2026-04-13 11:21:23 +08:00

113 lines
3.9 KiB
Python
Executable File

#!/usr/bin/env python3
#-*- coding: utf-8 -*-
import os
import argparse
import numpy as np
import imageio
from PIL import Image
from trellis.pipelines import TrellisImageTo3DPipeline
from trellis.utils import render_utils, postprocessing_utils
def build_parser():
    """Build the command-line parser for the multi-image -> 3D script.

    Returns:
        argparse.ArgumentParser: parser exposing the input images, the output
        directory, sampler settings, and a paired ``--X`` / ``--no-X`` switch
        for each optional output (video, GLB, PLY). All three outputs are
        enabled by default.
    """
    # The banner must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, so a positional string would
    # replace the program name in the usage line instead of describing the tool.
    p = argparse.ArgumentParser(
        description="TRELLIS CLI: multi-image -> 3D (video + optional GLB/PLY)",
    )

    p.add_argument(
        "-i", "--images",
        nargs="+",
        required=True,
        help="Input image paths (space-separated), e.g. -i a.png b.png c.png",
    )
    p.add_argument(
        "-o", "--out_dir",
        default="trellis_out_multi",
        help="Output directory.",
    )

    # Sampler settings forwarded to the pipeline call.
    p.add_argument("--seed", type=int, default=1,
                   help="Seed passed to the pipeline.")
    p.add_argument("--steps_sparse", type=int, default=12,
                   help="'steps' value for the sparse-structure sampler.")
    p.add_argument("--cfg_sparse", type=float, default=7.5,
                   help="'cfg_strength' value for the sparse-structure sampler.")
    p.add_argument("--steps_slat", type=int, default=12,
                   help="'steps' value for the SLAT sampler.")
    p.add_argument("--cfg_slat", type=float, default=3.0,
                   help="'cfg_strength' value for the SLAT sampler.")

    # Render video (default True); use --no-save_video to turn it off.
    p.add_argument("--save_video", dest="save_video", action="store_true", default=True)
    p.add_argument("--no-save_video", dest="save_video", action="store_false")
    p.add_argument("--video_name", type=str, default="sample_multi.mp4")
    p.add_argument("--fps", type=int, default=30)

    # Export GLB (default True); use --no-export_glb to turn it off.
    p.add_argument("--export_glb", dest="export_glb", action="store_true", default=True)
    p.add_argument("--no-export_glb", dest="export_glb", action="store_false")
    p.add_argument("--glb_name", type=str, default="sample_multi.glb")
    p.add_argument("--texture_size", type=int, default=1024)
    p.add_argument("--simplify", type=float, default=0.95)

    # Save PLY (default True); use --no-save_ply to turn it off.
    p.add_argument("--save_ply", dest="save_ply", action="store_true", default=True)
    p.add_argument("--no-save_ply", dest="save_ply", action="store_false")
    p.add_argument("--ply_name", type=str, default="sample_multi.ply")

    # Env passthrough (optional): the value is exported as SPCONV_ALGO.
    p.add_argument("--spconv_algo", type=str, default="native", choices=["native", "auto"])
    # An attention-backend passthrough is currently disabled:
    # p.add_argument("--attn_backend", type=str, default="", choices=["", "flash-attn", "xformers"])
    return p
def _load_images(paths):
    """Open every path in *paths* and return the decoded PIL images.

    Pixel data is read inside a ``with`` block so each file handle is
    released immediately rather than staying open on a lazily loaded image.
    Any error raised by ``Image.open`` (e.g. a missing file) propagates.
    """
    images = []
    for path in paths:
        with Image.open(path) as img:
            img.load()
        images.append(img)
    return images


def main():
    """Run the TRELLIS multi-image pipeline and write the requested outputs.

    Parses the command line, loads the input images, runs
    ``pipeline.run_multi_image`` and, depending on the flags, writes a
    side-by-side video, a GLB file and/or a PLY file into ``args.out_dir``.
    """
    args = build_parser().parse_args()
    os.makedirs(args.out_dir, exist_ok=True)

    # NOTE(review): this is set after the module-level ``trellis`` imports
    # have already run. If TRELLIS reads SPCONV_ALGO at import time the value
    # chosen here has no effect -- confirm, and if so export it before the
    # imports at the top of the file.
    os.environ["SPCONV_ALGO"] = args.spconv_algo
    # if args.attn_backend:
    #     os.environ["ATTN_BACKEND"] = args.attn_backend

    # Load the inputs before the model so a bad path fails immediately
    # instead of after the pretrained weights have been fetched and moved
    # to the GPU.
    images = _load_images(args.images)

    pipeline = TrellisImageTo3DPipeline.from_pretrained("microsoft/TRELLIS-image-large")
    pipeline.cuda()

    outputs = pipeline.run_multi_image(
        images,
        seed=args.seed,
        sparse_structure_sampler_params={
            "steps": args.steps_sparse,
            "cfg_strength": args.cfg_sparse,
        },
        slat_sampler_params={
            "steps": args.steps_slat,
            "cfg_strength": args.cfg_slat,
        },
    )

    # For multi-image, TRELLIS still returns list-like outputs; export the
    # first asset by default.
    gs = outputs["gaussian"][0]
    mesh = outputs["mesh"][0]

    if args.save_video:
        video_gs = render_utils.render_video(gs)["color"]
        video_mesh = render_utils.render_video(mesh)["normal"]
        # Join each gaussian frame with the matching mesh frame along axis 1.
        video = [np.concatenate([fg, fm], axis=1) for fg, fm in zip(video_gs, video_mesh)]
        out_mp4 = os.path.join(args.out_dir, args.video_name)
        imageio.mimsave(out_mp4, video, fps=args.fps)
        print(f"[ok] saved video: {out_mp4}")

    if args.export_glb:
        out_glb = os.path.join(args.out_dir, args.glb_name)
        glb = postprocessing_utils.to_glb(
            gs,
            mesh,
            simplify=args.simplify,
            texture_size=args.texture_size,
        )
        glb.export(out_glb)
        print(f"[ok] exported glb: {out_glb}")

    if args.save_ply:
        out_ply = os.path.join(args.out_dir, args.ply_name)
        gs.save_ply(out_ply)
        print(f"[ok] saved ply: {out_ply}")
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()