Talking Avatar Videos
Create realistic talking avatar videos by combining portrait images with audio. The Mirako API generates lip-synced videos where avatars speak with natural mouth movements and expressions.
Overview
Talking avatar videos are perfect for:
- Personalized video messages
- Educational content creation
- Marketing and advertising
- Virtual presentations
- Customer service videos
- Social media content
Quick Start
Start Generating Talking Avatar Video
Generating a talking avatar video is an async process that involves:
- Start the generation task - Send a request with an image and an audio file.
- Poll for status - Check the status of the video generation task.
Alternatively, you can make use of webhooks
to get notified when the video is ready.
To start generating a talking avatar video:
python
import requests
import base64
import time
# API configuration
# Placeholder credential: substitute your own API key before running.
API_KEY = "your_api_key_here"
BASE_URL = "https://mirako.co"

# Headers shared by every request below: bearer-token auth and a JSON body.
headers = {
    "Authorization": "Bearer " + API_KEY,
    "Content-Type": "application/json",
}
def create_talking_avatar(image_path, audio_path, timeout=60):
    """Start an asynchronous talking avatar generation task.

    Reads the image and audio files, base64-encodes them, and POSTs them
    as a JSON payload to the ``async_generate_talking_avatar`` endpoint.

    Args:
        image_path: Path to the portrait image file.
        audio_path: Path to the speech audio file.
        timeout: Seconds to wait for the server before giving up. Passed
            to ``requests.post`` so a stalled connection cannot hang the
            caller indefinitely.

    Returns:
        The ``task_id`` from the response's ``data`` object on HTTP 200,
        otherwise ``None`` (the status code and body are printed).

    Raises:
        OSError: If either input file cannot be opened or read.
        requests.RequestException: On network failure or timeout.
    """
    # Encode image to base64
    with open(image_path, "rb") as image_file:
        image_data = base64.b64encode(image_file.read()).decode("utf-8")

    # Encode audio to base64
    with open(audio_path, "rb") as audio_file:
        audio_data = base64.b64encode(audio_file.read()).decode("utf-8")

    payload = {
        "image": image_data,
        "audio": audio_data,
    }

    response = requests.post(
        f"{BASE_URL}/v1/video/async_generate_talking_avatar",
        headers=headers,
        json=payload,
        timeout=timeout,
    )

    # Anything other than 200 is reported and signalled with None.
    if response.status_code != 200:
        print(f"❌ Error: {response.status_code}")
        print(response.text)
        return None

    task_id = response.json()["data"]["task_id"]
    print("✅ Talking avatar generation started!")
    print(f"Task ID: {task_id}")
    return task_id
# Generate talking avatar
task_id = create_talking_avatar(
    image_path="portrait.jpg",
    audio_path="speech.wav",
)
Input Requirements
Image Requirements
- Formats: JPG, PNG
- Size: Minimum 512x512 pixels, maximum 1920x1080 pixels.
- Quality: Clear frontal face, good lighting
- Face: Single person, looking forward, with the mouth closed
- Background: Simple background preferred
Audio Requirements
- Formats: WAV, MP3
- Duration: Up to 60 seconds per video
- Quality: Clear speech, minimal background noise
- Sample Rate: 44.1kHz or 48kHz recommended.
Polling for Video Status
Video generation typically takes 15 seconds to 2 minutes depending on audio length:
python
def check_video_status(task_id, timeout=30):
    """Check the status of a talking avatar generation task.

    Sends a GET request to the task's status endpoint and translates the
    reported status into a small result dict.

    Args:
        task_id: ID of the generation task to query.
        timeout: Seconds to wait for the server before giving up. Passed
            to ``requests.get`` so a stalled connection cannot hang the
            caller indefinitely.

    Returns:
        A dict whose ``"status"`` key is one of:

        - ``"completed"``: also carries ``"video_url"`` and ``"duration"``.
        - ``"processing"``: the task is ``IN_QUEUE`` or ``IN_PROGRESS``.
        - ``"failed"``: the task is ``FAILED``, ``CANCELED`` or ``TIMED_OUT``.
        - ``"unknown"``: the API reported an unrecognised status.
        - ``"error"``: the request failed or returned a non-200 response.
    """
    try:
        response = requests.get(
            f"{BASE_URL}/v1/video/async_generate_talking_avatar/{task_id}/status",
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # A transient network problem is reported like any other status
        # error, so a polling caller can simply retry on the next cycle.
        print(f"Error checking status: {exc}")
        return {"status": "error"}

    if response.status_code != 200:
        print(f"Error checking status: {response.text}")
        return {"status": "error"}

    result = response.json()["data"]
    status = result["status"]
    print(f"Status: {status}")

    if status == "COMPLETED":
        video_url = result.get("file_url")
        duration = result.get("output_duration")
        print("🎉 Video generation completed!")
        print(f"Video URL: {video_url}")
        print(f"Duration: {duration} seconds")
        return {"status": "completed", "video_url": video_url, "duration": duration}

    if status in ("IN_QUEUE", "IN_PROGRESS"):
        print("⏳ Video generation in progress...")
        return {"status": "processing"}

    if status in ("FAILED", "CANCELED", "TIMED_OUT"):
        print(f"❌ Video generation failed: {status}")
        return {"status": "failed"}

    print(f"Unknown status: {status}")
    return {"status": "unknown"}
def wait_for_video_completion(task_id, max_wait_time=300, poll_interval=10):
    """Poll a generation task until it completes, fails, or times out.

    Args:
        task_id: ID of the generation task to wait for.
        max_wait_time: Maximum number of seconds to keep polling
            (default 300, i.e. 5 minutes).
        poll_interval: Seconds to sleep between status checks
            (default 10).

    Returns:
        The result dict from ``check_video_status`` once its status is
        ``"completed"``; ``None`` if the task failed or the time limit
        was reached. Any other status (for example ``"processing"`` or
        ``"error"``) keeps the loop polling.
    """
    # A monotonic clock keeps the deadline immune to system clock changes.
    deadline = time.monotonic() + max_wait_time

    while time.monotonic() < deadline:
        result = check_video_status(task_id)

        if result["status"] == "completed":
            return result
        if result["status"] == "failed":
            return None

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break

        # Never sleep past the deadline, so the total wait stays within
        # max_wait_time instead of overshooting by one full interval.
        time.sleep(max(0, min(poll_interval, remaining)))

    print("⏰ Timeout: Video didn't complete within time limit")
    return None
# Wait for completion
if task_id:
    video_result = wait_for_video_completion(task_id)
    # A falsy result means the task failed or the wait timed out.
    if not video_result:
        print("❌ Video generation failed or timed out")
    else:
        print(f"✅ Video ready: {video_result['video_url']}")
Webhook Support
Using webhooks for callbacks is useful when you run on a serverless node, where a long-running polling process is not ideal.
python
def create_talking_avatar_with_webhook(
    image_path, audio_path, webhook_url, webhook_auth_token=None, timeout=60
):
    """Start a talking avatar generation task with a webhook notification.

    Reads the image and audio files, base64-encodes them, and POSTs them
    together with the webhook settings to the
    ``async_generate_talking_avatar`` endpoint.

    Args:
        image_path: Path to the portrait image file.
        audio_path: Path to the speech audio file.
        webhook_url: URL placed in the payload's ``webhook.url`` field.
        webhook_auth_token: Optional token placed in ``webhook.auth_token``.
            When ``None`` the field is left out of the payload.
        timeout: Seconds to wait for the server before giving up. Passed
            to ``requests.post`` so a stalled connection cannot hang the
            caller indefinitely.

    Returns:
        The ``task_id`` from the response's ``data`` object on HTTP 200,
        otherwise ``None`` (the response body is printed).

    Raises:
        OSError: If either input file cannot be opened or read.
        requests.RequestException: On network failure or timeout.
    """
    # Encode files
    with open(image_path, "rb") as f:
        image_data = base64.b64encode(f.read()).decode("utf-8")

    with open(audio_path, "rb") as f:
        audio_data = base64.b64encode(f.read()).decode("utf-8")

    webhook = {"url": webhook_url}
    # The token is optional: omit the key rather than send an explicit null.
    if webhook_auth_token is not None:
        webhook["auth_token"] = webhook_auth_token

    payload = {
        "image": image_data,
        "audio": audio_data,
        "webhook": webhook,
    }

    response = requests.post(
        f"{BASE_URL}/v1/video/async_generate_talking_avatar",
        headers=headers,
        json=payload,
        timeout=timeout,
    )

    if response.status_code != 200:
        print(f"❌ Error: {response.text}")
        return None

    task_id = response.json()["data"]["task_id"]
    print("✅ Video generation started with webhook notification")
    print(f"Task ID: {task_id}")
    return task_id
# Use webhook for notifications
task_id = create_talking_avatar_with_webhook(
    image_path="portrait.jpg",
    audio_path="speech.wav",
    webhook_url="https://your-app.com/webhook/video-complete",
    webhook_auth_token="your_webhook_auth_token",
)
Response Video Format
The generated video will be in MP4 format with H.264 encoding at 25 fps, with the same dimensions as the input image.