มีใครทำ Subtitle จาก AI บ้าง แนะนำทีครับ

แปลซับ

### เซลล์ 1 ###

# Install the `requests` package quietly (-q). The leading "!" is an
# IPython/Colab shell escape, so this line only runs inside a notebook cell.
!pip install requests -q

### เซลล์ 2 ###

import os
import re

import requests

### เซลล์ 3 ###

from google.colab import drive

# Mount Google Drive under /content/drive so the subtitle paths used below
# resolve. force_remount=True is passed on every run.
# NOTE(review): presumably this refreshes a stale mount - confirm against the
# google.colab documentation.
drive.mount("/content/drive", force_remount=True)

### เซลล์ 4 ###

# Source subtitle file (SRT) on the mounted Drive.
input_subtitle = "/content/drive/MyDrive/Colab Notebooks/untitled.srt"

# The translated file is written next to the source: same path with the
# extension removed and "_thai.srt" appended.
_subtitle_stem, _subtitle_ext = os.path.splitext(input_subtitle)
output_subtitle = f"{_subtitle_stem}_thai.srt"

### เซลล์ 5 ###

# OpenRouter credentials. Prefer the OPENROUTER_API_KEY environment variable so
# the secret does not have to be pasted into (and saved with) the notebook; the
# placeholder is kept as a fallback so editing the literal still works.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "YOUR_API_KEY")

# Model identifier sent in the "model" field of the chat-completions request.
MODEL = "google/gemini-2.5-pro"

# Maximum number of subtitle cues sent to the model in a single request.
BATCH_SIZE = 5000

### เซลล์ 6 ###

# Load the SRT file and split it into cues.
#
# "utf-8-sig" is used instead of "utf-8" so that a leading byte-order mark is
# dropped. With plain "utf-8" the BOM stays glued to the first cue number
# ("\ufeff1"), which str.strip() does not remove, so that cue's index would
# never match the "[1]" the model sends back and it would stay untranslated.
with open(input_subtitle, "r", encoding="utf-8-sig") as f:
    srt_text = f.read()

# Cues are separated by one or more blank (or whitespace-only) lines.
blocks = re.split(r"\n\s*\n", srt_text.strip())

# Each entry: {"index": cue number as text, "timestamp": timing line,
# "text": cue text with its lines joined by single spaces}.
subs = []

for block in blocks:
    lines = block.splitlines()

    # A usable cue needs an index line, a timestamp line and at least one
    # line of text; anything shorter is skipped.
    if len(lines) < 3:
        continue

    subs.append(
        {
            "index": lines[0].strip(),
            "timestamp": lines[1].strip(),
            # Multi-line cues are flattened so each cue occupies exactly one
            # line of the prompt that is sent to the model.
            "text": " ".join(line.strip() for line in lines[2:]),
        }
    )

print("Loaded:", len(subs), "blocks")

### เซลล์ 7 ###

from textwrap import dedent

# Translate `subs` in batches of BATCH_SIZE cues through the OpenRouter
# chat-completions endpoint and collect the results in `translated_texts`,
# keyed by cue index (as text).

# Static instruction header placed ahead of every batch. dedent() is applied
# to the template *before* the subtitle lines are appended: dedenting after
# interpolation is a no-op, because the interpolated lines start at column 0
# and reduce the common margin to nothing.
prompt_header = dedent(
    """

    แปล subtitle เป็น ภาษาไทย

    แปล subtitle เหมือน บทสนทนา

    แปล ทุก บรรทัด ห้าม แปล ข้าม บรรทัด

    ห้าม มี อักษร อังกฤษ/จีน/ญี่ปุ่น อยู่ใน ผลลัพธ์

    จำนวน index ต้นฉบับ เท่ากับ จำนวน index ผลลัพธ์

    หมวดคำ อนุภาค ห้าม ใช้ คำว่า "ครับ" , "คะ" , "ค่ะ" เด็ดขาด

    หมวดคำ สรรพนาม บุคคล 1 , 2 ให้ ใช้ คำว่า "ฉัน" , "คุณ" เท่านั้น

    ห้าม มี คำ/พยัญชนะ/สระ ซ้ำ ๆๆๆ ติด ๆๆๆ มากกว่า 5 ครั้ง อยู่ใน บรรทัด เดียวกัน

    บรรทัด ยาวกว่า 25 อักษร ตัดแบ่ง หลาย ๆๆๆ บรรทัด อยู่ใน index/timestamp เดียวกัน

    """
)

# Matches "[<index>] <text>" entries in the model output; the text runs up to
# the next "[<digits>]" marker or the end of the response. Compiled once
# because it is reused for every batch.
indexed_entry_re = re.compile(r"\[(\d+)\]\s*(.*?)(?=\[\d+\]|\Z)", re.S)

translated_texts = {}

for start in range(0, len(subs), BATCH_SIZE):

    batch = subs[start:start + BATCH_SIZE]

    # One "[index] text" line per cue.
    source_text = "\n".join(
        f"[{item['index']}] {item['text']}"
        for item in batch
    )

    prompt = f"{prompt_header}{source_text}\n\n"

    response = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json",
        },
        json={
            "model": MODEL,
            "messages": [
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            "temperature": 0,
            "max_tokens": 200000,
        },
        timeout=2000,
    )

    print()
    print(f"HTTP status: {response.status_code} {response.reason}")

    response.raise_for_status()
    result = response.json()

    # A 2xx response can still carry an error object instead of "choices";
    # fail with the server's message rather than an opaque KeyError.
    choices = result.get("choices") or []
    if not choices:
        raise RuntimeError(
            f"OpenRouter returned no choices: {result.get('error', result)}"
        )

    choice = choices[0]

    # "content" may be missing or null; treat that as an empty reply so the
    # diagnostics below are still printed.
    raw_content = ((choice.get("message") or {}).get("content") or "").strip()

    # The nested lookups are pulled out of the f-strings: a replacement field
    # spanning several lines inside a single-quoted f-string is a syntax
    # error before Python 3.12.
    usage = result.get("usage") or {}
    completion_details = usage.get("completion_tokens_details") or {}

    print(f"Model: {result.get('model')}")
    print(f"Provider: {result.get('provider')}")
    print(f"Prompt tokens: {usage.get('prompt_tokens', 0)}")
    print(f"Completion tokens: {usage.get('completion_tokens', 0)}")
    print(f"Reasoning tokens: {completion_details.get('reasoning_tokens', 0)}")
    print(f"Finish reason: {choice.get('finish_reason')}")
    print(f"Native finish reason: {choice.get('native_finish_reason')}")

    if choice.get("finish_reason") == "length":
        print("Warning: output hit the token limit; this batch is truncated")

    matches = indexed_entry_re.findall(raw_content)

    expected_count = len(batch)
    found_count = len(matches)

    print(f"Expected: {expected_count}, Found: {found_count}")

    for index, text in matches:
        # Drop blank lines inside a cue: an empty line terminates an SRT
        # block, so leaving one in would corrupt the output file.
        translated_texts[index] = "\n".join(
            line.strip()
            for line in text.splitlines()
            if line.strip()
        )

    # Report cues the model skipped; they keep their source text when the
    # output file is written.
    returned = {index for index, _ in matches}
    missing = [
        item["index"]
        for item in batch
        if item["index"] not in returned
    ]
    if missing:
        print(f"Warning: no translation returned for index: {', '.join(missing)}")

    batch_start = start + 1
    batch_end = min(start + BATCH_SIZE, len(subs))

    print(f"Done: {batch_start}-{batch_end}")
    print()

    # Whitespace is collapsed so the whole reply prints on a single line.
    print(f"Raw response: {' '.join(raw_content.split())}")
    print()

### เซลล์ 8 ###

# Rebuild the SRT file: for every cue emit its index, its timestamp, the
# translated text and a blank separator line.
output = []

for item in subs:

    # Fall back to the source text when the cue has no translation *or* the
    # translation is empty; an SRT block without a text line is malformed.
    cue_text = translated_texts.get(item["index"]) or item["text"]

    # A blank line ends an SRT block, so none may survive inside a cue.
    cue_text = "\n".join(
        line
        for line in cue_text.splitlines()
        if line.strip()
    )

    output.extend(
        [
            item["index"],
            item["timestamp"],
            cue_text,
            "",
        ]
    )

with open(output_subtitle, "w", encoding="utf-8") as f:
    f.write("\n".join(output))

print("Saved:", output_subtitle)

### เซลล์ 9 ###

from google.colab import files

# Hand the translated subtitle file to google.colab's download helper.
# NOTE(review): presumably this triggers a browser download - confirm against
# the google.colab documentation.
files.download(output_subtitle)