google-video-subtitle


This is a Gauche program that fetches the subtitles of a YouTube video and converts them to SRT files.

Use it as a shell script, passing the YouTube video URI as the argument.

 #!/usr/bin/gosh 
  
 (use sxml.ssax) 
 (use sxml.sxpath) 
 (use rfc.uri) 
 (use rfc.http) 
  
 ;; A single subtitle cue.  Every slot is supplied through its keyword
 ;; when the instance is created with `make`.
 (define-class <line> ()
   (;; sequence number printed on the first line of the cue
    (num
     :init-keyword :num)
    ;; start time of the cue, in seconds
    (start
     :init-keyword :start)
    ;; how long the cue stays on screen, in seconds
    (dur
     :init-keyword :dur)
    ;; the subtitle text itself
    (text
     :init-keyword :text)))
  
 ;; Split a time given in (possibly fractional) seconds into two exact
 ;; integers, returned as two values:
 ;;   1. the whole seconds (truncated toward zero), and
 ;;   2. the remaining milliseconds.
 ;; NOTE: despite the name, the second value is milliseconds, not
 ;; microseconds.
 (define sec&micro
   (lambda (num)
     ;; Round once on the total number of milliseconds.  Converting the
     ;; fractional part on its own turns floating-point noise such as
     ;; 233.99999999999997 into a huge exact rational, and cannot carry
     ;; into the seconds when the fraction rounds up to 1000 ms.
     (quotient&remainder (round->exact (* num 1000)) 1000)))
  
 ;; Break a time in seconds into the pieces of a timestamp.
 ;; Returns four values: hours, minutes, seconds, and the sub-second
 ;; part exactly as produced by sec&micro.
 (define google-time->srt-time
   (lambda (google-time)
     (receive (total-seconds fraction) (sec&micro google-time)
       ;; peel off seconds first, then split what is left into h/m
       (receive (total-minutes secs) (quotient&remainder total-seconds 60)
         (receive (hrs mins) (quotient&remainder total-minutes 60)
           (values hrs mins secs fraction))))))
  
 ;; Render an SRT timestamp "HH:MM:SS,mmm".
 ;;   h, m, s -- hours, minutes, seconds (exact integers)
 ;;   mi      -- milliseconds
 ;; Returns the formatted string.
 (define format-time
   (lambda (h m s mi)
     ;; SRT requires fixed-width, zero-padded fields; printing the raw
     ;; numbers produced e.g. "0:1:5,7" instead of "00:01:05,007".
     ;; mi is coerced so the ~d directive always gets an exact integer,
     ;; even if a caller passes a fractional value.
     (format #f "~2,'0d:~2,'0d:~2,'0d,~3,'0d" h m s (truncate->exact mi))))
  
 ;; Print a <line> as one SRT cue: the cue number, a
 ;; "start --> end" timing line, the text, and a blank separator line.
 (define-method write-object ((L <line>) out)
   ;; Turn a time in seconds into its formatted timestamp string.
   (define (stamp seconds)
     (call-with-values
         (lambda () (google-time->srt-time seconds))
       format-time))
   (let* ((begin-at    (~ L 'start))
          (start-stamp (stamp begin-at))
          ;; the cue ends `dur` seconds after it starts
          (end-stamp   (stamp (+ begin-at (~ L 'dur)))))
     (format out
             "~s\n~a --> ~a\n~a\n\n"
             (~ L 'num)
             start-stamp
             end-stamp
             (~ L 'text))))
  
 ;; Extract the video id (the value of the "v" query parameter) from a
 ;; YouTube watch URI.
 ;;   uri -- the URI as a string
 ;; Returns the id string.  Signals an "invalid uri" error when the URI
 ;; has no query string, no "v" parameter, or an empty "v" value.
 (define-method get-youtube-id ((uri <string>))
   (let* ((qs (values-ref (uri-parse uri) 5)) ; 6th value of uri-parse = query
          ;; "a=1&b=2" -> (("a" "1") ("b" "2"))
          (params (if qs
                      (map (lambda (x) (string-split x #\=))
                           (string-split qs #\&))
                      '()))
          (v (assoc "v" params)))
     ;; Check the lookup explicitly: taking cadr of a failed assoc would
     ;; raise an unrelated "pair required" error.
     (if (and v
              (pair? (cdr v))
              (not (string=? (cadr v) "")))
         (cadr v)
         (error "invalid uri" uri))))
  
 ;; Ask video.google.com for the subtitle tracks of video `id`.
 ;; Returns a list with one (name lang-code) entry per <track> element
 ;; of the XML reply.
 (define-method languages-list ((id <string>))
   (receive (status headers body)
       (http-get "video.google.com"
                 `("/timedtext"
                   (type "list")
                   (v ,id)))
     (let* ((doc (ssax:xml->sxml (open-input-string body) '()))
            ;; value of the first attribute node matched by `path`
            (first-attr (lambda (path node)
                          (cadar ((sxpath path) node)))))
       (map (lambda (track)
              (list (first-attr "//@name" track)
                    (first-attr "//@lang_code" track)))
            ((sxpath "//track") doc)))))
  
 ;; Read a timedtext XML document from xml-port and write every <text>
 ;; element to srt-port as one SRT cue.
 ;; Cues are numbered from 1 in document order.  A cue that cannot be
 ;; converted is reported on the current output port and skipped; the
 ;; remaining cues are still written.
 (define-method xml->srt ((xml-port <port>)(srt-port <port>))
   (let ((i 0)
         (cues ((sxpath "//text") (ssax:xml->sxml xml-port '()))))
     (for-each
      (lambda (x)
        (inc! i 1)
        (guard (exc (#t (display "error with line ")(display i)(newline)))
          ;; x looks like (text (@ (start "..") (dur "..")) "content").
          ;; Look the attributes up by name so that their order, or an
          ;; extra attribute, does not break the conversion.
          (let* ((attrs  (cdr (cadr x)))
                 (number (lambda (key)
                           (string->number (cadr (assq key attrs)))))
                 (l (make <line>
                      :num i
                      :start (number 'start)
                      :dur (number 'dur)
                      :text (caddr x))))
            (write l srt-port))))
      cues)))
  
 ;; Download one subtitle track of video `id` from video.google.com,
 ;; selected by track `name` and language code `lang`.
 ;; Returns the body of the HTTP reply.
 (define-method xml-subtitles ((id <string>)
                               (name <string>)
                               (lang <string>))
   (let ((request `("/timedtext"
                    (type track)
                    (name ,name)
                    (lang ,lang)
                    (v ,id))))
     ;; http-get yields status, headers and body; only the body is used
     (receive (status headers body)
         (http-get "video.google.com" request)
       body)))
  
 ;; Script entry point.
 ;;   args -- command line as a list: program name, then the video URI.
 ;; Writes one "subtitle-<label>.srt" file per available subtitle track
 ;; into the current directory, where <label> is the track name, or the
 ;; language code when the track name is empty.
 ;; Returns the exit status: 0 on success, 1 when the URI is missing.
 (define main
   (lambda (args)
     (if (or (null? args) (null? (cdr args)))
         ;; No URI given: explain the usage instead of failing in cadr.
         (begin
           (format (current-error-port)
                   "usage: ~a <youtube-video-uri>\n"
                   (if (pair? args) (car args) "google-video-subtitle"))
           1)
         (let ((id (get-youtube-id (cadr args))))
           (for-each
            (lambda (l)
              (let* ((name (car l))
                     (lang (cadr l))
                     ;; Track names may be empty; fall back to the
                     ;; language code so that such tracks do not all
                     ;; overwrite the same "subtitle-.srt" file.
                     (label (if (string=? name "") lang name))
                     (srt-file (string-append "subtitle-" label ".srt")))
                (display "Writing ")(display srt-file)(newline)
                (call-with-output-file
                    srt-file
                  (lambda (out)
                    (call-with-input-string
                     (xml-subtitles id name lang)
                     (lambda (in) (xml->srt in out)))))))
            (languages-list id))
           ;; return an integer so gosh exits with a success status
           0))))