ソースコード公開 - COBOL技術者の憂鬱

今年初めにリリースしたRetroTubeですが、ソースコードを公開すると宣言しつつ、お化粧直しする余裕が取れず、これまでズルズルときてしまいました。

気が付けばもう６月…これではいかんということで、一念発起してなんとか皆様のお目に触れても恥ずかしくない程度にまで整形することができました。
ですので、本日より数日間にわたって公開していきたいと思います。GPLで公開します。

RetroTubeのシステムは、バッチ部分とオンライン部分の二つに大きく分かれているので、まずはバッチ部分から掲載します。
リスト１〜４は、それぞれ音楽・アニメ・邦画・洋画の各カテゴリについて、YouTubeから一括検索して結果を出力するプログラムです。
リスト５は、上記検索結果から新着情報を出力するものです。

あと、ここからは超重要なお知らせになってしまうのですが、日経ソフトウエア８月号（6/23発売）から、短期集中連載という形で「RetroTube開発記」を執筆させていただくことになっています。
（写真は８月号の予告で、「COBOL プログラマによるRubyプログラミング挑戦記」とありますが、これ実は、私のことです。）

単にソースコードの解説に留まらず、開発のきっかけとなった出来事から、開発中に考えていた事、公開後の世間からの反応など、様々な角度から楽しんでいただける内容になっていますので、是非大勢の方に読んでいただきたいと思っています。
よろしくお願いします。

それでは以下より、ソースコードを垂れ流していきます。

【リスト１】search_music.rb

#####################################
#【RetroTube】一括検索プログラム（邦楽）
#####################################

require "youtube"
require "kconv"
require "fastercsv"

# Initialize the YouTube API access library.
youtube = YouTube.new '(DeveloperID)'

# Set aside the previous run's result (newdata folder -> olddata folder).
File.rename("newdata/music.csv","olddata/music.old.csv")

# Search processing starts here.
# getdata holds the tag-search results for the artist currently being
# processed. It lives outside the row loop so that consecutive rows for the
# same artist reuse a single search.
getdata = []
beforeartist = nil
# The block form closes the output file even when a row raises.
File.open("newdata/music.csv",'w') do |musicout|
  FasterCSV.foreach("source/source_music.csv") do |line|
    # When the artist name (column 3) changes (key break), run a tag search
    # of at most 3 pages x 100 videos and store the results in getdata.
    if line[3] != beforeartist
      getdata = []
      1.upto(3) do |page|
        begin
          videos = youtube.videos_by_tag(Kconv.toutf8(line[3]),page,100)
          break if videos.nil? || videos.empty?
          videos.each do |video|
            # Commas in the video title are replaced with spaces.
            convtitle = Kconv.tosjis(video.title).gsub(',',' ')
            getdata.push([convtitle,video.id,video.thumbnail_url,video.url])
          end
        rescue StandardError => e
          # Best effort: keep the pages fetched so far, but say why paging stopped.
          warn "tag search failed for #{line[3].inspect} (page #{page}): #{e.class}: #{e.message}"
          break
        end
      end
    end

    # From the artist's search results, take the first video whose title
    # contains the song title (column 4) and write one output row for it.
    # The comparison is a plain substring test after upcasing both sides, so
    # characters such as "(", "?" or "+" in a song title are matched
    # literally instead of being interpreted as a regular expression.
    unless getdata.empty?
      wanted = Kconv.toutf8(line[4]).upcase
      hit = getdata.find { |getline| Kconv.toutf8(getline[0]).upcase.include?(wanted) }
      if hit
        musicout.puts line[0] + ',' + line[1] + ',' + line[2] + ',' +
                      hit[1] + ',' + hit[2] + ',' + hit[3]
      end
    end
    beforeartist = line[3]
  end
end

【リスト２】search_anime.rb

#####################################
#【RetroTube】一括検索プログラム（アニメ）
#####################################

require "kconv"
require "net/http"
require "fastercsv"

# Set aside the previous run's result (newdata folder -> olddata folder).
File.rename("newdata/anime.csv","olddata/anime.old.csv")

# CGI.escape is used below to percent-encode the search word.
require "cgi"

# Formats one output line: columns 0-2 of the source row, then the video id,
# thumbnail URL, video URL and the kind marker ("OP", "ED" or "NA").
# Raises if any part is nil, which the caller reports per row.
def anime_result_line(line, id, thumbnail_url, url, kind)
  line[0] + "," + line[1] + "," + line[2] + "," + id + "," + thumbnail_url + "," + url + "," + kind + "\n"
end

# Search processing starts here.
address = "www.youtube.com"
# NOTE(review): the developer ID is embedded in this URL - confirm it is meant to be public.
path = "/api2_rest?method=youtube.videos.list_by_category_and_tag&dev_id=6BQYABg1UFM&category_id=1&page=1&per_page=20&tag="

# Patterns that pull the individual fields out of the XML response body.
re_id = %r|<id>(.*?)</id>|u
re_title = %r|<title>(.*?)</title>|u
re_thumbnail_url = %r|<thumbnail_url>(.*?)</thumbnail_url>|u
re_url = %r|<url>(.*?)</url>|u
# Patterns tested against the lower-cased video title.
re_op = %r|op|u
re_ed = %r|ed|u
re_end = %r|end|u

# The block form closes the output file even when an exception escapes the loop.
File.open("newdata/anime.csv",'w') do |newdataout|
  FasterCSV.foreach("source/source_anime.csv") do |line|
    p line
    begin
      searchword_utf8 = line[4].kconv(Kconv::UTF8,  Kconv::SJIS)
      # Percent-encode the search word so that spaces and reserved characters
      # ("&", "#", "+", ...) cannot break the request line or the query string.
      # CGI.escape turns spaces into "+"; they are sent as "%20" instead.
      searchpath = path + CGI.escape(searchword_utf8).gsub('+','%20')
      body = Net::HTTP.get( address , searchpath )
      searchresult_id = body.scan(re_id)
      searchresult_title = body.scan(re_title)
      searchresult_thumbnail_url = body.scan(re_thumbnail_url)
      searchresult_url = body.scan(re_url)

      # Write the first video whose title contains "op" as OP and the first
      # whose title contains "ed"/"end" as ED; stop once both are found.
      op_found = false
      ed_found = false
      searchresult_id.zip(searchresult_title,searchresult_thumbnail_url,searchresult_url) do |zip_id,zip_title,zip_thumbnail_url,zip_url|
        title = zip_title[0].downcase
        is_op = title =~ re_op
        is_ed = title =~ re_ed || title =~ re_end
        if !op_found && is_op
          newdataout.print anime_result_line(line, zip_id[0], zip_thumbnail_url[0], zip_url[0], "OP")
          op_found = true
          # A video already written as OP is not written again as ED.
          ed_found = true if is_ed
        elsif !ed_found && is_ed
          newdataout.print anime_result_line(line, zip_id[0], zip_thumbnail_url[0], zip_url[0], "ED")
          ed_found = true
        end
        break if op_found && ed_found
      end

      # Neither kind was found: fall back to the first search result, marked NA.
      if !op_found && !ed_found && !searchresult_id.empty?
        newdataout.print anime_result_line(line, searchresult_id[0][0], searchresult_thumbnail_url[0][0],
                                           searchresult_url[0][0], "NA")
      end
    rescue StandardError => e
      # Keep going with the next row, but report what actually went wrong
      # (not every failure here is a timeout).
      puts "search error (#{line[0]}): #{e.class}: #{e.message}"
    end
  end
end

【リスト３】search_movie_j.rb

#####################################
#【RetroTube】一括検索プログラム（邦画）
#####################################

require "kconv"
require "net/http"
require "fastercsv"

# Set aside the previous run's result (newdata folder -> olddata folder).
File.rename("newdata/movie_j.csv","olddata/movie_j.old.csv")

# CGI.escape is used below to percent-encode the search word.
require "cgi"

# Search processing starts here.
address = "www.youtube.com"
# NOTE(review): the developer ID is embedded in this URL - confirm it is meant to be public.
path = "/api2_rest?method=youtube.videos.list_by_category_and_tag&dev_id=6BQYABg1UFM&category_id=1&page=1&per_page=1&tag="

# Loop-invariant patterns, built once instead of once per row.
re_japan = %r|日本|s
re_id = %r|<id>(.*?)</id>|u
re_thumbnail_url = %r|<thumbnail_url>(.*?)</thumbnail_url>|u
re_url = %r|<url>(.*?)</url>|u

# The block form closes the output file even when an exception escapes the loop.
File.open("newdata/movie_j.csv",'w') do |newdataout|
  FasterCSV.foreach("source/source_movie.csv") do |line|
    begin
      # This script only handles rows whose column 6 contains "日本".
      next unless re_japan =~ line[6]

      p line[0] + " " + line[1] + " " + line[5]
      # Column 5 is searched on its own when it is longer than 10 characters;
      # otherwise column 4 is appended to the search word.
      if line[5].split(//s).length > 10
        searchword = line[5]
      else
        searchword = line[5] + " " + line[4]
      end
      searchword_utf8 = searchword.kconv(Kconv::UTF8,  Kconv::SJIS)
      # Percent-encode the whole search word, not only spaces, so that
      # characters such as "&", "#" or "+" cannot cut off or alter the tag
      # parameter. CGI.escape turns spaces into "+"; they are sent as "%20".
      convpath = path + CGI.escape(searchword_utf8).gsub('+','%20')
      body = Net::HTTP.get( address , convpath )
      result_id = body.scan(re_id)
      result_thumbnail_url = body.scan(re_thumbnail_url)
      result_url = body.scan(re_url)
      # One quoted CSV row per result; embedded double quotes are doubled.
      result_id.zip(result_thumbnail_url,result_url) do |zip_id,zip_thumbnail_url,zip_url|
        newdataout.print line[0] + ',"' + line[2].gsub('"','""') + '","' + line[3].gsub('"','""') + '","' +
                         zip_id[0] + '","' + zip_thumbnail_url[0] + '","' + zip_url[0] + '"' + "\n"
      end
    rescue StandardError => e
      # Keep going with the next row, but report which row failed and why.
      puts "err (#{line[0]}): #{e.class}: #{e.message}"
    end
  end
end

【リスト４】search_movie_a.rb

#####################################
#【RetroTube】一括検索プログラム（洋画）
#####################################

require "kconv"
require "net/http"
require "fastercsv"

# Set aside the previous run's result (newdata folder -> olddata folder).
File.rename("newdata/movie_a.csv","olddata/movie_a.old.csv")

# CGI.escape is used below to percent-encode the search word.
require "cgi"

# Search processing starts here.
address = "www.youtube.com"
# NOTE(review): the developer ID is embedded in this URL - confirm it is meant to be public.
path = "/api2_rest?method=youtube.videos.list_by_category_and_tag&dev_id=6BQYABg1UFM&category_id=1&page=1&per_page=1&tag="

# Loop-invariant patterns, built once instead of once per row.
re_japan = %r|日本|s
re_id = %r|<id>(.*?)</id>|u
re_thumbnail_url = %r|<thumbnail_url>(.*?)</thumbnail_url>|u
re_url = %r|<url>(.*?)</url>|u

# The block form closes the output file even when an exception escapes the loop.
File.open("newdata/movie_a.csv",'w') do |newdataout|
  FasterCSV.foreach("source/source_movie.csv") do |line|
    begin
      # Rows whose column 6 contains "日本" are skipped by this script.
      next if re_japan =~ line[6]

      p line[0] + " " + line[1] + " " + line[5]
      # The search word is column 5 followed by column 4.
      searchword = line[5] + " " + line[4]
      searchword_utf8 = searchword.kconv(Kconv::UTF8,  Kconv::SJIS)
      # Percent-encode the whole search word, not only spaces, so that
      # characters such as "&", "#" or "+" cannot cut off or alter the tag
      # parameter. CGI.escape turns spaces into "+"; they are sent as "%20".
      convpath = path + CGI.escape(searchword_utf8).gsub('+','%20')
      body = Net::HTTP.get( address , convpath )
      result_id = body.scan(re_id)
      result_thumbnail_url = body.scan(re_thumbnail_url)
      result_url = body.scan(re_url)
      # One quoted CSV row per result; embedded double quotes are doubled.
      result_id.zip(result_thumbnail_url,result_url) do |zip_id,zip_thumbnail_url,zip_url|
        newdataout.print line[0] + ',"' + line[2].gsub('"','""') + '","' + line[3].gsub('"','""') + '","' +
                         zip_id[0] + '","' + zip_thumbnail_url[0] + '","' + zip_url[0] + '"' + "\n"
      end
    rescue StandardError => e
      # Keep going with the next row, but report which row failed and why.
      puts "err (#{line[0]}): #{e.class}: #{e.message}"
    end
  end
end

【リスト５】compare.rb

#####################################
#【RetroTube】新着情報出力プログラム
#####################################

require "kconv"
require "fastercsv"

# Set aside the previous run's output (newdata folder -> olddata folder).
File.rename("newdata/compare.csv","olddata/compare.old.csv")
File.rename("newdata/rss.rdf","olddata/rss.old.rdf")

# Yields every row of the CSV file new_path for which the CSV file old_path
# has no row with equal values in all of the columns listed in key_columns.
# The old rows are indexed in a hash first, so each new row costs one lookup
# instead of a scan over all old rows.
def each_new_row(old_path, new_path, key_columns)
  seen = {}
  FasterCSV.foreach(old_path) { |oldline| seen[oldline.values_at(*key_columns)] = true }
  FasterCSV.foreach(new_path) do |line|
    yield line unless seen[line.values_at(*key_columns)]
  end
end

# Compare the previous and the current search results for each category and
# write the difference to compare.csv as the list of new arrivals.
# Rows are written with FasterCSV.generate_line so that a field containing a
# comma or a double quote is quoted and compare.csv stays parseable.
# NOTE(review): other readers of compare.csv, if any, must parse quoted CSV fields.
# The block form closes the output file even when an exception is raised.
File.open("newdata/compare.csv",'w') do |compareout|
  # Music: a row is new when columns 1 and 2 were not both present before.
  each_new_row("olddata/music.old.csv","newdata/music.csv",[1,2]) do |line|
    compareout.print FasterCSV.generate_line(line.values_at(0,1,2,3,4,5) + ['music'])
  end

  # Anime: the kind marker in column 6 (e.g. "NA") is part of the key.
  each_new_row("olddata/anime.old.csv","newdata/anime.csv",[1,2,6]) do |line|
    # Unless the marker is "NA", it is appended to column 2.
    title = line[6] == "NA" ? line[2] : "#{line[2]} #{line[6]}"
    compareout.print FasterCSV.generate_line([line[0],line[1],title,line[3],line[4],line[5],'anime'])
  end

  # Japanese movies.
  each_new_row("olddata/movie_j.old.csv","newdata/movie_j.csv",[1,2]) do |line|
    compareout.print FasterCSV.generate_line(line.values_at(0,1,2,3,4,5) + ['movie_j'])
  end

  # Foreign movies.
  each_new_row("olddata/movie_a.old.csv","newdata/movie_a.csv",[1,2]) do |line|
    compareout.print FasterCSV.generate_line(line.values_at(0,1,2,3,4,5) + ['movie_a'])
  end
end

# Generate the RSS feed (rss.rdf) from the new-arrival list (compare.csv).

# Escapes the characters that are not allowed as-is in XML element text.
# "&" is replaced first so the entities produced for "<" and ">" stay intact.
def rss_escape(text)
  text.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;')
end

# Display label for each category value found in column 6 of compare.csv.
category_labels = {
  "music"   => '邦楽',
  "anime"   => 'アニメ',
  "movie_j" => '邦画',
  "movie_a" => '洋画'
}

rss_header = '<?xml version="1.0" encoding="utf-8" ?>
  <rdf:RDF
    xmlns="http://purl.org/rss/1.0/"
    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    xmlns:dc="http://purl.org/dc/elements/1.1/"  
    xml:lang="ja">

    <channel rdf:about="http://www.retro-tube.com/rss.rdf">
      <title>RetroTube</title>
      <link>http://www.retro-tube.com/</link>
      <description>RetroTube</description>
      <items>
      <rdf:Seq>'
rss_channel_end = '      </rdf:Seq>
      </items>
    </channel>'

# Read compare.csv once; the rows are needed for both the <items> index and
# the <item> entries.
rssrows = []
FasterCSV.foreach("newdata/compare.csv") { |rssline| rssrows.push(rssline) }

# The block form closes the output file even when a row raises.
File.open("newdata/rss.rdf",'w') do |rssout|
  rssout.puts rss_header
  rssrows.each do |rssline|
    rssout.puts '        <rdf:li rdf:resource="http://www.retro-tube.com/detail.cgi?movieid=' +
                Kconv.toutf8(rssline[3]) + '&amp;category=' + rssline[6] + '"/>'
  end
  rssout.puts rss_channel_end

  rssrows.each do |rssline|
    item_url = 'http://www.retro-tube.com/detail.cgi?movieid=' +
               Kconv.toutf8(rssline[3]) + '&amp;category=' + rssline[6]
    # An unknown category is shown under its raw value instead of raising on a nil label.
    categoryname = Kconv.toutf8(category_labels[rssline[6]] || rssline[6].to_s)
    rssout.puts '  <item rdf:about="' + item_url + '">'
    # Titles are escaped for "&", "<" and ">" so they cannot break the XML.
    rssout.puts '    <title>[' + categoryname + '] ' + rss_escape(Kconv.toutf8(rssline[2])) +
                ' - ' + rss_escape(Kconv.toutf8(rssline[1])) + ' (' + rssline[0] +')</title>'
    rssout.puts '    <link>' + item_url + '</link>'
    rssout.puts '  </item>'
  end
  rssout.print '</rdf:RDF>'
end