LatexStripper

stripper.rb

LaTeX書式をプレーンテキストに直すプログラムです.ついでに,句読点を「。」「、」に変換します.入力はNKFでUTF-8に変換してから処理します.

require 'pp'
require 'kconv'
require 'fileutils'

# Convert LaTeX source text into plain text.
#
# The conversion is line-oriented. Every line is handled by the first rule
# that matches it, in this order:
#   * lines starting with "%" (LaTeX comments) are dropped;
#   * a line containing "\\" is cut at the last "\\";
#   * section / section* / paragraph headings are reduced to their title;
#   * \item lines become bullet lines starting with "・";
#   * lines starting with \begin, \end, \includegraphics or \label are dropped;
#   * \caption lines become numbered figure captions ("図N ...");
#   * \bibitem lines become numbered reference lines;
#   * anything else is passed through unchanged.
#
# Before the rules run, CRLF / lone CR line endings are normalised to LF and
# every "." / "," is replaced by "。" / "、".
# NOTE(review): the punctuation replacement touches every period and comma,
# including those inside numbers such as "3.14" -- confirm this is intended.
#
# @param text [String] LaTeX source, already encoded as UTF-8
# @return [String] the plain-text rendering
def strip_latex(text)
  fig_num = 0 # running figure number, bumped by each \caption line
  bib_num = 0 # running reference number, bumped by each \bibitem line

  # The replacement must be a double-quoted "\n": a single-quoted '\n' would
  # insert a literal backslash followed by "n" instead of a line feed.
  normalized = text.gsub(/\r\n?/, "\n").tr('.,', '。、')

  rendered = []
  normalized.each_line do |raw_line|
    # Detect comment lines BEFORE unescaping "\%", so that a line starting
    # with an escaped percent sign is not mistaken for a comment.
    next if raw_line.start_with?('%')

    # Show "\%" as a plain "%".
    line = raw_line.gsub('\\%', '%')

    piece =
      case line
      when /(.*)\\\\/
        "#{Regexp.last_match(1)}\n"
      when /\\*section\{(.*)\}(.*)/, /\\*section\*\{(.*)\}(.*)/, /\\paragraph\{(.*)\}(.*)/
        "\n #{Regexp.last_match(1)} \n"
      when /\\item\{(.*)\}(.*)/
        "・#{Regexp.last_match(1)}#{Regexp.last_match(2)}\n"
      when /\\item(.*)/
        "・#{Regexp.last_match(1)}\n"
      when /^\\begin/, /^\\end/, /^\\includegraphics/, /^\\label/
        nil # structural markup only: nothing to print
      when /^\\caption\{(.*)\}/
        "\n\t図#{fig_num += 1} #{Regexp.last_match(1)}\n\n"
      when /\\bibitem\{(.*)\}(.*)/
        "#{bib_num += 1} #{Regexp.last_match(2)}\n"
      else
        line
      end
    rendered << piece if piece
  end
  rendered.join
end

# Script entry point: read the file named by the first command-line argument,
# convert it to UTF-8 with NKF, and print the stripped text to stdout.
if __FILE__ == $PROGRAM_NAME
  if ARGV.empty?
    # Previously a missing argument crashed with a TypeError from File.open(nil).
    warn 'usage: ruby stripper.rb FILE.tex'
  else
    # File.read closes the file handle, unlike File.open(...).read.
    print strip_latex(NKF.nkf('-w', File.read(ARGV[0])))
  end
end


=begin #もっとも初期のversion, 何から始めたか分かりやすいでしょ...
text2.each {|line|    
  line.scan(/\\*section\{(.*)\}(.*)/) {|matched|
    print $'.chomp[0..-2]+"\n"
    print $1+"\n"
  }
}
=end
#grep '\\title{' *.tex
Last modified:2016/07/19 12:42:18
Keyword(s):
References: